29 changes: 29 additions & 0 deletions examples/01-tensor.rs
@@ -0,0 +1,29 @@
//! Intro to dfdx::tensor

use rand::thread_rng;

use dfdx::tensor::{tensor, HasArrayData, Tensor1D, Tensor2D, Tensor3D, TensorCreator};

fn main() {
// easily create tensors using the `tensor` function
let _: Tensor1D<5> = tensor([1.0, 2.0, 3.0, 4.0, 5.0]);

// you can also use [TensorCreator::new]
let _: Tensor1D<5> = TensorCreator::new([1.0, 2.0, 3.0, 4.0, 5.0]);

// [TensorCreator] has other helpful methods such as all zeros and all ones
let _: Tensor2D<2, 3> = TensorCreator::zeros();
let _: Tensor2D<2, 3> = TensorCreator::ones();

// we can also create random tensors
let mut rng = thread_rng();
let a: Tensor3D<2, 3, 4> = TensorCreator::randn(&mut rng);

// use `.data()` to access the underlying array
let a_data: &[[[f32; 4]; 3]; 2] = a.data();
println!("a={:?}", a_data);

// you can clone() a tensor (or duplicate()):
let a_copy = a.clone();
assert_eq!(a_copy.data(), a.data());
}
32 changes: 32 additions & 0 deletions examples/02-ops.rs
@@ -0,0 +1,32 @@
//! Intro to dfdx::tensor_ops

use rand::prelude::*;

use dfdx::tensor::{HasArrayData, Tensor0D, Tensor2D, TensorCreator};
use dfdx::tensor_ops::add;

fn main() {
let mut rng = StdRng::seed_from_u64(0);

let a: Tensor2D<2, 3> = TensorCreator::randn(&mut rng);
dbg!(a.data());

let b: Tensor2D<2, 3> = TensorCreator::randn(&mut rng);
dbg!(b.data());

// we can do binary operations like add two tensors together
let c = add(a, &b);
dbg!(c.data());

// or unary operations like apply the `relu` function to each element
let d = c.relu();
dbg!(d.data());

// we can add/sub/mul/div scalar values to tensors
let e = d + 0.5;
dbg!(e.data());

// or reduce tensors to smaller sizes
let f: Tensor0D = e.mean();
dbg!(f.data());
}
29 changes: 29 additions & 0 deletions examples/03-nn.rs
@@ -0,0 +1,29 @@
//! Intro to dfdx::nn

use rand::prelude::*;

use dfdx::nn::{Linear, Module, ReLU, ResetParams};
use dfdx::tensor::{Tensor1D, Tensor2D, TensorCreator};

fn main() {
// nn exposes many different neural network types, like the Linear layer!
let mut m: Linear<4, 2> = Default::default();

// at first they are initialized to zeros, but you can randomize them too
let mut rng = StdRng::seed_from_u64(0);
m.reset_params(&mut rng);

// they act on tensors using the forward method
let x: Tensor1D<4> = TensorCreator::zeros();
let _: Tensor1D<2> = m.forward(x);

// most of them can also act on many different shapes of tensors
let x: Tensor2D<10, 4> = TensorCreator::zeros();
let _: Tensor2D<10, 2> = m.forward(x);

// you can also combine multiple modules with tuples
let mlp: (Linear<4, 2>, ReLU, Linear<2, 1>) = Default::default();

let x: Tensor1D<4> = TensorCreator::zeros();
let _: Tensor1D<1> = mlp.forward(x);
}
35 changes: 35 additions & 0 deletions examples/04-gradients.rs
@@ -0,0 +1,35 @@
//! Intro to dfdx::gradients and tapes

use rand::prelude::*;

use dfdx::gradients::{Gradients, NoneTape, OwnedTape};
use dfdx::tensor::{Tensor0D, Tensor2D, TensorCreator};
use dfdx::tensor_ops::matmul;

fn main() {
let mut rng = StdRng::seed_from_u64(0);

// tensors are first created with no tapes on them - the NoneTape!
let weight: Tensor2D<4, 2, NoneTape> = TensorCreator::randn(&mut rng);
let a: Tensor2D<3, 4, NoneTape> = TensorCreator::randn(&mut rng);

// the first step to tracing is to call .trace()
// this sticks a gradient tape into the input tensor!
let b: Tensor2D<3, 4, OwnedTape> = a.trace();

// the tape will automatically move around as you perform ops
let c: Tensor2D<3, 2, OwnedTape> = matmul(b, &weight);
let d: Tensor2D<3, 2, OwnedTape> = c.sin();
let e: Tensor0D<OwnedTape> = d.mean();

// finally you can use .backward() to extract the gradients!
let gradients: Gradients = e.backward();

// now you can extract gradients for specific tensors
// by querying with them
let weight_grad: &[[f32; 2]; 4] = gradients.ref_gradient(&weight);
dbg!(weight_grad);

let a_grad: &[[f32; 4]; 3] = gradients.ref_gradient(&a);
dbg!(a_grad);
}
60 changes: 60 additions & 0 deletions examples/05-optim.rs
@@ -0,0 +1,60 @@
//! Intro to dfdx::optim

use rand::prelude::*;

use dfdx::gradients::{Gradients, OwnedTape};
use dfdx::losses::mse_loss;
use dfdx::nn::{Linear, Module, ReLU, ResetParams, Tanh};
use dfdx::optim::{Momentum, Optimizer, Sgd, SgdConfig};
use dfdx::tensor::{HasArrayData, Tensor2D, TensorCreator};

// first let's declare our neural network to optimize
type Mlp = (
(Linear<5, 32>, ReLU),
(Linear<32, 32>, ReLU),
(Linear<32, 2>, Tanh),
);

fn main() {
let mut rng = StdRng::seed_from_u64(0);

// The first step to optimizing is to initialize the optimizer.
// Here we construct a stochastic gradient descent optimizer
// for our Mlp.
let mut sgd: Sgd<Mlp> = Sgd::new(SgdConfig {
lr: 1e-1,
momentum: Some(Momentum::Nesterov(0.9)),
});

// let's initialize our model and some dummy data
let mut mlp: Mlp = Default::default();
mlp.reset_params(&mut rng);
let x: Tensor2D<3, 5> = TensorCreator::randn(&mut rng);
let y: Tensor2D<3, 2> = TensorCreator::randn(&mut rng);

// first we pass our gradient tracing input through the network
let prediction: Tensor2D<3, 2, OwnedTape> = mlp.forward(x.trace());

// next compute the loss against the target dummy data
let loss = mse_loss(prediction, &y);
dbg!(loss.data());

// extract the gradients
let gradients: Gradients = loss.backward();

// the final step is to use our optimizer to update our model
// given the gradients we've calculated.
// This will modify our model!
sgd.update(&mut mlp, gradients)
.expect("Oops, there were some unused params");

// let's do this a couple times to make sure the loss decreases!
for i in 0..5 {
let prediction = mlp.forward(x.trace());
let loss = mse_loss(prediction, &y);
println!("Loss after update {i}: {:?}", loss.data());
let gradients: Gradients = loss.backward();
sgd.update(&mut mlp, gradients)
.expect("Oops, there were some unused params");
}
}
12 changes: 12 additions & 0 deletions examples/mnist_classifier.rs → examples/06-mnist.rs
@@ -1,3 +1,7 @@
//! This example ties all the previous ones together
//! to build a neural network that learns to recognize
//! the MNIST digits.

use dfdx::prelude::*;
use indicatif::ProgressBar;
use mnist::*;
@@ -39,16 +43,21 @@ impl MnistDataset {
}
}

// our network structure
type Mlp = (
(Linear<784, 512>, ReLU),
(Linear<512, 128>, ReLU),
(Linear<128, 32>, ReLU),
Linear<32, 10>,
);

// training batch size
const BATCH_SIZE: usize = 32;

fn main() {
// ftz substantially improves performance
dfdx::flush_denormals_to_zero();

let mnist_path = std::env::args()
.nth(1)
.unwrap_or_else(|| "./datasets/MNIST/raw".to_string());
@@ -58,10 +67,12 @@ fn main() {

let mut rng = StdRng::seed_from_u64(0);

// initialize model and optimizer
let mut model: Mlp = Default::default();
model.reset_params(&mut rng);
let mut opt: Adam<Mlp> = Default::default();

// initialize dataset
let dataset = MnistDataset::train(&mnist_path);
println!("Found {:?} training images", dataset.len());

@@ -94,6 +105,7 @@ fn main() {
);
}

// save our model to a .npz file
model
.save("mnist-classifier.npz")
.expect("failed to save model");
37 changes: 24 additions & 13 deletions examples/custom.rs → examples/07-custom-module.rs
@@ -1,5 +1,10 @@
-use dfdx::prelude::*;
-use rand::prelude::{SeedableRng, StdRng};
+//! Demonstrates how to build a custom [nn::Module] without using tuples
+
+use rand::prelude::*;
+
+use dfdx::gradients::{CanUpdateWithGradients, GradientProvider, OwnedTape, Tape, UnusedTensors};
+use dfdx::nn::{Linear, Module, ReLU, ResetParams};
+use dfdx::tensor::{Tensor1D, Tensor2D, TensorCreator};

/// Custom model struct
/// This case is trivial and should be done with a tuple of linears and relus,
@@ -11,6 +16,7 @@ struct Mlp<const IN: usize, const INNER: usize, const OUT: usize> {
relu: ReLU,
}

// ResetParams lets you randomize a model's parameters
impl<const IN: usize, const INNER: usize, const OUT: usize> ResetParams for Mlp<IN, INNER, OUT> {
fn reset_params<R: rand::Rng>(&mut self, rng: &mut R) {
self.l1.reset_params(rng);
@@ -19,6 +25,7 @@ impl<const IN: usize, const INNER: usize, const OUT: usize> ResetParams for Mlp<
}
}

// CanUpdateWithGradients lets you update a model's parameters using gradients
impl<const IN: usize, const INNER: usize, const OUT: usize> CanUpdateWithGradients
for Mlp<IN, INNER, OUT>
{
@@ -29,25 +36,29 @@ impl<const IN: usize, const INNER: usize, const OUT: usize> CanUpdateWithGradien
}
}

-// Impl module for single forward pass
+// impl Module for single item
impl<const IN: usize, const INNER: usize, const OUT: usize> Module<Tensor1D<IN>>
for Mlp<IN, INNER, OUT>
{
type Output = Tensor1D<OUT>;

-fn forward(&self, input: Tensor1D<IN>) -> Self::Output {
-self.l2.forward(self.relu.forward(self.l1.forward(input)))
+fn forward(&self, x: Tensor1D<IN>) -> Self::Output {
+let x = self.l1.forward(x);
+let x = self.relu.forward(x);
+self.l2.forward(x)
}
}

-// Impl module for batch forward pass
-impl<const BATCH: usize, const IN: usize, const INNER: usize, const OUT: usize, T: Tape>
-Module<Tensor2D<BATCH, IN, T>> for Mlp<IN, INNER, OUT>
+// impl Module for batch of items
+impl<const BATCH: usize, const IN: usize, const INNER: usize, const OUT: usize, TAPE: Tape>
+Module<Tensor2D<BATCH, IN, TAPE>> for Mlp<IN, INNER, OUT>
{
-type Output = Tensor2D<BATCH, OUT, T>;
+type Output = Tensor2D<BATCH, OUT, TAPE>;

-fn forward(&self, input: Tensor2D<BATCH, IN, T>) -> Self::Output {
-self.l2.forward(self.relu.forward(self.l1.forward(input)))
+fn forward(&self, x: Tensor2D<BATCH, IN, TAPE>) -> Self::Output {
+let x = self.l1.forward(x);
+let x = self.relu.forward(x);
+self.l2.forward(x)
}
}

@@ -63,9 +74,9 @@ fn main() {

// Forward pass with a single sample
let sample: Tensor1D<10> = Tensor1D::randn(&mut rng);
-let _y = model.forward(sample);
+let _: Tensor1D<10> = model.forward(sample);

// Forward pass with a batch of samples
let batch: Tensor2D<BATCH_SIZE, 10> = Tensor2D::randn(&mut rng);
-let _y = model.forward(batch);
+let _: Tensor2D<BATCH_SIZE, 10, OwnedTape> = model.forward(batch.trace());
}
38 changes: 38 additions & 0 deletions examples/08-tensor-broadcast-reduce.rs
@@ -0,0 +1,38 @@
//! Demonstrates broadcasting tensors to different sizes, and axis reductions
//! with BroadcastTo and ReduceTo

use dfdx::arrays::Axis;
use dfdx::tensor::{tensor, HasArrayData, Tensor1D, Tensor2D, Tensor4D};
use dfdx::tensor_ops::BroadcastTo;

fn main() {
let a: Tensor1D<3> = tensor([1.0, 2.0, 3.0]);

// to broadcast, use `BroadcastTo::broadcast()` and specify
// the output type. the axes that are broadcast are inferred for you!
let b: Tensor2D<5, 3> = a.broadcast();
assert_eq!(b.data(), &[[1.0, 2.0, 3.0]; 5]);

// we can really broadcast any axes on either side
// here a (5,3) tensor is broadcast to (7,5,3,2).
// so 7 is added in front, and 2 is added last
let c: Tensor4D<7, 5, 3, 2> = b.broadcast();
assert_eq!(c.data(), &[[[[1.0; 2], [2.0; 2], [3.0; 2]]; 5]; 7]);

// the opposite of broadcast is reducing
// we've already introduced one reduction which is mean
let d: Tensor2D<5, 3> = c.mean();
assert_eq!(d.data(), &[[1.0, 2.0, 3.0]; 5]);

// generally you can just specify the output type
// and the reduction & broadcast will work.
// sometimes it's ambiguous though
let e: Tensor1D<1> = tensor([1.0]);

// here rust doesn't know if the new axis is the first or second
// so we have to explicitly tell it
let f: Tensor2D<1, 1> = BroadcastTo::<_, Axis<1>>::broadcast(e);

// reductions have the same problem when it's ambiguous
let _: Tensor1D<1> = f.mean::<_, Axis<0>>();
}
23 changes: 23 additions & 0 deletions examples/09-tensor-permute.rs
@@ -0,0 +1,23 @@
//! Demonstrates how to re-order (permute/transpose) the axes of a tensor

use dfdx::arrays::Axes3;
use dfdx::tensor::{Tensor3D, TensorCreator};
use dfdx::tensor_ops::PermuteTo;

fn main() {
let a: Tensor3D<3, 5, 7> = TensorCreator::zeros();

// permuting is as easy as just expressing the desired type
let b: Tensor3D<7, 5, 3> = a.permute();

// we can do any of the expected combinations!
let _: Tensor3D<5, 7, 3> = b.permute();

// just like broadcast/reduce there are times when
// inference is impossible because of ambiguities
let c: Tensor3D<1, 1, 1> = TensorCreator::zeros();

// when axes have the same sizes you'll have to indicate
// the axes explicitly to get around this
let _: Tensor3D<1, 1, 1> = PermuteTo::<_, Axes3<1, 0, 2>>::permute(c);
}