
Commit ef7919a

Merge pull request #721 from SciML/doctests
Enable doctesting
2 parents d367dc6 + b3bd9ff · commit ef7919a

10 files changed: +184 −194 lines


docs/Project.toml

Lines changed: 16 additions & 0 deletions
@@ -1,5 +1,21 @@
 [deps]
+DiffEqFlux = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0"
+DiffEqSensitivity = "41bf760c-e81c-5289-8e54-58b1f1f8abe2"
+DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
+Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
+Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
+OptimizationFlux = "253f991c-a7b2-45f8-8852-8b9a9df78a86"
+OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
+OptimizationPolyalgorithms = "500b13db-7e66-49ce-bda4-eed966be6282"
+OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [compat]
 Documenter = "0.27"
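
The new entries give the documentation build access to every package the `@example` blocks use. A minimal sketch of instantiating that environment locally, assuming the standard SciML repository layout and running from the repository root:

```julia
# Activate the docs environment, point it at the local DiffEqFlux checkout,
# and resolve the dependencies declared above.
using Pkg
Pkg.activate("docs")
Pkg.develop(PackageSpec(path = pwd()))
Pkg.instantiate()
```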

docs/make.jl

Lines changed: 11 additions & 1 deletion
@@ -1,5 +1,8 @@
 using Documenter, DiffEqFlux
 
+ENV["GKSwstype"] = "100"
+using Plots
+
 include("pages.jl")
 
 makedocs(
@@ -8,7 +11,14 @@ makedocs(
     clean = true,
     doctest = false,
     modules = [DiffEqFlux],
-
+    strict=[
+        :doctest,
+        :linkcheck,
+        :parse_error,
+        :example_block,
+        # Other available options are
+        # :autodocs_block, :cross_references, :docs_block, :eval_block, :example_block, :footnote, :meta_block, :missing_docs, :setup_block
+    ],
     format = Documenter.HTML(analytics = "UA-90474609-3",
                              assets = ["assets/favicon.ico"],
                              canonical="https://diffeqflux.sciml.ai/stable/"),
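
Setting `strict` to include `:example_block`, `:linkcheck`, and `:parse_error` makes those error classes fail the documentation build instead of only printing warnings, and `ENV["GKSwstype"] = "100"` is the usual way to keep GR-based Plots output headless on CI. A quick way to exercise just the doctests locally is plain Documenter usage (not a script from this repository):

```julia
# Run the package's doctests without building the full site.
using Documenter, DiffEqFlux
Documenter.doctest(DiffEqFlux)
```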

docs/src/examples/augmented_neural_ode.md

Lines changed: 24 additions & 21 deletions
@@ -2,7 +2,7 @@
 
 ## Copy-Pasteable Code
 
-```julia
+```@example augneuralode_cp
 using DiffEqFlux, DifferentialEquations
 using Statistics, LinearAlgebra, Plots
 using Flux.Data: DataLoader
@@ -28,7 +28,7 @@ function concentric_sphere(dim, inner_radius_range, outer_radius_range,
     end
     data = cat(data..., dims=2)
     labels = cat(labels..., dims=2)
-    return DataLoader((data |> gpu, labels |> gpu); batchsize=batch_size, shuffle=true,
+    DataLoader((data |> gpu, labels |> gpu); batchsize=batch_size, shuffle=true,
                       partial=false)
 end
 
@@ -43,6 +43,7 @@ function construct_model(out_dim, input_dim, hidden_dim, augment_dim)
                     reltol = 1e-3, abstol = 1e-3, save_start = false) |> gpu
     node = augment_dim == 0 ? node : AugmentedNDELayer(node, augment_dim)
     return Chain((x, p=node.p) -> node(x, p),
+                 Array,
                  diffeqarray_to_array,
                  Dense(input_dim, out_dim) |> gpu), node.p |> gpu
 end
@@ -67,34 +68,34 @@ println("Generating Dataset")
 
 dataloader = concentric_sphere(2, (0.0, 2.0), (3.0, 4.0), 2000, 2000; batch_size = 256)
 
+iter = 0
 cb = function()
-    global iter += 1
+    global iter
+    iter += 1
     if iter % 10 == 0
         println("Iteration $iter || Loss = $(loss_node(dataloader.data[1], dataloader.data[2]))")
     end
 end
 
 model, parameters = construct_model(1, 2, 64, 0)
 opt = ADAM(0.005)
-iter = 0
 
 println("Training Neural ODE")
 
 for _ in 1:10
-    Flux.train!(loss_node, Flux.params([parameters, model]), dataloader, opt, cb = cb)
+    Flux.train!(loss_node, Flux.params(parameters, model), dataloader, opt, cb = cb)
 end
 
 plt_node = plot_contour(model)
 
 model, parameters = construct_model(1, 2, 64, 1)
 opt = ADAM(0.005)
-iter = 0
 
 println()
 println("Training Augmented Neural ODE")
 
 for _ in 1:10
-    Flux.train!(loss_node, Flux.params([parameters, model]), dataloader, opt, cb = cb)
+    Flux.train!(loss_node, Flux.params(parameters, model), dataloader, opt, cb = cb)
 end
 
 plt_anode = plot_contour(model)
@@ -104,7 +105,7 @@ plt_anode = plot_contour(model)
 
 ## Loading required packages
 
-```julia
+```@example augneuralode
 using DiffEqFlux, DifferentialEquations
 using Statistics, LinearAlgebra, Plots
 using Flux.Data: DataLoader
@@ -118,7 +119,7 @@ circle, and `-1` to any point which lies between the inner and outer circle. Our
 `random_point_in_sphere` samples points uniformly between 2 concentric circles/spheres of radii
 `min_radius` and `max_radius` respectively.
 
-```julia
+```@example augneuralode
 function random_point_in_sphere(dim, min_radius, max_radius)
     distance = (max_radius - min_radius) .* (rand(1) .^ (1.0 / dim)) .+ min_radius
     direction = randn(dim)
@@ -130,7 +131,7 @@ end
 Next, we will construct a dataset of these points and use Flux's DataLoader to automatically minibatch
 and shuffle the data.
 
-```julia
+```@example augneuralode
 function concentric_sphere(dim, inner_radius_range, outer_radius_range,
                            num_samples_inner, num_samples_outer; batch_size = 64)
     data = []
@@ -145,7 +146,7 @@ function concentric_sphere(dim, inner_radius_range, outer_radius_range,
     end
     data = cat(data..., dims=2)
     labels = cat(labels..., dims=2)
-    return DataLoader(data |> gpu, labels |> gpu; batchsize=batch_size, shuffle=true,
+    return DataLoader((data |> gpu, labels |> gpu); batchsize=batch_size, shuffle=true,
                       partial=false)
 end
 ```
@@ -162,7 +163,7 @@ and construct that layer accordingly.
 In order to run the models on GPU, we need to manually transfer the models to GPU. First one is the network
 predicting the derivatives inside the Neural ODE and the other one is the last layer in the Chain.
 
-```julia
+```@example augneuralode
 diffeqarray_to_array(x) = reshape(gpu(x), size(x)[1:2])
 
 function construct_model(out_dim, input_dim, hidden_dim, augment_dim)
@@ -174,6 +175,7 @@ function construct_model(out_dim, input_dim, hidden_dim, augment_dim)
                     reltol = 1e-3, abstol = 1e-3, save_start = false) |> gpu
     node = augment_dim == 0 ? node : (AugmentedNDELayer(node, augment_dim) |> gpu)
     return Chain((x, p=node.p) -> node(x, p),
+                 Array,
                  diffeqarray_to_array,
                  Dense(input_dim, out_dim) |> gpu), node.p |> gpu
 end
@@ -183,7 +185,7 @@ end
 
 Here, we define an utility to plot our model regression results as a heatmap.
 
-```julia
+```@example augneuralode
 function plot_contour(model, npoints = 300)
     grid_points = zeros(2, npoints ^ 2)
     idx = 1
@@ -206,7 +208,7 @@ end
 We use the L2 distance between the model prediction `model(x)` and the actual prediction `y` as the
 optimization objective.
 
-```julia
+```@example augneuralode
 loss_node(x, y) = mean((model(x) .- y) .^ 2)
 ```
 
@@ -215,15 +217,16 @@ loss_node(x, y) = mean((model(x) .- y) .^ 2)
 Next, we generate the dataset. We restrict ourselves to 2 dimensions as it is easy to visualize.
 We sample a total of `4000` data points.
 
-```julia
+```@example augneuralode
 dataloader = concentric_sphere(2, (0.0, 2.0), (3.0, 4.0), 2000, 2000; batch_size = 256)
 ```
 
 ### Callback Function
 
 Additionally we define a callback function which displays the total loss at specific intervals.
 
-```julia
+```@example augneuralode
+iter = 0
 cb = function()
     global iter += 1
     if iter % 10 == 1
@@ -236,7 +239,7 @@ end
 
 We use ADAM as the optimizer with a learning rate of 0.005
 
-```julia
+```@example augneuralode
 opt = ADAM(0.005)
 ```
 
@@ -246,11 +249,11 @@ To train our neural ode model, we need to pass the appropriate learnable paramet
 returned by the `construct_models` function. It is simply the `node.p` vector. We then train our model
 for `20` epochs.
 
-```julia
+```@example augneuralode
 model, parameters = construct_model(1, 2, 64, 0)
 
 for _ in 1:10
-    Flux.train!(loss_node, Flux.params([model, parameters]), dataloader, opt, cb = cb)
+    Flux.train!(loss_node, Flux.params(model, parameters), dataloader, opt, cb = cb)
 end
 ```
 
@@ -265,11 +268,11 @@ Our training configuration will be same as that of Neural ODE. Only in this case
 input with a single zero. This makes the problem 3 dimensional and as such it is possible to find
 a function which can be expressed by the neural ode. For more details and proofs please refer to [1].
 
-```julia
+```@example augneuralode
 model, parameters = construct_model(1, 2, 64, 1)
 
 for _ in 1:10
-    Flux.train!(loss_node, Flux.params([model, parameters]), dataloader, opt, cb = cb)
+    Flux.train!(loss_node, Flux.params(model, parameters), dataloader, opt, cb = cb)
 end
 ```
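The training hunks above also change `Flux.params([parameters, model])` to `Flux.params(parameters, model)`, passing each trainable object as its own argument rather than wrapping them in a `Vector`. A minimal sketch of that call shape, with small stand-ins for the tutorial's network and parameter vector (not the actual model from this page):

```julia
using Flux

# Hypothetical stand-ins for the tutorial's `model` Chain and its `parameters` vector.
model      = Chain(Dense(2, 4, tanh), Dense(4, 1))
parameters = randn(Float32, 8)

# Pass each trainable object as a separate argument; Flux collects arrays from both.
ps = Flux.params(parameters, model)
```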

docs/src/examples/collocation.md

Lines changed: 11 additions & 8 deletions
@@ -4,7 +4,7 @@ One can avoid a lot of the computational cost of the ODE solver by
 pretraining the neural network against a smoothed collocation of the
 data. First the example and then an explanation.
 
-```julia
+```@example collocation_cp
 using Lux, DiffEqFlux, OrdinaryDiffEq, DiffEqSensitivity, Optimization, OptimizationFlux, Plots
 
 using Random
@@ -79,7 +79,7 @@ optprob = Optimization.OptimizationProblem(optf, Lux.ComponentArray(pinit))
 
 numerical_neuralode = Optimization.solve(optprob,
                                          ADAM(0.05),
-                                         cb = callback,
+                                         callback = callback,
                                          maxiters = 300)
 
 nn_sol, st = prob_neuralode(u0, numerical_neuralode.u, st)
@@ -93,8 +93,11 @@ savefig("post_trained.png")
 The smoothed collocation is a spline fit of the datapoints which allows
 us to get a an estimate of the approximate noiseless dynamics:
 
-```julia
-using Flux, DiffEqFlux, Optimization, OptimizationFlux, DifferentialEquations, Plots
+```@example collocation
+using Lux, DiffEqFlux, Optimization, OptimizationFlux, DifferentialEquations, Plots
+
+using Random
+rng = Random.default_rng()
 
 u0 = Float32[2.0; 0.0]
 datasize = 300
@@ -120,7 +123,7 @@ plot!(tsteps,u',lw=5)
 We can then differentiate the smoothed function to get estimates of the
 derivative at each datapoint:
 
-```julia
+```@example collocation
 plot(tsteps,du')
 ```
 
@@ -130,7 +133,7 @@ Because we have `(u',u)` pairs, we can write a loss function that
 calculates the squared difference between `f(u,p,t)` and `u'` at each
 point, and find the parameters which minimize this difference:
 
-```julia
+```@example collocation
 dudt2 = Lux.Chain(ActivationFunction(x -> x.^3),
                   Lux.Dense(2, 50, tanh),
                   Lux.Dense(50, 2))
@@ -170,7 +173,7 @@ full solution all throughout the timeseries, but it does have a drift.
 We can continue to optimize like this, or we can use this as the
 initial condition to the next phase of our fitting:
 
-```julia
+```@example collocation
 function predict_neuralode(p)
     Array(prob_neuralode(u0, p, st)[1])
 end
@@ -187,7 +190,7 @@ optprob = Optimization.OptimizationProblem(optf, Lux.ComponentArray(pinit))
 
 numerical_neuralode = Optimization.solve(optprob,
                                          ADAM(0.05),
-                                         cb = callback,
+                                         callback = callback,
                                          maxiters = 300)
 
 nn_sol, st = prob_neuralode(u0, numerical_neuralode.u, st)
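
Both hunks that touch `Optimization.solve` rename the keyword `cb` to `callback`, which is the name the Optimization.jl solve interface accepts. A minimal, hedged sketch of that keyword in use, with a placeholder objective rather than the tutorial's neural ODE loss:

```julia
using Optimization, OptimizationOptimJL

# Placeholder objective and initial guess, only to illustrate the keyword.
rosenbrock(u, p) = (1.0 - u[1])^2 + 100.0 * (u[2] - u[1]^2)^2
optf = OptimizationFunction(rosenbrock)
optprob = OptimizationProblem(optf, zeros(2))

# The callback receives the current parameters and loss; returning `false` lets the solver continue.
callback = (p, l) -> (println("loss = ", l); false)

sol = solve(optprob, NelderMead(), callback = callback, maxiters = 300)
```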

docs/src/examples/hamiltonian_nn.md

Lines changed: 9 additions & 7 deletions
@@ -8,8 +8,8 @@ m\ddot(x) + kx = 0
 
 Now we make some simplifying assumptions, and assign ``m = 1`` and ``k = 1``. Analytically solving this equation, we get ``x = sin(t)``. Hence, ``q = sin(t)``, and ``p = cos(t)``. Using these solutions we generate our dataset and fit the `NeuralHamiltonianDE` to learn the dynamics of this system.
 
-```julia
-using DiffEqFlux, DifferentialEquations, Statistics, Plots
+```@example hamiltonian_cp
+using Flux, DiffEqFlux, DifferentialEquations, Statistics, Plots, ReverseDiff
 
 t = range(0.0f0, 1.0f0, length = 1024)
 π_32 = Float32(π)
@@ -20,7 +20,7 @@ dpdt = -2π_32 .* q_t
 
 data = cat(q_t, p_t, dims = 1)
 target = cat(dqdt, dpdt, dims = 1)
-dataloader = Flux.Data.DataLoader(data, target; batchsize=256, shuffle=true)
+dataloader = Flux.Data.DataLoader((data, target); batchsize=256, shuffle=true)
 
 hnn = HamiltonianNN(
     Chain(Dense(2, 64, relu), Dense(64, 1))
@@ -65,7 +65,9 @@ ylabel!("Momentum (p)")
 
 The HNN predicts the gradients ``(\dot(q), \dot(p))`` given ``(q, p)``. Hence, we generate the pairs ``(q, p)`` using the equations given at the top. Additionally to supervise the training we also generate the gradients. Next we use use Flux DataLoader for automatically batching our dataset.
 
-```julia
+```@example hamiltonian
+using Flux, DiffEqFlux, DifferentialEquations, Statistics, Plots, ReverseDiff
+
 t = range(0.0f0, 1.0f0, length = 1024)
 π_32 = Float32(π)
 q_t = reshape(sin.(2π_32 * t), 1, :)
@@ -75,14 +77,14 @@ dpdt = -2π_32 .* q_t
 
 data = cat(q_t, p_t, dims = 1)
 target = cat(dqdt, dpdt, dims = 1)
-dataloader = Flux.Data.DataLoader(data, target; batchsize=256, shuffle=true)
+dataloader = Flux.Data.DataLoader((data, target); batchsize=256, shuffle=true)
 ```
 
 ### Training the HamiltonianNN
 
 We parameterize the HamiltonianNN with a small MultiLayered Perceptron (HNN also works with the Fast* Layers provided in DiffEqFlux). HNNs are trained by optimizing the gradients of the Neural Network. Zygote currently doesn't support nesting itself, so we will be using ReverseDiff in the training loop to compute the gradients of the HNN Layer for Optimization.
 
-```julia
+```@example hamiltonian
 hnn = HamiltonianNN(
     Chain(Dense(2, 64, relu), Dense(64, 1))
 )
@@ -112,7 +114,7 @@ callback()
 
 In order to visualize the learned trajectories, we need to solve the ODE. We will use the `NeuralHamiltonianDE` layer which is essentially a wrapper over `HamiltonianNN` layer and solves the ODE.
 
-```julia
+```@example hamiltonian
 model = NeuralHamiltonianDE(
     hnn, (0.0f0, 1.0f0),
     Tsit5(), save_everystep = false,
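
The `DataLoader` hunks in this file switch from passing `data, target` as two positional arguments to passing a single `(data, target)` tuple, which is the signature newer Flux versions expect. A minimal sketch with toy arrays (sizes are illustrative only, chosen to mirror the tutorial's layout):

```julia
using Flux
using Flux.Data: DataLoader

# Toy dataset: 2 features by 1024 observations.
data   = rand(Float32, 2, 1024)
target = rand(Float32, 2, 1024)

# A single tuple of arrays that share their last (observation) dimension.
dataloader = DataLoader((data, target); batchsize = 256, shuffle = true)

for (x, y) in dataloader
    # Each minibatch keeps inputs and targets aligned along the batch dimension.
    @assert size(x, 2) == size(y, 2) == 256
end
```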

0 commit comments
