From 57086c0c4d6df06a54e4825b44250334d0eb531a Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 12:14:21 +0000 Subject: [PATCH 01/20] Update to v0.42 --- Manifest.toml | 130 +++++++++++++++++++++++--------------------------- Project.toml | 2 +- 2 files changed, 61 insertions(+), 71 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index df26f9c06..ffe0e5598 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2,12 +2,12 @@ julia_version = "1.11.7" manifest_format = "2.0" -project_hash = "13af05c5c830ccbd16d00856d8cce62a9368ce6a" +project_hash = "2294ed99ca36fd3146a910892cadcb05f17b53a2" [[deps.ADTypes]] -git-tree-sha1 = "8be2ae325471fc20b11c27bb34b518541d07dd3a" +git-tree-sha1 = "8b2b045b22740e4be20654175cc38291d48539db" uuid = "47edcb42-4c32-4615-8424-f2b9edc5f35b" -version = "1.19.0" +version = "1.20.0" weakdeps = ["ChainRulesCore", "ConstructionBase", "EnzymeCore"] [deps.ADTypes.extensions] @@ -55,9 +55,9 @@ version = "0.4.5" [[deps.Accessors]] deps = ["CompositionsBase", "ConstructionBase", "Dates", "InverseFunctions", "MacroTools"] -git-tree-sha1 = "3b86719127f50670efe356bc11073d84b4ed7a5d" +git-tree-sha1 = "856ecd7cebb68e5fc87abecd2326ad59f0f911f3" uuid = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" -version = "0.1.42" +version = "0.1.43" [deps.Accessors.extensions] AxisKeysExt = "AxisKeys" @@ -131,14 +131,17 @@ weakdeps = ["Libtask"] AdvancedPSLibtaskExt = "Libtask" [[deps.AdvancedVI]] -deps = ["ADTypes", "Accessors", "DiffResults", "DifferentiationInterface", "Distributions", "DocStringExtensions", "FillArrays", "Functors", "LinearAlgebra", "LogDensityProblems", "Optimisers", "ProgressMeter", "Random", "StatsBase"] -git-tree-sha1 = "59c9723a71ed815eafec430d4cafa592b5889b96" +deps = ["ADTypes", "Accessors", "ChainRulesCore", "DiffResults", "DifferentiationInterface", "Distributions", "DocStringExtensions", "FillArrays", "Functors", "LinearAlgebra", "LogDensityProblems", "Optimisers", "ProgressMeter", "Random", "StatsBase"] +git-tree-sha1 = "9196795a7474c48ec7d73056a52a786e61dac062" uuid = "b5ca4192-6429-45e5-a2d9-87aec30a685c" -version = "0.4.1" -weakdeps = ["Bijectors"] +version = "0.6.0" +weakdeps = ["Bijectors", "Enzyme", "Mooncake", "ReverseDiff"] [deps.AdvancedVI.extensions] - AdvancedVIBijectorsExt = "Bijectors" + AdvancedVIBijectorsExt = ["Bijectors", "Optimisers"] + AdvancedVIEnzymeExt = ["Enzyme", "ChainRulesCore"] + AdvancedVIMooncakeExt = ["Mooncake", "ChainRulesCore"] + AdvancedVIReverseDiffExt = ["ReverseDiff", "ChainRulesCore"] [[deps.AliasTables]] deps = ["PtrArrays", "Random"] @@ -215,9 +218,9 @@ version = "7.22.0" [[deps.ArrayLayouts]] deps = ["FillArrays", "LinearAlgebra", "StaticArrays"] -git-tree-sha1 = "122a06c8266e00035bfa572887ab52c344526eb4" +git-tree-sha1 = "e0b47732a192dd59b9d079a06d04235e2f833963" uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" -version = "1.12.1" +version = "1.12.2" weakdeps = ["SparseArrays"] [deps.ArrayLayouts.extensions] @@ -304,9 +307,9 @@ version = "0.1.1" [[deps.Bijectors]] deps = ["ArgCheck", "ChainRulesCore", "ChangesOfVariables", "Distributions", "DocStringExtensions", "Functors", "InverseFunctions", "IrrationalConstants", "LinearAlgebra", "LogExpFunctions", "MappedArrays", "Random", "Reexport", "Roots", "SparseArrays", "Statistics"] -git-tree-sha1 = "642af9e4f33cfe6930088418da4c2b0d2b9de169" +git-tree-sha1 = "bbdaafb28e31f2948916670a8eb7fd94ec12406f" uuid = "76274a88-744f-5084-9051-94815aaf08c4" -version = "0.15.12" +version = "0.15.14" weakdeps = ["ChainRules", "DistributionsAD", "EnzymeCore", "ForwardDiff", 
"LazyArrays", "Mooncake", "ReverseDiff"] [deps.Bijectors.extensions] @@ -863,9 +866,9 @@ version = "3.5.1" [[deps.DynamicPPL]] deps = ["ADTypes", "AbstractMCMC", "AbstractPPL", "Accessors", "BangBang", "Bijectors", "Chairmarks", "Compat", "ConstructionBase", "DifferentiationInterface", "Distributions", "DocStringExtensions", "InteractiveUtils", "LinearAlgebra", "LogDensityProblems", "MacroTools", "OrderedCollections", "Printf", "Random", "Statistics", "Test"] -git-tree-sha1 = "91f24ed9e834a6c98793352cec683242870b6d78" +git-tree-sha1 = "b87bc878118011066248821177ba26b62a999248" uuid = "366bfd00-2699-11ea-058f-f148b4cae6d8" -version = "0.38.9" +version = "0.39.1" [deps.DynamicPPL.extensions] DynamicPPLChainRulesCoreExt = ["ChainRulesCore"] @@ -1197,9 +1200,9 @@ version = "0.2.0" [[deps.GPUCompiler]] deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "PrecompileTools", "Preferences", "Scratch", "Serialization", "TOML", "Tracy", "UUIDs"] -git-tree-sha1 = "90554fe518adab1b4c8f7a04d26c414482a240ca" +git-tree-sha1 = "6e5a25bc455da8e8d88b6b7377e341e9af1929f0" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" -version = "1.7.4" +version = "1.7.5" [[deps.GR]] deps = ["Artifacts", "Base64", "DelimitedFiles", "Downloads", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Preferences", "Printf", "Qt6Wayland_jll", "Random", "Serialization", "Sockets", "TOML", "Tar", "Test", "p7zip_jll"] @@ -1239,9 +1242,9 @@ version = "9.55.1+0" [[deps.Glib_jll]] deps = ["Artifacts", "GettextRuntime_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE2_jll", "Zlib_jll"] -git-tree-sha1 = "50c11ffab2a3d50192a228c313f05b5b5dc5acb2" +git-tree-sha1 = "6b4d2dc81736fe3980ff0e8879a9fc7c33c44ddf" uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" -version = "2.86.0+0" +version = "2.86.2+0" [[deps.Graphite2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] @@ -1548,9 +1551,9 @@ version = "0.1.17" [[deps.LazyArrays]] deps = ["ArrayLayouts", "FillArrays", "LinearAlgebra", "MacroTools", "SparseArrays"] -git-tree-sha1 = "0f68899e54e5e98cff674bbe6380bcf89f603787" +git-tree-sha1 = "70ebe3bcf87d6a1e7435ef5182c13a91161ba9b8" uuid = "5078a376-72f3-5289-bfd5-ec5146d43c02" -version = "2.9.1" +version = "2.9.4" [deps.LazyArrays.extensions] LazyArraysBandedMatricesExt = "BandedMatrices" @@ -1681,10 +1684,10 @@ weakdeps = ["LineSearches"] LineSearchLineSearchesExt = "LineSearches" [[deps.LineSearches]] -deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"] -git-tree-sha1 = "a8b1215fb05581a1f9e403bec46a1333e7eb1ffb" +deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Printf"] +git-tree-sha1 = "9ea3422d03222c6de679934d1c08f0a99405aa03" uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" -version = "7.4.1" +version = "7.5.1" [[deps.LinearAlgebra]] deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] @@ -1693,9 +1696,9 @@ version = "1.11.0" [[deps.LinearSolve]] deps = ["ArrayInterface", "ChainRulesCore", "ConcreteStructs", "DocStringExtensions", "EnumX", "GPUArraysCore", "InteractiveUtils", "Krylov", "LazyArrays", "Libdl", "LinearAlgebra", "MKL_jll", "Markdown", "OpenBLAS_jll", "PrecompileTools", "Preferences", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLLogging", "SciMLOperators", "Setfield", "StaticArraysCore", "UnPack"] -git-tree-sha1 = "e00dff84aded96c3ec03cfe46ff8d13e0c5afc44" +git-tree-sha1 = "006238a52d94ee8618c9f905fbabb177d37e4db0" uuid = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" -version = "3.47.0" +version = "3.48.1" [deps.LinearSolve.extensions] 
LinearSolveAMDGPUExt = "AMDGPU" @@ -2098,9 +2101,9 @@ version = "0.8.1" [[deps.Mooncake]] deps = ["ADTypes", "ChainRules", "ChainRulesCore", "DispatchDoctor", "ExprTools", "Graphs", "LinearAlgebra", "MistyClosures", "Random", "Test"] -git-tree-sha1 = "f44ed967a443b8d4235d548264f0b19c411adb17" +git-tree-sha1 = "0127d6b56adef8977212b5225628119fa5a65ffe" uuid = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" -version = "0.4.180" +version = "0.4.182" [deps.Mooncake.extensions] MooncakeAllocCheckExt = "AllocCheck" @@ -2174,12 +2177,6 @@ git-tree-sha1 = "b0154a615d5b2b6cf7a2501123b793577d0b9950" uuid = "079eb43e-fd8e-5478-9966-2cf3e3edb778" version = "2.10.0+0" -[[deps.NLsolve]] -deps = ["Distances", "LineSearches", "LinearAlgebra", "NLSolversBase", "Printf", "Reexport"] -git-tree-sha1 = "019f12e9a1a7880459d0173c182e6a99365d7ac1" -uuid = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" -version = "4.5.1" - [[deps.NNlib]] deps = ["Adapt", "Atomix", "ChainRulesCore", "GPUArraysCore", "KernelAbstractions", "LinearAlgebra", "Random", "ScopedValues", "Statistics"] git-tree-sha1 = "eb6eb10b675236cee09a81da369f94f16d77dc2f" @@ -2272,9 +2269,9 @@ version = "4.12.0" [[deps.NonlinearSolveBase]] deps = ["ADTypes", "Adapt", "ArrayInterface", "CommonSolve", "Compat", "ConcreteStructs", "DifferentiationInterface", "EnzymeCore", "FastClosures", "LinearAlgebra", "Markdown", "MaybeInplace", "Preferences", "Printf", "RecursiveArrayTools", "SciMLBase", "SciMLJacobianOperators", "SciMLLogging", "SciMLOperators", "SciMLStructures", "Setfield", "StaticArraysCore", "SymbolicIndexingInterface", "TimerOutputs"] -git-tree-sha1 = "a72dc6e5bba0fd9bb3bd9cc4abade8552d9fc982" +git-tree-sha1 = "3f963293e13d5eff19214b068f0cc0a28da8ea27" uuid = "be0214bd-f91f-a760-ac4e-3421ce2b2da0" -version = "2.4.0" +version = "2.5.0" weakdeps = ["BandedMatrices", "ChainRulesCore", "Enzyme", "ForwardDiff", "LineSearch", "LinearSolve", "Mooncake", "ReverseDiff", "SparseArrays", "SparseMatrixColorings", "Tracker"] [deps.NonlinearSolveBase.extensions] @@ -2360,9 +2357,9 @@ version = "0.8.5+0" [[deps.OpenSSL]] deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "NetworkOptions", "OpenSSL_jll", "Sockets"] -git-tree-sha1 = "386b47442468acfb1add94bf2d85365dea10cbab" +git-tree-sha1 = "1d1aaa7d449b58415f97d2839c318b70ffb525a0" uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" -version = "1.6.0" +version = "1.6.1" [[deps.OpenSSL_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] @@ -2378,9 +2375,9 @@ version = "0.5.6+0" [[deps.Optim]] deps = ["Compat", "EnumX", "FillArrays", "ForwardDiff", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"] -git-tree-sha1 = "61942645c38dd2b5b78e2082c9b51ab315315d10" +git-tree-sha1 = "48968edaf014f67e58fe4c8a4ce72d392aed3294" uuid = "429524aa-4258-5aef-a3af-852621145aeb" -version = "1.13.2" +version = "1.13.3" [deps.Optim.extensions] OptimMOIExt = "MathOptInterface" @@ -2406,9 +2403,9 @@ version = "0.4.6" [[deps.Optimization]] deps = ["ADTypes", "ArrayInterface", "ConsoleProgressMonitor", "DocStringExtensions", "LinearAlgebra", "Logging", "LoggingExtras", "OptimizationBase", "Printf", "Reexport", "SciMLBase", "SparseArrays", "TerminalLoggers"] -git-tree-sha1 = "b0afd00640ed7a122dfdd6f7c3e676079ce75dc0" +git-tree-sha1 = "9c8a9bc453fa1b5a5913ffa77d4b9b8b586f1de8" uuid = "7f7a1694-90dd-40f0-9382-eb1efda571ba" -version = "5.1.0" +version = "5.2.0" [[deps.OptimizationBase]] deps = ["ADTypes", "ArrayInterface", "DifferentiationInterface", "DocStringExtensions", "FastClosures", 
"LinearAlgebra", "PDMats", "Reexport", "SciMLBase", "SparseArrays", "SparseConnectivityTracer", "SparseMatrixColorings"] @@ -2679,12 +2676,6 @@ git-tree-sha1 = "0662b083e11420952f2e62e17eddae7fc07d5997" uuid = "36c8627f-9965-5494-a995-c6b170f724f3" version = "1.57.0+0" -[[deps.Parameters]] -deps = ["OrderedCollections", "UnPack"] -git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe" -uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" -version = "0.12.3" - [[deps.Parsers]] deps = ["Dates", "PrecompileTools", "UUIDs"] git-tree-sha1 = "7d2f8f21da5db6a806faf7b9b292296da42b2810" @@ -2720,9 +2711,9 @@ version = "1.4.4" [[deps.Plots]] deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "JLFzf", "JSON", "LaTeXStrings", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "PrecompileTools", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "RelocatableFolders", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "TOML", "UUIDs", "UnicodeFun", "Unzip"] -git-tree-sha1 = "12ce661880f8e309569074a61d3767e5756a199f" +git-tree-sha1 = "7b990898534ea9797bf9bf21bd086850e5d9f817" uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -version = "1.41.1" +version = "1.41.2" [deps.Plots.extensions] FileIOExt = "FileIO" @@ -3044,9 +3035,9 @@ version = "0.5.2" [[deps.SciMLBase]] deps = ["ADTypes", "Accessors", "Adapt", "ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "Moshi", "PreallocationTools", "PrecompileTools", "Preferences", "Printf", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLLogging", "SciMLOperators", "SciMLPublic", "SciMLStructures", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface"] -git-tree-sha1 = "dc93eb05a8101a58c844e0e20a47f8a92be33048" +git-tree-sha1 = "b06382e2f1ff0c9aff4f88f560673c800df6099f" uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -version = "2.127.0" +version = "2.128.0" [deps.SciMLBase.extensions] SciMLBaseChainRulesCoreExt = "ChainRulesCore" @@ -3095,15 +3086,15 @@ version = "0.1.11" [[deps.SciMLLogging]] deps = ["Logging", "LoggingExtras", "Preferences"] -git-tree-sha1 = "70d5b2fc50fde8d868f906b54045eb12b490e867" +git-tree-sha1 = "1713f77f37f6809cd5b051197ea9adf139101ae5" uuid = "a6db7da4-7206-11f0-1eab-35f2a5dbe1d1" -version = "1.5.0" +version = "1.6.0" [[deps.SciMLOperators]] deps = ["Accessors", "ArrayInterface", "DocStringExtensions", "LinearAlgebra", "MacroTools"] -git-tree-sha1 = "a1e12aee1eb7e6f957e8483eeebf9a98f3e135d6" +git-tree-sha1 = "9deb07bb4e6f95712bda5617a717caed701fbe4b" uuid = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" -version = "1.13.0" +version = "1.14.0" weakdeps = ["SparseArrays", "StaticArraysCore"] [deps.SciMLOperators.extensions] @@ -3340,9 +3331,9 @@ weakdeps = ["SparseArrays"] [[deps.StatsAPI]] deps = ["LinearAlgebra"] -git-tree-sha1 = "9d72a13a3f4dd3795a195ac5a44d7d6ff5f552ff" +git-tree-sha1 = "178ed29fd5b2a2cfc3bd31c13375ae925623ff36" uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" -version = "1.7.1" +version = "1.8.0" [[deps.StatsBase]] deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] @@ -3380,10 +3371,10 @@ uuid = "9672c7b4-1e72-59bd-8a11-6ac3964bc41f" version = "2.8.0" [[deps.StochasticDiffEq]] -deps = ["ADTypes", "Adapt", 
"ArrayInterface", "DataStructures", "DiffEqBase", "DiffEqNoiseProcess", "DocStringExtensions", "FastPower", "FiniteDiff", "ForwardDiff", "JumpProcesses", "LevyArea", "LinearAlgebra", "Logging", "MuladdMacro", "NLsolve", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "Random", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators", "SparseArrays", "StaticArrays", "UnPack"] -git-tree-sha1 = "a7d5d87185450b61a95000547c85401ffd8e6e42" +deps = ["ADTypes", "Adapt", "ArrayInterface", "DataStructures", "DiffEqBase", "DiffEqNoiseProcess", "DocStringExtensions", "FastPower", "FiniteDiff", "ForwardDiff", "JumpProcesses", "LevyArea", "LinearAlgebra", "Logging", "MuladdMacro", "OrdinaryDiffEqCore", "OrdinaryDiffEqDifferentiation", "OrdinaryDiffEqNonlinearSolve", "Random", "RecursiveArrayTools", "Reexport", "SciMLBase", "SciMLOperators", "SimpleNonlinearSolve", "SparseArrays", "StaticArrays", "UnPack"] +git-tree-sha1 = "a4f362cce06506d1be3a6cdd50b4828c8a0d3a7b" uuid = "789caeaf-c7a9-5a7d-9973-96adeb23e2a0" -version = "6.84.0" +version = "6.85.0" [[deps.StrideArraysCore]] deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface", "ThreadingUtilities"] @@ -3556,9 +3547,9 @@ version = "0.5.5" [[deps.TimeZones]] deps = ["Artifacts", "Dates", "Downloads", "InlineStrings", "Mocking", "Printf", "Scratch", "TZJData", "Unicode", "p7zip_jll"] -git-tree-sha1 = "06f4f1f3e8ff09e42e59b043a747332e88e01aba" +git-tree-sha1 = "d422301b2a1e294e3e4214061e44f338cafe18a2" uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53" -version = "1.22.1" +version = "1.22.2" weakdeps = ["RecipesBase"] [deps.TimeZones.extensions] @@ -3644,14 +3635,13 @@ version = "1.6.0" [[deps.Turing]] deps = ["ADTypes", "AbstractMCMC", "AbstractPPL", "Accessors", "AdvancedHMC", "AdvancedMH", "AdvancedPS", "AdvancedVI", "BangBang", "Bijectors", "Compat", "DataStructures", "Distributions", "DistributionsAD", "DocStringExtensions", "DynamicPPL", "EllipticalSliceSampling", "ForwardDiff", "Libtask", "LinearAlgebra", "LogDensityProblems", "MCMCChains", "NamedArrays", "Optimization", "OptimizationOptimJL", "OrderedCollections", "Printf", "Random", "Reexport", "SciMLBase", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"] -git-tree-sha1 = "766f66b4a785a42c89df27a8aac30e99d0fe2f94" +git-tree-sha1 = "69b42b532b4a92c6093e46be2b1a9d62a155489d" uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" -version = "0.41.4" -weakdeps = ["DynamicHMC", "Optim"] +version = "0.42.0" +weakdeps = ["DynamicHMC"] [deps.Turing.extensions] TuringDynamicHMCExt = "DynamicHMC" - TuringOptimExt = ["Optim", "AbstractPPL"] [[deps.URIs]] git-tree-sha1 = "bef26fb046d031353ef97a82e3fdb6afe7f21b1a" @@ -4027,6 +4017,6 @@ version = "4.1.0+0" [[deps.xkbcommon_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libxcb_jll", "Xorg_xkeyboard_config_jll"] -git-tree-sha1 = "fbf139bce07a534df0e699dbb5f5cc9346f95cc1" +git-tree-sha1 = "a1fc6507a40bf504527d0d4067d718f8e179b2b8" uuid = "d8fb68d0-12a3-5cfd-a85a-d49703b185fd" -version = "1.9.2+0" +version = "1.13.0+0" diff --git a/Project.toml b/Project.toml index 6ea13f35e..b13d7e2f2 100644 --- a/Project.toml +++ b/Project.toml @@ -51,4 +51,4 @@ StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" [compat] -Turing = "0.41" +Turing = "0.42" From 9ecf0c3ee731b39d3c8294dcfce7da05de3226e8 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 
12:14:27 +0000 Subject: [PATCH 02/20] Update command to reset Quarto Julia process in README --- README.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index b0a29d4e9..7bcd4d958 100644 --- a/README.md +++ b/README.md @@ -95,19 +95,15 @@ If you wish to speed up local rendering, there are two options available: ## Troubleshooting build issues -As described in the [Quarto docs](https://quarto.org/docs/computations/julia.html#using-the-julia-engine), Quarto's Julia engine uses a worker process behind the scenes. +Quarto's Julia engine uses a separate worker process behind the scenes. Sometimes this can result in issues with old package code not being unloaded (e.g. when package versions are upgraded). -If you find that Quarto's execution is failing with errors that aren't reproducible via a normal REPL, try adding the `--execute-daemon-restart` flag to the `quarto render` command: +If you find that Quarto's execution is failing with errors that aren't reproducible via a normal REPL, try running: ```bash -quarto render /path/to/index.qmd --execute-daemon-restart +quarto call engine julia kill ``` -And also, kill any stray Quarto processes that are still running (sometimes it keeps running in the background): - -```bash -pkill -9 -f quarto -``` +before rerunning the build (see [the Quarto docs](https://quarto.org/docs/computations/julia.html#quarto-call-engine-julia-commands) for more information). ## Licence From 0848518fe385c8de4dc29fa71e4105ff75a411d2 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 12:19:28 +0000 Subject: [PATCH 03/20] Update threadsafe docs for v0.42 --- usage/threadsafe-evaluation/index.qmd | 157 ++++++++++++++++++-------- 1 file changed, 110 insertions(+), 47 deletions(-) diff --git a/usage/threadsafe-evaluation/index.qmd b/usage/threadsafe-evaluation/index.qmd index cd28dd727..d02a14aff 100755 --- a/usage/threadsafe-evaluation/index.qmd +++ b/usage/threadsafe-evaluation/index.qmd @@ -6,6 +6,13 @@ julia: - "--threads=4" --- +```{julia} +#| echo: false +#| output: false +using Pkg; +Pkg.instantiate(); +``` + A common technique to speed up Julia code is to use multiple threads to run computations in parallel. The Julia manual [has a section on multithreading](https://docs.julialang.org/en/v1/manual/multi-threading), which is a good introduction to the topic. @@ -17,17 +24,10 @@ Please note that this is a rapidly-moving topic, and things may change in future If you are ever unsure about what works and doesn't, please don't hesitate to ask on [Slack](https://julialang.slack.com/archives/CCYDC34A0) or [Discourse](https://discourse.julialang.org/c/domain/probprog/48) ::: -## MCMC sampling - -For complete clarity, this page has nothing to do with parallel sampling of MCMC chains using - -```julia -sample(model, sampler, MCMCThreads(), N, nchains) +```{julia} +println("This notebook is being run with $(Threads.nthreads()) threads.") ``` -That parallelisation exists outside of the model evaluation, and thus is independent of the model contents. -This page only discusses threading _inside_ Turing models. - ## Threading in Turing models Given that Turing models mostly contain 'plain' Julia code, one might expect that all threading constructs such as `Threads.@threads` or `Threads.@spawn` can be used inside Turing models. 
@@ -35,8 +35,10 @@ Given that Turing models mostly contain 'plain' Julia code, one might expect tha This is, to some extent, true: for example, you can use threading constructs to speed up deterministic computations. For example, here we use parallelism to speed up a transformation of `x`: -```julia -@model function f(y) +```{julia} +using Turing + +@model function parallel(y) x ~ dist x_transformed = similar(x) Threads.@threads for i in eachindex(x) @@ -48,8 +50,11 @@ end In general, for code that does not involve tilde-statements (`x ~ dist`), threading works exactly as it does in regular Julia code. -**However, extra care must be taken when using tilde-statements (`x ~ dist`) inside threaded blocks.** -The reason for this is because tilde-statements modify the internal VarInfo object used for model evaluation. +**However, extra care must be taken when using tilde-statements (`x ~ dist`), or `@addlogprob!`, inside threaded blocks.** + +::: {.callout-note} +## Why are tilde-statements special? +Tilde-statements are expanded by the `@model` macro into something that modifies the internal VarInfo object used for model evaluation. Essentially, `x ~ dist` expands to something like ```julia @@ -58,16 +63,17 @@ x, __varinfo__ = DynamicPPL.tilde_assume!!(..., __varinfo__) and writing into `__varinfo__` is, _in general_, not threadsafe. Thus, parallelising tilde-statements can lead to data races [as described in the Julia manual](https://docs.julialang.org/en/v1/manual/multi-threading/#Using-@threads-without-data-races). +::: + +## Threaded observations -## Threaded tilde-statements +**As of version 0.42, Turing only supports the use of tilde-statements inside threaded blocks when these are observations (i.e., likelihood terms).** -**As of version 0.41, Turing only supports the use of tilde-statements inside threaded blocks when these are observations (i.e., likelihood terms).** +However, such models **must** be marked by the user as requiring threadsafe evaluation, using `setthreadsafe`. 
This means that the following code is safe to use: ```{julia} -using Turing - @model function threaded_obs(N) x ~ Normal() y = Vector{Float64}(undef, N) @@ -78,13 +84,14 @@ end N = 100 y = randn(N) -model = threaded_obs(N) | (; y = y) +threadunsafe_model = threaded_obs(N) | (; y = y) +threadsafe_model = setthreadsafe(threadunsafe_model, true) ``` Evaluating this model is threadsafe, in that Turing guarantees to provide the correct result in functions such as: ```{julia} -logjoint(model, (; x = 0.0)) +logjoint(threadsafe_model, (; x = 0.0)) ``` (we can compare with the true value) @@ -93,29 +100,36 @@ logjoint(model, (; x = 0.0)) logpdf(Normal(), 0.0) + sum(logpdf.(Normal(0.0), y)) ``` -When sampling, you must disable model checking, but otherwise results will be correct: +Note that if you do not use `setthreadsafe`, the above code may give wrong results, or even error: ```{julia} -sample(model, NUTS(), 100; check_model=false, progress=false) +logjoint(threadunsafe_model, (; x = 0.0)) ``` -::: {.callout-warning} -## Upcoming changes +You can sample from this model and safely use functions such as `predict` or `returned`, as long as the model is always marked as threadsafe: -Starting from DynamicPPL 0.39, if you use tilde-statements or `@addlogprob!` inside threaded blocks, you will have to declare this upfront using: +```{julia} +model = setthreadsafe(threaded_obs(N) | (; y = y), true) +chn = sample(model, NUTS(), 100; check_model=false, progress=false) +``` -```julia -model = threaded_obs() | (; y = randn(N)) -threadsafe_model = setthreadsafe(model, true) +```{julia} +pmodel = setthreadsafe(threaded_obs(N), true) # don't condition on data +predict(pmodel, chn) ``` -Then you can sample from `threadsafe_model` as before. +::: {.callout-warning} +## Previous versions + +Up until Turing v0.41, you did not need to use `setthreadsafe` to enable threadsafe evaluation, and it was automatically enabled whenever Julia was launched with more than one thread. + +There were several reasons for changing this: one major one is because threadsafe evaluation comes with a performance cost, which can sometimes be substantial (see below). -The reason for this change is because threadsafe evaluation comes with a performance cost, which can sometimes be substantial. -In the past, threadsafe evaluation was always enabled, i.e., this cost was *always* incurred whenever Julia was launched with more than one thread. -However, this is not an appropriate way to determine whether threadsafe evaluation is needed! +Furthermore, the number of threads is not an appropriate way to determine whether threadsafe evaluation is needed! ::: +## Threaded assumptions / sampling latent values + **On the other hand, parallelising the sampling of latent values is not supported.** Attempting to do this will either error or give wrong results. @@ -136,25 +150,72 @@ model = threaded_assume_bad(100) model() ``` -**Note, in particular, that this means that you cannot currently use `predict` to sample new data in parallel.** +## When is threadsafe evaluation really needed? -:::{.callout-note} -## Threaded `predict` +You only need to enable threadsafe evaluation if you are using tilde-statements or `@addlogprob!` inside threaded blocks. -Support for threaded `predict` will be added in DynamicPPL 0.39 (see [this pull request](https://github.com/TuringLang/DynamicPPL.jl/pull/1130)). 
-::: +Specifically, you do *not* need to enable threadsafe evaluation if: + +- You have parallelism inside the model, but it does not involve tilde-statements or `@addlogprob!`. + + ```julia + @model function parallel_no_tilde(y) + x ~ Normal() + fy = similar(y) + Threads.@threads for i in eachindex(y) + fy[i] = some_expensive_function(x, y[i]) + end + end + # This does not need setthreadsafe + model = parallel_no_tilde(y) + ``` + +- You are sampling from a model using `MCMCThreads()`, but the model itself does not contain any parallel tilde-statements or `@addlogprob!`. + + ```julia + @model function no_parallel(y) + x ~ Normal() + y ~ Normal(x) + end + + # This does not need setthreadsafe + model = no_parallel(1.0) + chn = sample(model, NUTS(), MCMCThreads(), 100) + ``` + +## Performance considerations + +As described above, one of the major considerations behind the introduction of `setthreadsafe` is that threadsafe evaluation comes with a performance cost. -That is, even for `threaded_obs` where `y` was originally an observed term, you _cannot_ do: +Consider a simple model that does not use threading: ```{julia} -#| error: true -model = threaded_obs(N) | (; y = y) -chn = sample(model, NUTS(), 100; check_model=false, progress=false) +@model function gdemo() + s ~ InverseGamma(2, 3) + m ~ Normal(0, sqrt(s)) + 1.5 ~ Normal(m, sqrt(s)) + 2.0 ~ Normal(m, sqrt(s)) +end +model_no_threadsafe = gdemo() +model_threadsafe = setthreadsafe(gdemo(), true) +``` -pmodel = threaded_obs(N) # don't condition on data -predict(pmodel, chn) +One can see that evaluation of the threadsafe model is substantially slower: + +```{julia} +using Chairmarks, DynamicPPL + +function benchmark_eval(m) + vi = VarInfo(m) + display(median(@be DynamicPPL.evaluate!!($m, $vi))) +end + +benchmark_eval(model_no_threadsafe) +benchmark_eval(model_threadsafe) ``` +In previous versions of Turing, this cost would **always** be incurred whenever Julia was launched with multiple threads, even if the model did not use any threading at all! + ## Alternatives to threaded observation An alternative to using threaded observations is to manually calculate the log-likelihood term (which can be parallelised using any of Julia's standard mechanisms), and then _outside_ of the threaded block, [add it to the model using `@addlogprob!`]({{< meta usage-modifying-logprob >}}). @@ -198,8 +259,10 @@ On the other hand, one benefit of rewriting the model this way is that sampling using Random N = 100 y = randn(N) +# Note that since `@addlogprob!` is outside of the threaded block, we don't +# need to use `setthreadsafe`. model = threaded_obs_addlogprob(N, y) -nuts_kwargs = (check_model=false, progress=false, verbose=false) +nuts_kwargs = (progress=false, verbose=false) chain1 = sample(Xoshiro(468), model, NUTS(), MCMCThreads(), 1000, 4; nuts_kwargs...) chain2 = sample(Xoshiro(468), model, NUTS(), MCMCThreads(), 1000, 4; nuts_kwargs...) @@ -210,8 +273,8 @@ In contrast, the original `threaded_obs` (which used tilde inside `Threads.@thre (In principle, we would like to fix this bug, but we haven't yet investigated where it stems from.) ```{julia} -model = threaded_obs(N) | (; y = y) -nuts_kwargs = (check_model=false, progress=false, verbose=false) +model = setthreadsafe(threaded_obs(N) | (; y = y), true) +nuts_kwargs = (progress=false, verbose=false) chain1 = sample(Xoshiro(468), model, NUTS(), MCMCThreads(), 1000, 4; nuts_kwargs...) chain2 = sample(Xoshiro(468), model, NUTS(), MCMCThreads(), 1000, 4; nuts_kwargs...) 
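# Despite the identical RNG seeds above, these two runs will generally *not*
# produce the same chains — this is the reproducibility bug described above.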
mean(chain1[:x]), mean(chain2[:x]) # oops! @@ -258,13 +321,13 @@ As it happens, much of what is needed in DynamicPPL can be constructed such that For example, as long as there is no need to *sample* new values of random variables, it is actually fine to completely omit the metadata object. This is the case for `LogDensityFunction`: since values are provided as the input vector, there is no need to store it in metadata. We need only calculate the associated log-prior probability, which is stored in an accumulator. -Thus, starting from DynamicPPL v0.39, `LogDensityFunction` itself will in fact be completely threadsafe. +Thus, since DynamicPPL v0.39, `LogDensityFunction` itself is completely threadsafe. Technically speaking, this is achieved using `OnlyAccsVarInfo`, which is a subtype of `VarInfo` that only contains accumulators, and no metadata at all. It implements enough of the `VarInfo` interface to be used in model evaluation, but will error if any functions attempt to modify or read its metadata. There is currently an ongoing push to use `OnlyAccsVarInfo` in as many settings as we possibly can. -For example, this is why `predict` will be threadsafe in DynamicPPL v0.39: instead of modifying metadata to store the predicted values, we store them inside a `ValuesAsInModelAccumulator` instead, and combine them at the end of evaluation. +For example, this is why `predict` is threadsafe in DynamicPPL v0.39: instead of modifying metadata to store the predicted values, we store them inside a `ValuesAsInModelAccumulator` instead, and combine them at the end of evaluation. However, propagating these changes up to Turing will require a substantial amount of additional work, since there are many places in Turing which currently rely on a full VarInfo (with metadata). See, e.g., [this PR](https://github.com/TuringLang/DynamicPPL.jl/pull/1154) for more information. From 49a19986676a69723d0545d438485f7d06b93027 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 12:22:09 +0000 Subject: [PATCH 04/20] update FAQ --- faq/index.qmd | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/faq/index.qmd b/faq/index.qmd index bac8f2e06..18527dfb1 100644 --- a/faq/index.qmd +++ b/faq/index.qmd @@ -47,7 +47,7 @@ To understand more about how Turing determines whether a variable is treated as ## Can I use parallelism / threads in my model? -Yes, but with important caveats! There are two types of parallelism to consider: +Yes, but with some important caveats: ### 1. Parallel Sampling (Multiple Chains) Turing.jl fully supports sampling multiple chains in parallel: @@ -58,16 +58,24 @@ Turing.jl fully supports sampling multiple chains in parallel: See the [Core Functionality guide]({{}}#sampling-multiple-chains) for examples. ### 2. Threading Within Models -Using threads inside your model (e.g., `Threads.@threads`) requires more care: + +Using threads inside your model (e.g., `Threads.@threads`) requires more care. +In particular, only threaded **observe** statements are safe to use; threaded **assume** statements can lead to crashes or incorrect results. +Please see the [Threadsafe Evaluation page]({{< meta usage/threadsafe-evaluation >}}) for complete details. ```julia @model function f(y) x = Vector{Float64}(undef, length(y)) Threads.@threads for i in eachindex(y) - x[i] ~ Normal() # UNSAFE: `assume` statements in @threads can crash! - y[i] ~ Normal(x[i]) # `observe` statements are okay + # This would be unsafe! 
+ # x[i] ~ Normal() + # This is safe: + y[i] ~ Normal() end end +# If you have parallel tilde-statements or `@addlogprob!` in a model, +# you must mark the model as threadsafe: +model = setthreadsafe(f(y), true) ``` **Important limitations:** @@ -76,8 +84,6 @@ end - **Assume statements** (sampling statements): Often crash unpredictably or produce incorrect results - **AD backend compatibility**: Many AD backends don't support threading. Check the [multithreaded column in ADTests](https://turinglang.org/ADTests/) for compatibility -For safe parallelism within models, consider vectorised operations instead of explicit threading. - ## How do I check the type stability of my Turing model? Type stability is crucial for performance. Check out: From 88e1161b5a0b8333ddf2e082f70e48f62bfaa802 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 12:32:36 +0000 Subject: [PATCH 05/20] Update version in _quarto.yml --- _quarto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_quarto.yml b/_quarto.yml index 473fad7b1..156e25fb5 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -43,7 +43,7 @@ website: href: https://turinglang.org/team/ right: # Current version - - text: "v0.41" + - text: "v0.42" menu: - text: Changelog href: https://turinglang.org/docs/changelog.html From 071d5a361f5c4d7dd204891d61e1396cb45fa47d Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 12:32:43 +0000 Subject: [PATCH 06/20] Update VI tutorial --- tutorials/variational-inference/index.qmd | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tutorials/variational-inference/index.qmd b/tutorials/variational-inference/index.qmd index 940ea30ca..8f3cff39f 100755 --- a/tutorials/variational-inference/index.qmd +++ b/tutorials/variational-inference/index.qmd @@ -20,12 +20,12 @@ Let's start with a minimal example. Consider a `Turing.Model`, which we denote as `model`. Approximating the posterior associated with `model` via VI is as simple as -```{julia} -#| eval: false +```julia m = model(data...) # instantiate model on the data q_init = q_fullrank_gaussian(m) # initial variational approximation vi(m, q_init, 1000) # perform VI with the default algorithm on `m` for 1000 iterations ``` + Thus, it's no more work than standard MCMC sampling in Turing. The default algorithm uses stochastic gradient descent to minimise the (exclusive) [KL divergence](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence). This approach is commonly referred to as *automatic differentiation variational inference* (ADVI)[^KTRGB2017], *stochastic gradient VI*[^TL2014], and *black-box variational inference*[^RGB2014] with the reparameterization gradient[^KW2014][^RMW2014][^TL2014]. @@ -240,10 +240,16 @@ Our implementation supports any optimiser that implements the [Optimisers.jl](ht For instance, let's try using `Optimisers.Adam`[^KB2014], which is a popular choice. Since `AdvancedVI` does not implement a proximal operator for `Optimisers.Adam`, we must use the `AdvancedVI.ClipScale()` projection operator, which ensures that the scale matrix of the variational approximation is positive definite. (See the paper by J. Domke 2020[^D2020] for more detail about the use of a projection operator.) 
+ ```{julia} using Optimisers -_, _, info_adam, _ = vi(m, q_init, n_iters; show_progress=false, callback=callback, optimizer=Optimisers.Adam(3e-3), operator=ClipScale()); +_, _, info_adam, _ = vi( + m, q_init, n_iters; + show_progress=false, + callback=callback, + algorithm=KLMinRepGradDescent(AutoForwardDiff(); optimizer=Optimisers.Adam(3e-3), operator=ClipScale()) +); ``` ```{julia} @@ -252,6 +258,7 @@ elbo_adam = [i.elbo_avg for i in info_adam[iters]] Plots.plot(iters, elbo_mf, xlabel="Iterations", ylabel="ELBO", label="DoWG") Plots.plot!(iters, elbo_adam, xlabel="Iterations", ylabel="ELBO", label="Adam") ``` + Compared to the default option `AdvancedVI.DoWG()`, we can see that `Optimisers.Adam(3e-3)` is converging more slowly. With more step size tuning, it is possible that `Optimisers.Adam` could perform better or equal. That is, most common optimisers require some degree of tuning to perform better or comparably to `AdvancedVI.DoWG()` or `AdvancedVI.DoG()`, which do not require much tuning at all. @@ -261,6 +268,7 @@ Due to this fact, they are referred to as parameter-free optimizers. So far, we have only used the mean-field Gaussian family. This, however, approximates the posterior covariance with a diagonal matrix. To model the full covariance matrix, we can use the *full-rank* Gaussian family[^TL2014][^KTRGB2017]: + ```{julia} q_init_fr = q_fullrank_gaussian(m); ``` @@ -273,6 +281,7 @@ The term *full-rank* might seem a bit peculiar since covariance matrices are alw This term, however, traditionally comes from the fact that full-rank families use full-rank factors in addition to the diagonal of the covariance. In contrast to the mean-field family, the full-rank family will often result in more computation per optimisation step and slower convergence, especially in high dimensions: + ```{julia} q_fr, _, info_fr, _ = vi(m, q_init_fr, n_iters; show_progress=false, callback) @@ -281,12 +290,14 @@ Plots.plot(elbo_mf, xlabel="Iterations", ylabel="ELBO", label="Mean-Field", ylim elbo_fr = [i.elbo_avg for i in info_fr[iters]] Plots.plot!(elbo_fr, xlabel="Iterations", ylabel="ELBO", label="Full-Rank", ylims=(-200, Inf)) ``` + However, we can see that the full-rank families achieve a higher ELBO in the end. Due to the relationship between the ELBO and the Kullback-Leibler divergence, this indicates that the full-rank covariance is much more accurate. This trade-off between statistical accuracy and optimisation speed is often referred to as the *statistical-computational trade-off*. The fact that we can control this trade-off through the choice of variational family is a strength, rather than a limitation, of variational inference. We can also visualise the covariance matrix. + ```{julia} heatmap(cov(rand(q_fr, 100_000), dims=2)) ``` From e4d56e881a70264ede669d77b099c0a979168760 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 13:11:06 +0000 Subject: [PATCH 07/20] lp -> logjoint --- tutorials/bayesian-neural-networks/index.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/bayesian-neural-networks/index.qmd b/tutorials/bayesian-neural-networks/index.qmd index e2a8774f9..1032622f8 100755 --- a/tutorials/bayesian-neural-networks/index.qmd +++ b/tutorials/bayesian-neural-networks/index.qmd @@ -227,7 +227,7 @@ nn_forward(x, θ) = nn(x, vector_to_parameters(θ, ps)) fig = plot_data() # Find the index that provided the highest log posterior in the chain. -_, i = findmax(ch[:lp]) +_, i = findmax(ch[:logjoint]) # Extract the max row value from i. 
i = i.I[1] @@ -286,4 +286,4 @@ anim = @gif for i in 1:n_end end every 5 ``` -This has been an introduction to the applications of Turing and Lux in defining Bayesian neural networks. \ No newline at end of file +This has been an introduction to the applications of Turing and Lux in defining Bayesian neural networks. From f433446c7926ba3e062d5816a908dee69216b707 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 13:32:04 +0000 Subject: [PATCH 08/20] Fix VI interface --- tutorials/variational-inference/index.qmd | 57 ++++++++++++++++++----- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/tutorials/variational-inference/index.qmd b/tutorials/variational-inference/index.qmd index 8f3cff39f..586f77003 100755 --- a/tutorials/variational-inference/index.qmd +++ b/tutorials/variational-inference/index.qmd @@ -148,6 +148,7 @@ m = linear_regression(train, train_label, n_obs, n_vars); To run VI, we must first set a *variational family*. For instance, the most commonly used family is the mean-field Gaussian family. For this, Turing provides functions that automatically construct the initialisation corresponding to the model `m`: + ```{julia} q_init = q_meanfield_gaussian(m); ``` @@ -161,10 +162,12 @@ Here is a detailed documentation for the constructor: As we can see, the precise initialisation can be customized through the keyword arguments. Let's run VI with the default setting: + ```{julia} n_iters = 1000 -q_avg, q_last, info, state = vi(m, q_init, n_iters; show_progress=false); +q_avg, info, state = vi(m, q_init, n_iters; show_progress=false); ``` + The default setting uses the `AdvancedVI.RepGradELBO` objective, which corresponds to a variant of what is known as *automatic differentiation VI*[^KTRGB2017] or *stochastic gradient VI*[^TL2014] or *black-box VI*[^RGB2014] with the reparameterization gradient[^KW2014][^RMW2014][^TL2014]. The default optimiser we use is `AdvancedVI.DoWG`[^KMJ2023] combined with a proximal operator. (The use of proximal operators with VI on a location-scale family is discussed in detail by J. Domke[^D2020][^DGG2023] and others[^KOWMG2023].) @@ -178,8 +181,24 @@ First, here is the full documentation for `vi`: ## Values Returned by `vi` The main output of the algorithm is `q_avg`, the average of the parameters generated by the optimisation algorithm. For computing `q_avg`, the default setting uses what is known as polynomial averaging[^SZ2013]. -Usually, `q_avg` will perform better than the last-iterate `q_last`. +Usually, `q_avg` will perform better than the last-iterate `q_last`, which cana be obtained by disabling averaging: + +```{julia} +q_last, _, _ = vi( + m, + q_init, + n_iters; + show_progress=false, + algorithm=KLMinRepGradDescent( + AutoForwardDiff(); + operator=AdvancedVI.ClipScale(), + averager=AdvancedVI.NoAveraging() + ), +); +``` + For instance, we can compare the ELBO of the two: + ```{julia} @info("Objective of q_avg and q_last", ELBO_q_avg = estimate_objective(AdvancedVI.RepGradELBO(32), q_avg, LogDensityFunction(m)), @@ -194,6 +213,7 @@ For the default setting, which is `RepGradELBO`, it contains the ELBO estimated ```{julia} Plots.plot([i.elbo for i in info], xlabel="Iterations", ylabel="ELBO", label="info") ``` + Since the ELBO is estimated by a small number of samples, it appears noisy. Furthermore, at each step, the ELBO is evaluated on `q_last`, not `q_avg`, which is the actual output that we care about. To obtain more accurate ELBO estimates evaluated on `q_avg`, we have to define a custom callback function. 
@@ -203,30 +223,38 @@ To inspect the progress of optimisation in more detail, one can define a custom For example, the following callback function estimates the ELBO on `q_avg` every 10 steps with a larger number of samples: ```{julia} -function callback(; stat, averaged_params, restructure, kwargs...) - if mod(stat.iteration, 10) == 1 +using DynamicPPL: DynamicPPL + +function callback(; iteration, averaged_params, restructure, kwargs...) + if mod(iteration, 10) == 1 q_avg = restructure(averaged_params) - obj = AdvancedVI.RepGradELBO(128) - elbo_avg = estimate_objective(obj, q_avg, LogDensityFunction(m)) + obj = AdvancedVI.RepGradELBO(128) # 128 samples for ELBO estimation + vi = DynamicPPL.link!!(DynamicPPL.VarInfo(m), m); + elbo_avg = -estimate_objective(obj, q_avg, LogDensityFunction(m, DynamicPPL.getlogjoint_internal, vi)) (elbo_avg = elbo_avg,) else nothing end end; ``` + The `NamedTuple` returned by `callback` will be appended to the corresponding entry of `info`, and it will also be displayed on the progress meter if `show_progress` is set as `true`. The custom callback can be supplied to `vi` as a keyword argument: + ```{julia} -q_mf, _, info_mf, _ = vi(m, q_init, n_iters; show_progress=false, callback=callback); +q_mf, info_mf, _ = vi(m, q_init, n_iters; show_progress=false, callback=callback); ``` Let's plot the result: + ```{julia} iters = 1:10:length(info_mf) elbo_mf = [i.elbo_avg for i in info_mf[iters]] -Plots.plot!(iters, elbo_mf, xlabel="Iterations", ylabel="ELBO", label="callback", ylims=(-200,Inf)) +Plots.plot([i.elbo for i in info], xlabel="Iterations", ylabel="ELBO", label="info", linewidth=0.4) +Plots.plot!(iters, elbo_mf, xlabel="Iterations", ylabel="ELBO", label="callback", ylims=(-200,Inf), linewidth=2) ``` + We can see that the ELBO values are less noisy and progress more smoothly due to averaging. ## Using Different Optimisers @@ -244,7 +272,7 @@ Since `AdvancedVI` does not implement a proximal operator for `Optimisers.Adam`, ```{julia} using Optimisers -_, _, info_adam, _ = vi( +_, info_adam, _ = vi( m, q_init, n_iters; show_progress=false, callback=callback, @@ -265,6 +293,7 @@ That is, most common optimisers require some degree of tuning to perform better Due to this fact, they are referred to as parameter-free optimizers. ## Using Full-Rank Variational Families + So far, we have only used the mean-field Gaussian family. This, however, approximates the posterior covariance with a diagonal matrix. To model the full covariance matrix, we can use the *full-rank* Gaussian family[^TL2014][^KTRGB2017]: @@ -283,7 +312,7 @@ This term, however, traditionally comes from the fact that full-rank families us In contrast to the mean-field family, the full-rank family will often result in more computation per optimisation step and slower convergence, especially in high dimensions: ```{julia} -q_fr, _, info_fr, _ = vi(m, q_init_fr, n_iters; show_progress=false, callback) +q_fr, info_fr, _ = vi(m, q_init_fr, n_iters; show_progress=false, callback) Plots.plot(elbo_mf, xlabel="Iterations", ylabel="ELBO", label="Mean-Field", ylims=(-200, Inf)) @@ -292,7 +321,7 @@ Plots.plot!(elbo_fr, xlabel="Iterations", ylabel="ELBO", label="Full-Rank", ylim ``` However, we can see that the full-rank families achieve a higher ELBO in the end. -Due to the relationship between the ELBO and the Kullback-Leibler divergence, this indicates that the full-rank covariance is much more accurate. 
+Due to the relationship between the ELBO and the Kullback–Leibler divergence, this indicates that the full-rank covariance is much more accurate. This trade-off between statistical accuracy and optimisation speed is often referred to as the *statistical-computational trade-off*. The fact that we can control this trade-off through the choice of variational family is a strength, rather than a limitation, of variational inference. @@ -342,19 +371,21 @@ avg[union(sym2range[:coefficients]...)] ``` For further convenience, we can wrap the samples into a `Chains` object to summarise the results. + ```{julia} varinf = Turing.DynamicPPL.VarInfo(m) vns_and_values = Turing.DynamicPPL.varname_and_value_leaves(Turing.DynamicPPL.values_as(varinf, OrderedDict)) varnames = map(first, vns_and_values) vi_chain = Chains(reshape(z', (size(z,2), size(z,1), 1)), varnames) ``` + (Since we're drawing independent samples, we can simply ignore the ESS and Rhat metrics.) Unfortunately, extracting `varnames` is a bit verbose at the moment, but hopefully will become simpler in the near future. Let's compare this against samples from `NUTS`: ```{julia} -mcmc_chain = sample(m, NUTS(), 10_000, drop_warmup=true, progress=false); +mcmc_chain = sample(m, NUTS(), 10_000; progress=false); vi_mean = mean(vi_chain)[:, 2] mcmc_mean = mean(mcmc_chain, names(mcmc_chain, :parameters))[:, 2] @@ -362,6 +393,7 @@ mcmc_mean = mean(mcmc_chain, names(mcmc_chain, :parameters))[:, 2] plot(mcmc_mean; xticks=1:1:length(mcmc_mean), label="mean of NUTS") plot!(vi_mean; label="mean of VI") ``` + That looks pretty good! But let's see how the predictive distributions looks for the two. ## Making Predictions @@ -516,6 +548,7 @@ title!("MCMC (NUTS)") plot(p1, p2, p3; layout=(1, 3), size=(900, 250), label="") ``` + We can see that the full-rank VI approximation is very close to the predictions from MCMC samples. Also, the coverage of full-rank VI and MCMC is much better the crude mean-field approximation. From e5dc46789c0bca995f4339a72e4ce7aa5cd00dfb Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 13:32:53 +0000 Subject: [PATCH 09/20] better code --- tutorials/variational-inference/index.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/variational-inference/index.qmd b/tutorials/variational-inference/index.qmd index 586f77003..70dfa5491 100755 --- a/tutorials/variational-inference/index.qmd +++ b/tutorials/variational-inference/index.qmd @@ -224,13 +224,13 @@ For example, the following callback function estimates the ELBO on `q_avg` every ```{julia} using DynamicPPL: DynamicPPL +linked_vi = DynamicPPL.link!!(DynamicPPL.VarInfo(m), m); function callback(; iteration, averaged_params, restructure, kwargs...) 
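    # `averaged_params` is the running average of the variational parameters;
    # `restructure` rebuilds the variational distribution from that flat vector.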
if mod(iteration, 10) == 1 q_avg = restructure(averaged_params) obj = AdvancedVI.RepGradELBO(128) # 128 samples for ELBO estimation - vi = DynamicPPL.link!!(DynamicPPL.VarInfo(m), m); - elbo_avg = -estimate_objective(obj, q_avg, LogDensityFunction(m, DynamicPPL.getlogjoint_internal, vi)) + elbo_avg = -estimate_objective(obj, q_avg, LogDensityFunction(m, DynamicPPL.getlogjoint_internal, linked_vi)) (elbo_avg = elbo_avg,) else nothing From 730d6a4b8e984b6b8a5a689338bd37544435f634 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 13:34:38 +0000 Subject: [PATCH 10/20] lp -> logjoint --- developers/inference/abstractmcmc-interface/index.qmd | 2 +- usage/sampler-visualisation/index.qmd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/developers/inference/abstractmcmc-interface/index.qmd b/developers/inference/abstractmcmc-interface/index.qmd index 82682808b..df617b7a4 100755 --- a/developers/inference/abstractmcmc-interface/index.qmd +++ b/developers/inference/abstractmcmc-interface/index.qmd @@ -320,4 +320,4 @@ It looks like we're extremely close to our true parameters of `Normal(5,3)`, tho ## Conclusion -We've seen how to implement the sampling interface for general projects. Turing's interface methods are ever-evolving, so please open an issue at [AbstractMCMC](https://github.com/TuringLang/AbstractMCMC.jl) with feature requests or problems. \ No newline at end of file +We've seen how to implement the sampling interface for general projects. Turing's interface methods are ever-evolving, so please open an issue at [AbstractMCMC](https://github.com/TuringLang/AbstractMCMC.jl) with feature requests or problems. diff --git a/usage/sampler-visualisation/index.qmd b/usage/sampler-visualisation/index.qmd index 958bf1f74..5ddbd89f8 100755 --- a/usage/sampler-visualisation/index.qmd +++ b/usage/sampler-visualisation/index.qmd @@ -57,7 +57,7 @@ function plot_sampler(chain; label="") val = get(chain, [:s², :m, :lp]) ss = link.(Ref(InverseGamma(2, 3)), val.s²) ms = val.m - lps = val.lp + lps = val.logjoint # How many surface points to sample. granularity = 100 @@ -196,4 +196,4 @@ Next, we plot using 50 particles. ```{julia} c = sample(model, PG(50), 1000) plot_sampler(c) -``` \ No newline at end of file +``` From 2ff19b82ca2714867f58603d18657ef9d22b5d06 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 13:45:55 +0000 Subject: [PATCH 11/20] lp -> logjoint --- usage/sampler-visualisation/index.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usage/sampler-visualisation/index.qmd b/usage/sampler-visualisation/index.qmd index 5ddbd89f8..f3661be6b 100755 --- a/usage/sampler-visualisation/index.qmd +++ b/usage/sampler-visualisation/index.qmd @@ -54,7 +54,7 @@ evaluate(m1, m2) = logjoint(model, (m=m2, s²=invlink.(Ref(InverseGamma(2, 3)), function plot_sampler(chain; label="") # Extract values from chain. 
- val = get(chain, [:s², :m, :lp]) + val = get(chain, [:s², :m, :logjoint]) ss = link.(Ref(InverseGamma(2, 3)), val.s²) ms = val.m lps = val.logjoint From 1aed939cc05927758aa8e48dffccc6c86598000a Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 13:48:27 +0000 Subject: [PATCH 12/20] fix shortcode --- faq/index.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/faq/index.qmd b/faq/index.qmd index 18527dfb1..145edbf61 100644 --- a/faq/index.qmd +++ b/faq/index.qmd @@ -61,7 +61,7 @@ See the [Core Functionality guide]({{}}#sampling-multip Using threads inside your model (e.g., `Threads.@threads`) requires more care. In particular, only threaded **observe** statements are safe to use; threaded **assume** statements can lead to crashes or incorrect results. -Please see the [Threadsafe Evaluation page]({{< meta usage/threadsafe-evaluation >}}) for complete details. +Please see the [Threadsafe Evaluation page]({{< meta usage-threadsafe-evaluation >}}) for complete details. ```julia @model function f(y) From fbe1bec8a364d8879b5324139ec25a40bf64b936 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 18:14:20 +0000 Subject: [PATCH 13/20] Fix external sampler interface --- .../inference/implementing-samplers/index.qmd | 122 ++++++++++++------ 1 file changed, 82 insertions(+), 40 deletions(-) diff --git a/developers/inference/implementing-samplers/index.qmd b/developers/inference/implementing-samplers/index.qmd index 9eb50d185..11d4a1545 100644 --- a/developers/inference/implementing-samplers/index.qmd +++ b/developers/inference/implementing-samplers/index.qmd @@ -18,12 +18,13 @@ In this tutorial, we'll go through step-by-step how to implement a "simple" samp In particular, we're going to implement a version of **Metropolis-adjusted Langevin (MALA)**. -Note that we will implement this sampler in the [AbstractMCMC.jl](https://github.com/TuringLang/AbstractMCMC.jl) framework, completely "ignoring" Turing.jl until the very end of the tutorial, at which point we'll use a single line of code to make the resulting sampler available to Turing.jl. This is to really drive home the point that one can implement samplers in a way that is accessible to all of Turing.jl's users without having to use Turing.jl yourself. - +Note that we will implement this sampler in the [AbstractMCMC.jl](https://github.com/TuringLang/AbstractMCMC.jl) framework, completely "ignoring" Turing.jl until the very end of the tutorial, at which point we'll use a single line of code to make the resulting sampler available to Turing.jl. +This is to really drive home the point that one can implement samplers in a way that is accessible to all of Turing.jl's users without having to use Turing.jl yourself. ## Quick overview of MALA -We can view MALA as a single step of the leapfrog integrator with resampling of momentum $p$ at every step.[^2] To make that statement a bit more concrete, we first define the *extended* target $\bar{\gamma}(x, p)$ as +We can view MALA as a single step of the leapfrog integrator with resampling of momentum $p$ at every step.[^2] +To make that statement a bit more concrete, we first define the *extended* target $\bar{\gamma}(x, p)$ as \begin{equation*} \log \bar{\gamma}(x, p) \propto \log \gamma(x) + \log \gamma_{\mathcal{N}(0, M)}(p) @@ -73,16 +74,16 @@ i.e. we accept the proposal $\tilde{x}$ with probability $\alpha$ and reject it, There are a few things we need from the "target" / "model" / density that we want to sample from: -1. 
We need access to log-density *evaluations* $\log \gamma(x)$ so we can compute the acceptance ratio involving $\log \bar{\gamma}(x, p)$. -2. We need access to log-density *gradients* $\nabla \log \gamma(x)$ so we can compute the Leapfrog steps $L_{\epsilon}(x, p)$. -3. We also need access to the "size" of the model so we can determine the size of $M$. +1. We need access to log-density *evaluations* $\log \gamma(x)$ so we can compute the acceptance ratio involving $\log \bar{\gamma}(x, p)$. +2. We need access to log-density *gradients* $\nabla \log \gamma(x)$ so we can compute the Leapfrog steps $L_{\epsilon}(x, p)$. +3. We also need access to the "size" of the model so we can determine the size of $M$. Luckily for us, there is a package called [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl) which provides an interface for *exactly* this! To demonstrate how one can implement the "[LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl) interface"[^1] we will use a simple Gaussian model as an example: ```{julia} -using LogDensityProblems: LogDensityProblems; +using LogDensityProblems: LogDensityProblems # Let's define some type that represents the model. struct IsotropicNormalModel{M<:AbstractVector{<:Real}} @@ -98,15 +99,16 @@ function LogDensityProblems.logdensity(model::IsotropicNormalModel, x::AbstractV end ``` -This gives us all of the properties we want for our MALA sampler with the exception of the computation of the *gradient* $\nabla \log \gamma(x)$. There is the method `LogDensityProblems.logdensity_and_gradient` which should return a 2-tuple where the first entry is the evaluation of the logdensity $\log \gamma(x)$ and the second entry is the gradient $\nabla \log \gamma(x)$. +This gives us all of the properties we want for our MALA sampler with the exception of the computation of the *gradient* $\nabla \log \gamma(x)$. +There is the method `LogDensityProblems.logdensity_and_gradient` which should return a 2-tuple where the first entry is the evaluation of the logdensity $\log \gamma(x)$ and the second entry is the gradient $\nabla \log \gamma(x)$. -There are two ways to "implement" this method: 1) we implement it by hand, which is feasible in the case of our `IsotropicNormalModel`, or b) we defer the implementation of this to an automatic differentiation backend. +There are two ways to "implement" this method: 1) we implement it by hand, which is feasible in the case of our `IsotropicNormalModel`, or 2) we defer the implementation of this to an automatic differentiation backend. To implement it by hand we can simply do ```{julia} # Tell LogDensityProblems.jl that first-order, i.e. gradient information, is available. -LogDensityProblems.capabilities(model::IsotropicNormalModel) = LogDensityProblems.LogDensityOrder{1}() +LogDensityProblems.capabilities(::Type{<:IsotropicNormalModel}) = LogDensityProblems.LogDensityOrder{1}() # Implement `logdensity_and_gradient`. function LogDensityProblems.logdensity_and_gradient(model::IsotropicNormalModel, x) @@ -131,7 +133,7 @@ To defer it to an automatic differentiation backend, we can do ```{julia} # Tell LogDensityProblems.jl we only have access to 0-th order information. -LogDensityProblems.capabilities(model::IsotropicNormalModel) = LogDensityProblems.LogDensityOrder{0}() +LogDensityProblems.capabilities(::Type{<:IsotropicNormalModel}) = LogDensityProblems.LogDensityOrder{0}() # Use `LogDensityProblemsAD`'s `ADgradient` in combination with some AD backend to implement `logdensity_and_gradient`. 
using LogDensityProblemsAD, ADTypes, ForwardDiff @@ -141,14 +143,18 @@ LogDensityProblems.logdensity(model_with_grad, x_example) We'll continue with the second approach in this tutorial since this is typically what one does in practice, because there are better hobbies to spend time on than deriving gradients by hand. -At this point, one might wonder how we're going to tie this back to Turing.jl in the end. Effectively, when working with inference methods that only require log-density evaluations and / or higher-order information of the log-density, Turing.jl actually converts the user-provided `Model` into an object implementing the above methods for [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl). As a result, most samplers provided by Turing.jl are actually implemented to work with [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl), enabling their use both *within* Turing.jl and *outside* of Turing.jl! Moreover, there exists similar conversions for Stan through BridgeStan and Stan[LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl), which means that a sampler supporting the [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl) interface can easily be used on both Turing.jl *and* Stan models (in addition to user-provided models, as our `IsotropicNormalModel` above)! - +At this point, one might wonder how we're going to tie this back to Turing.jl in the end. +Effectively, when working with inference methods that only require log-density evaluations and / or higher-order information of the log-density, Turing.jl actually converts the user-provided `Model` into an object implementing the above methods for [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl). +As a result, most samplers provided by Turing.jl are actually implemented to work with [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl), enabling their use both *within* Turing.jl and *outside* of Turing.jl! +Moreover, there exists similar conversions for Stan through BridgeStan and Stan[LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl), which means that a sampler supporting the [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl) interface can easily be used on both Turing.jl *and* Stan models (in addition to user-provided models, as our `IsotropicNormalModel` above)! ## Implementing MALA in [AbstractMCMC.jl](https://github.com/TuringLang/AbstractMCMC.jl) Now that we've established that a model implementing the [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl) interface provides us with all the information we need from $\log \gamma(x)$, we can address the question: given an object that implements the [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl) interface, how can we define a sampler for it? -We're going to do this by making our sampler a sub-type of `AbstractMCMC.AbstractSampler` in addition to implementing a few methods from [AbstractMCMC.jl](https://github.com/TuringLang/AbstractMCMC.jl). Why? Because it gets us *a lot* of functionality for free, as we will see later. +We're going to do this by making our sampler a sub-type of `AbstractMCMC.AbstractSampler` in addition to implementing a few methods from [AbstractMCMC.jl](https://github.com/TuringLang/AbstractMCMC.jl). +Why? +Because it gets us *a lot* of functionality for free, as we will see later. 
Moreover, [AbstractMCMC.jl](https://github.com/TuringLang/AbstractMCMC.jl) provides a very natural interface for MCMC algorithms. @@ -165,7 +171,11 @@ struct MALA{T,A} <: AbstractMCMC.AbstractSampler end ``` -Notice how we've added the suffix `_init` to both the stepsize and the covariance matrix. We've done this because a `AbstractMCMC.AbstractSampler` should be *immutable*. Of course there might be many scenarios where we want to allow something like the stepsize and / or the covariance matrix to vary between iterations, e.g. during the burn-in / adaptation phase of the sampling process we might want to adjust the parameters using statistics computed from these initial iterations. But information which can change between iterations *should not go in the sampler itself*! Instead, this information should go in the sampler *state*. +Notice how we've added the suffix `_init` to both the stepsize and the covariance matrix. +We've done this because a `AbstractMCMC.AbstractSampler` should be *immutable*. +Of course there might be many scenarios where we want to allow something like the stepsize and / or the covariance matrix to vary between iterations, e.g. during the burn-in / adaptation phase of the sampling process we might want to adjust the parameters using statistics computed from these initial iterations. +But information which can change between iterations *should not go in the sampler itself*! +Instead, this information should go in the sampler *state*. The sampler state should at the very least contain all the necessary information to perform the next MCMC iteration, but usually contains further information, e.g. quantities and statistics useful for evaluating whether the sampler has converged. @@ -175,14 +185,16 @@ We will use the following sampler state for our `MALA` sampler: struct MALAState{A<:AbstractVector{<:Real}} "current position" x::A + "whether the proposal was accepted" + accepted::Bool end ``` -This might seem overly redundant: we're defining a type `MALAState` and it only contains a simple vector of reals. -In this particular case we indeed could have dropped this and simply used a `AbstractVector{<:Real}` as our sampler state, but typically, as we will see later, one wants to include other quantities in the sampler state. -For example, if we also wanted to adapt the parameters of our `MALA`, e.g. alter the stepsize depending on acceptance rates, in which case we should also put `ϵ` in the state, but for now we'll keep things simple. +If we also wanted to adapt the parameters of our `MALA`, e.g. alter the stepsize depending on acceptance rates, we could also put `ϵ` in the state, but for now we'll keep things simple. -Moreover, we also want a _sample_ type, which is a type meant for "public consumption", i.e. the end-user. This is generally going to contain a subset of the information present in the state. But in such a simple scenario as this, we similarly only have a `AbstractVector{<:Real}`: +Moreover, we also want a _sample_ type, which is a type meant for "public consumption", i.e. the end-user. +This is generally going to contain a subset of the information present in the state. 
+But in such a simple scenario as this, we similarly only have a `AbstractVector{<:Real}`: ```{julia} struct MALASample{A<:AbstractVector{<:Real}} @@ -220,7 +232,8 @@ Moreover, there is a specific `AbstractMCMC.AbstractModel` which is used to indi Since, as we discussed earlier, in our case we're indeed going to work with types that support the [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl) interface, we'll define `AbstractMCMC.step` for such a `AbstractMCMC.LogDensityModel`. -Note that `AbstractMCMC.LogDensityModel` has no other purpose; it has a single field called `logdensity`, and it does nothing else. But by wrapping the model in `AbstractMCMC.LogDensityModel`, it allows samplers that want to work with [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl) to define their `AbstractMCMC.step` on this type without running into method ambiguities. +Note that `AbstractMCMC.LogDensityModel` has no other purpose; it has a single field called `logdensity`, and it does nothing else. +But by wrapping the model in `AbstractMCMC.LogDensityModel`, it allows samplers that want to work with [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl) to define their `AbstractMCMC.step` on this type without running into method ambiguities. All in all, that means that the signature for our `AbstractMCMC.step` is going to be the following: @@ -277,10 +290,10 @@ function AbstractMCMC.step( logα = logp̃ - logp state_new = if log(rand(rng)) < logα # Accept. - MALAState(x̃) + MALAState(x̃, true) else # Reject. - MALAState(x) + MALAState(x, false) end # Return the "sample" and the sampler state. return MALASample(state_new.x), state_new @@ -314,7 +327,7 @@ using Random, LinearAlgebra rng = Random.default_rng() sampler = MALA(1, I) -state = MALAState(zeros(LogDensityProblems.dimension(model))) +state = MALAState(zeros(LogDensityProblems.dimension(model)), true) x_next, state_next = AbstractMCMC.step( rng, @@ -381,7 +394,7 @@ function AbstractMCMC.step( # Let's just create the initial state by sampling using a Gaussian. x = randn(rng, LogDensityProblems.dimension(model)) - return MALASample(x), MALAState(x) + return MALASample(x), MALAState(x, true) end ``` @@ -397,19 +410,30 @@ hcat(mean(samples_matrix; dims=2), std(samples_matrix; dims=2)) As we promised, all of this hassle of implementing our `MALA` sampler in a way that uses [LogDensityProblems.jl](https://github.com/tpapp/LogDensityProblems.jl) and [AbstractMCMC.jl](https://github.com/TuringLang/AbstractMCMC.jl) gets us something more than *just* an "automatic" implementation of `AbstractMCMC.sample`. -It also enables use with Turing.jl through the `externalsampler`, but we need to do one final thing first: we need to tell Turing.jl how to extract a vector of parameters from the "sample" returned in our implementation of `AbstractMCMC.step`. In our case, the "sample" is a `MALASample`, so we just need the following line: +It also enables use with Turing.jl through the `externalsampler`, but we need to do one final thing first: we need to tell Turing.jl how to extract a vector of parameters from the state returned in our implementation of `AbstractMCMC.step`. +In our case, the state is a `MALAState`, so we just need the following line: ```{julia} -using Turing -using DynamicPPL +# Overload the `getparams` method for our "state" type. 
+AbstractMCMC.getparams(state::MALAState) = state.x +``` + +Optionally, we can also implement `AbstractMCMC.getstats` which instead returns a NamedTuple of statistics about the current state. +When sampling with Turing, these statistics will be included in the output chain. -# Overload the `getparams` method for our "sample" type, which is just a vector. -Turing.Inference.getparams(::DynamicPPL.Model, sample::MALASample) = sample.x +```{julia} +AbstractMCMC.getstats(state::MALAState) = (accepted=state.accepted,) ``` And with that, we're good to go! +::: {.callout-note} +Up until Turing.jl v0.41, you would have needed to define `Turing.Inference.getparams(::MALASample)`. This has been changed in favour of the AbstractMCMC interface, which means that you no longer need to depend on Turing.jl to implement an external sampler. +::: + ```{julia} +using Turing + # Our previous model defined as a Turing.jl model. @model mvnormal_model() = x ~ MvNormal([-5., 0., 5.], I) # Instantiate our model. @@ -424,7 +448,8 @@ Pretty neat, eh? ### Models with constrained parameters -One thing we've sort of glossed over in all of the above is that MALA, at least how we've implemented it, requires $x$ to live in $\mathbb{R}^d$ for some $d > 0$. If some of the parameters were in fact constrained, e.g. we were working with a `Beta` distribution which has support on the interval $(0, 1)$, *not* on $\mathbb{R}^d$, we could easily end up outside of the valid range $(0, 1)$. +One thing we've sort of glossed over in all of the above is that MALA, at least how we've implemented it, requires $x$ to live in $\mathbb{R}^d$ for some $d > 0$. +If some of the parameters were in fact constrained, e.g. we were working with a `Beta` distribution which has support on the interval $(0, 1)$, *not* on $\mathbb{R}^d$, we could easily end up outside of the valid range $(0, 1)$. ```{julia} @model beta_model() = x ~ Beta(3, 3) @@ -432,19 +457,31 @@ turing_model = beta_model() chain = sample(turing_model, externalsampler(sampler), 10_000; progress=false) ``` -Yep, that still works, but only because Turing.jl actually *transforms* the `turing_model` from constrained to unconstrained, so that the `sampler` provided to `externalsampler` is actually always working in unconstrained space! This is not always desirable, so we can turn this off: +In fact, this still works, because by default Turing.jl will transform the constrained parameters to an unconstrained space before passing them to the `externalsampler`. +If this is undesirable, you can pass the `unconstrained` keyword argument to `externalsampler`: ```{julia} -chain = sample(turing_model, externalsampler(sampler; unconstrained=false), 10_000; progress=false) +chain_constrained = sample(turing_model, externalsampler(sampler; unconstrained=false), 10_000; progress=false) ``` -The fun thing is that this still sort of works because +In fact, this still sort of works because `logpdf` doesn't error when evaluating outside of the support of the distribution, but instead returns `-Inf`: ```{julia} logpdf(Beta(3, 3), 10.0) ``` -and so the samples that fall outside of the range are always rejected. But do notice how much worse all the diagnostics are, e.g. `ess_tail` is very poor compared to when we use `unconstrained=true`. Moreover, in more complex cases this won't just result in a "nice" `-Inf` log-density value, but instead will error: +and so the samples that fall outside of the range are always rejected. +But do notice how much worse all the diagnostics are, e.g. 
`ess_tail` is very poor compared to when we use `unconstrained=true`: + +```{julia} +ess(chain) +``` + +```{julia} +ess(chain_constrained) +``` + +Moreover, in more complex cases this won't just result in a "nice" `-Inf` log-density value, but instead will error: ```{julia} #| error: true @@ -456,18 +493,25 @@ end sample(demo(), externalsampler(sampler; unconstrained=false), 10_000; progress=false) ``` -As expected, we run into a `DomainError` at some point, while if we set `unconstrained=true`, letting Turing.jl transform the model to an unconstrained form behind the scenes, everything works as expected: +As expected, we run into a `DomainError` at some point. +This would not have happened while if we set `unconstrained=true`, letting Turing.jl transform the model to an unconstrained form behind the scenes, everything works as expected: ```{julia} sample(demo(), externalsampler(sampler; unconstrained=true), 10_000; progress=false) ``` -Neat! +If you have implemented a sampler and you know for sure that your sampler _cannot_ work with unconstrained parameters, you can disable the default behaviour by overloading the following method: -Similarly, which automatic differentiation backend one should use can be specified through the `adtype` keyword argument too. For example, if we want to use [ReverseDiff.jl](https://github.com/JuliaDiff/ReverseDiff.jl) instead of the default [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl): +```julia +# This cell isn't actually run; it's just a demonstration. +AbstractMCMC.requires_unconstrained_space(::MALA) = false +``` + +Similarly, which automatic differentiation backend one should use can be specified through the `adtype` keyword argument too. +For example, if we want to use [ReverseDiff.jl](https://github.com/JuliaDiff/ReverseDiff.jl) instead of the default [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl): ```{julia} -using ReverseDiff: ReverseDiff +import ReverseDiff # Specify that we want to use `AutoReverseDiff`. sample( demo(), @@ -477,8 +521,6 @@ sample( ) ``` -Double-neat. - ## Summary At this point it's worth maybe reminding ourselves what we did and also *why* we did it: From 2326ba6fb5c5102f5147beed8ea4f6cd2b99ce59 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 18:16:00 +0000 Subject: [PATCH 14/20] reword --- developers/inference/implementing-samplers/index.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/developers/inference/implementing-samplers/index.qmd b/developers/inference/implementing-samplers/index.qmd index 11d4a1545..33ba67995 100644 --- a/developers/inference/implementing-samplers/index.qmd +++ b/developers/inference/implementing-samplers/index.qmd @@ -457,14 +457,14 @@ turing_model = beta_model() chain = sample(turing_model, externalsampler(sampler), 10_000; progress=false) ``` -In fact, this still works, because by default Turing.jl will transform the constrained parameters to an unconstrained space before passing them to the `externalsampler`. +By default, Turing.jl avoids such a situation from occurring by transforming the constrained parameters to an unconstrained space before passing them to the `externalsampler`. 
If this is undesirable, you can pass the `unconstrained` keyword argument to `externalsampler`: ```{julia} chain_constrained = sample(turing_model, externalsampler(sampler; unconstrained=false), 10_000; progress=false) ``` -In fact, this still sort of works because `logpdf` doesn't error when evaluating outside of the support of the distribution, but instead returns `-Inf`: +It turns out that this still sort of works because `logpdf` doesn't error when evaluating outside of the support of the distribution, but instead returns `-Inf`: ```{julia} logpdf(Beta(3, 3), 10.0) From 6c2260d7829c2cb7ffed446ad782cad6b11d219a Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 18:16:53 +0000 Subject: [PATCH 15/20] fix meta shortcode --- developers/models/varinfo-overview/index.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/developers/models/varinfo-overview/index.qmd b/developers/models/varinfo-overview/index.qmd index 98aa7a6f7..063463a1d 100644 --- a/developers/models/varinfo-overview/index.qmd +++ b/developers/models/varinfo-overview/index.qmd @@ -233,7 +233,7 @@ If you use it, you must pay close attention to correctness:** 1. For models with multiple variables, the order in which these variables occur in the vector is not obvious. The short answer is that it depends on the order in which the variables are added to the VarInfo during its initialisation. If you have models where the order of variables can vary from one execution to another, then `unflatten` can easily lead to incorrect results. -2. The meaning of the values passed in will generally depend on whether the VarInfo is linked or not (see the [Variable Transformations page]({{< meta developers/transforms/dynamicppl >}}) for more information about linked VarInfos). You must make sure that the values passed in are consistent with the link status of the VarInfo. In contrast, `InitFromParams` always uses unlinked values. +2. The meaning of the values passed in will generally depend on whether the VarInfo is linked or not (see the [Variable Transformations page]({{< meta dev-transforms-dynamicppl >}}) for more information about linked VarInfos). You must make sure that the values passed in are consistent with the link status of the VarInfo. In contrast, `InitFromParams` always uses unlinked values. 3. While `unflatten` modifies the parameter values stored in the VarInfo, it does not modify any other information, such as log probabilities. Thus, after calling `unflatten`, your VarInfo will be in an inconsistent state, and you should not attempt to read any other information from it until you have called `evaluate!!` again (which recomputes e.g. log probabilities). From dfa691505e5106b1962ab549f984ae7d65d2ac64 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 18:29:27 +0000 Subject: [PATCH 16/20] fix transforms --- developers/transforms/dynamicppl/index.qmd | 46 +++++----------------- 1 file changed, 10 insertions(+), 36 deletions(-) diff --git a/developers/transforms/dynamicppl/index.qmd b/developers/transforms/dynamicppl/index.qmd index 8f57af7eb..ef502fdda 100644 --- a/developers/transforms/dynamicppl/index.qmd +++ b/developers/transforms/dynamicppl/index.qmd @@ -126,50 +126,24 @@ This sequence of events is summed up in the following diagram, where `f(..., arg In the final part of this article, we will take a more in-depth look at the internal DynamicPPL machinery that allows us to convert between representations and obtain the correct probability densities. 
Before that, though, we will take a quick high-level look at how the HMC sampler in Turing.jl uses the functions introduced so far. -## Case study: HMC in Turing.jl +## HMC in Turing.jl While DynamicPPL provides the _functionality_ for transforming variables, the transformation itself happens at an even higher level, i.e. in the sampler itself. The HMC sampler in Turing.jl is in [this file](https://github.com/TuringLang/Turing.jl/blob/5b24cebe773922e0f3d5c4cb7f53162eb758b04d/src/mcmc/hmc.jl). -In the first step of sampling, it calls `link` on the sampler. -This transformation is preserved throughout the sampling process, meaning that `is_transformed()` always returns true. - -We can observe this by inserting print statements into the model. -Here, `__varinfo__` is the internal symbol for the `VarInfo` object used in model evaluation: - -```{julia} -setprogress!(false) - -@model function demo2() - x ~ LogNormal() - if x isa AbstractFloat - println("-----------") - println("model repn: $(DynamicPPL.getindex(__varinfo__, @varname(x)))") - println("internal repn: $(DynamicPPL.getindex_internal(__varinfo__, @varname(x)))") - println("is_transformed: $(is_transformed(__varinfo__, @varname(x)))") - end -end - -sample(demo2(), HMC(0.1, 3), 3); -``` - - -(Here, the check on `if x isa AbstractFloat` prevents the printing from occurring during computation of the derivative.) -You can see that during the three sampling steps, `is_transformed` is always kept as `true`. - -::: {.callout-note} -The first two model evaluations where `is_transformed` is `false` occur prior to the actual sampling. -One occurs when the model is checked for correctness (using [`DynamicPPL.check_model_and_trace`](https://github.com/TuringLang/DynamicPPL.jl/blob/ba490bf362653e1aaefe298364fe3379b60660d3/src/debug_utils.jl#L582-L612)). -The second occurs because the model is evaluated once to generate a set of initial parameters inside [DynamicPPL's implementation of `AbstractMCMC.step`](https://github.com/TuringLang/DynamicPPL.jl/blob/ba490bf362653e1aaefe298364fe3379b60660d3/src/sampler.jl#L98-L117). -Both of these steps occur with all samplers in Turing.jl, so are not specific to the HMC example shown here. -::: +In the first step of sampling, it calls `link` on the sampler. What this means is that from the perspective of the HMC sampler, it _never_ sees the constrained variable: it always thinks that it is sampling from an unconstrained distribution. The biggest prerequisite for this to work correctly is that the potential energy term in the Hamiltonian—or in other words, the model log density—must be programmed correctly to include the Jacobian term. This is exactly the same as how we had to make sure to define `logq(y)` correctly in the toy HMC example above. -Within Turing.jl, this is correctly handled because a statement like `x ~ LogNormal()` in the model definition above is translated into `assume(LogNormal(), @varname(x), __varinfo__)`, defined [here](https://github.com/TuringLang/DynamicPPL.jl/blob/ba490bf362653e1aaefe298364fe3379b60660d3/src/context_implementations.jl#L225-L229). -If you follow the trail of function calls, you can verify that the `assume` function does indeed check for the presence of the `is_transformed` flag and adds the Jacobian term accordingly. +Within Turing.jl, this is correctly handled by calling + +```julia +x, inv_logjac = with_logabsdet_jacobian(y, inverse_transform) +``` + +and then passing `inv_logjac` to DynamicPPL's `LogJacobianAccumulator`. 
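As a rough, self-contained sketch of this Jacobian bookkeeping (illustrative only: the `LogNormal` example and the variable names below are assumptions, and the exact calls inside Turing.jl differ from this), one could write:

```julia
# Minimal sketch of the Jacobian correction for a LogNormal variable,
# assuming the standard Bijectors.jl / ChangesOfVariables.jl interface.
using Bijectors, Distributions

dist = LogNormal()
to_unconstrained = bijector(dist)             # maps (0, ∞) -> ℝ, essentially `log`
to_constrained = inverse(to_unconstrained)    # maps ℝ -> (0, ∞), essentially `exp`

y = 0.3                                       # a point in unconstrained space
# Returns the constrained value together with log|det J| of the inverse map.
x, inv_logjac = with_logabsdet_jacobian(to_constrained, y)

# The log density the sampler sees in unconstrained space: the model's log
# density at x plus the Jacobian term for the inverse transform.
logq_y = logpdf(dist, x) + inv_logjac
```

The key point is the same as in the toy HMC example: whoever evaluates the unconstrained log density is responsible for adding `inv_logjac`, and in Turing.jl that responsibility sits with the model evaluation machinery rather than with the sampler.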
## A deeper dive into DynamicPPL's internal machinery @@ -415,4 +389,4 @@ In this chapter of the Turing docs, we've looked at: - the higher-level usage of transforms in DynamicPPL and Turing. This will hopefully have equipped you with a better understanding of how constrained variables are handled in the Turing framework. -With this knowledge, you should especially find it easier to navigate DynamicPPL's `VarInfo` type, which forms the backbone of model evaluation. \ No newline at end of file +With this knowledge, you should especially find it easier to navigate DynamicPPL's `VarInfo` type, which forms the backbone of model evaluation. From 0d9057bf941c947af6846154d8882d976fa943f4 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 18:53:38 +0000 Subject: [PATCH 17/20] no NodeTrait --- developers/contexts/submodel-condition/index.qmd | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/developers/contexts/submodel-condition/index.qmd b/developers/contexts/submodel-condition/index.qmd index 75bf34031..20dbf1901 100755 --- a/developers/contexts/submodel-condition/index.qmd +++ b/developers/contexts/submodel-condition/index.qmd @@ -265,18 +265,11 @@ We should like `myprefix(big_ctx, @varname(x))` to return `@varname(a.b.x)`. Consider the following naive implementation, which mirrors a lot of code in the tilde-pipeline: ```{julia} -using DynamicPPL: NodeTrait, IsLeaf, IsParent, childcontext, AbstractContext +using DynamicPPL: childcontext, AbstractContext, AbstractParentContext using AbstractPPL: AbstractPPL -function myprefix(ctx::DynamicPPL.AbstractContext, vn::VarName) - return myprefix(NodeTrait(ctx), ctx, vn) -end -function myprefix(::IsLeaf, ::AbstractContext, vn::VarName) - return vn -end -function myprefix(::IsParent, ctx::AbstractContext, vn::VarName) - return myprefix(childcontext(ctx), vn) -end +myprefix(::AbstractContext, vn::VarName) = vn +myprefix(ctx::AbstractParentContext, vn::VarName) = myprefix(childcontext(ctx), vn) function myprefix(ctx::DynamicPPL.PrefixContext, vn::VarName) # The functionality to actually manipulate the VarNames is in AbstractPPL new_vn = AbstractPPL.prefix(vn, ctx.vn_prefix) From f23e451956d93922af34a811c777c77b377bfb05 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 18:58:52 +0000 Subject: [PATCH 18/20] fix manual `Model` invocation --- developers/compiler/model-manual/index.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/developers/compiler/model-manual/index.qmd b/developers/compiler/model-manual/index.qmd index a21916b11..09979f8ae 100755 --- a/developers/compiler/model-manual/index.qmd +++ b/developers/compiler/model-manual/index.qmd @@ -54,7 +54,7 @@ function gdemo2(model, varinfo, x) # value and the updated varinfo. return nothing, varinfo end -gdemo2(x) = DynamicPPL.Model(gdemo2, (; x)) +gdemo2(x) = DynamicPPL.Model{false}(gdemo2, (; x)) # Instantiate a Model object with our data variables. model2 = gdemo2([1.5, 2.0]) @@ -66,4 +66,4 @@ We can sample from this model in the same way: chain = sample(model2, NUTS(), 1000; progress=false) ``` -The subsequent pages in this section will show how the `@model` macro does this behind-the-scenes. \ No newline at end of file +The subsequent pages in this section will show how the `@model` macro does this behind-the-scenes. 
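As a quick, hedged illustration of the change above (not taken from the patch itself): a model built by calling `DynamicPPL.Model{false}` directly supports the same generic operations as a macro-generated one. The accessors used below (`VarInfo` and `logjoint`) are assumed to be available as in current DynamicPPL releases, and `model2` refers to the model constructed on the page being patched.

```julia
# Sketch: the manually constructed `model2` can be queried like any other
# DynamicPPL.Model, not just sampled from.
using DynamicPPL

vi = DynamicPPL.VarInfo(model2)     # evaluate the model once, drawing parameters from the prior
DynamicPPL.logjoint(model2, vi)     # joint log density at those parameter values
```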
From 55aedc95f13515e8ee8c2a1bff4c97b95d6b30d3 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 18:59:38 +0000 Subject: [PATCH 19/20] add clarifying comment --- developers/compiler/model-manual/index.qmd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/developers/compiler/model-manual/index.qmd b/developers/compiler/model-manual/index.qmd index 09979f8ae..886ee62ea 100755 --- a/developers/compiler/model-manual/index.qmd +++ b/developers/compiler/model-manual/index.qmd @@ -54,6 +54,9 @@ function gdemo2(model, varinfo, x) # value and the updated varinfo. return nothing, varinfo end + +# The `false` type parameter here indicates that this model does not need +# threadsafe evaluation (see the threadsafe evaluation page for details) gdemo2(x) = DynamicPPL.Model{false}(gdemo2, (; x)) # Instantiate a Model object with our data variables. From 9e57102000f463c18fe8cc677502f912bb220abb Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Thu, 4 Dec 2025 20:23:47 +0000 Subject: [PATCH 20/20] fix returned --- .../bayesian-time-series-analysis/index.qmd | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tutorials/bayesian-time-series-analysis/index.qmd b/tutorials/bayesian-time-series-analysis/index.qmd index 4ed1a64aa..1e8846c68 100755 --- a/tutorials/bayesian-time-series-analysis/index.qmd +++ b/tutorials/bayesian-time-series-analysis/index.qmd @@ -175,11 +175,6 @@ function mean_ribbon(samples) return m, (m - low, up - m) end -function get_decomposition(model, x, cyclic_features, chain, op) - chain_params = get_sections(chain, :parameters) - return returned(model(x, cyclic_features, op), chain_params) -end - function plot_fit(x, y, decomp, ymax) trend = mapreduce(x -> x.trend, hcat, decomp) cyclic = mapreduce(x -> x.cyclic, hcat, decomp) @@ -219,8 +214,9 @@ function plot_fit(x, y, decomp, ymax) return trend_plt, cyclic_plt end -chain = sample(decomp_model(x, cyclic_features, +) | (; y=yf), NUTS(), 2000, progress=false) -yf_samples = predict(decomp_model(x, cyclic_features, +), chain) +model = decomp_model(x, cyclic_features, +) | (; y = yf) +chain = sample(model, NUTS(), 2000, progress=false) +yf_samples = predict(decondition(model), chain) m, conf = mean_ribbon(yf_samples) predictive_plt = plot( t, @@ -233,7 +229,7 @@ predictive_plt = plot( ) scatter!(predictive_plt, t, yf .+ yf_max; color=2, label="Data", legend=:topleft) -decomp = get_decomposition(decomp_model, x, cyclic_features, chain, +) +decomp = returned(model, chain) decomposed_plt = plot_fit(t, yf, decomp, yf_max) plot(predictive_plt, decomposed_plt...; layout=(3, 1), size=(700, 1000)) ``` @@ -308,8 +304,9 @@ scatter!(t, yf; color=2, label="Data") ``` ```{julia} -chain = sample(decomp_model(x, cyclic_features, .*) | (; y=yg), NUTS(), 2000, progress=false) -yg_samples = predict(decomp_model(x, cyclic_features, .*), chain) +model = decomp_model(x, cyclic_features, .*) | (; y = yg) +chain = sample(model, NUTS(), 2000, progress=false) +yg_samples = predict(decondition(model), chain) m, conf = mean_ribbon(yg_samples) predictive_plt = plot( t, @@ -322,7 +319,7 @@ predictive_plt = plot( ) scatter!(predictive_plt, t, yg; color=2, label="Data", legend=:topleft) -decomp = get_decomposition(decomp_model, x, cyclic_features, chain, .*) +decomp = returned(model, chain) decomposed_plt = plot_fit(t, yg, decomp, 0) plot(predictive_plt, decomposed_plt...; layout=(3, 1), size=(700, 1000)) ``` @@ -337,14 +334,15 @@ let end ``` -The model fits! What about the infered cyclic components? +The model fits! 
+What about the inferred cyclic components? ```{julia} βc = Array(group(chain, :βc)) plot_cyclic_features(βc[:, begin:num_freqs, :], βc[:, (num_freqs + 1):end, :]) ``` -While multiplicative model fits to the data, it does not recover the true parameters for this dataset. +While a multiplicative model does manage to fit the data, it does not recover the true parameters for this dataset. # Wrapping up