
Commit 2aadd3e

Added timing summary and ESS

1 parent 1cfdb25
File tree

2 files changed: +448 −109 lines

examples/samplers/fast_sampling_with_jax_and_numba.ipynb

Lines changed: 262 additions & 69 deletions
Large diffs are not rendered by default.

examples/samplers/fast_sampling_with_jax_and_numba.myst.md

Lines changed: 186 additions & 40 deletions
@@ -62,6 +62,12 @@ BlackJAX offers another JAX-based sampling implementation focused on flexibility
 
 +++
 
+## Installation Requirements
+
+To use the various sampling backends, you need to install the corresponding packages. Nutpie is the recommended high-performance option and can be installed with pip or conda/mamba (e.g. `conda install nutpie`). For JAX-based workflows, NumPyro provides mature functionality and is installed with the `numpyro` package. BlackJAX offers an alternative JAX implementation and is available in the `blackjax` package.
+
++++
+
 ## Performance Guidelines
 
 Understanding when to use each sampler depends on several key factors including model size, variable types, and computational requirements.
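The hunk above adds an installation note for three optional backends. As a small standalone sketch (not part of the commit; the helper name `available_backends` is invented for illustration), you can check which of them are importable in the current environment using only the standard library:

```python
import importlib.util


def available_backends(packages=("nutpie", "numpyro", "blackjax")):
    """Map each optional sampler package name to True if it is importable."""
    return {pkg: importlib.util.find_spec(pkg) is not None for pkg in packages}


# Missing packages can then be installed with pip or conda,
# e.g. `pip install nutpie` or `conda install nutpie`.
print(available_backends())
```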
@@ -73,28 +79,57 @@ Models containing **discrete variables** must use PyMC's built-in sampler, as it
 **Numba** excels at CPU optimization and provides consistent performance across different model types. It's particularly effective for models with complex mathematical operations that benefit from just-in-time compilation. **JAX** offers superior performance for very large models and provides natural GPU acceleration, making it ideal when computational resources are a limiting factor. The **C** backend serves as a reliable fallback option with broad compatibility but typically offers lower performance than the alternatives.
 
 ```{code-cell} ipython3
-import platform
+import time
+
+from collections import defaultdict
 
 import arviz as az
 import matplotlib.pyplot as plt
 import numpy as np
+import numpyro
+import pandas as pd
 import pymc as pm
 
-if platform.system() == "linux":
-    import multiprocessing
+numpyro.set_host_device_count(4)
 
-    multiprocessing.set_start_method("spawn", force=True)
+%config InlineBackend.figure_format = 'retina'
+az.style.use("arviz-darkgrid")
 
 rng = np.random.default_rng(seed=42)
 print(f"Running on PyMC v{pm.__version__}")
 ```
 
 ```{code-cell} ipython3
-%config InlineBackend.figure_format = 'retina'
-az.style.use("arviz-darkgrid")
-```
+import time
+
+from collections import defaultdict
+
+# Dictionary to store all results
+results = defaultdict(dict)
+
+
+class TimingContext:
+    def __init__(self, name):
+        self.name = name
+
+    def __enter__(self):
+        self.start_wall = time.perf_counter()
+        self.start_cpu = time.process_time()
+        return self
+
+    def __exit__(self, *args):
+        self.end_wall = time.perf_counter()
+        self.end_cpu = time.process_time()
+
+        wall_time = self.end_wall - self.start_wall
+        cpu_time = self.end_cpu - self.start_cpu
 
-We'll demonstrate the performance differences using a Probabilistic Principal Component Analysis (PPCA) model.
+        results[self.name]["wall_time"] = wall_time
+        results[self.name]["cpu_time"] = cpu_time
+
+        print(f"Wall time: {wall_time:.1f} s")
+        print(f"CPU time: {cpu_time:.1f} s")
+```
 
 ```{code-cell} ipython3
 def build_toy_dataset(N, D, K, sigma=1):
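The `TimingContext` helper added in this hunk can be exercised on its own. Here is a self-contained sketch using only the standard library, with a trivial workload standing in for the `pm.sample(...)` calls it wraps in the notebook:

```python
import time
from collections import defaultdict

# Local results store, mirroring the notebook's module-level dict
results = defaultdict(dict)


class TimingContext:
    """Record wall-clock and CPU time for a named block of work."""

    def __init__(self, name):
        self.name = name

    def __enter__(self):
        self.start_wall = time.perf_counter()
        self.start_cpu = time.process_time()
        return self

    def __exit__(self, *args):
        results[self.name]["wall_time"] = time.perf_counter() - self.start_wall
        results[self.name]["cpu_time"] = time.process_time() - self.start_cpu


with TimingContext("demo"):
    sum(range(100_000))  # stand-in for pm.sample(...)

print(results["demo"])
```

Because `__exit__` writes into a shared dict keyed by name, the notebook can collect timings from several sampler runs and summarize them later.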
@@ -129,10 +164,14 @@ plt.title("Simulated data set")
 ```
 
 ```{code-cell} ipython3
-with pm.Model() as PPCA:
-    w = pm.Normal("w", mu=0, sigma=2, shape=[D, K], transform=pm.distributions.transforms.Ordered())
-    z = pm.Normal("z", mu=0, sigma=1, shape=[N, K])
-    x = pm.Normal("x", mu=w.dot(z.T), sigma=1, shape=[D, N], observed=data)
+def ppca_model():
+    with pm.Model() as model:
+        w = pm.Normal(
+            "w", mu=0, sigma=2, shape=[D, K], transform=pm.distributions.transforms.Ordered()
+        )
+        z = pm.Normal("z", mu=0, sigma=1, shape=[N, K])
+        x = pm.Normal("x", mu=w.dot(z.T), sigma=1, shape=[D, N], observed=data)
+    return model
 ```
 
 ## Performance Comparison
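The hunk above replaces a module-level `PPCA` model with a `ppca_model()` factory, so each `with ppca_model():` block builds a fresh model object rather than reusing shared state across runs. A minimal sketch of the pattern, with a dummy `Model` class standing in for `pm.Model` (an assumption for illustration only):

```python
class Model:
    """Dummy stand-in for pm.Model: a context manager that holds variables."""

    def __init__(self):
        self.variables = []

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        return False


def ppca_model():
    # Build and return a fresh model on every call, as in the commit
    with Model() as model:
        model.variables.extend(["w", "z", "x"])
    return model


m1, m2 = ppca_model(), ppca_model()
print(m1 is m2)  # False: each sampler run gets its own model instance
```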
@@ -142,44 +181,154 @@ Now let's compare the performance of different sampling backends on our PPCA mod
 ### 1. PyMC Default Sampler (Python NUTS)
 
 ```{code-cell} ipython3
-%%time
-with PPCA:
-    idata_pymc = pm.sample(progressbar=False)
+n_draws = 2000
+n_tune = 2000
+
+with TimingContext("PyMC Default"):
+    with ppca_model():
+        idata_pymc = pm.sample(draws=n_draws, tune=n_tune, progressbar=False)
+
+ess_pymc = az.ess(idata_pymc)
+min_ess = min([ess_pymc[var].values.min() for var in ess_pymc.data_vars])
+mean_ess = np.mean([ess_pymc[var].values.mean() for var in ess_pymc.data_vars])
+results["PyMC Default"]["min_ess"] = min_ess
+results["PyMC Default"]["mean_ess"] = mean_ess
+print(f"Min ESS: {min_ess:.0f}, Mean ESS: {mean_ess:.0f}")
 ```
 
-### 2. Nutpie with Numba Backend
+### 2. Nutpie Sampler with Numba Backend
 
 ```{code-cell} ipython3
-%%time
-with PPCA:
-    idata_nutpie_numba = pm.sample(
-        nuts_sampler="nutpie", nuts_sampler_kwargs={"backend": "numba"}, progressbar=False
-    )
+with TimingContext("Nutpie Numba"):
+    with ppca_model():
+        idata_nutpie_numba = pm.sample(
+            draws=n_draws,
+            tune=n_tune,
+            nuts_sampler="nutpie",
+            nuts_sampler_kwargs={"backend": "numba"},
+            progressbar=False,
+        )
+
+ess_nutpie_numba = az.ess(idata_nutpie_numba)
+min_ess = min([ess_nutpie_numba[var].values.min() for var in ess_nutpie_numba.data_vars])
+mean_ess = np.mean([ess_nutpie_numba[var].values.mean() for var in ess_nutpie_numba.data_vars])
+results["Nutpie Numba"]["min_ess"] = min_ess
+results["Nutpie Numba"]["mean_ess"] = mean_ess
+print(f"Min ESS: {min_ess:.0f}, Mean ESS: {mean_ess:.0f}")
 ```
 
-### 3. Nutpie with JAX Backend
+### 3. Nutpie Sampler with JAX Backend
 
 ```{code-cell} ipython3
-%%time
-with PPCA:
-    idata_nutpie_jax = pm.sample(
-        nuts_sampler="nutpie", nuts_sampler_kwargs={"backend": "jax"}, progressbar=False
-    )
+with TimingContext("Nutpie JAX"):
+    with ppca_model():
+        idata_nutpie_jax = pm.sample(
+            draws=n_draws,
+            tune=n_tune,
+            nuts_sampler="nutpie",
+            nuts_sampler_kwargs={"backend": "jax"},
+            progressbar=False,
+        )
+
+ess_nutpie_jax = az.ess(idata_nutpie_jax)
+min_ess = min([ess_nutpie_jax[var].values.min() for var in ess_nutpie_jax.data_vars])
+mean_ess = np.mean([ess_nutpie_jax[var].values.mean() for var in ess_nutpie_jax.data_vars])
+results["Nutpie JAX"]["min_ess"] = min_ess
+results["Nutpie JAX"]["mean_ess"] = mean_ess
+print(f"Min ESS: {min_ess:.0f}, Mean ESS: {mean_ess:.0f}")
 ```
 
 ### 4. NumPyro Sampler
 
 ```{code-cell} ipython3
-%%time
-with PPCA:
-    idata_numpyro = pm.sample(nuts_sampler="numpyro", progressbar=False)
+with TimingContext("NumPyro"):
+    with ppca_model():
+        idata_numpyro = pm.sample(
+            draws=n_draws, tune=n_tune, nuts_sampler="numpyro", progressbar=False
+        )
+
+ess_numpyro = az.ess(idata_numpyro)
+min_ess = min([ess_numpyro[var].values.min() for var in ess_numpyro.data_vars])
+mean_ess = np.mean([ess_numpyro[var].values.mean() for var in ess_numpyro.data_vars])
+results["NumPyro"]["min_ess"] = min_ess
+results["NumPyro"]["mean_ess"] = mean_ess
+print(f"Min ESS: {min_ess:.0f}, Mean ESS: {mean_ess:.0f}")
 ```
 
-## Installation Requirements
+```{code-cell} ipython3
+timing_data = []
+for backend_name, metrics in results.items():
+    wall_time = metrics.get("wall_time", 0)
+    cpu_time = metrics.get("cpu_time", 0)
+    min_ess = metrics.get("min_ess", 0)
+    mean_ess = metrics.get("mean_ess", 0)
+    ess_per_sec = mean_ess / wall_time if wall_time > 0 else 0
+
+    timing_data.append(
+        {
+            "Sampling Backend": backend_name,
+            "Wall Time (s)": f"{wall_time:.1f}",
+            "CPU Time (s)": f"{cpu_time:.1f}",
+            "Min ESS": f"{min_ess:.0f}",
+            "Mean ESS": f"{mean_ess:.0f}",
+            "ESS/sec": f"{ess_per_sec:.0f}",
+            "Parallel Efficiency": f"{cpu_time/wall_time:.2f}" if wall_time > 0 else "N/A",
+        }
+    )
 
-To use the various sampling backends, you need to install the corresponding packages. Nutpie is the recommended high-performance option and can be installed with pip or conda/mamba (e.g. `conda install nutpie`). For JAX-based workflows, NumPyro provides mature functionality and is installed with the `numpyro` package. BlackJAX offers an alternative JAX implementation and is available in the `blackjax` package.
+timing_df = pd.DataFrame(timing_data)
+timing_df = timing_df.sort_values("ESS/sec", ascending=False)
 
-+++
+print("\nPerformance Summary Table:")
+print("=" * 100)
+print(timing_df.to_string(index=False))
+print("=" * 100)
+
+best_backend = timing_df.iloc[0]["Sampling Backend"]
+best_ess_per_sec = timing_df.iloc[0]["ESS/sec"]
+print(f"\nMost efficient backend: {best_backend} with {best_ess_per_sec} ESS/second")
+```
+
+```{code-cell} ipython3
+fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(12, 8))
+
+backends = timing_df["Sampling Backend"].tolist()
+wall_times = [float(val) for val in timing_df["Wall Time (s)"].tolist()]
+mean_ess_values = [float(val) for val in timing_df["Mean ESS"].tolist()]
+ess_per_sec_values = [float(val) for val in timing_df["ESS/sec"].tolist()]
+
+ax1.bar(backends, wall_times, color="skyblue")
+ax1.set_ylabel("Wall Time (seconds)")
+ax1.set_title("Sampling Time")
+ax1.tick_params(axis="x", rotation=45)
+
+ax2.bar(backends, mean_ess_values, color="lightgreen")
+ax2.set_ylabel("Mean ESS")
+ax2.set_title("Effective Sample Size")
+ax2.tick_params(axis="x", rotation=45)
+
+ax3.bar(backends, ess_per_sec_values, color="coral")
+ax3.set_ylabel("ESS per Second")
+ax3.set_title("Sampling Efficiency")
+ax3.tick_params(axis="x", rotation=45)
+
+ax4.scatter(wall_times, mean_ess_values, s=200, alpha=0.6)
+for i, backend in enumerate(backends):
+    ax4.annotate(
+        backend,
+        (wall_times[i], mean_ess_values[i]),
+        xytext=(5, 5),
+        textcoords="offset points",
+        fontsize=9,
+    )
+ax4.set_xlabel("Wall Time (seconds)")
+ax4.set_ylabel("Mean ESS")
+ax4.set_title("Time vs. Effective Sample Size")
+ax4.grid(True, alpha=0.3)
+
+plt.tight_layout()
+plt.show()
+```
 
 ## Special Cases and Advanced Usage
 
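The summary-table cell added in this hunk needs real sampling runs (and pandas). The core ESS-per-second and parallel-efficiency arithmetic can be sketched standalone; the numbers below are invented placeholders, not measurements from the notebook:

```python
# Hand-filled stand-in for the notebook's `results` dict (values are made up)
results = {
    "PyMC Default": {"wall_time": 60.0, "cpu_time": 210.0, "mean_ess": 1800.0},
    "Nutpie Numba": {"wall_time": 20.0, "cpu_time": 70.0, "mean_ess": 2100.0},
}

rows = []
for backend, m in results.items():
    wall = m.get("wall_time", 0)
    rows.append(
        {
            "backend": backend,
            # Effective samples generated per wall-clock second
            "ess_per_sec": m["mean_ess"] / wall if wall > 0 else 0.0,
            # CPU time / wall time: approaches the chain count when
            # chains run fully in parallel
            "parallel_efficiency": m["cpu_time"] / wall if wall > 0 else 0.0,
        }
    )

rows.sort(key=lambda r: r["ess_per_sec"], reverse=True)
print(rows[0]["backend"])  # most efficient backend under these numbers
```

Ranking by ESS/sec rather than wall time alone is the point of the change: a slower sampler that yields many more effective samples can still win.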
@@ -190,13 +339,13 @@ In certain scenarios, you may need to use PyMC's Python-based sampler while stil
 The following examples demonstrate how to use PyMC's built-in sampler with different compilation targets. The `fast_run` mode uses optimized C compilation, which provides good performance while maintaining full compatibility. The `numba` mode offers the only way to access Numba's just-in-time compilation benefits when using PyMC's sampler. The `jax` mode enables JAX compilation, though for JAX workflows, Nutpie or NumPyro typically provide better performance.
 
 ```{code-cell} ipython3
-with PPCA:
+with ppca_model():
     idata_c = pm.sample(nuts_sampler="pymc", compile_kwargs={"mode": "fast_run"}, progressbar=False)
 
-# with PPCA:
+# with ppca_model():
 #     idata_pymc_numba = pm.sample(nuts_sampler="pymc", compile_kwargs={"mode": "numba"}, progressbar=False)
 
-# with PPCA:
+# with ppca_model():
 #     idata_pymc_jax = pm.sample(nuts_sampler="pymc", compile_kwargs={"mode": "jax"}, progressbar=False)
 ```
 

@@ -221,12 +370,9 @@ with pm.Model() as discrete_model:
 ## Authors
 
 - Originally authored by Thomas Wiecki in July 2023
-- Substantially updated and expanded by Chris Fonnesbeck in May 2025
+- Updated and expanded by Chris Fonnesbeck in May 2025
 
 ```{code-cell} ipython3
 %load_ext watermark
 %watermark -n -u -v -iv -w -p pytensor,arviz,pymc,numpyro,blackjax,nutpie
 ```
-
-:::{include} ../page_footer.md
-:::

0 commit comments