Skip to content

Commit 70c1649

Browse files
TeemuSailynoja, williambdean, juanitorduz
authored
Implement add_lift_test_measurements for MMMM (#1738)
* Add 'scaling' to MMM class docstring. * implement add_lift_test_measurements for MMMM. * Fix time-varying media lift test implementation for MMMM. * Test MMMM lift tests * Add runnable documentation example. * Fix imports in documentation example. * add type hints * fix typo * add tests for different scaling configurations * Fix lift test scaling using xarray and model scalers. * tests lift test exceptions * Fix test for dims=(). * Add mock sampling to lift test tests * test with different dimensions of saturation parameters * test for different dimensions of saturation parameters * add docstring for the private functions * fix typo * run pre-commit * Test transform functions. --------- Co-authored-by: Will Dean <wd60622@gmail.com> Co-authored-by: Will Dean <57733339+williambdean@users.noreply.github.com> Co-authored-by: Juan Orduz <juanitorduz@gmail.com>
1 parent 0cec2e7 commit 70c1649

File tree

3 files changed

+887
-6
lines changed

3 files changed

+887
-6
lines changed

pymc_marketing/mmm/lift_test.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,7 @@ def scale_channel_lift_measurements(
525525
channel_col: str,
526526
channel_columns: list[str],
527527
transform: Callable[[np.ndarray], np.ndarray],
528+
dim_cols: list[str] | None = None,
528529
) -> pd.DataFrame:
529530
"""Scale the lift measurements for a specific channel.
530531
@@ -542,20 +543,25 @@ def scale_channel_lift_measurements(
542543
a subset of these values.
543544
transform : Callable[[np.ndarray], np.ndarray]
544545
Function to scale the lift measurements.
546+
dim_cols : list[str], optional
547+
Column names for model dimensions.
545548
546549
Returns
547550
-------
548551
pd.DataFrame
549552
DataFrame with the scaled lift measurements.
550553
551554
"""
552-
# DataFrame with MultiIndex (RangeIndex, channel_col)
555+
# Index columns: either [*dim_cols, channel_col] or just [channel_col].
556+
index_cols: list[str] = (dim_cols if dim_cols else []) + [channel_col]
557+
# DataFrame with MultiIndex (RangeIndex, index_cols),
558+
# where dim_cols is optional.
553559
# columns: x, delta_x
554-
df_original = df_lift_test.loc[:, [channel_col, "x", "delta_x"]].set_index(
555-
channel_col, append=True
560+
df_original = df_lift_test.loc[:, [*index_cols, "x", "delta_x"]].set_index(
561+
index_cols, append=True
556562
)
557563

558-
# DataFrame with MultiIndex (RangeIndex, (x, delta_x))
564+
# DataFrame with MultiIndex (RangeIndex, *dim_cols, (x, delta_x))
559565
# columns: channel_columns values
560566
df_to_rescale = (
561567
df_original.pipe(_swap_columns_and_last_index_level)
@@ -572,7 +578,7 @@ def scale_channel_lift_measurements(
572578
return (
573579
df_rescaled.pipe(_swap_columns_and_last_index_level)
574580
.loc[df_original.index, :]
575-
.reset_index(channel_col)
581+
.reset_index(index_cols)
576582
)
577583

578584

@@ -610,6 +616,7 @@ def scale_lift_measurements(
610616
channel_columns: list[str | int],
611617
channel_transform: Callable[[np.ndarray], np.ndarray],
612618
target_transform: Callable[[np.ndarray], np.ndarray],
619+
dim_cols: list[str] | None = None,
613620
) -> pd.DataFrame:
614621
"""Scale the DataFrame with lift test results to be used in the model.
615622
@@ -629,6 +636,8 @@ def scale_lift_measurements(
629636
Function to scale the lift measurements.
630637
target_transform : Callable[[np.ndarray], np.ndarray]
631638
Function to scale the target.
639+
dim_cols : list[str], optional
640+
Names of the columns for the model dimensions.
632641
633642
Returns
634643
-------
@@ -643,6 +652,7 @@ def scale_lift_measurements(
643652
channel_col=channel_col,
644653
channel_columns=channel_columns, # type: ignore
645654
transform=channel_transform,
655+
dim_cols=dim_cols,
646656
)
647657
df_target_scaled = scale_target_for_lift_measurements(
648658
df_lift_test["delta_y"],

pymc_marketing/mmm/multidimensional.py

Lines changed: 244 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
import json
1919
import warnings
20-
from collections.abc import Sequence
20+
from collections.abc import Callable, Sequence
2121
from copy import deepcopy
2222
from typing import Annotated, Any, Literal
2323

@@ -46,6 +46,10 @@
4646
)
4747
from pymc_marketing.mmm.events import EventEffect
4848
from pymc_marketing.mmm.fourier import YearlyFourier
49+
from pymc_marketing.mmm.lift_test import (
50+
add_lift_measurements_to_likelihood_from_saturation,
51+
scale_lift_measurements,
52+
)
4953
from pymc_marketing.mmm.plot import MMMPlotSuite
5054
from pymc_marketing.mmm.scaling import Scaling, VariableScaling
5155
from pymc_marketing.mmm.tvp import infer_time_index
@@ -94,6 +98,9 @@ class MMM(ModelBuilder):
9498
Whether to use time-varying effects for media channels.
9599
dims : tuple | None
96100
Additional dimensions for the model.
101+
scaling : Scaling | dict | None
102+
Scaling methods to be used for the target variable and the marketing channels.
103+
Defaults to max scaling for both.
97104
model_config : dict | None
98105
Configuration settings for the model.
99106
sampler_config : dict | None
@@ -1505,6 +1512,242 @@ def sample_posterior_predictive(
15051512

15061513
return posterior_predictive_samples
15071514

1515+
def _make_channel_transform(
1516+
self, df_lift_test: pd.DataFrame
1517+
) -> Callable[[np.ndarray], np.ndarray]:
1518+
"""Create a function for transforming the channel data into the same scale as in the model.
1519+
1520+
Parameters
1521+
----------
1522+
df_lift_test : pd.DataFrame
1523+
Lift test measurements.
1524+
1525+
Returns
1526+
-------
1527+
Callable[[np.ndarray], np.ndarray]
1528+
The function for scaling the channel data.
1529+
"""
1530+
# The transformer will be passed a np.ndarray of data corresponding to this index.
1531+
index_cols = [*list(self.dims), "channel"]
1532+
# We reconstruct the input dataframe following the transformations performed within
1533+
# `lift_test.scale_channel_lift_measurements()`.
1534+
input_df = (
1535+
df_lift_test.loc[:, [*index_cols, "x", "delta_x"]]
1536+
.set_index(index_cols, append=True)
1537+
.stack()
1538+
.unstack(level=-2)
1539+
.reindex(self.channel_columns, axis=1) # type: ignore
1540+
.fillna(0)
1541+
)
1542+
1543+
def channel_transform(input: np.ndarray) -> np.ndarray:
1544+
"""Transform lift test channel data to the same scale as in the model."""
1545+
# reconstruct the df corresponding to the input np.ndarray.
1546+
reconstructed = (
1547+
pd.DataFrame(data=input, index=input_df.index, columns=input_df.columns)
1548+
.stack()
1549+
.unstack(level=-2)
1550+
)
1551+
return (
1552+
(
1553+
# Scale the data according to the scaler coords.
1554+
reconstructed.to_xarray() / self.scalers._channel
1555+
)
1556+
.to_dataframe()
1557+
.fillna(0)
1558+
.stack()
1559+
.unstack(level=-2)
1560+
.loc[input_df.index, :]
1561+
.values
1562+
)
1563+
1564+
# Return the transform; it yields the scaled data as a np.ndarray in the input index order.
1565+
return channel_transform
1566+
1567+
def _make_target_transform(
1568+
self, df_lift_test: pd.DataFrame
1569+
) -> Callable[[np.ndarray], np.ndarray]:
1570+
"""Create a function for transforming the target measurements into the same scale as in the model.
1571+
1572+
Parameters
1573+
----------
1574+
df_lift_test : pd.DataFrame
1575+
Lift test measurements.
1576+
1577+
Returns
1578+
-------
1579+
Callable[[np.ndarray], np.ndarray]
1580+
The function for scaling the target data.
1581+
"""
1582+
# These are the same order as in the original lift test measurements.
1583+
index_cols = [*list(self.dims), "channel"]
1584+
input_idx = df_lift_test.set_index(index_cols, append=True).index
1585+
1586+
def target_transform(input: np.ndarray) -> np.ndarray:
1587+
"""Transform lift test measurements and sigma to the same scale as in the model."""
1588+
# Reconstruct the input df column with the correct index.
1589+
reconstructed = pd.DataFrame(
1590+
data=input, index=input_idx, columns=["target"]
1591+
)
1592+
return (
1593+
(
1594+
# Scale the measurements.
1595+
reconstructed.to_xarray() / self.scalers._target
1596+
)
1597+
.to_dataframe()
1598+
.loc[input_idx, :]
1599+
.values
1600+
)
1601+
1602+
# Return the transform; it yields the scaled measurements as a np.ndarray following
1603+
# the input index order.
1604+
return target_transform
1605+
1606+
def add_lift_test_measurements(
1607+
self,
1608+
df_lift_test: pd.DataFrame,
1609+
dist: type[pm.Distribution] = pm.Gamma,
1610+
name: str = "lift_measurements",
1611+
) -> None:
1612+
"""Add lift tests to the model.
1613+
1614+
The model for the difference of a channel's saturation curve is created
1615+
from `x` and `x + delta_x` for each channel. This random variable is
1616+
then conditioned using the empirical lift, `delta_y`, and `sigma` of the lift test
1617+
with the specified distribution `dist`.
1618+
1619+
The pseudo-code for the lift test is as follows:
1620+
1621+
.. code-block:: python
1622+
1623+
model_estimated_lift = saturation_curve(x + delta_x) - saturation_curve(x)
1624+
empirical_lift = delta_y
1625+
dist(abs(model_estimated_lift), sigma=sigma, observed=abs(empirical_lift))
1626+
1627+
1628+
The model has to be built before adding the lift tests.
1629+
1630+
Parameters
1631+
----------
1632+
df_lift_test : pd.DataFrame
1633+
DataFrame with lift test results with at least the following columns:
1634+
* `DIM_NAME`: dimension name. One column per dimension in `mmm.dims`.
1635+
* `channel`: channel name. Must be present in `channel_columns`.
1636+
* `x`: x axis value of the lift test.
1637+
* `delta_x`: change in x axis value of the lift test.
1638+
* `delta_y`: change in y axis value of the lift test.
1639+
* `sigma`: standard deviation of the lift test.
1640+
dist : pm.Distribution, optional
1641+
The distribution to use for the likelihood, by default pm.Gamma
1642+
name : str, optional
1643+
The name of the likelihood of the lift test contribution(s),
1644+
by default "lift_measurements". Name change required if calling
1645+
this method multiple times.
1646+
1647+
Raises
1648+
------
1649+
RuntimeError
1650+
If the model has not been built yet.
1651+
KeyError
1652+
If the 'channel' column or any of the model dimensions is not present
1653+
in df_lift_test.
1654+
1655+
Examples
1656+
--------
1657+
Build the model first then add lift test measurements.
1658+
1659+
.. code-block:: python
1660+
1661+
import pandas as pd
1662+
import numpy as np
1663+
1664+
from pymc_marketing.mmm import GeometricAdstock, LogisticSaturation
1665+
1666+
from pymc_marketing.mmm.multidimensional import MMM
1667+
1668+
model = MMM(
1669+
date_column="date",
1670+
channel_columns=["x1", "x2"],
1671+
target_column="target",
1672+
adstock=GeometricAdstock(l_max=8),
1673+
saturation=LogisticSaturation(),
1674+
yearly_seasonality=2,
1675+
dims=("geo",),
1676+
)
1677+
1678+
X = pd.DataFrame(
1679+
{
1680+
"date": np.tile(
1681+
pd.date_range(start="2025-01-01", end="2025-05-01", freq="W"), 2
1682+
),
1683+
"x1": np.random.rand(34),
1684+
"x2": np.random.rand(34),
1685+
"target": np.random.rand(34),
1686+
"geo": 17 * ["FIN"] + 17 * ["SWE"],
1687+
}
1688+
)
1689+
y = X["target"]
1690+
1691+
model.build_model(X.drop(columns=["target"]), y)
1692+
1693+
df_lift_test = pd.DataFrame(
1694+
{
1695+
"channel": ["x1", "x1"],
1696+
"geo": ["FIN", "SWE"],
1697+
"x": [1, 1],
1698+
"delta_x": [0.1, 0.2],
1699+
"delta_y": [0.1, 0.1],
1700+
"sigma": [0.1, 0.1],
1701+
}
1702+
)
1703+
1704+
model.add_lift_test_measurements(df_lift_test)
1705+
1706+
"""
1707+
if not hasattr(self, "model"):
1708+
raise RuntimeError(
1709+
"The model has not been built yet. Please, build the model first."
1710+
)
1711+
1712+
if "channel" not in df_lift_test.columns:
1713+
raise KeyError(
1714+
"The 'channel' column is required to map the lift measurements to the model."
1715+
)
1716+
1717+
for dim in self.dims:
1718+
if dim not in df_lift_test.columns:
1719+
raise KeyError(
1720+
f"The {dim} column is required to map the lift measurements to the model."
1721+
)
1722+
1723+
# Function to scale "delta_y" and "sigma" to the same scale as the target in the model.
1724+
target_transform = self._make_target_transform(df_lift_test)
1725+
1726+
# Function to scale "x" and "delta_x" to the same scale as their respective channels.
1727+
channel_transform = self._make_channel_transform(df_lift_test)
1728+
1729+
df_lift_test_scaled = scale_lift_measurements(
1730+
df_lift_test=df_lift_test,
1731+
channel_col="channel",
1732+
channel_columns=self.channel_columns, # type: ignore
1733+
channel_transform=channel_transform,
1734+
target_transform=target_transform,
1735+
dim_cols=list(self.dims),
1736+
)
1737+
# This is coupled with the name of the
1738+
# latent process Deterministic
1739+
time_varying_var_name = (
1740+
"media_latent_process" if self.time_varying_media else None
1741+
)
1742+
add_lift_measurements_to_likelihood_from_saturation(
1743+
df_lift_test=df_lift_test_scaled,
1744+
saturation=self.saturation,
1745+
time_varying_var_name=time_varying_var_name,
1746+
model=self.model,
1747+
dist=dist,
1748+
name=name,
1749+
)
1750+
15081751

15091752
def create_sample_kwargs(
15101753
sampler_config: dict[str, Any] | None,

0 commit comments

Comments
 (0)