Skip to content

Commit 7a30d18

Browse files
authored
Handle static covariates in ModifiedBetaGeoModel (#1815)
1 parent fb8eff5 commit 7a30d18

File tree

3 files changed

+543
-26
lines changed

3 files changed

+543
-26
lines changed

pymc_marketing/clv/models/beta_geo.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ class BetaGeoModel(CLVModel):
6161
* `b`: Shape parameter of dropout process; defaults to `(1 - phi_dropout) * kappa_dropout`
6262
* `phi_dropout`: Nested prior for a and b priors; defaults to `Prior("Uniform", lower=0, upper=1)`
6363
* `kappa_dropout`: Nested prior for a and b priors; defaults to `Prior("Pareto", alpha=1, m=1)`
64-
* `purchase_covariates`: Coefficients for purchase rate covariates; defaults to `Normal(0, 3)`
65-
* `dropout_covariates`: Coefficients for dropout covariates; defaults to `Normal.dist(0, 3)`
64+
* `purchase_covariates`: Coefficients for purchase rate covariates; defaults to `Normal(0, 1)`
65+
* `dropout_covariates`: Coefficients for dropout covariates; defaults to `Normal(0, 1)`
6666
* `purchase_covariate_cols`: List containing column names of covariates for customer purchase rates.
6767
* `dropout_covariate_cols`: List containing column names of covariates for customer dropouts.
6868
sampler_config : dict, optional

pymc_marketing/clv/models/modified_beta_geo.py

Lines changed: 146 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"""Modified Beta-Geometric Negative Binomial Distribution (MBG/NBD) model for a non-contractual customer population across continuous time.""" # noqa: E501
1515

1616
from collections.abc import Sequence
17+
from typing import Literal
1718

1819
import numpy as np
1920
import pandas as pd
@@ -57,6 +58,10 @@ class ModifiedBetaGeoModel(BetaGeoModel):
5758
* `b`: Shape parameter of dropout process; defaults to `(1 - phi_dropout) * kappa_dropout`
5859
* `phi_dropout`: Nested prior for a and b priors; defaults to `Prior("Uniform", lower=0, upper=1)`
5960
* `kappa_dropout`: Nested prior for a and b priors; defaults to `Prior("Pareto", alpha=1, m=1)`
61+
* `purchase_coefficient`: Prior for the purchase rate covariate coefficients; defaults to `Normal(0, 1)`
62+
* `dropout_coefficient`: Prior for the dropout covariate coefficients (used for both `a` and `b`); defaults to `Normal(0, 1)`
63+
* `purchase_covariate_cols`: List containing column names of covariates for customer purchase rates.
64+
* `dropout_covariate_cols`: List containing column names of covariates for customer dropouts.
6065
sampler_config : dict, optional
6166
Dictionary of sampler parameters. Defaults to *None*.
6267
@@ -139,29 +144,131 @@ class ModifiedBetaGeoModel(BetaGeoModel):
139144
def build_model(self) -> None: # type: ignore[override]
140145
"""Build the model."""
141146
coords = {
147+
"purchase_covariate": self.purchase_covariate_cols,
148+
"dropout_covariate": self.dropout_covariate_cols,
142149
"customer_id": self.data["customer_id"],
143150
"obs_var": ["recency", "frequency"],
144151
}
145152
with pm.Model(coords=coords) as self.model:
146153
# purchase rate priors
147-
alpha = self.model_config["alpha"].create_variable("alpha")
148-
r = self.model_config["r"].create_variable("r")
154+
if self.purchase_covariate_cols:
155+
purchase_data = pm.Data(
156+
"purchase_data",
157+
self.data[self.purchase_covariate_cols],
158+
dims=["customer_id", "purchase_covariate"],
159+
)
160+
self.model_config["purchase_coefficient"].dims = "purchase_covariate"
161+
purchase_coefficient_alpha = self.model_config[
162+
"purchase_coefficient"
163+
].create_variable("purchase_coefficient_alpha")
164+
165+
alpha_scale = self.model_config["alpha"].create_variable("alpha_scale")
166+
alpha = pm.Deterministic(
167+
"alpha",
168+
(
169+
alpha_scale
170+
* pm.math.exp(
171+
-pm.math.dot(purchase_data, purchase_coefficient_alpha)
172+
)
173+
),
174+
dims="customer_id",
175+
)
176+
else:
177+
alpha = self.model_config["alpha"].create_variable("alpha")
149178

150179
# dropout priors
151180
if "a" in self.model_config and "b" in self.model_config:
152-
a = self.model_config["a"].create_variable("a")
153-
b = self.model_config["b"].create_variable("b")
181+
if self.dropout_covariate_cols:
182+
dropout_data = pm.Data(
183+
"dropout_data",
184+
self.data[self.dropout_covariate_cols],
185+
dims=["customer_id", "dropout_covariate"],
186+
)
187+
188+
self.model_config["dropout_coefficient"].dims = "dropout_covariate"
189+
dropout_coefficient_a = self.model_config[
190+
"dropout_coefficient"
191+
].create_variable("dropout_coefficient_a")
192+
dropout_coefficient_b = self.model_config[
193+
"dropout_coefficient"
194+
].create_variable("dropout_coefficient_b")
195+
196+
a_scale = self.model_config["a"].create_variable("a_scale")
197+
b_scale = self.model_config["b"].create_variable("b_scale")
198+
a = pm.Deterministic(
199+
"a",
200+
a_scale
201+
* pm.math.exp(pm.math.dot(dropout_data, dropout_coefficient_a)),
202+
dims="customer_id",
203+
)
204+
b = pm.Deterministic(
205+
"b",
206+
b_scale
207+
* pm.math.exp(pm.math.dot(dropout_data, dropout_coefficient_b)),
208+
dims="customer_id",
209+
)
210+
else:
211+
a = self.model_config["a"].create_variable("a")
212+
b = self.model_config["b"].create_variable("b")
154213
else:
155214
# hierarchical pooling of dropout rate priors
156-
phi_dropout = self.model_config["phi_dropout"].create_variable(
157-
"phi_dropout"
158-
)
159-
kappa_dropout = self.model_config["kappa_dropout"].create_variable(
160-
"kappa_dropout"
161-
)
162-
163-
a = pm.Deterministic("a", phi_dropout * kappa_dropout)
164-
b = pm.Deterministic("b", (1.0 - phi_dropout) * kappa_dropout)
215+
if self.dropout_covariate_cols:
216+
dropout_data = pm.Data(
217+
"dropout_data",
218+
self.data[self.dropout_covariate_cols],
219+
dims=["customer_id", "dropout_covariate"],
220+
)
221+
222+
self.model_config["dropout_coefficient"].dims = "dropout_covariate"
223+
dropout_coefficient_a = self.model_config[
224+
"dropout_coefficient"
225+
].create_variable("dropout_coefficient_a")
226+
dropout_coefficient_b = self.model_config[
227+
"dropout_coefficient"
228+
].create_variable("dropout_coefficient_b")
229+
230+
phi_dropout = self.model_config["phi_dropout"].create_variable(
231+
"phi_dropout"
232+
)
233+
kappa_dropout = self.model_config["kappa_dropout"].create_variable(
234+
"kappa_dropout"
235+
)
236+
237+
a_scale = pm.Deterministic(
238+
"a_scale",
239+
phi_dropout * kappa_dropout,
240+
)
241+
b_scale = pm.Deterministic(
242+
"b_scale",
243+
(1.0 - phi_dropout) * kappa_dropout,
244+
)
245+
246+
a = pm.Deterministic(
247+
"a",
248+
a_scale
249+
* pm.math.exp(pm.math.dot(dropout_data, dropout_coefficient_a)),
250+
dims="customer_id",
251+
)
252+
b = pm.Deterministic(
253+
"b",
254+
b_scale
255+
* pm.math.exp(pm.math.dot(dropout_data, dropout_coefficient_b)),
256+
dims="customer_id",
257+
)
258+
259+
else:
260+
phi_dropout = self.model_config["phi_dropout"].create_variable(
261+
"phi_dropout"
262+
)
263+
kappa_dropout = self.model_config["kappa_dropout"].create_variable(
264+
"kappa_dropout"
265+
)
266+
267+
a = pm.Deterministic("a", phi_dropout * kappa_dropout)
268+
b = pm.Deterministic("b", (1.0 - phi_dropout) * kappa_dropout)
269+
270+
# r remains unchanged with or without covariates
271+
r = self.model_config["r"].create_variable("r")
165272

166273
ModifiedBetaGeoNBD(
167274
name="recency_frequency",
@@ -346,10 +453,11 @@ def distribution_new_customer(
346453
*,
347454
T: int | np.ndarray | pd.Series | None = None,
348455
random_seed: RandomState | None = None,
349-
var_names: Sequence[str] = ("dropout", "purchase_rate"),
456+
var_names: Sequence[
457+
Literal["dropout", "purchase_rate", "recency_frequency"]
458+
] = ("dropout", "purchase_rate", "recency_frequency"),
350459
n_samples: int = 1000,
351460
) -> xarray.Dataset:
352-
# TODO: This is extraneous now, until a new distribution block is added.
353461
"""Compute posterior predictive samples of dropout, purchase rate and frequency/recency of new customers."""
354462
if data is None:
355463
data = self.data
@@ -369,14 +477,30 @@ def distribution_new_customer(
369477
coords = self.model.coords.copy() # type: ignore
370478
coords["customer_id"] = data["customer_id"]
371479

372-
with pm.Model(coords=coords):
373-
a = pm.HalfFlat("a")
374-
b = pm.HalfFlat("b")
375-
alpha = pm.HalfFlat("alpha")
376-
r = pm.HalfFlat("r")
480+
with pm.Model(coords=coords) as pred_model:
481+
if self.purchase_covariate_cols:
482+
alpha = pm.Flat("alpha", dims=["customer_id"])
483+
else:
484+
alpha = pm.Flat("alpha")
485+
486+
if self.dropout_covariate_cols:
487+
a = pm.Flat("a", dims=["customer_id"])
488+
b = pm.Flat("b", dims=["customer_id"])
489+
else:
490+
a = pm.Flat("a")
491+
b = pm.Flat("b")
377492

378-
pm.Beta("dropout", alpha=a, beta=b)
379-
pm.Gamma("purchase_rate", alpha=r, beta=alpha)
493+
r = pm.Flat("r")
494+
495+
pm.Beta(
496+
"dropout", alpha=a, beta=b, dims=pred_model.named_vars_to_dims.get("a")
497+
)
498+
pm.Gamma(
499+
"purchase_rate",
500+
alpha=r,
501+
beta=alpha,
502+
dims=pred_model.named_vars_to_dims.get("alpha"),
503+
)
380504

381505
ModifiedBetaGeoNBD(
382506
name="recency_frequency",

0 commit comments

Comments
 (0)