1414"""Modified Beta-Geometric Negative Binomial Distribution (MBG/NBD) model for a non-contractual customer population across continuous time.""" # noqa: E501
1515
1616from collections .abc import Sequence
17+ from typing import Literal
1718
1819import numpy as np
1920import pandas as pd
@@ -57,6 +58,10 @@ class ModifiedBetaGeoModel(BetaGeoModel):
5758 * `b`: Shape parameter of dropout process; defaults to `1-phi_dropout` * `kappa_dropout`
5859 * `phi_dropout`: Nested prior for a and b priors; defaults to `Prior("Uniform", lower=0, upper=1)`
5960 * `kappa_dropout`: Nested prior for a and b priors; defaults to `Prior("Pareto", alpha=1, m=1)`
61+ * `purchase_coefficient`: Coefficients for purchase rate covariates; defaults to `Normal(0, 1)`
62+ * `dropout_coefficient`: Coefficients for dropout covariates; defaults to `Normal(0, 1)`
63+ * `purchase_covariate_cols`: List containing column names of covariates for customer purchase rates.
64+ * `dropout_covariate_cols`: List containing column names of covariates for customer dropouts.
6065 sampler_config : dict, optional
6166 Dictionary of sampler parameters. Defaults to *None*.
6267
@@ -139,29 +144,131 @@ class ModifiedBetaGeoModel(BetaGeoModel):
139144 def build_model (self ) -> None : # type: ignore[override]
140145 """Build the model."""
141146 coords = {
147+ "purchase_covariate" : self .purchase_covariate_cols ,
148+ "dropout_covariate" : self .dropout_covariate_cols ,
142149 "customer_id" : self .data ["customer_id" ],
143150 "obs_var" : ["recency" , "frequency" ],
144151 }
145152 with pm .Model (coords = coords ) as self .model :
146153 # purchase rate priors
147- alpha = self .model_config ["alpha" ].create_variable ("alpha" )
148- r = self .model_config ["r" ].create_variable ("r" )
154+ if self .purchase_covariate_cols :
155+ purchase_data = pm .Data (
156+ "purchase_data" ,
157+ self .data [self .purchase_covariate_cols ],
158+ dims = ["customer_id" , "purchase_covariate" ],
159+ )
160+ self .model_config ["purchase_coefficient" ].dims = "purchase_covariate"
161+ purchase_coefficient_alpha = self .model_config [
162+ "purchase_coefficient"
163+ ].create_variable ("purchase_coefficient_alpha" )
164+
165+ alpha_scale = self .model_config ["alpha" ].create_variable ("alpha_scale" )
166+ alpha = pm .Deterministic (
167+ "alpha" ,
168+ (
169+ alpha_scale
170+ * pm .math .exp (
171+ - pm .math .dot (purchase_data , purchase_coefficient_alpha )
172+ )
173+ ),
174+ dims = "customer_id" ,
175+ )
176+ else :
177+ alpha = self .model_config ["alpha" ].create_variable ("alpha" )
149178
150179 # dropout priors
151180 if "a" in self .model_config and "b" in self .model_config :
152- a = self .model_config ["a" ].create_variable ("a" )
153- b = self .model_config ["b" ].create_variable ("b" )
181+ if self .dropout_covariate_cols :
182+ dropout_data = pm .Data (
183+ "dropout_data" ,
184+ self .data [self .dropout_covariate_cols ],
185+ dims = ["customer_id" , "dropout_covariate" ],
186+ )
187+
188+ self .model_config ["dropout_coefficient" ].dims = "dropout_covariate"
189+ dropout_coefficient_a = self .model_config [
190+ "dropout_coefficient"
191+ ].create_variable ("dropout_coefficient_a" )
192+ dropout_coefficient_b = self .model_config [
193+ "dropout_coefficient"
194+ ].create_variable ("dropout_coefficient_b" )
195+
196+ a_scale = self .model_config ["a" ].create_variable ("a_scale" )
197+ b_scale = self .model_config ["b" ].create_variable ("b_scale" )
198+ a = pm .Deterministic (
199+ "a" ,
200+ a_scale
201+ * pm .math .exp (pm .math .dot (dropout_data , dropout_coefficient_a )),
202+ dims = "customer_id" ,
203+ )
204+ b = pm .Deterministic (
205+ "b" ,
206+ b_scale
207+ * pm .math .exp (pm .math .dot (dropout_data , dropout_coefficient_b )),
208+ dims = "customer_id" ,
209+ )
210+ else :
211+ a = self .model_config ["a" ].create_variable ("a" )
212+ b = self .model_config ["b" ].create_variable ("b" )
154213 else :
155214 # hierarchical pooling of dropout rate priors
156- phi_dropout = self .model_config ["phi_dropout" ].create_variable (
157- "phi_dropout"
158- )
159- kappa_dropout = self .model_config ["kappa_dropout" ].create_variable (
160- "kappa_dropout"
161- )
162-
163- a = pm .Deterministic ("a" , phi_dropout * kappa_dropout )
164- b = pm .Deterministic ("b" , (1.0 - phi_dropout ) * kappa_dropout )
215+ if self .dropout_covariate_cols :
216+ dropout_data = pm .Data (
217+ "dropout_data" ,
218+ self .data [self .dropout_covariate_cols ],
219+ dims = ["customer_id" , "dropout_covariate" ],
220+ )
221+
222+ self .model_config ["dropout_coefficient" ].dims = "dropout_covariate"
223+ dropout_coefficient_a = self .model_config [
224+ "dropout_coefficient"
225+ ].create_variable ("dropout_coefficient_a" )
226+ dropout_coefficient_b = self .model_config [
227+ "dropout_coefficient"
228+ ].create_variable ("dropout_coefficient_b" )
229+
230+ phi_dropout = self .model_config ["phi_dropout" ].create_variable (
231+ "phi_dropout"
232+ )
233+ kappa_dropout = self .model_config ["kappa_dropout" ].create_variable (
234+ "kappa_dropout"
235+ )
236+
237+ a_scale = pm .Deterministic (
238+ "a_scale" ,
239+ phi_dropout * kappa_dropout ,
240+ )
241+ b_scale = pm .Deterministic (
242+ "b_scale" ,
243+ (1.0 - phi_dropout ) * kappa_dropout ,
244+ )
245+
246+ a = pm .Deterministic (
247+ "a" ,
248+ a_scale
249+ * pm .math .exp (pm .math .dot (dropout_data , dropout_coefficient_a )),
250+ dims = "customer_id" ,
251+ )
252+ b = pm .Deterministic (
253+ "b" ,
254+ b_scale
255+ * pm .math .exp (pm .math .dot (dropout_data , dropout_coefficient_b )),
256+ dims = "customer_id" ,
257+ )
258+
259+ else :
260+ phi_dropout = self .model_config ["phi_dropout" ].create_variable (
261+ "phi_dropout"
262+ )
263+ kappa_dropout = self .model_config ["kappa_dropout" ].create_variable (
264+ "kappa_dropout"
265+ )
266+
267+ a = pm .Deterministic ("a" , phi_dropout * kappa_dropout )
268+ b = pm .Deterministic ("b" , (1.0 - phi_dropout ) * kappa_dropout )
269+
270+ # r remains unchanged with or without covariates
271+ r = self .model_config ["r" ].create_variable ("r" )
165272
166273 ModifiedBetaGeoNBD (
167274 name = "recency_frequency" ,
@@ -346,10 +453,11 @@ def distribution_new_customer(
346453 * ,
347454 T : int | np .ndarray | pd .Series | None = None ,
348455 random_seed : RandomState | None = None ,
349- var_names : Sequence [str ] = ("dropout" , "purchase_rate" ),
456+ var_names : Sequence [
457+ Literal ["dropout" , "purchase_rate" , "recency_frequency" ]
458+ ] = ("dropout" , "purchase_rate" , "recency_frequency" ),
350459 n_samples : int = 1000 ,
351460 ) -> xarray .Dataset :
352- # TODO: This is extraneous now, until a new distribution block is added.
353461 """Compute posterior predictive samples of dropout, purchase rate and frequency/recency of new customers."""
354462 if data is None :
355463 data = self .data
@@ -369,14 +477,30 @@ def distribution_new_customer(
369477 coords = self .model .coords .copy () # type: ignore
370478 coords ["customer_id" ] = data ["customer_id" ]
371479
372- with pm .Model (coords = coords ):
373- a = pm .HalfFlat ("a" )
374- b = pm .HalfFlat ("b" )
375- alpha = pm .HalfFlat ("alpha" )
376- r = pm .HalfFlat ("r" )
480+ with pm .Model (coords = coords ) as pred_model :
481+ if self .purchase_covariate_cols :
482+ alpha = pm .Flat ("alpha" , dims = ["customer_id" ])
483+ else :
484+ alpha = pm .Flat ("alpha" )
485+
486+ if self .dropout_covariate_cols :
487+ a = pm .Flat ("a" , dims = ["customer_id" ])
488+ b = pm .Flat ("b" , dims = ["customer_id" ])
489+ else :
490+ a = pm .Flat ("a" )
491+ b = pm .Flat ("b" )
377492
378- pm .Beta ("dropout" , alpha = a , beta = b )
379- pm .Gamma ("purchase_rate" , alpha = r , beta = alpha )
493+ r = pm .Flat ("r" )
494+
495+ pm .Beta (
496+ "dropout" , alpha = a , beta = b , dims = pred_model .named_vars_to_dims .get ("a" )
497+ )
498+ pm .Gamma (
499+ "purchase_rate" ,
500+ alpha = r ,
501+ beta = alpha ,
502+ dims = pred_model .named_vars_to_dims .get ("alpha" ),
503+ )
380504
381505 ModifiedBetaGeoNBD (
382506 name = "recency_frequency" ,
0 commit comments