@@ -418,6 +418,132 @@ def test_sample_posterior_predictive_same_data(single_dim_data, mock_pymc_sample
418418 )
419419
420420
def test_sample_posterior_predictive_same_data_with_include_last_observations(
    single_dim_data, mock_pymc_sample
):
    """Predicting on the training rows with ``include_last_observations=True`` must fail.

    The model is fitted on every row except the last five, and the very same
    rows are then passed to ``sample_posterior_predictive``. The call is
    expected to raise ``ValueError`` with the date-overlap message.

    ``mock_pymc_sample`` is requested only so that fixture is active for the
    test; its effect is defined outside this block.
    """
    features, target = single_dim_data
    train_features = features.iloc[:-5]
    train_target = target.iloc[:-5]

    # Model under test: geometric adstock (l_max=2) with logistic saturation.
    model = MMM(
        date_column="date",
        target_column="target",
        channel_columns=["channel_1", "channel_2", "channel_3"],
        adstock=GeometricAdstock(l_max=2),
        saturation=LogisticSaturation(),
    )
    model.build_model(train_features, train_target)
    model.fit(
        train_features,
        train_target,
        draws=200,
        tune=100,
        chains=1,
        random_seed=123,
    )

    expected_message = (
        "Cannot use include_last_observations=True when input dates overlap with training dates"
    )
    predict_kwargs = {
        "include_last_observations": True,  # the flag that should trigger the error
        "extend_idata": False,
        "random_seed": 123,
    }

    # Every input date is also a training date, so the call has to be rejected.
    with pytest.raises(ValueError, match=expected_message):
        model.sample_posterior_predictive(train_features, **predict_kwargs)
459+
460+
def test_sample_posterior_predictive_partial_overlap_with_include_last_observations(
    single_dim_data, mock_pymc_sample
):
    """A partial date overlap with ``include_last_observations=True`` must also fail.

    The model is fitted on every row except the last five. The prediction
    input is a six-row window that straddles the end of the training slice,
    so only some of its dates were seen during fitting. The call is expected
    to raise ``ValueError`` with the date-overlap message.

    ``mock_pymc_sample`` is requested only so that fixture is active for the
    test; its effect is defined outside this block.
    """
    features, target = single_dim_data
    train_features = features.iloc[:-5]
    train_target = target.iloc[:-5]

    # Model under test: geometric adstock (l_max=2) with logistic saturation.
    model = MMM(
        date_column="date",
        target_column="target",
        channel_columns=["channel_1", "channel_2", "channel_3"],
        adstock=GeometricAdstock(l_max=2),
        saturation=LogisticSaturation(),
    )
    model.build_model(train_features, train_target)
    model.fit(
        train_features,
        train_target,
        draws=200,
        tune=100,
        chains=1,
        random_seed=123,
    )

    # Rows -8..-6 belong to the training slice (which stops before row -5);
    # rows -5..-3 do not. The window therefore mixes three training rows
    # with three unseen rows.
    straddling_window = features.iloc[-8:-2]

    expected_message = (
        "Cannot use include_last_observations=True when input dates overlap with training dates"
    )
    with pytest.raises(ValueError, match=expected_message):
        model.sample_posterior_predictive(
            straddling_window,
            include_last_observations=True,
            extend_idata=False,
            random_seed=123,
        )
501+
502+
def test_sample_posterior_predictive_no_overlap_with_include_last_observations(
    single_dim_data, mock_pymc_sample
):
    """``include_last_observations=True`` succeeds when the input dates are all new.

    The model is fitted on every row except the last five, and the held-out
    last five rows are used as the prediction input, so no input row comes
    from the training slice. The call must complete without raising, and the
    ``date`` coordinate of the result must equal the dates of the new rows.

    ``mock_pymc_sample`` is requested only so that fixture is active for the
    test; its effect is defined outside this block.
    """
    X, y = single_dim_data
    X_train = X.iloc[:-5]
    y_train = y.iloc[:-5]
    X_new = X.iloc[-5:]  # the rows held out of training

    # Build and fit the model
    mmm = MMM(
        date_column="date",
        target_column="target",
        channel_columns=["channel_1", "channel_2", "channel_3"],
        adstock=GeometricAdstock(l_max=2),
        saturation=LogisticSaturation(),
    )
    mmm.build_model(X_train, y_train)
    mmm.fit(X_train, y_train, draws=200, tune=100, chains=1, random_seed=123)

    # No try/except here: an unexpected exception already fails the test, and
    # letting it propagate keeps the original traceback instead of replacing
    # it with a flattened pytest.fail message.
    result = mmm.sample_posterior_predictive(
        X_new,
        include_last_observations=True,
        extend_idata=False,
        random_seed=123,
    )

    # Only the new prediction dates should be present in the output.
    # NOTE(review): presumably the model prepends l_max training rows
    # internally and slices them off again before returning — confirm
    # against the implementation of sample_posterior_predictive.
    expected_dates = X_new["date"].values
    np.testing.assert_array_equal(result.coords["date"].values, expected_dates)
545+
546+
421547@pytest .fixture
422548def df_events () -> pd .DataFrame :
423549 return pd .DataFrame (
0 commit comments