11use v6.d ;
22
3+ use Statistics::Distributions::Utilities;
4+
35unit module ML::SparseMatrixRecommender::Utilities ;
46
57# | Get the Titanic dataset. Returns an array of hashmaps.
@@ -47,3 +49,53 @@ our sub convert-to-wide-form(
4749
4850 return @ dsDataWide ;
4951}
52+
53+ # ==========================================================
54+ # Categorize to intervals
55+ # ==========================================================
#| Categorize the numeric values of C<@vec> into intervals delimited by break points.
#| C<@vec> -- values to categorize; every element must be a defined Numeric.
#| C<:$breaks> -- list of numeric break points, or Whatever to derive them from
#| the quantiles of C<@vec> at the probabilities given by C<:$probs>.
#| C<:$probs> -- list of numeric probabilities, or Whatever for 0, 0.1, ..., 1.
#| It is validated even when C<:$breaks> is given explicitly.
#| C<:$interval-names> -- if True, return labels such as "1≤v<2" instead of
#| the interval indexes produced by C<find-interval>.
#| Dies if C<@vec>, C<:$probs> or C<:$breaks> have unexpected types, or if fewer
#| than two distinct break points are available.
our sub categorize-to-intervals(
        @vec,
        :$breaks is copy = Whatever,
        :$probs is copy = Whatever,
        Bool :$interval-names = False) returns List {

    # Validate input vector
    die "The first argument is expected to be an array of numeric values."
        unless @vec.all ~~ Numeric:D;

    # A Seq can be iterated only once. The validation below (.all) and the
    # subsequent .unique.sort would each consume it, so a Seq argument is
    # reified into a List up front.
    $probs  = $probs.List  if $probs  ~~ Seq:D;
    $breaks = $breaks.List if $breaks ~~ Seq:D;

    # Handle probabilities
    my @mprobs = do if $probs.isa(Whatever) {
        # Default: 0, 0.1, 0.2, ..., 1
        (^11) >>/>> 10;
    } elsif $probs ~~ List:D && $probs.all ~~ Numeric:D {
        $probs.unique.sort
    } else {
        die 'The $probs argument is expected to be a list of probabilities or Whatever.'
    }

    # Determine breaks
    my @mbreaks = do if $breaks.isa(Whatever) {
        my @q = Statistics::Distributions::Utilities::quantile(@vec, @mprobs);
        # Different probabilities can map to the same quantile value.
        @q.unique.sort;
    } elsif $breaks ~~ List:D && $breaks.all ~~ Numeric:D {
        $breaks.unique.sort;
    } else {
        die 'The $breaks argument is expected to be a list numbers or Whatever.'
    }

    die "Need at least two distinct break points to define intervals"
        unless @mbreaks ≥ 2;

    # Categorize each value; the search itself is delegated to find-interval.
    my @res = Statistics::Distributions::Utilities::find-interval(@vec, @mbreaks);

    return @res unless $interval-names;

    # Interval names: one per pair of consecutive breaks, plus an open-ended last one.
    my @names = @mbreaks.rotor(2 => -1).map({ "{$_.head}≤v<{$_.tail}" });
    @names.push: "{@mbreaks.tail}≤v<∞";

    # NOTE(review): this assumes find-interval returns 0-based, non-negative
    # indexes. What it returns for values below the first break is not visible
    # here -- confirm, since a negative index would not map to a name.
    my @named = @res.map: -> $i {
        # Indexes past the last name fall into the open-ended interval.
        $i < @names.elems ?? @names[$i] !! @names.tail
    }

    return @named;
}
0 commit comments