Skip to content

Commit 9cfcd46

Browse files
committed
feat:First version of &categorize-to-intervals.
1 parent 4a9b1af commit 9cfcd46

File tree

1 file changed

+52
-0
lines changed

1 file changed

+52
-0
lines changed

lib/ML/SparseMatrixRecommender/Utilities.rakumod

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use v6.d;
22

3+
use Statistics::Distributions::Utilities;
4+
35
unit module ML::SparseMatrixRecommender::Utilities;
46

57
#| Get the Titanic dataset. Returns an array of hashmaps.
@@ -47,3 +49,53 @@ our sub convert-to-wide-form(
4749

4850
return @dsDataWide;
4951
}
52+
53+
#==========================================================
54+
# Categorize to intervals
55+
#==========================================================
56+
#| Categorize the numeric values of C<@vec> into intervals delimited by break points.
#| C<@vec> -- values to categorize; every element must be a defined C<Numeric>.
#| C<:$breaks> -- list of numeric break points, or C<Whatever> to derive them from
#|   the quantiles of C<@vec> taken at C<:$probs>.
#| C<:$probs> -- list of numeric probabilities, used only when C<:$breaks> is C<Whatever>;
#|   C<Whatever> stands for 0, 0.1, ..., 1.
#| C<:$interval-names> -- if True, return labels such as "1≤v<2" instead of the
#|   values produced by C<find-interval>.
#| Dies if an argument has the wrong shape or if fewer than two distinct break
#| points are available.
our sub categorize-to-intervals(
        @vec,
        :$breaks is copy = Whatever,
        :$probs is copy = Whatever,
        Bool :$interval-names = False) returns List {

    # Validate input vector
    die "The first argument is expected to be an array of numeric values."
    unless @vec.all ~~ Numeric:D;

    # A Seq can be iterated only once, yet the checks below walk the argument
    # and then read it again. Cache Seq arguments so both passes see the values.
    $probs = $probs.cache if $probs ~~ Seq:D;
    $breaks = $breaks.cache if $breaks ~~ Seq:D;

    # Handle probabilities
    my @mprobs = do if $probs.isa(Whatever) {
        # Default: the eleven probabilities 0, 0.1, ..., 1
        (^11) >>/>> 10;
    } elsif $probs ~~ (Array:D | List:D | Seq:D) && $probs.all ~~ Numeric:D {
        $probs.unique.sort
    } else {
        die 'The $probs argument is expected to be a list of probabilities or Whatever.'
    }

    # Determine breaks
    my @mbreaks = do if $breaks.isa(Whatever) {
        # Derive the break points from the data; equal quantiles collapse into one break
        my @q = Statistics::Distributions::Utilities::quantile(@vec, @mprobs);
        @q.unique.sort;
    } elsif $breaks ~~ (Array:D | List:D | Seq:D) && $breaks.all ~~ Numeric:D {
        # Every element is already known to be Numeric:D here, so no extra filtering is needed
        $breaks.unique.sort;
    } else {
        die 'The $breaks argument is expected to be a list numbers or Whatever.'
    }

    die "Need at least two distinct break points to define intervals"
    unless @mbreaks.elems >= 2;

    # Locate the interval of each value
    my @res = Statistics::Distributions::Utilities::find-interval(@vec, @mbreaks);

    # Interval names, if specified
    if $interval-names {
        # One label per pair of consecutive breaks, plus an open-ended last label
        my @names = @mbreaks.rotor(2 => -1).map({"{$_.head}≤v<{$_.tail}"});
        @names.push: "{@mbreaks.tail}≤v<∞";

        # NOTE(review): assumes find-interval yields non-negative, zero-based
        # indices into @mbreaks -- TODO confirm against its documentation.
        # Indices past the last label fall back to the open-ended label.
        @res = @res.map: -> $i {
            $i < @names.elems ?? @names[$i] !! @names.tail
        }
    }

    return @res;
}

0 commit comments

Comments
 (0)