Skip to content

Commit eb4e713

Browse files
giuliabaldini and Giulia Baldini
authored
1 different results on windows for test dataset (#2)
* More specific output for test * Change the order of the initializers * Change int to size_t and remove unused variable * Cast size_t to int for comparison warning * Convert weights to size_t * Change old school cast to static cast * Change everything to size_t for consistency * Missed declaration * Change all points back to double * Fix some types * Add some static casts * Many more static conversions * Forgotten one definition * More static casts * Remove last warnings2 * Add git hash * Fix workflow * Fix workflow * Syntax of git hash that works for pull and push --------- Co-authored-by: Giulia Baldini <giulia.baldini@hhu.de>
1 parent b4f5d3f commit eb4e713

File tree

11 files changed

+71
-67
lines changed

11 files changed

+71
-67
lines changed

.github/workflows/gitlab.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,8 @@ jobs:
2828
access_token: ${{ secrets.DEPLOY_ACCESS_TOKEN }}
2929
id: '3102'
3030
ref: 'main'
31-
variables: '{"PROJECT":"bico"}'
31+
variables: |
32+
{
33+
"PROJECT": "bico",
34+
"GIT_HASH": "${{ github.event.pull_request.head.sha || github.sha }}"
35+
}

bico/_core.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class BicoExternal
3535
virtual ~BicoExternal();
3636
void addData(double const *array, uint n);
3737
void addPoint(double const *array);
38-
int compute(int *sample_weights,
38+
size_t compute(double *sample_weights,
3939
double *points);
4040

4141
private:
@@ -66,7 +66,7 @@ void BicoExternal::addPoint(double const *array)
6666
*_bico << p;
6767
}
6868

69-
int BicoExternal::compute(int *sample_weights,
69+
size_t BicoExternal::compute(double *sample_weights,
7070
double *points)
7171
{
7272
// Retrieve coreset
@@ -82,7 +82,7 @@ int BicoExternal::compute(int *sample_weights,
8282
points[i * _d + j] = sol->proxysets[0][i][j];
8383
}
8484
}
85-
int m = sol->proxysets[0].size();
85+
size_t m = sol->proxysets[0].size();
8686
delete sol;
8787

8888
return m;
@@ -123,7 +123,7 @@ extern "C"
123123
#if defined(_WIN32) || defined(__CYGWIN__)
124124
__declspec(dllexport)
125125
#endif
126-
int compute(BicoExternal *bico, int *sample_weights,
126+
size_t compute(BicoExternal *bico, double *sample_weights,
127127
double *points) { return bico->compute(sample_weights, points); }
128128

129129
#if defined(_WIN32) || defined(__CYGWIN__)

bico/base/proxyprovider.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@ template<typename T> class ProxyProvider {
2424
* number of computed clusters, proxies (e.g. cluster centers) or the size of a coreset.
2525
* The sizes can be retrieved by a call to size_of_solution().
2626
*/
27-
virtual unsigned int number_of_solutions() const = 0;
27+
virtual size_t number_of_solutions() const = 0;
2828

2929
/**
3030
* @brief returns the size of a particular solution
3131
*
3232
* @param index number between 0 and @ref number_of_solutions()-1
3333
* @return the size for the requested clustering
3434
*/
35-
virtual unsigned int size_of_solution(unsigned int index) const = 0;
35+
virtual size_t size_of_solution(unsigned int index) const = 0;
3636

3737
/**
3838
* @brief returns the proxy for the specified clustering and cluster

bico/base/weightmodifier.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,4 @@ template<typename T> class WeightModifier
2828

2929
}
3030

31-
#endif
31+
#endif

bico/clustering/bico.h

Lines changed: 45 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -143,34 +143,34 @@ template<typename T> class Bico : public StreamingAlgorithm<T>
143143
int bucket_min = bucket_number;
144144
int mins;
145145

146-
if ((bucket_number < 0) || (bucket_number > outer.buckets[0].size() - 1))
146+
if ((bucket_number < 0) || (bucket_number > static_cast<int>(outer.buckets[0].size()) - 1))
147147
{
148148
// The bucket does not exist (yet)
149149
mins = 0;
150150
}
151151
else
152152
{
153153
// Search for the projection with smallest bucket size
154-
mins = outer.buckets[mini][bucket_min].size();
155-
for (int i = 1; i < outer.L; i++)
154+
mins = static_cast<int>(outer.buckets[mini][bucket_min].size());
155+
for (size_t i = 1; i < outer.L; i++)
156156
{
157-
val = outer.project(element, i);
158-
bucket_number = outer.calcBucketNumber(i, val);
159-
if ((bucket_number >= 0) & (bucket_number <= outer.buckets[i].size() - 1))
157+
val = outer.project(element, static_cast<int>(i));
158+
bucket_number = outer.calcBucketNumber(static_cast<int>(i), val);
159+
if ((bucket_number >= 0) & (bucket_number <= static_cast<int>(outer.buckets[i].size()) - 1))
160160
{
161-
int s = outer.buckets[i][bucket_number].size();
161+
int s = static_cast<int>(outer.buckets[i][bucket_number].size());
162162
if (s < mins)
163163
{
164164
mins = s;
165165
bucket_min = bucket_number;
166-
mini = i;
166+
mini = static_cast<int>(i);
167167
}
168168
}
169169
else
170170
{
171171
mins = 0;
172172
bucket_min = bucket_number;
173-
mini = i;
173+
mini = static_cast<int>(i);
174174
break;
175175
}
176176
}
@@ -185,7 +185,7 @@ template<typename T> class Bico : public StreamingAlgorithm<T>
185185
// Bucket does not exist => create one
186186
outer.allocateBucket(rnd, true);
187187
}
188-
else if (bucket_number > outer.buckets[rnd].size() - 1)
188+
else if (bucket_number > static_cast<int>(outer.buckets[rnd].size()) - 1)
189189
{
190190
// Bucket does not exist => create one
191191
outer.allocateBucket(rnd, false);
@@ -524,22 +524,22 @@ template<template <typename> class P = std::less> struct comparePairFirst
524524

525525
template<typename T> Bico<T>::Bico(size_t dim, size_t k, size_t p, size_t nMax, int seed,
526526
DissimilarityMeasure<T>* measure, WeightModifier<T>* weightModifier) :
527+
k(k),
528+
L(p),
527529
nodeIdCounter(0),
528530
measure(measure->clone()),
529531
weightModifier(weightModifier->clone()),
530532
maxNumOfCFs(nMax),
531533
curNumOfCFs(0),
532-
k(k),
533-
L(p),
534+
dimension(dim),
534535
optEst(-1),
535536
root(new BicoNode(*this)),
536537
bufferPhase(true),
537-
numOfRebuilds(0),
538538
buffer(),
539539
projection_buffer(),
540540
minDist(std::numeric_limits<double>::infinity()),
541541
pairwise_different(0),
542-
dimension(dim)
542+
numOfRebuilds(0)
543543
{
544544
Randomness::initialize(seed);
545545
RandomGenerator rg = Randomness::getRandomGenerator();
@@ -558,8 +558,8 @@ dimension(dim)
558558
// To have the same results, we cache values pair-wise,
559559
// then return them in swapped order to put them in the same order.
560560
size_t i = 0;
561-
float vals[2] = { };
562-
auto getRandomValue = [&]() -> float {
561+
double vals[2] = { };
562+
auto getRandomValue = [&]() -> double {
563563
if (! (i % 2)) {
564564
vals[0] = realDist(rg);
565565
vals[1] = realDist(rg);
@@ -568,17 +568,17 @@ dimension(dim)
568568
};
569569
#endif
570570

571-
for (int i = 0; i < L; i++)
571+
for (size_t i = 0; i < L; i++)
572572
{
573573
maxVal[i] = -1;
574574
norm = 0.0;
575-
for (int j = 0; j < dimension; j++)
575+
for (size_t j = 0; j < dimension; j++)
576576
{
577577
rndpoint[j] = getRandomValue();
578578
norm += rndpoint[j] * rndpoint[j];
579579
}
580580
norm = std::sqrt(norm);
581-
for (int j = 0; j < dimension; j++)
581+
for (size_t j = 0; j < dimension; j++)
582582
{
583583
rndpoint[j] /= norm;
584584
}
@@ -604,7 +604,7 @@ template<typename T> void Bico<T>::initializeNN()
604604
{
605605
double maxBuckets = 10000;
606606
double Size = 0;
607-
for (int i = 0; i < L; i++)
607+
for (size_t i = 0; i < L; i++)
608608
{
609609
// Compute new bucket size
610610
if (buckets[i].size() == 1)
@@ -613,15 +613,15 @@ template<typename T> void Bico<T>::initializeNN()
613613
}
614614
else
615615
{
616-
bucket_radius[i] = (long long int) ceil(sqrt(getR(1)));
616+
bucket_radius[i] = (double) ceil(sqrt(getR(1)));
617617
Size = (int) ceil((borders[i].second - borders[i].first) / (double) bucket_radius[i]);
618618
if(Size < 0 || Size > maxBuckets)
619619
{
620620
bucket_radius[i] = (borders[i].second - borders[i].first) / maxBuckets;
621621
Size = (int) ceil((borders[i].second - borders[i].first) / (double) bucket_radius[i]);
622622
}
623623
}
624-
for (int l = 0; l < buckets[i].size(); l++) buckets[i][l].clear();
624+
for (size_t l = 0; l < buckets[i].size(); l++) buckets[i][l].clear();
625625
// Create new buckets
626626
buckets[i].clear();
627627
buckets[i].resize((int) ceil(Size));
@@ -635,11 +635,11 @@ template<typename T> void Bico<T>::allocateBucket(int bucket, bool left)
635635
// Push front bucket
636636
borders[bucket].first = 2 * borders[bucket].first - borders[bucket].second;
637637
std::vector < std::vector<typename BicoNode::FeatureList::iterator >> a(2 * buckets[bucket].size());
638-
for (int i = 0; i < buckets[bucket].size(); i++)
638+
for (size_t i = 0; i < buckets[bucket].size(); i++)
639639
{
640640
a[buckets[bucket].size() + i] = buckets[bucket][i];
641641
}
642-
for (int l = 0; l < buckets[bucket].size(); l++) buckets[bucket][l].clear();
642+
for (size_t l = 0; l < buckets[bucket].size(); l++) buckets[bucket][l].clear();
643643
buckets[bucket].clear();
644644
buckets[bucket] = a;
645645
}
@@ -648,11 +648,11 @@ template<typename T> void Bico<T>::allocateBucket(int bucket, bool left)
648648
// Push back bucket
649649
borders[bucket].second = 2 * borders[bucket].second - borders[bucket].first;
650650
std::vector < std::vector<typename BicoNode::FeatureList::iterator >> a(2 * buckets[bucket].size());
651-
for (int i = 0; i < buckets[bucket].size(); i++)
651+
for (size_t i = 0; i < buckets[bucket].size(); i++)
652652
{
653653
a[i] = buckets[bucket][i];
654654
}
655-
for (int l = 0; l < buckets[bucket].size(); l++) buckets[bucket][l].clear();
655+
for (size_t l = 0; l < buckets[bucket].size(); l++) buckets[bucket][l].clear();
656656
buckets[bucket].clear();
657657
buckets[bucket] = a;
658658
}
@@ -661,7 +661,7 @@ template<typename T> void Bico<T>::allocateBucket(int bucket, bool left)
661661
template<typename T> double Bico<T>::project(T point, int i)
662662
{
663663
double ip = 0.0;
664-
for (int j = 0; j < dimension; j++)
664+
for (size_t j = 0; j < dimension; j++)
665665
{
666666
ip += point[j]*(rndprojections[i][j]);
667667
}
@@ -689,7 +689,7 @@ template<typename T> void Bico<T>::computeTraverse(BicoNode* node, ProxySolution
689689
for (auto it = node->begin(); it != node->end(); ++it)
690690
{
691691
T point(it->first.cog());
692-
weightModifier->setWeight(point, it->first.number);
692+
weightModifier->setWeight(point, static_cast<double>(it->first.number));
693693
solution->proxysets[0].push_back(point);
694694
computeTraverse(it->second, solution);
695695
}
@@ -700,9 +700,9 @@ template<typename T> Bico<T>& Bico<T>::operator<<(T const & element)
700700
if (bufferPhase)
701701
{
702702
// Update bucket configuration
703-
for (int i = 0; i < L; i++)
703+
for (size_t i = 0; i < L; i++)
704704
{
705-
double val = std::abs(project(element, i));
705+
double val = std::abs(project(element, static_cast<int>(i)));
706706
if (val > maxVal[i] || maxVal[i] == -1)
707707
{
708708
maxVal[i] = val;
@@ -722,14 +722,12 @@ template<typename T> Bico<T>& Bico<T>::operator<<(T const & element)
722722
{
723723
// Sort projection values and determine smallest distance on projection line
724724
std::sort(projection_buffer.begin(), projection_buffer.end(), comparePairFirst<>());
725-
double minProjDist = std::numeric_limits<double>::infinity();
726725
double minProjRealDist = std::numeric_limits<double>::infinity();
727-
for(int i = 0; i < pairwise_different-2; ++i)
726+
for(size_t i = 0; i < pairwise_different-2; ++i)
728727
{
729728
double tmpDist = projection_buffer[i+1].first - projection_buffer[i].first;
730729
if(tmpDist < minProjRealDist)
731730
{
732-
minProjDist = tmpDist;
733731
double tmpMinProjRealDist = measure->dissimilarity(*projection_buffer[i].second, *projection_buffer[i+1].second);
734732
if(tmpMinProjRealDist > 0)
735733
{
@@ -744,11 +742,11 @@ template<typename T> Bico<T>& Bico<T>::operator<<(T const & element)
744742
double lowerEnd = projection_buffer[0].first;
745743
double upperEnd = lowerEnd + minProjRealDist;
746744
double minDist = minProjRealDist;
747-
for(int i = 0; i < pairwise_different-1; ++i)
745+
for(size_t i = 0; i < pairwise_different-1; ++i)
748746
{
749747
if(projection_buffer[i].first >= upperEnd)
750748
{
751-
upperIndex = i;
749+
upperIndex = static_cast<int>(i);
752750

753751
for(int j = lowerIndex; j < upperIndex; ++j)
754752
{
@@ -763,7 +761,7 @@ template<typename T> Bico<T>& Bico<T>::operator<<(T const & element)
763761
}
764762
}
765763

766-
lowerIndex = i;
764+
lowerIndex = static_cast<int>(i);
767765
lowerEnd = projection_buffer[i].first;
768766
upperEnd = lowerEnd + minProjRealDist;
769767
}
@@ -773,9 +771,9 @@ template<typename T> Bico<T>& Bico<T>::operator<<(T const & element)
773771
optEst = 16.0 * minDist;
774772
//std::cout << "minDist = " << minDist << std::endl;
775773
//std::cout << "optEst = " << minDist << std::endl;
776-
long long int radius = (long long int) ceil(sqrt(getR(1)));
774+
double radius = (double) ceil(sqrt(getR(1)));
777775
borders.resize(L);
778-
for (int i = 0; i < L; i++)
776+
for (size_t i = 0; i < L; i++)
779777
{
780778
borders[i].first = -maxVal[i];
781779
borders[i].second = maxVal[i];
@@ -797,25 +795,25 @@ template<typename T> Bico<T>& Bico<T>::operator<<(T const & element)
797795

798796
template<typename T> void Bico<T>::insertIntoNN(typename BicoNode::FeatureList::iterator iteratorElement)
799797
{
800-
for (int i = 0; i < L; i++)
798+
for (size_t i = 0; i < L; i++)
801799
{
802-
double val = project(iteratorElement->first.representative, i);
803-
int bucket_number = calcBucketNumber(i, val);
800+
double val = project(iteratorElement->first.representative, static_cast<int>(i));
801+
int bucket_number = calcBucketNumber(static_cast<int>(i), val);
804802

805803
if (bucket_number < 0)
806804
{
807805
while (bucket_number < 0)
808806
{
809-
allocateBucket(i, true);
810-
bucket_number = calcBucketNumber(i, val);
807+
allocateBucket(static_cast<int>(i), true);
808+
bucket_number = calcBucketNumber(static_cast<int>(i), val);
811809
}
812810
}
813-
else if (bucket_number > buckets[i].size() - 1)
811+
else if (bucket_number > static_cast<int>(buckets[i].size()) - 1)
814812
{
815-
while (bucket_number > buckets[i].size() - 1)
813+
while (bucket_number > static_cast<int>(buckets[i].size()) - 1)
816814
{
817-
allocateBucket(i, false);
818-
bucket_number = calcBucketNumber(i, val);
815+
allocateBucket(static_cast<int>(i), false);
816+
bucket_number = calcBucketNumber(static_cast<int>(i), val);
819817
}
820818
}
821819
buckets[i][bucket_number].push_back(iteratorElement);

bico/clustering/cfentry.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ template<typename T> void CFEntry<T>::insert(T const & x)
108108
WeightedObject const * wm = static_cast<WeightedObject const *>(&x);
109109
weight = wm->getWeight();
110110
}
111-
number += weight;
111+
number += static_cast<size_t>(weight);
112112
LS += weight * x;
113113
SS += weight * (x*x);
114114
}
@@ -121,7 +121,7 @@ template<typename T> void CFEntry<T>::remove(T const & x)
121121
WeightedObject const * wm = static_cast<WeightedObject const *>(&x);
122122
weight = wm->getWeight();
123123
}
124-
number -= weight;
124+
number -= static_cast<size_t>(weight);
125125
LS -= weight * x;
126126
SS -= weight * (x*x);
127127
}
@@ -138,4 +138,4 @@ template<typename T> double CFEntry<T>::kMeansCost(T const & center)
138138

139139
}
140140

141-
#endif
141+
#endif

bico/core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,10 @@ def _compute_coreset(self, fit_coreset: bool = False) -> "BICO":
136136
"This BICO instance is not fitted yet. " "Call `fit` or `partial_fit`."
137137
)
138138

139-
c_coreset_weights = (ctypes.c_int * self.summary_size)()
139+
c_coreset_weights = (ctypes.c_double * self.summary_size)()
140140
c_points = (ctypes.c_double * self.n_features_in_ * self.summary_size)()
141141

142-
_DLL.compute.restype = ctypes.c_int
142+
_DLL.compute.restype = ctypes.c_size_t
143143
n_found_points = _DLL.compute(self.bico_obj_, c_coreset_weights, c_points)
144144

145145
self._coreset_weights: np.ndarray = np.ctypeslib.as_array(c_coreset_weights)[

0 commit comments

Comments
 (0)