Skip to content

Commit 65e9022

Browse files
committed
remove usage of non-standard types in std::char_traits
1 parent cbdf843 commit 65e9022

20 files changed

+246
-145
lines changed

extras/rapidfuzz_amalgamated.hpp

Lines changed: 92 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
22
// SPDX-License-Identifier: MIT
33
// RapidFuzz v1.0.2
4-
// Generated: 2024-07-02 16:47:26.932914
4+
// Generated: 2024-10-24 12:06:59.588890
55
// ----------------------------------------------------------
66
// This file is an amalgamation of multiple different files.
77
// You probably shouldn't edit it directly.
@@ -4511,8 +4511,8 @@ void lcs_simd(Range<size_t*> scores, const BlockPatternMatchVector& block, const
45114511
#endif
45124512

45134513
template <size_t N, bool RecordMatrix, typename PMV, typename InputIt1, typename InputIt2>
4514-
auto lcs_unroll(const PMV& block, const Range<InputIt1>&, const Range<InputIt2>& s2, size_t score_cutoff = 0)
4515-
-> LCSseqResult<RecordMatrix>
4514+
auto lcs_unroll(const PMV& block, const Range<InputIt1>&, const Range<InputIt2>& s2,
4515+
size_t score_cutoff = 0) -> LCSseqResult<RecordMatrix>
45164516
{
45174517
uint64_t S[N];
45184518
unroll<size_t, N>([&](size_t i) { S[i] = ~UINT64_C(0); });
@@ -6662,12 +6662,12 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase<CachedJaroWinkler
66626662
};
66636663

66646664
template <typename Sentence1>
6665-
explicit CachedJaroWinkler(const Sentence1& s1_, double _prefix_weight = 0.1)
6666-
-> CachedJaroWinkler<char_type<Sentence1>>;
6665+
explicit CachedJaroWinkler(const Sentence1& s1_,
6666+
double _prefix_weight = 0.1) -> CachedJaroWinkler<char_type<Sentence1>>;
66676667

66686668
template <typename InputIt1>
6669-
CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1)
6670-
-> CachedJaroWinkler<iter_value_t<InputIt1>>;
6669+
CachedJaroWinkler(InputIt1 first1, InputIt1 last1,
6670+
double _prefix_weight = 0.1) -> CachedJaroWinkler<iter_value_t<InputIt1>>;
66716671

66726672
} // namespace rapidfuzz
66736673

@@ -7135,8 +7135,8 @@ size_t levenshtein_hyrroe2003_small_band(const BlockPatternMatchVector& PM, cons
71357135
}
71367136

71377137
template <bool RecordMatrix, typename InputIt1, typename InputIt2>
7138-
auto levenshtein_hyrroe2003_small_band(const Range<InputIt1>& s1, const Range<InputIt2>& s2, size_t max)
7139-
-> LevenshteinResult<RecordMatrix, false>
7138+
auto levenshtein_hyrroe2003_small_band(const Range<InputIt1>& s1, const Range<InputIt2>& s2,
7139+
size_t max) -> LevenshteinResult<RecordMatrix, false>
71407140
{
71417141
assert(max <= s1.size());
71427142
assert(max <= s2.size());
@@ -8358,12 +8358,12 @@ struct CachedLevenshtein : public detail::CachedDistanceBase<CachedLevenshtein<C
83588358
};
83598359

83608360
template <typename Sentence1>
8361-
explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights = {1, 1, 1})
8362-
-> CachedLevenshtein<char_type<Sentence1>>;
8361+
explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights = {
8362+
1, 1, 1}) -> CachedLevenshtein<char_type<Sentence1>>;
83638363

83648364
template <typename InputIt1>
8365-
CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1})
8366-
-> CachedLevenshtein<iter_value_t<InputIt1>>;
8365+
CachedLevenshtein(InputIt1 first1, InputIt1 last1,
8366+
LevenshteinWeightTable aWeights = {1, 1, 1}) -> CachedLevenshtein<iter_value_t<InputIt1>>;
83678367

83688368
} // namespace rapidfuzz
83698369

@@ -9151,35 +9151,39 @@ CachedPrefix(InputIt1 first1, InputIt1 last1) -> CachedPrefix<iter_value_t<Input
91519151

91529152
namespace rapidfuzz {
91539153

9154-
template <typename CharT, typename InputIt1, typename InputIt2>
9155-
std::basic_string<CharT> editops_apply(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
9156-
InputIt2 last2)
9154+
namespace detail {
9155+
template <typename ReturnType, typename InputIt1, typename InputIt2>
9156+
ReturnType editops_apply_impl(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
9157+
InputIt2 last2)
91579158
{
91589159
auto len1 = static_cast<size_t>(std::distance(first1, last1));
91599160
auto len2 = static_cast<size_t>(std::distance(first2, last2));
91609161

9161-
std::basic_string<CharT> res_str;
9162+
ReturnType res_str;
91629163
res_str.resize(len1 + len2);
91639164
size_t src_pos = 0;
91649165
size_t dest_pos = 0;
91659166

91669167
for (const auto& op : ops) {
91679168
/* matches between last and current editop */
91689169
while (src_pos < op.src_pos) {
9169-
res_str[dest_pos] = static_cast<CharT>(first1[static_cast<ptrdiff_t>(src_pos)]);
9170+
res_str[dest_pos] =
9171+
static_cast<typename ReturnType::value_type>(first1[static_cast<ptrdiff_t>(src_pos)]);
91709172
src_pos++;
91719173
dest_pos++;
91729174
}
91739175

91749176
switch (op.type) {
91759177
case EditType::None:
91769178
case EditType::Replace:
9177-
res_str[dest_pos] = static_cast<CharT>(first2[static_cast<ptrdiff_t>(op.dest_pos)]);
9179+
res_str[dest_pos] =
9180+
static_cast<typename ReturnType::value_type>(first2[static_cast<ptrdiff_t>(op.dest_pos)]);
91789181
src_pos++;
91799182
dest_pos++;
91809183
break;
91819184
case EditType::Insert:
9182-
res_str[dest_pos] = static_cast<CharT>(first2[static_cast<ptrdiff_t>(op.dest_pos)]);
9185+
res_str[dest_pos] =
9186+
static_cast<typename ReturnType::value_type>(first2[static_cast<ptrdiff_t>(op.dest_pos)]);
91839187
dest_pos++;
91849188
break;
91859189
case EditType::Delete: src_pos++; break;
@@ -9188,7 +9192,8 @@ std::basic_string<CharT> editops_apply(const Editops& ops, InputIt1 first1, Inpu
91889192

91899193
/* matches after the last editop */
91909194
while (src_pos < len1) {
9191-
res_str[dest_pos] = static_cast<CharT>(first1[static_cast<ptrdiff_t>(src_pos)]);
9195+
res_str[dest_pos] =
9196+
static_cast<typename ReturnType::value_type>(first1[static_cast<ptrdiff_t>(src_pos)]);
91929197
src_pos++;
91939198
dest_pos++;
91949199
}
@@ -9197,35 +9202,30 @@ std::basic_string<CharT> editops_apply(const Editops& ops, InputIt1 first1, Inpu
91979202
return res_str;
91989203
}
91999204

9200-
template <typename CharT, typename Sentence1, typename Sentence2>
9201-
std::basic_string<CharT> editops_apply(const Editops& ops, const Sentence1& s1, const Sentence2& s2)
9202-
{
9203-
return editops_apply<CharT>(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2),
9204-
detail::to_end(s2));
9205-
}
9206-
9207-
template <typename CharT, typename InputIt1, typename InputIt2>
9208-
std::basic_string<CharT> opcodes_apply(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
9209-
InputIt2 last2)
9205+
template <typename ReturnType, typename InputIt1, typename InputIt2>
9206+
ReturnType opcodes_apply_impl(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
9207+
InputIt2 last2)
92109208
{
92119209
auto len1 = static_cast<size_t>(std::distance(first1, last1));
92129210
auto len2 = static_cast<size_t>(std::distance(first2, last2));
92139211

9214-
std::basic_string<CharT> res_str;
9212+
ReturnType res_str;
92159213
res_str.resize(len1 + len2);
92169214
size_t dest_pos = 0;
92179215

92189216
for (const auto& op : ops) {
92199217
switch (op.type) {
92209218
case EditType::None:
92219219
for (auto i = op.src_begin; i < op.src_end; ++i) {
9222-
res_str[dest_pos++] = static_cast<CharT>(first1[static_cast<ptrdiff_t>(i)]);
9220+
res_str[dest_pos++] =
9221+
static_cast<typename ReturnType::value_type>(first1[static_cast<ptrdiff_t>(i)]);
92239222
}
92249223
break;
92259224
case EditType::Replace:
92269225
case EditType::Insert:
92279226
for (auto i = op.dest_begin; i < op.dest_end; ++i) {
9228-
res_str[dest_pos++] = static_cast<CharT>(first2[static_cast<ptrdiff_t>(i)]);
9227+
res_str[dest_pos++] =
9228+
static_cast<typename ReturnType::value_type>(first2[static_cast<ptrdiff_t>(i)]);
92299229
}
92309230
break;
92319231
case EditType::Delete: break;
@@ -9236,11 +9236,62 @@ std::basic_string<CharT> opcodes_apply(const Opcodes& ops, InputIt1 first1, Inpu
92369236
return res_str;
92379237
}
92389238

9239+
} // namespace detail
9240+
9241+
/**
 * @brief Applies the edit operations in `ops` and returns the result as a
 * std::basic_string<CharT> (iterator-range overload).
 *
 * Thin wrapper: forwards every argument unchanged to
 * detail::editops_apply_impl, selecting std::basic_string<CharT> as the
 * result container.
 *
 * @note std::basic_string<CharT> depends on std::char_traits<CharT>. For
 * element types such as uint8_t, editops_apply_vec returns a
 * std::vector<CharT> instead.
 *
 * @tparam CharT element type of the returned string
 * @param ops edit operations to apply
 * @param first1, last1 iterator range of the first sequence
 * @param first2, last2 iterator range of the second sequence
 * @return whatever detail::editops_apply_impl produces for this container type
 */
template <typename CharT, typename InputIt1, typename InputIt2>
9242+
std::basic_string<CharT> editops_apply_str(const Editops& ops, InputIt1 first1, InputIt1 last1,
9243+
InputIt2 first2, InputIt2 last2)
9244+
{
9245+
return detail::editops_apply_impl<std::basic_string<CharT>>(ops, first1, last1, first2, last2);
9246+
}
9247+
9248+
/**
 * @brief Applies the edit operations in `ops` and returns the result as a
 * std::basic_string<CharT> (whole-sequence overload).
 *
 * Converts `s1` and `s2` to iterator ranges with detail::to_begin /
 * detail::to_end and forwards them to detail::editops_apply_impl, selecting
 * std::basic_string<CharT> as the result container.
 *
 * @tparam CharT element type of the returned string
 * @param ops edit operations to apply
 * @param s1 first sequence
 * @param s2 second sequence
 * @return whatever detail::editops_apply_impl produces for this container type
 */
template <typename CharT, typename Sentence1, typename Sentence2>
9249+
std::basic_string<CharT> editops_apply_str(const Editops& ops, const Sentence1& s1, const Sentence2& s2)
9250+
{
9251+
return detail::editops_apply_impl<std::basic_string<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
9252+
detail::to_begin(s2), detail::to_end(s2));
9253+
}
9254+
9255+
/**
 * @brief Applies the opcodes in `ops` and returns the result as a
 * std::basic_string<CharT> (iterator-range overload).
 *
 * Thin wrapper: forwards every argument unchanged to
 * detail::opcodes_apply_impl, selecting std::basic_string<CharT> as the
 * result container.
 *
 * @note std::basic_string<CharT> depends on std::char_traits<CharT>. For
 * element types such as uint8_t, opcodes_apply_vec returns a
 * std::vector<CharT> instead.
 *
 * @tparam CharT element type of the returned string
 * @param ops opcodes to apply
 * @param first1, last1 iterator range of the first sequence
 * @param first2, last2 iterator range of the second sequence
 * @return whatever detail::opcodes_apply_impl produces for this container type
 */
template <typename CharT, typename InputIt1, typename InputIt2>
9256+
std::basic_string<CharT> opcodes_apply_str(const Opcodes& ops, InputIt1 first1, InputIt1 last1,
9257+
InputIt2 first2, InputIt2 last2)
9258+
{
9259+
return detail::opcodes_apply_impl<std::basic_string<CharT>>(ops, first1, last1, first2, last2);
9260+
}
9261+
9262+
/**
 * @brief Applies the opcodes in `ops` and returns the result as a
 * std::basic_string<CharT> (whole-sequence overload).
 *
 * Converts `s1` and `s2` to iterator ranges with detail::to_begin /
 * detail::to_end and forwards them to detail::opcodes_apply_impl, selecting
 * std::basic_string<CharT> as the result container.
 *
 * @tparam CharT element type of the returned string
 * @param ops opcodes to apply
 * @param s1 first sequence
 * @param s2 second sequence
 * @return whatever detail::opcodes_apply_impl produces for this container type
 */
template <typename CharT, typename Sentence1, typename Sentence2>
9263+
std::basic_string<CharT> opcodes_apply_str(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2)
9264+
{
9265+
return detail::opcodes_apply_impl<std::basic_string<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
9266+
detail::to_begin(s2), detail::to_end(s2));
9267+
}
9268+
9269+
/**
 * @brief Applies the edit operations in `ops` and returns the result as a
 * std::vector<CharT> (iterator-range overload).
 *
 * Thin wrapper: forwards every argument unchanged to
 * detail::editops_apply_impl, selecting std::vector<CharT> as the result
 * container. Unlike editops_apply_str, the return type does not involve
 * std::char_traits.
 *
 * @tparam CharT element type of the returned vector
 * @param ops edit operations to apply
 * @param first1, last1 iterator range of the first sequence
 * @param first2, last2 iterator range of the second sequence
 * @return whatever detail::editops_apply_impl produces for this container type
 */
template <typename CharT, typename InputIt1, typename InputIt2>
9270+
std::vector<CharT> editops_apply_vec(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
9271+
InputIt2 last2)
9272+
{
9273+
return detail::editops_apply_impl<std::vector<CharT>>(ops, first1, last1, first2, last2);
9274+
}
9275+
9276+
/**
 * @brief Applies the edit operations in `ops` and returns the result as a
 * std::vector<CharT> (whole-sequence overload).
 *
 * Converts `s1` and `s2` to iterator ranges with detail::to_begin /
 * detail::to_end and forwards them to detail::editops_apply_impl, selecting
 * std::vector<CharT> as the result container.
 *
 * @tparam CharT element type of the returned vector
 * @param ops edit operations to apply
 * @param s1 first sequence
 * @param s2 second sequence
 * @return whatever detail::editops_apply_impl produces for this container type
 */
template <typename CharT, typename Sentence1, typename Sentence2>
9277+
std::vector<CharT> editops_apply_vec(const Editops& ops, const Sentence1& s1, const Sentence2& s2)
9278+
{
9279+
return detail::editops_apply_impl<std::vector<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
9280+
detail::to_begin(s2), detail::to_end(s2));
9281+
}
9282+
9283+
/**
 * @brief Applies the opcodes in `ops` and returns the result as a
 * std::vector<CharT> (iterator-range overload).
 *
 * Thin wrapper: forwards every argument unchanged to
 * detail::opcodes_apply_impl, selecting std::vector<CharT> as the result
 * container. Unlike opcodes_apply_str, the return type does not involve
 * std::char_traits.
 *
 * @tparam CharT element type of the returned vector
 * @param ops opcodes to apply
 * @param first1, last1 iterator range of the first sequence
 * @param first2, last2 iterator range of the second sequence
 * @return whatever detail::opcodes_apply_impl produces for this container type
 */
template <typename CharT, typename InputIt1, typename InputIt2>
9284+
std::vector<CharT> opcodes_apply_vec(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
9285+
InputIt2 last2)
9286+
{
9287+
return detail::opcodes_apply_impl<std::vector<CharT>>(ops, first1, last1, first2, last2);
9288+
}
9289+
92399290
template <typename CharT, typename Sentence1, typename Sentence2>
9240-
std::basic_string<CharT> opcodes_apply(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2)
9291+
std::vector<CharT> opcodes_apply_vec(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2)
92419292
{
9242-
return opcodes_apply<CharT>(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2),
9243-
detail::to_end(s2));
9293+
return detail::opcodes_apply_impl<std::vector<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
9294+
detail::to_begin(s2), detail::to_end(s2));
92449295
}
92459296

92469297
} // namespace rapidfuzz
@@ -9669,8 +9720,8 @@ explicit CachedPartialTokenSortRatio(const Sentence1& s1)
96699720
-> CachedPartialTokenSortRatio<char_type<Sentence1>>;
96709721

96719722
template <typename InputIt1>
9672-
CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1)
9673-
-> CachedPartialTokenSortRatio<iter_value_t<InputIt1>>;
9723+
CachedPartialTokenSortRatio(InputIt1 first1,
9724+
InputIt1 last1) -> CachedPartialTokenSortRatio<iter_value_t<InputIt1>>;
96749725

96759726
/**
96769727
* @brief Compares the words in the strings based on unique and common words
@@ -9793,8 +9844,8 @@ template <typename Sentence1>
97939844
explicit CachedPartialTokenSetRatio(const Sentence1& s1) -> CachedPartialTokenSetRatio<char_type<Sentence1>>;
97949845

97959846
template <typename InputIt1>
9796-
CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1)
9797-
-> CachedPartialTokenSetRatio<iter_value_t<InputIt1>>;
9847+
CachedPartialTokenSetRatio(InputIt1 first1,
9848+
InputIt1 last1) -> CachedPartialTokenSetRatio<iter_value_t<InputIt1>>;
97989849

97999850
/**
98009851
* @brief Helper method that returns the maximum of fuzz::token_set_ratio and

fuzzing/fuzz_damerau_levenshtein_distance.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
#include <stdexcept>
99
#include <string>
1010

11-
void validate_distance(size_t reference_dist, const std::basic_string<uint8_t>& s1,
12-
const std::basic_string<uint8_t>& s2, size_t score_cutoff)
11+
void validate_distance(size_t reference_dist, const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2,
12+
size_t score_cutoff)
1313
{
1414
if (reference_dist > score_cutoff) reference_dist = score_cutoff + 1;
1515

@@ -26,7 +26,7 @@ void validate_distance(size_t reference_dist, const std::basic_string<uint8_t>&
2626

2727
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
2828
{
29-
std::basic_string<uint8_t> s1, s2;
29+
std::vector<uint8_t> s1, s2;
3030
if (!extract_strings(data, size, s1, s2)) return 0;
3131

3232
size_t reference_dist = rapidfuzz_reference::damerau_levenshtein_distance(s1, s2);
@@ -40,8 +40,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
4040

4141
/* test long sequences */
4242
for (unsigned int i = 2; i < 9; ++i) {
43-
std::basic_string<uint8_t> s1_ = str_multiply(s1, pow<size_t>(2, i));
44-
std::basic_string<uint8_t> s2_ = str_multiply(s2, pow<size_t>(2, i));
43+
std::vector<uint8_t> s1_ = vec_multiply(s1, pow<size_t>(2, i));
44+
std::vector<uint8_t> s2_ = vec_multiply(s2, pow<size_t>(2, i));
4545

4646
if (s1_.size() > 10000 || s2_.size() > 10000) break;
4747

fuzzing/fuzz_indel_distance.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
#include <stdexcept>
99
#include <string>
1010

11-
void validate_distance(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2,
12-
size_t score_cutoff)
11+
void validate_distance(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2, size_t score_cutoff)
1312
{
1413
auto dist = rapidfuzz::indel_distance(s1, s2, score_cutoff);
1514
auto reference_dist = rapidfuzz_reference::indel_distance(s1, s2, score_cutoff);
@@ -25,7 +24,7 @@ void validate_distance(const std::basic_string<uint8_t>& s1, const std::basic_st
2524

2625
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
2726
{
28-
std::basic_string<uint8_t> s1, s2;
27+
std::vector<uint8_t> s1, s2;
2928
if (!extract_strings(data, size, s1, s2)) return 0;
3029

3130
validate_distance(s1, s2, 0);

fuzzing/fuzz_indel_editops.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99

1010
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
1111
{
12-
std::basic_string<uint8_t> s1, s2;
12+
std::vector<uint8_t> s1, s2;
1313
if (!extract_strings(data, size, s1, s2)) return 0;
1414

1515
size_t score = rapidfuzz_reference::indel_distance(s1, s2);
1616
rapidfuzz::Editops ops = rapidfuzz::indel_editops(s1, s2);
1717

18-
if (ops.size() == score && s2 != rapidfuzz::editops_apply<uint8_t>(ops, s1, s2))
18+
if (ops.size() == score && s2 != rapidfuzz::editops_apply_vec<uint8_t>(ops, s1, s2))
1919
throw std::logic_error("levenshtein_editops failed");
2020

2121
return 0;

fuzzing/fuzz_jaro_similarity.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@ bool is_close(double a, double b, double epsilon)
1414
}
1515

1616
template <size_t MaxLen>
17-
void validate_simd(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2)
17+
void validate_simd(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2)
1818
{
1919
#ifdef RAPIDFUZZ_SIMD
2020
size_t count = s1.size() / MaxLen + ((s1.size() % MaxLen) != 0);
2121
if (count == 0) return;
2222

2323
rapidfuzz::experimental::MultiJaro<MaxLen> scorer(count);
2424

25-
std::vector<std::basic_string<uint8_t>> strings;
25+
std::vector<std::vector<uint8_t>> strings;
2626

2727
for (auto it1 = s1.begin(); it1 != s1.end(); it1 += MaxLen) {
2828
if (std::distance(it1, s1.end()) < static_cast<ptrdiff_t>(MaxLen)) {
@@ -59,7 +59,7 @@ void validate_simd(const std::basic_string<uint8_t>& s1, const std::basic_string
5959
#endif
6060
}
6161

62-
void validate_distance(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2)
62+
void validate_distance(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2)
6363
{
6464
double reference_sim = rapidfuzz_reference::jaro_similarity(s1, s2);
6565
double sim = rapidfuzz::jaro_similarity(s1, s2);
@@ -80,15 +80,15 @@ void validate_distance(const std::basic_string<uint8_t>& s1, const std::basic_st
8080

8181
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
8282
{
83-
std::basic_string<uint8_t> s1, s2;
83+
std::vector<uint8_t> s1, s2;
8484
if (!extract_strings(data, size, s1, s2)) return 0;
8585

8686
validate_distance(s1, s2);
8787

8888
/* test long sequences */
8989
for (unsigned int i = 2; i < 9; ++i) {
90-
std::basic_string<uint8_t> s1_ = str_multiply(s1, pow<size_t>(2, i));
91-
std::basic_string<uint8_t> s2_ = str_multiply(s2, pow<size_t>(2, i));
90+
std::vector<uint8_t> s1_ = vec_multiply(s1, pow<size_t>(2, i));
91+
std::vector<uint8_t> s2_ = vec_multiply(s2, pow<size_t>(2, i));
9292

9393
if (s1_.size() > 10000 || s2_.size() > 10000) break;
9494

fuzzing/fuzz_lcs_similarity.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
#include <string>
1010

1111
template <size_t MaxLen>
12-
void validate_simd(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2)
12+
void validate_simd(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2)
1313
{
1414
#ifdef RAPIDFUZZ_SIMD
1515
size_t count = s1.size() / MaxLen + ((s1.size() % MaxLen) != 0);
1616
rapidfuzz::experimental::MultiLCSseq<MaxLen> scorer(count);
1717

18-
std::vector<std::basic_string<uint8_t>> strings;
18+
std::vector<std::vector<uint8_t>> strings;
1919

2020
for (auto it1 = s1.begin(); it1 != s1.end(); it1 += MaxLen) {
2121
if (std::distance(it1, s1.end()) < static_cast<ptrdiff_t>(MaxLen)) {
@@ -51,7 +51,7 @@ void validate_simd(const std::basic_string<uint8_t>& s1, const std::basic_string
5151

5252
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
5353
{
54-
std::basic_string<uint8_t> s1, s2;
54+
std::vector<uint8_t> s1, s2;
5555
if (!extract_strings(data, size, s1, s2)) {
5656
return 0;
5757
}

0 commit comments

Comments
 (0)