Skip to content

Commit 48bf10f

Browse files
ckennelly and copybara-github
authored and committed
Record insert misses in hashtable profiling.
By comparing to the total number of objects, we can better determine the hit/miss ratio of various call sites and suitable container reservation sizes based on typical inputs.

PiperOrigin-RevId: 833469575
Change-Id: I2583676cefaf42b416adf172328a824b630e24b4
1 parent c81100b commit 48bf10f

File tree

6 files changed

+75
-0
lines changed

6 files changed

+75
-0
lines changed

absl/container/internal/hashtablez_sampler.cc

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,7 @@ void HashtablezInfo::PrepareForSampling(int64_t stride,
8181
capacity.store(0, std::memory_order_relaxed);
8282
size.store(0, std::memory_order_relaxed);
8383
num_erases.store(0, std::memory_order_relaxed);
84+
num_insert_hits.store(0, std::memory_order_relaxed);
8485
num_rehashes.store(0, std::memory_order_relaxed);
8586
max_probe_length.store(0, std::memory_order_relaxed);
8687
total_probe_length.store(0, std::memory_order_relaxed);

absl/container/internal/hashtablez_sampler.h

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,7 @@ struct HashtablezInfo : public profiling_internal::Sample<HashtablezInfo> {
8282
std::atomic<size_t> capacity;
8383
std::atomic<size_t> size;
8484
std::atomic<size_t> num_erases;
85+
std::atomic<size_t> num_insert_hits;
8586
std::atomic<size_t> num_rehashes;
8687
std::atomic<size_t> max_probe_length;
8788
std::atomic<size_t> total_probe_length;
@@ -111,6 +112,16 @@ struct HashtablezInfo : public profiling_internal::Sample<HashtablezInfo> {
111112

112113
void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length);
113114

115+
// Adds one to info->num_insert_hits. Inline to avoid calling convention
116+
// overhead for an otherwise lightweight operation.
117+
inline void RecordInsertHitSlow(HashtablezInfo* info) {
118+
// Uses a relaxed load + store rather than fetch_add since no other thread
119+
// should be mutating the table simultaneously without synchronization.
120+
info->num_insert_hits.store(
121+
info->num_insert_hits.load(std::memory_order_relaxed) + 1,
122+
std::memory_order_relaxed);
123+
}
124+
114125
void RecordReservationSlow(HashtablezInfo* info, size_t target_capacity);
115126

116127
void RecordClearedReservationSlow(HashtablezInfo* info);
@@ -184,6 +195,11 @@ class HashtablezInfoHandle {
184195
RecordEraseSlow(info_);
185196
}
186197

198+
inline void RecordInsertHit() {  // Forwards an insert hit to RecordInsertHitSlow().
199+
if (ABSL_PREDICT_TRUE(info_ == nullptr)) return;  // info_ is null: nothing to record.
200+
RecordInsertHitSlow(info_);
201+
}
202+
187203
friend inline void swap(HashtablezInfoHandle& lhs,
188204
HashtablezInfoHandle& rhs) {
189205
std::swap(lhs.info_, rhs.info_);
@@ -210,6 +226,7 @@ class HashtablezInfoHandle {
210226
inline void RecordInsertMiss(size_t /*hash*/,
211227
size_t /*distance_from_desired*/) {}
212228
inline void RecordErase() {}
229+
inline void RecordInsertHit() {}  // Intentionally empty, like the neighboring Record* stubs.
213230

214231
friend inline void swap(HashtablezInfoHandle& /*lhs*/,
215232
HashtablezInfoHandle& /*rhs*/) {}

absl/container/internal/hashtablez_sampler_test.cc

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,7 @@ TEST(HashtablezInfoTest, PrepareForSampling) {
9999
EXPECT_EQ(info.capacity.load(), 0);
100100
EXPECT_EQ(info.size.load(), 0);
101101
EXPECT_EQ(info.num_erases.load(), 0);
102+
EXPECT_EQ(info.num_insert_hits.load(), 0);
102103
EXPECT_EQ(info.num_rehashes.load(), 0);
103104
EXPECT_EQ(info.max_probe_length.load(), 0);
104105
EXPECT_EQ(info.total_probe_length.load(), 0);
@@ -116,6 +117,7 @@ TEST(HashtablezInfoTest, PrepareForSampling) {
116117
info.capacity.store(1, std::memory_order_relaxed);
117118
info.size.store(1, std::memory_order_relaxed);
118119
info.num_erases.store(1, std::memory_order_relaxed);
120+
info.num_insert_hits.store(1, std::memory_order_relaxed);
119121
info.max_probe_length.store(1, std::memory_order_relaxed);
120122
info.total_probe_length.store(1, std::memory_order_relaxed);
121123
info.hashes_bitwise_or.store(1, std::memory_order_relaxed);
@@ -131,6 +133,7 @@ TEST(HashtablezInfoTest, PrepareForSampling) {
131133
EXPECT_EQ(info.capacity.load(), 0);
132134
EXPECT_EQ(info.size.load(), 0);
133135
EXPECT_EQ(info.num_erases.load(), 0);
136+
EXPECT_EQ(info.num_insert_hits.load(), 0);
134137
EXPECT_EQ(info.num_rehashes.load(), 0);
135138
EXPECT_EQ(info.max_probe_length.load(), 0);
136139
EXPECT_EQ(info.total_probe_length.load(), 0);
@@ -221,6 +224,25 @@ TEST(HashtablezInfoTest, RecordErase) {
221224
EXPECT_EQ(info.soo_capacity, 1);
222225
}
223226

227+
TEST(HashtablezInfoTest, RecordInsertHit) {  // Each RecordInsertHitSlow() call adds one to num_insert_hits.
228+
const int64_t test_stride = 31;
229+
const size_t test_element_size = 29;
230+
const size_t test_key_size = 27;
231+
const size_t test_value_size = 25;
232+
233+
HashtablezInfo info;
234+
absl::MutexLock l(info.init_mu);  // Held while PrepareForSampling() is called below.
235+
info.PrepareForSampling(test_stride, test_element_size,
236+
/*key_size=*/test_key_size,
237+
/*value_size=*/test_value_size,
238+
/*soo_capacity_value=*/1);
239+
EXPECT_EQ(info.num_insert_hits.load(), 0);  // Counter reads zero right after PrepareForSampling().
240+
RecordInsertHitSlow(&info);
241+
EXPECT_EQ(info.num_insert_hits.load(), 1);  // First recorded hit.
242+
RecordInsertHitSlow(&info);
243+
EXPECT_EQ(info.num_insert_hits.load(), 2);  // Hits accumulate across calls.
244+
}
245+
224246
TEST(HashtablezInfoTest, RecordRehash) {
225247
const int64_t test_stride = 33;
226248
const size_t test_element_size = 31;

absl/container/internal/raw_hash_set.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3173,6 +3173,7 @@ class raw_hash_set {
31733173
}
31743174
if (!empty()) {
31753175
if (equal_to(key, single_slot())) {
3176+
common().infoz().RecordInsertHit();
31763177
return {single_iterator(), false};
31773178
}
31783179
}
@@ -3204,6 +3205,7 @@ class raw_hash_set {
32043205
if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) {
32053206
index = seq.offset(i);
32063207
inserted = false;
3208+
common().infoz().RecordInsertHit();
32073209
return;
32083210
}
32093211
}

absl/container/internal/raw_hash_set_test.cc

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2790,6 +2790,7 @@ TYPED_TEST(RawHashSamplerTest, Sample) {
27902790
absl::flat_hash_set<const HashtablezInfo*> preexisting_info(10);
27912791
absl::flat_hash_map<size_t, int> observed_checksums(10);
27922792
absl::flat_hash_map<ssize_t, int> reservations(10);
2793+
absl::flat_hash_map<std::pair<size_t, size_t>, int> hit_misses(10);
27932794

27942795
start_size += sampler.Iterate([&](const HashtablezInfo& info) {
27952796
preexisting_info.insert(&info);
@@ -2802,14 +2803,23 @@ TYPED_TEST(RawHashSamplerTest, Sample) {
28022803

28032804
const bool do_reserve = (i % 10 > 5);
28042805
const bool do_rehash = !do_reserve && (i % 10 > 0);
2806+
const bool do_first_insert_hit = i % 2 == 0;
2807+
const bool do_second_insert_hit = i % 4 == 0;
28052808

28062809
if (do_reserve) {
28072810
// Don't reserve on all tables.
28082811
tables.back().reserve(10 * (i % 10));
28092812
}
28102813

28112814
tables.back().insert(1);
2815+
if (do_first_insert_hit) {
2816+
tables.back().insert(1);
2817+
tables.back().insert(1);
2818+
}
28122819
tables.back().insert(i % 5);
2820+
if (do_second_insert_hit) {
2821+
tables.back().insert(i % 5);
2822+
}
28132823

28142824
if (do_rehash) {
28152825
// Rehash some other tables.
@@ -2823,6 +2833,10 @@ TYPED_TEST(RawHashSamplerTest, Sample) {
28232833
observed_checksums[info.hashes_bitwise_xor.load(
28242834
std::memory_order_relaxed)]++;
28252835
reservations[info.max_reserve.load(std::memory_order_relaxed)]++;
2836+
hit_misses[std::make_pair(
2837+
info.num_insert_hits.load(std::memory_order_relaxed),
2838+
info.size.load(std::memory_order_relaxed))]++;
2839+
28262840
EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type));
28272841
EXPECT_EQ(info.key_size, sizeof(typename TypeParam::key_type));
28282842
EXPECT_EQ(info.value_size, sizeof(typename TypeParam::value_type));
@@ -2850,6 +2864,21 @@ TYPED_TEST(RawHashSamplerTest, Sample) {
28502864
EXPECT_NEAR((100 * count) / static_cast<double>(tables.size()), 0.1, 0.05)
28512865
<< reservation;
28522866
}
2867+
2868+
EXPECT_THAT(hit_misses, testing::SizeIs(6));
2869+
const double sampled_tables = end_size - start_size;
2870+
// i % 20: { 1, 11 }
2871+
EXPECT_NEAR((hit_misses[{1, 1}] / sampled_tables), 0.10, 0.02);
2872+
// i % 20: { 6 }
2873+
EXPECT_NEAR((hit_misses[{3, 1}] / sampled_tables), 0.05, 0.02);
2874+
// i % 20: { 0, 4, 8, 12 }
2875+
EXPECT_NEAR((hit_misses[{3, 2}] / sampled_tables), 0.20, 0.02);
2876+
// i % 20: { 2, 10, 14, 18 }
2877+
EXPECT_NEAR((hit_misses[{2, 2}] / sampled_tables), 0.20, 0.02);
2878+
// i % 20: { 16 }
2879+
EXPECT_NEAR((hit_misses[{4, 1}] / sampled_tables), 0.05, 0.02);
2880+
// i % 20: { 3, 5, 7, 9, 13, 15, 17, 19 }
2881+
EXPECT_NEAR((hit_misses[{0, 2}] / sampled_tables), 0.40, 0.02);
28532882
}
28542883

28552884
std::vector<const HashtablezInfo*> SampleSooMutation(

absl/profiling/hashtable.cc

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ StatusOr<std::string> MarshalHashtableProfile(
6060
const auto capacity_id = builder.InternString("capacity");
6161
const auto size_id = builder.InternString("size");
6262
const auto num_erases_id = builder.InternString("num_erases");
63+
const auto num_insert_hits_id = builder.InternString("num_insert_hits");
6364
const auto num_rehashes_id = builder.InternString("num_rehashes");
6465
const auto max_probe_length_id = builder.InternString("max_probe_length");
6566
const auto total_probe_length_id = builder.InternString("total_probe_length");
@@ -89,6 +90,9 @@ StatusOr<std::string> MarshalHashtableProfile(
8990
add_label(size_id, info.size.load(std::memory_order_relaxed));
9091
add_label(num_erases_id,
9192
info.num_erases.load(std::memory_order_relaxed));
93+
// TODO(b/436909492): Revisit whether this value is useful.
94+
add_label(num_insert_hits_id,
95+
info.num_insert_hits.load(std::memory_order_relaxed));
9296
add_label(num_rehashes_id,
9397
info.num_rehashes.load(std::memory_order_relaxed));
9498
add_label(max_probe_length_id,

0 commit comments

Comments
 (0)