Skip to content

Commit 52b1146

Browse files
committed
[yugabyte#24947] DocDB: Fetch intents DB when querying vector index
Summary: The vector index is updated only when a transaction is applied. So we could get into a situation where a transaction was committed, but not yet applied. In this case we should scan the intents DB for updated vectors and take them into account when populating the read result. Jira: DB-14085 Test Plan: PgVectorIndexTest.NotApplied/* Reviewers: arybochkin Reviewed By: arybochkin Subscribers: ybase, yql Tags: #jenkins-ready Differential Revision: https://phorge.dev.yugabyte.com/D40042
1 parent 81f32da commit 52b1146

25 files changed

+339
-229
lines changed

src/yb/docdb/doc_reader.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ Result<DocHybridTime> GetTableTombstoneTime(
220220
auto iter = CreateIntentAwareIterator(
221221
doc_db, BloomFilterMode::USE_BLOOM_FILTER, table_id, rocksdb::kDefaultQueryId, txn_op_context,
222222
read_operation_data.WithStatistics(nullptr));
223-
iter->Seek(table_id);
223+
iter->Seek(table_id, SeekFilter::kAll);
224224
const auto& entry_data = VERIFY_RESULT_REF(iter->Fetch());
225225
if (!entry_data || !entry_data.value.FirstByteIs(dockv::ValueEntryTypeAsChar::kTombstone) ||
226226
entry_data.key != table_id) {
@@ -253,7 +253,7 @@ Result<std::optional<SubDocument>> TEST_GetSubDocument(
253253
DOCDB_DEBUG_LOG("GetSubDocument for key $0 @ $1", sub_doc_key.ToDebugHexString(),
254254
iter->read_time().ToString());
255255

256-
iter->Seek(sub_doc_key);
256+
iter->Seek(sub_doc_key, SeekFilter::kAll);
257257
const auto& fetched = VERIFY_RESULT_REF(iter->Fetch());
258258
if (!fetched || !fetched.key.starts_with(sub_doc_key)) {
259259
return std::nullopt;
@@ -1845,7 +1845,7 @@ Result<DocReaderResult> DocDBTableReader::Get(
18451845
// It means that other columns have NULL values, so if such column present, then
18461846
// we should return row consisting of NULLs.
18471847
// Here we check if there are columns values not listed in projection.
1848-
data_.iter->Seek(root_doc_key->AsSlice());
1848+
data_.iter->Seek(root_doc_key->AsSlice(), SeekFilter::kAll);
18491849
const auto& new_fetched_entry = VERIFY_RESULT_REF(data_.iter->Fetch());
18501850
if (!new_fetched_entry) {
18511851
return DocReaderResult::kNotFound;

src/yb/docdb/doc_reader_redis.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ Status BuildSubDocument(
194194
? doc_value.timestamp()
195195
: write_time.hybrid_time().GetPhysicalValueMicros());
196196
if (!data.high_index->CanInclude(current_values_observed)) {
197-
iter->SeekOutOfSubDoc(&key_copy);
197+
iter->SeekOutOfSubDoc(SeekFilter::kAll, &key_copy);
198198
DCHECK(iter->Fetch().ok()); // Enforce call to Fetch in debug mode
199199
return Status::OK();
200200
}
@@ -203,7 +203,7 @@ Status BuildSubDocument(
203203
}
204204
(*num_values_observed)++;
205205
VLOG(3) << "SeekOutOfSubDoc: " << SubDocKey::DebugSliceToString(key);
206-
iter->SeekOutOfSubDoc(&key_copy);
206+
iter->SeekOutOfSubDoc(SeekFilter::kAll, &key_copy);
207207
DCHECK(iter->Fetch().ok()); // Enforce call to Fetch in debug mode
208208
return Status::OK();
209209
} else {
@@ -221,7 +221,7 @@ Status BuildSubDocument(
221221
iter, data.Adjusted(key, &descendant), low_ts,
222222
num_values_observed));
223223
}
224-
iter->Revalidate();
224+
iter->Revalidate(SeekFilter::kAll);
225225
if (descendant.value_type() == ValueEntryType::kInvalid) {
226226
// The document was not found in this level (maybe a tombstone was encountered).
227227
continue;
@@ -426,7 +426,7 @@ Status GetRedisSubDocument(
426426
if (seek_fwd_suffices) {
427427
db_iter->SeekForward(key_slice);
428428
} else {
429-
db_iter->Seek(key_slice);
429+
db_iter->Seek(key_slice, SeekFilter::kAll);
430430
}
431431
{
432432
auto temp_key = data.subdocument_key;
@@ -470,7 +470,7 @@ Status GetRedisSubDocument(
470470
upperbound_buffer.Append(key_slice.WithoutPrefix(upperbound_buffer.size()));
471471
upperbound_buffer.PushBack(dockv::KeyEntryTypeAsChar::kHighest);
472472
IntentAwareIteratorUpperboundScope upperbound_scope2(upperbound_buffer.AsSlice(), db_iter);
473-
db_iter->Revalidate();
473+
db_iter->Revalidate(SeekFilter::kAll);
474474
RETURN_NOT_OK(BuildSubDocument(db_iter, data, max_overwrite_ht,
475475
&num_values_observed));
476476
*data.doc_found = data.result->value_type() != ValueEntryType::kInvalid;
@@ -518,7 +518,7 @@ Status GetRedisSubDocument(
518518
}
519519
// Make sure the iterator is placed outside the whole document in the end.
520520
key_bytes.Truncate(dockey_size);
521-
db_iter->SeekOutOfSubDoc(&key_bytes);
521+
db_iter->SeekOutOfSubDoc(SeekFilter::kAll, &key_bytes);
522522
return Status::OK();
523523
}
524524

src/yb/docdb/doc_rowwise_iterator.cc

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -153,9 +153,16 @@ void DocRowwiseIterator::InitResult() {
153153
}
154154
}
155155

156+
void DocRowwiseIterator::Refresh(SeekFilter seek_filter) {
157+
done_ = false;
158+
seek_filter_ = seek_filter;
159+
}
160+
156161
inline void DocRowwiseIterator::Seek(Slice key) {
157162
VLOG_WITH_FUNC(3) << " Seeking to " << key << "/" << dockv::DocKey::DebugSliceToString(key);
158163

164+
DCHECK(!done_);
165+
159166
prev_doc_found_ = DocReaderResult::kNotFound;
160167

161168
// We do not have values before dockv::KeyEntryTypeAsChar::kNullLow, but there is
@@ -164,18 +171,18 @@ inline void DocRowwiseIterator::Seek(Slice key) {
164171
// Another option would be changing kLowest value to kNullLow. But there are much more scenarios
165172
// that could be affected and should be tested.
166173
if (!key.empty() && key[0] >= dockv::KeyEntryTypeAsChar::kNullLow) {
167-
db_iter_->Seek(key, Full::kTrue);
174+
db_iter_->Seek(key, seek_filter_, Full::kTrue);
168175
return;
169176
}
170177

171178
auto shared_prefix = shared_key_prefix();
172179
if (!shared_prefix.empty()) {
173-
db_iter_->Seek(shared_prefix, Full::kFalse);
180+
db_iter_->Seek(shared_prefix, seek_filter_, Full::kFalse);
174181
return;
175182
}
176183

177184
const auto null_low = dockv::KeyEntryTypeAsChar::kNullLow;
178-
db_iter_->Seek(Slice(&null_low, 1), Full::kFalse);
185+
db_iter_->Seek(Slice(&null_low, 1), seek_filter_, Full::kFalse);
179186
}
180187

181188
inline void DocRowwiseIterator::SeekPrevDocKey(Slice key) {
@@ -190,18 +197,20 @@ inline void DocRowwiseIterator::SeekPrevDocKey(Slice key) {
190197

191198
Status DocRowwiseIterator::AdvanceIteratorToNextDesiredRow(bool row_finished,
192199
bool current_fetched_row_skipped) {
193-
if (!IsFetchedRowStatic() &&
200+
if (seek_filter_ == SeekFilter::kAll && !IsFetchedRowStatic() &&
194201
VERIFY_RESULT(scan_choices_->AdvanceToNextRow(&row_key_, db_iter_.get(),
195202
current_fetched_row_skipped))) {
196203
return Status::OK();
197204
}
198205
if (!is_forward_scan_) {
199206
VLOG(4) << __PRETTY_FUNCTION__ << " setting as PrevDocKey";
207+
RSTATUS_DCHECK_EQ(seek_filter_, SeekFilter::kAll, IllegalState,
208+
"Backward scan is not supported with this filter");
200209
db_iter_->PrevDocKey(row_key_);
201210
} else if (row_finished) {
202-
db_iter_->Revalidate();
211+
db_iter_->Revalidate(seek_filter_);
203212
} else {
204-
db_iter_->SeekOutOfSubDoc(&row_key_);
213+
db_iter_->SeekOutOfSubDoc(seek_filter_, &row_key_);
205214
}
206215

207216
return Status::OK();

src/yb/docdb/doc_rowwise_iterator.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ class DocRowwiseIterator final : public DocRowwiseIteratorBase {
8080

8181
void Seek(Slice key) override;
8282

83+
// Refreshes the iterator if it was in finished state.
84+
// filter - filter mode that should be used with refreshed iterator.
85+
void Refresh(SeekFilter filter);
86+
8387
HybridTime TEST_MaxSeenHt() override;
8488

8589
// key slice should point to block of memory, that contains kHighest after the end.
@@ -161,6 +165,8 @@ class DocRowwiseIterator final : public DocRowwiseIteratorBase {
161165
bool use_fast_backward_scan_ = false;
162166

163167
DeadlineInfo deadline_info_;
168+
169+
SeekFilter seek_filter_ = SeekFilter::kAll;
164170
};
165171

166172
} // namespace docdb

src/yb/docdb/doc_write_batch.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ Status DocWriteBatch::SeekToKeyPrefix(IntentAwareIterator* doc_iter, HasAncestor
110110
const auto prev_key_prefix_exact = current_entry_.found_exact_key_prefix;
111111

112112
// Seek the value.
113-
doc_iter->Seek(key_prefix_.AsSlice());
113+
doc_iter->Seek(key_prefix_.AsSlice(), SeekFilter::kAll);
114114
VLOG_WITH_FUNC(4) << SubDocKey::DebugSliceToString(key_prefix_.AsSlice())
115115
<< ", prev_subdoc_ht: " << prev_subdoc_ht
116116
<< ", prev_key_prefix_exact: " << prev_key_prefix_exact

src/yb/docdb/docdb_fwd.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
namespace yb::docdb {
2929

30+
class BoundedRocksDbIterator;
3031
class ConsensusFrontier;
3132
class DeadlineInfo;
3233
class DocDBCompactionFilterFactory;
@@ -69,6 +70,7 @@ struct ObjectLockPrefix;
6970
struct PgsqlReadOperationData;
7071
struct ReadOperationData;
7172
struct VectorIndexInsertEntry;
73+
struct VectorIndexSearchResultEntry;
7274

7375
using DocKeyHash = uint16_t;
7476
using DocReadContextPtr = std::shared_ptr<DocReadContext>;
@@ -88,6 +90,7 @@ using VectorIndexPtr = std::shared_ptr<VectorIndex>;
8890
using VectorIndexes = std::vector<VectorIndexPtr>;
8991
using VectorIndexesPtr = std::shared_ptr<VectorIndexes>;
9092
using VectorIndexInsertEntries = std::vector<VectorIndexInsertEntry>;
93+
using VectorIndexSearchResult = std::vector<VectorIndexSearchResultEntry>;
9194

9295
YB_STRONGLY_TYPED_BOOL(SkipFlush);
9396
YB_STRONGLY_TYPED_BOOL(SkipSeek);

src/yb/docdb/docrowwiseiterator-test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2012,7 +2012,7 @@ SubDocKey(DocKey([], ["row2", 22222]), [SystemColumnId(0); HT{ physical: 1000 }]
20122012
if (use_seek_forward) {
20132013
iter.SeekForward(kEncodedDocKey2);
20142014
} else {
2015-
iter.Seek(kEncodedDocKey2);
2015+
iter.Seek(kEncodedDocKey2, SeekFilter::kAll);
20162016
}
20172017
if (VLOG_IS_ON(1)) {
20182018
iter.DebugDump();

src/yb/docdb/intent_aware_iterator.cc

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -232,11 +232,12 @@ IntentAwareIterator::IntentAwareIterator(
232232
}
233233

234234
void IntentAwareIterator::Seek(const dockv::DocKey &doc_key) {
235-
Seek(doc_key.Encode(), Full::kFalse);
235+
Seek(doc_key.Encode(), SeekFilter::kAll, Full::kFalse);
236236
}
237237

238-
void IntentAwareIterator::Seek(Slice key, Full full) {
239-
VLOG_WITH_FUNC(4) << "key: " << DebugDumpKeyToStr(key) << ", full: " << full;
238+
void IntentAwareIterator::Seek(Slice key, SeekFilter filter, Full full) {
239+
VLOG_WITH_FUNC(4)
240+
<< "key: " << DebugDumpKeyToStr(key) << ", full: " << full << ", filter: " << filter;
240241
DOCDB_DEBUG_SCOPE_LOG(
241242
key.ToDebugString(),
242243
std::bind(&IntentAwareIterator::DebugDump, this));
@@ -246,7 +247,17 @@ void IntentAwareIterator::Seek(Slice key, Full full) {
246247

247248
SeekTriggered();
248249

249-
SkipFutureRecords<Direction::kForward>(ROCKSDB_SEEK(&iter_, key));
250+
[&] {
251+
switch (filter) {
252+
case SeekFilter::kAll:
253+
SkipFutureRecords<Direction::kForward>(ROCKSDB_SEEK(&iter_, key));
254+
return;
255+
case SeekFilter::kIntentsOnly:
256+
regular_entry_.Reset();
257+
return;
258+
}
259+
FATAL_INVALID_ENUM_VALUE(SeekFilter, filter);
260+
}();
250261
if (intent_iter_.Initialized()) {
251262
if (!SetIntentUpperbound()) {
252263
return;
@@ -348,7 +359,7 @@ void IntentAwareIterator::SeekPastSubKey(Slice key) {
348359
FillEntry();
349360
}
350361

351-
void IntentAwareIterator::SeekOutOfSubDoc(KeyBytes* key_bytes) {
362+
void IntentAwareIterator::SeekOutOfSubDoc(SeekFilter filter, KeyBytes* key_bytes) {
352363
VLOG_WITH_FUNC(4) << DebugDumpKeyToStr(*key_bytes);
353364
if (!status_.ok()) {
354365
return;
@@ -358,7 +369,17 @@ void IntentAwareIterator::SeekOutOfSubDoc(KeyBytes* key_bytes) {
358369

359370
auto prefix_len = intent_iter_.Initialized()
360371
? IntentPrepareSeek(*key_bytes, KeyEntryTypeAsChar::kMaxByte) : 0;
361-
SkipFutureRecords<Direction::kForward>(docdb::SeekOutOfSubKey(key_bytes, &iter_));
372+
[&] {
373+
switch (filter) {
374+
case SeekFilter::kAll:
375+
SkipFutureRecords<Direction::kForward>(docdb::SeekOutOfSubKey(key_bytes, &iter_));
376+
return;
377+
case SeekFilter::kIntentsOnly:
378+
regular_entry_.Reset();
379+
return;
380+
}
381+
FATAL_INVALID_ENUM_VALUE(SeekFilter, filter);
382+
}();
362383
IntentSeekForward(prefix_len);
363384
FillEntry();
364385
}
@@ -645,7 +666,7 @@ void IntentAwareIterator::SeekToLatestSubDocKeyInternal() {
645666
return;
646667
}
647668
subdockey_slice.remove_suffix(1);
648-
Seek(subdockey_slice);
669+
Seek(subdockey_slice, SeekFilter::kAll);
649670
}
650671

651672
void IntentAwareIterator::SeekToLatestDocKeyInternal() {
@@ -656,13 +677,23 @@ void IntentAwareIterator::SeekToLatestDocKeyInternal() {
656677
if (!HandleStatus(dockey_size)) {
657678
return;
658679
}
659-
Seek(Slice(subdockey_slice.data(), *dockey_size));
680+
Seek(Slice(subdockey_slice.data(), *dockey_size), SeekFilter::kAll);
660681
}
661682

662-
void IntentAwareIterator::Revalidate() {
683+
void IntentAwareIterator::Revalidate(SeekFilter seek_filter) {
663684
VLOG_WITH_FUNC(4);
664685

665-
SkipFutureRecords<Direction::kForward>(iter_.Entry());
686+
[&] {
687+
switch (seek_filter) {
688+
case SeekFilter::kAll:
689+
SkipFutureRecords<Direction::kForward>(iter_.Entry());
690+
return;
691+
case SeekFilter::kIntentsOnly:
692+
regular_entry_.Reset();
693+
return;
694+
}
695+
FATAL_INVALID_ENUM_VALUE(SeekFilter, seek_filter);
696+
}();
666697
if (intent_iter_.Initialized()) {
667698
if (!SetIntentUpperbound()) {
668699
return;

src/yb/docdb/intent_aware_iterator.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@ class IntentAwareIterator final : public IntentAwareIteratorIf {
6262
IntentAwareIterator(const IntentAwareIterator& other) = delete;
6363
void operator=(const IntentAwareIterator& other) = delete;
6464

65-
void Revalidate();
65+
void Revalidate(SeekFilter seek_filter);
6666

6767
// Seek to the smallest key which is greater or equal than doc_key.
6868
void Seek(const dockv::DocKey& doc_key);
6969

70-
void Seek(Slice key, Full full = Full::kTrue) override;
70+
void Seek(Slice key, SeekFilter seek_filter, Full full = Full::kTrue) override;
7171

7272
// Seek forward to specified encoded key (it is responsibility of caller to make sure it
7373
// doesn't have hybrid time).
@@ -82,7 +82,7 @@ class IntentAwareIterator final : public IntentAwareIteratorIf {
8282
// For efficiency, this overload takes a non-const KeyBytes pointer avoids memory allocation by
8383
// using the KeyBytes buffer to prepare the key to seek to by appending an extra byte. The
8484
// appended byte is removed when the method returns.
85-
void SeekOutOfSubDoc(dockv::KeyBytes* key_bytes) override;
85+
void SeekOutOfSubDoc(SeekFilter seek_filter, dockv::KeyBytes* key_bytes) override;
8686

8787
// Seek to last doc key.
8888
void SeekToLastDocKey();
@@ -139,10 +139,6 @@ class IntentAwareIterator final : public IntentAwareIteratorIf {
139139
// Returns HybridTime::kInvalid if no such record was found.
140140
Result<HybridTime> FindOldestRecord(Slice key_without_ht, HybridTime min_hybrid_time);
141141

142-
size_t NumberOfBytesAppendedDuringSeekForward() const {
143-
return 1 + encoded_read_time_.global_limit.size();
144-
}
145-
146142
void DebugDump();
147143

148144
std::string DebugPosToString() override;

src/yb/docdb/intent_aware_iterator_interface.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ namespace yb {
2121
namespace docdb {
2222

2323
YB_DEFINE_ENUM(Direction, (kForward)(kBackward));
24+
YB_DEFINE_ENUM(SeekFilter, (kAll)(kIntentsOnly));
2425
YB_STRONGLY_TYPED_BOOL(Full);
2526

2627
struct FetchedEntry {
@@ -75,7 +76,8 @@ class IntentAwareIteratorIf {
7576
// hybrid time).
7677
// full means that key was fully specified, and we could add intent type at the end of the key,
7778
// to skip read only intents.
78-
virtual void Seek(Slice key, Full full = Full::kTrue) = 0;
79+
// filter - allows to ignore regular records during seek.
80+
virtual void Seek(Slice key, SeekFilter filter, Full full = Full::kTrue) = 0;
7981

8082
// Seek forward to specified encoded key (it is responsibility of caller to make sure it
8183
// doesn't have hybrid time). For efficiency, the method that takes a non-const KeyBytes pointer
@@ -88,7 +90,8 @@ class IntentAwareIteratorIf {
8890
// time). For efficiency, the method takes a non-const KeyBytes pointer avoids memory allocation
8991
// by using the KeyBytes buffer to prepare the key to seek to by appending an extra byte. The
9092
// appended byte is removed when the method returns.
91-
virtual void SeekOutOfSubDoc(dockv::KeyBytes* key_bytes) = 0;
93+
// filter - allows to ignore regular records during seek.
94+
virtual void SeekOutOfSubDoc(SeekFilter filter, dockv::KeyBytes* key_bytes) = 0;
9295

9396
// Positions the iterator at the beginning of the DocKey found before the given encoded_doc_key.
9497
// If fast backward scan is enabled, the method positions the iterator at the end (at the last

0 commit comments

Comments
 (0)