Skip to content

Commit 5cd3998

Browse files
committed
[yugabyte#24958] DocDB: Fix pg_locks test TestWaitStartTimeIsConsistentAcrossWaiterReEntries
Summary: Test `PgGetLockStatusTest.TestWaitStartTimeIsConsistentAcrossWaiterReEntries` relied on sleeps in the test to assert that wait start time of waiter requests remains the same upon re-entries of the request to the wait-queue. The same rpc request could re-enter the wait-queue in the following ways: 1. Waiter is resumed, re-does conflict resolution, finds new blocker(s), enters the queue again 2. Waiter request times out in the wait-queue (30s default), callback is invoked with Timeout status, the host tserver retries the rpc if it is well within the deadline, and the request re-enters the wait-queue. The revision re-constructs the test by introducing a sync point for better synchronization. Additionally, it has one other change - previously, the wait start time during conflict resolution was being set prior to the request entering the wait-queue. The diff modifies this by setting the wait start time post the request enters the wait-queue (this is consistent with the logic that we have in `TryPreWait`). Note that the wait start time field is only used for incrementing the metric `tablet::TabletEventStats::kTotalWaitQueueTime`. Jira: DB-14094 Test Plan: Jenkins ./yb_build.sh fastdebug --cxx-test='TEST_F(PgGetLockStatusTest, TestWaitStartTimeIsConsistentAcrossWaiterReEntries) {' -n 50 --tp 1 Reviewers: pjain, patnaik.balivada Reviewed By: patnaik.balivada Subscribers: yql, ybase Differential Revision: https://phorge.dev.yugabyte.com/D40062
1 parent b426a72 commit 5cd3998

File tree

2 files changed

+27
-8
lines changed

2 files changed

+27
-8
lines changed

src/yb/docdb/conflict_resolution.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,7 @@ class WaitOnConflictResolver : public ConflictResolver {
681681
if (!wait_start_time_.Initialized()) {
682682
wait_start_time_ = MonoTime::Now();
683683
}
684+
DEBUG_ONLY_TEST_SYNC_POINT("ConflictResolver::MaybeSetWaitStartTime");
684685
}
685686

686687
void TryPreWait() {
@@ -705,13 +706,14 @@ class WaitOnConflictResolver : public ConflictResolver {
705706
VTRACE(3, "Waiting on $0 transactions after $1 tries.",
706707
conflict_data_->NumActiveTransactions(), wait_for_iters_);
707708

708-
MaybeSetWaitStartTime();
709-
return wait_queue_->WaitOn(
709+
RETURN_NOT_OK(wait_queue_->WaitOn(
710710
context_->transaction_id(), context_->subtransaction_id(), lock_batch_,
711711
ConsumeTransactionDataAndReset(), status_tablet_id_, serial_no_,
712712
context_->GetTxnStartUs(), request_start_us_, request_id_, deadline_,
713713
std::bind(&WaitOnConflictResolver::GetLockStatusInfo, shared_from(this)),
714-
std::bind(&WaitOnConflictResolver::WaitingDone, shared_from(this), _1, _2));
714+
std::bind(&WaitOnConflictResolver::WaitingDone, shared_from(this), _1, _2)));
715+
MaybeSetWaitStartTime();
716+
return Status::OK();
715717
}
716718

717719
// Note: we must pass in shared_this to keep the WaitOnConflictResolver alive until the wait queue

src/yb/yql/pgwrapper/pg_get_lock_status-test.cc

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -779,10 +779,10 @@ TEST_F(PgGetLockStatusTest, TestPgLocksWhileDMLsInProgress) {
779779
ASSERT_OK(status_future.get());
780780
}
781781

782+
#ifndef NDEBUG
782783
TEST_F(PgGetLockStatusTest, TestWaitStartTimeIsConsistentAcrossWaiterReEntries) {
783784
constexpr int kMinTxnAgeMs = 1;
784-
constexpr auto waiter_refresh_secs = 2;
785-
ANNOTATE_UNPROTECTED_WRITE(FLAGS_refresh_waiter_timeout_ms) = waiter_refresh_secs * 1000;
785+
ANNOTATE_UNPROTECTED_WRITE(FLAGS_refresh_waiter_timeout_ms) = 5 * 1000 * kTimeMultiplier;
786786

787787
auto conn = ASSERT_RESULT(Connect());
788788
ASSERT_OK(conn.ExecuteFormat("SET yb_locks_min_txn_age='$0ms'", kMinTxnAgeMs));
@@ -791,6 +791,15 @@ TEST_F(PgGetLockStatusTest, TestWaitStartTimeIsConsistentAcrossWaiterReEntries)
791791
auto init_stamp = ASSERT_RESULT(conn.FetchRow<MonoDelta>(
792792
"SELECT DISTINCT(waitend) FROM pg_locks WHERE granted"));
793793

794+
std::atomic<uint> total_num_waiting_requests{0};
795+
yb::SyncPoint::GetInstance()->SetCallBack(
796+
"ConflictResolver::MaybeSetWaitStartTime",
797+
[&](void* arg) {
798+
total_num_waiting_requests++;
799+
});
800+
yb::SyncPoint::GetInstance()->ClearTrace();
801+
yb::SyncPoint::GetInstance()->EnableProcessing();
802+
794803
auto status_future_write_req = std::async(std::launch::async, [&]() -> Status {
795804
auto conn = VERIFY_RESULT(Connect());
796805
RETURN_NOT_OK(conn.StartTransaction(IsolationLevel::SNAPSHOT_ISOLATION));
@@ -806,8 +815,12 @@ TEST_F(PgGetLockStatusTest, TestWaitStartTimeIsConsistentAcrossWaiterReEntries)
806815
return Status::OK();
807816
});
808817

809-
810818
SleepFor(2ms * FLAGS_heartbeat_interval_ms * kTimeMultiplier);
819+
// The above two requests (read & write) should have been blocked by session_1 and should have
820+
// entered the wait-queue, resulting in total_num_waiting_requests >= 2.
821+
ASSERT_OK(WaitFor(
822+
[&] { return total_num_waiting_requests >= 2; }, 10s * kTimeMultiplier,
823+
"Long wait for waiting requests to enter the wait-queue"));
811824
auto now_stamp = ASSERT_RESULT(conn.FetchRow<MonoDelta>("SELECT NOW()"));
812825

813826
auto wait_start_time_1_query =
@@ -823,8 +836,11 @@ TEST_F(PgGetLockStatusTest, TestWaitStartTimeIsConsistentAcrossWaiterReEntries)
823836
ASSERT_RESULT(conn.FetchRow<MonoDelta>(wait_start_time_2_query));
824837
ASSERT_LE(init_stamp, wait_start_time_2);
825838
ASSERT_GE(now_stamp, wait_start_time_2);
826-
827-
SleepFor(2 * waiter_refresh_secs * 1s * kTimeMultiplier);
839+
// Wait for the 2 waiter requests in the wait-queue to timeout, and re-enter the queue, resulting
840+
// in total_num_waiting_requests >= 4.
841+
ASSERT_OK(WaitFor(
842+
[&] { return total_num_waiting_requests >= 4; }, 10s * kTimeMultiplier,
843+
"Long wait for waiting requests to re-enter the wait-queue"));
828844
ASSERT_EQ(wait_start_time_1,
829845
ASSERT_RESULT(conn.FetchRow<MonoDelta>(wait_start_time_1_query)));
830846
ASSERT_EQ(wait_start_time_2,
@@ -833,6 +849,7 @@ TEST_F(PgGetLockStatusTest, TestWaitStartTimeIsConsistentAcrossWaiterReEntries)
833849
ASSERT_OK(status_future_write_req.get());
834850
ASSERT_OK(status_future_read_req.get());
835851
}
852+
#endif // NDEBUG
836853

837854
TEST_F(PgGetLockStatusTest, TestLockStatusRespHasHostNodeSet) {
838855
constexpr int kMinTxnAgeMs = 1;

0 commit comments

Comments
 (0)