Skip to content

Commit 136e713

Browse files
author
Adithya Bharadwaj
committed
[yugabyte#15136] CDCSDK: Fix for crash when running cdcsdk with before image
Summary: We observed a crash while running TPCC workload with CDCSDK enabled. The stack trace is: ``` (gdb) bt #0 0x0000557f25b11910 in yb::DatumMessagePB::MergeFrom(yb::DatumMessagePB const&) () #1 0x0000557f258a41ef in yb::cdc::PopulateBeforeImage(std::__1::shared_ptr<yb::tablet::TabletPeer> const&, yb::ReadHybridTime const&, yb::cdc::RowMessage*, std::__1::unordered_map<unsigned int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::hash<unsigned int>, std::__1::equal_to<unsigned int>, std::__1::allocator<std::__1::pair<unsigned int const, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > > const&, std::__1::unordered_map<unsigned int, std::__1::vector<yb::master::PgAttributePB, std::__1::allocator<yb::master::PgAttributePB> >, std::__1::hash<unsigned int>, std::__1::equal_to<unsigned int>, std::__1::allocator<std::__1::pair<unsigned int const, std::__1::vector<yb::master::PgAttributePB, std::__1::allocator<yb::master::PgAttributePB> > > > > const&, yb::docdb::SubDocKey const&, yb::Schema const&, unsigned int) () #2 0x0000557f258a7304 in yb::cdc::PopulateCDCSDKIntentRecord(yb::OpId const&, yb::StronglyTypedUuid<yb::TransactionId_Tag> const&, std::__1::vector<yb::docdb::IntentKeyValueForCDC, std::__1::allocator<yb::docdb::IntentKeyValueForCDC> > const&, yb::cdc::StreamMetadata const&, std::__1::shared_ptr<yb::tablet::TabletPeer> const&, std::__1::unordered_map<unsigned int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::hash<unsigned int>, std::__1::equal_to<unsigned int>, std::__1::allocator<std::__1::pair<unsigned int const, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > > const&, std::__1::unordered_map<unsigned int, std::__1::vector<yb::master::PgAttributePB, std::__1::allocator<yb::master::PgAttributePB> >, std::__1::hash<unsigned int>, std::__1::equal_to<unsigned int>, 
std::__1::allocator<std::__1::pair<unsigned int const, std::__1::vector<yb::master::PgAttributePB, std::__1::allocator<yb::master::PgAttributePB> > > > > const&, yb::cdc::GetChangesResponsePB*, yb::ScopedTrackedConsumption*, unsigned int*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*, yb::Schema*, unsigned int, unsigned long const&) () #3 0x0000557f258aaa27 in yb::cdc::ProcessIntents(yb::OpId const&, yb::StronglyTypedUuid<yb::TransactionId_Tag> const&, yb::cdc::StreamMetadata const&, std::__1::unordered_map<unsigned int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::hash<unsigned int>, std::__1::equal_to<unsigned int>, std::__1::allocator<std::__1::pair<unsigned int const, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > > const&, std::__1::unordered_map<unsigned int, std::__1::vector<yb::master::PgAttributePB, std::__1::allocator<yb::master::PgAttributePB> >, std::__1::hash<unsigned int>, std::__1::equal_to<unsigned int>, std::__1::allocator<std::__1::pair<unsigned int const, std::__1::vector<yb::master::PgAttributePB, std::__1::allocator<yb::master::PgAttributePB> > > > > const&, yb::cdc::GetChangesResponsePB*, yb::ScopedTrackedConsumption*, yb::cdc::CDCSDKCheckpointPB*, std::__1::shared_ptr<yb::tablet::TabletPeer> const&, std::__1::vector<yb::docdb::IntentKeyValueForCDC, std::__1::allocator<yb::docdb::IntentKeyValueForCDC> >*, yb::docdb::ApplyTransactionState*, yb::client::YBClient*, std::__1::shared_ptr<yb::Schema>*, unsigned int*, unsigned long const&) () #4 0x0000557f258b00c1 in yb::cdc::GetChangesForCDCSDK(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, yb::cdc::CDCSDKCheckpointPB const&, yb::cdc::StreamMetadata const&, std::__1::shared_ptr<yb::tablet::TabletPeer> const&, 
std::__1::shared_ptr<yb::MemTracker> const&, std::__1::unordered_map<unsigned int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::hash<unsigned int>, std::__1::equal_to<unsigned int>, std::__1::allocator<std::__1::pair<unsigned int const, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > > const&, std::__1::unordered_map<unsigned int, std::__1::vector<yb::master::PgAttributePB, std::__1::allocator<yb::master::PgAttributePB> >, std::__1::hash<unsigned int>, std::__1::equal_to<unsigned int>, std::__1::allocator<std::__1::pair<unsigned int const, std::__1::vector<yb::master::PgAttributePB, std::__1::allocator<yb::master::PgAttributePB> > > > > const&, yb::client::YBClient*, yb::consensus::ReplicateMsgsHolder*, yb::cdc::GetChangesResponsePB*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*, std::__1::shared_ptr<yb::Schema>*, unsigned int*, yb::OpId*, long*, std::__1::chrono::time_point<yb::CoarseMonoClock, std::__1::chrono::duration<long long, std::__1::ratio<1l, 1000000000l> > >) () #5 0x0000557f2586c448 in yb::cdc::CDCServiceImpl::GetChanges(yb::cdc::GetChangesRequestPB const*, yb::cdc::GetChangesResponsePB*, yb::rpc::RpcContext) () #6 0x0000557f25908246 in std::__1::__function::__func<yb::cdc::CDCServiceIf::InitMethods(scoped_refptr<yb::MetricEntity> const&)::$_3, std::__1::allocator<yb::cdc::CDCServiceIf::InitMethods(scoped_refptr<yb::MetricEntity> const&)::$_3>, void (std::__1::shared_ptr<yb::rpc::InboundCall>)>::operator()(std::__1::shared_ptr<yb::rpc::InboundCall>&&) () #7 0x0000557f2590a6af in yb::cdc::CDCServiceIf::Handle(std::__1::shared_ptr<yb::rpc::InboundCall>) () #8 0x0000557f26227a1e in yb::rpc::ServicePoolImpl::Handle(std::__1::shared_ptr<yb::rpc::InboundCall>) () #9 0x0000557f2616db2f in yb::rpc::InboundCall::InboundCallTask::Run() () #10 0x0000557f26236583 in yb::rpc::(anonymous namespace)::Worker::Execute() () #11 
0x0000557f268698cf in yb::Thread::SuperviseThread(void*) () #12 0x00007fa6fce89694 in ?? () #13 0x0000000000000000 in ?? () ``` The problem is in the method PopulateBeforeImage. When a column is dropped, the row no longer has data for that column, so no entry for it is added to the "old_tuple" member of RowMessage. As a result, the size of "old_tuple" no longer matches the number of columns in the schema, which means this line: "row_message->old_tuple(static_cast<int>(index))" could perform an out-of-bounds access. Instead, we now keep track of the number of columns actually found in the row and use that count as the index into "old_tuple". Test Plan: Running existing ctests Reviewers: srangavajjula, sdash, skumar Reviewed By: sdash, skumar Differential Revision: https://phabricator.dev.yugabyte.com/D21338
1 parent 524fd98 commit 136e713

File tree

2 files changed

+52
-7
lines changed

2 files changed

+52
-7
lines changed

ent/src/yb/cdc/cdcsdk_producer.cc

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ Status PopulateBeforeImage(
227227

228228
std::vector<ColumnSchema> columns(schema.columns());
229229

230+
size_t found_columns = 0;
230231
if (row.ColumnCount() == columns.size()) {
231232
for (size_t index = 0; index < row.ColumnCount(); ++index) {
232233
bool column_updated = false;
@@ -236,25 +237,27 @@ Status PopulateBeforeImage(
236237
tablet_peer, columns[index], PrimitiveValue(), enum_oid_label_map, composite_atts_map,
237238
row_message->add_old_tuple(), &ql_value.value()));
238239
if (row_message->op() == RowMessage_Op_UPDATE) {
240+
const auto& old_tuple_column_name =
241+
row_message->old_tuple(static_cast<int>(found_columns)).column_name();
239242
for (int new_tuple_index = 0; new_tuple_index < row_message->new_tuple_size();
240243
++new_tuple_index) {
241244
if (row_message->new_tuple(static_cast<int>(new_tuple_index)).column_name() ==
242-
columns[index].name()) {
245+
old_tuple_column_name) {
243246
column_updated = true;
244247
break;
245248
}
246249
}
247250
if (!column_updated) {
248-
*(row_message->add_new_tuple()) = row_message->old_tuple(static_cast<int>(index));
251+
auto new_tuple_pb = row_message->mutable_new_tuple()->Add();
252+
new_tuple_pb->CopyFrom(row_message->old_tuple(static_cast<int>(found_columns)));
249253
}
250254
}
255+
found_columns += 1;
251256
}
252257
}
253-
} else {
254-
if (row_message->op() != RowMessage_Op_DELETE) {
255-
for (size_t index = 0; index < schema.num_columns(); ++index) {
256-
row_message->add_old_tuple();
257-
}
258+
} else if (row_message->op() != RowMessage_Op_DELETE) {
259+
for (size_t index = 0; index < schema.num_columns(); ++index) {
260+
row_message->add_old_tuple();
258261
}
259262
}
260263
return Status::OK();

ent/src/yb/integration-tests/cdcsdk_ysql-test.cc

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9778,6 +9778,48 @@ TEST_F(CDCSDKYsqlTest, YB_DISABLE_TEST_IN_TSAN(TestExpiredStreamWithCompaction))
97789778
ASSERT_LE(count_compaction_after_expired, count_after_compaction);
97799779
}
97809780

9781+
TEST_F(CDCSDKYsqlTest, YB_DISABLE_TEST_IN_TSAN(TestColumnDropBeforeImage)) {
9782+
ANNOTATE_UNPROTECTED_WRITE(FLAGS_timestamp_history_retention_interval_sec) = 0;
9783+
ASSERT_OK(SetUpWithParams(3, 1, false));
9784+
auto table = ASSERT_RESULT(CreateTable(&test_cluster_, kNamespaceName, kTableName));
9785+
google::protobuf::RepeatedPtrField<master::TabletLocationsPB> tablets;
9786+
ASSERT_OK(test_client()->GetTablets(table, 0, &tablets, nullptr));
9787+
ASSERT_EQ(tablets.size(), 1);
9788+
CDCStreamId stream_id =
9789+
ASSERT_RESULT(CreateDBStream(CDCCheckpointType::IMPLICIT, CDCRecordType::ALL));
9790+
auto set_resp = ASSERT_RESULT(SetCDCCheckpoint(stream_id, tablets));
9791+
ASSERT_FALSE(set_resp.has_error());
9792+
9793+
auto conn = ASSERT_RESULT(test_cluster_.ConnectToDB(kNamespaceName));
9794+
9795+
ASSERT_OK(conn.Execute("INSERT INTO test_table VALUES (1, 2)"));
9796+
ASSERT_OK(conn.Execute("UPDATE test_table SET value_1 = 3 WHERE key = 1"));
9797+
ASSERT_OK(conn.Execute("ALTER TABLE test_table ADD COLUMN value_2 INT"));
9798+
ASSERT_OK(conn.Execute("UPDATE test_table SET value_2 = 4 WHERE key = 1"));
9799+
ASSERT_OK(conn.Execute("ALTER TABLE test_table DROP COLUMN value_2"));
9800+
9801+
// The count array stores counts of DDL, INSERT, UPDATE, DELETE, READ, TRUNCATE in that order.
9802+
const uint32_t expected_count[] = {3, 1, 2, 0, 0, 0};
9803+
uint32_t count[] = {0, 0, 0, 0, 0, 0};
9804+
9805+
ExpectedRecordWithThreeColumns expected_records[] = {
9806+
{0, 0, 0}, {1, 2, INT_MAX}, {1, 3, INT_MAX}, {0, 0, INT_MAX}, {1, 3, 4}, {}};
9807+
ExpectedRecordWithThreeColumns expected_before_image_records[] = {
9808+
{}, {}, {1, 2, INT_MAX}, {}, {1, 3, INT_MAX}, {}};
9809+
9810+
GetChangesResponsePB change_resp = ASSERT_RESULT(GetChangesFromCDC(stream_id, tablets));
9811+
9812+
uint32_t record_size = change_resp.cdc_sdk_proto_records_size();
9813+
for (uint32_t i = 0; i < record_size; ++i) {
9814+
const CDCSDKProtoRecordPB record = change_resp.cdc_sdk_proto_records(i);
9815+
CheckRecordWithThreeColumns(
9816+
record, expected_records[i], count, true, expected_before_image_records[i]);
9817+
}
9818+
LOG(INFO) << "Got " << count[1] << " insert record and " << count[2] << " update record";
9819+
9820+
CheckCount(expected_count, count);
9821+
}
9822+
97819823
} // namespace enterprise
97829824
} // namespace cdc
97839825
} // namespace yb

0 commit comments

Comments
 (0)