Skip to content

Commit 0e51069

Browse files
committed
[yugabyte#26847] YSQL: Use __thread in regex code
Summary: PG is designed for use in single-threaded environments, so it often uses static (global) vars. When this code is evaluated as part of expression pushdown, it's executed from a multi-threaded environment, and so the static variables are no longer safe. `__thread` is a simpler way to denote a thread-local variable in C code. Switching to this format keeps correctness, but makes the changes from PG much smaller. ``` src/lint/diff_file_with_upstream.py src/postgres/src/backend/regex/regc_pg_locale.c | grep -v "yb_switch_fallthrough" | grep -E "<|>" < /* YB includes */ < #include "pg_yb_utils.h" < < static YB_THREAD_LOCAL PG_Locale_Strategy pg_regex_strategy; < static YB_THREAD_LOCAL pg_locale_t pg_regex_locale; < static YB_THREAD_LOCAL Oid pg_regex_collation; > static PG_Locale_Strategy pg_regex_strategy; > static pg_locale_t pg_regex_locale; > static Oid pg_regex_collation; < static YB_THREAD_LOCAL pg_ctype_cache *pg_ctype_cache_list = NULL; > static pg_ctype_cache *pg_ctype_cache_list = NULL; ``` ``` src/lint/diff_file_with_upstream.py src/postgres/src/backend/utils/adt/regexp.c | grep -v "yb_switch_fallthrough" | grep -E "<|>" < /* YB includes */ < #include "pg_yb_utils.h" < < static YB_THREAD_LOCAL int num_res = 0; /* # of cached re's */ < static YB_THREAD_LOCAL cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */ > static int num_res = 0; /* # of cached re's */ > static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */ ``` Jira: DB-16251 Test Plan: ``` ./yb_build.sh --cxx-test pgwrapper_pg_mini-test --gtest_filter PgMiniTest.RegexPushdown ./yb_build.sh tsan--cxx-test pgwrapper_pg_mini-test --gtest_filter PgMiniTest.RegexPushdown ``` Reviewers: kfranz Reviewed By: kfranz Subscribers: yql, svc_phabricator Differential Revision: https://phorge.dev.yugabyte.com/D44528
1 parent 88ee3de commit 0e51069

File tree

8 files changed

+68
-273
lines changed

8 files changed

+68
-273
lines changed

src/postgres/src/backend/regex/regc_pg_locale.c

Lines changed: 56 additions & 111 deletions
Large diffs are not rendered by default.

src/postgres/src/backend/utils/adt/regexp.c

Lines changed: 12 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@
3939
#include "utils/varlena.h"
4040

4141
/* YB includes */
42-
#include "yb/yql/pggate/ybc_pg_typedefs.h"
43-
#include "yb/yql/pggate/ybc_pggate.h"
42+
#include "pg_yb_utils.h"
4443

4544
#define PG_GETARG_TEXT_PP_IF_EXISTS(_n) \
4645
(PG_NARGS() > (_n) ? PG_GETARG_TEXT_PP(_n) : NULL)
@@ -110,66 +109,9 @@ typedef struct cached_re_str
110109
regex_t cre_re; /* the compiled regular expression */
111110
} cached_re_str;
112111

113-
/*
114-
* YB: A single static cache works in Postgres, but is not safe in a
115-
* multi-threaded environment when we pushdown regex filters to the tserver.
116-
* The following infrastructure is necessary to create a thread local cache
117-
* for regex compilation.
118-
*/
119-
typedef struct YbReCacheInfo
120-
{
121-
int *num;
122-
cached_re_str *array;
123-
} YbReCacheInfo;
112+
static YB_THREAD_LOCAL int num_res = 0; /* # of cached re's */
113+
static YB_THREAD_LOCAL cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
124114

125-
static void
126-
YbFreeRe(cached_re_str *re)
127-
{
128-
pg_regfree(&re->cre_re);
129-
free(re->cre_pat);
130-
}
131-
132-
static void
133-
YbFreeReCache(YbcPgThreadLocalRegexpCache *cache)
134-
{
135-
Assert(cache && cache->array);
136-
cached_re_str *re = cache->array;
137-
138-
for (cached_re_str *re_end = re + cache->num; re != re_end; ++re)
139-
YbFreeRe(re);
140-
}
141-
142-
static YbReCacheInfo
143-
YbGetReCacheInfo()
144-
{
145-
if (IsMultiThreadedMode())
146-
{
147-
YbcPgThreadLocalRegexpCache *cache = YBCPgGetThreadLocalRegexpCache();
148-
149-
if (!cache)
150-
{
151-
cache = YBCPgInitThreadLocalRegexpCache((sizeof(cached_re_str) *
152-
MAX_CACHED_RES),
153-
&YbFreeReCache);
154-
Assert(cache && cache->array);
155-
}
156-
157-
return (YbReCacheInfo)
158-
{
159-
.num = &cache->num,
160-
.array = (cached_re_str *) cache->array,
161-
};
162-
}
163-
164-
static int num_res = 0; /* # of cached re's */
165-
static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
166-
167-
return (YbReCacheInfo)
168-
{
169-
.num = &num_res,
170-
.array = re_array,
171-
};
172-
}
173115

174116
/* Local functions */
175117
static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
@@ -207,16 +149,12 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
207149
cached_re_str re_temp;
208150
char errMsg[100];
209151

210-
YbReCacheInfo yb_re_cache_info = YbGetReCacheInfo();
211-
int *num_res = yb_re_cache_info.num;
212-
cached_re_str *re_array = yb_re_cache_info.array;
213-
214152
/*
215153
* Look for a match among previously compiled REs. Since the data
216154
* structure is self-organizing with most-used entries at the front, our
217155
* search strategy can just be to scan from the front.
218156
*/
219-
for (i = 0; i < *num_res; i++)
157+
for (i = 0; i < num_res; i++)
220158
{
221159
if (re_array[i].cre_pat_len == text_re_len &&
222160
re_array[i].cre_flags == cflags &&
@@ -297,18 +235,19 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
297235
* Okay, we have a valid new item in re_temp; insert it into the storage
298236
* array. Discard last entry if needed.
299237
*/
300-
if (*num_res >= MAX_CACHED_RES)
238+
if (num_res >= MAX_CACHED_RES)
301239
{
302-
--*num_res;
303-
Assert(*num_res < MAX_CACHED_RES);
304-
YbFreeRe(&re_array[*num_res]);
240+
--num_res;
241+
Assert(num_res < MAX_CACHED_RES);
242+
pg_regfree(&re_array[num_res].cre_re);
243+
free(re_array[num_res].cre_pat);
305244
}
306245

307-
if (*num_res > 0)
308-
memmove(&re_array[1], &re_array[0], *num_res * sizeof(cached_re_str));
246+
if (num_res > 0)
247+
memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str));
309248

310249
re_array[0] = re_temp;
311-
(*num_res)++;
250+
num_res++;
312251

313252
return &re_array[0].cre_re;
314253
}

src/postgres/src/tools/pgindent/yb_typedefs.list

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,6 @@ YbQueryDiagnosticsParams
152152
YbQueryDiagnosticsPgss
153153
YbQueryDiagnosticsStatusType
154154
YbRange
155-
YbReCacheInfo
156155
YbRegisterOuterMatchFn_t
157156
YbRelationAttrsProcessingState
158157
YbResetBatchFn_t
@@ -271,9 +270,6 @@ YbcPgSysColumns
271270
YbcPgSysTablePrefetcherCacheMode
272271
YbcPgTableDesc
273272
YbcPgTabletsDescriptor
274-
YbcPgThreadLocalRegexpCache
275-
YbcPgThreadLocalRegexpCacheCleanup
276-
YbcPgThreadLocalRegexpMetadata
277273
YbcPgTransactionSetting
278274
YbcPgTxnSnapshot
279275
YbcPgTypeAttrs

src/yb/yql/pggate/pggate_thread_local_vars.cc

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,6 @@
2121
#include "yb/yql/pggate/ybc_pg_typedefs.h"
2222
#include "yb/util/logging.h"
2323

24-
class CachedRegexpHolder {
25-
public:
26-
CachedRegexpHolder(size_t buffer_size,
27-
YbcPgThreadLocalRegexpCacheCleanup cleanup)
28-
: buffer_(buffer_size, 0), cleanup_(cleanup), cache_{.num = 0, .array = buffer_.data()} {}
29-
30-
~CachedRegexpHolder() {
31-
cleanup_(&cache_);
32-
}
33-
34-
YbcPgThreadLocalRegexpCache& cache() { return cache_; }
35-
36-
private:
37-
std::vector<char> buffer_;
38-
const YbcPgThreadLocalRegexpCacheCleanup cleanup_;
39-
YbcPgThreadLocalRegexpCache cache_ = {};
40-
};
41-
4224
namespace yb::pggate {
4325

4426
/*
@@ -50,8 +32,6 @@ thread_local void* pg_strtok_ptr = nullptr;
5032
thread_local int yb_expression_version = 0;
5133
thread_local void* jump_buffer = nullptr;
5234
thread_local void* err_status = nullptr;
53-
thread_local std::optional<CachedRegexpHolder> re_cache;
54-
thread_local YbcPgThreadLocalRegexpMetadata re_metadata;
5535

5636
//-----------------------------------------------------------------------------
5737
// Memory context.
@@ -117,19 +97,4 @@ void PgSetThreadLocalYbExpressionVersion(int yb_expr_version) {
11797
yb_expression_version = yb_expr_version;
11898
}
11999

120-
YbcPgThreadLocalRegexpCache* PgGetThreadLocalRegexpCache() {
121-
return re_cache ? &re_cache->cache() : nullptr;
122-
}
123-
124-
YbcPgThreadLocalRegexpCache* PgInitThreadLocalRegexpCache(
125-
size_t buffer_size, YbcPgThreadLocalRegexpCacheCleanup cleanup) {
126-
DCHECK(!re_cache);
127-
re_cache.emplace(buffer_size, cleanup);
128-
return &re_cache->cache();
129-
}
130-
131-
YbcPgThreadLocalRegexpMetadata* PgGetThreadLocalRegexpMetadata() {
132-
return &re_metadata;
133-
}
134-
135100
} // namespace yb::pggate

src/yb/yql/pggate/pggate_thread_local_vars.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -77,19 +77,4 @@ void PgSetThreadLocalStrTokPtr(char *new_pg_strtok_ptr);
7777
int PgGetThreadLocalYbExpressionVersion();
7878
void PgSetThreadLocalYbExpressionVersion(int yb_expr_version);
7979

80-
/*
81-
* CachedRegexpHolder (from regex/regex.c)
82-
* These are used to cache the compiled regexes
83-
*/
84-
YbcPgThreadLocalRegexpCache* PgGetThreadLocalRegexpCache();
85-
YbcPgThreadLocalRegexpCache* PgInitThreadLocalRegexpCache(
86-
size_t buffer_size, YbcPgThreadLocalRegexpCacheCleanup cleanup);
87-
88-
/*
89-
* Regular Expression Metadata (from regc_pg_locale.c)
90-
* These are used to cache the collation and locale information for regular
91-
* expression processing.
92-
*/
93-
YbcPgThreadLocalRegexpMetadata* PgGetThreadLocalRegexpMetadata();
94-
9580
} // namespace yb::pggate

src/yb/yql/pggate/ybc_pg_typedefs.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -905,21 +905,6 @@ typedef struct {
905905
const char* tgt_owner;
906906
} YbcCloneInfo;
907907

908-
// A thread-safe way to cache compiled regexes.
909-
typedef struct {
910-
int num;
911-
void* array;
912-
} YbcPgThreadLocalRegexpCache;
913-
914-
typedef void (*YbcPgThreadLocalRegexpCacheCleanup)(YbcPgThreadLocalRegexpCache*);
915-
916-
// A thread-safe way to control the behavior of regex matching.
917-
typedef struct {
918-
int pg_regex_strategy; // PG_Locale_Strategy
919-
void* pg_regex_locale; // struct pg_locale_t
920-
YbcPgOid pg_regex_collation;
921-
} YbcPgThreadLocalRegexpMetadata;
922-
923908
typedef struct {
924909
void *slot;
925910
} YbcPgInsertOnConflictKeyInfo;

src/yb/yql/pggate/ybc_pggate.cc

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2399,19 +2399,6 @@ void YBCPgSetThreadLocalYbExpressionVersion(int yb_expr_version) {
23992399
PgSetThreadLocalYbExpressionVersion(yb_expr_version);
24002400
}
24012401

2402-
YbcPgThreadLocalRegexpCache* YBCPgGetThreadLocalRegexpCache() {
2403-
return PgGetThreadLocalRegexpCache();
2404-
}
2405-
2406-
YbcPgThreadLocalRegexpCache* YBCPgInitThreadLocalRegexpCache(
2407-
size_t buffer_size, YbcPgThreadLocalRegexpCacheCleanup cleanup) {
2408-
return PgInitThreadLocalRegexpCache(buffer_size, cleanup);
2409-
}
2410-
2411-
YbcPgThreadLocalRegexpMetadata* YBCPgGetThreadLocalRegexpMetadata() {
2412-
return PgGetThreadLocalRegexpMetadata();
2413-
}
2414-
24152402
void* YBCPgSetThreadLocalJumpBuffer(void* new_buffer) {
24162403
return PgSetThreadLocalJumpBuffer(new_buffer);
24172404
}

src/yb/yql/pggate/ybc_pggate.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -870,13 +870,6 @@ void* YBCPgSetThreadLocalErrStatus(void* new_status);
870870

871871
void* YBCPgGetThreadLocalErrStatus();
872872

873-
YbcPgThreadLocalRegexpCache* YBCPgGetThreadLocalRegexpCache();
874-
875-
YbcPgThreadLocalRegexpCache* YBCPgInitThreadLocalRegexpCache(
876-
size_t buffer_size, YbcPgThreadLocalRegexpCacheCleanup cleanup);
877-
878-
YbcPgThreadLocalRegexpMetadata* YBCPgGetThreadLocalRegexpMetadata();
879-
880873
void YBCPgResetCatalogReadTime();
881874

882875
YbcStatus YBCNewGetLockStatusDataSRF(YbcPgFunction *handle);

0 commit comments

Comments
 (0)