@@ -39,11 +39,11 @@ void *memmove(void *dest, const void *src, size_t n) {
 #ifdef __wasm_simd128__
 
 __attribute__((weak))
-int memcmp(const void *v1, const void *v2, size_t n) {
+int memcmp(const void *vl, const void *vr, size_t n) {
   // Scalar algorithm.
   if (n < sizeof(v128_t)) {
-    const unsigned char *u1 = (unsigned char *)v1;
-    const unsigned char *u2 = (unsigned char *)v2;
+    const unsigned char *u1 = (unsigned char *)vl;
+    const unsigned char *u2 = (unsigned char *)vr;
     while (n--) {
       if (*u1 != *u2) return *u1 - *u2;
       u1++;
@@ -56,32 +56,32 @@ int memcmp(const void *v1, const void *v2, size_t n) {
   // Find the first different character in the objects.
   // Unaligned loads handle the case where the objects
   // have mismatching alignments.
-  const v128_t *w1 = (v128_t *)v1;
-  const v128_t *w2 = (v128_t *)v2;
+  const v128_t *v1 = (v128_t *)vl;
+  const v128_t *v2 = (v128_t *)vr;
   while (n) {
-    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(w1), wasm_v128_load(w2));
+    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(v1), wasm_v128_load(v2));
     // Bitmask is slow on AArch64, all_true is much faster.
     if (!wasm_i8x16_all_true(cmp)) {
       // Find the offset of the first zero bit (little-endian).
       size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
-      const unsigned char *u1 = (unsigned char *)w1 + ctz;
-      const unsigned char *u2 = (unsigned char *)w2 + ctz;
+      const unsigned char *u1 = (unsigned char *)v1 + ctz;
+      const unsigned char *u2 = (unsigned char *)v2 + ctz;
       // This may help the compiler if the function is inlined.
       __builtin_assume(*u1 - *u2 != 0);
       return *u1 - *u2;
     }
     // This makes n a multiple of sizeof(v128_t)
     // for every iteration except the first.
     size_t align = (n - 1) % sizeof(v128_t) + 1;
-    w1 = (v128_t *)((char *)w1 + align);
-    w2 = (v128_t *)((char *)w2 + align);
+    v1 = (v128_t *)((char *)v1 + align);
+    v2 = (v128_t *)((char *)v2 + align);
     n -= align;
   }
   return 0;
 }
 
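The advance at the bottom of memcmp's vector loop is the subtle part: `(n - 1) % sizeof(v128_t) + 1` is always between 1 and 16, so the first step swallows the ragged tail and every later iteration consumes exactly one full vector that stays in bounds; the bytes the second load re-reads were already proven equal by the first. A standalone sketch of that arithmetic, with `VEC` standing in for `sizeof(v128_t)`:

```c
// Standalone check of the tail-advance arithmetic used in memcmp above.
// VEC stands in for sizeof(v128_t); this is illustration, not library code.
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define VEC 16

int main(void) {
  // Smaller n takes the scalar path, so only n >= VEC matters here.
  for (size_t n = VEC; n <= 1000; n++) {
    size_t first = (n - 1) % VEC + 1;   // first advance: 1..VEC, never 0
    assert(1 <= first && first <= VEC);
    assert((n - first) % VEC == 0);     // the rest is whole vectors
  }
  printf("tail-advance arithmetic verified\n");
  return 0;
}
```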
 __attribute__((weak))
-void *memchr(const void *v, int c, size_t n) {
+void *memchr(const void *s, int c, size_t n) {
   // When n is zero, a function that locates a character finds no occurrence.
   // Otherwise, decrement n to ensure sub_overflow overflows
   // when n would go equal-to-or-below zero.
@@ -92,12 +92,13 @@ void *memchr(const void *v, int c, size_t n) {
   // memchr must behave as if it reads characters sequentially
   // and stops as soon as a match is found.
   // Aligning ensures loads beyond the first match are safe.
-  uintptr_t align = (uintptr_t)v % sizeof(v128_t);
-  const v128_t *w = (v128_t *)((char *)v - align);
-  const v128_t wc = wasm_i8x16_splat(c);
+  // Volatile avoids compiler tricks around out of bounds loads.
+  uintptr_t align = (uintptr_t)s % sizeof(v128_t);
+  const volatile v128_t *v = (v128_t *)((char *)s - align);
+  const v128_t vc = wasm_i8x16_splat(c);
 
   for (;;) {
-    const v128_t cmp = wasm_i8x16_eq(*w, wc);
+    const v128_t cmp = wasm_i8x16_eq(*v, vc);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
@@ -113,36 +114,36 @@ void *memchr(const void *v, int c, size_t n) {
         // That's a match, unless it is beyond the end of the object.
         // Recall that we decremented n, so less-than-or-equal-to is correct.
         size_t ctz = __builtin_ctz(mask);
-        return ctz - align <= n ? (char *)w + ctz : NULL;
+        return ctz - align <= n ? (char *)v + ctz : NULL;
       }
     }
     // Decrement n; if it overflows we're done.
     if (__builtin_sub_overflow(n, sizeof(v128_t) - align, &n)) {
       return NULL;
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
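memchr has no terminator to lean on, so the loop above tracks length instead: `n` is decremented once up front, the first backward-aligned chunk accounts for only `sizeof(v128_t) - align` object bytes, and wraparound in `__builtin_sub_overflow` means the chunk just examined already reached the end of the object. A standalone sketch of that bookkeeping (`chunks_visited` is a made-up name, `VEC` stands in for `sizeof(v128_t)`) checks that exactly the 16-byte chunks overlapping the object get loaded:

```c
// Standalone check of the length bookkeeping in the memchr loop above.
// Illustration only: VEC stands in for sizeof(v128_t), chunks_visited is
// a made-up helper that mimics the loop's termination logic.
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define VEC 16

static size_t chunks_visited(size_t len, size_t align) {
  if (len == 0) return 0;            // n == 0: nothing is ever loaded
  size_t n = len - 1;                // the up-front decrement
  size_t visited = 0;
  for (;;) {
    visited++;                       // one vector load per iteration
    if (__builtin_sub_overflow(n, VEC - align, &n)) return visited;
    align = 0;                       // only the first chunk is partial
  }
}

int main(void) {
  for (size_t align = 0; align < VEC; align++)
    for (size_t len = 1; len < 200; len++)
      // Expected: number of VEC-byte chunks overlapping bytes [align, align+len).
      assert(chunks_visited(len, align) == (align + len + VEC - 1) / VEC);
  printf("memchr chunk accounting verified\n");
  return 0;
}
```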
 __attribute__((weak))
-void *memrchr(const void *v, int c, size_t n) {
+void *memrchr(const void *s, int c, size_t n) {
   // memrchr is allowed to read up to n bytes from the object.
   // Search backward for the last matching character.
-  const v128_t *w = (v128_t *)((char *)v + n);
-  const v128_t wc = wasm_i8x16_splat(c);
+  const v128_t *v = (v128_t *)((char *)s + n);
+  const v128_t vc = wasm_i8x16_splat(c);
   for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
-    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(--w), wc);
+    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(--v), vc);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Find the offset of the last one bit (little-endian).
       size_t clz = __builtin_clz(wasm_i8x16_bitmask(cmp)) - 15;
-      return (char *)(w + 1) - clz;
+      return (char *)(v + 1) - clz;
     }
   }
 
   // Scalar algorithm.
-  const char *a = (char *)w;
+  const char *a = (char *)v;
   while (n--) {
     if (*(--a) == (char)c) return (char *)a;
   }
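The `- 15` in memrchr is worth a second look: `wasm_i8x16_bitmask` puts its 16 result bits in the low half of a 32-bit int, so when the last matching byte is at position `p`, `__builtin_clz(mask) - 15` evaluates to `16 - p`, the match's distance from the end of the block, and `(char *)(v + 1) - clz` lands exactly on it. A standalone check of that arithmetic:

```c
// Standalone check of the clz arithmetic used in memrchr above.
// Illustration only; mask plays the role of wasm_i8x16_bitmask(cmp).
#include <assert.h>
#include <stdio.h>

int main(void) {
  for (int p = 0; p < 16; p++) {
    unsigned mask = 1u << p;              // pretend byte p is the last match
    int clz = __builtin_clz(mask) - 15;   // the expression from the source
    assert(clz == 16 - p);                // distance back from the block end
  }
  printf("memrchr clz arithmetic verified\n");
  return 0;
}
```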
@@ -154,12 +155,13 @@ size_t strlen(const char *s) {
   // strlen must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
+  const volatile v128_t *v = (v128_t *)(s - align);
 
   for (;;) {
+    const v128_t vv = *v;
     // Bitmask is slow on AArch64, all_true is much faster.
-    if (!wasm_i8x16_all_true(*w)) {
-      const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){});
+    if (!wasm_i8x16_all_true(vv)) {
+      const v128_t cmp = wasm_i8x16_eq(vv, (v128_t){});
       // Clear the bits corresponding to alignment (little-endian)
       // so we can count trailing zeros.
       int mask = wasm_i8x16_bitmask(cmp) >> align << align;
@@ -170,11 +172,11 @@ size_t strlen(const char *s) {
       // it's as if we didn't find anything.
       if (mask) {
         // Find the offset of the first one bit (little-endian).
-        return (char *)w - s + __builtin_ctz(mask);
+        return (char *)v - s + __builtin_ctz(mask);
       }
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
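Two details in the strlen loop: the volatile load is hoisted into `vv`, so the `all_true` test and the zero comparison share one load instead of forcing two, and `mask >> align << align` discards bitmask bits for bytes that sit before `s` itself, which can only be set on the first, backward-aligned chunk. The shift pair is a plain bit-clearing idiom; a standalone exhaustive check:

```c
// Standalone check of the `mask >> align << align` step used above: it
// clears the low `align` bits (bytes before the real start of the string)
// and leaves every other bit alone. Illustration only.
#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned mask = 0; mask <= 0xFFFF; mask++) {
    for (unsigned align = 0; align < 16; align++) {
      unsigned low = (1u << align) - 1;                // bits before the string
      assert((mask >> align << align) == (mask & ~low));
    }
  }
  printf("alignment bit-clearing verified for all 16-bit masks\n");
  return 0;
}
```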
@@ -268,12 +270,14 @@ int strncmp(const char *s1, const char *s2, size_t n) {
 static char *__strchrnul(const char *s, int c) {
   // strchrnul must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
+  // Volatile avoids compiler tricks around out of bounds loads.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
-  const v128_t wc = wasm_i8x16_splat(c);
+  const volatile v128_t *v = (v128_t *)(s - align);
+  const v128_t vc = wasm_i8x16_splat(c);
 
   for (;;) {
-    const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){}) | wasm_i8x16_eq(*w, wc);
+    const v128_t vv = *v;
+    const v128_t cmp = wasm_i8x16_eq(vv, (v128_t){}) | wasm_i8x16_eq(vv, vc);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
@@ -286,11 +290,11 @@ static char *__strchrnul(const char *s, int c) {
       // it's as if we didn't find anything.
       if (mask) {
         // Find the offset of the first one bit (little-endian).
-        return (char *)w + __builtin_ctz(mask);
+        return (char *)v + __builtin_ctz(mask);
       }
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
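`__strchrnul` folds both stopping conditions into one vector test: a byte ends the search if it is NUL or equal to `c`, hence the OR of two `wasm_i8x16_eq` masks. A scalar reference for that contract, plus the usual way `strchr` is layered on top of it (the `scalar_` names are illustrative, not part of this patch):

```c
// Scalar reference for the contract of __strchrnul above, illustration only.
#include <stdio.h>

// Return a pointer to the first byte equal to (char)c,
// or to the terminating NUL if there is none.
static char *scalar_strchrnul(const char *s, int c) {
  while (*s && *s != (char)c) s++;      // stop on NUL or on a match
  return (char *)s;
}

// strchr is then a thin wrapper: a NUL hit only counts when c itself is 0.
static char *scalar_strchr(const char *s, int c) {
  char *r = scalar_strchrnul(s, c);
  return *r == (char)c ? r : NULL;
}

int main(void) {
  printf("%s\n", scalar_strchrnul("hello world", 'o'));   // "o world"
  printf("%s\n", scalar_strchrnul("hello", 'x'));         // "" (the NUL)
  printf("%d\n", scalar_strchr("hello", 'x') == NULL);    // 1
  return 0;
}
```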
@@ -371,14 +375,15 @@ __attribute__((weak))
 size_t strspn(const char *s, const char *c) {
   // strspn must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
+  // Volatile avoids compiler tricks around out of bounds loads.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
+  const volatile v128_t *v = (v128_t *)(s - align);
 
   if (!c[0]) return 0;
   if (!c[1]) {
-    const v128_t wc = wasm_i8x16_splat(*c);
+    const v128_t vc = wasm_i8x16_splat(*c);
     for (;;) {
-      const v128_t cmp = wasm_i8x16_eq(*w, wc);
+      const v128_t cmp = wasm_i8x16_eq(*v, vc);
       // Bitmask is slow on AArch64, all_true is much faster.
       if (!wasm_i8x16_all_true(cmp)) {
         // Clear the bits corresponding to alignment (little-endian)
@@ -391,11 +396,11 @@ size_t strspn(const char *s, const char *c) {
         // it's as if we didn't find anything.
         if (mask) {
           // Find the offset of the first one bit (little-endian).
-          return (char *)w - s + __builtin_ctz(mask);
+          return (char *)v - s + __builtin_ctz(mask);
         }
       }
       align = 0;
-      w++;
+      v++;
     }
   }
 
@@ -407,7 +412,7 @@ size_t strspn(const char *s, const char *c) {
   }
 
   for (;;) {
-    const v128_t cmp = __wasm_v128_chkbits(bitmap, *w);
+    const v128_t cmp = __wasm_v128_chkbits(bitmap, *v);
     // Bitmask is slow on AArch64, all_true is much faster.
     if (!wasm_i8x16_all_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
@@ -420,11 +425,11 @@ size_t strspn(const char *s, const char *c) {
      // it's as if we didn't find anything.
      if (mask) {
        // Find the offset of the first one bit (little-endian).
-        return (char *)w - s + __builtin_ctz(mask);
+        return (char *)v - s + __builtin_ctz(mask);
      }
    }
    align = 0;
-    w++;
+    v++;
  }
 }
 
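For a set with more than one character, strspn switches to a 256-entry bitmap that `__wasm_v128_chkbits` tests sixteen bytes at a time; the helper itself is not part of this diff. The underlying idea is the classical byte-set membership test, sketched here in scalar form with made-up names (`byteset256`, `scalar_strspn`):

```c
// Scalar sketch of the bitmap idea behind __wasm_v128_chkbits: build a
// 256-bit set of accepted bytes, then test membership per byte.
// The names below are made up for illustration.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t bits[4]; } byteset256;

static void byteset_add(byteset256 *set, unsigned char b) {
  set->bits[b >> 6] |= 1ull << (b & 63);
}

static int byteset_has(const byteset256 *set, unsigned char b) {
  return (set->bits[b >> 6] >> (b & 63)) & 1;
}

// Scalar strspn over the byte set, for comparison with the SIMD version.
static size_t scalar_strspn(const char *s, const char *c) {
  byteset256 set = {0};
  for (; *c; c++) byteset_add(&set, (unsigned char)*c);
  size_t i = 0;
  while (s[i] && byteset_has(&set, (unsigned char)s[i])) i++;
  return i;
}

int main(void) {
  printf("%zu\n", scalar_strspn("128.0.0.1:8080", "0123456789."));  // 9
  return 0;
}
```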
@@ -434,8 +439,9 @@ size_t strcspn(const char *s, const char *c) {
 
   // strcspn must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
+  // Volatile avoids compiler tricks around out of bounds loads.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
+  const volatile v128_t *v = (v128_t *)(s - align);
 
   __wasm_v128_bitmap256_t bitmap = {};
 
@@ -445,7 +451,7 @@ size_t strcspn(const char *s, const char *c) {
   } while (*c++);
 
   for (;;) {
-    const v128_t cmp = __wasm_v128_chkbits(bitmap, *w);
+    const v128_t cmp = __wasm_v128_chkbits(bitmap, *v);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
@@ -458,11 +464,11 @@ size_t strcspn(const char *s, const char *c) {
       // it's as if we didn't find anything.
       if (mask) {
         // Find the offset of the first one bit (little-endian).
-        return (char *)w - s + __builtin_ctz(mask);
+        return (char *)v - s + __builtin_ctz(mask);
       }
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
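strcspn reuses the same bitmap machinery but looks for the first byte that is in the set rather than the first one that is not, which is why it checks `any_true` where strspn checks `all_true` (the fill loop above also runs its body for the terminating NUL, so the end of the string stops the scan too). A tiny usage sketch contrasting the two through the ordinary libc entry points:

```c
// Usage sketch contrasting strspn and strcspn on the same input.
#include <stdio.h>
#include <string.h>

int main(void) {
  const char *s = "key=value;rest";
  size_t name = strcspn(s, "=;");       // bytes before the first '=' or ';' -> 3
  size_t skip = strspn(s + name, "=");  // run of '=' at that point -> 1
  printf("name=%.*s, value starts at offset %zu\n", (int)name, s, name + skip);
  return 0;
}
```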