From ebff295c3c90d2b6fa2ef757151d8b93e768fbfc Mon Sep 17 00:00:00 2001 From: jrfnl Date: Wed, 2 Apr 2025 17:12:14 +0200 Subject: [PATCH] Tokenizer/PHP: `T_GOTO_LABEL` no longer contains colon When support for the `goto` construct was introduced, it was elected to tokenize the "goto target label" as a separate `T_GOTO_LABEL` token, including the colon following it. This is wrong as PHP does not enforce for the colon to be the next token and allows whitespace and comments between the goto label and the colon. In which case, the tokenization in PHPCS would be incorrect as the label would stay a `T_STRING` instead of becoming `T_GOTO_LABEL`. This commit fixes this. It also introduces a new `T_GOTO_COLON` token and re-tokenizes the colon (separate from the label) to `T_GOTO_COLON`. This choice was made so as not to confuse the Tokenizer (nor sniffs) with yet another usage for `T_COLON`, which is an ambiguous enough token as it is (alternative control structures, switch - case statements, return type statements, enum types, ternaries etc). This change is covered by the existing (updated) tests. Includes one minor update to a sniff to allow for this change. Refs: * https://www.php.net/manual/en/control-structures.goto.php * https://3v4l.org/ccbVD Fixes squizlabs/PHP_CodeSniffer 3161 Fixes 185 --- .../PHP/DisallowMultipleAssignmentsSniff.php | 2 +- src/Tokenizers/PHP.php | 81 ++++++++++++------- src/Util/Tokens.php | 1 + tests/Core/Tokenizers/PHP/GotoLabelTest.inc | 4 +- tests/Core/Tokenizers/PHP/GotoLabelTest.php | 19 +++-- 5 files changed, 70 insertions(+), 37 deletions(-) diff --git a/src/Standards/Squiz/Sniffs/PHP/DisallowMultipleAssignmentsSniff.php b/src/Standards/Squiz/Sniffs/PHP/DisallowMultipleAssignmentsSniff.php index 2a953b4625..86ecde9a06 100644 --- a/src/Standards/Squiz/Sniffs/PHP/DisallowMultipleAssignmentsSniff.php +++ b/src/Standards/Squiz/Sniffs/PHP/DisallowMultipleAssignmentsSniff.php @@ -156,7 +156,7 @@ public function process(File $phpcsFile, $stackPtr) if ($tokens[$varToken]['code'] === T_VARIABLE || $tokens[$varToken]['code'] === T_OPEN_TAG - || $tokens[$varToken]['code'] === T_GOTO_LABEL + || $tokens[$varToken]['code'] === T_GOTO_COLON || $tokens[$varToken]['code'] === T_INLINE_THEN || $tokens[$varToken]['code'] === T_INLINE_ELSE || $tokens[$varToken]['code'] === T_SEMICOLON diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php index 325b034567..67f52d7ee4 100644 --- a/src/Tokenizers/PHP.php +++ b/src/Tokenizers/PHP.php @@ -364,6 +364,7 @@ class PHP extends Tokenizer T_FUNC_C => 12, T_GLOBAL => 6, T_GOTO => 4, + T_GOTO_COLON => 1, T_HALT_COMPILER => 15, T_IF => 2, T_IMPLEMENTS => 10, @@ -2199,45 +2200,67 @@ function return types. We want to keep the parenthesis map clean, if ($tokenIsArray === true && $token[0] === T_STRING - && isset($tokens[($stackPtr + 1)]) === true - && $tokens[($stackPtr + 1)] === ':' && (is_array($tokens[($stackPtr - 1)]) === false || $tokens[($stackPtr - 1)][0] !== T_PAAMAYIM_NEKUDOTAYIM) ) { - $stopTokens = [ - T_CASE => true, - T_SEMICOLON => true, - T_OPEN_TAG => true, - T_OPEN_CURLY_BRACKET => true, - T_INLINE_THEN => true, - T_ENUM => true, - ]; - - for ($x = ($newStackPtr - 1); $x > 0; $x--) { - if (isset($stopTokens[$finalTokens[$x]['code']]) === true) { - break; + // Find next non-empty token. + for ($i = ($stackPtr + 1); $i < $numTokens; $i++) { + if (is_array($tokens[$i]) === true + && isset(Tokens::$emptyTokens[$tokens[$i][0]]) === true + ) { + continue; } + + break; } - if ($finalTokens[$x]['code'] !== T_CASE - && $finalTokens[$x]['code'] !== T_INLINE_THEN - && $finalTokens[$x]['code'] !== T_ENUM - ) { - $finalTokens[$newStackPtr] = [ - 'content' => $token[1].':', - 'code' => T_GOTO_LABEL, - 'type' => 'T_GOTO_LABEL', + if (isset($tokens[$i]) === true && $tokens[$i] === ':') { + // Okay, so we have a colon, now we need to make sure that this is not + // class constant access, a ternary, enum or switch case. + $stopTokens = [ + T_CASE => true, + T_SEMICOLON => true, + T_OPEN_TAG => true, + T_OPEN_CURLY_BRACKET => true, + T_INLINE_THEN => true, + T_ENUM => true, ]; - if (PHP_CODESNIFFER_VERBOSITY > 1) { - StatusWriter::write("* token $stackPtr changed from T_STRING to T_GOTO_LABEL", 2); - StatusWriter::write('* skipping T_COLON token '.($stackPtr + 1), 2); + for ($x = ($newStackPtr - 1); $x > 0; $x--) { + if (isset($stopTokens[$finalTokens[$x]['code']]) === true) { + break; + } } - $newStackPtr++; - $stackPtr++; - continue; - } + if ($finalTokens[$x]['code'] !== T_CASE + && $finalTokens[$x]['code'] !== T_INLINE_THEN + && $finalTokens[$x]['code'] !== T_ENUM + ) { + $finalTokens[$newStackPtr] = [ + 'content' => $token[1], + 'code' => T_GOTO_LABEL, + 'type' => 'T_GOTO_LABEL', + ]; + + if (PHP_CODESNIFFER_VERBOSITY > 1) { + StatusWriter::write("* token $stackPtr changed from T_STRING to T_GOTO_LABEL", 2); + } + + // Modify the original token stack for the colon as potential + // whitespace/comments between still needs to get the normal treatment. + $tokens[$i] = [ + 0 => T_GOTO_COLON, + 1 => ':', + ]; + + if (PHP_CODESNIFFER_VERBOSITY > 1) { + StatusWriter::write("* token $i changed from \":\" to T_GOTO_COLON in the original token stack", 2); + } + + $newStackPtr++; + continue; + }//end if + }//end if }//end if /* diff --git a/src/Util/Tokens.php b/src/Util/Tokens.php index 3f2d603765..420bda4b85 100644 --- a/src/Util/Tokens.php +++ b/src/Util/Tokens.php @@ -54,6 +54,7 @@ define('T_OPEN_SHORT_ARRAY', 'PHPCS_T_OPEN_SHORT_ARRAY'); define('T_CLOSE_SHORT_ARRAY', 'PHPCS_T_CLOSE_SHORT_ARRAY'); define('T_GOTO_LABEL', 'PHPCS_T_GOTO_LABEL'); +define('T_GOTO_COLON', 'PHPCS_T_GOTO_COLON'); define('T_BINARY_CAST', 'PHPCS_T_BINARY_CAST'); define('T_OPEN_USE_GROUP', 'PHPCS_T_OPEN_USE_GROUP'); define('T_CLOSE_USE_GROUP', 'PHPCS_T_CLOSE_USE_GROUP'); diff --git a/tests/Core/Tokenizers/PHP/GotoLabelTest.inc b/tests/Core/Tokenizers/PHP/GotoLabelTest.inc index 249064a231..98503faf28 100644 --- a/tests/Core/Tokenizers/PHP/GotoLabelTest.inc +++ b/tests/Core/Tokenizers/PHP/GotoLabelTest.inc @@ -20,7 +20,7 @@ echo "i = $i"; assertIsInt($label); $this->assertSame($testContent, $tokens[$label]['content']); + $next = $this->phpcsFile->findNext(Tokens::$emptyTokens, ($label + 1), null, true); + + $this->assertIsInt($next); + $this->assertSame(T_GOTO_COLON, $tokens[$next]['code']); + $this->assertSame('T_GOTO_COLON', $tokens[$next]['type']); + $this->assertSame(':', $tokens[$next]['content']); + }//end testGotoDeclaration() @@ -107,19 +116,19 @@ public static function dataGotoDeclaration() return [ 'label in goto declaration - marker' => [ 'testMarker' => '/* testGotoDeclaration */', - 'testContent' => 'marker:', + 'testContent' => 'marker', ], 'label in goto declaration - end' => [ 'testMarker' => '/* testGotoDeclarationOutsideLoop */', - 'testContent' => 'end:', + 'testContent' => 'end', ], 'label in goto declaration - def' => [ 'testMarker' => '/* testGotoDeclarationInSwitch */', - 'testContent' => 'def:', + 'testContent' => 'def', ], 'label in goto declaration - label' => [ 'testMarker' => '/* testGotoDeclarationInFunction */', - 'testContent' => 'label:', + 'testContent' => 'label', ], ];