@@ -92,15 +92,9 @@ protected function removeCommentsFromTag($tag)
9292 $ content = $ matches [2 ];
9393 $ closingTag = $ matches [3 ];
9494
95- // Split content by lines and process each line
96- $ lines = preg_split ('/\r\n|\r|\n/ ' , $ content );
97- $ processedLines = [];
98-
99- foreach ($ lines as $ line ) {
100- $ processedLines [] = $ this ->removeSingleLineCommentFromLine ($ line );
101- }
102-
103- $ processedContent = implode ($ lineEnding , $ processedLines );
95+ // Process the whole content at once (supports multi-line template
96+ // literals and complex regex literals) for correctness and performance.
97+ $ processedContent = $ this ->removeSingleLineCommentsFromContent ($ content );
10498
10599 // Reconstruct the tag with processed content
106100 return $ openingTag . $ processedContent . $ closingTag ;
@@ -120,82 +114,223 @@ protected function removeCommentsFromTag($tag)
120114 */
121115 protected function removeSingleLineCommentFromLine ($ line )
122116 {
123- // Early return for lines without //
117+ // Fast path: no comments at all
124118 if (strpos ($ line , '// ' ) === false ) {
125119 return $ line ;
126120 }
127121
128- $ result = '' ;
129- $ length = strlen ($ line );
130- $ inSingleQuote = false ;
131- $ inDoubleQuote = false ;
132- $ inRegex = false ;
133- $ escaped = false ;
134-
135- for ($ i = 0 ; $ i < $ length ; $ i ++) {
136- $ char = $ line [$ i ];
137- $ nextChar = $ i + 1 < $ length ? $ line [$ i + 1 ] : '' ;
138- $ prevChar = $ i > 0 ? $ line [$ i - 1 ] : '' ;
139-
140- // Handle escape sequences
141- if ($ escaped ) {
142- $ result .= $ char ;
143- $ escaped = false ;
144- continue ;
145- }
122+ // If there are no quotes/backticks and no regex literal, we can do a fast, simple check.
123+ // This avoids invoking the heavier regex when not necessary — a common
124+ // case is lines like: var x = 1; // Comment
125+ // However, if there are regex literals in the line (e.g. /http:\/\/\//),
126+ // we must avoid the fast path as it can't safely detect // inside them.
127+ // Quick check for escaped slash sequences (e.g. http:\/\/) that indicate
128+ // the presence of regex literals or escaped slashes in general.
129+ $ hasEscapedSlash = strpos ($ line , '\\/ ' ) !== false ;
130+ if (strpos ($ line , '" ' ) === false && strpos ($ line , "' " ) === false && strpos ($ line , '` ' ) === false && !$ hasEscapedSlash ) {
131+ $ offset = 0 ;
132+ while (($ pos = strpos ($ line , '// ' , $ offset )) !== false ) {
133+ $ prevChar = $ pos > 0 ? $ line [$ pos - 1 ] : '' ;
146134
147- if ($ char === '\\' && ($ inSingleQuote || $ inDoubleQuote || $ inRegex )) {
148- $ result .= $ char ;
149- $ escaped = true ;
150- continue ;
151- }
135+ // URLs like http://example.com are preceded by :, so ignore these
136+ if ($ prevChar === ': ' ) {
137+ // Skip over this occurrence (it's likely a protocol spec)
138+ $ offset = $ pos + 2 ;
139+ continue ;
140+ }
152141
153- // Toggle quote states
154- if ($ char === '" ' && !$ inSingleQuote && !$ inRegex ) {
155- $ inDoubleQuote = !$ inDoubleQuote ;
156- $ result .= $ char ;
157- continue ;
142+ // Comment starts here — strip it
143+ return substr ($ line , 0 , $ pos );
158144 }
159145
160- if ($ char === "' " && !$ inDoubleQuote && !$ inRegex ) {
161- $ inSingleQuote = !$ inSingleQuote ;
162- $ result .= $ char ;
163- continue ;
164- }
146+ return $ line ;
147+ }
165148
166- // Handle regex literals (basic detection)
167- if ($ char === '/ ' && !$ inSingleQuote && !$ inDoubleQuote ) {
168- // Check if this might be a regex literal
169- // Simple heuristic: regex usually comes after =, (, [, ,, return, or at start
170- if ($ prevChar === '= ' || $ prevChar === '( ' || $ prevChar === '[ ' || $ prevChar === ', ' || $ prevChar === ' ' ) {
171- // Look ahead to see if this looks like a regex (not a comment)
172- if ($ nextChar !== '/ ' && $ nextChar !== '* ' ) {
173- $ inRegex = true ;
174- $ result .= $ char ;
175- continue ;
149+ // More complex lines can contain strings, regexes, or backticks — use a
150+ // single PCRE step which skips strings/regex literals and removes // comments
151+ // that are not preceded by a colon.
152+ $ pattern = <<<'PATTERN'
153+ /(?:(?:"(?:\\.|[^"\\])*")|(?:'(?:\\.|[^'\\])*')|(?:`[^`]*`)|(?:\/(?:\\.|[^\/\\])+\/[a-zA-Z]*))(*SKIP)(*F)|(?<!:)\/\/[^\r\n]*/su
154+ PATTERN;
155+
156+ // preg_replace will remove matched // comments but will skip strings/regexes
157+ $ result = preg_replace ($ pattern , '' , $ line );
158+
159+ // preg_replace returns null on error; if that happens fall back to original line
160+ return $ result === null ? $ line : $ result ;
161+ }
162+
163+ /**
164+ * Remove // comments from full content (possibly multi-line) while preserving
165+ * strings, template literals, and regex literals in the content.
166+ *
167+ * This function avoids splitting lines so that multi-line template literals
168+ * (backticks) are preserved correctly.
169+ *
170+ * @param string $content
171+ * @return string
172+ */
173+ protected function removeSingleLineCommentsFromContent ($ content )
174+ {
175+
176+ // Fallback to a linear scanner: it's safer than a single complex PCRE
177+ // and supports multi-line template literals and complex regexes.
178+ $ length = strlen ($ content );
179+ $ out = '' ;
180+
181+ $ inSingle = false ;
182+ $ inDouble = false ;
183+ $ inBacktick = false ;
184+ $ inRegex = false ;
185+ $ inRegexCharClass = false ;
186+ $ escaped = false ;
187+
188+ for ($ i = 0 ; $ i < $ length ; $ i ++) {
189+ $ char = $ content [$ i ];
190+ $ next = $ i + 1 < $ length ? $ content [$ i + 1 ] : '' ;
191+
192+ if ($ escaped ) {
193+ $ out .= $ char ;
194+ $ escaped = false ;
195+ continue ;
196+ }
197+
198+ if ($ char === '\\' ) {
199+ $ out .= $ char ;
200+ $ escaped = true ;
201+ continue ;
202+ }
203+
204+ if ($ inSingle ) {
205+ if ($ char === "' " ) {
206+ $ inSingle = false ;
176207 }
208+ $ out .= $ char ;
209+ continue ;
210+ }
211+
212+ if ($ inDouble ) {
213+ if ($ char === '" ' ) {
214+ $ inDouble = false ;
215+ }
216+ $ out .= $ char ;
217+ continue ;
218+ }
219+
220+ if ($ inBacktick ) {
221+ if ($ char === '` ' ) {
222+ $ inBacktick = false ;
223+ }
224+ $ out .= $ char ;
225+ continue ;
177226 }
178227
179- // End of regex literal
180228 if ($ inRegex ) {
181- $ inRegex = false ;
182- $ result .= $ char ;
229+ // Handle char classes inside regex
230+ if ($ inRegexCharClass ) {
231+ if ($ char === '] ' && !$ escaped ) {
232+ $ inRegexCharClass = false ;
233+ }
234+ $ out .= $ char ;
235+ continue ;
236+ }
237+
238+ if ($ char === '[ ' ) {
239+ $ inRegexCharClass = true ;
240+ $ out .= $ char ;
241+ continue ;
242+ }
243+
244+ if ($ char === '/ ' && !$ escaped ) {
245+ $ inRegex = false ;
246+ $ out .= $ char ;
247+ // Append any regex flags
248+ $ j = $ i + 1 ;
249+ while ($ j < $ length && preg_match ('/[a-zA-Z]/ ' , $ content [$ j ])) {
250+ $ out .= $ content [$ j ];
251+ $ j ++;
252+ }
253+ $ i = $ j - 1 ;
254+ continue ;
255+ }
256+
257+ $ out .= $ char ;
183258 continue ;
184259 }
185- }
186260
187- // Check for // comment outside of strings
188- if (! $ inSingleQuote && ! $ inDoubleQuote && ! $ inRegex && $ char === ' / ' && $ nextChar === ' / ' ) {
189- // Check if this is not part of a URL (preceded by :)
190- if ( $ prevChar !== ' : ' ) {
191- // Found a comment, remove everything from here to end of line
192- break ;
261+ // Not inside string, backtick or regex
262+ // Start single-quoted string
263+ if ( $ char === " ' " ) {
264+ $ inSingle = true ;
265+ $ out .= $ char ;
266+ continue ;
193267 }
194- }
195268
196- $ result .= $ char ;
197- }
269+ // Start double-quoted string
270+ if ($ char === '" ' ) {
271+ $ inDouble = true ;
272+ $ out .= $ char ;
273+ continue ;
274+ }
275+
276+ // Start backtick template literal
277+ if ($ char === '` ' ) {
278+ $ inBacktick = true ;
279+ $ out .= $ char ;
280+ continue ;
281+ }
282+
283+ // Detect start of comment
284+ if ($ char === '/ ' && $ next === '/ ' ) {
285+ // Ensure '//' isn't part of a url (http://) — check previous char
286+ $ prevIndex = strlen ($ out ) - 1 ;
287+ $ prevChar = $ prevIndex >= 0 ? $ out [$ prevIndex ] : '' ;
288+ if ($ prevChar === ': ' ) {
289+ // it's likely a URL-like, keep it
290+ $ out .= $ char ;
291+ continue ;
292+ }
293+
294+ // Skip until end of line
295+ $ i += 2 ; // skip the //
296+ while ($ i < $ length && $ content [$ i ] !== "\n" && $ content [$ i ] !== "\r" ) {
297+ $ i ++;
298+ }
299+ // Append newline if present (preserve newline to keep structure)
300+ if ($ i < $ length && $ content [$ i ] === "\r" ) {
301+ $ out .= "\r" ;
302+ if ($ i + 1 < $ length && $ content [$ i + 1 ] === "\n" ) {
303+ $ out .= "\n" ;
304+ $ i ++;
305+ }
306+ } elseif ($ i < $ length && $ content [$ i ] === "\n" ) {
307+ $ out .= "\n" ;
308+ }
309+ continue ;
310+ }
311+
312+ // Potential start of regex literal
313+ if ($ char === '/ ' ) {
314+ // Heuristic: regex often comes after these characters or at start
315+ $ prevNonSpaceIndex = strlen ($ out ) - 1 ;
316+ while ($ prevNonSpaceIndex >= 0 && ctype_space ($ out [$ prevNonSpaceIndex ])) {
317+ $ prevNonSpaceIndex --;
318+ }
319+ $ prevNonSpaceChar = $ prevNonSpaceIndex >= 0 ? $ out [$ prevNonSpaceIndex ] : '' ;
320+
321+ if ($ prevNonSpaceChar === '' || in_array ($ prevNonSpaceChar , ['= ' , '( ' , '[ ' , ', ' , ': ' , '? ' , '! ' , '{ ' , '} ' , '; ' , '+ ' , '- ' , '* ' , '/ ' , '% ' ])) {
322+ // This is likely a regex
323+ $ inRegex = true ;
324+ $ out .= $ char ;
325+ continue ;
326+ }
327+ // Otherwise it's a division operator
328+ }
329+
330+ // Default: append char
331+ $ out .= $ char ;
332+ }
198333
199- return $ result ;
334+ return $ out ;
200335 }
201336}
0 commit comments