11# -*- coding: utf-8 -*-
22
33import unittest
4- from test .support import script_helper , captured_stdout , requires_subprocess , requires_resource
4+ from test import support
5+ from test .support import script_helper
56from test .support .os_helper import TESTFN , unlink , rmtree
67from test .support .import_helper import unload
78import importlib
@@ -64,7 +65,7 @@ def test_issue7820(self):
6465 # two bytes in common with the UTF-8 BOM
6566 self .assertRaises (SyntaxError , eval , b'\xef \xbb \x20 ' )
6667
67- @requires_subprocess ()
68+ @support . requires_subprocess ()
6869 def test_20731 (self ):
6970 sub = subprocess .Popen ([sys .executable ,
7071 os .path .join (os .path .dirname (__file__ ),
@@ -268,6 +269,17 @@ def test_second_non_utf8_coding_line(self):
268269 b'print(ascii("\xc3 \xa4 "))\n ' )
269270 self .check_script_output (src , br"'\xc3\u20ac'" )
270271
272+ def test_first_utf8_coding_line_error (self ):
273+ src = (b'#coding:ascii \xc3 \xa4 \n '
274+ b'raise RuntimeError\n ' )
275+ self .check_script_error (src , br"(\(unicode error\) )?'ascii' codec can't decode byte" )
276+
277+ def test_second_utf8_coding_line_error (self ):
278+ src = (b'#!/usr/bin/python\n '
279+ b'#coding:ascii \xc3 \xa4 \n '
280+ b'raise RuntimeError\n ' )
281+ self .check_script_error (src , br"(\(unicode error\) )?'ascii' codec can't decode byte" )
282+
271283 def test_utf8_bom (self ):
272284 src = (b'\xef \xbb \xbf print(ascii("\xc3 \xa4 "))\n ' )
273285 self .check_script_output (src , br"'\xe4'" )
@@ -283,10 +295,80 @@ def test_utf8_bom_and_utf8_coding_line(self):
283295 b'print(ascii("\xc3 \xa4 "))\n ' )
284296 self .check_script_output (src , br"'\xe4'" )
285297
286- def test_utf8_non_utf8_comment_line_error (self ):
298+ def test_utf8_bom_and_non_utf8_first_coding_line (self ):
299+ src = (b'\xef \xbb \xbf #coding:iso-8859-15\n '
300+ b'raise RuntimeError\n ' )
301+ self .check_script_error (src ,
302+ br"encoding problem: iso-8859-15 with BOM" ,
303+ lineno = 1 )
304+
305+ def test_utf8_bom_and_non_utf8_second_coding_line (self ):
306+ src = (b'\xef \xbb \xbf #first\n '
307+ b'#coding:iso-8859-15\n '
308+ b'raise RuntimeError\n ' )
309+ self .check_script_error (src ,
310+ br"encoding problem: iso-8859-15 with BOM" ,
311+ lineno = 2 )
312+
313+ def test_non_utf8_shebang (self ):
314+ src = (b'#!/home/\xa4 /bin/python\n '
315+ b'#coding:iso-8859-15\n '
316+ b'print(ascii("\xc3 \xa4 "))\n ' )
317+ self .check_script_output (src , br"'\xc3\u20ac'" )
318+
319+ def test_utf8_shebang_error (self ):
320+ src = (b'#!/home/\xc3 \xa4 /bin/python\n '
321+ b'#coding:ascii\n '
322+ b'raise RuntimeError\n ' )
323+ self .check_script_error (src , br"(\(unicode error\) )?'ascii' codec can't decode byte" )
324+
325+ def test_non_utf8_shebang_error (self ):
326+ src = (b'#!/home/\xa4 /bin/python\n '
327+ b'raise RuntimeError\n ' )
328+ self .check_script_error (src , br"Non-UTF-8 code starting with .* on line 1" ,
329+ lineno = 1 )
330+
331+ def test_non_utf8_second_line_error (self ):
332+ src = (b'#first\n '
333+ b'#second\xa4 \n '
334+ b'raise RuntimeError\n ' )
335+ self .check_script_error (src ,
336+ br"Non-UTF-8 code starting with .* on line 2" ,
337+ lineno = 2 )
338+
339+ def test_non_utf8_third_line_error (self ):
340+ src = (b'#first\n '
341+ b'#second\n '
342+ b'#third\xa4 \n '
343+ b'raise RuntimeError\n ' )
344+ self .check_script_error (src ,
345+ br"Non-UTF-8 code starting with .* on line 3" ,
346+ lineno = 3 )
347+
348+ def test_utf8_bom_non_utf8_third_line_error (self ):
349+ src = (b'\xef \xbb \xbf #first\n '
350+ b'#second\n '
351+ b'#third\xa4 \n '
352+ b'raise RuntimeError\n ' )
353+ self .check_script_error (src ,
354+ br"Non-UTF-8 code starting with .* on line 3|"
355+ br"'utf-8' codec can't decode byte" ,
356+ lineno = 3 )
357+
358+ def test_utf_8_non_utf8_third_line_error (self ):
359+ src = (b'#coding: utf-8\n '
360+ b'#second\n '
361+ b'#third\xa4 \n '
362+ b'raise RuntimeError\n ' )
363+ self .check_script_error (src ,
364+ br"Non-UTF-8 code starting with .* on line 3|"
365+ br"'utf-8' codec can't decode byte" ,
366+ lineno = 3 )
367+
368+ def test_utf8_non_utf8_third_line_error (self ):
287369 src = (b'#coding: utf8\n '
288- b'#\n '
289- b'#\xa4 \n '
370+ b'#second \n '
371+ b'#third \xa4 \n '
290372 b'raise RuntimeError\n ' )
291373 self .check_script_error (src ,
292374 br"'utf-8' codec can't decode byte|"
@@ -327,7 +409,7 @@ def test_nul_in_second_coding_line(self):
327409class UTF8ValidatorTest (unittest .TestCase ):
328410 @unittest .skipIf (not sys .platform .startswith ("linux" ),
329411 "Too slow to run on non-Linux platforms" )
330- @requires_resource ('cpu' )
412+ @support . requires_resource ('cpu' )
331413 def test_invalid_utf8 (self ):
332414 # This is a port of test_utf8_decode_invalid_sequences in
333415 # test_unicode.py to exercise the separate utf8 validator in
@@ -393,19 +475,29 @@ def check(content):
393475 check (b'\xF4 ' + cb + b'\xBF \xBF ' )
394476
395477
478+ @support .force_not_colorized_test_class
396479class BytesSourceEncodingTest (AbstractSourceEncodingTest , unittest .TestCase ):
397480
398481 def check_script_output (self , src , expected ):
399- with captured_stdout () as stdout :
482+ with support . captured_stdout () as stdout :
400483 exec (src )
401484 out = stdout .getvalue ().encode ('latin1' )
402485 self .assertEqual (out .rstrip (), expected )
403486
404- def check_script_error (self , src , expected ):
405- with self .assertRaisesRegex (SyntaxError , expected . decode () ) as cm :
487+ def check_script_error (self , src , expected , lineno = ... ):
488+ with self .assertRaises (SyntaxError ) as cm :
406489 exec (src )
490+ exc = cm .exception
491+ self .assertRegex (str (exc ), expected .decode ())
492+ if lineno is not ...:
493+ self .assertEqual (exc .lineno , lineno )
494+ line = src .splitlines ()[lineno - 1 ].decode (errors = 'replace' )
495+ if lineno == 1 :
496+ line = line .removeprefix ('\ufeff ' )
497+ self .assertEqual (line , exc .text )
407498
408499
500+ @support .force_not_colorized_test_class
409501class FileSourceEncodingTest (AbstractSourceEncodingTest , unittest .TestCase ):
410502
411503 def check_script_output (self , src , expected ):
@@ -416,13 +508,22 @@ def check_script_output(self, src, expected):
416508 res = script_helper .assert_python_ok (fn )
417509 self .assertEqual (res .out .rstrip (), expected )
418510
419- def check_script_error (self , src , expected ):
511+ def check_script_error (self , src , expected , lineno = ... ):
420512 with tempfile .TemporaryDirectory () as tmpd :
421513 fn = os .path .join (tmpd , 'test.py' )
422514 with open (fn , 'wb' ) as fp :
423515 fp .write (src )
424516 res = script_helper .assert_python_failure (fn )
425- self .assertRegex (res .err .rstrip ().splitlines ()[- 1 ], b'SyntaxError.*?' + expected )
517+ err = res .err .rstrip ()
518+ self .assertRegex (err .splitlines ()[- 1 ], b'SyntaxError: ' + expected )
519+ if lineno is not ...:
520+ self .assertIn (f', line { lineno } \n ' .encode (),
521+ err .replace (os .linesep .encode (), b'\n ' ))
522+ line = src .splitlines ()[lineno - 1 ].decode (errors = 'replace' )
523+ if lineno == 1 :
524+ line = line .removeprefix ('\ufeff ' )
525+ self .assertIn (line .encode (), err )
526+
426527
427528
428529if __name__ == "__main__" :
0 commit comments