From 217af1d38db3e1e875180c6fa160f0fc80e46003 Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Tue, 28 Aug 2018 09:35:25 +0200
Subject: [PATCH] bpo-34403, bpo-34207: Fix test_utf8_mode.test_cmd_line()

Make the test more generic: instead of hardcoding the encoding, get
the locale encoding at runtime, and then make sure that the command
line is properly decoded from the locale encoding. Test also that the
UTF-8 Mode decodes command line arguments from UTF-8 with the C locale.
---
 Lib/test/test_utf8_mode.py | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
index 3e918fd54ce3ca..5af35aed614355 100644
--- a/Lib/test/test_utf8_mode.py
+++ b/Lib/test/test_utf8_mode.py
@@ -206,9 +206,6 @@ def test_locale_getpreferredencoding(self):
 
     @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
     def test_cmd_line(self):
-        arg = 'h\xe9\u20ac'.encode('utf-8')
-        arg_utf8 = arg.decode('utf-8')
-        arg_ascii = arg.decode('ascii', 'surrogateescape')
         code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'
 
         def check(utf8_opt, expected, **kw):
@@ -216,14 +213,26 @@ def check(utf8_opt, expected, **kw):
             args = out.partition(':')[2].rstrip()
             self.assertEqual(args, ascii(expected), out)
 
-        check('utf8', [arg_utf8])
-        if sys.platform == 'darwin' or support.is_android:
-            c_arg = arg_utf8
-        elif sys.platform.startswith("aix"):
-            c_arg = arg.decode('iso-8859-1')
-        else:
-            c_arg = arg_ascii
-        check('utf8=0', [c_arg], LC_ALL='C')
+        # UTF-8 Mode must use the UTF-8 encoding for any locale
+        arg = 'h\xe9\u20ac\U0010ffff'.encode('utf-8')
+        check('utf8', [arg.decode('utf-8')])
+        check('utf8', [arg.decode('utf-8')], LC_ALL='C')
+
+        # Non-ASCII byte string. Don't test Euro sign (U+20AC): Roman8 doesn't
+        # support it, and HP-UX uses Roman8 encoding for its C locale. The
+        # test just requires a single non-ASCII character to validate the code.
+        arg = b'h\xa7\xe9'
+
+        # Get the locale encoding when the UTF-8 mode is disabled
+        out = self.get_output('-X', 'utf8=0', '-c',
+                              'import locale; print(locale.getpreferredencoding())',
+                              LC_ALL='C')
+        encoding = out.rstrip()
+
+        # Check that the command line is decoded from the locale encoding
+        with self.subTest(encoding=encoding):
+            check('utf8=0', [arg.decode(encoding, 'surrogateescape')],
+                  LC_ALL='C')
 
     def test_optim_level(self):
         # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag