Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix] Reading UTF-8 encoded source code from stdin on Windows throws UnicodeEncodeError #430

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
37 changes: 27 additions & 10 deletions autopep8.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/usr/bin/env python

# Copyright (C) 2010-2011 Hideo Hattori
# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
# Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling
Expand Down Expand Up @@ -1286,8 +1284,8 @@ def fix_w504(self, result):
if m:
next_line_indent = m.span()[1]
self.source[line_index + 1] = '{}{} {}'.format(
next_line[:next_line_indent], target_operator,
next_line[next_line_indent:])
next_line[:next_line_indent], target_operator,
next_line[next_line_indent:])

def fix_w605(self, result):
(line_index, _, target) = get_index_offset_contents(result,
Expand All @@ -1298,7 +1296,7 @@ def fix_w605(self, result):
return
for (pos, _msg) in get_w605_position(tokens):
self.source[line_index] = '{}r{}'.format(
target[:pos], target[pos:])
target[:pos], target[pos:])


def get_w605_position(tokens):
Expand Down Expand Up @@ -1797,7 +1795,7 @@ def _shorten_line(tokens, source, indentation, indent_word,

second_indent = indentation
if (first.rstrip().endswith('(') and
source[end_offset:].lstrip().startswith(')')):
source[end_offset:].lstrip().startswith(')')):
pass
elif first.rstrip().endswith('('):
second_indent += indent_word
Expand Down Expand Up @@ -3234,9 +3232,11 @@ def fix_code(source, options=None, encoding=None, apply_config=False):
"""
options = _get_options(options, apply_config)

if hasattr(source, 'readlines'):
return fix_lines(source.readlines(), options=options)

if not isinstance(source, unicode):
source = source.decode(encoding or get_encoding())

sio = io.StringIO(source)
return fix_lines(sio.readlines(), options=options)

Expand Down Expand Up @@ -4009,6 +4009,11 @@ def wrap_output(output, encoding):
else output)


def wrap_input(input, encoding):
    """Return a text reader that decodes ``input`` using ``encoding``.

    ``input`` is either a binary stream (for example ``io.BytesIO``) or a
    text stream that exposes its underlying binary stream as ``.buffer``;
    in the latter case the bytes are decoded afresh with ``encoding``
    instead of the stream's own encoding.

    The reader is an ``io.TextIOWrapper`` rather than a
    ``codecs.StreamReader``: the codecs reader splits lines with
    ``str.splitlines()``, which also breaks on form feed, ``\\x1c``-``\\x1e``,
    ``\\x85``, ``\\u2028`` and ``\\u2029``. That yields a different line list
    than the ``io.StringIO`` based path for the very same source text.
    """
    byte_stream = getattr(input, 'buffer', input)
    # newline='' keeps the original line endings untranslated, so '\r\n'
    # input still reaches the caller as '\r\n'.
    return io.TextIOWrapper(byte_stream, encoding=encoding, newline='')


def get_encoding():
    """Return the locale's preferred encoding, else the interpreter default."""
    preferred = locale.getpreferredencoding()
    if preferred:
        return preferred
    return sys.getdefaultencoding()
Expand Down Expand Up @@ -4038,12 +4043,24 @@ def main(argv=None, apply_config=True):
if args.files == ['-']:
assert not args.in_place

encoding = sys.stdin.encoding or get_encoding()
from lib2to3.pgen2 import tokenize as lib2to3_tokenize
if hasattr(sys.stdin, 'buffer'):
data = sys.stdin.buffer.read()
else:
data = sys.stdin.read()
bytes_stdin_io = io.BytesIO(data)

# Detect the input encoding dynamically from the data itself.
input_encoding = \
lib2to3_tokenize.detect_encoding(bytes_stdin_io.readline)[0]
output_encoding = sys.stdin.encoding or get_encoding()

bytes_stdin_io = io.BytesIO(data)
stdin = wrap_input(bytes_stdin_io, input_encoding)
stdout = wrap_output(sys.stdout, output_encoding)
# LineEndingWrapper is unnecessary here due to the symmetry between
# standard in and standard out.
wrap_output(sys.stdout, encoding=encoding).write(
fix_code(sys.stdin.read(), args, encoding=encoding))
stdout.write(fix_code(stdin, args))
else:
if args.in_place or args.diff:
args.files = list(set(args.files))
Expand Down
2 changes: 2 additions & 0 deletions test/suite/windows-gb2312.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
utf8 = '测试一下gb2312编码'
print(gbk)
21 changes: 21 additions & 0 deletions test/test_gb2312_stdin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os
import platform
import re

def main():
    """Pipe the GB2312 sample through autopep8's stdin mode (Windows only).

    The sample ``suite/windows-gb2312.txt`` is fed on standard input, first
    to the ``autopep8`` found on PATH ("old version") and then to the
    ``autopep8.py`` one directory above this file ("new version"). Output
    is only printed for manual comparison; nothing is asserted. On any
    platform whose ``platform.platform()`` does not start with "windows"
    the function returns immediately.
    """
    import subprocess
    import sys

    os_name = platform.platform()
    if not re.match('windows', os_name, flags=re.IGNORECASE):
        return

    dir_path = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(dir_path, 'suite', 'windows-gb2312.txt')
    autopep8_path = os.path.join(dir_path, '..', 'autopep8.py')

    # Argument lists plus a file handle as stdin replace the former
    # ``type <path> | ...`` shell strings, which broke on paths containing
    # spaces or shell metacharacters. sys.executable makes the "new" run use
    # the interpreter that is running this script.
    runs = (
        ('test old version....', ['autopep8', '-']),
        ('test new version', [sys.executable, autopep8_path, '-']),
    )
    for banner, command in runs:
        print(banner)
        # Keep the banner ahead of the child's output when stdout is piped.
        sys.stdout.flush()
        try:
            with open(path, 'rb') as sample:
                subprocess.call(command, stdin=sample)
        except OSError as error:
            # A missing executable or sample file is reported, not fatal,
            # so the second run still happens.
            print('could not run %s: %s' % (command[0], error))


if __name__ == "__main__":
    main()