From b12230df66609249f8686cc7b3fb4407860a9f8c Mon Sep 17 00:00:00 2001
From: Daniele Palombo <387690+DanielePalombo@users.noreply.github.com>
Date: Fri, 7 Jul 2023 15:10:11 +0200
Subject: [PATCH] Fix encoding problem

incompatible character encodings: ASCII-8BIT and UTF-8

Taken from simplecov-ruby/simplecov#866
---
 .../formatters/html/formatter.rb | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/lib/deprecations_collector/formatters/html/formatter.rb b/lib/deprecations_collector/formatters/html/formatter.rb
index c51a974..66da1d6 100644
--- a/lib/deprecations_collector/formatters/html/formatter.rb
+++ b/lib/deprecations_collector/formatters/html/formatter.rb
@@ -54,7 +54,35 @@ def formatted_source_file(source_file)
         end
 
         def readfile(source_file)
-          File.open(filename(source_file), "rb", &:readlines)
+          load_source(filename(source_file))
+        end
+
+        def load_source(file_name)
+          lines = []
+          # The default encoding is UTF-8
+          File.open(file_name, "rb:UTF-8") do |file|
+            line = file.gets
+
+            # Check for shebang
+            if /\A#!/.match?(line)
+              lines << line
+              line = file.gets
+            end
+            return lines unless line
+
+            check_magic_comment(file, line)
+            lines.concat([line], file.readlines)
+          end
+
+          lines
+        end
+
+        def check_magic_comment(file, line)
+          # Check for encoding magic comment
+          # Encoding magic comment must be placed at first line except for shebang
+          if (match = /\A#\s*(?:-\*-)?\s*(?:en)?coding:\s*(\S+)\s*(?:-\*-)?\s*\z/.match(line))
+            file.set_encoding(match[1], "UTF-8")
+          end
         end
 
         def grouped(files)