Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert source file encoding to UTF-8 #865

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions .rubocop.yml
@@ -1,6 +1,9 @@
AllCops:
Exclude:
- "spec/fixtures/iso-8859.rb"
- "spec/fixtures/utf-8.rb"
- "spec/fixtures/utf-8-magic.rb"
- "spec/fixtures/euc-jp.rb"
- "tmp/**/*"
- "vendor/bundle/**/*"
- "vendor/bundle/**/.*"
Expand Down
28 changes: 27 additions & 1 deletion lib/simplecov/source_file.rb
Expand Up @@ -25,7 +25,7 @@ def project_filename
# Returns the source lines of this file, memoized in @src.
#
# NOTE(review): the two assignments below look like the removed and the
# added side of a one-line diff change whose +/- markers were lost. As
# written, the first assignment populates @src with the raw binary lines,
# so the second `||=` never calls load_source -- confirm that the real
# file keeps only the load_source line.
def src
# We intentionally read source code lazily to
# suppress reading unused source code.
@src ||= File.open(filename, "rb", &:readlines)
@src ||= load_source
end
alias source src

Expand Down Expand Up @@ -175,6 +175,32 @@ def build_no_cov_chunks
end
end

# Reads the file at +filename+ and returns its lines as UTF-8 strings.
#
# The file is opened in binary mode with UTF-8 as the default external
# encoding. A leading shebang line is kept as read. The first line after
# it is passed to check_magic_comment, which may change the encoding of
# the stream before the remaining lines are read.
#
# Bytes that are invalid in the resulting strings are replaced with the
# replacement character, so every returned line has a valid encoding.
#
# @return [Array<String>] the source lines including their line
#   terminators; an empty array for an empty file
def load_source
  lines = []
  # The default encoding is UTF-8
  File.open(filename, "rb:UTF-8") do |file|
    line = file.gets

    # Check for shebang. A plain prefix comparison is used instead of a
    # regexp because regexp matching may raise on a line with invalid
    # bytes; `&.` covers an empty file, where gets returns nil.
    if line&.start_with?("#!")
      lines << line
      line = file.gets
    end

    if line
      # This line was read as UTF-8; clean it before it is matched
      # against the magic comment pattern.
      line.scrub!
      check_magic_comment(file, line)
      lines << line
      lines.concat(file.readlines)
    end
  end

  # Replace any remaining invalid byte sequences (for example a shebang
  # line or a file without magic comment that is not really UTF-8).
  lines.each(&:scrub!)
end

# Applies an encoding magic comment to the open source file.
#
# When +line+ is an encoding magic comment (for example
# "# encoding: EUC-JP" or "# -*- coding: euc-jp -*-"), the external
# encoding of +file+ is set to the named encoding with UTF-8 as the
# internal encoding. Otherwise +file+ is left untouched.
#
# The keyword is matched case-insensitively and both ":" and "=" are
# accepted as separator, matching what the Ruby parser accepts.
#
# @param file [IO] the open source file; must respond to set_encoding
# @param line [String] the candidate line
# @return [Object, nil] the result of set_encoding, or nil when +line+ is
#   not an encoding magic comment
def check_magic_comment(file, line)
  # Check for encoding magic comment
  # Encoding magic comment must be placed at first line except for shebang
  # The line is scrubbed first because matching a regexp against a string
  # with invalid bytes may raise.
  match = /\A#\s*(?:-\*-)?\s*(?:en)?coding[:=]\s*(\S+)\s*(?:-\*-)?\s*\z/i.match(line.scrub)
  file.set_encoding(match[1], "UTF-8") if match
end

def build_lines
coverage_exceeding_source_warn if coverage_data["lines"].size > src.size
lines = src.map.with_index(1) do |src, i|
Expand Down
2 changes: 2 additions & 0 deletions spec/fixtures/euc-jp.rb
@@ -0,0 +1,2 @@
# encoding: EUC-JP
puts "135��C"
2 changes: 2 additions & 0 deletions spec/fixtures/utf-8-magic.rb
@@ -0,0 +1,2 @@
# encoding: UTF-8
puts "135°C"
38 changes: 38 additions & 0 deletions spec/source_file_spec.rb
Expand Up @@ -695,4 +695,42 @@
end
end
end

context "a file contains non-ASCII characters" do
  # Coverage data with a single line entry.
  # NOTE(review): presumably utf-8.rb is a one-line fixture -- confirm.
  COVERAGE_FOR_SINGLE_LINE = {"lines" => [nil]}.freeze
  # Coverage data for the two-line fixtures: the magic comment line is not
  # relevant (nil) and the following `puts` line was executed once.
  COVERAGE_FOR_DOUBLE_LINES = {"lines" => [nil, 1]}.freeze

  # Every source line must come back as a valid UTF-8 string, whatever the
  # encoding of the file on disk is.
  shared_examples_for "converting to UTF-8" do
    it "has all source lines of encoding UTF-8" do
      subject.lines.each do |line|
        expect(line.source.encoding).to eq(Encoding::UTF_8)
        expect(line.source).to be_valid_encoding
      end
    end
  end

  describe "UTF-8 without magic comment" do
    subject do
      SimpleCov::SourceFile.new(source_fixture("utf-8.rb"), COVERAGE_FOR_SINGLE_LINE)
    end

    it_behaves_like "converting to UTF-8"
  end

  describe "UTF-8 with magic comment" do
    subject do
      SimpleCov::SourceFile.new(source_fixture("utf-8-magic.rb"), COVERAGE_FOR_DOUBLE_LINES)
    end

    it_behaves_like "converting to UTF-8"
  end

  describe "EUC-JP with magic comment" do
    subject do
      SimpleCov::SourceFile.new(source_fixture("euc-jp.rb"), COVERAGE_FOR_DOUBLE_LINES)
    end

    it_behaves_like "converting to UTF-8"
  end
end
end