diff --git a/.rubocop.yml b/.rubocop.yml index ce1bd2aa..9c6eebd0 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,6 +1,12 @@ AllCops: Exclude: - "spec/fixtures/iso-8859.rb" + - "spec/fixtures/utf-8.rb" + - "spec/fixtures/utf-8-magic.rb" + - "spec/fixtures/euc-jp.rb" + - "spec/fixtures/empty_euc-jp.rb" + - "spec/fixtures/euc-jp-shebang.rb" + - "test_projects/encodings/**/*" - "tmp/**/*" - "vendor/bundle/**/*" - "vendor/bundle/**/.*" diff --git a/Gemfile b/Gemfile index feb1b1c0..3fda3c41 100644 --- a/Gemfile +++ b/Gemfile @@ -6,7 +6,7 @@ source "https://rubygems.org" # gem "simplecov-html", path: File.dirname(__FILE__) + "/../simplecov-html" # Uncomment this to use development version of html formatter from github -# gem "simplecov-html", github: "colszowka/simplecov-html" +gem "simplecov-html", github: "colszowka/simplecov-html", branch: "revert-encoding-replace" group :development do gem "apparition", "0.5.0" diff --git a/Gemfile.lock b/Gemfile.lock index d1958615..e016d71d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,3 +1,10 @@ +GIT + remote: https://github.com/colszowka/simplecov-html.git + revision: abf08dc387856f2c22e45e33d7c786d5bfc57d5e + branch: revert-encoding-replace + specs: + simplecov-html (0.12.0) + GIT remote: https://github.com/cucumber/aruba.git revision: 8b7dc956daf47051237700488e8d866fb229db29 @@ -111,7 +118,6 @@ GEM ruby-progressbar (~> 1.7) unicode-display_width (>= 1.4.0, < 1.7) ruby-progressbar (1.10.1) - simplecov-html (0.12.0) spoon (0.0.6) ffi test-unit (3.3.5) @@ -142,7 +148,8 @@ DEPENDENCIES rspec (~> 3.2) rubocop simplecov! + simplecov-html! test-unit BUNDLED WITH - 2.1.2 + 2.1.4 diff --git a/features/encoding.feature b/features/encoding.feature new file mode 100644 index 00000000..0db8e1c5 --- /dev/null +++ b/features/encoding.feature @@ -0,0 +1,38 @@ +@rspec + +Feature: + + We've experienced some problems given source file encoding. 
+ We want to make sure we try the appropriate encoding and + can display it correctly in the formatter. + + Background: + Given I'm working on the project "encodings" + + Scenario: Running tests produces coverage and it's mostly legible + When I open the coverage report generated with `bundle exec rspec spec` + Then I should see the groups: + | name | coverage | files | + | All Files | 55.56% | 4 | + + When I open the detailed view for "lib/utf8.rb" + Then "�" should not be visible + And "🇯🇵" should be visible + And "おはよう" should be visible + + + When I close the detailed view + And I open the detailed view for "lib/euc_jp.rb" + Then "�" should not be visible + And "おはよう" should be visible + + When I close the detailed view + And I open the detailed view for "lib/euc_jp_not_declared.rb" + Then "�" should not be visible + And "Fun3" should be visible + + When I close the detailed view + And I open the detailed view for "lib/euc_jp_not_declared_tracked.rb" + # no way around it I guess + Then "�" should be visible + And "NoDeclare" should be visible diff --git a/features/step_definitions/html_steps.rb b/features/step_definitions/html_steps.rb index 14b69d7a..a2cffa7b 100644 --- a/features/step_definitions/html_steps.rb +++ b/features/step_definitions/html_steps.rb @@ -63,6 +63,10 @@ expect(page).to have_css(".header h3", visible: true, text: file_path) end +When "I close the detailed view" do + click_button "cboxClose" +end + Then /^I should see coverage branch data like "(.+)"$/ do |text| expect(find(".hits", visible: true, text: text)).to be_truthy end diff --git a/features/step_definitions/web_steps.rb b/features/step_definitions/web_steps.rb index e6a7f27a..6a3bedba 100644 --- a/features/step_definitions/web_steps.rb +++ b/features/step_definitions/web_steps.rb @@ -57,6 +57,17 @@ def with_scope(locator) end end +# the default in our settings is still to check invisible content +# as well and until we change that these steps similar to "should (not) +# see" are 
necessary +Then "{string} should be visible" do |text| + expect(page).to have_content(:visible, text) +end + +Then "{string} should not be visible" do |text| + expect(page).to have_no_content(:visible, text) +end + Then /^show me the page$/ do save_and_open_page # rubocop:disable Lint/Debugger end diff --git a/lib/simplecov/source_file.rb b/lib/simplecov/source_file.rb index af0922d9..4cb9f632 100644 --- a/lib/simplecov/source_file.rb +++ b/lib/simplecov/source_file.rb @@ -25,7 +25,7 @@ def project_filename def src # We intentionally read source code lazily to # suppress reading unused source code. - @src ||= File.open(filename, "rb", &:readlines) + @src ||= load_source end alias source src @@ -175,6 +175,51 @@ def build_no_cov_chunks end end + def load_source + lines = [] + # The default encoding is UTF-8 + File.open(filename, "rb:UTF-8") do |file| + current_line = file.gets + + if shebang?(current_line) + lines << current_line + current_line = file.gets + end + + read_lines(file, lines, current_line) + end + end + + SHEBANG_REGEX = /\A#!/.freeze + def shebang?(line) + SHEBANG_REGEX.match?(line) + end + + def read_lines(file, lines, current_line) + return lines unless current_line + + set_encoding_based_on_magic_comment(file, current_line) + lines.concat([current_line], ensure_remove_undefs(file.readlines)) + end + + RUBY_FILE_ENCODING_MAGIC_COMMENT_REGEX = /\A#\s*(?:-\*-)?\s*(?:en)?coding:\s*(\S+)\s*(?:-\*-)?\s*\z/.freeze + def set_encoding_based_on_magic_comment(file, line) + # Check for encoding magic comment + # Encoding magic comment must be placed at first line except for shebang + if (match = RUBY_FILE_ENCODING_MAGIC_COMMENT_REGEX.match(line)) + file.set_encoding(match[1], "UTF-8") + end + end + + def ensure_remove_undefs(file_lines) + # invalid/undef replace are technically not really necessary but nice to + # have and work around a JRuby incompatibility. Also moved here from + # simplecov-html to have encoding shenanigans in one place. 
See #866 + # also setting these option on `file.set_encoding` doesn't seem to work + # properly so it has to be done here. + file_lines.each { |line| line.encode!("UTF-8", invalid: :replace, undef: :replace) } + end + def build_lines coverage_exceeding_source_warn if coverage_data["lines"].size > src.size lines = src.map.with_index(1) do |src, i| diff --git a/spec/fixtures/empty_euc-jp.rb b/spec/fixtures/empty_euc-jp.rb new file mode 100644 index 00000000..e69de29b diff --git a/spec/fixtures/euc-jp-shebang.rb b/spec/fixtures/euc-jp-shebang.rb new file mode 100644 index 00000000..fb2858bb --- /dev/null +++ b/spec/fixtures/euc-jp-shebang.rb @@ -0,0 +1,3 @@ +#!/usr/bin/env ruby +# encoding: EUC-JP +puts "135C" diff --git a/spec/fixtures/euc-jp.rb b/spec/fixtures/euc-jp.rb new file mode 100644 index 00000000..0eecd26e --- /dev/null +++ b/spec/fixtures/euc-jp.rb @@ -0,0 +1,2 @@ +# encoding: EUC-JP +puts "135C" diff --git a/spec/fixtures/utf-8-magic.rb b/spec/fixtures/utf-8-magic.rb new file mode 100644 index 00000000..64996ba2 --- /dev/null +++ b/spec/fixtures/utf-8-magic.rb @@ -0,0 +1,2 @@ +# encoding: UTF-8 +puts "135°C" diff --git a/spec/source_file_spec.rb b/spec/source_file_spec.rb index 48ad3cc2..86f62b3a 100644 --- a/spec/source_file_spec.rb +++ b/spec/source_file_spec.rb @@ -695,4 +695,82 @@ end end end + + context "a file contains non-ASCII characters" do + COVERAGE_FOR_SINGLE_LINE = {"lines" => [nil]}.freeze + COVERAGE_FOR_DOUBLE_LINES = {"lines" => [nil, 1]}.freeze + COVERAGE_FOR_TRIPLE_LINES = {"lines" => [nil, nil, 1]}.freeze + DEGREE_135_LINE = "puts \"135°C\"\n" + + shared_examples_for "converting to UTF-8" do + it "has all source lines of encoding UTF-8" do + subject.lines.each do |line| + expect(line.source.encoding).to eq(Encoding::UTF_8) + expect(line.source).to be_valid_encoding + end + end + end + + describe "UTF-8 without magic comment" do + subject do + SimpleCov::SourceFile.new(source_fixture("utf-8.rb"), COVERAGE_FOR_SINGLE_LINE) + end + + 
it_behaves_like "converting to UTF-8" + + it "has the line with 135°C" do + expect(subject.line(1).source).to eq DEGREE_135_LINE + end + end + + describe "UTF-8 with magic comment" do + subject do + SimpleCov::SourceFile.new(source_fixture("utf-8-magic.rb"), COVERAGE_FOR_DOUBLE_LINES) + end + + it_behaves_like "converting to UTF-8" + + it "has the line with 135°C" do + expect(subject.line(2).source).to eq DEGREE_135_LINE + end + end + + describe "EUC-JP with magic comment" do + subject do + SimpleCov::SourceFile.new(source_fixture("euc-jp.rb"), COVERAGE_FOR_DOUBLE_LINES) + end + + it_behaves_like "converting to UTF-8" + + it "has the line with 135°C" do + expect(subject.line(2).source).to eq DEGREE_135_LINE + end + end + + describe "EUC-JP with magic comment and shebang" do + subject do + SimpleCov::SourceFile.new(source_fixture("euc-jp-shebang.rb"), COVERAGE_FOR_TRIPLE_LINES) + end + + it_behaves_like "converting to UTF-8" + + it "has all the right lines" do + expect(subject.lines.map(&:source)).to eq [ + "#!/usr/bin/env ruby\n", + "# encoding: EUC-JP\n", + DEGREE_135_LINE + ] + end + end + + describe "empty euc-jp file" do + subject do + SimpleCov::SourceFile.new(source_fixture("empty_euc-jp.rb"), "lines" => []) + end + + it "has empty lines" do + expect(subject.lines).to be_empty + end + end + end end diff --git a/test_projects/encodings/lib/euc_jp.rb b/test_projects/encodings/lib/euc_jp.rb new file mode 100644 index 00000000..edbb0e54 --- /dev/null +++ b/test_projects/encodings/lib/euc_jp.rb @@ -0,0 +1,10 @@ +# encoding: EUC-JP +# frozen_string_literal: true + +class Fun2 + MSG = "Ϥ褦" + + def no_moji + "tada!" 
+ end +end diff --git a/test_projects/encodings/lib/euc_jp_not_declared.rb b/test_projects/encodings/lib/euc_jp_not_declared.rb new file mode 100644 index 00000000..96c7be2a --- /dev/null +++ b/test_projects/encodings/lib/euc_jp_not_declared.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class Fun3 + # ruby actually breaks on the not decalred multibyte characters, + # which is nice I guess + MSG = "Something Else" + + def no_moji + "tada!" + end +end diff --git a/test_projects/encodings/lib/euc_jp_not_declared_tracked.rb b/test_projects/encodings/lib/euc_jp_not_declared_tracked.rb new file mode 100644 index 00000000..1c90a3fc --- /dev/null +++ b/test_projects/encodings/lib/euc_jp_not_declared_tracked.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true +# Ruby can't even execute this file but it might be added +# via track_files or similar means and we still don't wanna crash! + +class NoDeclare + MSG = "Ϥ褦" + + def ?? + "tada!" + end +end diff --git a/test_projects/encodings/lib/utf8.rb b/test_projects/encodings/lib/utf8.rb new file mode 100644 index 00000000..b4faaa27 --- /dev/null +++ b/test_projects/encodings/lib/utf8.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +class Fun + MSG = "おはよう" + + def 🇯🇵 + "tada!" + end +end diff --git a/test_projects/encodings/spec/simple_spec.rb b/test_projects/encodings/spec/simple_spec.rb new file mode 100644 index 00000000..a4aa1399 --- /dev/null +++ b/test_projects/encodings/spec/simple_spec.rb @@ -0,0 +1,7 @@ +require_relative "spec_helper" + +describe Fun do + it "call things" do + expect(subject.🇯🇵).to eq "tada!" 
+ end +end diff --git a/test_projects/encodings/spec/spec_helper.rb b/test_projects/encodings/spec/spec_helper.rb new file mode 100644 index 00000000..8c23ba9c --- /dev/null +++ b/test_projects/encodings/spec/spec_helper.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +require "simplecov" + +SimpleCov.start do + track_files "lib/euc_jp_not_declared_tracked.rb" +end + +require_relative "../lib/utf8.rb" +require_relative "../lib/euc_jp.rb" +require_relative "../lib/euc_jp_not_declared.rb"