simplecov-ruby · PragTob · Feb 23, 2020 · Feb 14, 2020 · Feb 16, 2020 · Feb 16, 2020
diff --git a/.rubocop.yml b/.rubocop.yml
@@ -1,6 +1,12 @@
 AllCops:
   Exclude:
     - "spec/fixtures/iso-8859.rb"
+    - "spec/fixtures/utf-8.rb"
+    - "spec/fixtures/utf-8-magic.rb"
+    - "spec/fixtures/euc-jp.rb"
+    - "spec/fixtures/empty_euc-jp.rb"
+    - "spec/fixtures/euc-jp-shebang.rb"
+    - "test_projects/encodings/**/*"
     - "tmp/**/*"
     - "vendor/bundle/**/*"
     - "vendor/bundle/**/.*"

diff --git a/Gemfile b/Gemfile
@@ -6,7 +6,7 @@ source "https://rubygems.org"
 # gem "simplecov-html", path: File.dirname(__FILE__) + "/../simplecov-html"
 
 # Uncomment this to use development version of html formatter from github
-# gem "simplecov-html", github: "colszowka/simplecov-html"
+gem "simplecov-html", github: "colszowka/simplecov-html", branch: "revert-encoding-replace"
 
 group :development do
   gem "apparition", "0.5.0"

diff --git a/Gemfile.lock b/Gemfile.lock
@@ -1,3 +1,10 @@
+GIT
+  remote: https://github.com/colszowka/simplecov-html.git
+  revision: abf08dc387856f2c22e45e33d7c786d5bfc57d5e
+  branch: revert-encoding-replace
+  specs:
+    simplecov-html (0.12.0)
+
 GIT
   remote: https://github.com/cucumber/aruba.git
   revision: 8b7dc956daf47051237700488e8d866fb229db29
@@ -111,7 +118,6 @@ GEM
       ruby-progressbar (~> 1.7)
       unicode-display_width (>= 1.4.0, < 1.7)
     ruby-progressbar (1.10.1)
-    simplecov-html (0.12.0)
     spoon (0.0.6)
       ffi
     test-unit (3.3.5)
@@ -142,7 +148,8 @@ DEPENDENCIES
   rspec (~> 3.2)
   rubocop
   simplecov!
+  simplecov-html!
   test-unit
 
 BUNDLED WITH
-   2.1.2
+   2.1.4
diff --git a/features/encoding.feature b/features/encoding.feature
@@ -0,0 +1,38 @@
+@rspec
+
+Feature:
+
+  We've experienced some problems given source file encoding.
+  We want to make sure we try the appropriate encoding and
+  can display it correctly in the formatter.
+
+  Background:
+    Given I'm working on the project "encodings"
+
+  Scenario: Running tests produces coverage and it's mostly legible
+  When I open the coverage report generated with `bundle exec rspec spec`
+  Then I should see the groups:
+    | name      | coverage | files |
+    | All Files | 55.56%   | 4     |
+
+  When I open the detailed view for "lib/utf8.rb"
+  Then "�" should not be visible
+  And "🇯🇵" should be visible
+  And "おはよう" should be visible
+
+
+  When I close the detailed view
+  And I open the detailed view for "lib/euc_jp.rb"
+  Then "�" should not be visible
+  And "おはよう" should be visible
+
+  When I close the detailed view
+  And I open the detailed view for "lib/euc_jp_not_declared.rb"
+  Then "�" should not be visible
+  And "Fun3" should be visible
+
+  When I close the detailed view
+  And I open the detailed view for "lib/euc_jp_not_declared_tracked.rb"
+  # no way around it I guess
+  Then "�" should be visible
+  And "NoDeclare" should be visible
diff --git a/features/step_definitions/html_steps.rb b/features/step_definitions/html_steps.rb
@@ -63,6 +63,10 @@
   expect(page).to have_css(".header h3", visible: true, text: file_path)
 end
 
+When "I close the detailed view" do
+  click_button "cboxClose"
+end
+
 Then /^I should see coverage branch data like "(.+)"$/ do |text|
   expect(find(".hits", visible: true, text: text)).to be_truthy
 end
diff --git a/features/step_definitions/web_steps.rb b/features/step_definitions/web_steps.rb
@@ -57,6 +57,17 @@ def with_scope(locator)
   end
 end
 
+# the default in our settings is still to check invisible content
+# as well, and until we change that, these steps (similar to "should (not)
+# see") are necessary
+Then "{string} should be visible" do |text|
+  expect(page).to have_content(:visible, text)
+end
+
+Then "{string} should not be visible" do |text|
+  expect(page).to have_no_content(:visible, text)
+end
+
 Then /^show me the page$/ do
   save_and_open_page # rubocop:disable Lint/Debugger
 end

diff --git a/lib/simplecov/source_file.rb b/lib/simplecov/source_file.rb
@@ -25,7 +25,7 @@ def project_filename
     def src
       # We intentionally read source code lazily to
       # suppress reading unused source code.
-      @src ||= File.open(filename, "rb", &:readlines)
+      @src ||= load_source
     end
     alias source src
 
@@ -175,6 +175,51 @@ def build_no_cov_chunks
       end
     end
 
+    def load_source
+      lines = []
+      # Returns the file's lines; read in binary mode, tagged as UTF-8 (the default)
+      File.open(filename, "rb:UTF-8") do |file|
+        current_line = file.gets
+        # keep a leading shebang line and inspect the line following it instead
+        if shebang?(current_line)
+          lines << current_line
+          current_line = file.gets
+        end
+        # current_line is nil here when there was nothing (more) to read
+        read_lines(file, lines, current_line)
+      end
+    end
+
+    SHEBANG_REGEX = /\A#!/.freeze
+    def shebang?(line)
+      SHEBANG_REGEX.match?(line)
+    end
+
+    def read_lines(file, lines, current_line)
+      return lines unless current_line
+
+      set_encoding_based_on_magic_comment(file, current_line)
+      lines.concat([current_line], ensure_remove_undefs(file.readlines))
+    end
+
+    RUBY_FILE_ENCODING_MAGIC_COMMENT_REGEX = /\A#\s*(?:-\*-)?\s*(?:en)?coding:\s*(\S+)\s*(?:-\*-)?\s*\z/.freeze
+    def set_encoding_based_on_magic_comment(file, line)
+      # An encoding magic comment must be placed at the first line (shebang excepted).
+      # If line is one, file is switched to that encoding, with UTF-8 as internal one.
+      if (match = RUBY_FILE_ENCODING_MAGIC_COMMENT_REGEX.match(line))
+        file.set_encoding(match[1], "UTF-8")
+      end
+    end
+
+    def ensure_remove_undefs(file_lines)
+      # Converts each line to UTF-8 in place and returns the same array.
+      # invalid/undef replace are technically not really necessary but nice to have
+      # and work around a JRuby incompatibility. Moved here from simplecov-html to
+      # keep encoding shenanigans in one place, see #866. Setting these options on
+      # `file.set_encoding` doesn't seem to work properly, so it is done here.
+      file_lines.each { |line| line.encode!("UTF-8", invalid: :replace, undef: :replace) }
+    end
+
     def build_lines
       coverage_exceeding_source_warn if coverage_data["lines"].size > src.size
       lines = src.map.with_index(1) do |src, i|

diff --git a/spec/fixtures/empty_euc-jp.rb b/spec/fixtures/empty_euc-jp.rb
diff --git a/spec/fixtures/euc-jp-shebang.rb b/spec/fixtures/euc-jp-shebang.rb
@@ -0,0 +1,3 @@
+#!/usr/bin/env ruby
+# encoding: EUC-JP
+puts "135��C"
diff --git a/spec/fixtures/euc-jp.rb b/spec/fixtures/euc-jp.rb
@@ -0,0 +1,2 @@
+# encoding: EUC-JP
+puts "135��C"
diff --git a/spec/fixtures/utf-8-magic.rb b/spec/fixtures/utf-8-magic.rb
@@ -0,0 +1,2 @@
+# encoding: UTF-8
+puts "135°C"
diff --git a/spec/source_file_spec.rb b/spec/source_file_spec.rb
@@ -695,4 +695,82 @@
       end
     end
   end
+
+  context "a file contains non-ASCII characters" do
+    COVERAGE_FOR_SINGLE_LINE = {"lines" => [nil]}.freeze
+    COVERAGE_FOR_DOUBLE_LINES = {"lines" => [nil, 1]}.freeze
+    COVERAGE_FOR_TRIPLE_LINES = {"lines" => [nil, nil, 1]}.freeze
+    DEGREE_135_LINE = "puts \"135°C\"\n"
+
+    shared_examples_for "converting to UTF-8" do
+      it "has all source lines of encoding UTF-8" do
+        subject.lines.each do |line|
+          expect(line.source.encoding).to eq(Encoding::UTF_8)
+          expect(line.source).to be_valid_encoding
+        end
+      end
+    end
+
+    describe "UTF-8 without magic comment" do
+      subject do
+        SimpleCov::SourceFile.new(source_fixture("utf-8.rb"), COVERAGE_FOR_SINGLE_LINE)
+      end
+
+      it_behaves_like "converting to UTF-8"
+
+      it "has the line with 135°C" do
+        expect(subject.line(1).source).to eq DEGREE_135_LINE
+      end
+    end
+
+    describe "UTF-8 with magic comment" do
+      subject do
+        SimpleCov::SourceFile.new(source_fixture("utf-8-magic.rb"), COVERAGE_FOR_DOUBLE_LINES)
+      end
+
+      it_behaves_like "converting to UTF-8"
+
+      it "has the line with 135°C" do
+        expect(subject.line(2).source).to eq DEGREE_135_LINE
+      end
+    end
+
+    describe "EUC-JP with magic comment" do
+      subject do
+        SimpleCov::SourceFile.new(source_fixture("euc-jp.rb"), COVERAGE_FOR_DOUBLE_LINES)
+      end
+
+      it_behaves_like "converting to UTF-8"
+
+      it "has the line with 135°C" do
+        expect(subject.line(2).source).to eq DEGREE_135_LINE
+      end
+    end
+
+    describe "EUC-JP with magic comment and shebang" do
+      subject do
+        SimpleCov::SourceFile.new(source_fixture("euc-jp-shebang.rb"), COVERAGE_FOR_TRIPLE_LINES)
+      end
+
+      it_behaves_like "converting to UTF-8"
+
+      it "has all the right lines" do
+        expect(subject.lines.map(&:source)).to eq [
+          "#!/usr/bin/env ruby\n",
+          "# encoding: EUC-JP\n",
+          DEGREE_135_LINE
+        ]
+      end
+    end
+
+    describe "empty euc-jp file" do
+      subject do
+        SimpleCov::SourceFile.new(source_fixture("empty_euc-jp.rb"), "lines" => [])
+      end
+
+      it "has empty lines" do
+        expect(subject.lines).to be_empty
+      end
+    end
+  end
 end
diff --git a/test_projects/encodings/lib/euc_jp.rb b/test_projects/encodings/lib/euc_jp.rb
@@ -0,0 +1,10 @@
+# encoding: EUC-JP
+# frozen_string_literal: true
+
+class Fun2
+  MSG = "���Ϥ褦"
+
+  def no_moji
+    "tada!"
+  end
+end
diff --git a/test_projects/encodings/lib/euc_jp_not_declared.rb b/test_projects/encodings/lib/euc_jp_not_declared.rb
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+class Fun3
+  # ruby actually breaks on the not declared multibyte characters,
+  # which is nice I guess
+  MSG = "Something Else"
+
+  def no_moji
+    "tada!"
+  end
+end
diff --git a/test_projects/encodings/lib/euc_jp_not_declared_tracked.rb b/test_projects/encodings/lib/euc_jp_not_declared_tracked.rb
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+# Ruby can't even execute this file but it might be added
+# via track_files or similar means and we still don't wanna crash!
+
+class NoDeclare
+  MSG = "���Ϥ褦"
+
+  def ??
+    "tada!"
+  end
+end
diff --git a/test_projects/encodings/lib/utf8.rb b/test_projects/encodings/lib/utf8.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+class Fun
+  MSG = "おはよう"
+
+  def 🇯🇵
+    "tada!"
+  end
+end
diff --git a/test_projects/encodings/spec/simple_spec.rb b/test_projects/encodings/spec/simple_spec.rb
@@ -0,0 +1,7 @@
+require_relative "spec_helper"
+
+describe Fun do
+  it "call things" do
+    expect(subject.🇯🇵).to eq "tada!"
+  end
+end
diff --git a/test_projects/encodings/spec/spec_helper.rb b/test_projects/encodings/spec/spec_helper.rb
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+require "simplecov"
+
+SimpleCov.start do
+  track_files "lib/euc_jp_not_declared_tracked.rb"
+end
+
+require_relative "../lib/utf8.rb"
+require_relative "../lib/euc_jp.rb"
+require_relative "../lib/euc_jp_not_declared.rb"