From 74abc9280aaf493a91a6997ef4f984d4b07b5b9b Mon Sep 17 00:00:00 2001
From: Daniel Vandersluis <daniel.vandersluis@gmail.com>
Date: Tue, 17 Aug 2021 15:48:16 -0400
Subject: [PATCH] [Fix #10014] Fix `Style/Encoding` to handle more situations
 properly.

Refactors `Style/Encoding` to make use of the `MagicComment` class. Now `Style/Encoding` can handle the following situations:
* multiple encoding magic comments
* encoding magic comments following other magic comments
* magic comments containing multiple directives (only the encoding is removed)
---
 .../fix_fix_styleencoding_to_handle_more.md   |  1 +
 lib/rubocop/cop/style/encoding.rb             | 41 ++++++---
 lib/rubocop/magic_comment.rb                  | 59 +++++++++---
 spec/rubocop/cop/style/encoding_spec.rb       | 92 ++++++++++++++++---
 spec/rubocop/magic_comment_spec.rb            | 74 +++++++++++++++
 5 files changed, 222 insertions(+), 45 deletions(-)
 create mode 100644 changelog/fix_fix_styleencoding_to_handle_more.md

diff --git a/changelog/fix_fix_styleencoding_to_handle_more.md b/changelog/fix_fix_styleencoding_to_handle_more.md
new file mode 100644
index 00000000000..764914ea8c9
--- /dev/null
+++ b/changelog/fix_fix_styleencoding_to_handle_more.md
@@ -0,0 +1 @@
+* [#10014](https://github.com/rubocop/rubocop/issues/10014): Fix `Style/Encoding` to handle more situations properly. ([@dvandersluis][])
diff --git a/lib/rubocop/cop/style/encoding.rb b/lib/rubocop/cop/style/encoding.rb
index 473a4a9c774..5dceabf98b6 100644
--- a/lib/rubocop/cop/style/encoding.rb
+++ b/lib/rubocop/cop/style/encoding.rb
@@ -13,38 +13,49 @@ class Encoding < Base
         include RangeHelp
         extend AutoCorrector
 
-        MSG_UNNECESSARY = 'Unnecessary utf-8 encoding comment.'
+        MSG = 'Unnecessary utf-8 encoding comment.'
         ENCODING_PATTERN = /#.*coding\s?[:=]\s?(?:UTF|utf)-8/.freeze
         SHEBANG = '#!'
 
         def on_new_investigation
           return if processed_source.buffer.source.empty?
 
-          line_number = encoding_line_number(processed_source)
-          return unless (@message = offense(processed_source, line_number))
+          comments.each do |line_number, comment|
+            next unless offense?(comment)
 
-          range = processed_source.buffer.line_range(line_number + 1)
-          add_offense(range, message: @message) do |corrector|
-            corrector.remove(range_with_surrounding_space(range: range, side: :right))
+            register_offense(line_number, comment)
           end
         end
 
         private
 
-        def offense(processed_source, line_number)
-          line = processed_source[line_number]
+        def comments
+          processed_source.lines.each.with_index.with_object({}) do |(line, line_number), comments|
+            next if line.start_with?(SHEBANG)
 
-          MSG_UNNECESSARY if encoding_omitable?(line)
+            comment = MagicComment.parse(line)
+            return comments unless comment.valid?
+
+            comments[line_number + 1] = comment
+          end
         end
 
-        def encoding_omitable?(line)
-          ENCODING_PATTERN.match?(line)
+        def offense?(comment)
+          comment.encoding_specified? && comment.encoding.casecmp('utf-8').zero?
         end
 
-        def encoding_line_number(processed_source)
-          line_number = 0
-          line_number += 1 if processed_source[line_number].start_with?(SHEBANG)
-          line_number
+        def register_offense(line_number, comment)
+          range = processed_source.buffer.line_range(line_number)
+
+          add_offense(range) do |corrector|
+            text = comment.without(:encoding)
+
+            if text.blank?
+              corrector.remove(range_with_surrounding_space(range: range, side: :right))
+            else
+              corrector.replace(range, text)
+            end
+          end
         end
       end
     end
diff --git a/lib/rubocop/magic_comment.rb b/lib/rubocop/magic_comment.rb
index baa70679837..785cbc89c78 100644
--- a/lib/rubocop/magic_comment.rb
+++ b/lib/rubocop/magic_comment.rb
@@ -7,6 +7,11 @@ module RuboCop
   class MagicComment
     # @see https://git.io/vMC1C IRB's pattern for matching magic comment tokens
     TOKEN = /[[:alnum:]\-_]+/.freeze
+    KEYWORDS = {
+      encoding: '(?:en)?coding',
+      frozen_string_literal: 'frozen[_-]string[_-]literal',
+      shareable_constant_value: 'shareable[_-]constant[_-]value'
+    }.freeze
 
     # Detect magic comment format and pass it to the appropriate wrapper.
     #
@@ -15,8 +20,8 @@ class MagicComment
     # @return [RuboCop::MagicComment]
     def self.parse(comment)
       case comment
-      when EmacsComment::FORMAT then EmacsComment.new(comment)
-      when VimComment::FORMAT   then VimComment.new(comment)
+      when EmacsComment::REGEXP then EmacsComment.new(comment)
+      when VimComment::REGEXP   then VimComment.new(comment)
       else
         SimpleComment.new(comment)
       end
@@ -30,6 +35,10 @@ def any?
       frozen_string_literal_specified? || encoding_specified? || shareable_constant_value_specified?
     end
 
+    def valid?
+      @comment.start_with?('#') && any?
+    end
+
     # Does the magic comment enable the frozen string literal feature.
     #
     # Test whether the frozen string literal value is `true`. Cannot
@@ -111,6 +120,18 @@ def extract(pattern)
     #
     # @abstract
     class EditorComment < MagicComment
+      def encoding
+        match(self.class::KEYWORDS[:encoding])
+      end
+
+      # Rewrite the comment without a given token type
+      def without(type)
+        remaining = tokens.grep_v(/\A#{self.class::KEYWORDS[type.to_sym]}/)
+        return '' if remaining.empty?
+
+        self.class::FORMAT % remaining.join(self.class::SEPARATOR)
+      end
+
       private
 
       # Find a token starting with the provided keyword and extract its value.
@@ -135,7 +156,7 @@ def match(keyword)
       #
       # @return [Array<String>]
       def tokens
-        extract(self.class::FORMAT).split(self.class::SEPARATOR).map(&:strip)
+        extract(self.class::REGEXP).split(self.class::SEPARATOR).map(&:strip)
       end
     end
 
@@ -151,22 +172,19 @@ def tokens
     # @see https://www.gnu.org/software/emacs/manual/html_node/emacs/Specify-Coding.html
     # @see https://git.io/vMCXh Emacs handling in Ruby's parse.y
     class EmacsComment < EditorComment
-      FORMAT    = /-\*-(.+)-\*-/.freeze
+      REGEXP    = /-\*-(.+)-\*-/.freeze
+      FORMAT    = '# -*- %s -*-'
       SEPARATOR = ';'
       OPERATOR  = ':'
 
-      def encoding
-        match('(?:en)?coding')
-      end
-
       private
 
       def extract_frozen_string_literal
-        match('frozen[_-]string[_-]literal')
+        match(KEYWORDS[:frozen_string_literal])
       end
 
       def extract_shareable_constant_value
-        match('shareable[_-]constant[_-]values')
+        match(KEYWORDS[:shareable_constant_value])
       end
     end
 
@@ -179,9 +197,11 @@ def extract_shareable_constant_value
     #
     #   comment.encoding # => 'ascii-8bit'
     class VimComment < EditorComment
-      FORMAT    = /#\s*vim:\s*(.+)/.freeze
+      REGEXP    = /#\s*vim:\s*(.+)/.freeze
+      FORMAT    = '# vim: %s'
       SEPARATOR = ', '
       OPERATOR  = '='
+      KEYWORDS = MagicComment::KEYWORDS.merge(encoding: 'fileencoding').freeze
 
       # For some reason the fileencoding keyword only works if there
       # is at least one other token included in the string. For example
@@ -193,7 +213,7 @@ class VimComment < EditorComment
       #      # vim: foo=bar, fileencoding=ascii-8bit
       #
       def encoding
-        match('fileencoding') if tokens.size > 1
+        super if tokens.size > 1
       end
 
       # Vim comments cannot specify frozen string literal behavior.
@@ -219,7 +239,16 @@ def shareable_constant_value; end
     class SimpleComment < MagicComment
       # Match `encoding` or `coding`
       def encoding
-        extract(/\A\s*\#.*\b(?:en)?coding: (#{TOKEN})/io)
+        extract(/\A\s*\#.*\b#{KEYWORDS[:encoding]}: (#{TOKEN})/io)
+      end
+
+      # Rewrite the comment without a given token type
+      def without(type)
+        if @comment.match?(/\A#\s*#{self.class::KEYWORDS[type.to_sym]}/)
+          ''
+        else
+          @comment
+        end
       end
 
       private
@@ -232,11 +261,11 @@ def encoding
       # Case-insensitive and dashes/underscores are acceptable.
       # @see https://git.io/vM7Mg
       def extract_frozen_string_literal
-        extract(/\A\s*#\s*frozen[_-]string[_-]literal:\s*(#{TOKEN})\s*\z/io)
+        extract(/\A\s*#\s*#{KEYWORDS[:frozen_string_literal]}:\s*(#{TOKEN})\s*\z/io)
       end
 
       def extract_shareable_constant_value
-        extract(/\A\s*#\s*shareable[_-]constant[_-]value:\s*(#{TOKEN})\s*\z/io)
+        extract(/\A\s*#\s*#{KEYWORDS[:shareable_constant_value]}:\s*(#{TOKEN})\s*\z/io)
       end
     end
   end
diff --git a/spec/rubocop/cop/style/encoding_spec.rb b/spec/rubocop/cop/style/encoding_spec.rb
index 897a2299062..f8eacbd9e8a 100644
--- a/spec/rubocop/cop/style/encoding_spec.rb
+++ b/spec/rubocop/cop/style/encoding_spec.rb
@@ -1,19 +1,26 @@
 # frozen_string_literal: true
 
 RSpec.describe RuboCop::Cop::Style::Encoding, :config do
-  it 'registers no offense when no encoding present' do
+  it 'does not register an offense when no encoding present' do
     expect_no_offenses(<<~RUBY)
       def foo() end
     RUBY
   end
 
-  it 'registers no offense when encoding present but not UTF-8' do
+  it 'does not register an offense when encoding present but not UTF-8' do
     expect_no_offenses(<<~RUBY)
       # encoding: us-ascii
       def foo() end
     RUBY
   end
 
+  it 'does not register an offense on a different magic comment type' do
+    expect_no_offenses(<<~RUBY)
+      # frozen-string-literal: true
+      def foo() end
+    RUBY
+  end
+
   it 'registers an offense when encoding present and UTF-8' do
     expect_offense(<<~RUBY)
       # encoding: utf-8
@@ -40,10 +47,12 @@ def foo() end
     RUBY
   end
 
-  it 'registers an offense for vim-style encoding comments' do
+  it 'registers an offense and corrects if there are multiple encoding magic comments' do
     expect_offense(<<~RUBY)
-      # vim:filetype=ruby, fileencoding=utf-8
-      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
+      # encoding: utf-8
+      ^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
+      # coding: utf-8
+      ^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
       def foo() end
     RUBY
 
@@ -52,22 +61,75 @@ def foo() end
     RUBY
   end
 
-  it 'registers no offense when encoding is in the wrong place' do
-    expect_no_offenses(<<~RUBY)
-      def foo() end
+  it 'registers an offense and corrects the magic comment follows another magic comment' do
+    expect_offense(<<~RUBY)
+      # frozen-string-literal: true
       # encoding: utf-8
+      ^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
+      def foo() end
+    RUBY
+
+    expect_correction(<<~RUBY)
+      # frozen-string-literal: true
+      def foo() end
     RUBY
   end
 
-  it 'registers an offense for encoding inserted by magic_encoding gem' do
-    expect_offense(<<~RUBY)
-      # -*- encoding : utf-8 -*-
-      ^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
-      def foo() 'ä' end
+  it 'does not register an offense when encoding is not at the top of the file' do
+    expect_no_offenses(<<~RUBY)
+      # frozen-string-literal: true
+
+      # encoding: utf-8
+      def foo() end
     RUBY
+  end
 
-    expect_correction(<<~RUBY)
-      def foo() 'ä' end
+  it 'does not register an offense when encoding is in the wrong place' do
+    expect_no_offenses(<<~RUBY)
+      def foo() end
+      # encoding: utf-8
     RUBY
   end
+
+  context 'vim comments' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        # vim:filetype=ruby, fileencoding=utf-8
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
+        def foo() end
+      RUBY
+
+      expect_correction(<<~RUBY)
+        # vim: filetype=ruby
+        def foo() end
+      RUBY
+    end
+  end
+
+  context 'emacs comment' do
+    it 'registers an offense for encoding' do
+      expect_offense(<<~RUBY)
+        # -*- encoding : utf-8 -*-
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
+        def foo() 'ä' end
+      RUBY
+
+      expect_correction(<<~RUBY)
+        def foo() 'ä' end
+      RUBY
+    end
+
+    it 'only removes encoding if there are other editor comments' do
+      expect_offense(<<~RUBY)
+        # -*- encoding : utf-8; mode: enh-ruby -*-
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
+        def foo() 'ä' end
+      RUBY
+
+      expect_correction(<<~RUBY)
+        # -*- mode: enh-ruby -*-
+        def foo() 'ä' end
+      RUBY
+    end
+  end
 end
diff --git a/spec/rubocop/magic_comment_spec.rb b/spec/rubocop/magic_comment_spec.rb
index 241aeafc2d0..8ce7afa3d1e 100644
--- a/spec/rubocop/magic_comment_spec.rb
+++ b/spec/rubocop/magic_comment_spec.rb
@@ -121,6 +121,40 @@
 
   include_examples 'magic comment', '# vim:fileencoding=utf-8', encoding: nil
 
+  describe '#valid?' do
+    subject { described_class.parse(comment).valid? }
+
+    context 'with an empty string' do
+      let(:comment) { '' }
+
+      it { is_expected.to eq(false) }
+    end
+
+    context 'with a non magic comment' do
+      let(:comment) { '# do something' }
+
+      it { is_expected.to eq(false) }
+    end
+
+    context 'with an encoding comment' do
+      let(:comment) { '# encoding: utf-8' }
+
+      it { is_expected.to eq(true) }
+    end
+
+    context 'with an frozen string literal comment' do
+      let(:comment) { '# frozen-string-literal: true' }
+
+      it { is_expected.to eq(true) }
+    end
+
+    context 'with an shareable constant value comment' do
+      let(:comment) { '# shareable-constant-value: literal' }
+
+      it { is_expected.to eq(true) }
+    end
+  end
+
   describe '#valid_shareable_constant_value?' do
     subject { described_class.parse(comment).valid_shareable_constant_value? }
 
@@ -160,4 +194,44 @@
       it { is_expected.to eq(false) }
     end
   end
+
+  describe '#without' do
+    subject { described_class.parse(comment).without(:encoding) }
+
+    context 'simple format' do
+      context 'when the entire comment is a single value' do
+        let(:comment) { '# encoding: utf-8' }
+
+        it { is_expected.to eq('') }
+      end
+
+      context 'when the comment contains a different magic value' do
+        let(:comment) { '# frozen-string-literal: true' }
+
+        it { is_expected.to eq(comment) }
+      end
+    end
+
+    context 'emacs format' do
+      context 'with one token' do
+        let(:comment) { '# -*- coding: ASCII-8BIT -*-' }
+
+        it { is_expected.to eq('') }
+      end
+
+      context 'with multiple tokens' do
+        let(:comment) { '# -*- coding: ASCII-8BIT; frozen_string_literal: true -*-' }
+
+        it { is_expected.to eq('# -*- frozen_string_literal: true -*-') }
+      end
+    end
+
+    context 'vim format' do
+      context 'when the comment has multiple tokens' do
+        let(:comment) { '# vim: filetype=ruby, fileencoding=ascii-8bit' }
+
+        it { is_expected.to eq('# vim: filetype=ruby') }
+      end
+    end
+  end
 end