From 74abc9280aaf493a91a6997ef4f984d4b07b5b9b Mon Sep 17 00:00:00 2001 From: Daniel Vandersluis Date: Tue, 17 Aug 2021 15:48:16 -0400 Subject: [PATCH] [Fix #10014] Fix `Style/Encoding` to handle more situations properly. Refactors `Style/Encoding` to make use of the `MagicComment` class. Now `Style/Encoding` can handle the following situations: * multiple encoding magic comments * encoding magic comments following other magic comments * magic comments containing multiple directives (only the encoding is removed) --- .../fix_fix_styleencoding_to_handle_more.md | 1 + lib/rubocop/cop/style/encoding.rb | 41 ++++++--- lib/rubocop/magic_comment.rb | 59 +++++++++--- spec/rubocop/cop/style/encoding_spec.rb | 92 ++++++++++++++++--- spec/rubocop/magic_comment_spec.rb | 74 +++++++++++++++ 5 files changed, 222 insertions(+), 45 deletions(-) create mode 100644 changelog/fix_fix_styleencoding_to_handle_more.md diff --git a/changelog/fix_fix_styleencoding_to_handle_more.md b/changelog/fix_fix_styleencoding_to_handle_more.md new file mode 100644 index 00000000000..764914ea8c9 --- /dev/null +++ b/changelog/fix_fix_styleencoding_to_handle_more.md @@ -0,0 +1 @@ +* [#10014](https://github.com/rubocop/rubocop/issues/10014): Fix `Style/Encoding` to handle more situations properly. ([@dvandersluis][]) diff --git a/lib/rubocop/cop/style/encoding.rb b/lib/rubocop/cop/style/encoding.rb index 473a4a9c774..5dceabf98b6 100644 --- a/lib/rubocop/cop/style/encoding.rb +++ b/lib/rubocop/cop/style/encoding.rb @@ -13,38 +13,49 @@ class Encoding < Base include RangeHelp extend AutoCorrector - MSG_UNNECESSARY = 'Unnecessary utf-8 encoding comment.' + MSG = 'Unnecessary utf-8 encoding comment.' ENCODING_PATTERN = /#.*coding\s?[:=]\s?(?:UTF|utf)-8/.freeze SHEBANG = '#!' def on_new_investigation return if processed_source.buffer.source.empty? - line_number = encoding_line_number(processed_source) - return unless (@message = offense(processed_source, line_number)) + comments.each do |line_number, comment| + next unless offense?(comment) - range = processed_source.buffer.line_range(line_number + 1) - add_offense(range, message: @message) do |corrector| - corrector.remove(range_with_surrounding_space(range: range, side: :right)) + register_offense(line_number, comment) end end private - def offense(processed_source, line_number) - line = processed_source[line_number] + def comments + processed_source.lines.each.with_index.with_object({}) do |(line, line_number), comments| + next if line.start_with?(SHEBANG) - MSG_UNNECESSARY if encoding_omitable?(line) + comment = MagicComment.parse(line) + return comments unless comment.valid? + + comments[line_number + 1] = comment + end end - def encoding_omitable?(line) - ENCODING_PATTERN.match?(line) + def offense?(comment) + comment.encoding_specified? && comment.encoding.casecmp('utf-8').zero? end - def encoding_line_number(processed_source) - line_number = 0 - line_number += 1 if processed_source[line_number].start_with?(SHEBANG) - line_number + def register_offense(line_number, comment) + range = processed_source.buffer.line_range(line_number) + + add_offense(range) do |corrector| + text = comment.without(:encoding) + + if text.blank? + corrector.remove(range_with_surrounding_space(range: range, side: :right)) + else + corrector.replace(range, text) + end + end end end end diff --git a/lib/rubocop/magic_comment.rb b/lib/rubocop/magic_comment.rb index baa70679837..785cbc89c78 100644 --- a/lib/rubocop/magic_comment.rb +++ b/lib/rubocop/magic_comment.rb @@ -7,6 +7,11 @@ module RuboCop class MagicComment # @see https://git.io/vMC1C IRB's pattern for matching magic comment tokens TOKEN = /[[:alnum:]\-_]+/.freeze + KEYWORDS = { + encoding: '(?:en)?coding', + frozen_string_literal: 'frozen[_-]string[_-]literal', + shareable_constant_value: 'shareable[_-]constant[_-]value' + }.freeze # Detect magic comment format and pass it to the appropriate wrapper. # @@ -15,8 +20,8 @@ class MagicComment # @return [RuboCop::MagicComment] def self.parse(comment) case comment - when EmacsComment::FORMAT then EmacsComment.new(comment) - when VimComment::FORMAT then VimComment.new(comment) + when EmacsComment::REGEXP then EmacsComment.new(comment) + when VimComment::REGEXP then VimComment.new(comment) else SimpleComment.new(comment) end @@ -30,6 +35,10 @@ def any? frozen_string_literal_specified? || encoding_specified? || shareable_constant_value_specified? end + def valid? + @comment.start_with?('#') && any? + end + # Does the magic comment enable the frozen string literal feature. # # Test whether the frozen string literal value is `true`. Cannot @@ -111,6 +120,18 @@ def extract(pattern) # # @abstract class EditorComment < MagicComment + def encoding + match(self.class::KEYWORDS[:encoding]) + end + + # Rewrite the comment without a given token type + def without(type) + remaining = tokens.grep_v(/\A#{self.class::KEYWORDS[type.to_sym]}/) + return '' if remaining.empty? + + self.class::FORMAT % remaining.join(self.class::SEPARATOR) + end + private # Find a token starting with the provided keyword and extract its value. @@ -135,7 +156,7 @@ def match(keyword) # # @return [Array] def tokens - extract(self.class::FORMAT).split(self.class::SEPARATOR).map(&:strip) + extract(self.class::REGEXP).split(self.class::SEPARATOR).map(&:strip) end end @@ -151,22 +172,19 @@ def tokens # @see https://www.gnu.org/software/emacs/manual/html_node/emacs/Specify-Coding.html # @see https://git.io/vMCXh Emacs handling in Ruby's parse.y class EmacsComment < EditorComment - FORMAT = /-\*-(.+)-\*-/.freeze + REGEXP = /-\*-(.+)-\*-/.freeze + FORMAT = '# -*- %s -*-' SEPARATOR = ';' OPERATOR = ':' - def encoding - match('(?:en)?coding') - end - private def extract_frozen_string_literal - match('frozen[_-]string[_-]literal') + match(KEYWORDS[:frozen_string_literal]) end def extract_shareable_constant_value - match('shareable[_-]constant[_-]values') + match(KEYWORDS[:shareable_constant_value]) end end @@ -179,9 +197,11 @@ def extract_shareable_constant_value # # comment.encoding # => 'ascii-8bit' class VimComment < EditorComment - FORMAT = /#\s*vim:\s*(.+)/.freeze + REGEXP = /#\s*vim:\s*(.+)/.freeze + FORMAT = '# vim: %s' SEPARATOR = ', ' OPERATOR = '=' + KEYWORDS = MagicComment::KEYWORDS.merge(encoding: 'fileencoding').freeze # For some reason the fileencoding keyword only works if there # is at least one other token included in the string. For example @@ -193,7 +213,7 @@ class VimComment < EditorComment # # vim: foo=bar, fileencoding=ascii-8bit # def encoding - match('fileencoding') if tokens.size > 1 + super if tokens.size > 1 end # Vim comments cannot specify frozen string literal behavior. @@ -219,7 +239,16 @@ def shareable_constant_value; end class SimpleComment < MagicComment # Match `encoding` or `coding` def encoding - extract(/\A\s*\#.*\b(?:en)?coding: (#{TOKEN})/io) + extract(/\A\s*\#.*\b#{KEYWORDS[:encoding]}: (#{TOKEN})/io) + end + + # Rewrite the comment without a given token type + def without(type) + if @comment.match?(/\A#\s*#{self.class::KEYWORDS[type.to_sym]}/) + '' + else + @comment + end end private @@ -232,11 +261,11 @@ def encoding # Case-insensitive and dashes/underscores are acceptable. # @see https://git.io/vM7Mg def extract_frozen_string_literal - extract(/\A\s*#\s*frozen[_-]string[_-]literal:\s*(#{TOKEN})\s*\z/io) + extract(/\A\s*#\s*#{KEYWORDS[:frozen_string_literal]}:\s*(#{TOKEN})\s*\z/io) end def extract_shareable_constant_value - extract(/\A\s*#\s*shareable[_-]constant[_-]value:\s*(#{TOKEN})\s*\z/io) + extract(/\A\s*#\s*#{KEYWORDS[:shareable_constant_value]}:\s*(#{TOKEN})\s*\z/io) end end end diff --git a/spec/rubocop/cop/style/encoding_spec.rb b/spec/rubocop/cop/style/encoding_spec.rb index 897a2299062..f8eacbd9e8a 100644 --- a/spec/rubocop/cop/style/encoding_spec.rb +++ b/spec/rubocop/cop/style/encoding_spec.rb @@ -1,19 +1,26 @@ # frozen_string_literal: true RSpec.describe RuboCop::Cop::Style::Encoding, :config do - it 'registers no offense when no encoding present' do + it 'does not register an offense when no encoding present' do expect_no_offenses(<<~RUBY) def foo() end RUBY end - it 'registers no offense when encoding present but not UTF-8' do + it 'does not register an offense when encoding present but not UTF-8' do expect_no_offenses(<<~RUBY) # encoding: us-ascii def foo() end RUBY end + it 'does not register an offense on a different magic comment type' do + expect_no_offenses(<<~RUBY) + # frozen-string-literal: true + def foo() end + RUBY + end + it 'registers an offense when encoding present and UTF-8' do expect_offense(<<~RUBY) # encoding: utf-8 @@ -40,10 +47,12 @@ def foo() end RUBY end - it 'registers an offense for vim-style encoding comments' do + it 'registers an offense and corrects if there are multiple encoding magic comments' do expect_offense(<<~RUBY) - # vim:filetype=ruby, fileencoding=utf-8 - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment. + # encoding: utf-8 + ^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment. + # coding: utf-8 + ^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment. def foo() end RUBY @@ -52,22 +61,75 @@ def foo() end RUBY end - it 'registers no offense when encoding is in the wrong place' do - expect_no_offenses(<<~RUBY) - def foo() end + it 'registers an offense and corrects the magic comment follows another magic comment' do + expect_offense(<<~RUBY) + # frozen-string-literal: true # encoding: utf-8 + ^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment. + def foo() end + RUBY + + expect_correction(<<~RUBY) + # frozen-string-literal: true + def foo() end RUBY end - it 'registers an offense for encoding inserted by magic_encoding gem' do - expect_offense(<<~RUBY) - # -*- encoding : utf-8 -*- - ^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment. - def foo() 'ä' end + it 'does not register an offense when encoding is not at the top of the file' do + expect_no_offenses(<<~RUBY) + # frozen-string-literal: true + + # encoding: utf-8 + def foo() end RUBY + end - expect_correction(<<~RUBY) - def foo() 'ä' end + it 'does not register an offense when encoding is in the wrong place' do + expect_no_offenses(<<~RUBY) + def foo() end + # encoding: utf-8 RUBY end + + context 'vim comments' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + # vim:filetype=ruby, fileencoding=utf-8 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment. + def foo() end + RUBY + + expect_correction(<<~RUBY) + # vim: filetype=ruby + def foo() end + RUBY + end + end + + context 'emacs comment' do + it 'registers an offense for encoding' do + expect_offense(<<~RUBY) + # -*- encoding : utf-8 -*- + ^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment. + def foo() 'ä' end + RUBY + + expect_correction(<<~RUBY) + def foo() 'ä' end + RUBY + end + + it 'only removes encoding if there are other editor comments' do + expect_offense(<<~RUBY) + # -*- encoding : utf-8; mode: enh-ruby -*- + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment. + def foo() 'ä' end + RUBY + + expect_correction(<<~RUBY) + # -*- mode: enh-ruby -*- + def foo() 'ä' end + RUBY + end + end end diff --git a/spec/rubocop/magic_comment_spec.rb b/spec/rubocop/magic_comment_spec.rb index 241aeafc2d0..8ce7afa3d1e 100644 --- a/spec/rubocop/magic_comment_spec.rb +++ b/spec/rubocop/magic_comment_spec.rb @@ -121,6 +121,40 @@ include_examples 'magic comment', '# vim:fileencoding=utf-8', encoding: nil + describe '#valid?' do + subject { described_class.parse(comment).valid? } + + context 'with an empty string' do + let(:comment) { '' } + + it { is_expected.to eq(false) } + end + + context 'with a non magic comment' do + let(:comment) { '# do something' } + + it { is_expected.to eq(false) } + end + + context 'with an encoding comment' do + let(:comment) { '# encoding: utf-8' } + + it { is_expected.to eq(true) } + end + + context 'with an frozen string literal comment' do + let(:comment) { '# frozen-string-literal: true' } + + it { is_expected.to eq(true) } + end + + context 'with an shareable constant value comment' do + let(:comment) { '# shareable-constant-value: literal' } + + it { is_expected.to eq(true) } + end + end + describe '#valid_shareable_constant_value?' do subject { described_class.parse(comment).valid_shareable_constant_value? } @@ -160,4 +194,44 @@ it { is_expected.to eq(false) } end end + + describe '#without' do + subject { described_class.parse(comment).without(:encoding) } + + context 'simple format' do + context 'when the entire comment is a single value' do + let(:comment) { '# encoding: utf-8' } + + it { is_expected.to eq('') } + end + + context 'when the comment contains a different magic value' do + let(:comment) { '# frozen-string-literal: true' } + + it { is_expected.to eq(comment) } + end + end + + context 'emacs format' do + context 'with one token' do + let(:comment) { '# -*- coding: ASCII-8BIT -*-' } + + it { is_expected.to eq('') } + end + + context 'with multiple tokens' do + let(:comment) { '# -*- coding: ASCII-8BIT; frozen_string_literal: true -*-' } + + it { is_expected.to eq('# -*- frozen_string_literal: true -*-') } + end + end + + context 'vim format' do + context 'when the comment has multiple tokens' do + let(:comment) { '# vim: filetype=ruby, fileencoding=ascii-8bit' } + + it { is_expected.to eq('# vim: filetype=ruby') } + end + end + end end