Skip to content

Commit

Permalink
[Fix #10014] Fix Style/Encoding to handle more situations properly.
Browse files Browse the repository at this point in the history
Refactors `Style/Encoding` to make use of the `MagicComment` class. Now `Style/Encoding` can handle the following situations:
* multiple encoding magic comments
* encoding magic comments following other magic comments
* magic comments containing multiple directives (only the encoding is removed)
  • Loading branch information
dvandersluis authored and bbatsov committed Aug 18, 2021
1 parent a2730c3 commit 85219e5
Show file tree
Hide file tree
Showing 5 changed files with 222 additions and 45 deletions.
1 change: 1 addition & 0 deletions changelog/fix_fix_styleencoding_to_handle_more.md
@@ -0,0 +1 @@
* [#10014](https://github.com/rubocop/rubocop/issues/10014): Fix `Style/Encoding` to handle more situations properly. ([@dvandersluis][])
41 changes: 26 additions & 15 deletions lib/rubocop/cop/style/encoding.rb
Expand Up @@ -13,38 +13,49 @@ class Encoding < Base
include RangeHelp
extend AutoCorrector

MSG_UNNECESSARY = 'Unnecessary utf-8 encoding comment.'
MSG = 'Unnecessary utf-8 encoding comment.'
ENCODING_PATTERN = /#.*coding\s?[:=]\s?(?:UTF|utf)-8/.freeze
SHEBANG = '#!'

def on_new_investigation
return if processed_source.buffer.source.empty?

line_number = encoding_line_number(processed_source)
return unless (@message = offense(processed_source, line_number))
comments.each do |line_number, comment|
next unless offense?(comment)

range = processed_source.buffer.line_range(line_number + 1)
add_offense(range, message: @message) do |corrector|
corrector.remove(range_with_surrounding_space(range: range, side: :right))
register_offense(line_number, comment)
end
end

private

def offense(processed_source, line_number)
line = processed_source[line_number]
def comments
processed_source.lines.each.with_index.with_object({}) do |(line, line_number), comments|
next if line.start_with?(SHEBANG)

MSG_UNNECESSARY if encoding_omitable?(line)
comment = MagicComment.parse(line)
return comments unless comment.valid?

comments[line_number + 1] = comment
end
end

def encoding_omitable?(line)
ENCODING_PATTERN.match?(line)
def offense?(comment)
comment.encoding_specified? && comment.encoding.casecmp('utf-8').zero?
end

def encoding_line_number(processed_source)
line_number = 0
line_number += 1 if processed_source[line_number].start_with?(SHEBANG)
line_number
def register_offense(line_number, comment)
range = processed_source.buffer.line_range(line_number)

add_offense(range) do |corrector|
text = comment.without(:encoding)

if text.blank?
corrector.remove(range_with_surrounding_space(range: range, side: :right))
else
corrector.replace(range, text)
end
end
end
end
end
Expand Down
59 changes: 44 additions & 15 deletions lib/rubocop/magic_comment.rb
Expand Up @@ -7,6 +7,11 @@ module RuboCop
class MagicComment
# @see https://git.io/vMC1C IRB's pattern for matching magic comment tokens
TOKEN = /[[:alnum:]\-_]+/.freeze
KEYWORDS = {
encoding: '(?:en)?coding',
frozen_string_literal: 'frozen[_-]string[_-]literal',
shareable_constant_value: 'shareable[_-]constant[_-]value'
}.freeze

# Detect magic comment format and pass it to the appropriate wrapper.
#
Expand All @@ -15,8 +20,8 @@ class MagicComment
# @return [RuboCop::MagicComment]
def self.parse(comment)
case comment
when EmacsComment::FORMAT then EmacsComment.new(comment)
when VimComment::FORMAT then VimComment.new(comment)
when EmacsComment::REGEXP then EmacsComment.new(comment)
when VimComment::REGEXP then VimComment.new(comment)
else
SimpleComment.new(comment)
end
Expand All @@ -30,6 +35,10 @@ def any?
frozen_string_literal_specified? || encoding_specified? || shareable_constant_value_specified?
end

def valid?
@comment.start_with?('#') && any?
end

# Does the magic comment enable the frozen string literal feature.
#
# Test whether the frozen string literal value is `true`. Cannot
Expand Down Expand Up @@ -111,6 +120,18 @@ def extract(pattern)
#
# @abstract
class EditorComment < MagicComment
def encoding
match(self.class::KEYWORDS[:encoding])
end

# Rewrite the comment without a given token type
def without(type)
remaining = tokens.grep_v(/\A#{self.class::KEYWORDS[type.to_sym]}/)
return '' if remaining.empty?

self.class::FORMAT % remaining.join(self.class::SEPARATOR)
end

private

# Find a token starting with the provided keyword and extract its value.
Expand All @@ -135,7 +156,7 @@ def match(keyword)
#
# @return [Array<String>]
def tokens
extract(self.class::FORMAT).split(self.class::SEPARATOR).map(&:strip)
extract(self.class::REGEXP).split(self.class::SEPARATOR).map(&:strip)
end
end

Expand All @@ -151,22 +172,19 @@ def tokens
# @see https://www.gnu.org/software/emacs/manual/html_node/emacs/Specify-Coding.html
# @see https://git.io/vMCXh Emacs handling in Ruby's parse.y
class EmacsComment < EditorComment
FORMAT = /-\*-(.+)-\*-/.freeze
REGEXP = /-\*-(.+)-\*-/.freeze
FORMAT = '# -*- %s -*-'
SEPARATOR = ';'
OPERATOR = ':'

def encoding
match('(?:en)?coding')
end

private

def extract_frozen_string_literal
match('frozen[_-]string[_-]literal')
match(KEYWORDS[:frozen_string_literal])
end

def extract_shareable_constant_value
match('shareable[_-]constant[_-]values')
match(KEYWORDS[:shareable_constant_value])
end
end

Expand All @@ -179,9 +197,11 @@ def extract_shareable_constant_value
#
# comment.encoding # => 'ascii-8bit'
class VimComment < EditorComment
FORMAT = /#\s*vim:\s*(.+)/.freeze
REGEXP = /#\s*vim:\s*(.+)/.freeze
FORMAT = '# vim: %s'
SEPARATOR = ', '
OPERATOR = '='
KEYWORDS = MagicComment::KEYWORDS.merge(encoding: 'fileencoding').freeze

# For some reason the fileencoding keyword only works if there
# is at least one other token included in the string. For example
Expand All @@ -193,7 +213,7 @@ class VimComment < EditorComment
# # vim: foo=bar, fileencoding=ascii-8bit
#
def encoding
match('fileencoding') if tokens.size > 1
super if tokens.size > 1
end

# Vim comments cannot specify frozen string literal behavior.
Expand All @@ -219,7 +239,16 @@ def shareable_constant_value; end
class SimpleComment < MagicComment
# Match `encoding` or `coding`
def encoding
extract(/\A\s*\#.*\b(?:en)?coding: (#{TOKEN})/io)
extract(/\A\s*\#.*\b#{KEYWORDS[:encoding]}: (#{TOKEN})/io)
end

# Rewrite the comment without a given token type
def without(type)
if @comment.match?(/\A#\s*#{self.class::KEYWORDS[type.to_sym]}/)
''
else
@comment
end
end

private
Expand All @@ -232,11 +261,11 @@ def encoding
# Case-insensitive and dashes/underscores are acceptable.
# @see https://git.io/vM7Mg
def extract_frozen_string_literal
extract(/\A\s*#\s*frozen[_-]string[_-]literal:\s*(#{TOKEN})\s*\z/io)
extract(/\A\s*#\s*#{KEYWORDS[:frozen_string_literal]}:\s*(#{TOKEN})\s*\z/io)
end

def extract_shareable_constant_value
extract(/\A\s*#\s*shareable[_-]constant[_-]value:\s*(#{TOKEN})\s*\z/io)
extract(/\A\s*#\s*#{KEYWORDS[:shareable_constant_value]}:\s*(#{TOKEN})\s*\z/io)
end
end
end
Expand Down
92 changes: 77 additions & 15 deletions spec/rubocop/cop/style/encoding_spec.rb
@@ -1,19 +1,26 @@
# frozen_string_literal: true

RSpec.describe RuboCop::Cop::Style::Encoding, :config do
it 'registers no offense when no encoding present' do
it 'does not register an offense when no encoding present' do
expect_no_offenses(<<~RUBY)
def foo() end
RUBY
end

it 'registers no offense when encoding present but not UTF-8' do
it 'does not register an offense when encoding present but not UTF-8' do
expect_no_offenses(<<~RUBY)
# encoding: us-ascii
def foo() end
RUBY
end

it 'does not register an offense on a different magic comment type' do
expect_no_offenses(<<~RUBY)
# frozen-string-literal: true
def foo() end
RUBY
end

it 'registers an offense when encoding present and UTF-8' do
expect_offense(<<~RUBY)
# encoding: utf-8
Expand All @@ -40,10 +47,12 @@ def foo() end
RUBY
end

it 'registers an offense for vim-style encoding comments' do
it 'registers an offense and corrects if there are multiple encoding magic comments' do
expect_offense(<<~RUBY)
# vim:filetype=ruby, fileencoding=utf-8
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
# encoding: utf-8
^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
# coding: utf-8
^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() end
RUBY

Expand All @@ -52,22 +61,75 @@ def foo() end
RUBY
end

it 'registers no offense when encoding is in the wrong place' do
expect_no_offenses(<<~RUBY)
def foo() end
it 'registers an offense and corrects the magic comment follows another magic comment' do
expect_offense(<<~RUBY)
# frozen-string-literal: true
# encoding: utf-8
^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() end
RUBY

expect_correction(<<~RUBY)
# frozen-string-literal: true
def foo() end
RUBY
end

it 'registers an offense for encoding inserted by magic_encoding gem' do
expect_offense(<<~RUBY)
# -*- encoding : utf-8 -*-
^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() 'ä' end
it 'does not register an offense when encoding is not at the top of the file' do
expect_no_offenses(<<~RUBY)
# frozen-string-literal: true
# encoding: utf-8
def foo() end
RUBY
end

expect_correction(<<~RUBY)
def foo() 'ä' end
it 'does not register an offense when encoding is in the wrong place' do
expect_no_offenses(<<~RUBY)
def foo() end
# encoding: utf-8
RUBY
end

context 'vim comments' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
# vim:filetype=ruby, fileencoding=utf-8
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() end
RUBY

expect_correction(<<~RUBY)
# vim: filetype=ruby
def foo() end
RUBY
end
end

context 'emacs comment' do
it 'registers an offense for encoding' do
expect_offense(<<~RUBY)
# -*- encoding : utf-8 -*-
^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() 'ä' end
RUBY

expect_correction(<<~RUBY)
def foo() 'ä' end
RUBY
end

it 'only removes encoding if there are other editor comments' do
expect_offense(<<~RUBY)
# -*- encoding : utf-8; mode: enh-ruby -*-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() 'ä' end
RUBY

expect_correction(<<~RUBY)
# -*- mode: enh-ruby -*-
def foo() 'ä' end
RUBY
end
end
end

0 comments on commit 85219e5

Please sign in to comment.