Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Fix #10014] Fix Style/Encoding to handle more situations properly #10023

Merged
merged 1 commit into from Aug 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/fix_fix_styleencoding_to_handle_more.md
@@ -0,0 +1 @@
* [#10014](https://github.com/rubocop/rubocop/issues/10014): Fix `Style/Encoding` to handle more situations properly. ([@dvandersluis][])
41 changes: 26 additions & 15 deletions lib/rubocop/cop/style/encoding.rb
Expand Up @@ -13,38 +13,49 @@ class Encoding < Base
include RangeHelp
extend AutoCorrector

MSG_UNNECESSARY = 'Unnecessary utf-8 encoding comment.'
MSG = 'Unnecessary utf-8 encoding comment.'
ENCODING_PATTERN = /#.*coding\s?[:=]\s?(?:UTF|utf)-8/.freeze
SHEBANG = '#!'

def on_new_investigation
return if processed_source.buffer.source.empty?

line_number = encoding_line_number(processed_source)
return unless (@message = offense(processed_source, line_number))
comments.each do |line_number, comment|
next unless offense?(comment)

range = processed_source.buffer.line_range(line_number + 1)
add_offense(range, message: @message) do |corrector|
corrector.remove(range_with_surrounding_space(range: range, side: :right))
register_offense(line_number, comment)
end
end

private

def offense(processed_source, line_number)
line = processed_source[line_number]
def comments
processed_source.lines.each.with_index.with_object({}) do |(line, line_number), comments|
next if line.start_with?(SHEBANG)

MSG_UNNECESSARY if encoding_omitable?(line)
comment = MagicComment.parse(line)
return comments unless comment.valid?

comments[line_number + 1] = comment
end
end

def encoding_omitable?(line)
ENCODING_PATTERN.match?(line)
def offense?(comment)
comment.encoding_specified? && comment.encoding.casecmp('utf-8').zero?
end

def encoding_line_number(processed_source)
line_number = 0
line_number += 1 if processed_source[line_number].start_with?(SHEBANG)
line_number
def register_offense(line_number, comment)
range = processed_source.buffer.line_range(line_number)

add_offense(range) do |corrector|
text = comment.without(:encoding)

if text.blank?
corrector.remove(range_with_surrounding_space(range: range, side: :right))
else
corrector.replace(range, text)
end
end
end
end
end
Expand Down
59 changes: 44 additions & 15 deletions lib/rubocop/magic_comment.rb
Expand Up @@ -7,6 +7,11 @@ module RuboCop
class MagicComment
# @see https://git.io/vMC1C IRB's pattern for matching magic comment tokens
TOKEN = /[[:alnum:]\-_]+/.freeze
KEYWORDS = {
encoding: '(?:en)?coding',
frozen_string_literal: 'frozen[_-]string[_-]literal',
shareable_constant_value: 'shareable[_-]constant[_-]value'
}.freeze

# Detect magic comment format and pass it to the appropriate wrapper.
#
Expand All @@ -15,8 +20,8 @@ class MagicComment
# @return [RuboCop::MagicComment]
def self.parse(comment)
case comment
when EmacsComment::FORMAT then EmacsComment.new(comment)
when VimComment::FORMAT then VimComment.new(comment)
when EmacsComment::REGEXP then EmacsComment.new(comment)
when VimComment::REGEXP then VimComment.new(comment)
else
SimpleComment.new(comment)
end
Expand All @@ -30,6 +35,10 @@ def any?
frozen_string_literal_specified? || encoding_specified? || shareable_constant_value_specified?
end

def valid?
@comment.start_with?('#') && any?
end

# Does the magic comment enable the frozen string literal feature.
#
# Test whether the frozen string literal value is `true`. Cannot
Expand Down Expand Up @@ -111,6 +120,18 @@ def extract(pattern)
#
# @abstract
class EditorComment < MagicComment
def encoding
match(self.class::KEYWORDS[:encoding])
end

# Rewrite the comment without a given token type
def without(type)
remaining = tokens.grep_v(/\A#{self.class::KEYWORDS[type.to_sym]}/)
return '' if remaining.empty?

self.class::FORMAT % remaining.join(self.class::SEPARATOR)
end

private

# Find a token starting with the provided keyword and extract its value.
Expand All @@ -135,7 +156,7 @@ def match(keyword)
#
# @return [Array<String>]
def tokens
extract(self.class::FORMAT).split(self.class::SEPARATOR).map(&:strip)
extract(self.class::REGEXP).split(self.class::SEPARATOR).map(&:strip)
end
end

Expand All @@ -151,22 +172,19 @@ def tokens
# @see https://www.gnu.org/software/emacs/manual/html_node/emacs/Specify-Coding.html
# @see https://git.io/vMCXh Emacs handling in Ruby's parse.y
class EmacsComment < EditorComment
FORMAT = /-\*-(.+)-\*-/.freeze
REGEXP = /-\*-(.+)-\*-/.freeze
FORMAT = '# -*- %s -*-'
SEPARATOR = ';'
OPERATOR = ':'

def encoding
match('(?:en)?coding')
end

private

def extract_frozen_string_literal
match('frozen[_-]string[_-]literal')
match(KEYWORDS[:frozen_string_literal])
end

def extract_shareable_constant_value
match('shareable[_-]constant[_-]values')
match(KEYWORDS[:shareable_constant_value])
end
end

Expand All @@ -179,9 +197,11 @@ def extract_shareable_constant_value
#
# comment.encoding # => 'ascii-8bit'
class VimComment < EditorComment
FORMAT = /#\s*vim:\s*(.+)/.freeze
REGEXP = /#\s*vim:\s*(.+)/.freeze
FORMAT = '# vim: %s'
SEPARATOR = ', '
OPERATOR = '='
KEYWORDS = MagicComment::KEYWORDS.merge(encoding: 'fileencoding').freeze

# For some reason the fileencoding keyword only works if there
# is at least one other token included in the string. For example
Expand All @@ -193,7 +213,7 @@ class VimComment < EditorComment
# # vim: foo=bar, fileencoding=ascii-8bit
#
def encoding
match('fileencoding') if tokens.size > 1
super if tokens.size > 1
end

# Vim comments cannot specify frozen string literal behavior.
Expand All @@ -219,7 +239,16 @@ def shareable_constant_value; end
class SimpleComment < MagicComment
# Match `encoding` or `coding`
def encoding
extract(/\A\s*\#.*\b(?:en)?coding: (#{TOKEN})/io)
extract(/\A\s*\#.*\b#{KEYWORDS[:encoding]}: (#{TOKEN})/io)
end

# Rewrite the comment without a given token type
def without(type)
if @comment.match?(/\A#\s*#{self.class::KEYWORDS[type.to_sym]}/)
''
else
@comment
end
end

private
Expand All @@ -232,11 +261,11 @@ def encoding
# Case-insensitive and dashes/underscores are acceptable.
# @see https://git.io/vM7Mg
def extract_frozen_string_literal
extract(/\A\s*#\s*frozen[_-]string[_-]literal:\s*(#{TOKEN})\s*\z/io)
extract(/\A\s*#\s*#{KEYWORDS[:frozen_string_literal]}:\s*(#{TOKEN})\s*\z/io)
end

def extract_shareable_constant_value
extract(/\A\s*#\s*shareable[_-]constant[_-]value:\s*(#{TOKEN})\s*\z/io)
extract(/\A\s*#\s*#{KEYWORDS[:shareable_constant_value]}:\s*(#{TOKEN})\s*\z/io)
end
end
end
Expand Down
92 changes: 77 additions & 15 deletions spec/rubocop/cop/style/encoding_spec.rb
@@ -1,19 +1,26 @@
# frozen_string_literal: true

RSpec.describe RuboCop::Cop::Style::Encoding, :config do
it 'registers no offense when no encoding present' do
it 'does not register an offense when no encoding present' do
expect_no_offenses(<<~RUBY)
def foo() end
RUBY
end

it 'registers no offense when encoding present but not UTF-8' do
it 'does not register an offense when encoding present but not UTF-8' do
expect_no_offenses(<<~RUBY)
# encoding: us-ascii
def foo() end
RUBY
end

it 'does not register an offense on a different magic comment type' do
expect_no_offenses(<<~RUBY)
# frozen-string-literal: true
def foo() end
RUBY
end

it 'registers an offense when encoding present and UTF-8' do
expect_offense(<<~RUBY)
# encoding: utf-8
Expand All @@ -40,10 +47,12 @@ def foo() end
RUBY
end

it 'registers an offense for vim-style encoding comments' do
it 'registers an offense and corrects if there are multiple encoding magic comments' do
expect_offense(<<~RUBY)
# vim:filetype=ruby, fileencoding=utf-8
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
# encoding: utf-8
^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
# coding: utf-8
^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() end
RUBY

Expand All @@ -52,22 +61,75 @@ def foo() end
RUBY
end

it 'registers no offense when encoding is in the wrong place' do
expect_no_offenses(<<~RUBY)
def foo() end
it 'registers an offense and corrects the magic comment follows another magic comment' do
expect_offense(<<~RUBY)
# frozen-string-literal: true
# encoding: utf-8
^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() end
RUBY

expect_correction(<<~RUBY)
# frozen-string-literal: true
def foo() end
RUBY
end

it 'registers an offense for encoding inserted by magic_encoding gem' do
expect_offense(<<~RUBY)
# -*- encoding : utf-8 -*-
^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() 'ä' end
it 'does not register an offense when encoding is not at the top of the file' do
expect_no_offenses(<<~RUBY)
# frozen-string-literal: true

# encoding: utf-8
def foo() end
RUBY
end

expect_correction(<<~RUBY)
def foo() 'ä' end
it 'does not register an offense when encoding is in the wrong place' do
expect_no_offenses(<<~RUBY)
def foo() end
# encoding: utf-8
RUBY
end

context 'vim comments' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
# vim:filetype=ruby, fileencoding=utf-8
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() end
RUBY

expect_correction(<<~RUBY)
# vim: filetype=ruby
def foo() end
RUBY
end
end

context 'emacs comment' do
it 'registers an offense for encoding' do
expect_offense(<<~RUBY)
# -*- encoding : utf-8 -*-
^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() 'ä' end
RUBY

expect_correction(<<~RUBY)
def foo() 'ä' end
RUBY
end

it 'only removes encoding if there are other editor comments' do
expect_offense(<<~RUBY)
# -*- encoding : utf-8; mode: enh-ruby -*-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Unnecessary utf-8 encoding comment.
def foo() 'ä' end
RUBY

expect_correction(<<~RUBY)
# -*- mode: enh-ruby -*-
def foo() 'ä' end
RUBY
end
end
end