From 6d6c2273fae7384418c83ad053613838d15ed269 Mon Sep 17 00:00:00 2001 From: Benjamin Quorning Date: Tue, 5 May 2020 09:17:32 +0200 Subject: [PATCH 1/4] Simplify NameSimilarity.find_similar_name By explicitly passing the collection of "possible names" when calling `NameSimilarity.find_similar_name`, the calling class doesn't need to implement a `.collect_variable_like_names` method. --- lib/rubocop/cop/lint/redundant_cop_disable_directive.rb | 6 +----- lib/rubocop/cop/lint/useless_assignment.rb | 3 ++- lib/rubocop/name_similarity.rb | 4 ++-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/lib/rubocop/cop/lint/redundant_cop_disable_directive.rb b/lib/rubocop/cop/lint/redundant_cop_disable_directive.rb index 13b6db4666f..b969700f2c8 100644 --- a/lib/rubocop/cop/lint/redundant_cop_disable_directive.rb +++ b/lib/rubocop/cop/lint/redundant_cop_disable_directive.rb @@ -235,7 +235,7 @@ def describe(cop) elsif all_cop_names.include?(cop) "`#{cop}`" else - similar = find_similar_name(cop, []) + similar = find_similar_name(cop, all_cop_names) if similar "`#{cop}` (did you mean `#{similar}`?)" else @@ -244,10 +244,6 @@ def describe(cop) end end - def collect_variable_like_names(scope) - all_cop_names.each { |name| scope << name } - end - def all_cop_names @all_cop_names ||= Cop.registry.names end diff --git a/lib/rubocop/cop/lint/useless_assignment.rb b/lib/rubocop/cop/lint/useless_assignment.rb index 47399f5ed2a..67cd6ae7523 100644 --- a/lib/rubocop/cop/lint/useless_assignment.rb +++ b/lib/rubocop/cop/lint/useless_assignment.rb @@ -94,7 +94,8 @@ def operator_assignment_message(scope, assignment) end def similar_name_message(variable) - similar_name = find_similar_name(variable.name, variable.scope) + variable_like_names = collect_variable_like_names(variable.scope) + similar_name = find_similar_name(variable.name, variable_like_names) " Did you mean `#{similar_name}`?" if similar_name end diff --git a/lib/rubocop/name_similarity.rb b/lib/rubocop/name_similarity.rb index 7b7b95dca50..32a72445904 100644 --- a/lib/rubocop/name_similarity.rb +++ b/lib/rubocop/name_similarity.rb @@ -5,8 +5,8 @@ module RuboCop module NameSimilarity MINIMUM_SIMILARITY_TO_SUGGEST = 0.9 - def find_similar_name(target_name, scope) - names = collect_variable_like_names(scope) + def find_similar_name(target_name, names) + names = names.dup names.delete(target_name) scores = names.each_with_object({}) do |name, hash| From cabda375fc9ab49aa980e51934d73ac5d7455e5c Mon Sep 17 00:00:00 2001 From: Benjamin Quorning Date: Tue, 5 May 2020 09:31:49 +0200 Subject: [PATCH 2/4] Change find_similar_name to a module_function Change `NameSimilarity#find_similar_name` into `NameSimilarity.find_similar_name`, since it no longer needs to call back to a method in the same context. --- lib/rubocop/cop/lint/redundant_cop_disable_directive.rb | 3 +-- lib/rubocop/cop/lint/useless_assignment.rb | 4 ++-- lib/rubocop/name_similarity.rb | 2 ++ 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/rubocop/cop/lint/redundant_cop_disable_directive.rb b/lib/rubocop/cop/lint/redundant_cop_disable_directive.rb index b969700f2c8..2145d4b6a7e 100644 --- a/lib/rubocop/cop/lint/redundant_cop_disable_directive.rb +++ b/lib/rubocop/cop/lint/redundant_cop_disable_directive.rb @@ -26,7 +26,6 @@ module Lint # # good # x += 1 class RedundantCopDisableDirective < Cop - include NameSimilarity include RangeHelp COP_NAME = 'Lint/RedundantCopDisableDirective' @@ -235,7 +234,7 @@ def describe(cop) elsif all_cop_names.include?(cop) "`#{cop}`" else - similar = find_similar_name(cop, all_cop_names) + similar = NameSimilarity.find_similar_name(cop, all_cop_names) if similar "`#{cop}` (did you mean `#{similar}`?)" else diff --git a/lib/rubocop/cop/lint/useless_assignment.rb b/lib/rubocop/cop/lint/useless_assignment.rb index 67cd6ae7523..793799fe425 100644 --- a/lib/rubocop/cop/lint/useless_assignment.rb +++ b/lib/rubocop/cop/lint/useless_assignment.rb @@ -31,7 +31,6 @@ module Lint # do_something(some_var) # end class UselessAssignment < Cop - include NameSimilarity MSG = 'Useless assignment to variable - `%s`.' def join_force?(force_class) @@ -95,7 +94,8 @@ def operator_assignment_message(scope, assignment) def similar_name_message(variable) variable_like_names = collect_variable_like_names(variable.scope) - similar_name = find_similar_name(variable.name, variable_like_names) + similar_name = NameSimilarity.find_similar_name(variable.name, + variable_like_names) " Did you mean `#{similar_name}`?" if similar_name end diff --git a/lib/rubocop/name_similarity.rb b/lib/rubocop/name_similarity.rb index 32a72445904..7cb97f80eb2 100644 --- a/lib/rubocop/name_similarity.rb +++ b/lib/rubocop/name_similarity.rb @@ -3,6 +3,8 @@ module RuboCop # Common functionality for finding names that are similar to a given name. module NameSimilarity + module_function + MINIMUM_SIMILARITY_TO_SUGGEST = 0.9 def find_similar_name(target_name, names) From 84ee8dbf505e7ebaa29890d8134f37041c96dd6f Mon Sep 17 00:00:00 2001 From: Benjamin Quorning Date: Tue, 5 May 2020 09:23:50 +0200 Subject: [PATCH 3/4] Use DidYouMean Simplify `NameSimilarity#find_similar_name` by using the built-in `did_you_mean` gem instead of `StringUtil.similarity`. --- lib/rubocop/name_similarity.rb | 9 +++------ spec/fixtures/html_formatter/expected.html | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/lib/rubocop/name_similarity.rb b/lib/rubocop/name_similarity.rb index 7cb97f80eb2..7ec2bf90551 100644 --- a/lib/rubocop/name_similarity.rb +++ b/lib/rubocop/name_similarity.rb @@ -11,13 +11,10 @@ def find_similar_name(target_name, names) names = names.dup names.delete(target_name) - scores = names.each_with_object({}) do |name, hash| - score = StringUtil.similarity(target_name, name) - hash[name] = score if score >= MINIMUM_SIMILARITY_TO_SUGGEST - end + spell_checker = DidYouMean::SpellChecker.new(dictionary: names) + similar_names = spell_checker.correct(target_name) - most_similar_name, _max_score = scores.max_by { |_, score| score } - most_similar_name + similar_names.first end end end diff --git a/spec/fixtures/html_formatter/expected.html b/spec/fixtures/html_formatter/expected.html index 5c2f37859e6..d691d610d48 100644 --- a/spec/fixtures/html_formatter/expected.html +++ b/spec/fixtures/html_formatter/expected.html @@ -658,7 +658,7 @@

RuboCop Inspection Report

Line #3warning: - Lint/UselessAssignment: Useless assignment to variable - bar. + Lint/UselessAssignment: Useless assignment to variable - bar. Did you mean baz?
    foo = bar = baz
From 332e66b9088ff247fff16032ba03cd195b245323 Mon Sep 17 00:00:00 2001 From: Benjamin Quorning Date: Tue, 5 May 2020 09:31:29 +0200 Subject: [PATCH 4/4] Remove dependency on `jaro_winkler` Removing `StringUtil` as well, by adding a `NameSimilarity.find_similar_names` method. --- CHANGELOG.md | 1 + lib/rubocop.rb | 1 - lib/rubocop/name_similarity.rb | 10 ++++-- lib/rubocop/options.rb | 5 +-- lib/rubocop/string_util.rb | 14 -------- rubocop.gemspec | 1 - spec/rubocop/string_util_spec.rb | 61 -------------------------------- 7 files changed, 9 insertions(+), 84 deletions(-) delete mode 100644 lib/rubocop/string_util.rb delete mode 100644 spec/rubocop/string_util_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index de52dcf4498..4c1d4280d42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ * [#7390](https://github.com/rubocop-hq/rubocop/issues/7390): **(Breaking)** Enabling a cop overrides disabling its department. ([@jonas054][]) * [#7936](https://github.com/rubocop-hq/rubocop/issues/7936): Mark `Lint/BooleanSymbol` as unsafe. ([@laurmurclar][]) * [#7948](https://github.com/rubocop-hq/rubocop/pull/7948): Mark unsafe for `Style/OptionalArguments`. ([@koic][]) +* [#7931](https://github.com/rubocop-hq/rubocop/pull/7931): Remove dependency on the `jaro_winkler` gem, instead depending on `did_you_mean`. This may be a breaking change for RuboCop libraries calling `NameSimilarity#find_similar_name`. ([@bquorning][]) ## 0.82.0 (2020-04-16) diff --git a/lib/rubocop.rb b/lib/rubocop.rb index 2e0392ea2e4..fcc20dab00b 100644 --- a/lib/rubocop.rb +++ b/lib/rubocop.rb @@ -14,7 +14,6 @@ require_relative 'rubocop/path_util' require_relative 'rubocop/file_finder' require_relative 'rubocop/platform' -require_relative 'rubocop/string_util' require_relative 'rubocop/name_similarity' require_relative 'rubocop/node_pattern' require_relative 'rubocop/string_interpreter' diff --git a/lib/rubocop/name_similarity.rb b/lib/rubocop/name_similarity.rb index 7ec2bf90551..917d87bd2f8 100644 --- a/lib/rubocop/name_similarity.rb +++ b/lib/rubocop/name_similarity.rb @@ -5,16 +5,20 @@ module RuboCop module NameSimilarity module_function - MINIMUM_SIMILARITY_TO_SUGGEST = 0.9 - def find_similar_name(target_name, names) + similar_names = find_similar_names(target_name, names) + + similar_names.first + end + + def find_similar_names(target_name, names) names = names.dup names.delete(target_name) spell_checker = DidYouMean::SpellChecker.new(dictionary: names) similar_names = spell_checker.correct(target_name) - similar_names.first + similar_names end end end diff --git a/lib/rubocop/options.rb b/lib/rubocop/options.rb index 53a6e799d4c..9926cf0e120 100644 --- a/lib/rubocop/options.rb +++ b/lib/rubocop/options.rb @@ -248,10 +248,7 @@ def validate_cop_list(names) def format_message_from(name, cop_names) message = 'Unrecognized cop or department: %s.' message_with_candidate = "%s\nDid you mean? %s" - corrections = cop_names.select do |cn| - score = StringUtil.similarity(cn, name) - score >= NameSimilarity::MINIMUM_SIMILARITY_TO_SUGGEST - end.sort + corrections = NameSimilarity.find_similar_names(name, cop_names) if corrections.empty? format(message, name: name) diff --git a/lib/rubocop/string_util.rb b/lib/rubocop/string_util.rb deleted file mode 100644 index d8464616db8..00000000000 --- a/lib/rubocop/string_util.rb +++ /dev/null @@ -1,14 +0,0 @@ -# frozen_string_literal: true - -require 'jaro_winkler' - -module RuboCop - # This module provides approximate string matching methods. - module StringUtil - module_function - - def similarity(string_a, string_b) - JaroWinkler.distance(string_a.to_s, string_b.to_s) - end - end -end diff --git a/rubocop.gemspec b/rubocop.gemspec index 5cfb6e3b9e4..0bc2bf4e449 100644 --- a/rubocop.gemspec +++ b/rubocop.gemspec @@ -33,7 +33,6 @@ Gem::Specification.new do |s| 'bug_tracker_uri' => 'https://github.com/rubocop-hq/rubocop/issues' } - s.add_runtime_dependency('jaro_winkler', '~> 1.5.1') s.add_runtime_dependency('parallel', '~> 1.10') s.add_runtime_dependency('parser', '>= 2.7.0.1') s.add_runtime_dependency('rainbow', '>= 2.2.2', '< 4.0') diff --git a/spec/rubocop/string_util_spec.rb b/spec/rubocop/string_util_spec.rb deleted file mode 100644 index 41d27e6a0a9..00000000000 --- a/spec/rubocop/string_util_spec.rb +++ /dev/null @@ -1,61 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe RuboCop::StringUtil do - { - # These samples are derived from Apache Lucene project. - # https://github.com/apache/lucene-solr/blob/LUCENE-6989-v2/lucene/suggest/src/test/org/apache/lucene/search/spell/TestJaroWinklerDistance.java - %w[al al] => 1.000, - %w[martha marhta] => 0.961, - %w[jones johnson] => 0.832, - %w[abcvwxyz cabvwxyz] => 0.958, - %w[dwayne duane] => 0.840, - %w[dixon dicksonx] => 0.813, - %w[fvie ten] => 0.000, - # These are from Rich Milne. - # https://github.com/richmilne/JaroWinkler/blob/master/jaro/jaro_tests.py - %w[SHACKLEFORD SHACKELFORD] => 0.98182, - %w[DUNNINGHAM CUNNIGHAM] => 0.89630, - %w[NICHLESON NICHULSON] => 0.95556, - %w[MASSEY MASSIE] => 0.93333, - %w[ABROMS ABRAMS] => 0.92222, - %w[HARDIN MARTINEZ] => 0.72222, - %w[ITMAN SMITH] => 0.46667, - %w[JERALDINE GERALDINE] => 0.92593, - %w[MICHELLE MICHAEL] => 0.92143, - %w[JULIES JULIUS] => 0.93333, - %w[TANYA TONYA] => 0.88000, - %w[SEAN SUSAN] => 0.80500, - %w[JON JOHN] => 0.93333, - %w[JON JAN] => 0.80000, - %w[DWAYNE DYUANE] => 0.84000, - %w[CRATE TRACE] => 0.73333, - %w[WIBBELLY WOBRELBLY] => 0.85298, - %w[MARHTA MARTHA] => 0.96111, - %w[aaaaaabc aaaaaabd] => 0.95000, - %w[ABCAWXYZ BCAWXYZ] => 0.91071, - %w[ABCVWXYZ CBAWXYZ] => 0.91071, - %w[ABCDUVWXYZ DABCUVWXYZ] => 0.93333, - %w[ABCDUVWXYZ DBCAUVWXYZ] => 0.96667, - %w[ABBBUVWXYZ BBBAUVWXYZ] => 0.96667, - %w[ABCDUV11lLZ DBCAUVWXYZ] => 0.73117, - %w[ABBBUVWXYZ BBB11L3VWXZ] => 0.77879, - %w[A A] => 1.00000, - %w[AB AB] => 1.00000, - %w[ABC ABC] => 1.00000, - %w[ABCD ABCD] => 1.00000, - %w[ABCDE ABCDE] => 1.00000, - %w[AA AA] => 1.00000, - %w[AAA AAA] => 1.00000, - %w[AAAA AAAA] => 1.00000, - %w[AAAAA AAAAA] => 1.00000, - %w[A B] => 0.00000 - }.each do |strings, expected| - context "with #{strings.first.inspect} and #{strings.last.inspect}" do - subject(:distance) { described_class.similarity(*strings) } - - it "returns #{expected}" do - expect(distance).to be_within(0.001).of(expected) - end - end - end -end