From 332e66b9088ff247fff16032ba03cd195b245323 Mon Sep 17 00:00:00 2001 From: Benjamin Quorning Date: Tue, 5 May 2020 09:31:29 +0200 Subject: [PATCH] Remove dependency on `jaro_winkler` Removing `StringUtil` as well, by adding a `NameSimilarity.find_similar_names` method. --- CHANGELOG.md | 1 + lib/rubocop.rb | 1 - lib/rubocop/name_similarity.rb | 10 ++++-- lib/rubocop/options.rb | 5 +-- lib/rubocop/string_util.rb | 14 -------- rubocop.gemspec | 1 - spec/rubocop/string_util_spec.rb | 61 -------------------------------- 7 files changed, 9 insertions(+), 84 deletions(-) delete mode 100644 lib/rubocop/string_util.rb delete mode 100644 spec/rubocop/string_util_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index de52dcf4498..4c1d4280d42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ * [#7390](https://github.com/rubocop-hq/rubocop/issues/7390): **(Breaking)** Enabling a cop overrides disabling its department. ([@jonas054][]) * [#7936](https://github.com/rubocop-hq/rubocop/issues/7936): Mark `Lint/BooleanSymbol` as unsafe. ([@laurmurclar][]) * [#7948](https://github.com/rubocop-hq/rubocop/pull/7948): Mark unsafe for `Style/OptionalArguments`. ([@koic][]) +* [#7931](https://github.com/rubocop-hq/rubocop/pull/7931): Remove dependency on the `jaro_winkler` gem, instead depending on `did_you_mean`. This may be a breaking change for RuboCop libraries calling `NameSimilarity#find_similar_name`. ([@bquorning][]) ## 0.82.0 (2020-04-16) diff --git a/lib/rubocop.rb b/lib/rubocop.rb index 2e0392ea2e4..fcc20dab00b 100644 --- a/lib/rubocop.rb +++ b/lib/rubocop.rb @@ -14,7 +14,6 @@ require_relative 'rubocop/path_util' require_relative 'rubocop/file_finder' require_relative 'rubocop/platform' -require_relative 'rubocop/string_util' require_relative 'rubocop/name_similarity' require_relative 'rubocop/node_pattern' require_relative 'rubocop/string_interpreter' diff --git a/lib/rubocop/name_similarity.rb b/lib/rubocop/name_similarity.rb index 7ec2bf90551..917d87bd2f8 100644 --- a/lib/rubocop/name_similarity.rb +++ b/lib/rubocop/name_similarity.rb @@ -5,16 +5,20 @@ module RuboCop module NameSimilarity module_function - MINIMUM_SIMILARITY_TO_SUGGEST = 0.9 - def find_similar_name(target_name, names) + similar_names = find_similar_names(target_name, names) + + similar_names.first + end + + def find_similar_names(target_name, names) names = names.dup names.delete(target_name) spell_checker = DidYouMean::SpellChecker.new(dictionary: names) similar_names = spell_checker.correct(target_name) - similar_names.first + similar_names end end end diff --git a/lib/rubocop/options.rb b/lib/rubocop/options.rb index 53a6e799d4c..9926cf0e120 100644 --- a/lib/rubocop/options.rb +++ b/lib/rubocop/options.rb @@ -248,10 +248,7 @@ def validate_cop_list(names) def format_message_from(name, cop_names) message = 'Unrecognized cop or department: %s.' message_with_candidate = "%s\nDid you mean? %s" - corrections = cop_names.select do |cn| - score = StringUtil.similarity(cn, name) - score >= NameSimilarity::MINIMUM_SIMILARITY_TO_SUGGEST - end.sort + corrections = NameSimilarity.find_similar_names(name, cop_names) if corrections.empty? format(message, name: name) diff --git a/lib/rubocop/string_util.rb b/lib/rubocop/string_util.rb deleted file mode 100644 index d8464616db8..00000000000 --- a/lib/rubocop/string_util.rb +++ /dev/null @@ -1,14 +0,0 @@ -# frozen_string_literal: true - -require 'jaro_winkler' - -module RuboCop - # This module provides approximate string matching methods. - module StringUtil - module_function - - def similarity(string_a, string_b) - JaroWinkler.distance(string_a.to_s, string_b.to_s) - end - end -end diff --git a/rubocop.gemspec b/rubocop.gemspec index 5cfb6e3b9e4..0bc2bf4e449 100644 --- a/rubocop.gemspec +++ b/rubocop.gemspec @@ -33,7 +33,6 @@ Gem::Specification.new do |s| 'bug_tracker_uri' => 'https://github.com/rubocop-hq/rubocop/issues' } - s.add_runtime_dependency('jaro_winkler', '~> 1.5.1') s.add_runtime_dependency('parallel', '~> 1.10') s.add_runtime_dependency('parser', '>= 2.7.0.1') s.add_runtime_dependency('rainbow', '>= 2.2.2', '< 4.0') diff --git a/spec/rubocop/string_util_spec.rb b/spec/rubocop/string_util_spec.rb deleted file mode 100644 index 41d27e6a0a9..00000000000 --- a/spec/rubocop/string_util_spec.rb +++ /dev/null @@ -1,61 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe RuboCop::StringUtil do - { - # These samples are derived from Apache Lucene project. - # https://github.com/apache/lucene-solr/blob/LUCENE-6989-v2/lucene/suggest/src/test/org/apache/lucene/search/spell/TestJaroWinklerDistance.java - %w[al al] => 1.000, - %w[martha marhta] => 0.961, - %w[jones johnson] => 0.832, - %w[abcvwxyz cabvwxyz] => 0.958, - %w[dwayne duane] => 0.840, - %w[dixon dicksonx] => 0.813, - %w[fvie ten] => 0.000, - # These are from Rich Milne. - # https://github.com/richmilne/JaroWinkler/blob/master/jaro/jaro_tests.py - %w[SHACKLEFORD SHACKELFORD] => 0.98182, - %w[DUNNINGHAM CUNNIGHAM] => 0.89630, - %w[NICHLESON NICHULSON] => 0.95556, - %w[MASSEY MASSIE] => 0.93333, - %w[ABROMS ABRAMS] => 0.92222, - %w[HARDIN MARTINEZ] => 0.72222, - %w[ITMAN SMITH] => 0.46667, - %w[JERALDINE GERALDINE] => 0.92593, - %w[MICHELLE MICHAEL] => 0.92143, - %w[JULIES JULIUS] => 0.93333, - %w[TANYA TONYA] => 0.88000, - %w[SEAN SUSAN] => 0.80500, - %w[JON JOHN] => 0.93333, - %w[JON JAN] => 0.80000, - %w[DWAYNE DYUANE] => 0.84000, - %w[CRATE TRACE] => 0.73333, - %w[WIBBELLY WOBRELBLY] => 0.85298, - %w[MARHTA MARTHA] => 0.96111, - %w[aaaaaabc aaaaaabd] => 0.95000, - %w[ABCAWXYZ BCAWXYZ] => 0.91071, - %w[ABCVWXYZ CBAWXYZ] => 0.91071, - %w[ABCDUVWXYZ DABCUVWXYZ] => 0.93333, - %w[ABCDUVWXYZ DBCAUVWXYZ] => 0.96667, - %w[ABBBUVWXYZ BBBAUVWXYZ] => 0.96667, - %w[ABCDUV11lLZ DBCAUVWXYZ] => 0.73117, - %w[ABBBUVWXYZ BBB11L3VWXZ] => 0.77879, - %w[A A] => 1.00000, - %w[AB AB] => 1.00000, - %w[ABC ABC] => 1.00000, - %w[ABCD ABCD] => 1.00000, - %w[ABCDE ABCDE] => 1.00000, - %w[AA AA] => 1.00000, - %w[AAA AAA] => 1.00000, - %w[AAAA AAAA] => 1.00000, - %w[AAAAA AAAAA] => 1.00000, - %w[A B] => 0.00000 - }.each do |strings, expected| - context "with #{strings.first.inspect} and #{strings.last.inspect}" do - subject(:distance) { described_class.similarity(*strings) } - - it "returns #{expected}" do - expect(distance).to be_within(0.001).of(expected) - end - end - end -end