Add new Naming/InclusiveLanguage cop (#9842)

Recommends the use of inclusive language instead of problematic terms.
rubocop · Jun 23, 2021 · 609cd7f · 609cd7f
1 parent 32e0522
commit 609cd7f
Show file tree

Hide file tree

Showing 8 changed files with 719 additions and 1 deletion.
diff --git a/.rubocop.yml b/.rubocop.yml
@@ -97,6 +97,23 @@ Metrics/ModuleLength:
   Exclude:
     - 'spec/**/*.rb'
 
+Naming/InclusiveLanguage:
+  Exclude:
+    - lib/rubocop/cop/naming/inclusive_language.rb
+    - spec/rubocop/cop/naming/inclusive_language_spec.rb
+  FlaggedTerms:
+    whitelist:
+      Suggestions:
+        - allowlist
+    blacklist:
+      Suggestions:
+        - denylist
+    master:
+      AllowedRegex:
+        - 'blob/master/'
+        - 'master \(unreleased\)'
+        - !ruby/regexp "/ENV\\['RUBOCOP_VERSION'\\] == 'master'/"
+
 RSpec/FilePath:
   Exclude:
     - spec/rubocop/cop/internal_affairs/redundant_let_rubocop_config_new_spec.rb

diff --git a/changelog/new_add_naming_inclusive_language_cop.md b/changelog/new_add_naming_inclusive_language_cop.md
@@ -0,0 +1 @@
+* [#9842](https://github.com/rubocop/rubocop/pull/9842): Add new `Naming/InclusiveLanguage` cop. ([@tjwp][])
diff --git a/config/default.yml b/config/default.yml
@@ -2534,6 +2534,36 @@ Naming/HeredocDelimiterNaming:
   ForbiddenDelimiters:
     - !ruby/regexp '/(^|\s)(EO[A-Z]{1}|END)(\s|$)/'
 
+Naming/InclusiveLanguage:
+  Description: 'Recommend the use of inclusive language instead of problematic terms.'
+  Enabled: pending
+  VersionAdded: '<<next>>'
+  CheckIdentifiers: true
+  CheckConstants: true
+  CheckVariables: true
+  CheckStrings: true
+  CheckSymbols: true
+  CheckComments: true
+  CheckFilepaths: true
+  FlaggedTerms:
+    whitelist:
+      Regex: !ruby/regexp '/white[-_\s]?list/'
+      Suggestions:
+        - allowlist
+        - permit
+    blacklist:
+      Regex: !ruby/regexp '/black[-_\s]?list/'
+      Suggestions:
+        - denylist
+        - block
+    master:
+      Suggestions: ['main', 'primary', 'leader']
+      AllowedRegex:
+        - 'Master of None'
+        - 'Master Boot Record'
+    slave:
+      Suggestions: ['replica', 'secondary', 'follower']
+
 Naming/MemoizedInstanceVariableName:
   Description: >-
     Memoized method name should match memo instance variable name.

diff --git a/lib/rubocop.rb b/lib/rubocop.rb
@@ -401,6 +401,7 @@
 require_relative 'rubocop/cop/naming/file_name'
 require_relative 'rubocop/cop/naming/heredoc_delimiter_case'
 require_relative 'rubocop/cop/naming/heredoc_delimiter_naming'
+require_relative 'rubocop/cop/naming/inclusive_language'
 require_relative 'rubocop/cop/naming/memoized_instance_variable_name'
 require_relative 'rubocop/cop/naming/method_name'
 require_relative 'rubocop/cop/naming/method_parameter_name'

diff --git a/lib/rubocop/cop/naming/inclusive_language.rb b/lib/rubocop/cop/naming/inclusive_language.rb
@@ -0,0 +1,249 @@
+# frozen_string_literal: true
+
+module RuboCop
+  module Cop
+    module Naming
+      # This cops recommends the use of inclusive language instead of problematic terms.
+      # The cop can check the following locations for offenses:
+      # - identifiers
+      # - constants
+      # - variables
+      # - strings
+      # - symbols
+      # - comments
+      # - file paths
+      # Each of these locations can be individually enabled/disabled via configuration,
+      # for example CheckIdentifiers = true/false.
+      #
+      # Flagged terms are configurable for the cop. For each flagged term an optional
+      # Regex can be specified to identify offenses. Suggestions for replacing a flagged term can
+      # be configured and will be displayed as part of the offense message.
+      # An AllowedRegex can be specified for a flagged term to exempt allowed uses of the term.
+      #
+      # @example FlaggedTerms: { whitelist: { Suggestions: ['allowlist'] } }
+      #   # Suggest replacing identifier whitelist with allowlist
+      #
+      #   # bad
+      #   whitelist_users = %w(user1 user1)
+      #
+      #   # good
+      #   allowlist_users = %w(user1 user2)
+      #
+      # @example FlaggedTerms: { master: { Suggestions: ['main', 'primary', 'leader'] } }
+      #   # Suggest replacing master in an instance variable name with main, primary, or leader
+      #
+      #   # bad
+      #   @master_node = 'node1.example.com'
+      #
+      #   # good
+      #   @primary_node = 'node1.example.com'
+      #
+      # @example FlaggedTerms: { whitelist: { Regex: !ruby/regexp '/white[-_\s]?list' } }
+      #   # Identify problematic terms using a Regexp
+      #
+      #   # bad
+      #   white_list = %w(user1 user2)
+      #
+      #   # good
+      #   allow_list = %w(user1 user2)
+      #
+      # @example FlaggedTerms: { master: { AllowedRegex: 'master\'?s degree' } }
+      #   # Specify allowed uses of the flagged term as a string or regexp.
+      #
+      #   # bad
+      #   # They had a masters
+      #
+      #   # good
+      #   # They had a master's degree
+      #
+      class InclusiveLanguage < Base
+        include RangeHelp
+
+        EMPTY_ARRAY = [].freeze
+
+        WordLocation = Struct.new(:word, :position)
+
+        def initialize(config = nil, options = nil)
+          super
+          @flagged_term_hash = {}
+          @flagged_terms_regex = nil
+          @allowed_regex = nil
+          @check_token = preprocess_check_config
+          preprocess_flagged_terms
+        end
+
+        def on_new_investigation
+          investigate_filepath if cop_config['CheckFilepaths']
+          investigate_tokens
+        end
+
+        private
+
+        def investigate_tokens
+          processed_source.each_token do |token|
+            next unless check_token?(token.type)
+
+            word_locations = scan_for_words(token.text)
+            next if word_locations.empty?
+
+            add_offenses_for_token(token, word_locations)
+          end
+        end
+
+        def add_offenses_for_token(token, word_locations)
+          word_locations.each do |word_location|
+            start_position = token.pos.begin_pos + token.pos.source.index(word_location.word)
+            range = range_between(start_position, start_position + word_location.word.length)
+            add_offense(range, message: create_message(word_location.word))
+          end
+        end
+
+        def check_token?(type)
+          !!@check_token[type]
+        end
+
+        def preprocess_check_config # rubocop:disable Metrics/AbcSize
+          {
+            tIDENTIFIER: cop_config['CheckIdentifiers'],
+            tCONSTANT: cop_config['CheckConstants'],
+            tIVAR: cop_config['CheckVariables'],
+            tCVAR: cop_config['CheckVariables'],
+            tGVAR: cop_config['CheckVariables'],
+            tSYMBOL: cop_config['CheckSymbols'],
+            tSTRING: cop_config['CheckStrings'],
+            tSTRING_CONTENT: cop_config['CheckStrings'],
+            tCOMMENT: cop_config['CheckComments']
+          }.freeze
+        end
+
+        def preprocess_flagged_terms
+          allowed_strings = []
+          flagged_term_strings = []
+          cop_config['FlaggedTerms'].each do |term, term_definition|
+            next if term_definition.nil?
+
+            allowed_strings.concat(process_allowed_regex(term_definition['AllowedRegex']))
+            regex_string = ensure_regex_string(term_definition['Regex'] || term)
+            flagged_term_strings << regex_string
+
+            add_to_flagged_term_hash(regex_string, term, term_definition)
+          end
+
+          set_regexes(flagged_term_strings, allowed_strings)
+        end
+
+        def add_to_flagged_term_hash(regex_string, term, term_definition)
+          @flagged_term_hash[Regexp.new(regex_string, Regexp::IGNORECASE)] =
+            term_definition.merge('Term' => term,
+                                  'SuggestionString' =>
+                                    preprocess_suggestions(term_definition['Suggestions']))
+        end
+
+        def set_regexes(flagged_term_strings, allowed_strings)
+          @flagged_terms_regex = array_to_ignorecase_regex(flagged_term_strings)
+          @allowed_regex = array_to_ignorecase_regex(allowed_strings) unless allowed_strings.empty?
+        end
+
+        def process_allowed_regex(allowed)
+          return EMPTY_ARRAY if allowed.nil?
+
+          Array(allowed).map do |allowed_term|
+            next if allowed_term.is_a?(String) && allowed_term.strip.empty?
+
+            ensure_regex_string(allowed_term)
+          end
+        end
+
+        def ensure_regex_string(regex)
+          regex.is_a?(Regexp) ? regex.source : regex
+        end
+
+        def array_to_ignorecase_regex(strings)
+          Regexp.new(strings.join('|'), Regexp::IGNORECASE)
+        end
+
+        def investigate_filepath
+          word_locations = scan_for_words(processed_source.file_path)
+
+          case word_locations.length
+          when 0
+            return
+          when 1
+            message = create_single_word_message_for_file(word_locations.first.word)
+          else
+            words = word_locations.map(&:word)
+            message = create_multiple_word_message_for_file(words)
+          end
+
+          range = source_range(processed_source.buffer, 1, 0)
+          add_offense(range, message: message)
+        end
+
+        def create_single_word_message_for_file(word)
+          create_message(word).sub(/\.$/, ' in file path.')
+        end
+
+        def create_multiple_word_message_for_file(words)
+          quoted_words = words.map { |word| "'#{word}'" }
+          "Consider replacing problematic terms #{quoted_words.join(', ')} in file path."
+        end
+
+        def scan_for_words(input)
+          mask_input(input).enum_for(:scan, @flagged_terms_regex).map do
+            match = Regexp.last_match
+            WordLocation.new(match.to_s, match.offset(0).first)
+          end
+        end
+
+        def mask_input(str)
+          return str if @allowed_regex.nil?
+
+          safe_str = if str.valid_encoding?
+                       str
+                     else
+                       str.encode('UTF-8', invalid: :replace, undef: :replace)
+                     end
+          safe_str.gsub(@allowed_regex) { |match| '*' * match.size }
+        end
+
+        def create_message(word)
+          flagged_term = find_flagged_term(word)
+          "Consider replacing problematic term '#{word}'#{flagged_term['SuggestionString']}."
+        end
+
+        def find_flagged_term(word)
+          _regexp, flagged_term = @flagged_term_hash.find do |key, _term|
+            key.match?(word)
+          end
+          flagged_term
+        end
+
+        def create_message_for_file(word)
+          create_message(word).sub(/\.$/, ' in file path.')
+        end
+
+        def preprocess_suggestions(suggestions)
+          return '' if suggestions.nil? ||
+                       (suggestions.is_a?(String) && suggestions.strip.empty?) || suggestions.empty?
+
+          format_suggestions(suggestions)
+        end
+
+        def format_suggestions(suggestions)
+          quoted_suggestions = Array(suggestions).map { |word| "'#{word}'" }
+          suggestion_str = case quoted_suggestions.size
+                           when 1
+                             quoted_suggestions.first
+                           when 2
+                             quoted_suggestions.join(' or ')
+                           else
+                             last_quoted = quoted_suggestions.pop
+                             quoted_suggestions << "or #{last_quoted}"
+                             quoted_suggestions.join(', ')
+                           end
+          " with #{suggestion_str}"
+        end
+      end
+    end
+  end
+end
diff --git a/spec/rubocop/cli/options_spec.rb b/spec/rubocop/cli/options_spec.rb
@@ -505,6 +505,8 @@ class SomeCop < Cop
                 Enabled: false
               Gemspec:
                 Enabled: false
+              Naming:
+                Enabled: false
 
               Style/SomeCop:
                 Description: Something