From deeee25e6a99c345b7aa3f0a562656e1f0de7bee Mon Sep 17 00:00:00 2001 From: Rena Watson Date: Fri, 5 Jun 2020 18:35:56 +0300 Subject: [PATCH] Add new `Performance/IoReadlines` cop --- CHANGELOG.md | 1 + config/default.yml | 6 + docs/modules/ROOT/pages/cops.adoc | 1 + docs/modules/ROOT/pages/cops_performance.adoc | 40 ++++++ lib/rubocop/cop/performance/io_readlines.rb | 127 ++++++++++++++++++ lib/rubocop/cop/performance_cops.rb | 1 + .../cop/performance/io_readlines_spec.rb | 62 +++++++++ 7 files changed, 238 insertions(+) create mode 100644 lib/rubocop/cop/performance/io_readlines.rb create mode 100644 spec/rubocop/cop/performance/io_readlines_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 937a165..d7768e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### New features +* [#127](https://github.com/rubocop-hq/rubocop-performance/pull/127): Add new `Performance/IoReadlines` cop. ([@fatkodima][]) * [#128](https://github.com/rubocop-hq/rubocop-performance/pull/128): Add new `Performance/ReverseFirst` cop. ([@fatkodima][]) * [#132](https://github.com/rubocop-hq/rubocop-performance/issues/132): Add new `Performance/RedundantSortBlock` cop. ([@fatkodima][]) * [#125](https://github.com/rubocop-hq/rubocop-performance/pull/125): Support `Array()` and `Hash()` methods for `Performance/Size` cop. ([@fatkodima][]) diff --git a/config/default.yml b/config/default.yml index 84393e3..5e77b96 100644 --- a/config/default.yml +++ b/config/default.yml @@ -142,6 +142,12 @@ Performance/InefficientHashSearch: VersionAdded: '0.56' Safe: false +Performance/IoReadlines: + Description: 'Use `IO.each_line` (`IO#each_line`) instead of `IO.readlines` (`IO#readlines`).' + Reference: 'https://docs.gitlab.com/ee/development/performance.html#reading-from-files-and-other-data-sources' + Enabled: false + VersionAdded: '1.7' + Performance/OpenStruct: Description: 'Use `Struct` instead of `OpenStruct`.' Enabled: false diff --git a/docs/modules/ROOT/pages/cops.adoc b/docs/modules/ROOT/pages/cops.adoc index 481188c..8dbfe52 100644 --- a/docs/modules/ROOT/pages/cops.adoc +++ b/docs/modules/ROOT/pages/cops.adoc @@ -19,6 +19,7 @@ * xref:cops_performance.adoc#performancefixedsize[Performance/FixedSize] * xref:cops_performance.adoc#performanceflatmap[Performance/FlatMap] * xref:cops_performance.adoc#performanceinefficienthashsearch[Performance/InefficientHashSearch] +* xref:cops_performance.adoc#performanceioreadlines[Performance/IoReadlines] * xref:cops_performance.adoc#performanceopenstruct[Performance/OpenStruct] * xref:cops_performance.adoc#performancerangeinclude[Performance/RangeInclude] * xref:cops_performance.adoc#performanceredundantblockcall[Performance/RedundantBlockCall] diff --git a/docs/modules/ROOT/pages/cops_performance.adoc b/docs/modules/ROOT/pages/cops_performance.adoc index 6b84a1c..b758602 100644 --- a/docs/modules/ROOT/pages/cops_performance.adoc +++ b/docs/modules/ROOT/pages/cops_performance.adoc @@ -826,6 +826,46 @@ h = { a: 1, b: 2 }; h.value?(nil) * https://github.com/JuanitoFatas/fast-ruby#hashkey-instead-of-hashkeysinclude-code +== Performance/IoReadlines + +|=== +| Enabled by default | Safe | Supports autocorrection | VersionAdded | VersionChanged + +| Disabled +| Yes +| Yes +| 1.7 +| - +|=== + +This cop identifies places where inefficient `readlines` method +can be replaced by `each_line` to avoid fully loading file content into memory. + +=== Examples + +[source,ruby] +---- +# bad +File.readlines('testfile').each { |l| puts l } +IO.readlines('testfile', chomp: true).each { |l| puts l } + +conn.readlines(10).map { |l| l.size } +file.readlines.find { |l| l.start_with?('#') } +file.readlines.each { |l| puts l } + +# good +File.open('testfile', 'r').each_line { |l| puts l } +IO.open('testfile').each_line(chomp: true) { |l| puts l } + +conn.each_line(10).map { |l| l.size } +file.each_line.find { |l| l.start_with?('#') } +file.each_line { |l| puts l } +---- + +=== References + +* https://docs.gitlab.com/ee/development/performance.html#reading-from-files-and-other-data-sources + == Performance/OpenStruct |=== diff --git a/lib/rubocop/cop/performance/io_readlines.rb b/lib/rubocop/cop/performance/io_readlines.rb new file mode 100644 index 0000000..33e29ff --- /dev/null +++ b/lib/rubocop/cop/performance/io_readlines.rb @@ -0,0 +1,127 @@ +# frozen_string_literal: true + +module RuboCop + module Cop + module Performance + # This cop identifies places where inefficient `readlines` method + # can be replaced by `each_line` to avoid fully loading file content into memory. + # + # @example + # + # # bad + # File.readlines('testfile').each { |l| puts l } + # IO.readlines('testfile', chomp: true).each { |l| puts l } + # + # conn.readlines(10).map { |l| l.size } + # file.readlines.find { |l| l.start_with?('#') } + # file.readlines.each { |l| puts l } + # + # # good + # File.open('testfile', 'r').each_line { |l| puts l } + # IO.open('testfile').each_line(chomp: true) { |l| puts l } + # + # conn.each_line(10).map { |l| l.size } + # file.each_line.find { |l| l.start_with?('#') } + # file.each_line { |l| puts l } + # + class IoReadlines < Cop + include RangeHelp + + MSG = 'Use `%s` instead of `%s`.' + ENUMERABLE_METHODS = (Enumerable.instance_methods + [:each]).freeze + + def_node_matcher :readlines_on_class?, <<~PATTERN + $(send $(send (const nil? {:IO :File}) :readlines ...) #enumerable_method?) + PATTERN + + def_node_matcher :readlines_on_instance?, <<~PATTERN + $(send $(send ${nil? !const_type?} :readlines ...) #enumerable_method? ...) + PATTERN + + def on_send(node) + readlines_on_class?(node) do |enumerable_call, readlines_call| + offense(node, enumerable_call, readlines_call) + end + + readlines_on_instance?(node) do |enumerable_call, readlines_call, _| + offense(node, enumerable_call, readlines_call) + end + end + + def autocorrect(node) + readlines_on_instance?(node) do |enumerable_call, readlines_call, receiver| + # We cannot safely correct `.readlines` method called on IO/File classes + # due to its signature and we are not sure with implicit receiver + # if it is called in the context of some instance or mentioned class. + return if receiver.nil? + + lambda do |corrector| + range = correction_range(enumerable_call, readlines_call) + + if readlines_call.arguments? + call_args = build_call_args(readlines_call.arguments) + replacement = "each_line(#{call_args})" + else + replacement = 'each_line' + end + + corrector.replace(range, replacement) + end + end + end + + private + + def enumerable_method?(node) + ENUMERABLE_METHODS.include?(node.to_sym) + end + + def offense(node, enumerable_call, readlines_call) + range = offense_range(enumerable_call, readlines_call) + good_method = build_good_method(enumerable_call) + bad_method = build_bad_method(enumerable_call) + + add_offense( + node, + location: range, + message: format(MSG, good: good_method, bad: bad_method) + ) + end + + def offense_range(enumerable_call, readlines_call) + readlines_pos = readlines_call.loc.selector.begin_pos + enumerable_pos = enumerable_call.loc.selector.end_pos + range_between(readlines_pos, enumerable_pos) + end + + def build_good_method(enumerable_call) + if enumerable_call.method?(:each) + 'each_line' + else + "each_line.#{enumerable_call.method_name}" + end + end + + def build_bad_method(enumerable_call) + "readlines.#{enumerable_call.method_name}" + end + + def correction_range(enumerable_call, readlines_call) + begin_pos = readlines_call.loc.selector.begin_pos + + end_pos = if enumerable_call.method?(:each) + enumerable_call.loc.expression.end_pos + else + enumerable_call.loc.dot.begin_pos + end + + range_between(begin_pos, end_pos) + end + + def build_call_args(call_args_node) + call_args_node.map(&:source).join(', ') + end + end + end + end +end diff --git a/lib/rubocop/cop/performance_cops.rb b/lib/rubocop/cop/performance_cops.rb index 9fd291a..e43ad14 100644 --- a/lib/rubocop/cop/performance_cops.rb +++ b/lib/rubocop/cop/performance_cops.rb @@ -21,6 +21,7 @@ require_relative 'performance/inefficient_hash_search' require_relative 'performance/open_struct' require_relative 'performance/range_include' +require_relative 'performance/io_readlines' require_relative 'performance/redundant_block_call' require_relative 'performance/redundant_match' require_relative 'performance/redundant_merge' diff --git a/spec/rubocop/cop/performance/io_readlines_spec.rb b/spec/rubocop/cop/performance/io_readlines_spec.rb new file mode 100644 index 0000000..2a5090b --- /dev/null +++ b/spec/rubocop/cop/performance/io_readlines_spec.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +RSpec.describe RuboCop::Cop::Performance::IoReadlines do + subject(:cop) { described_class.new } + + it 'registers an offense when using `File.readlines` followed by Enumerable method' do + expect_offense(<<~RUBY) + File.readlines('testfile').map { |l| l.size } + ^^^^^^^^^^^^^^^^^^^^^^^^^ Use `each_line.map` instead of `readlines.map`. + RUBY + end + + it 'registers an offense when using `IO.readlines` followed by Enumerable method' do + expect_offense(<<~RUBY) + IO.readlines('testfile').map { |l| l.size } + ^^^^^^^^^^^^^^^^^^^^^^^^^ Use `each_line.map` instead of `readlines.map`. + RUBY + end + + it 'registers an offense when using `IO.readlines` followed by `#each` method' do + # Note: `each_line` in message, not `each_line.each` + expect_offense(<<~RUBY) + IO.readlines('testfile').each { |l| puts l } + ^^^^^^^^^^^^^^^^^^^^^^^^^^ Use `each_line` instead of `readlines.each`. + RUBY + end + + it 'does not register an offense when using `.readlines` and not followed by Enumerable method' do + expect_no_offenses(<<~RUBY) + File.readlines('testfile').not_enumerable_method + RUBY + end + + it 'registers an offense and corrects when using `#readlines` on an instance followed by Enumerable method' do + expect_offense(<<~RUBY) + file.readlines(10).map { |l| l.size } + ^^^^^^^^^^^^^^^^^ Use `each_line.map` instead of `readlines.map`. + RUBY + + expect_correction(<<~RUBY) + file.each_line(10).map { |l| l.size } + RUBY + end + + it 'registers an offense and corrects when using `#readlines` on an instance followed by `#each` method' do + # Note: `each_line` in message, not `each_line.each` + expect_offense(<<~RUBY) + file.readlines(10).each { |l| puts l } + ^^^^^^^^^^^^^^^^^^ Use `each_line` instead of `readlines.each`. + RUBY + + expect_correction(<<~RUBY) + file.each_line(10) { |l| puts l } + RUBY + end + + it 'does not register an offense when using `#readlines` on an instance and not followed by Enumerable method' do + expect_no_offenses(<<~RUBY) + file.readlines.not_enumerable_method + RUBY + end +end