From 24732b756a6508f13da88133d600695eafd148b5 Mon Sep 17 00:00:00 2001 From: fatkodima Date: Fri, 5 Jun 2020 18:35:56 +0300 Subject: [PATCH] Add new `Performance/Readlines` cop --- CHANGELOG.md | 2 + config/default.yml | 5 + docs/modules/ROOT/pages/cops.adoc | 1 + docs/modules/ROOT/pages/cops_performance.adoc | 36 +++++ lib/rubocop/cop/performance/readlines.rb | 127 ++++++++++++++++++ lib/rubocop/cop/performance_cops.rb | 1 + .../rubocop/cop/performance/readlines_spec.rb | 62 +++++++++ 7 files changed, 234 insertions(+) create mode 100644 lib/rubocop/cop/performance/readlines.rb create mode 100644 spec/rubocop/cop/performance/readlines_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 127c9a3d6c..5943edec15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### New features +* [#127](https://github.com/rubocop-hq/rubocop-performance/pull/127): Add new `Performance/Readlines` cop. ([@fatkodima][]) + * [#125](https://github.com/rubocop-hq/rubocop-performance/pull/125): Support `Range#member?` method for `Performance/RangeInclude` cop. ([@fatkodima][]) ## 1.6.1 (2020-06-05) diff --git a/config/default.yml b/config/default.yml index c4f257b8dd..3ccb228797 100644 --- a/config/default.yml +++ b/config/default.yml @@ -145,6 +145,11 @@ Performance/RangeInclude: VersionChanged: '1.7' Safe: false +Performance/Readlines: + Description: 'Use `IO.each_line` (`IO#each_line`) instead of `IO.readlines` (`IO#readlines`).' + Enabled: true + VersionAdded: '1.7' + Performance/RedundantBlockCall: Description: 'Use `yield` instead of `block.call`.' Reference: 'https://github.com/JuanitoFatas/fast-ruby#proccall-and-block-arguments-vs-yieldcode' diff --git a/docs/modules/ROOT/pages/cops.adoc b/docs/modules/ROOT/pages/cops.adoc index 48151780e3..5819f61a75 100644 --- a/docs/modules/ROOT/pages/cops.adoc +++ b/docs/modules/ROOT/pages/cops.adoc @@ -19,6 +19,7 @@ * xref:cops_performance.adoc#performanceinefficienthashsearch[Performance/InefficientHashSearch] * xref:cops_performance.adoc#performanceopenstruct[Performance/OpenStruct] * xref:cops_performance.adoc#performancerangeinclude[Performance/RangeInclude] +* xref:cops_performance.adoc#performancereadlines[Performance/Readlines] * xref:cops_performance.adoc#performanceredundantblockcall[Performance/RedundantBlockCall] * xref:cops_performance.adoc#performanceredundantmatch[Performance/RedundantMatch] * xref:cops_performance.adoc#performanceredundantmerge[Performance/RedundantMerge] diff --git a/docs/modules/ROOT/pages/cops_performance.adoc b/docs/modules/ROOT/pages/cops_performance.adoc index 505dcb4423..40c1ec5212 100644 --- a/docs/modules/ROOT/pages/cops_performance.adoc +++ b/docs/modules/ROOT/pages/cops_performance.adoc @@ -848,6 +848,42 @@ This cop is `Safe: false` by default because `Range#include?` (or `Range#member? * https://github.com/JuanitoFatas/fast-ruby#cover-vs-include-code +== Performance/Readlines + +|=== +| Enabled by default | Safe | Supports autocorrection | VersionAdded | VersionChanged + +| Enabled +| Yes +| Yes +| 1.7 +| - +|=== + +This cop identifies places where inefficient `readlines` method +can be replaced by `each_line` to avoid fully loading file content into memory. + +=== Examples + +[source,ruby] +---- +# bad +File.readlines('testfile').each { |l| puts l } +IO.readlines('testfile', chomp: true).each { |l| puts l } + +conn.readlines(10).map { |l| l.size } +file.readlines.find { |l| l.start_with?('#') } +file.readlines.each { |l| puts l } + +# good +File.open('testfile', 'r').each_line { |l| puts l } +IO.open('testfile').each_line(chomp: true) { |l| puts l } + +conn.each_line(10).map { |l| l.size } +file.readlines.find { |l| l.start_with?('#') } +file.each_line { |l| puts l } +---- + == Performance/RedundantBlockCall |=== diff --git a/lib/rubocop/cop/performance/readlines.rb b/lib/rubocop/cop/performance/readlines.rb new file mode 100644 index 0000000000..f3bf339921 --- /dev/null +++ b/lib/rubocop/cop/performance/readlines.rb @@ -0,0 +1,127 @@ +# frozen_string_literal: true + +module RuboCop + module Cop + module Performance + # This cop identifies places where inefficient `readlines` method + # can be replaced by `each_line` to avoid fully loading file content into memory. + # + # @example + # + # # bad + # File.readlines('testfile').each { |l| puts l } + # IO.readlines('testfile', chomp: true).each { |l| puts l } + # + # conn.readlines(10).map { |l| l.size } + # file.readlines.find { |l| l.start_with?('#') } + # file.readlines.each { |l| puts l } + # + # # good + # File.open('testfile', 'r').each_line { |l| puts l } + # IO.open('testfile').each_line(chomp: true) { |l| puts l } + # + # conn.each_line(10).map { |l| l.size } + # file.readlines.find { |l| l.start_with?('#') } + # file.each_line { |l| puts l } + # + class Readlines < Cop + include RangeHelp + + MSG = 'Use `%s` instead of `%s`.' + ENUMERABLE_METHODS = (Enumerable.instance_methods + [:each]).freeze + + def_node_matcher :readlines_on_class?, <<~PATTERN + $(send $(send (const nil? {:IO :File}) :readlines ...) #enumerable_method?) + PATTERN + + def_node_matcher :readlines_on_instance?, <<~PATTERN + $(send $(send ${nil? !const_type?} :readlines ...) #enumerable_method? ...) + PATTERN + + def on_send(node) + readlines_on_class?(node) do |enumerable_call, readlines_call| + offense(node, enumerable_call, readlines_call) + end + + readlines_on_instance?(node) do |enumerable_call, readlines_call, _| + offense(node, enumerable_call, readlines_call) + end + end + + def autocorrect(node) + readlines_on_instance?(node) do |enumerable_call, readlines_call, receiver| + # We cannot safely correct `.readlines` method called on IO/File classes + # due to its signature and we are not sure with implicit receiver + # if it is called in the context of some instance or mentioned class. + return if receiver.nil? + + lambda do |corrector| + range = correction_range(enumerable_call, readlines_call) + + if readlines_call.arguments? + call_args = build_call_args(readlines_call.arguments) + replacement = "each_line(#{call_args})" + else + replacement = 'each_line' + end + + corrector.replace(range, replacement) + end + end + end + + private + + def enumerable_method?(node) + ENUMERABLE_METHODS.include?(node.to_sym) + end + + def offense(node, enumerable_call, readlines_call) + range = offense_range(enumerable_call, readlines_call) + good_method = build_good_method(enumerable_call) + bad_method = build_bad_method(enumerable_call) + + add_offense( + node, + location: range, + message: format(MSG, good: good_method, bad: bad_method) + ) + end + + def offense_range(enumerable_call, readlines_call) + readlines_pos = readlines_call.loc.selector.begin_pos + enumerable_pos = enumerable_call.loc.selector.end_pos + range_between(readlines_pos, enumerable_pos) + end + + def build_good_method(enumerable_call) + if enumerable_call.method?(:each) + 'each_line' + else + "each_line.#{enumerable_call.method_name}" + end + end + + def build_bad_method(enumerable_call) + "readlines.#{enumerable_call.method_name}" + end + + def correction_range(enumerable_call, readlines_call) + begin_pos = readlines_call.loc.selector.begin_pos + + end_pos = if enumerable_call.method?(:each) + enumerable_call.loc.expression.end_pos + else + enumerable_call.loc.dot.begin_pos + end + + range_between(begin_pos, end_pos) + end + + def build_call_args(call_args_node) + call_args_node.map(&:source).join(', ') + end + end + end + end +end diff --git a/lib/rubocop/cop/performance_cops.rb b/lib/rubocop/cop/performance_cops.rb index 446357cff3..9dfff85328 100644 --- a/lib/rubocop/cop/performance_cops.rb +++ b/lib/rubocop/cop/performance_cops.rb @@ -18,6 +18,7 @@ require_relative 'performance/inefficient_hash_search' require_relative 'performance/open_struct' require_relative 'performance/range_include' +require_relative 'performance/readlines' require_relative 'performance/redundant_block_call' require_relative 'performance/redundant_match' require_relative 'performance/redundant_merge' diff --git a/spec/rubocop/cop/performance/readlines_spec.rb b/spec/rubocop/cop/performance/readlines_spec.rb new file mode 100644 index 0000000000..abd696bf25 --- /dev/null +++ b/spec/rubocop/cop/performance/readlines_spec.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +RSpec.describe RuboCop::Cop::Performance::Readlines do + subject(:cop) { described_class.new } + + it 'registers an offense when using `File.readlines` followed by Enumerable method' do + expect_offense(<<~RUBY) + File.readlines('testfile').map { |l| l.size } + ^^^^^^^^^^^^^^^^^^^^^^^^^ Use `each_line.map` instead of `readlines.map`. + RUBY + end + + it 'registers an offense when using `IO.readlines` followed by Enumerable method' do + expect_offense(<<~RUBY) + IO.readlines('testfile').map { |l| l.size } + ^^^^^^^^^^^^^^^^^^^^^^^^^ Use `each_line.map` instead of `readlines.map`. + RUBY + end + + it 'registers an offense when using `IO.readlines` followed by `#each` method' do + # Note: `each_line` in message, not `each_line.each` + expect_offense(<<~RUBY) + IO.readlines('testfile').each { |l| puts l } + ^^^^^^^^^^^^^^^^^^^^^^^^^^ Use `each_line` instead of `readlines.each`. + RUBY + end + + it 'does not register an offense when using `.readlines` and not followed by Enumerable method' do + expect_no_offenses(<<~RUBY) + File.readlines('testfile').not_enumerable_method + RUBY + end + + it 'registers an offense and corrects when using `#readlines` on an instance followed by Enumerable method' do + expect_offense(<<~RUBY) + file.readlines(10).map { |l| l.size } + ^^^^^^^^^^^^^^^^^ Use `each_line.map` instead of `readlines.map`. + RUBY + + expect_correction(<<~RUBY) + file.each_line(10).map { |l| l.size } + RUBY + end + + it 'registers an offense and corrects when using `#readlines` on an instance followed by `#each` method' do + # Note: `each_line` in message, not `each_line.each` + expect_offense(<<~RUBY) + file.readlines(10).each { |l| puts l } + ^^^^^^^^^^^^^^^^^^ Use `each_line` instead of `readlines.each`. + RUBY + + expect_correction(<<~RUBY) + file.each_line(10) { |l| puts l } + RUBY + end + + it 'does not register an offense when using `#readlines` on an instance and not followed by Enumerable method' do + expect_no_offenses(<<~RUBY) + file.readlines.not_enumerable_method + RUBY + end +end