From 36651c50a86a107358d3f5b8668c6940886e5875 Mon Sep 17 00:00:00 2001 From: Ashwin Maroli Date: Sun, 10 May 2020 16:51:30 +0530 Subject: [PATCH 1/2] Add find filters to optimize where-first chains --- docs/_data/jekyll_filters.yml | 34 ++++++ lib/jekyll/filters.rb | 60 ++++++++++ test/test_filters.rb | 201 ++++++++++++++++++++++++++++++++++ 3 files changed, 295 insertions(+) diff --git a/docs/_data/jekyll_filters.yml b/docs/_data/jekyll_filters.yml index 27089e738f2..5e036feb320 100644 --- a/docs/_data/jekyll_filters.yml +++ b/docs/_data/jekyll_filters.yml @@ -111,6 +111,40 @@ # +- name: Find + description: >- + Return the first object in an array for which the queried + attribute has the given value or return nil if no item in + the array satisfies the given criteria. + version_badge: 4.1.0 + examples: + - input: '{{ site.members | find: "graduation_year", "2014" }}' + output: + +# + +- name: Find Expression + description: >- + Return the first object in an array for which the given + expression evaluates to true or return nil if no item in + the array satisfies the evaluated expression. + version_badge: 4.1.0 + examples: + - input: |- + {{ site.members | find_exp:"item", + "item.graduation_year == 2014" }} + output: + - input: |- + {{ site.members | find_exp:"item", + "item.graduation_year < 2014" }} + output: + - input: |- + {{ site.members | find_exp:"item", + "item.projects contains 'foo'" }} + output: + +# + - name: Group By description: Group an array's items by a given property. examples: diff --git a/lib/jekyll/filters.rb b/lib/jekyll/filters.rb index fcfe6477903..a3c6312c485 100644 --- a/lib/jekyll/filters.rb +++ b/lib/jekyll/filters.rb @@ -210,6 +210,66 @@ def where_exp(input, variable, expression) end || [] end + # Searches an array of objects and returns the first object that has the queried attribute + # with the given value or returns nil otherwise. + # + # input - the object array. + # property - the property within each object to search by. 
+ # value - the desired value. + # Cannot be an instance of Array nor Hash since calling #to_s on them returns + # their `#inspect` string object. + # + # Returns the found object or nil, or the input itself if the arguments are unusable. + # + # rubocop:disable Metrics/CyclomaticComplexity + def find(input, property, value) + return input if !property || value.is_a?(Array) || value.is_a?(Hash) + return input unless input.respond_to?(:find) + + input = input.values if input.is_a?(Hash) + input_id = input.hash + + # implement a hash based on method parameters to cache the end-result for given parameters. + @find_filter_cache ||= {} + @find_filter_cache[input_id] ||= {} + @find_filter_cache[input_id][property] ||= {} + + # stash or retrieve results to return + # Since `enum.find` can return nil or false, we use a placeholder string "<__NO MATCH__>" + # to validate caching. + result = @find_filter_cache[input_id][property][value] ||= begin + input.find do |object| + compare_property_vs_target(item_property(object, property), value) + end || "<__NO MATCH__>" + end + return nil if result == "<__NO MATCH__>" + + result + end + # rubocop:enable Metrics/CyclomaticComplexity + + # Searches an array of objects against an expression and returns the first object for which + # the expression evaluates to true, or returns nil otherwise. 
+ # + # input - the object array + # variable - the variable to assign each item to in the expression + # expression - a Liquid comparison expression passed in as a string + # + # Returns the found object or nil, or the input itself if it does not respond to `find`. + def find_exp(input, variable, expression) + return input unless input.respond_to?(:find) + + input = input.values if input.is_a?(Hash) + + condition = parse_condition(expression) + @context.stack do + input.find do |object| + @context[variable] = object + condition.evaluate(@context) + end + end + end + # Convert the input into integer # # input - the object string diff --git a/test/test_filters.rb b/test/test_filters.rb index 6450cae822f..423d472ed70 100644 --- a/test/test_filters.rb +++ b/test/test_filters.rb @@ -1079,6 +1079,207 @@ def to_liquid end end + context "find filter" do + should "return any input that is not an array" do + assert_equal "some string", @filter.find("some string", "la", "le") + end + + should "filter objects in a hash appropriately" do + hash = { "a" => { "color" => "red" }, "b" => { "color" => "blue" } } + assert_equal({ "color" => "red" }, @filter.find(hash, "color", "red")) + end + + should "filter objects appropriately" do + assert_equal( + { "color" => "red", "size" => "large" }, + @filter.find(@array_of_objects, "color", "red") + ) + end + + should "filter objects with null properties appropriately" do + array = [{}, { "color" => nil }, { "color" => "" }, { "color" => "text" }] + assert_equal({}, @filter.find(array, "color", nil)) + end + + should "filter objects with numerical properties appropriately" do + array = [ + { "value" => "555" }, + { "value" => 555 }, + { "value" => 24.625 }, + { "value" => "24.625" }, + ] + assert_equal({ "value" => 24.625 }, @filter.find(array, "value", 24.625)) + assert_equal({ "value" => "555" }, @filter.find(array, "value", 555)) + end + + should "filter array properties appropriately" do + hash = { + "a" => { "tags" => %w(x y) }, + "b" => { "tags" => ["x"] }, + "c" => { "tags" => %w(y z) 
}, + } + assert_equal({ "tags" => %w(x y) }, @filter.find(hash, "tags", "x")) + end + + should "filter array properties alongside string properties" do + hash = { + "a" => { "tags" => %w(x y) }, + "b" => { "tags" => "x" }, + "c" => { "tags" => %w(y z) }, + } + assert_equal({ "tags" => %w(x y) }, @filter.find(hash, "tags", "x")) + end + + should "filter hash properties with null and empty values" do + hash = { + "a" => { "tags" => {} }, + "b" => { "tags" => "" }, + "c" => { "tags" => nil }, + "d" => { "tags" => ["x", nil] }, + "e" => { "tags" => [] }, + "f" => { "tags" => "xtra" }, + } + + assert_equal({ "tags" => nil }, @filter.find(hash, "tags", nil)) + assert_equal({ "tags" => "" }, @filter.find(hash, "tags", "")) + + # `{{ hash | find: 'tags', empty }}` + assert_equal( + { "tags" => {} }, + @filter.find(hash, "tags", Liquid::Expression::LITERALS["empty"]) + ) + + # `{{ hash | find: 'tags', blank }}` + assert_equal( + { "tags" => {} }, + @filter.find(hash, "tags", Liquid::Expression::LITERALS["blank"]) + ) + end + + should "not match substrings" do + hash = { + "a" => { "category" => "bear" }, + "b" => { "category" => "wolf" }, + "c" => { "category" => %w(bear lion) }, + } + assert_nil @filter.find(hash, "category", "ear") + end + + should "stringify during comparison for compatibility with liquid parsing" do + hash = { + "The Words" => { "rating" => 1.2, "featured" => false }, + "Limitless" => { "rating" => 9.2, "featured" => true }, + "Hustle" => { "rating" => 4.7, "featured" => true }, + } + + result = @filter.find(hash, "featured", "true") + assert_equal 9.2, result["rating"] + + result = @filter.find(hash, "rating", 4.7) + assert_equal 4.7, result["rating"] + end + end + + context "find_exp filter" do + should "return any input that is not an array" do + assert_equal "some string", @filter.find_exp("some string", "la", "le") + end + + should "filter objects in a hash appropriately" do + hash = { "a" => { "color"=>"red" }, "b" => { "color"=>"blue" } } + 
assert_equal( + { "color" => "red" }, + @filter.find_exp(hash, "item", "item.color == 'red'") + ) + end + + should "filter objects appropriately" do + assert_equal( + { "color" => "red", "size" => "large" }, + @filter.find_exp(@array_of_objects, "item", "item.color == 'red'") + ) + end + + should "filter objects appropriately with 'or', 'and' operators" do + assert_equal( + { "color" => "teal", "size" => "large" }, + @filter.find_exp( + @array_of_objects, "item", "item.color == 'red' or item.size == 'large'" + ) + ) + + assert_equal( + { "color" => "red", "size" => "large" }, + @filter.find_exp( + @array_of_objects, "item", "item.color == 'red' and item.size == 'large'" + ) + ) + end + + should "filter objects across multiple conditions" do + sample = [ + { "color" => "teal", "size" => "large", "type" => "variable" }, + { "color" => "red", "size" => "large", "type" => "fixed" }, + { "color" => "red", "size" => "medium", "type" => "variable" }, + { "color" => "blue", "size" => "medium", "type" => "fixed" }, + ] + assert_equal( + { "color" => "red", "size" => "large", "type" => "fixed" }, + @filter.find_exp( + sample, "item", "item.type == 'fixed' and item.color == 'red' or item.color == 'teal'" + ) + ) + end + + should "stringify during comparison for compatibility with liquid parsing" do + hash = { + "The Words" => { "rating" => 1.2, "featured" => false }, + "Limitless" => { "rating" => 9.2, "featured" => true }, + "Hustle" => { "rating" => 4.7, "featured" => true }, + } + + result = @filter.find_exp(hash, "item", "item.featured == true") + assert_equal 9.2, result["rating"] + + result = @filter.find_exp(hash, "item", "item.rating == 4.7") + assert_equal 4.7, result["rating"] + end + + should "filter with other operators" do + assert_equal 3, @filter.find_exp([1, 2, 3, 4, 5], "n", "n >= 3") + end + + objects = [ + { "id" => "a", "groups" => [1, 2] }, + { "id" => "b", "groups" => [2, 3] }, + { "id" => "c" }, + { "id" => "d", "groups" => [1, 3] }, + ] + should 
"filter with the contains operator over arrays" do + result = @filter.find_exp(objects, "obj", "obj.groups contains 1") + assert_equal "a", result["id"] + end + + should "filter with the contains operator over hash keys" do + result = @filter.find_exp(objects, "obj", "obj contains 'groups'") + assert_equal "a", result["id"] + end + + should "filter posts" do + site = fixture_site.tap(&:read) + posts = site.site_payload["site"]["posts"] + result = @filter.find_exp(posts, "obj", "obj.title == 'Foo Bar'") + assert_equal(result, site.posts.find { |p| p.title == "Foo Bar" }) + end + + should "filter by variable values" do + @filter.site.tap(&:read) + posts = @filter.site.site_payload["site"]["posts"] + result = @filter.find_exp(posts, "post", "post.date > site.dont_show_posts_before") + assert result.date > @sample_time + end + end + context "group_by_exp filter" do should "successfully group array of Jekyll::Page's" do @filter.site.process From d11c77b5708a0017f45686f1a999c2d44989328a Mon Sep 17 00:00:00 2001 From: Ashwin Maroli Date: Mon, 18 May 2020 20:33:26 +0530 Subject: [PATCH 2/2] Benchmark script for find filter vs where + first --- .../find-filter-vs-where-first-filters.rb | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100755 benchmark/find-filter-vs-where-first-filters.rb diff --git a/benchmark/find-filter-vs-where-first-filters.rb b/benchmark/find-filter-vs-where-first-filters.rb new file mode 100755 index 00000000000..d3c1c0dd194 --- /dev/null +++ b/benchmark/find-filter-vs-where-first-filters.rb @@ -0,0 +1,49 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require 'benchmark/ips' +require_relative '../lib/jekyll' + +puts '' +print 'Setting up... 
' + +SITE = Jekyll::Site.new( + Jekyll.configuration({ + "source" => File.expand_path("../docs", __dir__), + "destination" => File.expand_path("../docs/_site", __dir__), + "disable_disk_cache" => true, + "quiet" => true, + }) +) + +TEMPLATE_1 = Liquid::Template.parse(<<~HTML) + {%- assign doc = site.documents | where: 'url', '/docs/assets/' | first -%} + {{- doc.title -}} +HTML + +TEMPLATE_2 = Liquid::Template.parse(<<~HTML) + {%- assign doc = site.documents | find: 'url', '/docs/assets/' -%} + {{- doc.title -}} +HTML + +[:reset, :read, :generate].each { |phase| SITE.send(phase) } + +puts 'done.' +puts 'Testing... ' +puts " #{'where + first'.cyan} results in #{TEMPLATE_1.render(SITE.site_payload).inspect.green}" +puts " #{'find'.cyan} results in #{TEMPLATE_2.render(SITE.site_payload).inspect.green}" + +if TEMPLATE_1.render(SITE.site_payload) == TEMPLATE_2.render(SITE.site_payload) + puts 'Success! Procceding to run benchmarks.'.green + puts '' +else + puts 'Something went wrong. Aborting.'.magenta + puts '' + return +end + +Benchmark.ips do |x| + x.report('where + first') { TEMPLATE_1.render(SITE.site_payload) } + x.report('find') { TEMPLATE_2.render(SITE.site_payload) } + x.compare! +end