From 6ce912e9573893babb6ca9b0e3aa1377f9804373 Mon Sep 17 00:00:00 2001 From: Parker Moore Date: Sat, 2 Sep 2017 10:38:07 -0400 Subject: [PATCH] Use a Schwartzian transform with custom sorting (#6342) Merge pull request 6342 --- benchmark/schwartzian_transform.rb | 115 +++++++++++++++++++++++++++++ lib/jekyll/filters.rb | 29 +++++--- 2 files changed, 133 insertions(+), 11 deletions(-) create mode 100644 benchmark/schwartzian_transform.rb diff --git a/benchmark/schwartzian_transform.rb b/benchmark/schwartzian_transform.rb new file mode 100644 index 00000000000..76c53e44e32 --- /dev/null +++ b/benchmark/schwartzian_transform.rb @@ -0,0 +1,115 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true +# +# The Ruby documentation for #sort_by describes what's called a Schwartzian transform: +# +# > A more efficient technique is to cache the sort keys (modification times in this case) +# > before the sort. Perl users often call this approach a Schwartzian transform, after +# > Randal Schwartz. We construct a temporary array, where each element is an array +# > containing our sort key along with the filename. We sort this array, and then extract +# > the filename from the result. +# > This is exactly what sort_by does internally. +# +# The well-documented efficiency of sort_by is a good reason to use it. However, when a property +# does not exist on an item being sorted, it can cause issues (no nil's allowed!) +# In Jekyll::Filters#sort_input, we extract the property in each iteration of #sort, +# which is quite inefficient! How inefficient? This benchmark will tell you just how, and how much +# it can be improved by using the Schwartzian transform. Thanks, Randall! + +require 'benchmark/ips' +require 'minitest' +require File.expand_path("../lib/jekyll", __dir__) + +def site + @site ||= Jekyll::Site.new( + Jekyll.configuration("source" => File.expand_path("../docs", __dir__)) + ).tap(&:reset).tap(&:read) +end + +def site_docs + site.collections["docs"].docs.dup +end + +def sort_by_property_directly(docs, meta_key) + docs.sort! do |apple, orange| + apple_property = apple[meta_key] + orange_property = orange[meta_key] + + if !apple_property.nil? && !orange_property.nil? + apple_property <=> orange_property + elsif !apple_property.nil? && orange_property.nil? + -1 + elsif apple_property.nil? && !orange_property.nil? + 1 + else + apple <=> orange + end + end +end + +def schwartzian_transform(docs, meta_key) + docs.collect! { |d| + [d[meta_key], d] + }.sort! { |apple, orange| + if !apple[0].nil? && !orange[0].nil? + apple.first <=> orange.first + elsif !apple[0].nil? && orange[0].nil? + -1 + elsif apple[0].nil? && !orange[0].nil? + 1 + else + apple[-1] <=> orange[-1] + end + }.collect! { |d| d[-1] } +end + +# Before we test efficiency, do they produce the same output? +class Correctness + include Minitest::Assertions + + require "pp" + define_method :mu_pp, &:pretty_inspect + + attr_accessor :assertions + + def initialize(docs, property) + @assertions = 0 + @docs = docs + @property = property + end + + def assert! + assert sort_by_property_directly(@docs, @property).is_a?(Array), "sort_by_property_directly must return an array" + assert schwartzian_transform(@docs, @property).is_a?(Array), "schwartzian_transform must return an array" + assert_equal sort_by_property_directly(@docs, @property), + schwartzian_transform(@docs, @property) + puts "Yeah, ok, correctness all checks out for property #{@property.inspect}" + end +end + +Correctness.new(site_docs, "redirect_from".freeze).assert! +Correctness.new(site_docs, "title".freeze).assert! + +# First, test with a property only a handful of documents have. +Benchmark.ips do |x| + x.config(time: 10, warmup: 5) + x.report('sort_by_property_directly with sparse property') do + sort_by_property_directly(site_docs, "redirect_from".freeze) + end + x.report('schwartzian_transform with sparse property') do + schwartzian_transform(site_docs, "redirect_from".freeze) + end + x.compare! +end + +# Next, test with a property they all have. +Benchmark.ips do |x| + x.config(time: 10, warmup: 5) + x.report('sort_by_property_directly with non-sparse property') do + sort_by_property_directly(site_docs, "title".freeze) + end + x.report('schwartzian_transform with non-sparse property') do + schwartzian_transform(site_docs, "title".freeze) + end + x.compare! +end diff --git a/lib/jekyll/filters.rb b/lib/jekyll/filters.rb index 35ed4d9f63e..686995e195f 100644 --- a/lib/jekyll/filters.rb +++ b/lib/jekyll/filters.rb @@ -335,19 +335,26 @@ def inspect(input) end private + + # Sort the input Enumerable by the given property. + # If the property doesn't exist, return the sort order respective of + # which item doesn't have the property. + # We also utilize the Schwartzian transform to make this more efficient. def sort_input(input, property, order) - input.sort do |apple, orange| - apple_property = item_property(apple, property) - orange_property = item_property(orange, property) - - if !apple_property.nil? && orange_property.nil? - - order - elsif apple_property.nil? && !orange_property.nil? - + order - else - apple_property <=> orange_property + input.map { |item| [item_property(item, property), item] } + .sort! do |apple_info, orange_info| + apple_property = apple_info.first + orange_property = orange_info.first + + if !apple_property.nil? && orange_property.nil? + - order + elsif apple_property.nil? && !orange_property.nil? + + order + else + apple_property <=> orange_property + end end - end + .map!(&:last) end private