Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Use a Schwartzian transform with custom sorting (#6342)
Merge pull request 6342
- Loading branch information
Showing
2 changed files
with
133 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
#!/usr/bin/env ruby | ||
# frozen_string_literal: true | ||
# | ||
# The Ruby documentation for #sort_by describes what's called a Schwartzian transform: | ||
# | ||
# > A more efficient technique is to cache the sort keys (modification times in this case) | ||
# > before the sort. Perl users often call this approach a Schwartzian transform, after | ||
# > Randal Schwartz. We construct a temporary array, where each element is an array | ||
# > containing our sort key along with the filename. We sort this array, and then extract | ||
# > the filename from the result. | ||
# > This is exactly what sort_by does internally. | ||
# | ||
# The well-documented efficiency of sort_by is a good reason to use it. However, when a property | ||
# does not exist on an item being sorted, it can cause issues (no nil's allowed!) | ||
# In Jekyll::Filters#sort_input, we extract the property in each iteration of #sort, | ||
# which is quite inefficient! How inefficient? This benchmark will tell you just how, and how much | ||
# it can be improved by using the Schwartzian transform. Thanks, Randall! | ||
|
||
require 'benchmark/ips' | ||
require 'minitest' | ||
require File.expand_path("../lib/jekyll", __dir__) | ||
|
||
def site | ||
@site ||= Jekyll::Site.new( | ||
Jekyll.configuration("source" => File.expand_path("../docs", __dir__)) | ||
).tap(&:reset).tap(&:read) | ||
end | ||
|
||
def site_docs | ||
site.collections["docs"].docs.dup | ||
end | ||
|
||
def sort_by_property_directly(docs, meta_key) | ||
docs.sort! do |apple, orange| | ||
apple_property = apple[meta_key] | ||
orange_property = orange[meta_key] | ||
|
||
if !apple_property.nil? && !orange_property.nil? | ||
apple_property <=> orange_property | ||
elsif !apple_property.nil? && orange_property.nil? | ||
-1 | ||
elsif apple_property.nil? && !orange_property.nil? | ||
1 | ||
else | ||
apple <=> orange | ||
end | ||
end | ||
end | ||
|
||
def schwartzian_transform(docs, meta_key) | ||
docs.collect! { |d| | ||
[d[meta_key], d] | ||
}.sort! { |apple, orange| | ||
if !apple[0].nil? && !orange[0].nil? | ||
apple.first <=> orange.first | ||
elsif !apple[0].nil? && orange[0].nil? | ||
-1 | ||
elsif apple[0].nil? && !orange[0].nil? | ||
1 | ||
else | ||
apple[-1] <=> orange[-1] | ||
end | ||
}.collect! { |d| d[-1] } | ||
end | ||
|
||
# Before we test efficiency, do they produce the same output? | ||
class Correctness | ||
include Minitest::Assertions | ||
|
||
require "pp" | ||
define_method :mu_pp, &:pretty_inspect | ||
|
||
attr_accessor :assertions | ||
|
||
def initialize(docs, property) | ||
@assertions = 0 | ||
@docs = docs | ||
@property = property | ||
end | ||
|
||
def assert! | ||
assert sort_by_property_directly(@docs, @property).is_a?(Array), "sort_by_property_directly must return an array" | ||
assert schwartzian_transform(@docs, @property).is_a?(Array), "schwartzian_transform must return an array" | ||
assert_equal sort_by_property_directly(@docs, @property), | ||
schwartzian_transform(@docs, @property) | ||
puts "Yeah, ok, correctness all checks out for property #{@property.inspect}" | ||
end | ||
end | ||
|
||
Correctness.new(site_docs, "redirect_from".freeze).assert! | ||
Correctness.new(site_docs, "title".freeze).assert! | ||
|
||
# First, test with a property only a handful of documents have. | ||
Benchmark.ips do |x| | ||
x.config(time: 10, warmup: 5) | ||
x.report('sort_by_property_directly with sparse property') do | ||
sort_by_property_directly(site_docs, "redirect_from".freeze) | ||
end | ||
x.report('schwartzian_transform with sparse property') do | ||
schwartzian_transform(site_docs, "redirect_from".freeze) | ||
end | ||
x.compare! | ||
end | ||
|
||
# Next, test with a property they all have. | ||
Benchmark.ips do |x| | ||
x.config(time: 10, warmup: 5) | ||
x.report('sort_by_property_directly with non-sparse property') do | ||
sort_by_property_directly(site_docs, "title".freeze) | ||
end | ||
x.report('schwartzian_transform with non-sparse property') do | ||
schwartzian_transform(site_docs, "title".freeze) | ||
end | ||
x.compare! | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters