Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement RFC5005, "Feed Paging and Archiving". #236

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 13 additions & 2 deletions lib/jekyll-feed/feed.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,20 @@
{% endif %}
<feed xmlns="http://www.w3.org/2005/Atom" {% if site.lang %}xml:lang="{{ site.lang }}"{% endif %}>
<generator uri="https://jekyllrb.com/" version="{{ jekyll.version }}">Jekyll</generator>

{% unless page.current or page.prev_archive %}
<fh:complete xmlns:fh="http://purl.org/syndication/history/1.0"/>
{% endunless %}
{% if page.current %}
<fh:archive xmlns:fh="http://purl.org/syndication/history/1.0"/>
<link href="{{ page.current.url | absolute_url }}" rel="current"/>
{% endif %}
{% if page.prev_archive %}
<link href="{{ page.prev_archive.url | absolute_url }}" rel="prev-archive"/>
{% endif %}
<link href="{{ page.url | absolute_url }}" rel="self" type="application/atom+xml" />
<link href="{{ '/' | absolute_url }}" rel="alternate" type="text/html" {% if site.lang %}hreflang="{{ site.lang }}" {% endif %}/>

<updated>{{ site.time | date_to_xmlschema }}</updated>
<id>{{ '/' | absolute_url | xml_escape }}</id>

Expand All @@ -31,8 +43,7 @@
</author>
{% endif %}

{% assign posts = site.posts | where_exp: "post", "post.draft != true" %}
{% for post in posts limit: 10 %}
{% for post in page.posts %}
<entry{% if post.lang %}{{" "}}xml:lang="{{ post.lang }}"{% endif %}>
<title type="html">{{ post.title | smartify | strip_html | normalize_whitespace | xml_escape }}</title>
<link href="{{ post.url | absolute_url }}" rel="alternate" type="text/html" title="{{ post.title | xml_escape }}" />
Expand Down
73 changes: 69 additions & 4 deletions lib/jekyll-feed/generator.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

require "digest"

module JekyllFeed
class Generator < Jekyll::Generator
safe true
Expand All @@ -9,7 +11,11 @@ class Generator < Jekyll::Generator
# Stores the site being built and, unless a file already exists at the
# configured feed path, reads the feed template and registers feed pages.
#
# site - the site object; its `pages` and `posts.docs` are used here.
def generate(site)
@site = site
# A file already present at feed_path takes precedence: generate nothing.
return if file_exists?(feed_path)
# NOTE(review): this is a two-argument call to content_for_file, while the
# rest of this view passes four arguments — it looks like the pre-change
# line of the diff rather than live code. Confirm against the real file.
@site.pages << content_for_file(feed_path, feed_source_path)

# All feed documents use the same template, so just read it once.
# MINIFY_REGEX matches are stripped from the template text before it is kept.
@feed_source = File.read(feed_source_path).gsub(MINIFY_REGEX, "")

# Posts flagged as drafts are excluded before the feeds are built.
make_feeds(@site.posts.docs.reject(&:draft?))
end

private
Expand All @@ -20,6 +26,9 @@ def generate(site)
# We will strip all of this whitespace to minify the template
MINIFY_REGEX = %r!(?<=>|})\s+!

# Number of posts per feed
PER_PAGE = 10

# Path to feed from config, or feed.xml for default
def feed_path
if @site.config["feed"] && @site.config["feed"]["path"]
Expand All @@ -43,12 +52,68 @@ def file_exists?(file_path)
end
end

# Builds every feed page for the given posts and appends each one to
# @site.pages: first the archive feeds (oldest posts first), then the
# current feed.
#
# feed_posts - Array of posts; archive page N is cut from positions
#              (N - 1) * PER_PAGE onward, so the array order decides which
#              posts land in which archive.
def make_feeds(feed_posts)
  # The current feed's page object is created up front because every archive
  # page is handed a reference to it; its content is filled in last.
  current = PageWithoutAFile.new(@site, __dir__, "", feed_path)

  # Only full pages that stop short of the final post become archives, so
  # the final post is never archived. This is zero or negative for small
  # inputs, in which case the range below is empty.
  archive_count = (feed_posts.length - 1).div(PER_PAGE)

  # Each archive is passed the one generated before it; the first gets nil.
  prev_archive = nil
  (1..archive_count).each do |pagenum|
    offset = (pagenum - 1) * PER_PAGE
    page_posts = feed_posts.slice(offset, PER_PAGE).reverse
    prev_archive = archived_feed(prev_archive, pagenum, page_posts, current)
    @site.pages << prev_archive
  end

  # The current feed comes last: it needs the final archive page, which is
  # only known once the loop has finished. It may overlap that archive.
  newest_posts = feed_posts.reverse.first(PER_PAGE)
  @site.pages << content_for_file(current, newest_posts, prev_archive, nil)
end

# Computes an MD5 digest over a list of posts.
#
# For each post, in order, the digest absorbs the contents of the file at
# post.path followed by the string form of post.data with the "excerpt" and
# "draft" keys removed. When prev_archive is not nil, its url is absorbed
# last.
#
# posts        - Array of objects responding to `path` and `data`.
# prev_archive - object responding to `url`, or nil.
#
# Returns the Digest::MD5 instance (callers ask it for `hexdigest`).
def digest_posts(posts, prev_archive)
  ignored_keys = %w(excerpt draft)

  md5 = posts.each_with_object(Digest::MD5.new) do |entry, acc|
    front_matter = entry.data.reject { |key, _value| ignored_keys.include?(key) }
    acc.file(entry.path)
    acc.update(front_matter.to_s)
  end

  return md5 if prev_archive.nil?

  md5.update(prev_archive.url)
end

# Builds one archive feed page and returns whatever content_for_file returns
# for it.
#
# prev_archive - the previously generated archive page, or nil for the first.
# pagenum      - number embedded in the archive's filename.
# posts        - posts to include in this archive page.
# current      - the page object of the current feed.
def archived_feed(prev_archive, pagenum, posts, current)
  # Cache-busting: the filename carries a hash of this page's posts so that
  # RFC5005 consumers notice when any of them change. The hash also covers
  # the previous archive's URL, because consumers only walk backward from
  # the newest page and must see a rename further down the chain.
  fingerprint = digest_posts(posts, prev_archive).hexdigest

  # Same directory, base name and extension as the main feed, with
  # "-<pagenum>-<hash>" inserted before the extension.
  extension = File.extname(feed_path)
  stem = File.basename(feed_path, ".*")
  archive_name = "#{stem}-#{pagenum}-#{fingerprint}#{extension}"
  archive_path = File.join(File.dirname(feed_path), archive_name)

  archive_page = PageWithoutAFile.new(@site, __dir__, "", archive_path)
  content_for_file(archive_page, posts, prev_archive, current)
end

# Generates contents for a file
def content_for_file(file_path, file_source_path)
file = PageWithoutAFile.new(@site, __dir__, "", file_path)
file.content = File.read(file_source_path).gsub(MINIFY_REGEX, "")
def content_for_file(file, posts, prev_archive, current)
file.content = @feed_source
file.data["layout"] = nil
file.data["sitemap"] = false
file.data["posts"] = posts
file.data["prev_archive"] = prev_archive
file.data["current"] = current
file.data["xsl"] = file_exists?("feed.xslt.xml")
file.output
file
Expand Down