Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Problematic UTF+bom files #6322

Merged
merged 7 commits into from Oct 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/jekyll/site.rb
Expand Up @@ -445,6 +445,7 @@ def configure_include_paths
# Rebuilds file_read_opts from scratch. The configured "encoding" value, when
# present, is stored under the :encoding key; the hash is then replaced by the
# result of Jekyll::Utils.merged_file_read_opts for this object and an empty
# set of overrides.
def configure_file_read_opts
  encoding = config["encoding"]
  self.file_read_opts = encoding ? { :encoding => encoding } : {}
  self.file_read_opts = Jekyll::Utils.merged_file_read_opts(self, {})
end

private
Expand Down
3 changes: 3 additions & 0 deletions lib/jekyll/utils.rb
Expand Up @@ -301,6 +301,9 @@ def safe_glob(dir, patterns, flags = 0)
# and a given param
def merged_file_read_opts(site, opts)
merged = (site ? site.file_read_opts : {}).merge(opts)
if merged[:encoding] && !merged[:encoding].start_with?("bom|")
merged[:encoding] = "bom|#{merged[:encoding]}"
end
if merged["encoding"] && !merged["encoding"].start_with?("bom|")
merged["encoding"] = "bom|#{merged["encoding"]}"
end
Expand Down
11 changes: 11 additions & 0 deletions test/source/_encodings/UTF8CRLFandBOM.md
@@ -0,0 +1,11 @@
---
layout: post
title: "UTF8CRLFandBOM"
date: 2017-04-05 16:16:01 -0800
categories: bom
---
This file was created with CR/LFs, and encoded as UTF8 with a BOM

You’ll find this post in your `_posts` directory. Go ahead and edit it and re-build the site to see your changes. You can rebuild the site in many different ways, but the most common way is to run `bundle exec jekyll serve`, which launches a web server and auto-regenerates your site when a file is updated.

To add new posts, simply add a file in the `_posts` directory that follows the convention `YYYY-MM-DD-name-of-post.ext` and includes the necessary front matter. Take a look at the source for this post to get an idea about how it works.
Binary file added test/source/_encodings/Unicode16LECRLFandBOM.md
Binary file not shown.
29 changes: 29 additions & 0 deletions test/test_document.rb
Expand Up @@ -7,6 +7,15 @@ def assert_equal_value(key, one, other)
assert_equal(one[key], other[key])
end

# Test helper: builds a fixture site with the "encodings" collection enabled,
# processes it, then returns a Document for +filename+ inside the site's
# "_encodings" source directory after calling #read on it.
def setup_encoded_document(filename)
  site = fixture_site("collections" => ["encodings"])
  site.process

  path = site.in_source_dir(File.join("_encodings", filename))
  relations = {
    :site       => site,
    :collection => site.collections["encodings"],
  }

  document = Document.new(path, relations)
  document.read
  document
end

context "a document in a collection" do
setup do
@site = fixture_site({
Expand Down Expand Up @@ -529,4 +538,24 @@ def assert_equal_value(key, one, other)
assert_equal true, File.file?(@dest_file)
end
end

# Regression check for the UTF8CRLFandBOM.md fixture, which was saved as UTF-8
# with a byte order mark and CR/LF line endings: rendering it must complete
# without raising.
context "a document with UTF-8 CRLF" do
  setup do
    @document = setup_encoded_document "UTF8CRLFandBOM.md"
  end

  # No explicit assertion: the test fails only if render_document raises.
  should "not throw an error" do
    Jekyll::Renderer.new(@document.site, @document).render_document
  end
end

# Regression check for the Unicode16LECRLFandBOM.md fixture (a binary file;
# going by its name, UTF-16LE with a byte order mark and CR/LF line endings):
# rendering it must complete without raising.
context "a document with UTF-16LE CRLF" do
  setup do
    @document = setup_encoded_document "Unicode16LECRLFandBOM.md"
  end

  # No explicit assertion: the test fails only if render_document raises.
  should "not throw an error" do
    Jekyll::Renderer.new(@document.site, @document).render_document
  end
end
end
13 changes: 9 additions & 4 deletions test/test_utils.rb
Expand Up @@ -386,16 +386,21 @@ class TestUtils < JekyllUnitTest
# With no site and an empty options hash, the merged result must have no
# encoding entry under either the string key or the symbol key.
should "ignore encoding if it's not there" do
opts = Utils.merged_file_read_opts(nil, {})
assert_nil opts["encoding"]
assert_nil opts[:encoding]
end

# An encoding given without the "bom|" prefix must come back with it added.
# NOTE(review): this listing comes from a diff view, so the first
# assignment/assertion pair may be the pre-change version of the test rather
# than live code -- confirm against the actual file.
should "add bom to encoding" do
# String key supplied on its own; `opts` holds the merged result here.
opts = Utils.merged_file_read_opts(nil, { "encoding" => "utf-8" })
assert_equal "bom|utf-8", opts["encoding"]
# `opts` is reused as the input hash: string and symbol keys together.
opts = { "encoding" => "utf-8", :encoding => "utf-8" }
merged = Utils.merged_file_read_opts(nil, opts)
assert_equal "bom|utf-8", merged["encoding"]
assert_equal "bom|utf-8", merged[:encoding]
end

# An encoding that already starts with "bom|" must be returned unchanged,
# i.e. the prefix is not added a second time.
# NOTE(review): this listing comes from a diff view, so the first
# assignment/assertion pair may be the pre-change version of the test rather
# than live code -- confirm against the actual file.
should "preserve bom in encoding" do
# String key supplied on its own; `opts` holds the merged result here.
opts = Utils.merged_file_read_opts(nil, { "encoding" => "bom|utf-8" })
assert_equal "bom|utf-8", opts["encoding"]
# `opts` is reused as the input hash: string and symbol keys together.
opts = { "encoding" => "bom|another", :encoding => "bom|another" }
merged = Utils.merged_file_read_opts(nil, opts)
assert_equal "bom|another", merged["encoding"]
assert_equal "bom|another", merged[:encoding]
end
end
end