# Library-side changes plus a UTF-8 test fixture.
# (These "#" lines sit ahead of the first "diff --git" header and belong to no hunk.)
#
# lib/jekyll/site.rb
#   configure_file_read_opts now reassigns file_read_opts to the result of
#   Jekyll::Utils.merged_file_read_opts(self, {}), so the options stored on
#   the site get the same "bom|" prefixing as options merged per call.
# lib/jekyll/utils.rb
#   merged_file_read_opts gains a branch for the :encoding symbol key that
#   mirrors the existing "encoding" string-key branch: the value is prefixed
#   with "bom|" unless it already starts with that prefix.
#   NOTE(review): the prefix test is case-sensitive and is applied to every
#   encoding name, UTF or not -- confirm that is intended.
# test/source/_encodings/UTF8CRLFandBOM.md
#   New fixture. Its own text says it was saved with CR/LF line endings and a
#   UTF-8 BOM; those bytes are not visible in this text form of the patch.
#   NOTE(review): regenerate from the commit if byte-exact content matters.
#
# Line breaks and indentation below were rebuilt to agree with the line counts
# in the @@ headers; leading whitespace is presumed from nesting -- verify
# against the target files before applying.
diff --git a/lib/jekyll/site.rb b/lib/jekyll/site.rb
index 54155d0c8e2..32946781aa4 100644
--- a/lib/jekyll/site.rb
+++ b/lib/jekyll/site.rb
@@ -445,6 +445,7 @@ def configure_include_paths
     def configure_file_read_opts
       self.file_read_opts = {}
       self.file_read_opts[:encoding] = config["encoding"] if config["encoding"]
+      self.file_read_opts = Jekyll::Utils.merged_file_read_opts(self, {})
     end
 
     private
diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb
index 70605a34c18..2f5f55dcdcd 100644
--- a/lib/jekyll/utils.rb
+++ b/lib/jekyll/utils.rb
@@ -301,6 +301,9 @@ def safe_glob(dir, patterns, flags = 0)
     # and a given param
     def merged_file_read_opts(site, opts)
       merged = (site ? site.file_read_opts : {}).merge(opts)
+      if merged[:encoding] && !merged[:encoding].start_with?("bom|")
+        merged[:encoding] = "bom|#{merged[:encoding]}"
+      end
       if merged["encoding"] && !merged["encoding"].start_with?("bom|")
         merged["encoding"] = "bom|#{merged["encoding"]}"
       end
diff --git a/test/source/_encodings/UTF8CRLFandBOM.md b/test/source/_encodings/UTF8CRLFandBOM.md
new file mode 100755
index 00000000000..36390cc3464
--- /dev/null
+++ b/test/source/_encodings/UTF8CRLFandBOM.md
@@ -0,0 +1,11 @@
+---
+layout: post
+title: "UTF8CRLFandBOM"
+date: 2017-04-05 16:16:01 -0800
+categories: bom
+---
+This file was created with CR/LFs, and encoded as UTF8 with a BOM
+
+You’ll find this post in your `_posts` directory. Go ahead and edit it and re-build the site to see your changes. You can rebuild the site in many different ways, but the most common way is to run `bundle exec jekyll serve`, which launches a web server and auto-regenerates your site when a file is updated.
+
+To add new posts, simply add a file in the `_posts` directory that follows the convention `YYYY-MM-DD-name-of-post.ext` and includes the necessary front matter. Take a look at the source for this post to get an idea about how it works.
# Test-side changes.
#
# test/source/_encodings/Unicode16LECRLFandBOM.md
#   New binary fixture; the patch records only that the file was added.
# test/test_document.rb
#   Adds the helper setup_encoded_document(filename), which builds a fixture
#   site with an "encodings" collection, processes it, and returns a Document
#   for _encodings/<filename> after calling read on it. Two new contexts
#   render the UTF-8 and UTF-16LE fixtures through Jekyll::Renderer; they
#   contain no assertions, so they pass as long as rendering does not raise.
#   NOTE(review): the context names spell "CLRF"; presumably "CRLF" is meant.
# test/test_utils.rb
#   The merged_file_read_opts tests now check the :encoding symbol key next
#   to the "encoding" string key, and the "preserve bom" case uses the value
#   "bom|another" instead of "bom|utf-8".
#
# Line breaks and indentation below were rebuilt to agree with the line counts
# in the @@ headers; leading whitespace is presumed from nesting -- verify
# against the target files before applying.
diff --git a/test/source/_encodings/Unicode16LECRLFandBOM.md b/test/source/_encodings/Unicode16LECRLFandBOM.md
new file mode 100755
index 00000000000..8941716a1d0
Binary files /dev/null and b/test/source/_encodings/Unicode16LECRLFandBOM.md differ
diff --git a/test/test_document.rb b/test/test_document.rb
index 42ba3c5748b..6ce71230680 100644
--- a/test/test_document.rb
+++ b/test/test_document.rb
@@ -7,6 +7,15 @@ def assert_equal_value(key, one, other)
     assert_equal(one[key], other[key])
   end
 
+  def setup_encoded_document(filename)
+    site = fixture_site("collections" => ["encodings"])
+    site.process
+    Document.new(site.in_source_dir(File.join("_encodings", filename)), {
+      :site => site,
+      :collection => site.collections["encodings"],
+    }).tap(&:read)
+  end
+
   context "a document in a collection" do
     setup do
       @site = fixture_site({
@@ -529,4 +538,24 @@ def assert_equal_value(key, one, other)
       assert_equal true, File.file?(@dest_file)
     end
   end
+
+  context "a document with UTF-8 CLRF" do
+    setup do
+      @document = setup_encoded_document "UTF8CRLFandBOM.md"
+    end
+
+    should "not throw an error" do
+      Jekyll::Renderer.new(@document.site, @document).render_document
+    end
+  end
+
+  context "a document with UTF-16LE CLRF" do
+    setup do
+      @document = setup_encoded_document "Unicode16LECRLFandBOM.md"
+    end
+
+    should "not throw an error" do
+      Jekyll::Renderer.new(@document.site, @document).render_document
+    end
+  end
 end
diff --git a/test/test_utils.rb b/test/test_utils.rb
index 1b4d4813b66..01c1d98c613 100644
--- a/test/test_utils.rb
+++ b/test/test_utils.rb
@@ -386,16 +386,21 @@ class TestUtils < JekyllUnitTest
     should "ignore encoding if it's not there" do
       opts = Utils.merged_file_read_opts(nil, {})
       assert_nil opts["encoding"]
+      assert_nil opts[:encoding]
     end
 
     should "add bom to encoding" do
-      opts = Utils.merged_file_read_opts(nil, { "encoding" => "utf-8" })
-      assert_equal "bom|utf-8", opts["encoding"]
+      opts = { "encoding" => "utf-8", :encoding => "utf-8" }
+      merged = Utils.merged_file_read_opts(nil, opts)
+      assert_equal "bom|utf-8", merged["encoding"]
+      assert_equal "bom|utf-8", merged[:encoding]
     end
 
     should "preserve bom in encoding" do
-      opts = Utils.merged_file_read_opts(nil, { "encoding" => "bom|utf-8" })
-      assert_equal "bom|utf-8", opts["encoding"]
+      opts = { "encoding" => "bom|another", :encoding => "bom|another" }
+      merged = Utils.merged_file_read_opts(nil, opts)
+      assert_equal "bom|another", merged["encoding"]
+      assert_equal "bom|another", merged[:encoding]
     end
   end
 end