From f89a3b2765338c0fb52504f60df9033f38a4e25e Mon Sep 17 00:00:00 2001 From: Frank Taillandier Date: Fri, 18 Aug 2017 19:27:12 +0200 Subject: [PATCH 1/6] add problematic UTF+bom files to fixtures --- test/fixtures/UTF8CRLFandBOM.md | 11 +++++++++++ test/fixtures/Unicode16LECRLFandBOM.md | Bin 0 -> 1556 bytes 2 files changed, 11 insertions(+) create mode 100755 test/fixtures/UTF8CRLFandBOM.md create mode 100755 test/fixtures/Unicode16LECRLFandBOM.md diff --git a/test/fixtures/UTF8CRLFandBOM.md b/test/fixtures/UTF8CRLFandBOM.md new file mode 100755 index 00000000000..36390cc3464 --- /dev/null +++ b/test/fixtures/UTF8CRLFandBOM.md @@ -0,0 +1,11 @@ +--- +layout: post +title: "UTF8CRLFandBOM" +date: 2017-04-05 16:16:01 -0800 +categories: bom +--- +This file was created with CR/LFs, and encoded as UTF8 with a BOM + +You’ll find this post in your `_posts` directory. Go ahead and edit it and re-build the site to see your changes. You can rebuild the site in many different ways, but the most common way is to run `bundle exec jekyll serve`, which launches a web server and auto-regenerates your site when a file is updated. + +To add new posts, simply add a file in the `_posts` directory that follows the convention `YYYY-MM-DD-name-of-post.ext` and includes the necessary front matter. Take a look at the source for this post to get an idea about how it works. 
diff --git a/test/fixtures/Unicode16LECRLFandBOM.md b/test/fixtures/Unicode16LECRLFandBOM.md new file mode 100755 index 0000000000000000000000000000000000000000..8941716a1d06a4095648425f505cda26421ffa09 GIT binary patch literal 1556 zcma)+OK;Oq5QS%r#1CM1#Tthc@u<4wxoJT{VbK){w%dd{c4fz;`SZZ}X5zRlQiZM@ z`_7%2Gw00Q{P~?3>#;qu$`-b>#(LY>i6y(%cc*X8Hn!4JS?MRqp4vC7Wy|GL$a-Y2 z?VWwKQ~RhawQO(gi=D}j=t8#5AvW0yn=0asaHsate_siY94E3I`|pt~oFD6%dX{{j zO>ih4h|jw`QXtc9$F^H?Y% zRGv6{ls*@FZ%^@$dzlAD;~{vHj(v{zNu&F zDuy0`QhLNjr|=kSM(<30>SeDE_0W^fJ^2EGeW{i9b4XR_MjtL^?;Jlka)Ld1g%9VB z&aB;krwDjmXq6sYd#y~zP6+gYY}C(GP|aGkaTCB~*4Tp_F|cK>f Date: Tue, 17 Oct 2017 22:14:03 +0100 Subject: [PATCH 2/6] add failing test for non-utf8 encoding (#6339) Merge pull request 6339 --- .../_encodings}/UTF8CRLFandBOM.md | 0 .../_encodings}/Unicode16LECRLFandBOM.md | Bin test/test_document.rb | 29 ++++++++++++++++++ 3 files changed, 29 insertions(+) rename test/{fixtures => source/_encodings}/UTF8CRLFandBOM.md (100%) rename test/{fixtures => source/_encodings}/Unicode16LECRLFandBOM.md (100%) diff --git a/test/fixtures/UTF8CRLFandBOM.md b/test/source/_encodings/UTF8CRLFandBOM.md similarity index 100% rename from test/fixtures/UTF8CRLFandBOM.md rename to test/source/_encodings/UTF8CRLFandBOM.md diff --git a/test/fixtures/Unicode16LECRLFandBOM.md b/test/source/_encodings/Unicode16LECRLFandBOM.md similarity index 100% rename from test/fixtures/Unicode16LECRLFandBOM.md rename to test/source/_encodings/Unicode16LECRLFandBOM.md diff --git a/test/test_document.rb b/test/test_document.rb index 42ba3c5748b..6ce71230680 100644 --- a/test/test_document.rb +++ b/test/test_document.rb @@ -7,6 +7,15 @@ def assert_equal_value(key, one, other) assert_equal(one[key], other[key]) end + def setup_encoded_document(filename) + site = fixture_site("collections" => ["encodings"]) + site.process + Document.new(site.in_source_dir(File.join("_encodings", filename)), { + :site => site, + :collection => site.collections["encodings"], + 
}).tap(&:read) + end + context "a document in a collection" do setup do @site = fixture_site({ @@ -529,4 +538,24 @@ def assert_equal_value(key, one, other) assert_equal true, File.file?(@dest_file) end end + + context "a document with UTF-8 CLRF" do + setup do + @document = setup_encoded_document "UTF8CRLFandBOM.md" + end + + should "not throw an error" do + Jekyll::Renderer.new(@document.site, @document).render_document + end + end + + context "a document with UTF-16LE CLRF" do + setup do + @document = setup_encoded_document "Unicode16LECRLFandBOM.md" + end + + should "not throw an error" do + Jekyll::Renderer.new(@document.site, @document).render_document + end + end end From 69241f2707f3955690ff1612a6a8bb838946bf0f Mon Sep 17 00:00:00 2001 From: Parker Moore Date: Tue, 17 Oct 2017 17:48:20 -0400 Subject: [PATCH 3/6] Utils.merged_file_read_opts: use a Symbol instead of String for encoding --- lib/jekyll/utils.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb index 70605a34c18..e5525a9b13f 100644 --- a/lib/jekyll/utils.rb +++ b/lib/jekyll/utils.rb @@ -301,8 +301,8 @@ def safe_glob(dir, patterns, flags = 0) # and a given param def merged_file_read_opts(site, opts) merged = (site ? 
site.file_read_opts : {}).merge(opts) - if merged["encoding"] && !merged["encoding"].start_with?("bom|") - merged["encoding"] = "bom|#{merged["encoding"]}" + if merged[:encoding] && !merged[:encoding].start_with?("bom|") + merged[:encoding] = "bom|#{merged[:encoding]}" end merged end From d88cafe74e266038933113b19fb38d32e77505bf Mon Sep 17 00:00:00 2001 From: Parker Moore Date: Tue, 17 Oct 2017 18:52:48 -0400 Subject: [PATCH 4/6] Handle both the String and Symbol key cases for prefixing BOM reading info --- lib/jekyll/utils.rb | 3 +++ test/test_utils.rb | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/jekyll/utils.rb b/lib/jekyll/utils.rb index e5525a9b13f..2f5f55dcdcd 100644 --- a/lib/jekyll/utils.rb +++ b/lib/jekyll/utils.rb @@ -304,6 +304,9 @@ def merged_file_read_opts(site, opts) if merged[:encoding] && !merged[:encoding].start_with?("bom|") merged[:encoding] = "bom|#{merged[:encoding]}" end + if merged["encoding"] && !merged["encoding"].start_with?("bom|") + merged["encoding"] = "bom|#{merged["encoding"]}" + end merged end diff --git a/test/test_utils.rb b/test/test_utils.rb index 1b4d4813b66..bf919c7ed40 100644 --- a/test/test_utils.rb +++ b/test/test_utils.rb @@ -386,16 +386,19 @@ class TestUtils < JekyllUnitTest should "ignore encoding if it's not there" do opts = Utils.merged_file_read_opts(nil, {}) assert_nil opts["encoding"] + assert_nil opts[:encoding] end should "add bom to encoding" do - opts = Utils.merged_file_read_opts(nil, { "encoding" => "utf-8" }) + opts = Utils.merged_file_read_opts(nil, { "encoding" => "utf-8", encoding: "utf-8" }) assert_equal "bom|utf-8", opts["encoding"] + assert_equal "bom|utf-8", opts[:encoding] end should "preserve bom in encoding" do - opts = Utils.merged_file_read_opts(nil, { "encoding" => "bom|utf-8" }) - assert_equal "bom|utf-8", opts["encoding"] + opts = Utils.merged_file_read_opts(nil, { "encoding" => "bom|another", encoding: "bom|another" }) + assert_equal "bom|another", 
opts["encoding"] + assert_equal "bom|another", opts[:encoding] end end end From 5c91a1142d3c7c85f6d011c1a1c338ea6e7b5317 Mon Sep 17 00:00:00 2001 From: Parker Moore Date: Tue, 17 Oct 2017 18:53:07 -0400 Subject: [PATCH 5/6] While you're at it, just set this for site.file_read_opts --- lib/jekyll/site.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/jekyll/site.rb b/lib/jekyll/site.rb index 54155d0c8e2..1906a9b7297 100644 --- a/lib/jekyll/site.rb +++ b/lib/jekyll/site.rb @@ -445,6 +445,9 @@ def configure_include_paths def configure_file_read_opts self.file_read_opts = {} self.file_read_opts[:encoding] = config["encoding"] if config["encoding"] + if self.file_read_opts[:encoding] && !self.file_read_opts[:encoding].start_with?("bom|") + self.file_read_opts[:encoding] = "bom|#{self.file_read_opts[:encoding]}" + end end private From 6d20916a636b9611d4479d1a298bee964b29d585 Mon Sep 17 00:00:00 2001 From: Parker Moore Date: Tue, 17 Oct 2017 19:02:24 -0400 Subject: [PATCH 6/6] Fix some Rubocop errors. 
--- lib/jekyll/site.rb | 4 +--- test/test_utils.rb | 14 ++++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/jekyll/site.rb b/lib/jekyll/site.rb index 1906a9b7297..32946781aa4 100644 --- a/lib/jekyll/site.rb +++ b/lib/jekyll/site.rb @@ -445,9 +445,7 @@ def configure_include_paths def configure_file_read_opts self.file_read_opts = {} self.file_read_opts[:encoding] = config["encoding"] if config["encoding"] - if self.file_read_opts[:encoding] && !self.file_read_opts[:encoding].start_with?("bom|") - self.file_read_opts[:encoding] = "bom|#{self.file_read_opts[:encoding]}" - end + self.file_read_opts = Jekyll::Utils.merged_file_read_opts(self, {}) end private diff --git a/test/test_utils.rb b/test/test_utils.rb index bf919c7ed40..01c1d98c613 100644 --- a/test/test_utils.rb +++ b/test/test_utils.rb @@ -390,15 +390,17 @@ class TestUtils < JekyllUnitTest end should "add bom to encoding" do - opts = Utils.merged_file_read_opts(nil, { "encoding" => "utf-8", encoding: "utf-8" }) - assert_equal "bom|utf-8", opts["encoding"] - assert_equal "bom|utf-8", opts[:encoding] + opts = { "encoding" => "utf-8", :encoding => "utf-8" } + merged = Utils.merged_file_read_opts(nil, opts) + assert_equal "bom|utf-8", merged["encoding"] + assert_equal "bom|utf-8", merged[:encoding] end should "preserve bom in encoding" do - opts = Utils.merged_file_read_opts(nil, { "encoding" => "bom|another", encoding: "bom|another" }) - assert_equal "bom|another", opts["encoding"] - assert_equal "bom|another", opts[:encoding] + opts = { "encoding" => "bom|another", :encoding => "bom|another" } + merged = Utils.merged_file_read_opts(nil, opts) + assert_equal "bom|another", merged["encoding"] + assert_equal "bom|another", merged[:encoding] end end end