Skip to content

Commit

Permalink
Allow Configurable Converters on CSV
Browse files Browse the repository at this point in the history
Lets the user specify which converters to pass to `CSV`, defaulting to an empty list (no conversion) if none are present in the config. This is specifically so that numeric values in a CSV file can be parsed as something other than strings.
  • Loading branch information
MichaelCordingley committed Mar 17, 2022
1 parent 62ef158 commit 7346b59
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 7 deletions.
51 changes: 44 additions & 7 deletions lib/jekyll/readers/data_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,9 @@ def read_data_file(path)

case File.extname(path).downcase
when ".csv"
CSV.read(path,
:headers => true,
:encoding => site.config["encoding"]).map(&:to_hash)
CSV.read(path, **csv_config).map { |row| convert_row(row) }
when ".tsv"
CSV.read(path,
:col_sep => "\t",
:headers => true,
:encoding => site.config["encoding"]).map(&:to_hash)
CSV.read(path, **tsv_config).map { |row| convert_row(row) }
else
SafeYAML.load_file(path)
end
Expand All @@ -76,5 +71,47 @@ def sanitize_filename(name)
name.gsub(%r![^\w\s-]+|(?<=^|\b\s)\s+(?=$|\s?\b)!, "")
.gsub(%r!\s+!, "_")
end

private

# @return [Hash]
def csv_config
  # Options for reading ".csv" files, taken from the "csv_reader" config
  # section via read_config and cached after the first call.
  return @csv_config if @csv_config

  @csv_config = read_config("csv_reader")
end

# @return [Hash]
def tsv_config
  # Options for reading ".tsv" files: delegates to read_config for the
  # "tsv_reader" section and overrides the column separator with a tab.
  # The result is cached after the first call.
  return @tsv_config if @tsv_config

  @tsv_config = read_config("tsv_reader", { :col_sep => "\t" })
end

# @param config_key [String]
# @param overrides [Hash]
# @return [Hash]
# @see https://ruby-doc.org/stdlib-2.6.1/libdoc/csv/rdoc/CSV.html#Converters
def read_config(config_key, overrides = {})
  # Builds the option hash that is splatted into CSV.read for one reader
  # section of the site config (e.g. "csv_reader" or "tsv_reader").
  #
  # Keys of the section are symbolized so string keys from the config file
  # can be looked up uniformly. A missing section behaves like an empty one.
  reader_config = (config[config_key] || {}).transform_keys(&:to_sym)

  # Array() lets the setting be a list, a single scalar value, or an explicit
  # null without raising NoMethodError on `.map`.
  converters = Array(reader_config.fetch(:csv_converters, [])).map(&:to_sym)

  options = {
    :converters => converters,
    :headers    => reader_config.fetch(:headers, true),
    # Falls back to the site-wide "encoding" setting when the section has none.
    :encoding   => reader_config.fetch(:encoding, config["encoding"]),
  }

  # Caller-supplied overrides (such as the TSV column separator) always win.
  options.merge(overrides)
end

def config
  # Caches the site's configuration so later lookups reuse the same object
  # instead of calling `site.config` again.
  return @config if @config

  @config = site.config
end

# @param row [Array, CSV::Row]
# @return [Array, Hash]
def convert_row(row)
  # Normalizes one parsed row: a CSV::Row (or a subclass of it) is converted
  # to a plain Hash via `to_hash`; any other value, such as an Array, is
  # returned unchanged.
  row.is_a?(CSV::Row) ? row.to_hash : row
end
end
end
3 changes: 3 additions & 0 deletions test/fixtures/sample.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id,field_a
1,"foo"
2,"bar"
3 changes: 3 additions & 0 deletions test/fixtures/sample.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id field_a
1 "foo"
2 "bar"
43 changes: 43 additions & 0 deletions test/test_data_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,47 @@ class TestDataReader < JekyllUnitTest
)
end
end

context "with no csv options set" do
  setup do
    @reader = DataReader.new(fixture_site)
    # With the default options every value stays a String and each row is a
    # Hash keyed by the header line.
    @parsed = [
      { "id" => "1", "field_a" => "foo" },
      { "id" => "2", "field_a" => "bar" },
    ]
  end

  should "parse CSV normally" do
    csv_path = File.expand_path("fixtures/sample.csv", __dir__)
    assert_equal @parsed, @reader.read_data_file(csv_path)
  end

  should "parse TSV normally" do
    tsv_path = File.expand_path("fixtures/sample.tsv", __dir__)
    assert_equal @parsed, @reader.read_data_file(tsv_path)
  end
end

context "with csv options set" do
  setup do
    # The same options are given to both readers so the CSV and TSV fixtures
    # share one expected result.
    reader_options = {
      "csv_converters" => [:numeric],
      "headers"        => false,
    }
    site = fixture_site(
      {
        "csv_reader" => reader_options,
        "tsv_reader" => reader_options,
      }
    )

    @reader = DataReader.new(site)
    # With headers disabled the header line comes back as an ordinary row, and
    # the numeric converter turns the id column into Integers.
    @parsed = [%w[id field_a], [1, "foo"], [2, "bar"]]
  end

  should "parse CSV with options" do
    csv_path = File.expand_path("fixtures/sample.csv", __dir__)
    assert_equal @parsed, @reader.read_data_file(csv_path)
  end

  should "parse TSV with options" do
    tsv_path = File.expand_path("fixtures/sample.tsv", __dir__)
    assert_equal @parsed, @reader.read_data_file(tsv_path)
  end
end
end

0 comments on commit 7346b59

Please sign in to comment.