-
Notifications
You must be signed in to change notification settings - Fork 2.6k
/
test_json.py
71 lines (61 loc) · 1.73 KB
/
test_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import textwrap
import pyarrow as pa
import pytest
from datasets.packaged_modules.json.json import Json
@pytest.fixture
def jsonl_file(tmp_path):
    """Create a JSON Lines file with two records and return its path as a string.

    The file is named ``file.jsonl`` and is placed under ``tmp_path``. Each
    line is one JSON object with the keys ``col_1`` and ``col_2``.
    """
    content = textwrap.dedent(
        """\
        {"col_1": 1, "col_2": 2}
        {"col_1": 10, "col_2": 20}
        """
    )
    target = tmp_path / "file.jsonl"
    # write_text opens the file in text mode "w", writes the content and closes it.
    target.write_text(content)
    return str(target)
@pytest.fixture
def json_file_with_list_of_dicts(tmp_path):
    """Create a JSON file whose top level is a list of two records.

    The file is named ``file_with_list_of_dicts.json`` and is placed under
    ``tmp_path``. The path is returned as a string.
    """
    content = textwrap.dedent(
        """\
        [
        {"col_1": 1, "col_2": 2},
        {"col_1": 10, "col_2": 20}
        ]
        """
    )
    target = tmp_path / "file_with_list_of_dicts.json"
    # write_text opens the file in text mode "w", writes the content and closes it.
    target.write_text(content)
    return str(target)
@pytest.fixture
def json_file_with_list_of_dicts_field(tmp_path):
    """Create a JSON file whose records sit under the ``field3`` key.

    The top-level object also carries two unrelated scalar entries
    (``field1`` and ``field2``). The file is named
    ``file_with_list_of_dicts_field.json``, is placed under ``tmp_path``,
    and its path is returned as a string.
    """
    content = textwrap.dedent(
        """\
        {
        "field1": 1,
        "field2": "aabb",
        "field3": [
        {"col_1": 1, "col_2": 2},
        {"col_1": 10, "col_2": 20}
        ]
        }
        """
    )
    target = tmp_path / "file_with_list_of_dicts_field.json"
    # write_text opens the file in text mode "w", writes the content and closes it.
    target.write_text(content)
    return str(target)
@pytest.mark.parametrize(
    "file_fixture, config_kwargs",
    [
        ("jsonl_file", {}),
        ("json_file_with_list_of_dicts", {}),
        ("json_file_with_list_of_dicts_field", {"field": "field3"}),
    ],
)
def test_json_generate_tables(file_fixture, config_kwargs, request):
    """Check the tables produced by ``Json._generate_tables`` for each file layout.

    For every parametrized case, a ``Json`` instance is built from
    ``config_kwargs`` and pointed at the file created by the fixture named
    ``file_fixture``. The yielded tables are concatenated and must contain
    the two expected rows.
    """
    expected_columns = {"col_1": [1, 10], "col_2": [2, 20]}

    builder = Json(**config_kwargs)
    # The fixture is given by name in the parametrization, so look it up at run time.
    data_path = request.getfixturevalue(file_fixture)
    table_stream = builder._generate_tables([[data_path]])

    # Each yielded item is a pair; only its second element (the table) is used.
    tables = [table for _key, table in table_stream]
    combined = pa.concat_tables(tables)

    assert combined.to_pydict() == expected_columns