From a1df085241034c3a7baab795b38d724983b17f68 Mon Sep 17 00:00:00 2001
From: Quentin Lhoest
Date: Fri, 23 Sep 2022 20:01:40 +0200
Subject: [PATCH] fix utf-8 yaml dump

---
 src/datasets/utils/metadata.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/datasets/utils/metadata.py b/src/datasets/utils/metadata.py
index 3e5a35eaeff..c4c52d29044 100644
--- a/src/datasets/utils/metadata.py
+++ b/src/datasets/utils/metadata.py
@@ -87,13 +87,15 @@ def from_yaml_string(cls, string: str) -> "DatasetMetadata":
         return cls(**metadata_dict)
 
     def to_yaml_string(self) -> str:
-        return yaml.dump(
+        return yaml.safe_dump(
             {
                 (key.replace("_", "-") if key in self._FIELDS_WITH_DASHES else key): value
                 for key, value in self.items()
             },
             sort_keys=False,
-        )
+            allow_unicode=True,
+            encoding="utf-8",
+        ).decode("utf-8")
 
 
 if __name__ == "__main__":