From df83814d4fc7a993ecc379f2ca2e72f79b2cbc53 Mon Sep 17 00:00:00 2001 From: janssenhenning Date: Tue, 22 Feb 2022 10:50:20 +0100 Subject: [PATCH] lxml 4.8 includes a patch (by me :)) that allows pathlib Paths for all of the API https://github.com/lxml/lxml/pull/337 This allows eliminating some corner cases in the fleur parser --- masci_tools/cmdline/commands/fleur_schema.py | 2 +- masci_tools/io/io_fleurxml.py | 10 -------- .../parsers/fleur_schema/inpschema_todict.py | 7 +++--- .../parsers/fleur_schema/outschema_todict.py | 7 +++--- .../io/parsers/fleur_schema/schema_dict.py | 22 +++++++---------- pyproject.toml | 2 +- tests/cmdline/conftest.py | 24 +++++++++---------- 7 files changed, 29 insertions(+), 45 deletions(-) diff --git a/masci_tools/cmdline/commands/fleur_schema.py b/masci_tools/cmdline/commands/fleur_schema.py index 8e79e8db9..3cb4b7313 100644 --- a/masci_tools/cmdline/commands/fleur_schema.py +++ b/masci_tools/cmdline/commands/fleur_schema.py @@ -98,7 +98,7 @@ def add_fleur_schema(schema_file, test_xml_file, overwrite, branch, api_key, fro project.files.raw(file_path=f'io/xml/{file_name}', ref=branch, streamed=True, action=f.write) echo.echo_success('Download successful') - xmlschema = etree.parse(os.fspath(schema_file)) + xmlschema = etree.parse(schema_file) xmlschema, _ = clear_xml(xmlschema) namespaces = {'xsd': 'http://www.w3.org/2001/XMLSchema'} diff --git a/masci_tools/io/io_fleurxml.py b/masci_tools/io/io_fleurxml.py index d9a9dcde1..15eced4cf 100644 --- a/masci_tools/io/io_fleurxml.py +++ b/masci_tools/io/io_fleurxml.py @@ -50,13 +50,8 @@ def load_inpxml(inpxmlfile: XMLFileLike, 'Setting it to the current working directory.' 
'If the tree contains xinclude tags these could fail') base_url = os.getcwd() - elif isinstance(base_url, Path): - base_url = os.fspath(base_url.resolve()) xml_parse_func = partial(xml_parse_func, base_url=base_url) - if isinstance(inpxmlfile, Path): - inpxmlfile = os.fspath(inpxmlfile) - if isinstance(inpxmlfile, etree._ElementTree): xmltree = inpxmlfile else: @@ -116,13 +111,8 @@ def load_outxml(outxmlfile: XMLFileLike, 'Setting it to the current working directory.' 'If the tree contains xinclude tags these could fail') base_url = os.getcwd() - elif isinstance(base_url, Path): - base_url = os.fspath(base_url.resolve()) xml_parse_func = partial(xml_parse_func, base_url=base_url) - if isinstance(outxmlfile, Path): - outxmlfile = os.fspath(outxmlfile) - outfile_broken = False if isinstance(outxmlfile, etree._ElementTree): diff --git a/masci_tools/io/parsers/fleur_schema/inpschema_todict.py b/masci_tools/io/parsers/fleur_schema/inpschema_todict.py index f8c0befe1..c2e04c576 100644 --- a/masci_tools/io/parsers/fleur_schema/inpschema_todict.py +++ b/masci_tools/io/parsers/fleur_schema/inpschema_todict.py @@ -15,11 +15,12 @@ """ from __future__ import annotations +import os from .fleur_schema_parser_functions import * #pylint: disable=unused-wildcard-import from masci_tools.util.xml.common_functions import clear_xml from masci_tools.util.case_insensitive_dict import CaseInsensitiveDict, CaseInsensitiveFrozenSet from masci_tools.util.lockable_containers import LockableDict, LockableList -from typing import AnyStr, Callable +from typing import Callable try: from typing import TypedDict, Literal except ImportError: @@ -49,7 +50,7 @@ class InputSchemaData(TypedDict, total=False): 'unique_path_attribs', 'other_attribs', 'omitt_contained_tags', 'tag_info'] -def create_inpschema_dict(path: AnyStr, apply_patches: bool = True) -> InputSchemaData: +def create_inpschema_dict(path: os.PathLike, apply_patches: bool = True) -> InputSchemaData: """ Creates dictionary with information 
about the FleurInputSchema.xsd. The functions, whose results are added to the schema_dict and the corresponding keys @@ -76,7 +77,7 @@ def create_inpschema_dict(path: AnyStr, apply_patches: bool = True) -> InputSche } schema_patches = [convert_string_to_float_expr, patch_forcetheorem_attributes, patch_text_types] - xmlschema = etree.parse(path) + xmlschema = etree.parse(path) #type:ignore xmlschema, _ = clear_xml(xmlschema) xmlschema_evaluator = etree.XPathEvaluator(xmlschema, namespaces=NAMESPACES) diff --git a/masci_tools/io/parsers/fleur_schema/outschema_todict.py b/masci_tools/io/parsers/fleur_schema/outschema_todict.py index 1872a6b3f..87d8c160e 100644 --- a/masci_tools/io/parsers/fleur_schema/outschema_todict.py +++ b/masci_tools/io/parsers/fleur_schema/outschema_todict.py @@ -21,8 +21,9 @@ from masci_tools.util.lockable_containers import LockableDict, LockableList from lxml import etree import copy +import os from collections import UserList -from typing import AnyStr, Callable +from typing import Callable try: from typing import TypedDict, Literal except ImportError: @@ -63,7 +64,7 @@ class OutputSchemaData(TypedDict, total=False): 'omitt_contained_tags', 'tag_info', 'iteration_tag_info'] -def create_outschema_dict(path: AnyStr, +def create_outschema_dict(path: os.PathLike, inpschema_dict: inpschema_todict.InputSchemaData, apply_patches: bool = True) -> OutputSchemaData: """ @@ -99,7 +100,7 @@ def create_outschema_dict(path: AnyStr, schema_patches = [fix_qpoints_typo, patch_text_types] #print(f'processing: {path}/FleurOutputSchema.xsd') - xmlschema = etree.parse(path) + xmlschema = etree.parse(path) #type: ignore xmlschema, _ = clear_xml(xmlschema) xmlschema_evaluator = etree.XPathEvaluator(xmlschema, namespaces=NAMESPACES) diff --git a/masci_tools/io/parsers/fleur_schema/schema_dict.py b/masci_tools/io/parsers/fleur_schema/schema_dict.py index b938c22a2..3e29d0017 100644 --- a/masci_tools/io/parsers/fleur_schema/schema_dict.py +++ 
b/masci_tools/io/parsers/fleur_schema/schema_dict.py @@ -681,11 +681,8 @@ def fromPath(cls, path: os.PathLike) -> InputSchemaDict: :return: InputSchemaDict object with the information for the provided file """ - fspath = os.fspath(path) - schema_dict = create_inpschema_dict(fspath) - - xmlschema_doc = etree.parse(fspath) - xmlschema = etree.XMLSchema(xmlschema_doc) + schema_dict = create_inpschema_dict(path) + xmlschema = etree.XMLSchema(file=path) #type:ignore return cls(schema_dict, xmlschema=xmlschema) @@ -845,25 +842,22 @@ def fromPath(cls, if inp_path is None: inp_path = Path(path).parent / 'FleurInputSchema.xsd' - fspath = os.fspath(path) - fsinp_path = os.fspath(inp_path) - if inpschema_dict is None: - inpschema_dict = create_inpschema_dict(fsinp_path) #type:ignore + inpschema_dict = create_inpschema_dict(inp_path) #type:ignore inpschema_data = cast(InputSchemaData, inpschema_dict) - schema_dict = create_outschema_dict(fspath, inpschema_dict=inpschema_data) + schema_dict = create_outschema_dict(path, inpschema_dict=inpschema_data) schema_dict = merge_schema_dicts(inpschema_data, schema_dict) with tempfile.TemporaryDirectory() as td: td_path = Path(td) temp_input_schema_path = td_path / 'FleurInputSchema.xsd' - shutil.copy(fsinp_path, temp_input_schema_path) + shutil.copy(inp_path, temp_input_schema_path) temp_output_schema_path = td_path / 'FleurOutputSchema.xsd' - shutil.copy(fspath, temp_output_schema_path) - xmlschema_doc = etree.parse(os.fspath(temp_output_schema_path)) - xmlschema = etree.XMLSchema(xmlschema_doc) + shutil.copy(path, temp_output_schema_path) + + xmlschema = etree.XMLSchema(file=temp_output_schema_path) #type:ignore return cls(schema_dict, xmlschema=xmlschema) diff --git a/pyproject.toml b/pyproject.toml index 895835362..4fd6b77ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ 'matplotlib', 'h5py', 'pandas', - 'lxml>=4.5', + 'lxml~=4.8', 'more_itertools', 'seaborn', 'deepdiff', diff --git 
a/tests/cmdline/conftest.py b/tests/cmdline/conftest.py index 54a730ffb..d172477fa 100644 --- a/tests/cmdline/conftest.py +++ b/tests/cmdline/conftest.py @@ -9,7 +9,7 @@ @pytest.fixture -def fake_schemas_and_test_files(tmp_path): +def fake_schemas_and_test_files(tmp_path, test_file): """ Helper fixture for add fleur schema tests @@ -33,7 +33,7 @@ def dummy_migration(definitions): schema_file = schema_folder / 'FleurInputSchema.xsd' - inputschema = etree.parse(os.fspath(schema_file)) + inputschema = etree.parse(schema_file) namespaces = {'xsd': 'http://www.w3.org/2001/XMLSchema'} root = inputschema.xpath('/xsd:schema', namespaces=namespaces)[0] root.attrib['version'] = '0.01' @@ -41,11 +41,11 @@ def dummy_migration(definitions): namespaces=namespaces)[0] root.attrib['value'] = '0.01' - inputschema.write(os.fspath(tmp_path / 'FleurInputSchema.xsd'), encoding='utf-8', pretty_print=True) + inputschema.write(tmp_path / 'FleurInputSchema.xsd', encoding='utf-8', pretty_print=True) schema_file = schema_folder / 'FleurOutputSchema.xsd' - outputschema = etree.parse(os.fspath(schema_file)) + outputschema = etree.parse(schema_file) namespaces = {'xsd': 'http://www.w3.org/2001/XMLSchema'} root = outputschema.xpath('/xsd:schema', namespaces=namespaces)[0] root.attrib['version'] = '0.01' @@ -53,23 +53,21 @@ def dummy_migration(definitions): namespaces=namespaces)[0] root.attrib['value'] = '0.01' - outputschema.write(os.fspath(tmp_path / 'FleurOutputSchema.xsd'), encoding='utf-8', pretty_print=True) + outputschema.write(tmp_path / 'FleurOutputSchema.xsd', encoding='utf-8', pretty_print=True) - xml_file = Path(__file__).parent.resolve() / Path('../files/fleur/Max-R5/SiLOXML/files/inp.xml') - - xmltree = etree.parse(os.fspath(xml_file)) + xml_file = test_file('fleur/Max-R5/SiLOXML/files/inp.xml') + xmltree = etree.parse(xml_file) root = xmltree.xpath('/fleurInput')[0] root.attrib['fleurInputVersion'] = '0.01' - xmltree.write(os.fspath(tmp_path / 'inp.xml'), encoding='utf-8', 
pretty_print=True) - - xml_file = Path(__file__).parent.resolve() / Path('../files/fleur/Max-R5/SiLOXML/files/out.xml') + xmltree.write(tmp_path / 'inp.xml', encoding='utf-8', pretty_print=True) - xmltree = etree.parse(os.fspath(xml_file)) + xml_file = test_file('fleur/Max-R5/SiLOXML/files/out.xml') + xmltree = etree.parse(xml_file) root = xmltree.xpath('/fleurOutput')[0] root.attrib['fleurOutputVersion'] = '0.01' root = xmltree.xpath('//fleurInput')[0] root.attrib['fleurInputVersion'] = '0.01' - xmltree.write(os.fspath(tmp_path / 'out.xml'), encoding='utf-8', pretty_print=True) + xmltree.write(tmp_path / 'out.xml', encoding='utf-8', pretty_print=True) try: yield tmp_path