diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 9e02aa62b..52325a5ec 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -21,14 +21,12 @@ help="Whether to write a report under a unique path in the DANDI log directory.", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True)) -@devel_debug_option() @map_to_click_exceptions def validate_bids( paths, schema, report, report_path, - devel_debug=False, ): """Validate BIDS paths. @@ -46,7 +44,6 @@ def validate_bids( report=report, report_path=report_path, schema_version=schema, - devel_debug=devel_debug, ) valid = is_valid(validator_result) report_errors(validator_result) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index f97b8f10f..228508e08 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -7,7 +7,7 @@ def test_validate_bids_error(bids_examples): from ..cmd_validate import validate_bids expected_error = ( - "Summary: 2 filename patterns required by BIDS could not be found " + "Summary: 1 filename pattern required by BIDS could not be found " "and 1 filename did not match any pattern known to BIDS.\n" ) broken_dataset = os.path.join(bids_examples, "invalid_pet001") diff --git a/dandi/support/bids/__init__.py b/dandi/support/bids/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/dandi/support/bids/schema.py b/dandi/support/bids/schema.py deleted file mode 100644 index 2865d00bd..000000000 --- a/dandi/support/bids/schema.py +++ /dev/null @@ -1,153 +0,0 @@ -""" -THIS MODULE IS BUNDLED FROM THE bids-specification PACKAGE. -Schema loading- and processing-related functions. -""" -from copy import deepcopy -import logging -import os -from pathlib import Path - -from ruamel import yaml - -from . import utils - -lgr = utils.get_logger() -# Basic settings for output, for now just basic -utils.set_logger_level(lgr, os.environ.get("BIDS_SCHEMA_LOG_LEVEL", logging.INFO)) -logging.basicConfig(format="%(asctime)-15s [%(levelname)8s] %(message)s") - - -def _get_entry_name(path): - if path.suffix == ".yaml": - return path.name[:-5] # no .yaml - else: - return path.name - - -def dereference_yaml(schema, struct): - """Recursively search a dictionary-like object for $ref keys. - - Each $ref key is replaced with the contents of the referenced field in the overall - dictionary-like object. - """ - if isinstance(struct, dict): - if "$ref" in struct: - ref_field = struct["$ref"] - template = schema[ref_field] - struct.pop("$ref") - # Result is template object with local overrides - struct = {**template, **struct} - - struct = {key: dereference_yaml(schema, val) for key, val in struct.items()} - - elif isinstance(struct, list): - struct = [dereference_yaml(schema, item) for item in struct] - - return struct - - -def load_schema(schema_path): - """Load the schema into a dictionary. - - This function allows the schema, like BIDS itself, to be specified in - a hierarchy of directories and files. - File names (minus extensions) and directory names become keys - in the associative array (dict) of entries composed from content - of files and entire directories. - - Parameters - ---------- - schema_path : str - Folder containing yaml files or yaml file. - - Returns - ------- - dict - Schema in dictionary form. - """ - _yaml = yaml.YAML(typ="safe", pure=True) - - schema_path = Path(schema_path) - objects_dir = schema_path / "objects/" - rules_dir = schema_path / "rules/" - - if not objects_dir.is_dir() or not rules_dir.is_dir(): - raise ValueError( - f"Schema path or paths do not exist:\n\t{str(objects_dir)}\n\t{str(rules_dir)}" - ) - - schema = {} - schema["objects"] = {} - schema["rules"] = {} - - # Load object definitions. All are present in single files. - for object_group_file in sorted(objects_dir.glob("*.yaml")): - lgr.debug(f"Loading {object_group_file.stem} objects.") - dict_ = _yaml.load(object_group_file.read_text()) - schema["objects"][object_group_file.stem] = dereference_yaml(dict_, dict_) - - # Grab single-file rule groups - for rule_group_file in sorted(rules_dir.glob("*.yaml")): - lgr.debug(f"Loading {rule_group_file.stem} rules.") - dict_ = _yaml.load(rule_group_file.read_text()) - schema["rules"][rule_group_file.stem] = dereference_yaml(dict_, dict_) - - # Load folders of rule subgroups. - for rule_group_file in sorted(rules_dir.glob("*/*.yaml")): - rule = schema["rules"].setdefault(rule_group_file.parent.name, {}) - lgr.debug(f"Loading {rule_group_file.stem} rules.") - dict_ = _yaml.load(rule_group_file.read_text()) - rule[rule_group_file.stem] = dereference_yaml(dict_, dict_) - - return schema - - -def filter_schema(schema, **kwargs): - """Filter the schema based on a set of keyword arguments. - - Parameters - ---------- - schema : dict - The schema object, which is a dictionary with nested dictionaries and - lists stored within it. - kwargs : dict - Keyword arguments used to filter the schema. - Example kwargs that may be used include: "suffixes", "datatypes", - "extensions". - - Returns - ------- - new_schema : dict - The filtered version of the schema. - - Notes - ----- - This function calls itself recursively, in order to apply filters at - arbitrary depth. - - Warning - ------- - This function employs a *very* simple filter. It is very limited. - """ - new_schema = deepcopy(schema) - if isinstance(new_schema, dict): - # Reduce values in dict to only requested - for k, v in kwargs.items(): - if k in new_schema.keys(): - filtered_item = deepcopy(new_schema[k]) - if isinstance(filtered_item, dict): - filtered_item = { - k1: v1 for k1, v1 in filtered_item.items() if k1 in v - } - else: - filtered_item = [i for i in filtered_item if i in v] - new_schema[k] = filtered_item - - for k2, v2 in new_schema.items(): - new_schema[k2] = filter_schema(new_schema[k2], **kwargs) - - elif isinstance(new_schema, list): - for i, item in enumerate(new_schema): - if isinstance(item, dict): - new_schema[i] = filter_schema(item, **kwargs) - return new_schema diff --git a/dandi/support/bids/schemadata/1.7.0+012+dandi001/README.md b/dandi/support/bids/schemadata/1.7.0+012+dandi001/README.md deleted file mode 100644 index c3d142cba..000000000 --- a/dandi/support/bids/schemadata/1.7.0+012+dandi001/README.md +++ /dev/null @@ -1,500 +0,0 @@ -# BIDS-schema - -Portions of the BIDS specification are defined using YAML files, in order to -make the specification machine-readable. - -Currently, the portions of the specification that rely on this schema are -the entity tables, entity definitions, filename templates, and metadata tables. -Any changes to the specification should be mirrored in the schema. - -## The format of the schema - -The schema is divided into two parts: the object definitions and the rules. - -The object definitions (files in `objects/`) describe attributes of individual -objects or data types in the specification. -Common information in these files includes full names, descriptions, and -constraints on valid values. -These files **do not** describe how objects of different types -(for example file suffixes and file entities) interact with one another, or -whether objects are required in a given dataset or file. - -The rules (files in `rules/`) describe how objects related to one another, -as well as their requirement levels. - -## Object files - -The types of objects currently supported in the schema are: - -- modalities, -- datatypes, -- entities, -- suffixes, -- metadata, -- top-level files, -- and non-BIDS associated folders. - -Each of these object types has a single file in the `objects/` folder. - -- `modalities.yaml`: The modalities, or types of technology, used to acquire data in a BIDS dataset. - These modalities are not reflected directly in the specification. - For example, while both fMRI and DWI data are acquired with an MRI, - in a BIDS dataset they are stored in different folders reflecting the two different `datatypes`. - -- `datatypes.yaml`: Data types supported by the specification. - The only information provided in the file is: - - 1. a full list of valid BIDS datatypes - 1. each datatype's full name - 1. a free text description of the datatype. - -- `entities.yaml`: Entities (key/value pairs in folder and filenames). - -- `metadata.yaml`: All valid metadata fields that are explicitly supported in BIDS sidecar JSON files. - -- `columns.yaml`: All valid columns that are explicitly supported in BIDS TSV files. - -- `suffixes.yaml`: Valid file suffixes. - -- `top_level_files.yaml`: Valid top-level files which may appear in a BIDS dataset. - -- `associated_data.yaml`: Folders that may appear within a dataset folder without following BIDS rules. - -### On re-used objects with different definitions - -If an object may mean something different depending on where it is used within the specification, -then this must be reflected in the schema. -Specifically, each version of the object must have its own definition within the relevant file. -However, since object files are organized as dictionaries, each object must have a unique key. -Thus, we append a suffix to each re-used object's key in order to make it unique. -For objects with `CamelCase` names (for example, metadata fields), the suffix will start with a single underscore (`_`). -For objects with `snake_case` names, two underscores must be used. - -There should also be a comment near the object definition in the YAML file describing the nature of the different objects. - -For example, the TSV column `"reference"` means different things when used for EEG data, as compared to iEEG data. -As such, there are two definitions in `columns.yaml` for the `"reference"` column: `"reference__eeg"` and `"reference_ieeg"`. - -```yaml -# reference column for channels.tsv files for EEG data -reference__eeg: - name: reference - description: | - Name of the reference electrode(s). - This column is not needed when it is common to all channels. - In that case the reference electrode(s) can be specified in `*_eeg.json` as `EEGReference`). - type: string -# reference column for channels.tsv files for iEEG data -reference__ieeg: - name: reference - description: | - Specification of the reference (for example, 'mastoid', 'ElectrodeName01', 'intracranial', 'CAR', 'other', 'n/a'). - If the channel is not an electrode channel (for example, a microphone channel) use `n/a`. - anyOf: - - type: string - - type: string - enum: - - n/a -``` - -When adding new object definitions to the schema, -every effort should be made to find a shared, common definition for the term, should it already exist. -If the differences between two versions of the same object are subtle or driven by context, -then you can generally _append_ additional text to the object definition within the associated rendered table in the specification, -rather than creating a separate entry in the schema. - -### `modalities.yaml` - -This file contains a dictionary in which each modality is defined. -Keys are modality abbreviations (for example, `mri` for magnetic resonance imaging), -and each associated value is a dictionary with two keys: `name` and `description`. - -The `name` field is the full name of the modality. -The `description` field is a freeform description of the modality. - -### `datatypes.yaml` - -This file contains a dictionary in which each datatype is defined. -Keys are the folder names associated with each datatype (for example, `anat` for anatomical MRI), -and each associated value is a dictionary with two keys: `name` and `description`. - -The `name` field is the full name of the datatype. -The `description` field is a freeform description of the datatype. - -### `entities.yaml` - -This file contains a dictionary in which each entity (key/value pair in filenames) is defined. -Keys are long-form versions of the entities, which are distinct from both the entities as -they appear in filenames _and_ their full names. -For example, the key for the "Contrast Enhancing Agent" entity, which appears in filenames as `ce-