-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'develop' into f/example-branch-id-pk-blog
* develop: (29 commits) [BUGFIX] pydantic>=1.10.4 - ImportError: cannot import name dataclass_transform (#7163) [MAINTENANCE] ZEP - update asset factories method signatures from asset models (#7096) Delete cli v012 tests. (#7159) [CONTRIB] added new Expectations - India_zip_code expectation and not_to_be_future_date expectation (#6086) [MAINTENANCE] Remove unused dockerfile (#7152) [DOCS] doc-464 consolidating and standardizing snippets (#7154) [BUGFIX] Patch broken rendered content Cloud tests (#7155) [MAINTENANCE] Clean up `mypy` violations in `CardinalityChecker` (#7146) [MAINTENANCE] Clean up pathlib.Path() usage in DataConnector utilities and restore tighter formatting in great_expectations/util.py (#7149) [MAINTENANCE] Change all instances of `create_expectation_suite` to `add_expectation_suite` in tests, docs, and source code (#7117) [BUGFIX] Parse pandas version correctly for development builds (#7147) [MAINTENANCE] Update V3 DataConnector utilities to support New Datasources (ZEP) (#7144) [BUGFIX] Patch inconsistent ordering within GCP test asserts (#7130) Refactor sql splitter to take selectable instead of str. (#7133) [BUGFIX] `TupleAzureBlobStoreBackend` no longer gives warning when obfuscating connection string (#7139) [MAINTENANCE] ruff 0.0.246 update (#7137) [MAINTENANCE] Output Consistent Data Format from "table.head" Metric for every ExecutionEngine (#7134) [BUGFIX] Copy previous versions after checking out the the current commit (#7142) [DOCS] Remove sitemap.xml (#7141) [MAINTENANCE] mypy `v1.0.0` (#7138) ...
- Loading branch information
Showing
273 changed files
with
4,424 additions
and
14,250 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,9 @@ | ||
black==22.3.0 # Linting / code style | ||
black[jupyter]==22.3.0 # Linting / code style | ||
Click>=7.1.2 # CLI tooling | ||
cookiecutter==1.7.3 # Project templating | ||
mypy==0.991 # Type checker | ||
mypy==1.0.0 # Type checker | ||
pydantic>=1.0,<2.0 # Needed for mypy plugin | ||
pytest>=5.3.5 # Test framework | ||
ruff==0.0.241 # Linting / code style | ||
ruff==0.0.246 # Linting / code style | ||
twine==3.7.1 # Packaging | ||
wheel==0.37.1 # Packaging |
187 changes: 187 additions & 0 deletions
187
...t_expectations_experimental/expectations/expect_column_values_after_split_to_be_in_set.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
""" | ||
This is a template for creating custom ColumnMapExpectations. | ||
For detailed instructions on how to use it, please see: | ||
https://docs.greatexpectations.io/docs/guides/expectations/creating_custom_expectations/how_to_create_custom_column_map_expectations | ||
""" | ||
|
||
from typing import List, Optional | ||
|
||
from great_expectations.core.expectation_configuration import ExpectationConfiguration | ||
from great_expectations.execution_engine import PandasExecutionEngine | ||
from great_expectations.expectations.expectation import ColumnMapExpectation | ||
from great_expectations.expectations.metrics import ( | ||
ColumnMapMetricProvider, | ||
column_condition_partial, | ||
) | ||
|
||
|
||
def are_values_after_split_in_value_set(
    val: str, delimiter: str, value_set: List[str]
) -> bool:
    """Return True if every delimiter-separated token of ``val`` is in ``value_set``.

    Each token is stripped of surrounding whitespace before the membership
    test, so ``"a, b"`` split on ``","`` yields the tokens ``"a"`` and ``"b"``.

    Args:
        val: The raw cell value to split.
        delimiter: Separator to split on.
        value_set: Allowed values; pass a ``set`` for O(1) membership checks.

    Returns:
        True when all tokens are members of ``value_set``, else False.
    """
    # `all` short-circuits on the first token outside the value set, matching
    # the original early-return loop. The generator also avoids the original's
    # loop variable shadowing the `val` parameter.
    return all(token.strip() in value_set for token in val.split(delimiter))
|
||
|
||
# This class defines a Metric to support your Expectation. | ||
# For most ColumnMapExpectations, the main business logic for calculation will live in this class. | ||
class ColumnValuesAfterSplitInSet(ColumnMapMetricProvider):
    """Metric provider: every delimiter-split token of a cell must lie in a value set."""

    # Id string the Expectation's `map_metric` uses to reference this metric.
    condition_metric_name = "column_values.values_after_split_in_set"
    # Extra kwargs forwarded from the Expectation into the condition function.
    condition_value_keys = (
        "delimiter",
        "value_set",
    )

    # Core logic for the PandasExecutionEngine.
    @column_condition_partial(engine=PandasExecutionEngine)
    def _pandas(cls, column, delimiter, value_set, **kwargs):
        # Convert once up front so per-token membership checks are O(1).
        allowed = set(value_set)

        def _cell_passes(cell):
            return are_values_after_split_in_value_set(cell, delimiter, allowed)

        return column.apply(_cell_passes)
|
||
# This method defines the business logic for evaluating your metric when using a SqlAlchemyExecutionEngine | ||
# @column_condition_partial(engine=SqlAlchemyExecutionEngine) | ||
# def _sqlalchemy(cls, column, _dialect, **kwargs): | ||
# raise NotImplementedError | ||
|
||
# This method defines the business logic for evaluating your metric when using a SparkDFExecutionEngine | ||
# @column_condition_partial(engine=SparkDFExecutionEngine) | ||
# def _spark(cls, column, **kwargs): | ||
# raise NotImplementedError | ||
|
||
|
||
# This class defines the Expectation itself | ||
class ExpectColumnValuesAfterSplitToBeInSet(ColumnMapExpectation):
    """Expect values in the column after splitting on a delimiter to be in a pre-defined set."""

    # These examples are shown in the public gallery and are also executed
    # as unit tests for this Expectation.
    examples = [
        {
            "data": {
                "allowed_sports": [
                    "hockey,football",
                    "cricket,tennis",
                    "tennis,badminton,hockey",
                ],
                "not_sports": ["cnn,hockey", "football", "badminton,judo,BBC"],
            },
            "tests": [
                {
                    "title": "basic_positive_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {
                        "column": "allowed_sports",
                        "delimiter": ",",
                        "value_set": [
                            "hockey",
                            "football",
                            "tennis",
                            "badminton",
                            "cricket",
                        ],
                    },
                    "out": {
                        "success": True,
                    },
                },
                {
                    "title": "basic_negative_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {
                        "column": "not_sports",
                        "delimiter": ",",
                        "value_set": [
                            "hockey",
                            "football",
                            "tennis",
                            "badminton",
                            "cricket",
                        ],
                    },
                    "out": {
                        "success": False,
                    },
                },
                {
                    # Typo fixed: was "postive_test_with_mostly"; now matches
                    # the naming used by sibling Expectations.
                    "title": "positive_test_with_mostly",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {
                        "column": "not_sports",
                        "delimiter": ",",
                        "value_set": [
                            "hockey",
                            "football",
                            "tennis",
                            "badminton",
                            "cricket",
                        ],
                        # Only 1 of 3 rows passes (~0.33), so mostly=0.33 succeeds.
                        "mostly": 0.33,
                    },
                    "out": {
                        "success": True,
                    },
                },
            ],
        }
    ]

    # Id string of the Metric used by this Expectation. Matches
    # `condition_metric_name` on ColumnValuesAfterSplitInSet above.
    map_metric = "column_values.values_after_split_in_set"

    # Parameter names that can affect whether the Expectation evaluates to
    # True or False.
    success_keys = ("mostly", "value_set", "delimiter")

    # Default values for parameters; none beyond the base-class defaults.
    default_kwarg_values = {}

    def validate_configuration(
        self, configuration: Optional[ExpectationConfiguration] = None
    ) -> None:
        """Validate the configuration, falling back to ``self.configuration``.

        Args:
            configuration (Optional[ExpectationConfiguration]): \
                An optional Expectation Configuration entry that will be used to
                configure the expectation.

        Returns:
            None. Raises InvalidExpectationConfigurationError if the config is
            not validated successfully.
        """
        super().validate_configuration(configuration)
        configuration = configuration or self.configuration
        # No kwargs beyond those checked by the base class need validation here.

    # This object contains metadata for display in the public Gallery.
    library_metadata = {
        "tags": ["pandas"],  # Tags for this Expectation in the Gallery
        "contributors": [  # Github handles for all contributors to this Expectation.
            "@ace-racer",  # Don't forget to add your github handle here!
        ],
    }
|
||
|
||
if __name__ == "__main__":
    # Running the module directly prints the diagnostic checklist used to
    # gauge whether this Expectation is ready for the public gallery.
    ExpectColumnValuesAfterSplitToBeInSet().print_diagnostic_checklist()
156 changes: 156 additions & 0 deletions
156
...t_expectations_experimental/expectations/expect_column_values_after_split_to_be_unique.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
""" | ||
This is a template for creating custom ColumnMapExpectations. | ||
For detailed instructions on how to use it, please see: | ||
https://docs.greatexpectations.io/docs/guides/expectations/creating_custom_expectations/how_to_create_custom_column_map_expectations | ||
""" | ||
|
||
from typing import Optional | ||
|
||
from great_expectations.core.expectation_configuration import ExpectationConfiguration | ||
from great_expectations.execution_engine import PandasExecutionEngine | ||
from great_expectations.expectations.expectation import ColumnMapExpectation | ||
from great_expectations.expectations.metrics import ( | ||
ColumnMapMetricProvider, | ||
column_condition_partial, | ||
) | ||
|
||
|
||
def are_values_after_split_unique(val: str, delimiter: str) -> bool:
    """Return True when no token repeats after splitting ``val`` on ``delimiter``.

    Tokens are compared verbatim (no whitespace stripping), so ``"a, a"`` split
    on ``","`` yields ``"a"`` and ``" a"``, which are distinct.
    """
    tokens = val.split(delimiter)
    # A set collapses duplicates; equal sizes mean every token was distinct.
    return len(set(tokens)) == len(tokens)
|
||
|
||
# This class defines a Metric to support your Expectation. | ||
# For most ColumnMapExpectations, the main business logic for calculation will live in this class. | ||
class ColumnValuesAfterSplitAreUnique(ColumnMapMetricProvider):
    """Metric provider: each cell's delimiter-split tokens must be pairwise distinct."""

    # Id string the Expectation's `map_metric` uses to reference this metric.
    condition_metric_name = "column_values.values_after_split_are_unique"
    # Extra kwargs forwarded from the Expectation into the condition function.
    condition_value_keys = ("delimiter",)

    # Core logic for the PandasExecutionEngine.
    @column_condition_partial(engine=PandasExecutionEngine)
    def _pandas(cls, column, **kwargs):
        # Fall back to a comma when no delimiter kwarg was supplied.
        sep = kwargs.get("delimiter", ",")

        def _cell_passes(cell):
            return are_values_after_split_unique(cell, sep)

        return column.apply(_cell_passes)
|
||
# This method defines the business logic for evaluating your metric when using a SqlAlchemyExecutionEngine | ||
# @column_condition_partial(engine=SqlAlchemyExecutionEngine) | ||
# def _sqlalchemy(cls, column, _dialect, **kwargs): | ||
# raise NotImplementedError | ||
|
||
# This method defines the business logic for evaluating your metric when using a SparkDFExecutionEngine | ||
# @column_condition_partial(engine=SparkDFExecutionEngine) | ||
# def _spark(cls, column, **kwargs): | ||
# raise NotImplementedError | ||
|
||
|
||
# This class defines the Expectation itself | ||
class ExpectColumnValuesAfterSplitToBeUnique(ColumnMapExpectation):
    """Expect values in the column after splitting on a delimiter to be unique.

    The uniqueness check is per cell: each cell is split on ``delimiter`` and
    the resulting tokens must be pairwise distinct (tokens are compared
    verbatim — no whitespace stripping).
    """

    # These examples will be shown in the public gallery.
    # They will also be executed as unit tests for your Expectation.
    examples = [
        {
            "data": {
                "unique_sports": [
                    "hockey,football",
                    "cricket",
                    "tennis,badminton,hockey",
                ],
                "duplicate_sports": [
                    "foosball,hockey",
                    "football,football",
                    "badminton,judo",
                ],
            },
            "tests": [
                {
                    "title": "basic_positive_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "unique_sports", "delimiter": ","},
                    "out": {
                        "success": True,
                    },
                },
                {
                    "title": "basic_negative_test",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {"column": "duplicate_sports", "delimiter": ","},
                    "out": {
                        "success": False,
                    },
                },
                {
                    "title": "positive_test_with_mostly",
                    "exact_match_out": False,
                    "include_in_gallery": True,
                    "in": {
                        "column": "duplicate_sports",
                        "delimiter": ",",
                        # 2 of 3 rows pass (~0.66), so mostly=0.66 succeeds.
                        "mostly": 0.66,
                    },
                    "out": {
                        "success": True,
                    },
                },
            ],
        }
    ]

    # This is the id string of the Metric used by this Expectation.
    # For most Expectations, it will be the same as the `condition_metric_name` defined in your Metric class above.
    map_metric = "column_values.values_after_split_are_unique"

    # This is a list of parameter names that can affect whether the Expectation evaluates to True or False
    success_keys = (
        "mostly",
        "delimiter",
    )

    # This dictionary contains default values for any parameters that should have default values
    default_kwarg_values = {}

    def validate_configuration(
        self, configuration: Optional[ExpectationConfiguration] = None
    ) -> None:
        """
        Validates that a configuration has been set, and sets a configuration if it has yet to be set. Ensures that
        necessary configuration arguments have been provided for the validation of the expectation.
        Args:
            configuration (OPTIONAL[ExpectationConfiguration]): \
                An optional Expectation Configuration entry that will be used to configure the expectation
        Returns:
            None. Raises InvalidExpectationConfigurationError if the config is not validated successfully
        """

        super().validate_configuration(configuration)
        configuration = configuration or self.configuration

        # # Check other things in configuration.kwargs and raise Exceptions if needed
        # try:
        #     assert (
        #         ...
        #     ), "message"
        #     assert (
        #         ...
        #     ), "message"
        # except AssertionError as e:
        #     raise InvalidExpectationConfigurationError(str(e))

    # This object contains metadata for display in the public Gallery
    library_metadata = {
        "tags": ["pandas"],  # Tags for this Expectation in the Gallery
        "contributors": [  # Github handles for all contributors to this Expectation.
            "@ace-racer",  # Don't forget to add your github handle here!
        ],
    }
|
||
|
||
if __name__ == "__main__":
    # Running the module directly prints the diagnostic checklist used to
    # gauge whether this Expectation is ready for the public gallery.
    ExpectColumnValuesAfterSplitToBeUnique().print_diagnostic_checklist()
Oops, something went wrong.