diff --git a/src/neptune/new/attributes/namespace.py b/src/neptune/new/attributes/namespace.py index 1e6c93f54..856e89fe7 100644 --- a/src/neptune/new/attributes/namespace.py +++ b/src/neptune/new/attributes/namespace.py @@ -13,14 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import argparse from collections.abc import MutableMapping from typing import ( TYPE_CHECKING, Any, + Collection, Dict, + Iterable, Iterator, List, Mapping, + Optional, Union, ) @@ -63,6 +67,19 @@ def __len__(self) -> int: def __iter__(self) -> Iterator[str]: yield from self._attributes.__iter__() + def extend( + self, + value: Union[Any, Iterable[Any]], + steps: Optional[Collection[float]] = None, + timestamps: Optional[Collection[float]] = None, + wait: bool = False, + **kwargs, + ) -> None: + if not isinstance(value, NamespaceVal): + value = NamespaceVal(value) + for k, v in value.value.items(): + self._container[f"{self._str_path}/{k}"].extend(v, steps, timestamps, wait, **kwargs) + def to_dict(self) -> Dict[str, Any]: result = {} for key, value in self._attributes.items(): @@ -73,7 +90,9 @@ def to_dict(self) -> Dict[str, Any]: return result def assign(self, value: Union[NamespaceVal, dict, Mapping], wait: bool = False): - if not isinstance(value, NamespaceVal): + if isinstance(value, argparse.Namespace): + value = NamespaceVal(vars(value)) + elif not isinstance(value, NamespaceVal): value = NamespaceVal(value) for k, v in value.value.items(): diff --git a/src/neptune/new/attributes/series/file_series.py b/src/neptune/new/attributes/series/file_series.py index 619860f9d..e1d20e579 100644 --- a/src/neptune/new/attributes/series/file_series.py +++ b/src/neptune/new/attributes/series/file_series.py @@ -35,30 +35,26 @@ ) from neptune.new.internal.types.file_types import FileType from neptune.new.internal.utils import base64_encode -from neptune.new.internal.utils.iteration import get_batches from 
neptune.new.internal.utils.limits import image_size_exceeds_limit_for_logging from neptune.new.types import File from neptune.new.types.series.file_series import FileSeries as FileSeriesVal Val = FileSeriesVal Data = File +LogOperation = LogImages -class FileSeries(Series[Val, Data]): - def _get_log_operations_from_value(self, value: Val, step: Optional[float], timestamp: float) -> List[Operation]: - values = [ - LogImages.ValueType( - ImageValue( - data=self._get_base64_image_content(val), - name=value.name, - description=value.description, - ), - step=step, - ts=timestamp, +class FileSeries(Series[Val, Data, LogOperation], max_batch_size=1, operation_cls=LogOperation): + @classmethod + def _map_series_val(cls, value: Val) -> List[ImageValue]: + return [ + ImageValue( + data=cls._get_base64_image_content(val), + name=value.name, + description=value.description, ) for val in value.values ] - return [LogImages(self._path, chunk) for chunk in get_batches(values, batch_size=1)] def _get_clear_operation(self) -> Operation: return ClearImageLog(self._path) diff --git a/src/neptune/new/attributes/series/float_series.py b/src/neptune/new/attributes/series/float_series.py index 09483b152..22660ad36 100644 --- a/src/neptune/new/attributes/series/float_series.py +++ b/src/neptune/new/attributes/series/float_series.py @@ -15,7 +15,6 @@ # from typing import ( Iterable, - List, Optional, Union, ) @@ -30,15 +29,17 @@ Operation, ) from neptune.new.internal.utils import verify_type -from neptune.new.internal.utils.iteration import get_batches from neptune.new.internal.utils.logger import logger from neptune.new.types.series.float_series import FloatSeries as FloatSeriesVal Val = FloatSeriesVal Data = Union[float, int] +LogOperation = LogFloats -class FloatSeries(Series[Val, Data], FetchableSeries[FloatSeriesValues]): +class FloatSeries( + Series[Val, Data, LogOperation], FetchableSeries[FloatSeriesValues], max_batch_size=100, operation_cls=LogOperation +): def configure( self, 
min: Optional[Union[float, int]] = None, @@ -52,10 +53,6 @@ def configure( with self._container.lock(): self._enqueue_operation(ConfigFloatSeries(self._path, min, max, unit), wait) - def _get_log_operations_from_value(self, value: Val, step: Optional[float], timestamp: float) -> List[Operation]: - values = [LogFloats.ValueType(val, step=step, ts=timestamp) for val in value.values] - return [LogFloats(self._path, chunk) for chunk in get_batches(values, batch_size=100)] - def _get_clear_operation(self) -> Operation: return ClearFloatLog(self._path) diff --git a/src/neptune/new/attributes/series/series.py b/src/neptune/new/attributes/series/series.py index dc9c53de3..f8d83cb04 100644 --- a/src/neptune/new/attributes/series/series.py +++ b/src/neptune/new/attributes/series/series.py @@ -15,7 +15,9 @@ # import abc import time +from itertools import cycle from typing import ( + Collection, Generic, Iterable, List, @@ -25,34 +27,60 @@ ) from neptune.new.attributes.attribute import Attribute -from neptune.new.internal.operation import Operation +from neptune.new.internal.operation import LogOperation from neptune.new.internal.utils import ( is_collection, + verify_collection_type, verify_type, ) +from neptune.new.internal.utils.iteration import get_batches from neptune.new.types.series.series import Series as SeriesVal -Val = TypeVar("Val", bound=SeriesVal) -Data = TypeVar("Data") +ValTV = TypeVar("ValTV", bound=SeriesVal) +DataTV = TypeVar("DataTV") +LogOperationTV = TypeVar("LogOperationTV", bound=LogOperation) -class Series(Attribute, Generic[Val, Data]): +class Series(Attribute, Generic[ValTV, DataTV, LogOperationTV]): + def __init_subclass__(cls, max_batch_size: int, operation_cls: type(LogOperationTV)): + cls.max_batch_size = max_batch_size + cls.operation_cls = operation_cls + def clear(self, wait: bool = False) -> None: self._clear_impl(wait) - @abc.abstractmethod - def _get_log_operations_from_value(self, value: Val, step: Optional[float], timestamp: float) -> 
List[Operation]: - pass + def _get_log_operations_from_value( + self, value: ValTV, *, steps: Union[None, Collection[float]], timestamps: Union[None, Collection[float]] + ) -> List[LogOperationTV]: + if steps is None: + steps = cycle([None]) + else: + assert len(value) == len(steps) + if timestamps is None: + timestamps = cycle([time.time()]) + else: + assert len(value) == len(timestamps) + + mapped_values = self._map_series_val(value) + values_with_step_and_ts = zip(mapped_values, steps, timestamps) + log_values = [self.operation_cls.ValueType(val, step=step, ts=ts) for val, step, ts in values_with_step_and_ts] + return [ + self.operation_cls(self._path, chunk) for chunk in get_batches(log_values, batch_size=self.max_batch_size) + ] - def _get_config_operation_from_value(self, value: Val) -> Optional[Operation]: + @classmethod + def _map_series_val(cls, value: ValTV) -> List[DataTV]: + return value.values + + def _get_config_operation_from_value(self, value: ValTV) -> Optional[LogOperationTV]: return None @abc.abstractmethod - def _get_clear_operation(self) -> Operation: + def _get_clear_operation(self) -> LogOperationTV: pass @abc.abstractmethod - def _data_to_value(self, values: Iterable, **kwargs) -> Val: + def _data_to_value(self, values: Iterable, **kwargs) -> ValTV: pass @abc.abstractmethod @@ -71,19 +99,19 @@ def assign(self, value, wait: bool = False) -> None: self._enqueue_operation(clear_op, wait=wait) else: self._enqueue_operation(clear_op, wait=False) - ts = time.time() - ops = self._get_log_operations_from_value(value, None, ts) + ops = self._get_log_operations_from_value(value, steps=None, timestamps=None) for op in ops: self._enqueue_operation(op, wait=wait) def log( self, - value: Union[Data, Iterable[Data]], + value: Union[DataTV, Iterable[DataTV]], step: Optional[float] = None, timestamp: Optional[float] = None, wait: bool = False, **kwargs, ) -> None: + """log is a deprecated method, this code should be removed in future""" if 
is_collection(value): if step is not None and len(value) > 1: raise ValueError("Collection of values are not supported for explicitly defined 'step'.") @@ -96,10 +124,37 @@ def log( if timestamp is not None: verify_type("timestamp", timestamp, (float, int)) - if not timestamp: - timestamp = time.time() + steps = None if step is None else [step] + timestamps = None if timestamp is None else [timestamp] * len(value) + + ops = self._get_log_operations_from_value(value, steps=steps, timestamps=timestamps) - ops = self._get_log_operations_from_value(value, step, timestamp) + with self._container.lock(): + for op in ops: + self._enqueue_operation(op, wait) + + def extend( + self, + values: Collection[DataTV], + steps: Optional[Collection[float]] = None, + timestamps: Optional[Collection[float]] = None, + wait: bool = False, + **kwargs, + ) -> None: + value = self._data_to_value(values, **kwargs) + + if steps is not None: + verify_collection_type("steps", steps, (float, int)) + if len(steps) != len(values): + raise ValueError(f"Number of steps must be equal to number of values ({len(steps)} != {len(values)})") + if timestamps is not None: + verify_collection_type("timestamps", timestamps, (float, int)) + if len(timestamps) != len(values): + raise ValueError( + f"Number of timestamps must be equal to number of values ({len(timestamps)} != {len(values)})" + ) + + ops = self._get_log_operations_from_value(value, steps=steps, timestamps=timestamps) with self._container.lock(): for op in ops: diff --git a/src/neptune/new/attributes/series/string_series.py b/src/neptune/new/attributes/series/string_series.py index 270a241a5..b81861e83 100644 --- a/src/neptune/new/attributes/series/string_series.py +++ b/src/neptune/new/attributes/series/string_series.py @@ -15,9 +15,10 @@ # from typing import ( TYPE_CHECKING, + Collection, Iterable, List, - Optional, + Union, ) from neptune.new.attributes.series.fetchable_series import FetchableSeries @@ -28,9 +29,9 @@ LogStrings, Operation, ) 
-from neptune.new.internal.utils.iteration import get_batches from neptune.new.internal.utils.logger import logger from neptune.new.internal.utils.paths import path_to_str +from neptune.new.types.series.string_series import MAX_STRING_SERIES_VALUE_LENGTH from neptune.new.types.series.string_series import StringSeries as StringSeriesVal if TYPE_CHECKING: @@ -38,18 +39,20 @@ Val = StringSeriesVal Data = str +LogOperation = LogStrings -MAX_STRING_SERIES_VALUE_LENGTH = 1000 - -class StringSeries(Series[Val, Data], FetchableSeries[StringSeriesValues]): +class StringSeries( + Series[Val, Data, LogOperation], FetchableSeries[StringSeriesValues], max_batch_size=10, operation_cls=LogOperation +): def __init__(self, container: "MetadataContainer", path: List[str]): super().__init__(container, path) self._value_truncation_occurred = False - def _get_log_operations_from_value(self, value: Val, step: Optional[float], timestamp: float) -> List[Operation]: - values = [v[:MAX_STRING_SERIES_VALUE_LENGTH] for v in value.values] - if not self._value_truncation_occurred and any([len(v) > MAX_STRING_SERIES_VALUE_LENGTH for v in value.values]): + def _get_log_operations_from_value( + self, value: Val, *, steps: Union[None, Collection[float]], timestamps: Union[None, Collection[float]] + ) -> List[LogOperation]: + if not self._value_truncation_occurred and value.truncated: # the first truncation self._value_truncation_occurred = True logger.warning( @@ -60,8 +63,7 @@ def _get_log_operations_from_value(self, value: Val, step: Optional[float], time MAX_STRING_SERIES_VALUE_LENGTH, ) - values = [LogStrings.ValueType(val, step=step, ts=timestamp) for val in values] - return [LogStrings(self._path, chunk) for chunk in get_batches(values, batch_size=10)] + return super()._get_log_operations_from_value(value, steps=steps, timestamps=timestamps) def _get_clear_operation(self) -> Operation: return ClearStringLog(self._path) @@ -69,6 +71,7 @@ def _get_clear_operation(self) -> Operation: def 
_data_to_value(self, values: Iterable, **kwargs) -> Val: if kwargs: logger.warning("Warning: unexpected arguments (%s) in StringSeries", kwargs) + return StringSeriesVal(values) def _is_value_type(self, value) -> bool: diff --git a/src/neptune/new/exceptions.py b/src/neptune/new/exceptions.py index d4f3b2983..f2a552aed 100644 --- a/src/neptune/new/exceptions.py +++ b/src/neptune/new/exceptions.py @@ -1354,3 +1354,8 @@ def __init__(self): {correct}Need help?{end}-> https://docs.neptune.ai/getting_help """ super().__init__(message.format(**STYLES)) + + +class NeptuneUserApiInputException(NeptuneException): + def __init__(self, message): + super().__init__(message) diff --git a/src/neptune/new/handler.py b/src/neptune/new/handler.py index 4629099a9..0f89ab93c 100644 --- a/src/neptune/new/handler.py +++ b/src/neptune/new/handler.py @@ -16,7 +16,11 @@ from functools import wraps from typing import ( TYPE_CHECKING, + Any, + Collection, + Dict, Iterable, + Iterator, List, Optional, Union, @@ -38,10 +42,12 @@ from neptune.new.exceptions import ( MissingFieldException, NeptuneCannotChangeStageManually, + NeptuneUserApiInputException, ) from neptune.new.internal.artifacts.types import ArtifactFileData from neptune.new.internal.utils import ( is_collection, + is_dict_like, is_float, is_float_like, is_string, @@ -53,7 +59,9 @@ join_paths, parse_path, ) +from neptune.new.internal.value_to_attribute_visitor import ValueToAttributeVisitor from neptune.new.types.atoms.file import File as FileVal +from neptune.new.types.type_casting import cast_value_for_extend from neptune.new.types.value_copy import ValueCopy if TYPE_CHECKING: @@ -75,6 +83,9 @@ def inner_fun(self: "Handler", *args, **kwargs): return inner_fun +ExtendDictT = Union[Collection[Any], Dict[str, "ExtendDictT"]] + + class Handler: # paths which can't be modified by client directly _PROTECTED_PATHS = { @@ -156,12 +167,12 @@ def assign(self, value, wait: bool = False) -> None: """ with self._container.lock(): attr = 
self._container.get_attribute(self._path) - if attr: + if not attr: + self._container.define(self._path, value) + else: if isinstance(value, Handler): value = ValueCopy(value) attr.process_assignment(value, wait) - else: - self._container.define(self._path, value, wait) @check_protected_paths def upload(self, value, wait: bool = False) -> None: @@ -287,6 +298,102 @@ def log( self._container.set_attribute(self._path, attr) attr.log(value, step=step, timestamp=timestamp, wait=wait, **kwargs) + def append( + self, + value: Union[dict, Any], + step: Optional[float] = None, + timestamp: Optional[float] = None, + wait: bool = False, + **kwargs, + ) -> None: + """Logs a series of values, such as a metric, by appending the provided value to the end of the series. + + Available for following series field types: + + * `FloatSeries` - series of float values + * `StringSeries` - series of strings + * `FileSeries` - series of files + + When you log the first value, the type of the value determines what type of field is created. + For more, see the field types documentation: https://docs.neptune.ai/api/field_types + + Args: + value: Value to be added to the series field. + step: Optional index of the entry being appended. Must be strictly increasing. + timestamp: Optional time index of the log entry being appended, in Unix time format. + If None, the current time (obtained with `time.time()`) is used. + wait: If True, the client sends all tracked metadata to the server before executing the call. + For details, see https://docs.neptune.ai/api/universal/#wait + + Examples: + >>> import neptune.new as neptune + >>> run = neptune.init_run() + >>> for epoch in range(n_epochs): + ... ... # Your training loop + ... run["train/epoch/loss"].append(loss) # FloatSeries + ... token = str(...) + ... run["train/tokens"].append(token) # StringSeries + ... 
run["train/distribution"].append(plt_histogram, step=epoch) # FileSeries + """ + verify_type("step", step, (int, float, type(None))) + verify_type("timestamp", timestamp, (int, float, type(None))) + if step is not None: + step = [step] + if timestamp is not None: + timestamp = [timestamp] + + value = ExtendUtils.validate_and_transform_to_extend_format(value) + self.extend(value, step, timestamp, wait, **kwargs) + + def extend( + self, + values: ExtendDictT, + steps: Optional[Collection[float]] = None, + timestamps: Optional[Collection[float]] = None, + wait: bool = False, + **kwargs, + ) -> None: + """Logs a series of values by appending the provided collection of values to the end of the series. + + Available for following series field types: + + * `FloatSeries` - series of float values + * `StringSeries` - series of strings + * `FileSeries` - series of files + + When you log the first value, the type of the value determines what type of field is created. + For more, see the field types documentation: https://docs.neptune.ai/api/field_types + + Args: + values: Values to be added to the series field, as a dictionary or collection. + steps: Optional collection of indices for the entries being appended. Must be strictly increasing. + timestamps: Optional collection of time indices for the entries being appended, in Unix time format. + If None, the current time (obtained with `time.time()`) is used. + wait: If True, the client sends all tracked metadata to the server before executing the call. + For details, see https://docs.neptune.ai/api/universal/#wait + + Example: + The following example reads a CSV file into a pandas DataFrame and extracts the values + to create a Neptune series. + >>> import neptune.new as neptune + >>> run = neptune.init_run() + >>> for epoch in range(n_epochs): + ... df = pandas.read_csv("time_series.csv") + ... ys = df["value"] + ... ts = df["timestamp"] + ... 
run["data/example_series"].extend(ys, timestamps=ts) + """ + ExtendUtils.validate_values_for_extend(values, steps, timestamps) + + with self._container.lock(): + attr = self._container.get_attribute(self._path) + if not attr: + neptune_value = cast_value_for_extend(values) + attr = ValueToAttributeVisitor(self._container, parse_path(self._path)).visit(neptune_value) + self._container.set_attribute(self._path, attr) + + attr.extend(values, steps=steps, timestamps=timestamps, wait=wait, **kwargs) + @check_protected_paths def add(self, values: Union[str, Iterable[str]], wait: bool = False) -> None: """Adds the provided tag or tags to the run's tags. @@ -518,10 +625,53 @@ def track_files(self, path: str, destination: str = None, wait: bool = False) -> attr = self._container.get_attribute(self._path) if not attr: attr = Artifact(self._container, parse_path(self._path)) - - self._container.set_attribute(self._path, attr) + self._container.set_attribute(self._path, attr) attr.track_files(path=path, destination=destination, wait=wait) def __delitem__(self, path) -> None: self.pop(path) + + +class ExtendUtils: + @staticmethod + def validate_and_transform_to_extend_format(value): + """Preserve nested structure created by `Namespaces` and `dict_like` objects, + but replace all other values with single-element lists, + so work can be delegated to `extend` method.""" + if isinstance(value, Namespace) or is_dict_like(value): + return {k: ExtendUtils.validate_and_transform_to_extend_format(v) for k, v in value.items()} + elif is_collection(value): + raise NeptuneUserApiInputException( + "Value cannot be a collection, if you want to `append` multiple values at once use `extend` method." + ) + else: + return [value] + + @staticmethod + def validate_values_for_extend(values, steps, timestamps): + """Validates if input data is a collection or Namespace with collections leafs. 
+ If steps or timestamps are passed, check if its length is equal to all given values.""" + collections_lengths = set(ExtendUtils.generate_leaf_collection_lengths(values)) + + if len(collections_lengths) > 1: + if steps is not None: + raise NeptuneUserApiInputException("Number of steps must be equal to number of values") + if timestamps is not None: + raise NeptuneUserApiInputException("Number of timestamps must be equal to number of values") + else: + common_collections_length = next(iter(collections_lengths)) + if steps is not None and common_collections_length != len(steps): + raise NeptuneUserApiInputException("Number of steps must be equal to number of values") + if timestamps is not None and common_collections_length != len(timestamps): + raise NeptuneUserApiInputException("Number of timestamps must be equal to number of values") + + @staticmethod + def generate_leaf_collection_lengths(values) -> Iterator[int]: + if isinstance(values, Namespace) or is_dict_like(values): + for val in values.values(): + yield from ExtendUtils.generate_leaf_collection_lengths(val) + elif is_collection(values): + yield len(values) + else: + raise NeptuneUserApiInputException("Values must be a collection or Namespace leafs must be collections") diff --git a/src/neptune/new/internal/operation.py b/src/neptune/new/internal/operation.py index 963e817db..8a507a4db 100644 --- a/src/neptune/new/internal/operation.py +++ b/src/neptune/new/internal/operation.py @@ -49,7 +49,7 @@ def all_subclasses(cls): @dataclass -class Operation: +class Operation(abc.ABC): path: List[str] @@ -276,6 +276,10 @@ def from_dict(data: dict) -> "UploadFileSet": return UploadFileSet(data["path"], data["file_globs"], data["reset"] != str(False)) +class LogOperation(Operation, abc.ABC): + pass + + @dataclass class LogSeriesValue(Generic[T]): @@ -292,7 +296,7 @@ def from_dict(data: dict, value_deserializer=lambda x: x) -> "LogSeriesValue[T]" @dataclass -class LogFloats(Operation): +class LogFloats(LogOperation): 
ValueType = LogSeriesValue[float] @@ -315,7 +319,7 @@ def from_dict(data: dict) -> "LogFloats": @dataclass -class LogStrings(Operation): +class LogStrings(LogOperation): ValueType = LogSeriesValue[str] @@ -360,7 +364,7 @@ def deserializer(obj) -> "ImageValue": @dataclass -class LogImages(Operation): +class LogImages(LogOperation): ValueType = LogSeriesValue[ImageValue] diff --git a/src/neptune/new/metadata_containers/metadata_container.py b/src/neptune/new/metadata_containers/metadata_container.py index 8c5f5859f..0dc966ef7 100644 --- a/src/neptune/new/metadata_containers/metadata_container.py +++ b/src/neptune/new/metadata_containers/metadata_container.py @@ -237,16 +237,15 @@ def define( value: Any, wait: bool = False, ) -> Attribute: - value = cast_value(value) - parsed_path = parse_path(path) - with self._lock: - old_attr = self._structure.get(parsed_path) - if old_attr: + old_attr = self.get_attribute(path) + if old_attr is not None: raise MetadataInconsistency("Attribute or namespace {} is already defined".format(path)) - attr = ValueToAttributeVisitor(self, parsed_path).visit(value) - self._structure.set(parsed_path, attr) - attr.process_assignment(value, wait) + + neptune_value = cast_value(value) + attr = ValueToAttributeVisitor(self, parse_path(path)).visit(neptune_value) + self.set_attribute(path, attr) + attr.process_assignment(neptune_value, wait) return attr def get_attribute(self, path: str) -> Optional[Attribute]: diff --git a/src/neptune/new/types/series/series.py b/src/neptune/new/types/series/series.py index 73ab12fd6..9cd1f03b1 100644 --- a/src/neptune/new/types/series/series.py +++ b/src/neptune/new/types/series/series.py @@ -36,3 +36,6 @@ def accept(self, visitor: "ValueVisitor[Ret]") -> Ret: @abc.abstractmethod def values(self): pass + + def __len__(self): + return len(self.values) diff --git a/src/neptune/new/types/series/string_series.py b/src/neptune/new/types/series/string_series.py index 75f1e2875..d7a9b457d 100644 --- 
a/src/neptune/new/types/series/string_series.py +++ b/src/neptune/new/types/series/string_series.py @@ -27,12 +27,16 @@ Ret = TypeVar("Ret") +MAX_STRING_SERIES_VALUE_LENGTH = 1000 + class StringSeries(Series): def __init__(self, values): if not is_collection(values): raise TypeError("`values` is not a collection") - self._values = [str(value) for value in values] + values_str = [str(val) for val in values] + self._truncated = any([len(value) > MAX_STRING_SERIES_VALUE_LENGTH for value in values_str]) + self._values = [value[:MAX_STRING_SERIES_VALUE_LENGTH] for value in values_str] def accept(self, visitor: "ValueVisitor[Ret]") -> Ret: return visitor.visit_string_series(self) @@ -41,5 +45,10 @@ def accept(self, visitor: "ValueVisitor[Ret]") -> Ret: def values(self): return self._values + @property + def truncated(self): + """True if any value had to be truncated to `MAX_STRING_SERIES_VALUE_LENGTH`""" + return self._truncated + def __str__(self): return "StringSeries({})".format(str(self.values)) diff --git a/src/neptune/new/types/type_casting.py b/src/neptune/new/types/type_casting.py index b5b5d643a..a39a1193d 100644 --- a/src/neptune/new/types/type_casting.py +++ b/src/neptune/new/types/type_casting.py @@ -15,10 +15,13 @@ # import argparse from datetime import datetime -from typing import Any +from typing import ( + Any, + Collection, + Union, +) from neptune.common.deprecation import warn_once -from neptune.new.handler import Handler from neptune.new.internal.utils import ( is_bool, is_dict_like, @@ -37,11 +40,19 @@ from neptune.new.types.atoms.float import Float from neptune.new.types.atoms.string import String from neptune.new.types.namespace import Namespace +from neptune.new.types.series import ( + FileSeries, + FloatSeries, + StringSeries, +) +from neptune.new.types.series.series import Series from neptune.new.types.value import Value from neptune.new.types.value_copy import ValueCopy def cast_value(value: Any) -> Value: + from neptune.new.handler import 
Handler + if isinstance(value, Value): return value elif isinstance(value, Handler): @@ -71,8 +82,39 @@ def cast_value(value: Any) -> Value: message="The object you're logging will be implicitly cast to a string." " We'll end support of this behavior in `neptune-client==1.0.0`." " To log the object as a string, use `str(object)` instead.", - stack_level=3, + stack_level=6, ) return String(str(value)) else: raise TypeError("Value of unsupported type {}".format(type(value))) + + +def cast_value_for_extend(values: Union[Namespace, Series, Collection[Any]]) -> Union[Series, Namespace]: + if isinstance(values, Namespace): + return values + elif is_dict_like(values): + return Namespace(values) + elif isinstance(values, Series): + return values + + sample_val = next(iter(values)) + if isinstance(sample_val, File): + return FileSeries(values=values) + elif File.is_convertable_to_image(sample_val): + return FileSeries(values=values) + elif File.is_convertable_to_html(sample_val): + return FileSeries(values=values) + elif is_string(sample_val): + return StringSeries(values=values) + elif is_float_like(sample_val): + return FloatSeries(values=values) + elif is_string_like(sample_val): + warn_once( + message="The object you're logging will be implicitly cast to a string." + " We'll end support of this behavior in `neptune-client==1.0.0`." 
+ " To log the object as a string, use `str(object)` instead.", + stack_level=4, + ) + return StringSeries(values=values) + else: + raise TypeError("Value of unsupported type List[{}]".format(type(sample_val))) diff --git a/tests/e2e/standard/test_series.py b/tests/e2e/standard/test_series.py index ecad56f5b..59cf4b173 100644 --- a/tests/e2e/standard/test_series.py +++ b/tests/e2e/standard/test_series.py @@ -80,3 +80,98 @@ def test_log_images(self, container: MetadataContainer): for i in range(4): with Image.open(f"all/{i}.png") as img: assert img == image_to_png(image=images[i]) + + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_append_numbers(self, container: MetadataContainer): + key = self.gen_key() + values = [random.random() for _ in range(50)] + for value in values: + container[key].append(value) + container.sync() + + assert container[key].fetch_last() == values[-1] + + fetched_values = container[key].fetch_values() + assert list(fetched_values["value"]) == values + + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_append_strings(self, container: MetadataContainer): + key = self.gen_key() + values = [fake.word() for _ in range(50)] + for value in values: + container[key].append(value) + container.sync() + + assert container[key].fetch_last() == values[-1] + + fetched_values = container[key].fetch_values() + assert list(fetched_values["value"]) == values + + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_append_images(self, container: MetadataContainer): + key = self.gen_key() + # images with size between 200KB - 12MB + images = list(generate_image(size=2**n) for n in range(8, 12)) + for value in images: + container[key].append(value) + container.sync() + + with tmp_context(): + container[key].download_last("last") + container[key].download("all") + + with Image.open("last/3.png") as img: + assert img == image_to_png(image=images[-1]) + + for i 
in range(4): + with Image.open(f"all/{i}.png") as img: + assert img == image_to_png(image=images[i]) + + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_extend_numbers(self, container: MetadataContainer): + key = self.gen_key() + values = [random.random() for _ in range(50)] + + container[key].extend([values[0]]) + container[key].extend(values[1:]) + container.sync() + + assert container[key].fetch_last() == values[-1] + + fetched_values = container[key].fetch_values() + assert list(fetched_values["value"]) == values + + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_extend_strings(self, container: MetadataContainer): + key = self.gen_key() + values = [fake.word() for _ in range(50)] + + container[key].extend([values[0]]) + container[key].extend(values[1:]) + container.sync() + + assert container[key].fetch_last() == values[-1] + + fetched_values = container[key].fetch_values() + assert list(fetched_values["value"]) == values + + @pytest.mark.parametrize("container", AVAILABLE_CONTAINERS, indirect=True) + def test_extend_images(self, container: MetadataContainer): + key = self.gen_key() + # images with size between 200KB - 12MB + images = list(generate_image(size=2**n) for n in range(8, 12)) + + container[key].extend([images[0]]) + container[key].extend(images[1:]) + container.sync() + + with tmp_context(): + container[key].download_last("last") + container[key].download("all") + + with Image.open("last/3.png") as img: + assert img == image_to_png(image=images[-1]) + + for i in range(4): + with Image.open(f"all/{i}.png") as img: + assert img == image_to_png(image=images[i]) diff --git a/tests/unit/neptune/new/internal/test_operations.py b/tests/unit/neptune/new/internal/test_operations.py index 66077e9a5..b59bd2e5d 100644 --- a/tests/unit/neptune/new/internal/test_operations.py +++ b/tests/unit/neptune/new/internal/test_operations.py @@ -13,6 +13,7 @@ # See the License for the specific 
language governing permissions and # limitations under the License. # +import inspect import json import unittest import uuid @@ -53,12 +54,20 @@ class TestOperations(unittest.TestCase): def test_serialization_to_dict(self): - classes = {cls.__name__ for cls in all_subclasses(Operation)} + classes = {cls for cls in all_subclasses(Operation)} + # drop abstract classes + for cls in classes.copy(): + if inspect.isabstract(cls): + classes.remove(cls) + + # test every Operation subclass and drop from `classes` for obj in self._list_objects(): - if obj.__class__.__name__ in classes: - classes.remove(obj.__class__.__name__) + if obj.__class__ in classes: + classes.remove(obj.__class__) deserialized_obj = Operation.from_dict(json.loads(json.dumps(obj.to_dict()))) self.assertEqual(obj.__dict__, deserialized_obj.__dict__) + + # expect no Operation subclass left self.assertEqual(classes, set()) @staticmethod diff --git a/tests/unit/neptune/new/test_handler.py b/tests/unit/neptune/new/test_handler.py index a817a232c..cd6c914fe 100644 --- a/tests/unit/neptune/new/test_handler.py +++ b/tests/unit/neptune/new/test_handler.py @@ -15,6 +15,7 @@ # import argparse import os +import time import unittest from dataclasses import dataclass from datetime import ( @@ -46,7 +47,10 @@ API_TOKEN_ENV_NAME, PROJECT_ENV_NAME, ) -from neptune.new.exceptions import FileNotFound +from neptune.new.exceptions import ( + FileNotFound, + NeptuneUserApiInputException, +) from neptune.new.types import File as FileVal from neptune.new.types.atoms.artifact import Artifact from neptune.new.types.atoms.datetime import Datetime as DatetimeVal @@ -212,6 +216,54 @@ def test_log(self): self.assertEqual(exp["some"]["str"]["val"].fetch_last(), "some text") self.assertIsInstance(exp.get_structure()["some"]["img"]["val"], FileSeries) + def test_log_dict(self): + exp = init(mode="debug", flush_period=0.5) + dict_value = {"key-a": "value-a", "key-b": "value-b"} + exp["some/num/val"].log(dict_value) + 
self.assertEqual(exp["some"]["num"]["val"].fetch_last(), str(dict_value)) + + def test_log_complex_input(self): + exp = init(mode="debug", flush_period=0.5) + exp["train/listOfDicts"].log([{"1a": 1, "1b": 1}, {"1a": 2, "1b": 2}]) + exp["train/listOfListsOfDicts"].log( + [[{"2a": 11, "2b": 11}, {"2a": 12, "2b": 12}], [{"2a": 21, "2b": 21}, {"2a": 22, "2b": 22}]] + ) + + self.assertEqual(exp["train"]["listOfDicts"].fetch_last(), "{'1a': 2, '1b': 2}") + self.assertEqual( + exp["train"]["listOfListsOfDicts"].fetch_last(), "[{'2a': 21, '2b': 21}, {'2a': 22, '2b': 22}]" + ) + + def test_append(self): + exp = init(mode="debug", flush_period=0.5) + exp["some/num/val"].append(5) + exp["some/str/val"].append("some text") + exp["some/img/val"].append(FileVal.as_image(PIL.Image.new("RGB", (60, 30), color="red"))) + exp["some/img/val"].append(PIL.Image.new("RGB", (60, 30), color="red")) + self.assertEqual(exp["some"]["num"]["val"].fetch_last(), 5) + self.assertEqual(exp["some"]["str"]["val"].fetch_last(), "some text") + self.assertIsInstance(exp.get_structure()["some"]["img"]["val"], FileSeries) + + def test_append_dict(self): + exp = init(mode="debug", flush_period=0.5) + dict_value = {"key-a": "value-a", "key-b": "value-b"} + exp["some/num/val"].append(dict_value) + self.assertEqual(exp["some"]["num"]["val"]["key-a"].fetch_last(), "value-a") + self.assertEqual(exp["some"]["num"]["val"]["key-b"].fetch_last(), "value-b") + + def test_append_complex_input(self): + exp = init(mode="debug", flush_period=0.5) + exp["train/dictOfDicts"].append( + { + "key-a": {"aa": 11, "ab": 22}, + "key-b": {"ba": 33, "bb": 44}, + } + ) + self.assertEqual(exp["train"]["dictOfDicts"]["key-a"]["aa"].fetch_last(), 11) + self.assertEqual(exp["train"]["dictOfDicts"]["key-a"]["ab"].fetch_last(), 22) + self.assertEqual(exp["train"]["dictOfDicts"]["key-b"]["ba"].fetch_last(), 33) + self.assertEqual(exp["train"]["dictOfDicts"]["key-b"]["bb"].fetch_last(), 44) + def test_log_many_values(self): exp = 
init(mode="debug", flush_period=0.5) exp["some/num/val"].log([5, 10, 15]) @@ -226,6 +278,111 @@ def test_log_many_values(self): self.assertEqual(exp["some"]["str"]["val"].fetch_last(), "other") self.assertIsInstance(exp.get_structure()["some"]["img"]["val"], FileSeries) + def test_append_many_values_cause_error(self): + exp = init(mode="debug", flush_period=0.5) + with self.assertRaises(NeptuneUserApiInputException): + exp["some/num/val"].append([]) + with self.assertRaises(NeptuneUserApiInputException): + exp["some/num/val"].append([5, 10, 15]) + with self.assertRaises(NeptuneUserApiInputException): + exp["some/str/val"].append(["some text", "other"]) + with self.assertRaises(NeptuneUserApiInputException): + exp["some/num/val"].append({"key-a": [1, 2]}) + with self.assertRaises(NeptuneUserApiInputException): + exp["some/img/val"].append( + [ + FileVal.as_image(PIL.Image.new("RGB", (60, 30), color="red")), + FileVal.as_image(PIL.Image.new("RGB", (20, 90), color="red")), + ] + ) + + def test_extend(self): + exp = init(mode="debug", flush_period=0.5) + exp["some/num/val"].extend([5, 7]) + exp["some/str/val"].extend(["some", "text"]) + exp["some/img/val"].extend( + [ + FileVal.as_image(PIL.Image.new("RGB", (60, 30), color="red")), + FileVal.as_image(PIL.Image.new("RGB", (20, 90), color="blue")), + ] + ) + self.assertEqual(exp["some"]["num"]["val"].fetch_last(), 7) + self.assertEqual(exp["some"]["str"]["val"].fetch_last(), "text") + self.assertIsInstance(exp.get_structure()["some"]["img"]["val"], FileSeries) + + def test_extend_dict(self): + exp = init(mode="debug", flush_period=0.5) + dict_value = {"key-a": ["value-a", "value-aa"], "key-b": ["value-b", "value-bb"], "key-c": ["ccc"]} + exp["some/num/val"].extend(dict_value) + self.assertEqual(exp["some"]["num"]["val"]["key-a"].fetch_last(), "value-aa") + self.assertEqual(exp["some"]["num"]["val"]["key-b"].fetch_last(), "value-bb") + self.assertEqual(exp["some"]["num"]["val"]["key-c"].fetch_last(), "ccc") + + def 
test_extend_dict_in_list(self): + """We expect that everything which is inside Collection is mapped to atoms""" + exp = init(mode="debug", flush_period=0.5) + exp["train/listOfDicts"].extend([{"1a": 1, "1b": 1}, {"1a": 2, "1b": 2}]) # inside list -> mapped to str + exp["train/listOfDictOfDicts"].extend( + [ + {"key-a": {"a1": 11, "a2": 22}}, # inside list -> mapped to str + {"key-b": {"b1": 33, "b2": 44}}, + {"key-a": {"xx": 11, "yy": 22}}, + ] + ) + exp["train/listOfListsOfDicts"].extend( + [ + [{"2a": 11, "2b": 11}, {"2a": 12, "2b": 12}], # inside list -> mapped to str + [{"2a": 21, "2b": 21}, {"2a": 22, "2b": 22}], + ] + ) + self.assertEqual(exp["train"]["listOfDicts"].fetch_last(), "{'1a': 2, '1b': 2}") + self.assertEqual(exp["train"]["listOfDictOfDicts"].fetch_last(), "{'key-a': {'xx': 11, 'yy': 22}}") + self.assertEqual( + exp["train"]["listOfListsOfDicts"].fetch_last(), "[{'2a': 21, '2b': 21}, {'2a': 22, '2b': 22}]" + ) + + def test_extend_nested(self): + """We expect that we are able to log arbitrary tree structure""" + exp = init(mode="debug", flush_period=0.5) + + exp["train/simple_dict"].extend({"list1": [1, 2, 3], "list2": [10, 20, 30]}) + exp["train/simple_dict"].extend( + { + "list1": [4, 5, 6], + } + ) + self.assertEqual(exp["train"]["simple_dict"]["list1"].fetch_last(), 6) + self.assertEqual(list(exp["train"]["simple_dict"]["list1"].fetch_values().value), [1, 2, 3, 4, 5, 6]) + self.assertEqual(exp["train"]["simple_dict"]["list2"].fetch_last(), 30) + self.assertEqual(list(exp["train"]["simple_dict"]["list2"].fetch_values().value), [10, 20, 30]) + + exp["train/different-depths"].extend( + {"lvl1": {"lvl1.1": [1, 2, 3], "lvl1.2": {"lvl1.2.1": [1]}}, "lvl2": [10, 20]} + ) + exp["train/different-depths/lvl1"].extend({"lvl1.2": {"lvl1.2.1": [2, 3]}}) + self.assertEqual(exp["train"]["different-depths"]["lvl1"]["lvl1.1"].fetch_last(), 3) + self.assertEqual(list(exp["train"]["different-depths"]["lvl1"]["lvl1.1"].fetch_values().value), [1, 2, 3]) + 
self.assertEqual(exp["train"]["different-depths"]["lvl1"]["lvl1.2"]["lvl1.2.1"].fetch_last(), 3) + self.assertEqual( + list(exp["train"]["different-depths"]["lvl1"]["lvl1.2"]["lvl1.2.1"].fetch_values().value), [1, 2, 3] + ) + self.assertEqual(exp["train"]["different-depths"]["lvl2"].fetch_last(), 20) + self.assertEqual(list(exp["train"]["different-depths"]["lvl2"].fetch_values().value), [10, 20]) + + def test_extend_nested_with_wrong_parameters(self): + """We expect an error when the number of steps or timestamps does not match the number of values""" + exp = init(mode="debug", flush_period=0.5) + + with self.assertRaises(NeptuneUserApiInputException): + # wrong number of steps + exp["train/simple_dict"].extend(values={"list1": [1, 2, 3], "list2": [10, 20, 30]}, steps=[0, 1]) + + with self.assertRaises(NeptuneUserApiInputException): + # wrong number of timestamps + exp["train/simple_dict"].extend( + values={"list1": [1, 2, 3], "list2": [10, 20, 30]}, timestamps=[time.time()] * 2 + ) + def test_log_value_errors(self): exp = init(mode="debug", flush_period=0.5) img = FileVal.as_image(PIL.Image.new("RGB", (60, 30), color="red")) @@ -265,6 +422,34 @@ def setUpClass(cls) -> None: os.environ[PROJECT_ENV_NAME] = "organization/project" os.environ[API_TOKEN_ENV_NAME] = ANONYMOUS + def test_append_errors(self): + exp = init(mode="debug", flush_period=0.5) + img = FileVal.as_image(PIL.Image.new("RGB", (60, 30), color="red")) + + exp["some/num/val"].append(5, step=1) + with self.assertRaises(ValueError): + exp["some/num/val"].append("str") + with self.assertRaises(TypeError): + exp["some/num/val"].append(img) + + exp["some/str/val"].append("str", step=1) + exp["some/img/val"].append(img, step=1) + with self.assertRaises(TypeError): + exp["some/img/val"].append(5) + with self.assertRaises(FileNotFound): + exp["some/img/val"].append("path") + + self.assertEqual(exp["some"]["num"]["val"].fetch_last(), 5) + self.assertEqual(exp["some"]["str"]["val"].fetch_last(), "str") + 
self.assertIsInstance(exp.get_structure()["some"]["img"]["val"], FileSeries) + + def test_extend_value_errors(self): + exp = init(mode="debug", flush_period=0.5) + with self.assertRaises(NeptuneUserApiInputException): + exp["x"].extend(10, step=10) + with self.assertRaises(ValueError): + exp["x"].extend([5, "str"]) + def test_assign_set(self): exp = init(mode="debug", flush_period=0.5) exp["some/str/val"].assign(StringSetVal(["tag1", "tag2"]), wait=True) @@ -485,6 +670,28 @@ def test_float_like_types(self): with self.assertRaises(ValueError): exp["attr3"].log([4, "234a"]) + def test_append_float_like_types(self): + exp = init(mode="debug", flush_period=0.5) + exp["attr"].append(self.FloatLike(34)) + self.assertEqual(exp["attr"].fetch_last(), 34) + exp["attr"].append("345") + exp["attr"].append(self.FloatLike(34)) + exp["attr"].append(4) + exp["attr"].append(13.0) + self.assertEqual(exp["attr"].fetch_last(), 13) + with self.assertRaises(ValueError): + exp["attr"].append(4) + exp["attr"].append("234a") + + def test_extend_float_like_types(self): + exp = init(mode="debug", flush_period=0.5) + exp["attr"].extend([self.FloatLike(34)]) + self.assertEqual(exp["attr"].fetch_last(), 34) + exp["attr"].extend(["345", self.FloatLike(34), 4, 13.0]) + self.assertEqual(exp["attr"].fetch_last(), 13) + with self.assertRaises(ValueError): + exp["attr"].extend([4, "234a"]) + def test_string_like_types(self): exp = init(mode="debug", flush_period=0.5) @@ -496,6 +703,47 @@ def test_string_like_types(self): exp["attr2"].log(["345", self.FloatLike(34), 4, 13.0]) self.assertEqual(exp["attr2"].fetch_last(), "13.0") + def test_append_string_like_types(self): + exp = init(mode="debug", flush_period=0.5) + + exp["attr1"] = "234" + exp["attr1"] = self.FloatLike(12356) + self.assertEqual(exp["attr1"].fetch(), "TestOtherBehaviour.FloatLike(value=12356)") + exp["attr2"].append("xxx") + exp["attr2"].append("345") + exp["attr2"].append(self.FloatLike(34)) + exp["attr2"].append(4) + 
exp["attr2"].append(13.0) + self.assertEqual(exp["attr2"].fetch_last(), "13.0") + + def test_extend_string_like_types(self): + exp = init(mode="debug", flush_period=0.5) + exp["attr"].extend(["kebab"]) + exp["attr"].extend(["345", self.FloatLike(34), 4, 13.0]) + self.assertEqual(exp["attr"].fetch_last(), "13.0") + + def test_assign_dict(self): + exp = init(mode="debug", flush_period=0.5) + exp["params"] = { + "x": 5, + "metadata": {"name": "Trol", "age": 376}, + "toys": StringSeriesVal(["cudgel", "hat"]), + "nested": {"nested": {"deep_secret": FloatSeriesVal([13, 15])}}, + } + self.assertEqual(exp["params/x"].fetch(), 5) + self.assertEqual(exp["params/metadata/name"].fetch(), "Trol") + self.assertEqual(exp["params/metadata/age"].fetch(), 376) + self.assertEqual(exp["params/toys"].fetch_last(), "hat") + self.assertEqual(exp["params/nested/nested/deep_secret"].fetch_last(), 15) + + def test_convertable_to_dict(self): + exp = init(mode="debug", flush_period=0.5) + exp["params"] = argparse.Namespace(foo="bar", baz=42, nested=argparse.Namespace(nested_attr=[1, 2, 3], num=55)) + self.assertEqual(exp["params/foo"].fetch(), "bar") + self.assertEqual(exp["params/baz"].fetch(), 42) + self.assertEqual(exp["params/nested/nested_attr"].fetch(), "[1, 2, 3]") + self.assertEqual(exp["params/nested/num"].fetch(), 55) + def test_object(self): class Dog: def __init__(self, name, fierceness): @@ -538,7 +786,6 @@ def test_representation(self): @dataclass class FloatLike: - value: float def __float__(self): diff --git a/tests/unit/neptune/new/test_imports.py b/tests/unit/neptune/new/test_imports.py index cbd8836cc..3a1aacf35 100644 --- a/tests/unit/neptune/new/test_imports.py +++ b/tests/unit/neptune/new/test_imports.py @@ -161,7 +161,6 @@ Attribute, Generic, Iterable, - Operation, Series, SeriesVal, TypeVar,