-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add parsedatetime op #5417
base: main
Are you sure you want to change the base?
Add parsedatetime op #5417
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# Copyright (c) ONNX Project Contributors | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
from datetime import datetime | ||
|
||
import numpy as np | ||
|
||
import onnx | ||
from onnx import numpy_helper | ||
from onnx.backend.test.case.base import Base | ||
from onnx.backend.test.case.node import expect | ||
|
||
|
||
class ParseDateTime(Base):
    """Backend node test cases for the ``ParseDateTime`` operator.

    Every case feeds one parsable and one unparsable string, so both the
    regular path and the ``default`` fallback are exercised.
    """

    # NOTE(review): a reviewer asked whether cases with a 2-D input and with
    # a zero-sized dimension should be added; they are not covered here yet.
    # NOTE(review): ``datetime.timestamp()`` interprets the naive parsed value
    # in the local time zone, so the expected outputs depend on the machine
    # that generates the test data - confirm this is intended.

    @staticmethod
    def export_float_nan_default() -> None:
        """Unparsable strings are replaced by a NaN default; output is double."""
        fmt = "%d/%m/%y %H:%M"
        default = float("NaN")
        node = onnx.helper.make_node(
            "ParseDateTime",
            inputs=["x"],
            outputs=["y"],
            format=fmt,
            unit="s",
            default=onnx.helper.make_tensor(
                name="default",
                data_type=onnx.TensorProto.DOUBLE,
                dims=[],
                # A rank-0 tensor still carries exactly one value; pass it as
                # a sized sequence rather than a 0-d array.
                vals=[default],
            ),
        )
        x = np.array(["21/11/06 16:30", "foobar"], dtype=object)
        y = []
        for s in x:
            try:
                # datetime.timestamp() returns a float
                y.append(datetime.strptime(s, fmt).timestamp())
            except ValueError:
                y.append(default)
        expect(
            node,
            inputs=[x],
            outputs=[np.array(y)],
            # Each case needs its own name, otherwise the generated data of
            # one case overwrites the other.
            name="test_parsedatetime_float_nan_default",
        )

    @staticmethod
    def export_int_default() -> None:
        """Unparsable strings are replaced by an int64 default; output is int64."""
        fmt = "%d/%m/%y %H:%M"
        default = np.iinfo(np.int64).min
        node = onnx.helper.make_node(
            "ParseDateTime",
            inputs=["x"],
            outputs=["y"],
            format=fmt,
            unit="s",
            default=onnx.helper.make_tensor(
                name="default",
                data_type=onnx.TensorProto.INT64,
                dims=[],
                vals=[default],
            ),
        )
        x = np.array(["21/11/06 16:30", "foobar"], dtype=object)
        y = []
        for s in x:
            try:
                y.append(datetime.strptime(s, fmt).timestamp())
            except ValueError:
                y.append(default)
        expect(
            node,
            inputs=[x],
            outputs=[np.array(y, np.int64)],
            name="test_parsedatetime_int_default",
        )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
221/11/06 16:302foobarBx |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,44 @@ | |
#include "onnx/defs/schema.h" | ||
|
||
namespace ONNX_NAMESPACE { | ||
static const char* ParseDateTime_doc = R"DOC(Parse a datetime string into a (floating point) Unix time stamp.)DOC"; | ||
ONNX_OPERATOR_SET_SCHEMA( | ||
ParseDateTime, | ||
20, | ||
OpSchema() | ||
.Input(0, "X", "Tensor with datetime strings", "T1", OpSchema::Single, true, 1, OpSchema::NonDifferentiable) | ||
.Output(0, "y", "Unix time stamps", "T2", OpSchema::Single, true, 1, OpSchema::NonDifferentiable) | ||
.Attr("format", "Format description in the syntax of C's `strptime`.", AttributeProto::STRING) | ||
.Attr( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would make this unit optional as well and choose one for the default. |
||
"unit", | ||
"Unit of the returned time stamp. Allowed values are: 's' (second), 'ms' (millisecond), 'us' (microsecond) or 'ns' (nanosecond).", | ||
Check warning on line 18 in onnx/defs/text/defs.cc GitHub Actions / Optional Lint
|
||
AttributeProto::STRING) | ||
.Attr( | ||
"default", | ||
"Default value to be used if the parsing fails. The tensor must be of rank 0 and either of type `tensor(int64)` or `tensor(double)`. The tensor type is the output type. If 'default' is specified, the output type is `tensor(int64)` and the behavior for failing to parse an input element is implementation defined.", | ||
Check warning on line 22 in onnx/defs/text/defs.cc GitHub Actions / Optional Lint
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If 'default' is not specified... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any combination (unit, default) not possible? The documentation says what the default value type is but not the default value. |
||
AttributeProto::TENSOR, | ||
OPTIONAL_VALUE) | ||
|
||
.TypeConstraint("T1", {"tensor(string)"}, "UTF-8 datetime strings") | ||
.TypeConstraint("T2", {"tensor(double)", "tensor(int64)"}, "Output type depends on 'default' attribute.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to support double here? In principle, users can encode it as |
||
.SetDoc(ParseDateTime_doc) | ||
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) { | ||
Check warning on line 29 in onnx/defs/text/defs.cc GitHub Actions / Optional Lint
|
||
auto* default_value = ctx.getAttribute("default"); | ||
|
||
if (hasInputShape(ctx, 0)) { | ||
propagateShapeFromInputToOutput(ctx, 0, 0); | ||
} | ||
|
||
if (nullptr == default_value) { | ||
updateOutputElemType(ctx, 0, TensorProto::INT64); | ||
return; | ||
} else { | ||
const TensorProto& tensor_proto = default_value->t(); | ||
updateOutputElemType(ctx, 0, tensor_proto.data_type()); | ||
return; | ||
} | ||
})); | ||
|
||
static const char* StringConcat_doc = | ||
R"DOC(StringConcat concatenates string tensors elementwise (with NumPy-style broadcasting support))DOC"; | ||
ONNX_OPERATOR_SET_SCHEMA( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Copyright (c) ONNX Project Contributors | ||
|
||
# SPDX-License-Identifier: Apache-2.0 | ||
# pylint: disable=W0221 | ||
from datetime import datetime | ||
|
||
import numpy as np | ||
|
||
from onnx.reference.op_run import OpRun | ||
|
||
|
||
class ParseDateTime(OpRun):
    """Reference implementation of the ``ParseDateTime`` operator."""

    def _run(self, x, format=None, unit=None, default=None):  # type: ignore
        """Parse every string of ``x`` into a Unix time stamp.

        ``format``, ``unit`` and ``default`` are operator attributes, hence
        they are declared with ``None`` defaults to tell them apart from the
        input ``x``.

        Args:
            x: Array of datetime strings, of any shape.
            format: ``strptime`` format description used for parsing.
            unit: Unit of the result: ``"s"``, ``"ms"``, ``"us"`` or ``"ns"``.
            default: Optional array holding the value used for elements
                that cannot be parsed; its dtype is the output dtype.

        Returns:
            A one-element tuple with an array shaped like ``x``. Its dtype is
            ``default.dtype``, or int64 when ``default`` is ``None``. Integer
            results are floored to the requested unit.

        Raises:
            ValueError: If ``format`` is missing or ``unit`` is not supported.
        """
        # Multiplier from seconds to each supported unit.
        unit_scale = {"s": 1, "ms": 10**3, "us": 10**6, "ns": 10**9}
        if format is None:
            raise ValueError("Attribute 'format' is required.")
        if unit not in unit_scale:
            raise ValueError(
                f"Invalid unit {unit!r}, expected one of {sorted(unit_scale)}."
            )
        scale = unit_scale[unit]

        if default is None:
            out_dtype = np.dtype(np.int64)
            # The specification leaves this case implementation defined; use
            # a fixed sentinel instead of relying on an undefined NaN-to-int
            # cast.
            fallback = np.iinfo(np.int64).min
        else:
            fallback = np.asarray(default)
            out_dtype = fallback.dtype
        as_int = np.issubdtype(out_dtype, np.integer)

        strings = np.asarray(x)
        flat = strings.ravel()
        out = np.zeros(flat.shape, dtype=out_dtype)
        # Failures are tracked with a mask so that the fallback is written
        # after the cast and never takes a detour through float64.
        failed = np.zeros(flat.shape, dtype=bool)
        for i, el in enumerate(flat):
            try:
                parsed = datetime.strptime(el, format)
            except ValueError:
                failed[i] = True
                continue
            # NOTE: ``timestamp()`` interprets naive datetimes in the local
            # time zone.
            if as_int:
                # Exact integer arithmetic: float64 cannot represent current
                # epochs at nanosecond resolution.
                whole = int(parsed.replace(microsecond=0).timestamp())
                out[i] = whole * scale + parsed.microsecond * scale // 1_000_000
            else:
                out[i] = parsed.timestamp() * scale
        out[failed] = fallback
        return (out.reshape(strings.shape),)
Check notice
Code scanning / CodeQL
Unused import Note