Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

histogram: make summary_v2.histogram_pb TPU compatible #5409

Merged
merged 4 commits into from Nov 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 35 additions & 0 deletions tensorboard/plugins/histogram/summary_test.py
Expand Up @@ -175,6 +175,41 @@ class SummaryV2PbTest(SummaryBaseTest, tf.test.TestCase):
def histogram(self, *args, **kwargs):
    """Forward all arguments to `summary.histogram_pb` and return its result."""
    summary_pb = summary.histogram_pb(*args, **kwargs)
    return summary_pb

def test_singleton_input(self):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add the other 3 test cases for v3 as well?

test_empty_input, test_empty_input_of_high_rank, test_zero_bucket_count

See also other comment about the empty input case.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

pb = self.histogram("twelve", [12])
buckets = tensor_util.make_ndarray(pb.value[0].tensor)
# By default there will be 30 buckets.
expected_buckets = np.array(
[[12, 12, 0] for _ in range(29)] + [[12, 12, 1]]
)
np.testing.assert_allclose(buckets, expected_buckets)

def test_input_with_all_same_values(self):
    """Check the histogram produced when every data point equals 12.

    All bucket edges are expected to be 12, with the full count of 3
    placed in the final bucket and zero counts everywhere else.
    """
    summary_pb = self.histogram("twelven", [12, 12, 12])
    actual = tensor_util.make_ndarray(summary_pb.value[0].tensor)
    # The default bucket count is 30: 29 empty rows plus one counted row.
    empty_rows = [[12, 12, 0]] * 29
    counted_row = [[12, 12, 3]]
    expected = np.array(empty_rows + counted_row)
    np.testing.assert_allclose(actual, expected)

def test_empty_input(self):
    """Check that empty data yields an all-zero histogram of shape (30, 3)."""
    summary_pb = self.histogram("empty", [])
    actual = tensor_util.make_ndarray(summary_pb.value[0].tensor)
    # The default bucket count is 30, so the expected result has 30 rows.
    expected = np.zeros((30, 3))
    np.testing.assert_allclose(actual, expected)

def test_empty_input_of_high_rank(self):
    """Check that nested-but-empty data yields an all-zero (30, 3) histogram."""
    nested_empty = [[[], []], [[], []]]
    summary_pb = self.histogram("empty_but_fancy", nested_empty)
    actual = tensor_util.make_ndarray(summary_pb.value[0].tensor)
    # The default bucket count is 30, so the expected result has 30 rows.
    expected = np.zeros((30, 3))
    np.testing.assert_allclose(actual, expected)

def test_zero_bucket_count(self):
    """Check that `buckets=0` yields an empty histogram of shape (0, 3)."""
    summary_pb = self.histogram("zero_bucket_count", [1, 1, 1], buckets=0)
    actual = tensor_util.make_ndarray(summary_pb.value[0].tensor)
    # No buckets requested: zero rows, still three columns per bucket.
    expected = np.reshape(np.array([]), (0, 3))
    np.testing.assert_array_equal(actual, expected)


class SummaryV2OpTest(SummaryBaseTest, tf.test.TestCase):
def setUp(self):
Expand Down
50 changes: 30 additions & 20 deletions tensorboard/plugins/histogram/summary_v2.py
Expand Up @@ -14,15 +14,22 @@
# ==============================================================================
"""Histogram summaries and TensorFlow operations to create them, V2 versions.

A histogram summary stores a list of buckets. Each bucket is encoded as
a triple `[left_edge, right_edge, count]`. Thus, a full histogram is
encoded as a tensor of dimension `[k, 3]`.

In general, the value of `k` (the number of buckets) will be a constant,
like 30. There are two edge cases: if there is no data, then there are
no buckets (the shape is `[0, 3]`); and if there is data but all points
have the same value, then there is one bucket whose left and right
endpoints are the same (the shape is `[1, 3]`).
A histogram summary stores a list of buckets. Each bucket is encoded as a triple
`[left_edge, right_edge, count]`. Thus, a full histogram is encoded as a tensor
of shape `[k, 3]`, where the first `k - 1` buckets are closed-open
(`[left_edge, right_edge)`) and the last bucket is closed-closed
(`[left_edge, right_edge]`).

In general, the value of `k` (the number of buckets) will be a constant, like 30.
For V2 format, there are two edge cases: if there is no data, then there are no
buckets (the shape is `[0, 3]`); and if there is data but all points have the
same value, then there is one bucket whose left and right endpoints are the same
(the shape is `[1, 3]`).

For V3 format, the shape of the output histogram is always constant (`[k, 3]`).
In the case of empty data, the output will be an all-zero histogram of shape
`[k, 3]`, where all edges and counts are zeros. If there is data but all points
have the same value, then all buckets' left and right edges are the same and only
the last bucket has nonzero count.
"""

import contextlib
Expand Down Expand Up @@ -257,11 +264,11 @@ def histogram_pb(tag, data, buckets=None, description=None):
tag: String tag for the summary.
data: A `np.array` or array-like form of any shape. Must have type
castable to `float`.
buckets: Optional positive `int`. The output will have this
many buckets, except in two edge cases. If there is no data, then
there are no buckets. If there is data but all points have the
same value, then there is one bucket whose left and right
endpoints are the same.
buckets: Optional positive `int`. The output shape will always be
[buckets, 3]. If there is no data, then an all-zero array of shape
[buckets, 3] will be returned. If there is data but all points have
the same value, then all buckets' left and right endpoints are the
same and only the last bucket has nonzero count.
description: Optional long-form description for this summary, as a
`str`. Markdown is supported. Defaults to empty.

Expand All @@ -270,15 +277,18 @@ def histogram_pb(tag, data, buckets=None, description=None):
"""
bucket_count = DEFAULT_BUCKET_COUNT if buckets is None else buckets
data = np.array(data).flatten().astype(float)
if data.size == 0:
buckets = np.array([]).reshape((0, 3))
if bucket_count == 0 or data.size == 0:
histogram_buckets = np.zeros((bucket_count, 3))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optional, but this implementation also handles the 0-bucket-count case, so you could combine these two conditions into just

if bucket_count == 0 or data.size == 0:
    histogram_buckets = np.zeros((bucket_count, 3))

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Thanks!

else:
min_ = np.min(data)
max_ = np.max(data)
range_ = max_ - min_
if range_ == 0:
center = min_
buckets = np.array([[center - 0.5, center + 0.5, float(data.size)]])
left_edges = right_edges = np.array([min_] * bucket_count)
bucket_counts = np.array([0] * (bucket_count - 1) + [data.size])
histogram_buckets = np.array(
[left_edges, right_edges, bucket_counts]
).transpose()
else:
bucket_width = range_ / bucket_count
offsets = data - min_
Expand All @@ -295,10 +305,10 @@ def histogram_pb(tag, data, buckets=None, description=None):
edges = np.linspace(min_, max_, bucket_count + 1)
left_edges = edges[:-1]
right_edges = edges[1:]
buckets = np.array(
histogram_buckets = np.array(
[left_edges, right_edges, bucket_counts]
).transpose()
tensor = tensor_util.make_tensor_proto(buckets, dtype=np.float64)
tensor = tensor_util.make_tensor_proto(histogram_buckets, dtype=np.float64)

summary_metadata = metadata.create_summary_metadata(
display_name=None, description=description
Expand Down