Skip to content

Commit

Permalink
histogram: make summary_v2.histogram_pb TPU compatible (tensorflow#…
Browse files Browse the repository at this point in the history
…5409)

* make histogram_pb tpu compatible

* remove superfluous trailing whitespaces

* fix empty data case & update docs

* merge the empty data and zero bucket count cases
  • Loading branch information
yatbear committed Mar 27, 2023
1 parent 087f414 commit 55547c3
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 20 deletions.
35 changes: 35 additions & 0 deletions tensorboard/plugins/histogram/summary_test.py
Expand Up @@ -175,6 +175,41 @@ class SummaryV2PbTest(SummaryBaseTest, tf.test.TestCase):
def histogram(self, *args, **kwargs):
    """Produce the summary under test by forwarding to `summary.histogram_pb`."""
    summary_pb = summary.histogram_pb(*args, **kwargs)
    return summary_pb

def test_singleton_input(self):
    """A lone data point yields 30 identical-edge buckets, counted in the last."""
    summary_pb = self.histogram("twelve", [12])
    actual = tensor_util.make_ndarray(summary_pb.value[0].tensor)
    # The default bucket count is 30: 29 empty rows, then one row holding
    # the single observation.
    empty_rows = [[12, 12, 0]] * 29
    final_row = [[12, 12, 1]]
    expected = np.array(empty_rows + final_row)
    np.testing.assert_allclose(actual, expected)

def test_input_with_all_same_values(self):
    """Repeated identical values are all counted in the last of 30 buckets."""
    summary_pb = self.histogram("twelven", [12, 12, 12])
    actual = tensor_util.make_ndarray(summary_pb.value[0].tensor)
    # The default bucket count is 30: 29 empty rows, then one row holding
    # all three observations.
    empty_rows = [[12, 12, 0]] * 29
    final_row = [[12, 12, 3]]
    expected = np.array(empty_rows + final_row)
    np.testing.assert_allclose(actual, expected)

def test_empty_input(self):
    """Empty data produces an all-zero histogram with the default 30 buckets."""
    summary_pb = self.histogram("empty", [])
    actual = tensor_util.make_ndarray(summary_pb.value[0].tensor)
    # Default bucket count is 30, and each bucket is a 3-element row.
    expected = np.zeros(shape=(30, 3))
    np.testing.assert_allclose(actual, expected)

def test_empty_input_of_high_rank(self):
    """Nested-but-empty data also produces an all-zero 30-bucket histogram."""
    nested_empty = [[[], []], [[], []]]
    summary_pb = self.histogram("empty_but_fancy", nested_empty)
    actual = tensor_util.make_ndarray(summary_pb.value[0].tensor)
    # Default bucket count is 30, and each bucket is a 3-element row.
    expected = np.zeros(shape=(30, 3))
    np.testing.assert_allclose(actual, expected)

def test_zero_bucket_count(self):
    """Requesting zero buckets yields an empty array with three columns."""
    summary_pb = self.histogram("zero_bucket_count", [1, 1, 1], buckets=0)
    actual = tensor_util.make_ndarray(summary_pb.value[0].tensor)
    # No rows at all, but the column count stays at 3.
    expected = np.array([]).reshape((0, 3))
    np.testing.assert_array_equal(actual, expected)


class SummaryV2OpTest(SummaryBaseTest, tf.test.TestCase):
def setUp(self):
Expand Down
50 changes: 30 additions & 20 deletions tensorboard/plugins/histogram/summary_v2.py
Expand Up @@ -14,15 +14,22 @@
# ==============================================================================
"""Histogram summaries and TensorFlow operations to create them, V2 versions.
A histogram summary stores a list of buckets. Each bucket is encoded as
a triple `[left_edge, right_edge, count]`. Thus, a full histogram is
encoded as a tensor of dimension `[k, 3]`.
In general, the value of `k` (the number of buckets) will be a constant,
like 30. There are two edge cases: if there is no data, then there are
no buckets (the shape is `[0, 3]`); and if there is data but all points
have the same value, then there is one bucket whose left and right
endpoints are the same (the shape is `[1, 3]`).
A histogram summary stores a list of buckets. Each bucket is encoded as a triple
`[left_edge, right_edge, count]`. Thus, a full histogram is encoded as a tensor
of dimension `[k, 3]`, where the first `k - 1` buckets are closed-open and the
last bucket is closed-closed.
In general, the value of `k` (the number of buckets) will be a constant, like 30.
For V2 format, there are two edge cases: if there is no data, then there are no
buckets (the shape is `[0, 3]`); and if there is data but all points have the
same value, then there is one bucket whose left and right endpoints are the same
(the shape is `[1, 3]`).
For V3 format, the shape of the output histogram is always constant (`[k, 3]`).
In the case of empty data, the output will be an all-zero histogram of shape
`[k, 3]`, where all edges and counts are zeros. If there is data but all points
have the same value, then all buckets' left and right edges are the same and only
the last bucket has nonzero count.
"""

import contextlib
Expand Down Expand Up @@ -257,11 +264,11 @@ def histogram_pb(tag, data, buckets=None, description=None):
tag: String tag for the summary.
data: A `np.array` or array-like form of any shape. Must have type
castable to `float`.
buckets: Optional positive `int`. The output will have this
many buckets, except in two edge cases. If there is no data, then
there are no buckets. If there is data but all points have the
same value, then there is one bucket whose left and right
endpoints are the same.
buckets: Optional positive `int`. The output shape will always be
[buckets, 3]. If there is no data, then an all-zero array of shape
[buckets, 3] will be returned. If there is data but all points have
the same value, then all buckets' left and right endpoints are the
same and only the last bucket has nonzero count. A value of 0 is also
accepted and yields an empty array of shape [0, 3].
description: Optional long-form description for this summary, as a
`str`. Markdown is supported. Defaults to empty.
Expand All @@ -270,15 +277,18 @@ def histogram_pb(tag, data, buckets=None, description=None):
"""
bucket_count = DEFAULT_BUCKET_COUNT if buckets is None else buckets
data = np.array(data).flatten().astype(float)
if data.size == 0:
buckets = np.array([]).reshape((0, 3))
if bucket_count == 0 or data.size == 0:
histogram_buckets = np.zeros((bucket_count, 3))
else:
min_ = np.min(data)
max_ = np.max(data)
range_ = max_ - min_
if range_ == 0:
center = min_
buckets = np.array([[center - 0.5, center + 0.5, float(data.size)]])
left_edges = right_edges = np.array([min_] * bucket_count)
bucket_counts = np.array([0] * (bucket_count - 1) + [data.size])
histogram_buckets = np.array(
[left_edges, right_edges, bucket_counts]
).transpose()
else:
bucket_width = range_ / bucket_count
offsets = data - min_
Expand All @@ -295,10 +305,10 @@ def histogram_pb(tag, data, buckets=None, description=None):
edges = np.linspace(min_, max_, bucket_count + 1)
left_edges = edges[:-1]
right_edges = edges[1:]
buckets = np.array(
histogram_buckets = np.array(
[left_edges, right_edges, bucket_counts]
).transpose()
tensor = tensor_util.make_tensor_proto(buckets, dtype=np.float64)
tensor = tensor_util.make_tensor_proto(histogram_buckets, dtype=np.float64)

summary_metadata = metadata.create_summary_metadata(
display_name=None, description=description
Expand Down

0 comments on commit 55547c3

Please sign in to comment.