diff --git a/tensorboard/plugins/histogram/summary_test.py b/tensorboard/plugins/histogram/summary_test.py index aceae45b50b..89af32953ba 100644 --- a/tensorboard/plugins/histogram/summary_test.py +++ b/tensorboard/plugins/histogram/summary_test.py @@ -175,6 +175,41 @@ class SummaryV2PbTest(SummaryBaseTest, tf.test.TestCase): def histogram(self, *args, **kwargs): return summary.histogram_pb(*args, **kwargs) + def test_singleton_input(self): + pb = self.histogram("twelve", [12]) + buckets = tensor_util.make_ndarray(pb.value[0].tensor) + # By default there will be 30 buckets. + expected_buckets = np.array( + [[12, 12, 0] for _ in range(29)] + [[12, 12, 1]] + ) + np.testing.assert_allclose(buckets, expected_buckets) + + def test_input_with_all_same_values(self): + pb = self.histogram("twelven", [12, 12, 12]) + buckets = tensor_util.make_ndarray(pb.value[0].tensor) + # By default there will be 30 buckets. + expected_buckets = np.array( + [[12, 12, 0] for _ in range(29)] + [[12, 12, 3]] + ) + np.testing.assert_allclose(buckets, expected_buckets) + + def test_empty_input(self): + pb = self.histogram("empty", []) + buckets = tensor_util.make_ndarray(pb.value[0].tensor) + # By default there will be 30 buckets. + np.testing.assert_allclose(buckets, np.zeros((30, 3))) + + def test_empty_input_of_high_rank(self): + pb = self.histogram("empty_but_fancy", [[[], []], [[], []]]) + buckets = tensor_util.make_ndarray(pb.value[0].tensor) + # By default there will be 30 buckets. 
+ np.testing.assert_allclose(buckets, np.zeros((30, 3))) + + def test_zero_bucket_count(self): + pb = self.histogram("zero_bucket_count", [1, 1, 1], buckets=0) + buckets = tensor_util.make_ndarray(pb.value[0].tensor) + np.testing.assert_array_equal(buckets, np.array([]).reshape((0, 3))) + class SummaryV2OpTest(SummaryBaseTest, tf.test.TestCase): def setUp(self): diff --git a/tensorboard/plugins/histogram/summary_v2.py b/tensorboard/plugins/histogram/summary_v2.py index e1cd01846db..1b850f25872 100644 --- a/tensorboard/plugins/histogram/summary_v2.py +++ b/tensorboard/plugins/histogram/summary_v2.py @@ -14,15 +14,22 @@ # ============================================================================== """Histogram summaries and TensorFlow operations to create them, V2 versions. -A histogram summary stores a list of buckets. Each bucket is encoded as -a triple `[left_edge, right_edge, count]`. Thus, a full histogram is -encoded as a tensor of dimension `[k, 3]`. - -In general, the value of `k` (the number of buckets) will be a constant, -like 30. There are two edge cases: if there is no data, then there are -no buckets (the shape is `[0, 3]`); and if there is data but all points -have the same value, then there is one bucket whose left and right -endpoints are the same (the shape is `[1, 3]`). +A histogram summary stores a list of buckets. Each bucket is encoded as a triple +`[left_edge, right_edge, count]`. Thus, a full histogram is encoded as a tensor +of dimension `[k, 3]`, where the first `k - 1` buckets are closed-open and the +last bucket is closed-closed. + +In general, the value of `k` (the number of buckets) will be a constant, like 30. +For V2 format, there are two edge cases: if there is no data, then there are no +buckets (the shape is `[0, 3]`); and if there is data but all points have the +same value, then there is one bucket whose left and right endpoints are the same +(the shape is `[1, 3]`). 
+ +For V3 format, the shape of the output histogram is always constant (`[k, 3]`). +In the case of empty data, the output will be an all-zero histogram of shape +`[k, 3]`, where all edges and counts are zeros. If there is data but all points +have the same value, then all buckets' left and right edges are the same and only +the last bucket has nonzero count. """ import contextlib @@ -257,11 +264,11 @@ def histogram_pb(tag, data, buckets=None, description=None): tag: String tag for the summary. data: A `np.array` or array-like form of any shape. Must have type castable to `float`. - buckets: Optional positive `int`. The output will have this - many buckets, except in two edge cases. If there is no data, then - there are no buckets. If there is data but all points have the - same value, then there is one bucket whose left and right - endpoints are the same. + buckets: Optional non-negative `int`. The output shape will always be + [buckets, 3]. If there is no data, then an all-zero array of shape + [buckets, 3] will be returned. If there is data but all points have + the same value, then all buckets' left and right endpoints are the + same and only the last bucket has nonzero count. description: Optional long-form description for this summary, as a `str`. Markdown is supported. Defaults to empty. 
@@ -270,15 +277,18 @@ def histogram_pb(tag, data, buckets=None, description=None): """ bucket_count = DEFAULT_BUCKET_COUNT if buckets is None else buckets data = np.array(data).flatten().astype(float) - if data.size == 0: - buckets = np.array([]).reshape((0, 3)) + if bucket_count == 0 or data.size == 0: + histogram_buckets = np.zeros((bucket_count, 3)) else: min_ = np.min(data) max_ = np.max(data) range_ = max_ - min_ if range_ == 0: - center = min_ - buckets = np.array([[center - 0.5, center + 0.5, float(data.size)]]) + left_edges = right_edges = np.array([min_] * bucket_count) + bucket_counts = np.array([0] * (bucket_count - 1) + [data.size]) + histogram_buckets = np.array( + [left_edges, right_edges, bucket_counts] + ).transpose() else: bucket_width = range_ / bucket_count offsets = data - min_ @@ -295,10 +305,10 @@ def histogram_pb(tag, data, buckets=None, description=None): edges = np.linspace(min_, max_, bucket_count + 1) left_edges = edges[:-1] right_edges = edges[1:] - buckets = np.array( + histogram_buckets = np.array( [left_edges, right_edges, bucket_counts] ).transpose() - tensor = tensor_util.make_tensor_proto(buckets, dtype=np.float64) + tensor = tensor_util.make_tensor_proto(histogram_buckets, dtype=np.float64) summary_metadata = metadata.create_summary_metadata( display_name=None, description=description