Skip to content

Commit

Permalink
histogram: remove backwards buckets in v1 histogram migration (#5404)
Browse files Browse the repository at this point in the history
* remove empty buckets on both ends to avoid having `backwards` buckets

* fix typo

* use data min/max as bucket left/right hand limit

* grammar fix

* improve readability

  - improve docs
  - test extremal histogram values
  - fix tag names in test

* small change
  • Loading branch information
yatbear committed Nov 10, 2021
1 parent 97b6f6f commit cec1311
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 5 deletions.
36 changes: 31 additions & 5 deletions tensorboard/data_compat.py
Expand Up @@ -81,13 +81,39 @@ def make_summary(tag, metadata, data):


def _migrate_histogram_value(value):
"""Convert `old-style` histogram value to `new-style`.
The "old-style" format can have outermost bucket limits of -DBL_MAX and
DBL_MAX, which are problematic for visualization. We replace those here
with the actual min and max values seen in the input data, but then in
order to avoid introducing "backwards" buckets (where left edge > right
edge), we first must drop all empty buckets on the left and right ends.
"""
histogram_value = value.histo
bucket_lefts = [histogram_value.min] + histogram_value.bucket_limit[:-1]
bucket_rights = histogram_value.bucket_limit[:-1] + [histogram_value.max]
bucket_counts = histogram_value.bucket
buckets = np.array(
[bucket_lefts, bucket_rights, bucket_counts], dtype=np.float32
).transpose()
# Find the indices of the leftmost and rightmost non-empty buckets.
n = len(bucket_counts)
start = next((i for i in range(n) if bucket_counts[i] > 0), n)
end = next((i for i in reversed(range(n)) if bucket_counts[i] > 0), -1)
if start > end:
# If all input buckets were empty, treat it as a zero-bucket
# new-style histogram.
buckets = np.zeros([0, 3], dtype=np.float32)
else:
# Discard empty buckets on both ends, and keep only the "inner"
# edges from the remaining buckets. Note that bucket indices range
# from `start` to `end` inclusive, but bucket_limit indices are
# exclusive of `end` - this is because bucket_limit[i] is the
# right-hand edge for bucket[i].
bucket_counts = bucket_counts[start : end + 1]
inner_edges = histogram_value.bucket_limit[start:end]
# Use min as the left-hand limit for the first non-empty bucket.
bucket_lefts = [histogram_value.min] + inner_edges
# Use max as the right-hand limit for the last non-empty bucket.
bucket_rights = inner_edges + [histogram_value.max]
buckets = np.array(
[bucket_lefts, bucket_rights, bucket_counts], dtype=np.float32
).transpose()

summary_metadata = histogram_metadata.create_summary_metadata(
display_name=value.metadata.display_name or value.tag,
Expand Down
90 changes: 90 additions & 0 deletions tensorboard/data_compat_test.py
Expand Up @@ -201,10 +201,100 @@ def test_histogram(self):
self.assertEqual(expected_metadata, new_value.metadata)
self.assertTrue(new_value.HasField("tensor"))
buckets = tensor_util.make_ndarray(new_value.tensor)
for bucket in buckets:
# No `backwards` buckets.
self.assertLessEqual(bucket[0], bucket[1])
self.assertEqual(old_value.histo.min, buckets[0][0])
self.assertEqual(old_value.histo.max, buckets[-1][1])
self.assertEqual(23 * 45, buckets[:, 2].astype(int).sum())

def test_empty_histogram(self):
with tf.compat.v1.Graph().as_default():
old_op = tf.compat.v1.summary.histogram(
"empty_yet_important", tf.constant([])
)
old_value = self._value_from_op(old_op)
assert old_value.HasField("histo"), old_value
new_value = data_compat.migrate_value(old_value)

self.assertEqual("empty_yet_important", new_value.tag)
expected_metadata = histogram_metadata.create_summary_metadata(
display_name="empty_yet_important", description=""
)
self.assertEqual(expected_metadata, new_value.metadata)
self.assertTrue(new_value.HasField("tensor"))
buckets = tensor_util.make_ndarray(new_value.tensor)
self.assertEmpty(buckets)

def test_single_value_histogram(self):
with tf.compat.v1.Graph().as_default():
old_op = tf.compat.v1.summary.histogram(
"single_value_data", tf.constant([1] * 1024)
)
old_value = self._value_from_op(old_op)
assert old_value.HasField("histo"), old_value
new_value = data_compat.migrate_value(old_value)

self.assertEqual("single_value_data", new_value.tag)
expected_metadata = histogram_metadata.create_summary_metadata(
display_name="single_value_data", description=""
)
self.assertEqual(expected_metadata, new_value.metadata)
self.assertTrue(new_value.HasField("tensor"))
buckets = tensor_util.make_ndarray(new_value.tensor)
# Only one bucket is kept.
self.assertEqual((1, 3), buckets.shape)
self.assertEqual(1, buckets[0][0])
self.assertEqual(1, buckets[-1][1])
self.assertEqual(1024, buckets[0][2])

def test_histogram_with_empty_buckets_on_both_ends(self):
with tf.compat.v1.Graph().as_default():
old_op = tf.compat.v1.summary.histogram(
"data_with_empty_buckets_on_both_ends",
tf.constant([1, 1, 1, 2, 2, 3, 3, 3, 3]),
)
old_value = self._value_from_op(old_op)
assert old_value.HasField("histo"), old_value
new_value = data_compat.migrate_value(old_value)

self.assertEqual("data_with_empty_buckets_on_both_ends", new_value.tag)
expected_metadata = histogram_metadata.create_summary_metadata(
display_name="data_with_empty_buckets_on_both_ends", description=""
)
self.assertEqual(expected_metadata, new_value.metadata)
self.assertTrue(new_value.HasField("tensor"))
buckets = tensor_util.make_ndarray(new_value.tensor)
for bucket in buckets:
# No `backwards` buckets.
self.assertLessEqual(bucket[0], bucket[1])
self.assertEqual(1, buckets[0][0])
self.assertEqual(3, buckets[-1][1])
self.assertEqual(9, buckets[:, 2].astype(int).sum())

def test_histogram_with_extremal_values(self):
with tf.compat.v1.Graph().as_default():
old_op = tf.compat.v1.summary.histogram(
"extremal_values", tf.constant([-1e20, 1e20])
)
old_value = self._value_from_op(old_op)
assert old_value.HasField("histo"), old_value
new_value = data_compat.migrate_value(old_value)

self.assertEqual("extremal_values", new_value.tag)
expected_metadata = histogram_metadata.create_summary_metadata(
display_name="extremal_values", description=""
)
self.assertEqual(expected_metadata, new_value.metadata)
self.assertTrue(new_value.HasField("tensor"))
buckets = tensor_util.make_ndarray(new_value.tensor)
for bucket in buckets:
# No `backwards` buckets.
self.assertLessEqual(bucket[0], bucket[1])
self.assertEqual(old_value.histo.min, buckets[0][0])
self.assertEqual(old_value.histo.max, buckets[-1][1])
self.assertEqual(2, buckets[:, 2].astype(int).sum())

def test_new_style_histogram(self):
with tf.compat.v1.Graph().as_default():
op = histogram_summary.op(
Expand Down

0 comments on commit cec1311

Please sign in to comment.