Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

histogram: remove backwards buckets in v1 histogram migration #5404

Merged
merged 6 commits into from Nov 10, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
40 changes: 33 additions & 7 deletions tensorboard/data_compat.py
Expand Up @@ -81,19 +81,45 @@ def make_summary(tag, metadata, data):


def _migrate_histogram_value(value):
histogram_value = value.histo
bucket_lefts = [histogram_value.min] + histogram_value.bucket_limit[:-1]
bucket_rights = histogram_value.bucket_limit[:-1] + [histogram_value.max]
bucket_counts = histogram_value.bucket
buckets = np.array(
[bucket_lefts, bucket_rights, bucket_counts], dtype=np.float32
).transpose()
"""Convert `old-style` histogram value to `new-style`.

The "old-style" format can have outermost bucket limits of -DBL_MAX and
DBL_MAX, which are problematic for visualization. We replace those here
with the actual min and max values seen in the input data, but then in
order to avoid introducing "backwards" buckets (where left edge > right
edge), we first must drop all empty buckets on the left and right ends.
"""
summary_metadata = histogram_metadata.create_summary_metadata(
display_name=value.metadata.display_name or value.tag,
description=value.metadata.summary_description,
)

histogram_value = value.histo
bucket_counts = histogram_value.bucket
# Find the indices of the leftmost and rightmost non-empty buckets.
n = len(bucket_counts)
start = next((i for i in range(n) if bucket_counts[i] > 0), n)
end = next((i for i in reversed(range(n)) if bucket_counts[i] > 0), -1)
if start > end:
# If all input buckets were empty, treat it as a zero-bucket
# new-style histogram.
return make_summary(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right, I forgot that we need to wrap this in make_summary() in two places if we do an early return here. Alternatively we could do something like:

if start > end:
    buckets = np.zeros([0, 3], dtype=np.float32)
else:
    # rest of normal codepath goes here
    buckets = np.array([...], dtype=np.float32).transpose()
return make_summary(value.tag, summary_metadata, buckets)

Up to you!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed the sequence to avoid moving the code blocks around.

value.tag, summary_metadata, np.zeros([0, 3], dtype=np.float32)
)
# Discard empty buckets on both ends, and keep only the "inner" edges
# from the remaining buckets. Note that bucket indices range from `start`
# to `end` inclusive, but bucket_limit indices are exclusive of `end` -
# this is because bucket_limit[i] is the right-hand edge for bucket[i].
bucket_counts = bucket_counts[start : end + 1]
inner_edges = histogram_value.bucket_limit[start:end]
# Use min as the left-hand limit for the first non-empty bucket.
bucket_lefts = [histogram_value.min] + inner_edges
# Use max as the right-hand limit for the last non-empty bucket.
bucket_rights = inner_edges + [histogram_value.max]
buckets = np.array(
[bucket_lefts, bucket_rights, bucket_counts], dtype=np.float32
).transpose()

return make_summary(value.tag, summary_metadata, buckets)


Expand Down
90 changes: 90 additions & 0 deletions tensorboard/data_compat_test.py
Expand Up @@ -201,10 +201,100 @@ def test_histogram(self):
self.assertEqual(expected_metadata, new_value.metadata)
self.assertTrue(new_value.HasField("tensor"))
buckets = tensor_util.make_ndarray(new_value.tensor)
for bucket in buckets:
# No `backwards` buckets.
self.assertLessEqual(bucket[0], bucket[1])
self.assertEqual(old_value.histo.min, buckets[0][0])
self.assertEqual(old_value.histo.max, buckets[-1][1])
self.assertEqual(23 * 45, buckets[:, 2].astype(int).sum())

def test_empty_histogram(self):
with tf.compat.v1.Graph().as_default():
old_op = tf.compat.v1.summary.histogram(
"empty_yet_important", tf.constant([])
)
old_value = self._value_from_op(old_op)
assert old_value.HasField("histo"), old_value
new_value = data_compat.migrate_value(old_value)

self.assertEqual("empty_yet_important", new_value.tag)
expected_metadata = histogram_metadata.create_summary_metadata(
display_name="empty_yet_important", description=""
)
self.assertEqual(expected_metadata, new_value.metadata)
self.assertTrue(new_value.HasField("tensor"))
buckets = tensor_util.make_ndarray(new_value.tensor)
self.assertEmpty(buckets)

def test_single_value_histogram(self):
with tf.compat.v1.Graph().as_default():
old_op = tf.compat.v1.summary.histogram(
"single_value_data", tf.constant([1] * 1024)
)
old_value = self._value_from_op(old_op)
assert old_value.HasField("histo"), old_value
new_value = data_compat.migrate_value(old_value)

self.assertEqual("single_value_data", new_value.tag)
expected_metadata = histogram_metadata.create_summary_metadata(
display_name="single_value_data", description=""
)
self.assertEqual(expected_metadata, new_value.metadata)
self.assertTrue(new_value.HasField("tensor"))
buckets = tensor_util.make_ndarray(new_value.tensor)
# Only one bucket is kept.
self.assertEqual((1, 3), buckets.shape)
self.assertEqual(1, buckets[0][0])
self.assertEqual(1, buckets[-1][1])
self.assertEqual(1024, buckets[0][2])

def test_histogram_with_empty_buckets_on_both_ends(self):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth an additional test case using a histogram with data that has extremal values, e.g. [-1e20, 1e20], which should produce counts in the "farthest out" buckets that the legacy histogram format can generate. In particular, in that case the final bucket in the legacy histogram format is non-empty (whereas usually it's empty).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

with tf.compat.v1.Graph().as_default():
old_op = tf.compat.v1.summary.histogram(
"data_with_empty_buckets_on_both_ends",
tf.constant([1, 1, 1, 2, 2, 3, 3, 3, 3]),
)
old_value = self._value_from_op(old_op)
assert old_value.HasField("histo"), old_value
new_value = data_compat.migrate_value(old_value)

self.assertEqual("data_with_empty_buckets_on_both_ends", new_value.tag)
expected_metadata = histogram_metadata.create_summary_metadata(
display_name="data_with_empty_buckets_on_both_ends", description=""
)
self.assertEqual(expected_metadata, new_value.metadata)
self.assertTrue(new_value.HasField("tensor"))
buckets = tensor_util.make_ndarray(new_value.tensor)
for bucket in buckets:
# No `backwards` buckets.
self.assertLessEqual(bucket[0], bucket[1])
self.assertEqual(1, buckets[0][0])
self.assertEqual(3, buckets[-1][1])
self.assertEqual(9, buckets[:, 2].astype(int).sum())

def test_histogram_with_extremal_values(self):
with tf.compat.v1.Graph().as_default():
old_op = tf.compat.v1.summary.histogram(
"extremal_values", tf.constant([-1e20, 1e20])
)
old_value = self._value_from_op(old_op)
assert old_value.HasField("histo"), old_value
new_value = data_compat.migrate_value(old_value)

self.assertEqual("extremal_values", new_value.tag)
expected_metadata = histogram_metadata.create_summary_metadata(
display_name="extremal_values", description=""
)
self.assertEqual(expected_metadata, new_value.metadata)
self.assertTrue(new_value.HasField("tensor"))
buckets = tensor_util.make_ndarray(new_value.tensor)
for bucket in buckets:
# No `backwards` buckets.
self.assertLessEqual(bucket[0], bucket[1])
self.assertEqual(old_value.histo.min, buckets[0][0])
self.assertEqual(old_value.histo.max, buckets[-1][1])
self.assertEqual(2, buckets[:, 2].astype(int).sum())

def test_new_style_histogram(self):
with tf.compat.v1.Graph().as_default():
op = histogram_summary.op(
Expand Down