Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't use hash_funcs to compute an @st.cache function's cache_key #2331

Merged
merged 3 commits into from
Nov 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 15 additions & 2 deletions lib/streamlit/caching.py
Expand Up @@ -480,12 +480,25 @@ def cache(

func_hasher = hashlib.new("md5")

# Include the function's module and qualified name in the hash.
# Include the function's __module__ and __qualname__ strings in the hash.
# This means that two identical functions in different modules
# will not share a hash; it also means that two identical *nested*
# functions in the same module will not share a hash.
# We do not pass `hash_funcs` here, because we don't want our function's
# name to get an unexpected hash.
update_hash(
(func.__module__, func.__qualname__, func),
(func.__module__, func.__qualname__),
hasher=func_hasher,
hash_funcs=None,
hash_reason=HashReason.CACHING_FUNC_BODY,
hash_source=func,
)

# Include the function's body in the hash. We *do* pass hash_funcs here,
# because this step will be hashing any objects referenced in the function
# body.
update_hash(
func,
hasher=func_hasher,
hash_funcs=hash_funcs,
hash_reason=HashReason.CACHING_FUNC_BODY,
Expand Down
56 changes: 53 additions & 3 deletions lib/tests/streamlit/caching_test.py
Expand Up @@ -13,15 +13,13 @@
# limitations under the License.

"""st.caching unit tests."""
from unittest.mock import patch
from unittest.mock import patch, Mock
import threading
import unittest
import pytest
import types

from streamlit import caching
from streamlit import hashing
from streamlit.hashing import UserHashError
from streamlit.elements import exception_proto
from streamlit.proto.Exception_pb2 import Exception as ExceptionProto
from tests import testutil
Expand Down Expand Up @@ -347,6 +345,58 @@ def bar(x):
self.assertEqual([0, 1, 2, 0, 1, 2], foo_vals)
self.assertEqual([0, 1, 2, 0, 1, 2], bar_vals)

def test_unique_function_caches(self):
"""Each function should have its own cache, even if it has an
identical body and arguments to another cached function.
"""

@st.cache
def foo():
return []

@st.cache
def bar():
return []

id_foo = id(foo())
id_bar = id(bar())
self.assertNotEqual(id_foo, id_bar)

def test_function_body_uses_hashfuncs(self):
hash_func = Mock(return_value=None)

# This is an external object that's referenced by our
# function. It cannot be hashed (without a custom hashfunc).
dict_gen = {1: (x for x in range(1))}

@st.cache(hash_funcs={"builtins.generator": hash_func})
def foo(arg):
# Reference the generator object. It will be hashed when we
# hash the function body to generate foo's cache_key.
print(dict_gen)
return []

foo(1)
foo(2)
hash_func.assert_called_once()

def test_function_name_does_not_use_hashfuncs(self):
"""Hash funcs should only be used on arguments to a function,
and not when computing the key for a function's unique MemCache.
"""

str_hash_func = Mock(return_value=None)

@st.cache(hash_funcs={str: str_hash_func})
def foo(string_arg):
return []

# If our str hash_func is called multiple times, it's probably because
# it's being used to compute the function's cache_key (as opposed to
# the value_key). It should only be used to compute the value_key!
foo("ahoy")
str_hash_func.assert_called_once_with("ahoy")


# Temporarily turn off these tests since there's no Cache object in __init__
# right now.
Expand Down