Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat/introduce cache related metrics for prometheus #7439

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
64 changes: 63 additions & 1 deletion core/store/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use near_metrics::{try_create_histogram_vec, HistogramVec};
use near_metrics::{
try_create_histogram_vec, try_create_int_counter_vec, try_create_int_gauge_vec, HistogramVec,
IntCounterVec, IntGaugeVec,
};
use once_cell::sync::Lazy;

pub(crate) static DATABASE_OP_LATENCY_HIST: Lazy<HistogramVec> = Lazy::new(|| {
Expand All @@ -10,3 +13,62 @@ pub(crate) static DATABASE_OP_LATENCY_HIST: Lazy<HistogramVec> = Lazy::new(|| {
)
.unwrap()
});

pub static CHUNK_CACHE_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
try_create_int_counter_vec(
"near_chunk_cache_hits",
"Chunk cache hits",
&["shard_id", "is_view"],
)
.unwrap()
});

pub static CHUNK_CACHE_MISSES: Lazy<IntCounterVec> = Lazy::new(|| {
try_create_int_counter_vec(
"near_chunk_cache_misses",
"Chunk cache misses",
&["shard_id", "is_view"],
)
.unwrap()
});

pub static SHARD_CACHE_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
try_create_int_counter_vec(
"near_shard_cache_hits",
"Shard cache hits",
&["shard_id", "is_view"],
)
.unwrap()
});

pub static SHARD_CACHE_MISSES: Lazy<IntCounterVec> = Lazy::new(|| {
try_create_int_counter_vec(
"near_shard_cache_misses",
"Shard cache misses",
&["shard_id", "is_view"],
)
.unwrap()
});

pub static SHARD_CACHE_TOO_LARGE: Lazy<IntCounterVec> = Lazy::new(|| {
try_create_int_counter_vec(
"near_shard_cache_too_large",
"Number of values to be inserted into shard cache is too large",
&["shard_id", "is_view"],
)
.unwrap()
});

pub static SHARD_CACHE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
try_create_int_gauge_vec("near_shard_cache_size", "Shard cache size", &["shard_id", "is_view"])
.unwrap()
});

pub static CHUNK_CACHE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
try_create_int_gauge_vec("near_chunk_cache_size", "Chunk cache size", &["shard_id", "is_view"])
.unwrap()
});

pub static SHARD_CACHE_POPS: Lazy<IntCounterVec> = Lazy::new(|| {
try_create_int_counter_vec("near_shard_cache_pops", "Shard cache pops", &["shard_id"]).unwrap()
});
5 changes: 3 additions & 2 deletions core/store/src/trie/shard_tries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ impl ShardTries {
.or_insert_with(|| self.0.trie_cache_factory.create_cache(&shard_uid))
.clone()
};
let storage = Box::new(TrieCachingStorage::new(self.0.store.clone(), cache, shard_uid));
let storage =
Box::new(TrieCachingStorage::new(self.0.store.clone(), cache, shard_uid, is_view));
let flat_state = {
#[cfg(feature = "protocol_feature_flat_state")]
if use_flat_state {
Expand Down Expand Up @@ -179,7 +180,7 @@ impl ShardTries {
.entry(shard_uid)
.or_insert_with(|| self.0.trie_cache_factory.create_cache(&shard_uid))
.clone();
cache.update_cache(ops);
cache.update_cache(ops, shard_uid);
}
Ok(())
}
Expand Down
54 changes: 49 additions & 5 deletions core/store/src/trie/trie_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use near_primitives::hash::CryptoHash;

use crate::db::refcount::decode_value_with_rc;
use crate::trie::POISONED_LOCK_ERR;
use crate::{DBCol, StorageError, Store};
use crate::{metrics, DBCol, StorageError, Store};
use lru::LruCache;
use near_primitives::shard_layout::ShardUId;
use near_primitives::types::{TrieCacheMode, TrieNodesCount};
Expand Down Expand Up @@ -34,19 +34,34 @@ impl TrieCache {
self.0.lock().expect(POISONED_LOCK_ERR).clear()
}

pub fn update_cache(&self, ops: Vec<(CryptoHash, Option<&Vec<u8>>)>) {
pub fn update_cache(&self, ops: Vec<(CryptoHash, Option<&Vec<u8>>)>, shard_uid: ShardUId) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I think that TrieCache should hold shard_uid, so we could avoid passing it here. Let's fix it in a separate PR

let shard_id_str = format!("{}", shard_uid.shard_id);
let labels: [&str; 1] = [&shard_id_str];

let mut guard = self.0.lock().expect(POISONED_LOCK_ERR);
for (hash, opt_value_rc) in ops {
if let Some(value_rc) = opt_value_rc {
if let (Some(value), _rc) = decode_value_with_rc(&value_rc) {
if value.len() < TRIE_LIMIT_CACHED_VALUE_SIZE {
guard.put(hash, value.into());
} else {
metrics::SHARD_CACHE_TOO_LARGE.with_label_values(&labels).inc();
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
}
} else {
metrics::SHARD_CACHE_TOO_LARGE.with_label_values(&labels).inc();
}

We need to inc this metric here as well - this is my bug from the first impl

} else {
guard.pop(&hash);
match guard.pop(&hash) {
Some(_) => {
metrics::SHARD_CACHE_POPS.with_label_values(&labels).inc();
}
_ => {}
};
}
} else {
guard.pop(&hash);
match guard.pop(&hash) {
Some(_) => {
metrics::SHARD_CACHE_POPS.with_label_values(&labels).inc();
}
_ => {}
};
}
}
}
Expand Down Expand Up @@ -179,10 +194,17 @@ pub struct TrieCachingStorage {
pub(crate) db_read_nodes: Cell<u64>,
/// Counts trie nodes retrieved from the chunk cache.
pub(crate) mem_read_nodes: Cell<u64>,
/// Boolean for determining if the cache is a view cache or not
is_view: bool,
}

impl TrieCachingStorage {
pub fn new(store: Store, shard_cache: TrieCache, shard_uid: ShardUId) -> TrieCachingStorage {
pub fn new(
store: Store,
shard_cache: TrieCache,
shard_uid: ShardUId,
is_view: bool,
) -> TrieCachingStorage {
TrieCachingStorage {
store,
shard_uid,
Expand All @@ -191,6 +213,7 @@ impl TrieCachingStorage {
chunk_cache: RefCell::new(Default::default()),
db_read_nodes: Cell::new(0),
mem_read_nodes: Cell::new(0),
is_view,
}
}

Expand Down Expand Up @@ -227,25 +250,45 @@ impl TrieCachingStorage {
pub fn set_mode(&self, state: TrieCacheMode) {
self.cache_mode.set(state);
}

fn update_cache_size_metrics(&self, labels: [&str; 2]) {
{
metrics::CHUNK_CACHE_SIZE
.with_label_values(&labels)
.set(self.chunk_cache.borrow().len() as i64);
metrics::SHARD_CACHE_SIZE
.with_label_values(&labels)
.set(self.shard_cache.0.lock().expect(POISONED_LOCK_ERR).len() as i64);
}
}
}

impl TrieStorage for TrieCachingStorage {
fn retrieve_raw_bytes(&self, hash: &CryptoHash) -> Result<Arc<[u8]>, StorageError> {
let shard_id_str = format!("{}", self.shard_uid.shard_id);
let is_view_str = format!("{}", self.is_view as u8);
let labels: [&str; 2] = [&shard_id_str, &is_view_str];

self.update_cache_size_metrics(labels);
// Try to get value from chunk cache containing nodes with cheaper access. We can do it for any `TrieCacheMode`,
// because we charge for reading nodes only when `CachingChunk` mode is enabled anyway.
if let Some(val) = self.chunk_cache.borrow_mut().get(hash) {
metrics::CHUNK_CACHE_HITS.with_label_values(&labels).inc();
self.inc_mem_read_nodes();
return Ok(val.clone());
}
metrics::CHUNK_CACHE_MISSES.with_label_values(&labels).inc();

// Try to get value from shard cache containing most recently touched nodes.
let mut guard = self.shard_cache.0.lock().expect(POISONED_LOCK_ERR);
let val = match guard.get(hash) {
Some(val) => {
metrics::SHARD_CACHE_HITS.with_label_values(&labels).inc();
near_o11y::io_trace!(count: "shard_cache_hit");
val.clone()
}
None => {
metrics::SHARD_CACHE_MISSES.with_label_values(&labels).inc();
near_o11y::io_trace!(count: "shard_cache_miss");
// If value is not present in cache, get it from the storage.
let key = Self::get_key_from_shard_uid_and_hash(self.shard_uid, hash);
Expand All @@ -265,6 +308,7 @@ impl TrieStorage for TrieCachingStorage {
if val.len() < TRIE_LIMIT_CACHED_VALUE_SIZE {
guard.put(*hash, val.clone());
} else {
metrics::SHARD_CACHE_TOO_LARGE.with_label_values(&labels).inc();
near_o11y::io_trace!(count: "shard_cache_too_large");
}

Expand Down
15 changes: 10 additions & 5 deletions core/store/src/trie/trie_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,8 @@ mod caching_storage_tests {
let shard_uid = ShardUId::single_shard();
let store = create_store_with_values(&values, shard_uid);
let trie_cache = TrieCache::new();
let trie_caching_storage = TrieCachingStorage::new(store, trie_cache.clone(), shard_uid);
let trie_caching_storage =
TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false);
let key = hash(&value);
assert_eq!(trie_cache.get(&key), None);

Expand All @@ -255,7 +256,8 @@ mod caching_storage_tests {
fn test_retrieve_error() {
let shard_uid = ShardUId::single_shard();
let store = create_test_store();
let trie_caching_storage = TrieCachingStorage::new(store, TrieCache::new(), shard_uid);
let trie_caching_storage =
TrieCachingStorage::new(store, TrieCache::new(), shard_uid, false);
let value = vec![1u8];
let key = hash(&value);

Expand All @@ -271,7 +273,8 @@ mod caching_storage_tests {
let shard_uid = ShardUId::single_shard();
let store = create_store_with_values(&values, shard_uid);
let trie_cache = TrieCache::new();
let trie_caching_storage = TrieCachingStorage::new(store, trie_cache.clone(), shard_uid);
let trie_caching_storage =
TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false);
let key = hash(&value);

trie_caching_storage.set_mode(TrieCacheMode::CachingChunk);
Expand All @@ -293,7 +296,8 @@ mod caching_storage_tests {
let shard_uid = ShardUId::single_shard();
let store = create_store_with_values(&values, shard_uid);
let trie_cache = TrieCache::new();
let trie_caching_storage = TrieCachingStorage::new(store, trie_cache.clone(), shard_uid);
let trie_caching_storage =
TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false);
let value = &values[0];
let key = hash(&value);

Expand Down Expand Up @@ -341,7 +345,8 @@ mod caching_storage_tests {
let shard_uid = ShardUId::single_shard();
let store = create_store_with_values(&values, shard_uid);
let trie_cache = TrieCache::with_capacity(shard_cache_size);
let trie_caching_storage = TrieCachingStorage::new(store, trie_cache.clone(), shard_uid);
let trie_caching_storage =
TrieCachingStorage::new(store, trie_cache.clone(), shard_uid, false);

let value = &values[0];
let key = hash(&value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ impl<'c> Testbed<'c> {
pub(crate) fn trie_caching_storage(&mut self) -> TrieCachingStorage {
let store = self.inner.store();
let caching_storage =
TrieCachingStorage::new(store, TrieCache::new(), ShardUId::single_shard());
TrieCachingStorage::new(store, TrieCache::new(), ShardUId::single_shard(), false);
caching_storage
}

Expand Down