Skip to content

Commit

Permalink
Add Cache metrics to groupcache (thanos-io#5352)
Browse files Browse the repository at this point in the history
Add metrics about the hot and main caches[0].
* Number of bytes in each cache.
* Number of items in each cache.
* Counter of evictions from each cache.

[0]: https://pkg.go.dev/github.com/vimeo/galaxycache#CacheStats

Signed-off-by: SuperQ <superq@gmail.com>
Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
  • Loading branch information
SuperQ authored and GiedriusS committed May 11, 2022
1 parent 453a48e commit 25dac01
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 3 deletions.
25 changes: 24 additions & 1 deletion CHANGELOG.md
Expand Up @@ -11,8 +11,31 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
## Performance

### Added

- [#5352](https://github.com/thanos-io/thanos/pull/5352) Cache: Add cache metrics to groupcache.

### Changed

### Removed

## [v0.26.0](https://github.com/thanos-io/thanos/tree/release-0.26) - 2022.05.05

### Fixed
- [#5281](https://github.com/thanos-io/thanos/pull/5281) Blocks: Use correct separators for filesystem paths and object storage paths respectively.
- [#5300](https://github.com/thanos-io/thanos/pull/5300) Query: Ignore cache on queries with deduplication off.
- [#5324](https://github.com/thanos-io/thanos/pull/5324) Reloader: Force trigger reload when config is rolled back.

### Added

- [#5220](https://github.com/thanos-io/thanos/pull/5220) Query Frontend: Add `--query-frontend.forward-header` flag, forward headers to downstream querier.
- [#5250](https://github.com/thanos-io/thanos/pull/5250/files) Querier: Expose Query and QueryRange APIs through GRPC.
- [#5290](https://github.com/thanos-io/thanos/pull/5290) Add support for [ppc64le](https://en.wikipedia.org/wiki/Ppc64)

### Changed

- [#4838](https://github.com/thanos-io/thanos/pull/4838) Tracing: Changed client for Stackdriver which deprecated "type: STACKDRIVER" in tracing YAML configuration. Use `type: GOOGLE_CLOUD` instead (`STACKDRIVER` type remains for backward compatibility).
- [#5170](https://github.com/thanos-io/thanos/pull/5170) All: Upgraded the TLS version from TLS1.2 to TLS1.3.
- [#5205](https://github.com/thanos-io/thanos/pull/5205) Rule: Add ruler labels as external labels in stateless ruler mode.
- [#5206](https://github.com/thanos-io/thanos/pull/5206) Cache: add timeout for groupcache's fetch operation

Expand Down
31 changes: 29 additions & 2 deletions pkg/cache/groupcache.go
Expand Up @@ -254,7 +254,7 @@ func NewGroupcacheWithConfig(logger log.Logger, reg prometheus.Registerer, conf
},
))

RegisterCacheStatsCollector(galaxy, reg)
RegisterCacheStatsCollector(galaxy, &conf, reg)

return &Groupcache{
logger: logger,
Expand Down Expand Up @@ -305,8 +305,13 @@ func (c *Groupcache) Name() string {

type CacheStatsCollector struct {
galaxy *galaxycache.Galaxy
conf *GroupcacheConfig

// GalaxyCache Metric descriptions.
bytes *prometheus.Desc
evictions *prometheus.Desc
items *prometheus.Desc
maxBytes *prometheus.Desc
gets *prometheus.Desc
loads *prometheus.Desc
peerLoads *prometheus.Desc
Expand All @@ -317,7 +322,16 @@ type CacheStatsCollector struct {
}

// RegisterCacheStatsCollector registers a groupcache metrics collector.
func RegisterCacheStatsCollector(galaxy *galaxycache.Galaxy, reg prometheus.Registerer) {
func RegisterCacheStatsCollector(galaxy *galaxycache.Galaxy, conf *GroupcacheConfig, reg prometheus.Registerer) {
// Cache metrics.
bytes := prometheus.NewDesc("thanos_cache_groupcache_bytes", "The number of bytes in the main cache.", []string{"cache"}, nil)
evictions := prometheus.NewDesc("thanos_cache_groupcache_evictions_total", "The number items evicted from the cache.", []string{"cache"}, nil)
items := prometheus.NewDesc("thanos_cache_groupcache_items", "The number of items in the cache.", []string{"cache"}, nil)

// Configuration Metrics.
maxBytes := prometheus.NewDesc("thanos_cache_groupcache_max_bytes", "The max number of bytes in the cache.", nil, nil)

// GroupCache metrics.
gets := prometheus.NewDesc("thanos_cache_groupcache_get_requests_total", "Total number of get requests, including from peers.", nil, nil)
loads := prometheus.NewDesc("thanos_cache_groupcache_loads_total", "Total number of loads from backend (gets - cacheHits).", nil, nil)
peerLoads := prometheus.NewDesc("thanos_cache_groupcache_peer_loads_total", "Total number of loads from peers (remote load or remote cache hit).", nil, nil)
Expand All @@ -328,6 +342,11 @@ func RegisterCacheStatsCollector(galaxy *galaxycache.Galaxy, reg prometheus.Regi

collector := &CacheStatsCollector{
galaxy: galaxy,
conf: conf,
bytes: bytes,
evictions: evictions,
items: items,
maxBytes: maxBytes,
gets: gets,
loads: loads,
peerLoads: peerLoads,
Expand All @@ -340,6 +359,14 @@ func RegisterCacheStatsCollector(galaxy *galaxycache.Galaxy, reg prometheus.Regi
}

func (s *CacheStatsCollector) Collect(ch chan<- prometheus.Metric) {
for _, cache := range []galaxycache.CacheType{galaxycache.MainCache, galaxycache.HotCache} {
cacheStats := s.galaxy.CacheStats(cache)
ch <- prometheus.MustNewConstMetric(s.bytes, prometheus.GaugeValue, float64(cacheStats.Bytes), cache.String())
ch <- prometheus.MustNewConstMetric(s.evictions, prometheus.GaugeValue, float64(cacheStats.Evictions), cache.String())
ch <- prometheus.MustNewConstMetric(s.items, prometheus.GaugeValue, float64(cacheStats.Items), cache.String())
}

ch <- prometheus.MustNewConstMetric(s.maxBytes, prometheus.GaugeValue, float64(s.conf.MaxSize))
ch <- prometheus.MustNewConstMetric(s.gets, prometheus.CounterValue, float64(s.galaxy.Stats.Gets.Get()))
ch <- prometheus.MustNewConstMetric(s.loads, prometheus.CounterValue, float64(s.galaxy.Stats.Loads.Get()))
ch <- prometheus.MustNewConstMetric(s.peerLoads, prometheus.CounterValue, float64(s.galaxy.Stats.PeerLoads.Get()))
Expand Down

0 comments on commit 25dac01

Please sign in to comment.