diff --git a/README.md b/README.md index d2978544..876df622 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,10 @@ Logster can be configured using `Logster.config`: - `Logster.config.rate_limit_error_reporting` : controls automatic 1 minute rate limiting for JS error reporting. - `Logster.config.web_title` : `<title>` tag for logster error page. - `Logster.config.enable_custom_patterns_via_ui` : enable the settings page (`/settings`) where you can add suppression and grouping patterns. -- `Logster.config.maximum_message_size_bytes` : specify a size in bytes that a message cannot exceed. Note this isn't 100% accurate, meaning a message may still grow above the limit, but it shouldn't grow by more than, say, 2000 bytes. +- `Logster.config.allow_grouping` : Enable grouping of similar messages into one message that carries an array of the grouped messages' `env` hashes. Similar messages are messages that have identical backtraces, severities and log messages. +- `Logster.config.maximum_message_size_bytes` : set a maximum size for messages. Default value is 10,000. If a message exceeds this limit, Logster will first remove all occurrences of `gems_dir` (more on this config below) from the backtrace and compute the size again; if the message is still above the limit, Logster will iteratively remove lines from the end of the backtrace until the size falls below the limit or only one line remains. If at that point the message is still above the limit, Logster will remove as many characters as needed to bring the size below the limit. However, if Logster determines up front that removing the whole backtrace would still not bring the size below the limit, it gives up early and doesn't attempt to reduce the size at all, so it's not recommended to set this config to a very low value, e.g. less than 2000. +- `Logster.config.max_env_bytes` : set a maximum size for `env`. Default value is 1000. If `env` is an array of hashes, this limit applies to each individual hash in the array rather than to the whole array. If an `env` hash exceeds this limit, Logster will keep the largest subset of key-value pairs whose size is below the limit. If the hash has a key named `time`, it will always be included. +- `Logster.config.max_env_count_per_message` : default value is 50. Logster can merge messages that have the same backtrace, severity and log message into one grouping message that holds many `env` hashes. This config specifies the maximum number of `env` hashes a grouping message is allowed to keep. When this limit is reached and a new similar message needs to be merged, Logster will remove the oldest `env` hash from the grouping message and add the new one. - `Logster.config.project_directories` : This should be an array of hashes that map paths on the local filesystem to GitHub repository URLs. If this feature is enabled, Logster will parse backtraces and try to construct a GitHub URL to the exact file and line number for each line in the backtrace. For a Rails app, the config may look like this: `Logster.config.project_directories = [{ path: Rails.root.to_s, url: "https://github.com/<your_org>/<your_repo>" }]`. The GitHub links that are constructed will use the `master` branch. If you want Logster to use the `application_version` attribute from the `env` tab so that the GitHub links point to the exact version of the app when the log message is created, add a `main_app: true` key to the hash. - `Logster.config.enable_backtrace_links` : Enable/disable the backtrace links feature. - `Logster.config.gems_dir` : The value of this config is `Gem.dir + "/gems/"` by default. You probably don't need to change this config, but it's available in case your app gems are installed in a different directory. An example where this config is needed is Logster [demo site](http://logster.info/logs/): [https://github.com/discourse/logster/blob/master/website/sample.rb#L77](https://github.com/discourse/logster/blob/master/website/sample.rb#L77).
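The new limits are plain `Logster.config` accessors (see `lib/logster/configuration.rb` below), so they can be set from an initializer like the existing options. A minimal sketch — the initializer path is hypothetical, and the three size values are simply the new defaults made explicit:

```ruby
# config/initializers/logster.rb (illustrative path and values)
Logster.config.allow_grouping = true                # merge messages with identical backtrace/severity/message
Logster.config.maximum_message_size_bytes = 10_000  # whole-message cap, enforced by trimming the backtrace
Logster.config.max_env_bytes = 1_000                # cap applied to each env hash individually
Logster.config.max_env_count_per_message = 50       # oldest env hashes are dropped beyond this count
```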
diff --git a/lib/logster/base_store.rb b/lib/logster/base_store.rb index fb25275a..1b059b27 100644 --- a/lib/logster/base_store.rb +++ b/lib/logster/base_store.rb @@ -18,7 +18,7 @@ def save(message) end # Modify the saved message to the given one (identified by message.key) and bump it to the top of the latest list - def replace_and_bump(message, save_env: true) + def replace_and_bump(message) not_implemented end @@ -199,15 +199,17 @@ def report(severity, progname, msg, opts = {}) similar = get(key, load_env: false) if key end + message.drop_redundant_envs(Logster.config.max_env_count_per_message) + message.apply_env_size_limit(Logster.config.max_env_bytes) if similar - if similar.count < Logster::MAX_GROUPING_LENGTH - similar.env = get_env(similar.key) || {} - end - save_env = similar.merge_similar_message(message) - - replace_and_bump(similar, save_env: save_env) + similar.merge_similar_message(message) + replace_and_bump(similar) similar else + message.apply_message_size_limit( + Logster.config.maximum_message_size_bytes, + gems_dir: Logster.config.gems_dir + ) save message message end diff --git a/lib/logster/configuration.rb b/lib/logster/configuration.rb index 5bf36239..b3555cb2 100644 --- a/lib/logster/configuration.rb +++ b/lib/logster/configuration.rb @@ -15,7 +15,9 @@ class Configuration :maximum_message_size_bytes, :project_directories, :enable_backtrace_links, - :gems_dir + :gems_dir, + :max_env_bytes, + :max_env_count_per_message ) attr_writer :subdirectory @@ -29,7 +31,9 @@ def initialize @enable_custom_patterns_via_ui = false @rate_limit_error_reporting = true @enable_js_error_reporting = true - @maximum_message_size_bytes = 60_000 + @maximum_message_size_bytes = 10_000 + @max_env_bytes = 1000 + @max_env_count_per_message = 50 @project_directories = [] @enable_backtrace_links = true @gems_dir = Gem.dir + "/gems/" diff --git a/lib/logster/message.rb b/lib/logster/message.rb index a4d16555..9794c096 100644 --- a/lib/logster/message.rb +++ b/lib/logster/message.rb @@ -4,8 +4,6 @@ require 'securerandom' module Logster - - MAX_GROUPING_LENGTH = 50 MAX_MESSAGE_LENGTH = 600 class Message @@ -22,9 +20,10 @@ class Message hostname process_id application_version + time } - attr_accessor :timestamp, :severity, :progname, :key, :backtrace, :count, :protected, :first_timestamp + attr_accessor :timestamp, :severity, :progname, :key, :backtrace, :count, :protected, :first_timestamp, :env_buffer attr_reader :message, :env def initialize(severity, progname, message, timestamp = nil, key = nil, count: 1) @@ -37,6 +36,7 @@ def initialize(severity, progname, message, timestamp = nil, key = nil, count: 1 @count = count || 1 @protected = false @first_timestamp = nil + @env_buffer = [] end def to_h(exclude_env: false) @@ -82,7 +82,6 @@ def self.from_json(json) end def env=(env) - @env_json = nil @env = self.class.scrub_params(env) end @@ -94,12 +93,18 @@ def populate_from_env(env) env ||= {} if Array === env env = env.map do |single_env| - self.class.default_env.merge(single_env) + single_env = self.class.default_env.merge(single_env) + if
!single_env.key?("time") && !single_env.key?(:time) + single_env["time"] = @timestamp || get_timestamp + end + single_env end else env = self.class.default_env.merge(env) + if !env.key?("time") && !env.key?(:time) + env["time"] = @timestamp || get_timestamp + end end - @env_json = nil @env = Message.populate_from_env(env) end @@ -146,31 +151,24 @@ def is_similar?(other) def merge_similar_message(other) self.first_timestamp ||= self.timestamp self.timestamp = [self.timestamp, other.timestamp].max - self.count += other.count || 1 - return false if self.count > Logster::MAX_GROUPING_LENGTH - - size = self.to_json(exclude_env: true).bytesize + self.env_json.bytesize - extra_env_size = other.env_json.bytesize - return false if size + extra_env_size > Logster.config.maximum_message_size_bytes - other_env = JSON.load JSON.fast_generate other.env - if Hash === other_env && !other_env.key?("time") - other_env["time"] = other.timestamp - end - if Hash === self.env && !self.env.key?("time") - self.env["time"] = self.first_timestamp + if Hash === other.env && !other.env.key?("time") && !other.env.key?(:time) + other.env["time"] = other.timestamp end - if Array === self.env - Array === other_env ? self.env.concat(other_env) : self.env << other_env + if Array === other.env + env_buffer.unshift(*other.env) else - Array === other_env ? self.env = [self.env, *other_env] : self.env = [self.env, other_env] + env_buffer.unshift(other.env) end - @env_json = nil true end + def has_env_buffer? + env_buffer.size > 0 + end + def self.populate_from_env(env) if Array === env env.map do |single_env| @@ -229,10 +227,6 @@ def =~(pattern) end end - def env_json - @env_json ||= (self.env || {}).to_json - end - def self.scrub_params(params) if Array === params params.map! { |p| scrub_params(p) } @@ -250,8 +244,77 @@ def self.scrub_params(params) end end + def drop_redundant_envs(limit) + if Array === env + env.slice!(limit..-1) + end + end + + def apply_env_size_limit(size_limit) + if Array === env + env.each { |e| truncate_env(e, size_limit) } + elsif Hash === env + truncate_env(env, size_limit) + end + end + + def apply_message_size_limit(limit, gems_dir: nil) + size = self.to_json(exclude_env: true).bytesize + if size > limit && @backtrace + backtrace_limit = limit - (size - @backtrace.bytesize) + @backtrace.gsub!(gems_dir, "") if gems_dir + @backtrace.strip! + orig = @backtrace.dup + stop = false + while @backtrace.bytesize > backtrace_limit && backtrace_limit > 0 && !stop + lines = @backtrace.lines + if lines.size > 1 + lines.pop + @backtrace = lines.join + else + @backtrace.slice!(-1) + end + # protection to ensure we never get stuck + stop = orig == @backtrace + end + end + end + protected + def truncate_env(env, limit) + if JSON.fast_generate(env).bytesize > limit + sizes = {} + braces = '{}'.bytesize + env.each do |k,v| + sizes[k] = JSON.fast_generate(k => v).bytesize - braces + end + sorted = env.keys.sort { |a,b| sizes[a] <=> sizes[b] } + + kept_keys = [] + if env.key?(:time) + kept_keys << :time + elsif env.key?("time") + kept_keys << "time" + end + + sum = braces + if time_key = kept_keys.first + sum += sizes[time_key] + sorted.delete(time_key) + end + comma = ','.bytesize + + sorted.each do |k| + extra = kept_keys.size == 0 ? 0 : comma + break if sum + sizes[k] + extra > limit + kept_keys << k + sum += sizes[k] + extra + end + env.select! 
{ |k| kept_keys.include?(k) } + end + end + def truncate_message(msg) return msg unless msg msg = msg.inspect unless String === msg diff --git a/lib/logster/redis_store.rb b/lib/logster/redis_store.rb index f4c9462e..5993ee0d 100644 --- a/lib/logster/redis_store.rb +++ b/lib/logster/redis_store.rb @@ -6,6 +6,7 @@ module Logster class RedisStore < BaseStore + ENV_PREFIX = "logster-env-" attr_accessor :redis, :max_backlog, :redis_raw_connection attr_writer :redis_prefix @@ -24,12 +25,11 @@ def save(message) return true if @redis.hget(solved_key, solved) end end - apply_max_size_limit(message) @redis.multi do @redis.hset(grouping_key, message.grouping_key, message.key) @redis.rpush(list_key, message.key) - update_message(message) + update_message(message, save_env: true) end trim @@ -46,7 +46,7 @@ def delete(msg) save_pattern_group(group) if group.changed? end @redis.hdel(hash_key, msg.key) - @redis.hdel(env_key, msg.key) + delete_env(msg.key) @redis.hdel(grouping_key, msg.grouping_key) @redis.lrem(list_key, -1, msg.key) end @@ -60,26 +60,26 @@ def bulk_delete(message_keys, grouping_keys) save_pattern_group(group) if group.changed? end @redis.hdel(hash_key, message_keys) - @redis.hdel(env_key, message_keys) @redis.hdel(grouping_key, grouping_keys) message_keys.each do |k| @redis.lrem(list_key, -1, k) + delete_env(k) end end end - def replace_and_bump(message, save_env: true) + def replace_and_bump(message) # TODO make it atomic exists = @redis.hexists(hash_key, message.key) return false unless exists @redis.multi do @redis.hset(hash_key, message.key, message.to_json(exclude_env: true)) - @redis.hset(env_key, message.key, message.env_json) if save_env + push_env(message.key, message.env_buffer) if message.has_env_buffer? @redis.lrem(list_key, -1, message.key) @redis.rpush(list_key, message.key) end - + message.env_buffer = [] if message.has_env_buffer? check_rate_limits(message.severity) true @@ -164,22 +164,21 @@ def latest(opts = {}) def clear RedisRateLimiter.clear_all(@redis) @redis.del(solved_key) + all_keys = @redis.lrange(list_key, 0, -1) @redis.del(list_key) - keys = @redis.smembers(protected_key) || [] - if keys.empty? + protected_keys = @redis.smembers(protected_key) || [] + if protected_keys.empty? 
@redis.del(hash_key) - @redis.del(env_key) + all_keys.each { |k| delete_env(k) } @redis.del(pattern_groups_key) @redis.del(grouping_key) else - protected = @redis.mapped_hmget(hash_key, *keys) - protected_env = @redis.mapped_hmget(env_key, *keys) + protected_messages = @redis.mapped_hmget(hash_key, *protected_keys) @redis.del(hash_key) - @redis.del(env_key) - @redis.mapped_hmset(hash_key, protected) - @redis.mapped_hmset(env_key, protected_env) + @redis.mapped_hmset(hash_key, protected_messages) + (all_keys - protected_keys).each { |k| delete_env(k) } - sorted = protected + sorted = protected_messages .values .map { |string| Message.from_json(string) rescue nil @@ -203,10 +202,10 @@ def clear # Delete everything, included protected messages # (use in tests) def clear_all + @redis.lrange(list_key, 0, -1).each { |k| delete_env(k) } @redis.del(list_key) @redis.del(protected_key) @redis.del(hash_key) - @redis.del(env_key) @redis.del(grouping_key) @redis.del(solved_key) @redis.del(ignored_logs_count_key) @@ -235,17 +234,35 @@ def get_all_messages(with_env: true) bulk_get(@redis.lrange(list_key, 0, -1), with_env: with_env) end + BULK_ENV_GET_LUA = <<~LUA + local results = {}; + for i = 1, table.getn(KEYS), 1 do + results[i] = { KEYS[i], redis.call('LRANGE', KEYS[i], 0, -1) }; + end + return results; + LUA + def bulk_get(message_keys, with_env: true) return [] if !message_keys || message_keys.size == 0 - envs = @redis.mapped_hmget(env_key, *message_keys) if with_env + envs = nil + if with_env + envs = {} + @redis.eval( + BULK_ENV_GET_LUA, + keys: message_keys.map { |k| env_prefix(k) } + ).to_h.each do |k, v| + next if v.size == 0 + parsed = v.size == 1 ? JSON.parse(v[0]) : v.map { |e| JSON.parse(e) } + envs[env_unprefix(k)] = parsed + end + end messages = @redis.hmget(hash_key, message_keys).map! do |json| next if !json || json.size == 0 message = Message.from_json(json) - if with_env + if with_env && envs env = envs[message.key] if !message.env || message.env.size == 0 - env = env && env.size > 0 ? ::JSON.parse(env) : {} - message.env = env + message.env = env || {} end end message @@ -255,20 +272,20 @@ def bulk_get(message_keys, with_env: true) end def get_env(message_key) - json = @redis.hget(env_key, message_key) - return if !json || json.size == 0 - JSON.parse(json) + envs = @redis.lrange(env_prefix(message_key), 0, -1) + return if !envs || envs.size == 0 + envs.size == 1 ? JSON.parse(envs[0]) : envs.map { |j| JSON.parse(j) } end def protect(message_key) - if message = get(message_key) + if message = get(message_key, load_env: false) message.protected = true update_message(message) end end def unprotect(message_key) - if message = get(message_key) + if message = get(message_key, load_env: false) message.protected = false update_message(message) else @@ -376,13 +393,11 @@ def remove_pattern_group(pattern) protected - def clear_solved(count = nil) - + def clear_solved ignores = Set.new(@redis.hkeys(solved_key) || []) if ignores.length > 0 - start = count ? 0 - count : 0 - message_keys = @redis.lrange(list_key, start, -1) || [] + message_keys = @redis.lrange(list_key, 0, -1) || [] bulk_get(message_keys).each do |message| unless (ignores & (message.solved_keys || [])).empty? 
@@ -397,7 +412,7 @@ def trim removed_keys = [] while removed_key = @redis.lpop(list_key) unless @redis.sismember(protected_key, removed_key) - rmsg = get removed_key + rmsg = get(removed_key, load_env: false) delete(rmsg) break else @@ -410,9 +425,9 @@ def trim end end - def update_message(message) + def update_message(message, save_env: false) @redis.hset(hash_key, message.key, message.to_json(exclude_env: true)) - @redis.hset(env_key, message.key, message.env_json) + push_env(message.key, message.env) if save_env if message.protected @redis.sadd(protected_key, message.key) else @@ -571,7 +586,7 @@ def env_key end def protected_key - @saved_key ||= "__LOGSTER__SAVED" + @protected_key ||= "__LOGSTER__SAVED" end def grouping_key @@ -608,22 +623,6 @@ def serialize_group(group, row_id) ) end - def apply_max_size_limit(message) - size = message.to_json(exclude_env: true).bytesize - env_size = message.env_json.bytesize - max_size = Logster.config.maximum_message_size_bytes - if size + env_size > max_size - # env is most likely the reason for the large size - # truncate it so the overall size is < the limit - if Array === message.env - # the - 1 at the end ensures the size goes a little bit below the limit - truncate_at = (message.env.size.to_f * max_size.to_f / (env_size + size)).to_i - 1 - truncate_at = 1 if truncate_at < 1 - message.env = message.env[0...truncate_at] - end - end - end - def register_rate_limit(severities, limit, duration, callback) severities = [severities] unless severities.is_a?(Array) redis = (@redis_raw_connection && @redis_prefix) ? @redis_raw_connection : @redis @@ -636,5 +635,24 @@ def register_rate_limit(severities, limit, duration, callback) rate_limits[self.redis_prefix] << rate_limiter rate_limiter end + + def push_env(message_key, env) + prefixed = env_prefix(message_key) + env = [env] unless Array === env + @redis.lpush(prefixed, env.map(&:to_json).reverse) + @redis.ltrim(prefixed, 0, Logster.config.max_env_count_per_message - 1) + end + + def delete_env(message_key) + @redis.del(env_prefix(message_key)) + end + + def env_unprefix(key) + key.sub(ENV_PREFIX, "") + end + + def env_prefix(key) + ENV_PREFIX + key + end end end diff --git a/lib/logster/suppression_pattern.rb b/lib/logster/suppression_pattern.rb index c2b88e29..ffc78a96 100644 --- a/lib/logster/suppression_pattern.rb +++ b/lib/logster/suppression_pattern.rb @@ -24,7 +24,7 @@ def destroy(clear_cache: true) # arg used in tests def retro_delete_messages keys = [] grouping_keys = [] - @store.get_all_messages.each do |message| + @store.get_all_messages(with_env: false).each do |message| if message =~ self.pattern keys << message.key grouping_keys << message.grouping_key diff --git a/test/logster/test_message.rb b/test/logster/test_message.rb index 4c316996..4c732796 100644 --- a/test/logster/test_message.rb +++ b/test/logster/test_message.rb @@ -11,32 +11,38 @@ def test_merge_similar msg2 = Logster::Message.new(0, '', 'test', 20) msg2.populate_from_env(a: "2", c: "3") - assert_equal(msg2.grouping_key, msg1.grouping_key) - msg1.merge_similar_message(msg2) assert_equal(20, msg1.timestamp) assert_equal(10, msg1.first_timestamp) - - assert Array === msg1.env - assert_equal(msg1.env.size, 2) - assert({ a: "1", b: "2" } <= msg1.env[0]) - assert({ "a" => "2", "c" => "3" } <= msg1.env[1]) + assert_equal(msg1.env_buffer, [msg2.env]) end - def test_merge_adds_timestamp_to_env - time1 = Time.new(2010, 1, 1, 1, 1).to_i - msg1 = Logster::Message.new(0, '', 'test', time1) - msg1.env = { a: "aa", b: "bb" } - - time2 = 
Time.new(2011, 1, 1, 1, 1).to_i - msg2 = Logster::Message.new(0, '', 'test', time2) - msg2.env = { e: "ee", f: "ff" } - - msg1.merge_similar_message(msg2) - assert_equal(time1, msg1.env[0]["time"]) - assert_equal(time2, msg1.env[1]["time"]) + def test_populate_from_env_will_add_time_to_env_unless_it_already_exists + t = (Time.new.to_f * 1000).to_i + msg = Logster::Message.new(0, '', 'test', t) + msg.populate_from_env({}) + assert_equal(t, msg.env["time"]) + + msg = Logster::Message.new(0, '', 'test', t) + msg.populate_from_env(time: 5) + assert_nil(msg.env["time"]) + assert_equal(5, msg.env[:time]) + + msg = Logster::Message.new(0, '', 'test', t) + msg.populate_from_env("time" => 6) + assert_equal(6, msg.env["time"]) + assert_nil(msg.env[:time]) + + msg = Logster::Message.new(0, '', 'test', t) + msg.populate_from_env([{ "time" => 6 }, { "time" => 8 }]) + assert_equal([6, 8], msg.env.map { |e| e["time"] }) + assert_equal([nil, nil], msg.env.map { |e| e[:time] }) + + msg = Logster::Message.new(0, '', 'test', t) + msg.populate_from_env([{ dsd: 6 }, { dsd: 8 }]) + assert_equal([t, t], msg.env.map { |e| e["time"] }) end def test_merge_messages_both_with_array_envs @@ -47,26 +53,18 @@ def test_merge_messages_both_with_array_envs msg2.env = [{ e: "ee", f: "ff" }, { g: "gg", h: "hh" }] msg1.merge_similar_message(msg2) - - # new env should be an array, but it should never have - # another array of envs within itself (hence flatten(1)) - assert_equal(msg1.env.size, 4) - assert_equal(msg1.env.map(&:keys).flatten(1).map(&:to_s), %w{a b c d e f g h}) - assert_equal(msg1.env.map(&:values).flatten(1).map(&:to_s), %w{aa bb cc dd ee ff gg hh}) + assert_equal(msg2.env, msg1.env_buffer) end def test_merge_messages_one_with_array_envs msg1 = Logster::Message.new(0, '', 'test', 10) - msg1.env = [{ a: "aa", b: "bb" }, { c: "cc", d: "dd" }] + msg1.env = { e: "ee", f: "ff" } msg2 = Logster::Message.new(0, '', 'test', 20) - msg2.env = { e: "ee", f: "ff" } + msg2.env = [{ a: "aa", b: "bb" }, { c: "cc", d: "dd" }] msg1.merge_similar_message(msg2) - - assert_equal(msg1.env.size, 3) - assert_equal(msg1.env.map(&:keys).flatten(1).map(&:to_s), %w{a b c d e f time}) - assert_equal(msg1.env.map(&:values).flatten(1).map(&:to_s), %w{aa bb cc dd ee ff 20}) + assert_equal(msg2.env, msg1.env_buffer) end def test_adds_application_version @@ -87,8 +85,7 @@ def test_merging_sums_count_for_both_messages assert_equal(msg1.grouping_key, msg2.grouping_key) - save_env = msg1.merge_similar_message(msg2) - assert(save_env) + msg1.merge_similar_message(msg2) assert_equal(msg1.count, 15 + 13) end @@ -120,19 +117,17 @@ def test_populate_from_env_works_on_array assert hash <= msg.env[0] end - def test_ensure_env_samples_dont_exceed_50 + def test_merging_envs_add_new_envs_to_buffer msg1 = Logster::Message.new(0, '', 'test', 10, count: 50) - msg1.env = [{ a: 1 }] + msg1.env = 50.times.map { |n| { a: n } } msg2 = Logster::Message.new(0, '', 'test', 20, count: 13) - msg2.env = { b: 2 } + msg2.env = 13.times.map { |n| { b: n } } assert_equal(msg1.grouping_key, msg2.grouping_key) - save_env = msg1.merge_similar_message(msg2) - - refute(save_env) + msg1.merge_similar_message(msg2) assert_equal(63, msg1.count) # update count - assert_equal([{ a: 1 }], msg1.env) # but don't merge msg2's env into msg1's + assert_equal(msg2.env, msg1.env_buffer) end def test_message_to_h_respects_params @@ -157,21 +152,92 @@ def test_title_is_truncated_when_too_large assert_equal(600 + 3, msg.message.size) end - def 
test_env_is_not_merged_into_similar_message_if_size_will_be_too_large - default = Logster.config.maximum_message_size_bytes - Logster.config.maximum_message_size_bytes = 1000 - message = Logster::Message.new(Logger::INFO, "test", "message", count: 13) - env = [{ key1: "this is my first key", key2: "this is my second key" }] * 13 - message.env = env + def test_drop_redundant_envs + message = Logster::Message.new(Logger::WARN, '', 'message') + message.env = [{ a: 4 }] * 10 + assert_equal(10, message.env.size) + message.drop_redundant_envs(5) + assert_equal(5, message.env.size) + + env = { f: 5, g: 4 } + message.env = env.dup + message.drop_redundant_envs(1) + assert_equal(env, message.env) + end - message2 = Logster::Message.new(Logger::INFO, "test", "message") - message2.env = env.first - message.merge_similar_message(message2) + def test_apply_env_size_limit_keeps_as_many_keys_as_possible + message = Logster::Message.new(Logger::WARN, '', 'message', 1) + env = { a: 1, bb: 22, ccc: 333 } + message.env = env.dup + message.apply_env_size_limit(24) + assert_operator(message.env.to_json.bytesize, :<=, 24) + assert_equal({ a: 1, bb: 22 }.to_json.bytesize, message.env.to_json.bytesize) + + message.env = [env.dup] * 5 + message.apply_env_size_limit(24) + assert_equal(5, message.env.size) + message.env.each do |e| + assert_operator(e.to_json.bytesize, :<=, 24) + assert_equal({ a: 1, bb: 22 }.to_json.bytesize, e.to_json.bytesize) + end + + message.env = env.dup + message.apply_env_size_limit(25) + assert_operator(message.env.to_json.bytesize, :<=, 25) + assert_equal({ a: 1, bb: 22, ccc: 333 }.to_json.bytesize, message.env.to_json.bytesize) + end - # env isn't merged, but count is incremented - assert_equal(13, message.env.size) - assert_equal(14, message.count) - ensure - Logster.config.maximum_message_size_bytes = default + def test_apply_message_size_limit_removes_gems_dir_from_backtrace_to_keep_total_message_size_below_limit + backtrace = <<~TEXT + /var/www/discourse/vendor/bundle/ruby/2.6.0/gems/rails_multisite-2.0.7/lib/rails_multisite/connection_management.rb:220:in `with_connection' + /var/www/discourse/vendor/bundle/ruby/2.6.0/gems/rails_multisite-2.0.7/lib/rails_multisite/connection_management.rb:60:in `with_connection' + /var/www/discourse/lib/scheduler/defer.rb:89:in `do_work' + /var/www/discourse/lib/scheduler/defer.rb:79:in `block (2 levels) in start_thread' + TEXT + without_gems_dir = <<~TEXT + rails_multisite-2.0.7/lib/rails_multisite/connection_management.rb:220:in `with_connection' + rails_multisite-2.0.7/lib/rails_multisite/connection_management.rb:60:in `with_connection' + /var/www/discourse/lib/scheduler/defer.rb:89:in `do_work' + /var/www/discourse/lib/scheduler/defer.rb:79:in `block (2 levels) in start_thread' + TEXT + gems_dir = "/var/www/discourse/vendor/bundle/ruby/2.6.0/gems/" + message = Logster::Message.new(Logger::WARN, '', 'message', 1) + + message.backtrace = backtrace.dup + assert_operator(message.to_json(exclude_env: true).bytesize, :>=, 500) + message.apply_message_size_limit(500, gems_dir: gems_dir) + assert_operator(message.to_json(exclude_env: true).bytesize, :<=, 500) + assert_equal(without_gems_dir.strip, message.backtrace.strip) + end + + def test_apply_message_size_limit_removes_lines_from_backtrace_to_keep_total_size_below_limit + backtrace = <<~TEXT + rails_multisite-2.0.7/lib/rails_multisite/connection_management.rb:220:in `with_connection' + rails_multisite-2.0.7/lib/rails_multisite/connection_management.rb:60:in `with_connection' + 
/var/www/discourse/lib/scheduler/defer.rb:89:in `do_work' + /var/www/discourse/lib/scheduler/defer.rb:79:in `block (2 levels) in start_thread' + TEXT + message = Logster::Message.new(Logger::WARN, '', 'message', 1) + message.backtrace = backtrace.dup + assert_operator(message.to_json(exclude_env: true).bytesize, :>=, 350) + message.apply_message_size_limit(350) + assert_operator(message.to_json(exclude_env: true).bytesize, :<=, 350) + assert_equal(backtrace.lines.first(2).join.strip, message.backtrace.strip) + end + + def test_apply_message_size_limit_doesnt_remove_backtrace_entirely + message = Logster::Message.new(Logger::WARN, '', 'message', 1) + message.backtrace = "a" * 1000 + assert_operator(message.to_json(exclude_env: true).bytesize, :>=, 500) + message.apply_message_size_limit(500) + assert_operator(message.to_json(exclude_env: true).bytesize, :<=, 500) + assert_equal(("a" * 354).size, message.backtrace.size) + end + + def test_apply_message_size_limit_doesnt_hang_forever_and_doesnt_remove_backtrace_entirely + message = Logster::Message.new(Logger::WARN, '', 'message', 1) + message.backtrace = "aa" * 100 + message.apply_message_size_limit(10) + assert_equal(("aa" * 100).size, message.backtrace.size) end end diff --git a/test/logster/test_redis_store.rb b/test/logster/test_redis_store.rb index da63539b..7ec76a8c 100644 --- a/test/logster/test_redis_store.rb +++ b/test/logster/test_redis_store.rb @@ -223,10 +223,11 @@ def test_bulk_get msg = messages[n] assert_equal("test_#{n}", msg.message) if n == 0 - assert_equal(Logster::Message.default_env, msg.env) + assert_equal(Logster::Message.default_env.merge("time" => msg.timestamp), msg.env) else assert({ "test_#{n}" => "envsss" } <= msg.env) end + assert_equal(msg.timestamp, msg.env["time"]) end end @@ -241,43 +242,50 @@ def test_replace_and_bump old_env = { "old_env" => "old value" } message = @store.report(Logger::WARN, "test", "A", env: old_env) - unsaved_env = { "unsaved_env" => "lost value" } - message.env = unsaved_env + extra_env = { "saved_env" => "saved value!" } + similar = @store.report(Logger::WARN, 'test', 'A', env: extra_env) + message.merge_similar_message(similar) - @store.replace_and_bump(message, save_env: false) + @store.replace_and_bump(message) message = @store.get(message.key) - assert(old_env <= message.env) - refute(unsaved_env <= message.env) - - saved_env = { "saved_env" => "saved value!" 
} - - message.env = saved_env - - @store.replace_and_bump(message) message = @store.get(message.key) - assert(saved_env == message.env) + assert(extra_env <= message.env[0]) + assert(old_env <= message.env[1]) + end + def test_ensure_env_doesnt_exceed_50_items + Logster.config.allow_grouping = true + message = nil + 52.times do |n| + message = @store.report(Logger::WARN, "", "message", env: { a: n }) + end message = @store.get(message.key) + assert_equal(52, message.count) + assert_equal(50, message.env.size) + assert_equal((2..51).to_a, message.env.map { |e| e[:a] || e["a"] }.sort) + ensure + Logster.config.allow_grouping = false end def test_merging_performance Logster.config.allow_grouping = true backtrace = "fake backtrace" - env = { "some_env" => "some env" } - another_env = { "another_env" => "more env" } - yet_another_env = { "moaar_env" => "more env" } - - @store.report(Logger::WARN, "", "title", backtrace: backtrace, env: env, count: 49) + env = [{ "some_env" => "some env" }] * 50 + new_env = { "some_key" => "1234442" } - message = @store.report(Logger::WARN, "", "title", backtrace: backtrace, env: another_env) - assert_instance_of(Array, message.env) - assert_equal(2, message.env.size) - assert(env <= message.env[0]) - assert(another_env <= message.env[1]) + @store.report(Logger::WARN, "", "title", backtrace: backtrace, env: env, count: 50) - message = @store.report(Logger::WARN, "", "title", backtrace: backtrace, env: yet_another_env) - # we don't need to load env from redis cause we don't - # need to merge new env samples if count is 50 or more + message = @store.report(Logger::WARN, "", "title", backtrace: backtrace, env: new_env) + # env is nil because we don't need to fetch it from redis + # we just send the newly added envs to redis and it'll + # take care of prepending them to the existing envs assert_nil(message.env) + + message = @store.get(message.key) + assert_instance_of(Array, message.env) + assert_equal(50, message.env.size) + assert_equal(51, message.count) + assert(new_env <= message.env[0]) ensure Logster.config.allow_grouping = false end @@ -548,7 +556,7 @@ def test_data_kept_intact_on_report_when_env_matches_an_ignore_pattern # message2 shouldn't vanish even if # its env matches an ignore pattern # however it should be merged with message1 - assert_equal("business17", message.env[1]["cluster"]) + assert_equal("business17", message.env[0]["cluster"]) ensure # reset so it doesn't affect other tests @store.ignore = nil @@ -833,19 +841,46 @@ def test_suppression_patterns_are_cached end end - def test_store_trims_too_big_envs - default = Logster.config.maximum_message_size_bytes - Logster.config.maximum_message_size_bytes = 1000 - message = Logster::Message.new(Logger::INFO, "test", "message") - env = [{ key1: "this is my first key", key2: "this is my second key" }] * 40 - message.env = env - @store.save(message) - trimmed_message = @store.latest.first - assert_equal(13, trimmed_message.env.size) - size = message.to_json(exclude_env: true).bytesize + message.env_json.bytesize - assert_operator(1000, :>, size) - ensure - Logster.config.maximum_message_size_bytes = default + def test_ensure_messages_meet_config_size_limits_when_messages_are_saved + config_reset( + maximum_message_size_bytes: 300, + max_env_bytes: 30, + max_env_count_per_message: 5 + ) do + env = [{ aaa: 111, bbb: 222, ccc: 333, ddd: 444 }] * 7 + message = @store.report(Logger::WARN, '', 'test', backtrace: "aa\n" * 100, env: env.dup, timestamp: 777) + message = @store.get(message.key) + assert_operator(message.to_json(exclude_env:
true).bytesize, :<, 300) + assert_equal(5, message.env.size) + message.env.each do |e| + assert_operator(e.to_json.bytesize, :<=, 30) + assert_equal({ "aaa" => 111, "time" => 777 }, e) + end + end + end + + def test_ensure_messages_meet_config_size_limits_when_merged_together + + config_reset( + max_env_bytes: 30, + max_env_count_per_message: 5, + allow_grouping: true + ) do + env = [{ a: 1, aa: 22, aaa: 333, aaaa: 4444 }] * 3 + env_2 = [{ b: 1, bb: 22, bbb: 333, bbbb: 4444 }] * 3 + @store.report(Logger::WARN, '', 'test', backtrace: "aa\n" * 100, env: env.dup, timestamp: 777) + message = @store.report(Logger::WARN, '', 'test', backtrace: "aa\n" * 100, env: env_2.dup, timestamp: 777) + message = @store.get(message.key) + assert_equal(5, message.env.size) + message.env.first(3).each do |e| + assert_operator(e.to_json.bytesize, :<=, 30) + assert_equal({ "b" => 1, "bb" => 22, "time" => 777 }, e) + end + message.env.last(2).each do |e| + assert_operator(e.to_json.bytesize, :<=, 30) + assert_equal({ "a" => 1, "aa" => 22, "time" => 777 }, e) + end + end end def test_custom_grouping_patterns @@ -999,6 +1034,19 @@ def test_latest_doesnt_include_rows_that_are_removed_from_grouping_patterns_due_ private + def config_reset(configs) + defaults = {} + configs.each do |k,v| + defaults[k] = Logster.config.public_send(k) + Logster.config.public_send("#{k}=", v) + end + yield + ensure + defaults.each do |k,v| + Logster.config.public_send("#{k}=", v) + end + end + def reset_redis @store.clear_all end
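Stepping back from the diff: `BaseStore#report` now normalizes every incoming message in three stages before it is saved or merged (the third stage only runs for brand-new messages; merged ones reuse the already-trimmed stored message). A minimal sketch of the same calls in isolation — the methods and config names are from this diff, the message contents are made up:

```ruby
require "logger"
require "logster"

msg = Logster::Message.new(Logger::WARN, "", "example message")
msg.backtrace = caller.join("\n")
msg.env = Array.new(60) { |n| { "request_id" => n, "path" => "/some/request/path" } }

# 1. Cap the number of env hashes (everything past the limit, default 50, is dropped).
msg.drop_redundant_envs(Logster.config.max_env_count_per_message)

# 2. Trim each env hash to max_env_bytes (default 1000), always keeping "time"
#    and then greedily keeping the smallest key-value pairs that still fit.
msg.apply_env_size_limit(Logster.config.max_env_bytes)

# 3. Shrink the backtrace (gems_dir prefixes first, then whole lines, then single
#    characters) until the env-less JSON fits maximum_message_size_bytes; it gives
#    up early if even an empty backtrace couldn't fit.
msg.apply_message_size_limit(
  Logster.config.maximum_message_size_bytes,
  gems_dir: Logster.config.gems_dir
)
```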
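The storage model changed to match: instead of one Redis hash mapping every message key to a single `env` JSON blob, `RedisStore` now keeps one Redis list per message, named with `ENV_PREFIX` (`logster-env-<message key>`). That is why `delete_env` has to `DEL` keys individually and why `bulk_get` fetches envs through the small `BULK_ENV_GET_LUA` script in a single round trip. A rough redis-rb equivalent of what `push_env` does — the message key and env samples here are made up:

```ruby
require "json"
require "redis"

redis = Redis.new
message_key = "some-message-key"  # hypothetical
new_envs = [{ "time" => 777, "path" => "/a" }, { "time" => 778, "path" => "/b" }]
max_envs = 50                     # Logster.config.max_env_count_per_message default

list_key = "logster-env-#{message_key}"
# LPUSH in reverse so the newest env sample ends up at the head of the list,
# matching the order bulk_get reads back with LRANGE.
redis.lpush(list_key, new_envs.map(&:to_json).reverse)
# Trim from the tail so a grouping message never keeps more than max_envs samples.
redis.ltrim(list_key, 0, max_envs - 1)
```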