From 021d07b45bdeb56d91106b098b61ab1a3714bc0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Habov=C5=A1tiak?= Date: Thu, 4 Feb 2021 17:43:14 +0100 Subject: [PATCH] Replace regex with memchr (#385) `regex` was used only in four trivial cases that could be implemented more simply, either naively or using memchr, without losing performance. As such the dependency needlessly increases build time, size of binary and attack surface. This change replaces `regex` with `naive`/`memchr` implementations. Signed-off-by: Martin Habovstiak Co-authored-by: Max Inden --- Cargo.toml | 2 +- src/desc.rs | 44 +++++++++++++++++++++++++++++--------------- src/encoder/text.rs | 24 +++++++++--------------- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5429956b..9c6d93df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ lazy_static = "^1.4" libc = { version = "^0.2", optional = true } parking_lot = "^0.11" protobuf = { version = "^2.0", optional = true } -regex = "^1.3" +memchr = "^2.3" reqwest = { version = "^0.11", features = ["blocking"], optional = true } thiserror = "^1.0" diff --git a/src/desc.rs b/src/desc.rs index f5c0df21..e5c0f9a0 100644 --- a/src/desc.rs +++ b/src/desc.rs @@ -5,30 +5,44 @@ use std::collections::{BTreeSet, HashMap}; use std::hash::Hasher; use fnv::FnvHasher; -use regex::Regex; use crate::errors::{Error, Result}; use crate::metrics::SEPARATOR_BYTE; use crate::proto::LabelPair; -// Details of required format are at -// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels -fn is_valid_metric_name(name: &str) -> bool { - lazy_static! 
{ - static ref VALIDATOR: Regex = - Regex::new("^[a-zA-Z_:][a-zA-Z0-9_:]*$").expect("Regex to be valid."); - } +// [a-zA-Z_] +fn matches_charset_without_colon(c: char) -> bool { + c.is_ascii_alphabetic() || c == '_' +} - VALIDATOR.is_match(name) +// [a-zA-Z_:] +fn matches_charset_with_colon(c: char) -> bool { + matches_charset_without_colon(c) || c == ':' } -fn is_valid_label_name(name: &str) -> bool { - lazy_static! { - static ref VALIDATOR: Regex = - Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").expect("Regex to be valid."); - } +// Equivalent to regex ^[?][?0-9]*$ where ? denotes char set as validated by charset_validator +fn is_valid_ident<F: FnMut(char) -> bool>(input: &str, mut charset_validator: F) -> bool { + let mut chars = input.chars(); + let zeroth = chars.next(); + zeroth + .and_then(|zeroth| { + if charset_validator(zeroth) { + Some(chars.all(|c| charset_validator(c) || c.is_digit(10))) + } else { + None + } + }) + .unwrap_or(false) +} + +// Details of required format are at +// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels +pub(super) fn is_valid_metric_name(name: &str) -> bool { + is_valid_ident(name, matches_charset_with_colon) +} - VALIDATOR.is_match(name) +pub(super) fn is_valid_label_name(name: &str) -> bool { + is_valid_ident(name, matches_charset_without_colon) } /// The descriptor used by every Prometheus [`Metric`](crate::core::Metric). It is essentially diff --git a/src/encoder/text.rs b/src/encoder/text.rs index cfdeb64a..17516bc0 100644 --- a/src/encoder/text.rs +++ b/src/encoder/text.rs @@ -1,6 +1,5 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. 
-use regex::{Match, Regex}; use std::borrow::Cow; use std::io::Write; @@ -216,26 +215,21 @@ fn label_pairs_to_text( Ok(()) } +fn find_first_occurence(v: &str, include_double_quote: bool) -> Option<usize> { + if include_double_quote { + memchr::memchr3(b'\\', b'\n', b'\"', v.as_bytes()) + } else { + memchr::memchr2(b'\\', b'\n', v.as_bytes()) + } +} + /// `escape_string` replaces `\` by `\\`, new line character by `\n`, and `"` by `\"` if /// `include_double_quote` is true. /// /// Implementation adapted from /// https://lise-henry.github.io/articles/optimising_strings.html fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> { - // Regex compilation is expensive. Use `lazy_static` to compile the regexes - // once per process lifetime and not once per function invocation. - lazy_static! { - static ref ESCAPER: Regex = Regex::new("(\\\\|\n)").expect("Regex to be valid."); - static ref QUOTED_ESCAPER: Regex = Regex::new("(\\\\|\n|\")").expect("Regex to be valid."); - } - - let first_occurence = if include_double_quote { - QUOTED_ESCAPER.find(v) - } else { - ESCAPER.find(v) - } - .as_ref() - .map(Match::start); + let first_occurence = find_first_occurence(v, include_double_quote); if let Some(first) = first_occurence { let mut escaped = String::with_capacity(v.len() * 2);