diff --git a/Cargo.toml b/Cargo.toml index 5429956b..9c6d93df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ lazy_static = "^1.4" libc = { version = "^0.2", optional = true } parking_lot = "^0.11" protobuf = { version = "^2.0", optional = true } -regex = "^1.3" +memchr = "^2.3" reqwest = { version = "^0.11", features = ["blocking"], optional = true } thiserror = "^1.0" diff --git a/src/desc.rs b/src/desc.rs index f5c0df21..e5c0f9a0 100644 --- a/src/desc.rs +++ b/src/desc.rs @@ -5,30 +5,44 @@ use std::collections::{BTreeSet, HashMap}; use std::hash::Hasher; use fnv::FnvHasher; -use regex::Regex; use crate::errors::{Error, Result}; use crate::metrics::SEPARATOR_BYTE; use crate::proto::LabelPair; -// Details of required format are at -// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels -fn is_valid_metric_name(name: &str) -> bool { - lazy_static! { - static ref VALIDATOR: Regex = - Regex::new("^[a-zA-Z_:][a-zA-Z0-9_:]*$").expect("Regex to be valid."); - } +// [a-zA-Z_] +fn matches_charset_without_colon(c: char) -> bool { + c.is_ascii_alphabetic() || c == '_' +} - VALIDATOR.is_match(name) +// [a-zA-Z_:] +fn matches_charset_with_colon(c: char) -> bool { + matches_charset_without_colon(c) || c == ':' } -fn is_valid_label_name(name: &str) -> bool { - lazy_static! { - static ref VALIDATOR: Regex = - Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").expect("Regex to be valid."); - } +// Equivalent to regex ^[?][?0-9]*$ where ? denotes char set as validated by charset_validator +fn is_valid_ident bool>(input: &str, mut charset_validator: F) -> bool { + let mut chars = input.chars(); + let zeroth = chars.next(); + zeroth + .and_then(|zeroth| { + if charset_validator(zeroth) { + Some(chars.all(|c| charset_validator(c) || c.is_digit(10))) + } else { + None + } + }) + .unwrap_or(false) +} + +// Details of required format are at +// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels +pub(super) fn is_valid_metric_name(name: &str) -> bool { + is_valid_ident(name, matches_charset_with_colon) +} - VALIDATOR.is_match(name) +pub(super) fn is_valid_label_name(name: &str) -> bool { + is_valid_ident(name, matches_charset_without_colon) } /// The descriptor used by every Prometheus [`Metric`](crate::core::Metric). It is essentially diff --git a/src/encoder/text.rs b/src/encoder/text.rs index cfdeb64a..17516bc0 100644 --- a/src/encoder/text.rs +++ b/src/encoder/text.rs @@ -1,6 +1,5 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. -use regex::{Match, Regex}; use std::borrow::Cow; use std::io::Write; @@ -216,26 +215,21 @@ fn label_pairs_to_text( Ok(()) } +fn find_first_occurence(v: &str, include_double_quote: bool) -> Option { + if include_double_quote { + memchr::memchr3(b'\\', b'\n', b'\"', v.as_bytes()) + } else { + memchr::memchr2(b'\\', b'\n', v.as_bytes()) + } +} + /// `escape_string` replaces `\` by `\\`, new line character by `\n`, and `"` by `\"` if /// `include_double_quote` is true. /// /// Implementation adapted from /// https://lise-henry.github.io/articles/optimising_strings.html fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> { - // Regex compilation is expensive. Use `lazy_static` to compile the regexes - // once per process lifetime and not once per function invocation. - lazy_static! { - static ref ESCAPER: Regex = Regex::new("(\\\\|\n)").expect("Regex to be valid."); - static ref QUOTED_ESCAPER: Regex = Regex::new("(\\\\|\n|\")").expect("Regex to be valid."); - } - - let first_occurence = if include_double_quote { - QUOTED_ESCAPER.find(v) - } else { - ESCAPER.find(v) - } - .as_ref() - .map(Match::start); + let first_occurence = find_first_occurence(v, include_double_quote); if let Some(first) = first_occurence { let mut escaped = String::with_capacity(v.len() * 2);