Skip to content

Commit

Permalink
Replace regex with memchr (#385)
Browse files Browse the repository at this point in the history
`regex` was used only in four trivial cases that could be implemented
more simply, either naively or using memchr, without losing performance.
As such, the dependency needlessly increases build time, binary size,
and attack surface.

This change replaces `regex` with naive or `memchr`-based implementations.

Signed-off-by: Martin Habovstiak <martin.habovstiak@gmail.com>
Co-authored-by: Max Inden <mail@max-inden.de>
  • Loading branch information
Kixunil and mxinden committed Feb 4, 2021
1 parent e1b197b commit 021d07b
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 31 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Expand Up @@ -31,7 +31,7 @@ lazy_static = "^1.4"
libc = { version = "^0.2", optional = true }
parking_lot = "^0.11"
protobuf = { version = "^2.0", optional = true }
regex = "^1.3"
memchr = "^2.3"
reqwest = { version = "^0.11", features = ["blocking"], optional = true }
thiserror = "^1.0"

Expand Down
44 changes: 29 additions & 15 deletions src/desc.rs
Expand Up @@ -5,30 +5,44 @@ use std::collections::{BTreeSet, HashMap};
use std::hash::Hasher;

use fnv::FnvHasher;
use regex::Regex;

use crate::errors::{Error, Result};
use crate::metrics::SEPARATOR_BYTE;
use crate::proto::LabelPair;

// Details of required format are at
// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
fn is_valid_metric_name(name: &str) -> bool {
lazy_static! {
static ref VALIDATOR: Regex =
Regex::new("^[a-zA-Z_:][a-zA-Z0-9_:]*$").expect("Regex to be valid.");
}
/// Returns `true` when `c` belongs to the character class `[a-zA-Z_]`,
/// i.e. it is an ASCII letter or an underscore.
fn matches_charset_without_colon(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}

VALIDATOR.is_match(name)
/// Returns `true` when `c` belongs to the character class `[a-zA-Z_:]`:
/// everything accepted by `matches_charset_without_colon`, plus the colon.
fn matches_charset_with_colon(c: char) -> bool {
    match c {
        ':' => true,
        other => matches_charset_without_colon(other),
    }
}

fn is_valid_label_name(name: &str) -> bool {
lazy_static! {
static ref VALIDATOR: Regex =
Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").expect("Regex to be valid.");
}
/// Checks `input` against the pattern `^[?][?0-9]*$`, where `?` stands for the
/// character set accepted by `charset_validator`.
///
/// The first character must satisfy `charset_validator`; each following
/// character must either satisfy it or be an ASCII decimal digit. An empty
/// `input` is rejected.
fn is_valid_ident<F: FnMut(char) -> bool>(input: &str, mut charset_validator: F) -> bool {
    let mut rest = input.chars();
    match rest.next() {
        // Leading character is acceptable: validate the remainder, consulting
        // the validator before falling back to the digit check.
        Some(first) if charset_validator(first) => {
            rest.all(|c| charset_validator(c) || c.is_ascii_digit())
        }
        // Empty input, or a leading character outside the allowed set.
        _ => false,
    }
}

// Returns `true` if `name` is a valid metric name: a non-empty string whose
// first character is in `[a-zA-Z_:]` and whose remaining characters are in
// `[a-zA-Z0-9_:]`.
//
// Details of required format are at
// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
pub(super) fn is_valid_metric_name(name: &str) -> bool {
is_valid_ident(name, matches_charset_with_colon)
}

VALIDATOR.is_match(name)
// Returns `true` if `name` is a valid label name: a non-empty string whose
// first character is in `[a-zA-Z_]` and whose remaining characters are in
// `[a-zA-Z0-9_]`. Unlike metric names, colons are not allowed.
pub(super) fn is_valid_label_name(name: &str) -> bool {
is_valid_ident(name, matches_charset_without_colon)
}

/// The descriptor used by every Prometheus [`Metric`](crate::core::Metric). It is essentially
Expand Down
24 changes: 9 additions & 15 deletions src/encoder/text.rs
@@ -1,6 +1,5 @@
// Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0.

use regex::{Match, Regex};
use std::borrow::Cow;
use std::io::Write;

Expand Down Expand Up @@ -216,26 +215,21 @@ fn label_pairs_to_text(
Ok(())
}

/// Returns the byte offset of the first character in `v` that needs escaping,
/// or `None` if there is none.
///
/// Backslash and newline are always searched for; the double quote is searched
/// for only when `include_double_quote` is `true`.
fn find_first_occurence(v: &str, include_double_quote: bool) -> Option<usize> {
    let haystack = v.as_bytes();
    if !include_double_quote {
        return memchr::memchr2(b'\\', b'\n', haystack);
    }
    memchr::memchr3(b'\\', b'\n', b'"', haystack)
}

/// `escape_string` replaces `\` by `\\`, new line character by `\n`, and `"` by `\"` if
/// `include_double_quote` is true.
///
/// Implementation adapted from
/// https://lise-henry.github.io/articles/optimising_strings.html
fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> {
// Regex compilation is expensive. Use `lazy_static` to compile the regexes
// once per process lifetime and not once per function invocation.
lazy_static! {
static ref ESCAPER: Regex = Regex::new("(\\\\|\n)").expect("Regex to be valid.");
static ref QUOTED_ESCAPER: Regex = Regex::new("(\\\\|\n|\")").expect("Regex to be valid.");
}

let first_occurence = if include_double_quote {
QUOTED_ESCAPER.find(v)
} else {
ESCAPER.find(v)
}
.as_ref()
.map(Match::start);
let first_occurence = find_first_occurence(v, include_double_quote);

if let Some(first) = first_occurence {
let mut escaped = String::with_capacity(v.len() * 2);
Expand Down

0 comments on commit 021d07b

Please sign in to comment.