Skip to content

Commit

Permalink
Avoid accidentally quadratic issue in TextEncoder::encode_utf8
Browse files Browse the repository at this point in the history
Signed-off-by: Aleksey Kladov <aleksey.kladov@gmail.com>
  • Loading branch information
matklad committed Jul 9, 2021
1 parent ada5773 commit 062224b
Showing 1 changed file with 79 additions and 43 deletions.
122 changes: 79 additions & 43 deletions src/encoder/text.rs
@@ -1,7 +1,7 @@
// Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0.

use std::borrow::Cow;
use std::io::Write;
use std::io::{self, Write};
use std::mem;

use crate::errors::Result;
Expand All @@ -26,26 +26,27 @@ impl TextEncoder {
pub fn new() -> TextEncoder {
TextEncoder
}
pub fn encode_utf8(
&self,
metric_families: &[MetricFamily],
buf: &mut String,
) -> Result<()> {
let mut bytes = mem::take(buf).into_bytes();
self.encode(metric_families, &mut bytes)?;
let text = String::from_utf8(bytes).unwrap_or_else(|_| unreachable!());
*buf = text;
/// Appends metrics to a given `String` buffer.
///
/// This is a convenience wrapper around `<TextEncoder as Encoder>::encode`.
pub fn encode_utf8(&self, metric_families: &[MetricFamily], buf: &mut String) -> Result<()> {
// Note: it's important to *not* re-validate ut8 validity for the
// entirety of `buf`. Otherwise, repeatedly appending metrics to the
// same `buf` will lead to quadratic behavior. That's why we use
// `Utf8Writer` abstraction to skip the validation.
Utf8Writer::write_to_string(buf, |writer| self.encode_impl(metric_families, writer))?;
Ok(())
}
/// Converts metrics to `String.
///
/// This is a convenience wrapper around `<TextEncoder as Encoder>::encode`.
pub fn encode_to_string(&self, metric_families: &[MetricFamily]) -> Result<String> {
let mut buf = String::new();
self.encode_utf8(metric_families, &mut buf)?;
Ok(buf)
}
}

impl Encoder for TextEncoder {
fn encode<W: Write>(&self, metric_families: &[MetricFamily], writer: &mut W) -> Result<()> {
fn encode_impl(&self, metric_families: &[MetricFamily], writer: &mut Utf8Writer) -> Result<()> {
for mf in metric_families {
// Fail-fast checks.
check_metric_family(mf)?;
Expand All @@ -54,21 +55,21 @@ impl Encoder for TextEncoder {
let name = mf.get_name();
let help = mf.get_help();
if !help.is_empty() {
writer.write_all(b"# HELP ")?;
writer.write_all(name.as_bytes())?;
writer.write_all(b" ")?;
writer.write_all(escape_string(help, false).as_bytes())?;
writer.write_all(b"\n")?;
writer.write_all("# HELP ")?;
writer.write_all(name)?;
writer.write_all(" ")?;
writer.write_all(&escape_string(help, false))?;
writer.write_all("\n")?;
}

// Write `# TYPE` header.
let metric_type = mf.get_field_type();
let lowercase_type = format!("{:?}", metric_type).to_lowercase();
writer.write_all(b"# TYPE ")?;
writer.write_all(name.as_bytes())?;
writer.write_all(b" ")?;
writer.write_all(lowercase_type.as_bytes())?;
writer.write_all(b"\n")?;
writer.write_all("# TYPE ")?;
writer.write_all(name)?;
writer.write_all(" ")?;
writer.write_all(&lowercase_type)?;
writer.write_all("\n")?;

for m in mf.get_metric() {
match metric_type {
Expand Down Expand Up @@ -152,6 +153,13 @@ impl Encoder for TextEncoder {

Ok(())
}
}

impl Encoder for TextEncoder {
fn encode<W: Write>(&self, metric_families: &[MetricFamily], writer: &mut W) -> Result<()> {
let mut utf8_writer = Utf8Writer { writer };
self.encode_impl(metric_families, &mut utf8_writer)
}

fn format_type(&self) -> &str {
TEXT_FORMAT
Expand All @@ -164,30 +172,30 @@ impl Encoder for TextEncoder {
/// not required), and the value. The function returns the number of bytes
/// written and any error encountered.
fn write_sample(
writer: &mut dyn Write,
writer: &mut Utf8Writer<'_>,
name: &str,
name_postfix: Option<&str>,
mc: &proto::Metric,
additional_label: Option<(&str, &str)>,
value: f64,
) -> Result<()> {
writer.write_all(name.as_bytes())?;
writer.write_all(name)?;
if let Some(postfix) = name_postfix {
writer.write_all(postfix.as_bytes())?;
writer.write_all(postfix)?;
}

label_pairs_to_text(mc.get_label(), additional_label, writer)?;

writer.write_all(b" ")?;
writer.write_all(value.to_string().as_bytes())?;
writer.write_all(" ")?;
writer.write_all(&value.to_string())?;

let timestamp = mc.get_timestamp_ms();
if timestamp != 0 {
writer.write_all(b" ")?;
writer.write_all(timestamp.to_string().as_bytes())?;
writer.write_all(" ")?;
writer.write_all(&timestamp.to_string())?;
}

writer.write_all(b"\n")?;
writer.write_all("\n")?;

Ok(())
}
Expand All @@ -202,32 +210,32 @@ fn write_sample(
fn label_pairs_to_text(
pairs: &[proto::LabelPair],
additional_label: Option<(&str, &str)>,
writer: &mut dyn Write,
writer: &mut Utf8Writer<'_>,
) -> Result<()> {
if pairs.is_empty() && additional_label.is_none() {
return Ok(());
}

let mut separator = b"{";
let mut separator = "{";
for lp in pairs {
writer.write_all(separator)?;
writer.write_all(lp.get_name().as_bytes())?;
writer.write_all(b"=\"")?;
writer.write_all(escape_string(lp.get_value(), true).as_bytes())?;
writer.write_all(b"\"")?;
writer.write_all(&lp.get_name())?;
writer.write_all("=\"")?;
writer.write_all(&escape_string(lp.get_value(), true))?;
writer.write_all("\"")?;

separator = b",";
separator = ",";
}

if let Some((name, value)) = additional_label {
writer.write_all(separator)?;
writer.write_all(name.as_bytes())?;
writer.write_all(b"=\"")?;
writer.write_all(escape_string(value, true).as_bytes())?;
writer.write_all(b"\"")?;
writer.write_all(name)?;
writer.write_all("=\"")?;
writer.write_all(&escape_string(value, true))?;
writer.write_all("\"")?;
}

writer.write_all(b"}")?;
writer.write_all("}")?;

Ok(())
}
Expand Down Expand Up @@ -276,6 +284,34 @@ fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> {
}
}

struct Utf8Writer<'a> {
// Safety invariant: writer gets only valid utf8 fragments.
writer: &'a mut dyn Write,
}

impl<'a> Utf8Writer<'a> {
fn write_all(&mut self, text: &str) -> io::Result<()> {
self.writer.write_all(text.as_bytes())?;
Ok(())
}
fn write_to_string(
buf: &mut String,
f: impl FnOnce(&mut Utf8Writer<'_>) -> Result<()>,
) -> Result<()> {
let mut bytes = mem::take(buf).into_bytes();
let old_len = bytes.len();
f(&mut Utf8Writer { writer: &mut bytes })?;

// Safety: bytes was utf8 originally, we appended some number of utf8
// fragments, so it must still be valid utf8.
debug_assert!(std::str::from_utf8(&bytes[old_len..]).is_ok());
let text = unsafe { String::from_utf8_unchecked(bytes) };

*buf = text;
Ok(())
}
}

#[cfg(test)]
mod tests {

Expand Down

0 comments on commit 062224b

Please sign in to comment.