From b73f78e8ea3622db2d176da152e1403fce961212 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 8 Jul 2021 21:35:42 +0300 Subject: [PATCH] Avoid accidentally quadratic issue in TextEncoder::encode_utf8 Signed-off-by: Aleksey Kladov --- src/encoder/text.rs | 122 ++++++++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 43 deletions(-) diff --git a/src/encoder/text.rs b/src/encoder/text.rs index 4a70536f..d444fdce 100644 --- a/src/encoder/text.rs +++ b/src/encoder/text.rs @@ -1,7 +1,7 @@ // Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0. use std::borrow::Cow; -use std::io::Write; +use std::io::{self, Write}; use std::mem; use crate::errors::Result; @@ -26,26 +26,27 @@ impl TextEncoder { pub fn new() -> TextEncoder { TextEncoder } - pub fn encode_utf8( - &self, - metric_families: &[MetricFamily], - buf: &mut String, - ) -> Result<()> { - let mut bytes = mem::take(buf).into_bytes(); - self.encode(metric_families, &mut bytes)?; - let text = String::from_utf8(bytes).unwrap_or_else(|_| unreachable!()); - *buf = text; + /// Appends metrics to a given `String` buffer. + /// + /// This is a convenience wrapper around `::encode`. + pub fn encode_utf8(&self, metric_families: &[MetricFamily], buf: &mut String) -> Result<()> { + // Note: it's important to *not* re-validate ut8 validity for the + // entirety of `buf`. Otherwise, repeatedly appending metrics to the + // same `buf` will lead to quadratic behavior. That's why we use + // `Utf8Writer` abstraction to skip the validation. + Utf8Writer::write_to_string(buf, |writer| self.encode_impl(metric_families, writer))?; Ok(()) } + /// Converts metrics to `String. + /// + /// This is a convenience wrapper around `::encode`. pub fn encode_to_string(&self, metric_families: &[MetricFamily]) -> Result { let mut buf = String::new(); self.encode_utf8(metric_families, &mut buf)?; Ok(buf) } -} -impl Encoder for TextEncoder { - fn encode(&self, metric_families: &[MetricFamily], writer: &mut W) -> Result<()> { + fn encode_impl(&self, metric_families: &[MetricFamily], writer: &mut Utf8Writer) -> Result<()> { for mf in metric_families { // Fail-fast checks. check_metric_family(mf)?; @@ -54,21 +55,21 @@ impl Encoder for TextEncoder { let name = mf.get_name(); let help = mf.get_help(); if !help.is_empty() { - writer.write_all(b"# HELP ")?; - writer.write_all(name.as_bytes())?; - writer.write_all(b" ")?; - writer.write_all(escape_string(help, false).as_bytes())?; - writer.write_all(b"\n")?; + writer.write_all("# HELP ")?; + writer.write_all(name)?; + writer.write_all(" ")?; + writer.write_all(&escape_string(help, false))?; + writer.write_all("\n")?; } // Write `# TYPE` header. let metric_type = mf.get_field_type(); let lowercase_type = format!("{:?}", metric_type).to_lowercase(); - writer.write_all(b"# TYPE ")?; - writer.write_all(name.as_bytes())?; - writer.write_all(b" ")?; - writer.write_all(lowercase_type.as_bytes())?; - writer.write_all(b"\n")?; + writer.write_all("# TYPE ")?; + writer.write_all(name)?; + writer.write_all(" ")?; + writer.write_all(&lowercase_type)?; + writer.write_all("\n")?; for m in mf.get_metric() { match metric_type { @@ -152,6 +153,13 @@ impl Encoder for TextEncoder { Ok(()) } +} + +impl Encoder for TextEncoder { + fn encode(&self, metric_families: &[MetricFamily], writer: &mut W) -> Result<()> { + let mut utf8_writer = Utf8Writer { writer }; + self.encode_impl(metric_families, &mut utf8_writer) + } fn format_type(&self) -> &str { TEXT_FORMAT @@ -164,30 +172,30 @@ impl Encoder for TextEncoder { /// not required), and the value. The function returns the number of bytes /// written and any error encountered. fn write_sample( - writer: &mut dyn Write, + writer: &mut Utf8Writer<'_>, name: &str, name_postfix: Option<&str>, mc: &proto::Metric, additional_label: Option<(&str, &str)>, value: f64, ) -> Result<()> { - writer.write_all(name.as_bytes())?; + writer.write_all(name)?; if let Some(postfix) = name_postfix { - writer.write_all(postfix.as_bytes())?; + writer.write_all(postfix)?; } label_pairs_to_text(mc.get_label(), additional_label, writer)?; - writer.write_all(b" ")?; - writer.write_all(value.to_string().as_bytes())?; + writer.write_all(" ")?; + writer.write_all(&value.to_string())?; let timestamp = mc.get_timestamp_ms(); if timestamp != 0 { - writer.write_all(b" ")?; - writer.write_all(timestamp.to_string().as_bytes())?; + writer.write_all(" ")?; + writer.write_all(×tamp.to_string())?; } - writer.write_all(b"\n")?; + writer.write_all("\n")?; Ok(()) } @@ -202,32 +210,32 @@ fn write_sample( fn label_pairs_to_text( pairs: &[proto::LabelPair], additional_label: Option<(&str, &str)>, - writer: &mut dyn Write, + writer: &mut Utf8Writer<'_>, ) -> Result<()> { if pairs.is_empty() && additional_label.is_none() { return Ok(()); } - let mut separator = b"{"; + let mut separator = "{"; for lp in pairs { writer.write_all(separator)?; - writer.write_all(lp.get_name().as_bytes())?; - writer.write_all(b"=\"")?; - writer.write_all(escape_string(lp.get_value(), true).as_bytes())?; - writer.write_all(b"\"")?; + writer.write_all(&lp.get_name())?; + writer.write_all("=\"")?; + writer.write_all(&escape_string(lp.get_value(), true))?; + writer.write_all("\"")?; - separator = b","; + separator = ","; } if let Some((name, value)) = additional_label { writer.write_all(separator)?; - writer.write_all(name.as_bytes())?; - writer.write_all(b"=\"")?; - writer.write_all(escape_string(value, true).as_bytes())?; - writer.write_all(b"\"")?; + writer.write_all(name)?; + writer.write_all("=\"")?; + writer.write_all(&escape_string(value, true))?; + writer.write_all("\"")?; } - writer.write_all(b"}")?; + writer.write_all("}")?; Ok(()) } @@ -276,6 +284,34 @@ fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> { } } +struct Utf8Writer<'a> { + // Safety invariant: writer gets only valid utf8 fragments. + writer: &'a mut dyn Write, +} + +impl<'a> Utf8Writer<'a> { + fn write_all(&mut self, text: &str) -> io::Result<()> { + self.writer.write_all(text.as_bytes())?; + Ok(()) + } + fn write_to_string( + buf: &mut String, + f: impl FnOnce(&mut Utf8Writer<'_>) -> Result<()>, + ) -> Result<()> { + let mut bytes = mem::take(buf).into_bytes(); + let old_len = bytes.len(); + f(&mut Utf8Writer { writer: &mut bytes })?; + + // Safety: bytes was utf8 originally, we appended some number of utf8 + // fragments, so it must still be valid utf8. + debug_assert!(std::str::from_utf8(buf[old_len..]).is_ok()); + let text = unsafe { String::from_utf8_unchecked(bytes) }; + + *buf = text; + Ok(()) + } +} + #[cfg(test)] mod tests {