Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix table width when containing unicode, fix crash due to min_width n… #64

Merged
merged 2 commits into from Nov 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
86 changes: 56 additions & 30 deletions src/lib.rs
Expand Up @@ -77,7 +77,7 @@ use markup5ever_rcdom::{
RcDom,
};
use std::cell::Cell;
use std::cmp::{min,max};
use std::cmp::{max, min};
use std::io;
use std::io::Write;
use std::iter::{once, repeat};
Expand Down Expand Up @@ -285,9 +285,8 @@ impl RenderTable {
}
let size = sizes.iter().map(|s| s.size).sum(); // Include borders?
let min_width = sizes.iter().map(|s| s.min_width).sum::<usize>() + self.num_columns - 1;
self.size_estimate.set(Some(SizeEstimate {
size, min_width,
}));
self.size_estimate
.set(Some(SizeEstimate { size, min_width }));
}

/// Calculate and store (or return stored value) of estimated size
Expand Down Expand Up @@ -370,7 +369,7 @@ impl RenderNode {
}
}

/// Get a size estimate (~characters)
/// Get a size estimate
pub fn get_size_estimate(&self) -> SizeEstimate {
// If it's already calculated, then just return the answer.
if let Some(s) = self.size_estimate.get() {
Expand All @@ -382,7 +381,8 @@ impl RenderNode {
// Otherwise, make an estimate.
let estimate = match self.info {
Text(ref t) | Img(ref t) => {
let mut len = t.trim().len();
use unicode_width::UnicodeWidthStr;
let mut len = t.trim().width();
// Add one for preceding whitespace.
if let Some(true) = t.chars().next().map(|c| c.is_whitespace()) {
len += 1;
Expand All @@ -405,12 +405,26 @@ impl RenderNode {
| BlockQuote(ref v)
| Dl(ref v)
| Dt(ref v)
| Dd(ref v)
| Ul(ref v)
| Ol(_, ref v) => v
| Dd(ref v) => v
.iter()
.map(RenderNode::get_size_estimate)
.fold(Default::default(), SizeEstimate::add),
Ul(ref v) => v
.iter()
.map(RenderNode::get_size_estimate)
.fold(Default::default(), SizeEstimate::add)
.add(SizeEstimate {
size: 2,
min_width: 2,
}),
Ol(i, ref v) => v
.iter()
.map(RenderNode::get_size_estimate)
.fold(Default::default(), SizeEstimate::add)
.add(SizeEstimate {
size: i.to_string().len() + 2,
min_width: i.to_string().len() + 2,
}),
Header(level, ref v) => v
.iter()
.map(RenderNode::get_size_estimate)
Expand Down Expand Up @@ -646,10 +660,13 @@ fn tr_to_render_tree<'a, 'b, T: Write>(
}
})
.collect();
Some(RenderNode::new(RenderNodeInfo::TableRow(RenderTableRow {
cells,
col_sizes: None,
}, false)))
Some(RenderNode::new(RenderNodeInfo::TableRow(
RenderTableRow {
cells,
col_sizes: None,
},
false,
)))
})
}

Expand Down Expand Up @@ -930,9 +947,7 @@ fn process_dom_node<'a, 'b, T: Write>(
// it on use.
let href: String = href.into();
Box::new(move |_, cs: Vec<RenderNode>| {
if cs
.iter()
.any(|c| !c.is_shallow_empty()) {
if cs.iter().any(|c| !c.is_shallow_empty()) {
Some(RenderNode::new(Link(href.clone(), cs)))
} else {
None
Expand Down Expand Up @@ -1082,6 +1097,7 @@ fn process_dom_node<'a, 'b, T: Write>(
/// Context to use during tree parsing.
/// This mainly gives access to a Renderer, but needs to be able to push
/// new ones on for nested structures.
#[derive(Clone, Debug)]
struct BuilderStack<R: Renderer> {
// The renderers held by this context. NOTE(review): presumably used as a
// stack, with a new renderer pushed for each nested structure; confirm
// against the `impl` block, which is not visible here.
builders: Vec<R>,
}
Expand Down Expand Up @@ -1132,7 +1148,6 @@ fn render_tree_to_string<T: Write, R: Renderer>(
) -> R {
/* Phase 1: get size estimates. */
tree_map_reduce(&mut (), &tree, |_, node| precalc_size_estimate(&node));

/* Phase 2: actually render. */
let mut bs = BuilderStack::new(builder);
tree_map_reduce(&mut bs, tree, |builders, node| {
Expand Down Expand Up @@ -1391,8 +1406,8 @@ fn render_table_tree<T: Write, R: Renderer>(
}
// TODO: remove empty columns
let tot_size: usize = col_sizes.iter().map(|est| est.size).sum();
let min_size: usize = col_sizes.iter().map(|est| est.min_width).sum::<usize>() +
col_sizes.len().saturating_sub(1);
let min_size: usize = col_sizes.iter().map(|est| est.min_width).sum::<usize>()
+ col_sizes.len().saturating_sub(1);
let width = builder.width();

let vert_row = min_size > width;
Expand All @@ -1404,22 +1419,21 @@ fn render_table_tree<T: Write, R: Renderer>(
if sz.size == 0 {
0
} else {
min(sz.size,
if usize::MAX/width <= sz.size {
min(
sz.size,
if usize::MAX / width <= sz.size {
// The provided width is too large to multiply by width,
// so do it the other way around.
max((width / tot_size) * sz.size, sz.min_width)
} else {
max(sz.size * width / tot_size, sz.min_width)
})
},
)
}
})
.collect()
} else {
col_sizes
.iter()
.map(|_| width)
.collect()
col_sizes.iter().map(|_| width).collect()
};

if !vert_row {
Expand Down Expand Up @@ -1449,8 +1463,12 @@ fn render_table_tree<T: Write, R: Renderer>(
let table_width = if vert_row {
width
} else {
col_widths.iter().cloned().sum::<usize>() +
col_widths.iter().filter(|&w| w > &0).count().saturating_sub(1)
col_widths.iter().cloned().sum::<usize>()
+ col_widths
.iter()
.filter(|&w| w > &0)
.count()
.saturating_sub(1)
};

builder.add_horizontal_border_width(table_width);
Expand Down Expand Up @@ -1533,7 +1551,11 @@ pub struct RenderTree(RenderNode);

impl RenderTree {
/// Render this document using the given `decorator` and wrap it to `width` columns.
pub fn render<D: TextDecorator>(self, width: usize, decorator: D) -> RenderedText<D> {
pub fn render<D: TextDecorator>(
self,
width: usize,
decorator: D,
) -> RenderedText<D> {
let builder = TextRenderer::new(width, decorator);
let builder = render_tree_to_string(builder, self.0, &mut Discard {});
RenderedText(builder)
Expand Down Expand Up @@ -1595,7 +1617,11 @@ pub fn parse(mut input: impl io::Read) -> RenderTree {

/// Reads HTML from `input`, decorates it using `decorator`, and
/// returns a `String` with text wrapped to `width` columns.
pub fn from_read_with_decorator<R, D>(input: R, width: usize, decorator: D) -> String
pub fn from_read_with_decorator<R, D>(
input: R,
width: usize,
decorator: D,
) -> String
where
R: io::Read,
D: TextDecorator,
Expand Down
68 changes: 42 additions & 26 deletions src/render/text_renderer.rs
Expand Up @@ -4,15 +4,15 @@
//! into different text formats.

use super::Renderer;
use std::{fmt::Debug, collections::LinkedList};
use std::mem;
use std::ops::Deref;
use std::vec;
use std::{collections::LinkedList, fmt::Debug};
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};

/// A wrapper around a String with extra metadata.
#[derive(Debug, PartialEq)]
pub struct TaggedString<T: Debug + PartialEq> {
#[derive(Debug, Clone, PartialEq)]
pub struct TaggedString<T> {
/// The wrapped text.
pub s: String,

Expand All @@ -33,8 +33,8 @@ impl<T: Debug + PartialEq> TaggedString<T> {

/// An element of a line of tagged text: either a TaggedString or a
/// marker appearing in between document characters.
#[derive(Debug, PartialEq)]
pub enum TaggedLineElement<T: Debug + Eq + PartialEq + Clone> {
#[derive(Clone, Debug, PartialEq)]
pub enum TaggedLineElement<T> {
/// A string with tag information attached.
Str(TaggedString<T>),

Expand All @@ -43,8 +43,8 @@ pub enum TaggedLineElement<T: Debug + Eq + PartialEq + Clone> {
}

/// A line of tagged text (composed of a set of `TaggedString`s).
#[derive(Debug, PartialEq)]
pub struct TaggedLine<T: Debug + Eq + PartialEq + Clone> {
#[derive(Debug, Clone, PartialEq)]
pub struct TaggedLine<T> {
v: Vec<TaggedLineElement<T>>,
}

Expand Down Expand Up @@ -209,8 +209,8 @@ impl<T: Debug + Eq + PartialEq + Clone + Default> TaggedLine<T> {

/// A type to build up wrapped text, allowing extra metadata for
/// spans.
#[derive(Debug)]
struct WrappedBlock<T: Clone + Eq + Debug + Default> {
#[derive(Debug, Clone)]
struct WrappedBlock<T> {
width: usize,
text: Vec<TaggedLine<T>>,
textlen: usize,
Expand Down Expand Up @@ -584,7 +584,7 @@ impl BorderHoriz {
/// Make a join to a line above at the xth cell
pub fn join_above(&mut self, x: usize) {
use self::BorderSegHoriz::*;
self.stretch_to(x+1);
self.stretch_to(x + 1);
let prev = self.segments[x];
self.segments[x] = match prev {
Straight | JoinAbove => JoinAbove,
Expand All @@ -596,7 +596,7 @@ impl BorderHoriz {
/// Make a join to a line below at the xth cell
pub fn join_below(&mut self, x: usize) {
use self::BorderSegHoriz::*;
self.stretch_to(x+1);
self.stretch_to(x + 1);
let prev = self.segments[x];
self.segments[x] = match prev {
Straight | JoinBelow => JoinBelow,
Expand Down Expand Up @@ -665,7 +665,7 @@ impl BorderHoriz {
}

/// A line, which can either be text or a line.
#[derive(Debug)]
#[derive(Clone, Debug)]
pub enum RenderLine<T: PartialEq + Eq + Clone + Debug + Default> {
/// Some rendered text
Text(TaggedLine<T>),
Expand Down Expand Up @@ -712,6 +712,7 @@ impl<T: PartialEq + Eq + Clone + Debug + Default> RenderLine<T> {

/// A renderer which just outputs plain text with
/// annotations depending on a decorator.
#[derive(Clone)]
pub struct TextRenderer<D: TextDecorator> {
width: usize,
lines: LinkedList<RenderLine<Vec<D::Annotation>>>,
Expand All @@ -726,6 +727,18 @@ pub struct TextRenderer<D: TextDecorator> {
pre_depth: usize,
}

/// Hand-written `Debug` implementation for `TextRenderer`, available
/// whenever the decorator type `D` is itself `Debug`.
///
/// The output is a struct-style dump named `TextRenderer` containing the
/// `width`, `lines`, `decorator`, `ann_stack` and `pre_depth` fields, in
/// that order.
impl<D: TextDecorator + Debug> std::fmt::Debug for TextRenderer<D> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Build the struct dump step by step; the field order here is the
        // order in which the fields appear in the formatted output.
        let mut dump = f.debug_struct("TextRenderer");
        dump.field("width", &self.width);
        dump.field("lines", &self.lines);
        dump.field("decorator", &self.decorator);
        dump.field("ann_stack", &self.ann_stack);
        dump.field("pre_depth", &self.pre_depth);
        dump.finish()
    }
}

impl<D: TextDecorator> TextRenderer<D> {
/// Construct a new empty TextRenderer.
pub fn new(width: usize, decorator: D) -> TextRenderer<D> {
Expand Down Expand Up @@ -808,7 +821,6 @@ impl<D: TextDecorator> TextRenderer<D> {
}
result
}


/// Returns a `LinkedList` of `RenderLine`s with the rendered text.
pub fn into_lines(mut self) -> LinkedList<RenderLine<Vec<D::Annotation>>> {
Expand Down Expand Up @@ -1047,7 +1059,7 @@ impl<D: TextDecorator> Renderer for TextRenderer<D> {
fn append_columns_with_borders<I>(&mut self, cols: I, collapse: bool)
where
I: IntoIterator<Item = Self>,
Self: Sized
Self: Sized,
{
use self::TaggedLineElement::Str;
html_trace!("append_columns_with_borders(collapse={})", collapse);
Expand Down Expand Up @@ -1133,8 +1145,12 @@ impl<D: TextDecorator> Renderer for TextRenderer<D> {
self.lines.back_mut().expect("No previous line")
{
if let RenderLine::Line(line) = sublines.remove(0) {
html_trace!("prev border:\n{}\n, pos={}, line:\n{}",
prev_border.to_string(), pos, line.to_string());
html_trace!(
"prev border:\n{}\n, pos={}, line:\n{}",
prev_border.to_string(),
pos,
line.to_string()
);
prev_border.merge_from_below(&line, pos);
}
} else {
Expand Down Expand Up @@ -1211,7 +1227,7 @@ impl<D: TextDecorator> Renderer for TextRenderer<D> {
fn append_vert_row<I>(&mut self, cols: I)
where
I: IntoIterator<Item = Self>,
Self: Sized
Self: Sized,
{
html_trace!("append_vert_row()");
html_trace!("self=\n{}", self.to_string());
Expand All @@ -1233,13 +1249,13 @@ impl<D: TextDecorator> Renderer for TextRenderer<D> {
self.add_horizontal_border();
}


fn empty(&self) -> bool {
self.lines.is_empty() && if let Some(wrapping) = &self.wrapping {
wrapping.is_empty()
} else {
true
}
self.lines.is_empty()
&& if let Some(wrapping) = &self.wrapping {
wrapping.is_empty()
} else {
true
}
}

fn text_len(&self) -> usize {
Expand Down Expand Up @@ -1379,7 +1395,7 @@ impl<D: TextDecorator> Renderer for TextRenderer<D> {

/// A decorator for use with `TextRenderer` which outputs plain UTF-8 text
/// with no annotations. Markup is rendered as text characters or footnotes.
#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct PlainDecorator {
links: Vec<String>,
}
Expand Down Expand Up @@ -1478,7 +1494,7 @@ impl TextDecorator for PlainDecorator {

/// A decorator for use with `TextRenderer` which outputs plain UTF-8 text
/// with no annotations or markup, emitting only the literal text.
#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct TrivialDecorator {}

impl TrivialDecorator {
Expand Down Expand Up @@ -1571,7 +1587,7 @@ impl TextDecorator for TrivialDecorator {

/// A decorator to generate rich text (styled) rather than
/// pure text output.
#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct RichDecorator {}

/// Annotation type for "rich" text. Text is associated with a set of
Expand Down