Skip to content

Commit

Permalink
fix: Handle UTF-8 graphemes when truncating cells
Browse files Browse the repository at this point in the history
  • Loading branch information
Nukesor committed Feb 2, 2025
1 parent 3ffa4f8 commit 61dcad0
Show file tree
Hide file tree
Showing 7 changed files with 216 additions and 130 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## Unreleased

### Fix

- Handle UTF-8 graphemes when truncating cells. [#167](https://github.com/Nukesor/comfy-table/pull/167)

## Changed

## [7.1.3] - 2024-11-24
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ integration_test = []
[dependencies]
ansi-str = { version = "0.8", optional = true }
console = { version = "0.15", optional = true }
unicode-segmentation = { version = "1" }
unicode-width = "0.2"

[dev-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion benches/build_large_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ fn build_huge_table() {
.set_header(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);

let mut rng = rand::thread_rng();
// Create a 10x10 grid
// Create a 500x10 grid
for _ in 0..500 {
let mut row = Vec::new();
for _ in 0..10 {
Expand Down
52 changes: 37 additions & 15 deletions src/utils/formatting/content_format.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#[cfg(feature = "tty")]
use crossterm::style::{style, Stylize};
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;

use super::content_split::measure_text_width;
Expand Down Expand Up @@ -105,8 +106,10 @@ pub fn format_row(
// This then inserts a '...' string at the end to indicate that the cell has been truncated.
if let Some(lines) = row.max_height {
if cell_lines.len() > lines {
// We already have too many lines. Cut off the surplus lines.
let _ = cell_lines.split_off(lines);
// Direct access.

// Directly access the last line.
let last_line = cell_lines
.get_mut(lines - 1)
.expect("We know it's this long.");
Expand All @@ -119,21 +122,40 @@ pub fn format_row(
*last_line = stripped;
}

// Only show the `...` indicator if the column is at least 6 characters wide.
// Otherwise it feels like it doesn't make a lot of sense to show it, as it
// might cover up too much important content on such a small column.
//
// That's questionable though, should we really keep that limitation as users
// won't have an indicator that truncation is taking place?
let width: usize = info.content_width.into();
if width >= 6 {
let indicator_width = table.truncation_indicator.width();
// Truncate the line if indicator doesn't fit
if last_line.width() >= width - indicator_width {
let surplus = (last_line.width() + indicator_width) - width;
last_line.truncate(last_line.width() - surplus);
let max_width: usize = info.content_width.into();
let indicator_width = table.truncation_indicator.width();

let mut truncate_at = 0;
let mut padding = None;
// Start the accumulated_width with the indicator_width, which is the minimum width
// we may show anyway.
let mut accumulated_width = indicator_width;

// Iterate through the UTF-8 graphemes.
// Check the `split_long_word` inline function docs to see why we're using
// graphemes.
for (index, grapheme) in last_line.grapheme_indices(true) {
// Check if the next grapheme would break the boundary of the allowed line
// length.
if (accumulated_width + grapheme.width()) > max_width {
if accumulated_width < max_width {
padding = Some(" ".repeat(max_width - accumulated_width));
}
break;
}
last_line.push_str(&table.truncation_indicator);

// The grapheme seems to fit. Save the index and check the next one.
accumulated_width += grapheme.width();
truncate_at = index;
}

// Truncate the string at the index of the last valid grapheme and push the
// truncation indicator right afterwards.
last_line.truncate(truncate_at);
last_line.push_str(&table.truncation_indicator);

if let Some(whitespaces) = padding {
last_line.push_str(&whitespaces);
}
}
}
Expand Down
115 changes: 1 addition & 114 deletions tests/all/content_arrangement_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ use comfy_table::ColumnConstraint;
use comfy_table::Width;
use pretty_assertions::assert_eq;

use comfy_table::ColumnConstraint::*;
use comfy_table::Width::*;
use comfy_table::{ContentArrangement, Row, Table};
use comfy_table::{ContentArrangement, Table};

use super::assert_table_line_width;

Expand Down Expand Up @@ -68,117 +66,6 @@ fn simple_dynamic_table() {
assert_eq!(expected, "\n".to_string() + &table.to_string());
}

/// Individual rows can be configured to have a max height.
/// Everything beyond that line height should be truncated.
///
/// No custom truncation indicator is set in this test; the expected output
/// below shows `...` at the end of the cells that were cut off.
#[test]
fn table_with_truncate() {
let mut table = Table::new();
// First row: the first two cells need several lines once wrapped, the third is a short word.
let mut first_row: Row = Row::from(vec![
"This is a very long line with a lot of text",
"This is anotherverylongtextwithlongwords text",
"smol",
]);
// Limit the row to 4 lines; the second cell ends in `ryl...` in the expected output.
first_row.max_height(4);

// Second row: the first cell contains explicit `\n` line breaks in addition to long text.
let mut second_row = Row::from(vec![
"Now let's\nadd a really long line in the middle of the cell \n and add more multi line stuff",
"This is another text",
"smol",
]);
// Same 4-line limit; here the first cell is the one that gets cut off.
second_row.max_height(4);

// Dynamic arrangement with a total table width of 35 (asserted again further down).
table
.set_header(vec!["Header1", "Header2", "Head"])
.set_content_arrangement(ContentArrangement::Dynamic)
.set_width(35)
.add_row(first_row)
.add_row(second_row);

// The first column will be wider than 6 chars.
// The second column's content is wider than 6 chars. There should be a '...'.
let second_column = table.column_mut(1).unwrap();
second_column.set_constraint(Absolute(Fixed(8)));

// The third column's content is less than 6 chars width. There shouldn't be a '...'.
let third_column = table.column_mut(2).unwrap();
third_column.set_constraint(Absolute(Fixed(7)));

// Print the actual table so it can be compared by eye with the expected one on failure.
println!("{table}");
// The literal starts with a line break, which is why a "\n" is prepended to the
// rendered table in the final assertion.
let expected = "
+----------------+--------+-------+
| Header1 | Header | Head |
| | 2 | |
+=================================+
| This is a very | This | smol |
| long line with | is ano | |
| a lot of text | therve | |
| | ryl... | |
|----------------+--------+-------|
| Now let's | This | smol |
| add a really | is ano | |
| long line in | ther | |
| the middle ... | text | |
+----------------+--------+-------+";
println!("{expected}");
// Check the line width against the configured table width of 35, then compare the
// rendered output with the expected string.
assert_table_line_width(&table, 35);
assert_eq!(expected, "\n".to_string() + &table.to_string());
}

/// Rows with a max height are truncated using a custom truncation indicator.
///
/// The table sets `…` via `set_truncation_indicator`; the expected output below
/// shows that character at the end of the cells that were cut off.
#[test]
fn table_with_truncate_indicator() {
let mut table = Table::new();
// First row: the first two cells need several lines once wrapped, the third is a short word.
let mut first_row: Row = Row::from(vec![
"This is a very long line with a lot of text",
"This is anotherverylongtextwithlongwords text",
"smol",
]);
// Limit the row to 4 lines; the second cell ends in `rylon…` in the expected output.
first_row.max_height(4);

// Second row: the first cell contains explicit `\n` line breaks in addition to long text.
let mut second_row = Row::from(vec![
"Now let's\nadd a really long line in the middle of the cell \n and add more multi line stuff",
"This is another text",
"smol",
]);
// Same 4-line limit; here the first cell is the one that gets cut off.
second_row.max_height(4);

// Dynamic arrangement with a total table width of 35 (asserted again further down)
// and `…` as the truncation indicator.
table
.set_header(vec!["Header1", "Header2", "Head"])
.set_content_arrangement(ContentArrangement::Dynamic)
.set_truncation_indicator("…")
.set_width(35)
.add_row(first_row)
.add_row(second_row);

// The first column will be wider than 6 chars.
// The second column's content is wider than 6 chars. There should be a '…'.
let second_column = table.column_mut(1).unwrap();
second_column.set_constraint(Absolute(Fixed(8)));

// The third column's content is less than 6 chars width. There shouldn't be a '…'.
let third_column = table.column_mut(2).unwrap();
third_column.set_constraint(Absolute(Fixed(7)));

// Print the actual table so it can be compared by eye with the expected one on failure.
println!("{table}");
// The literal starts with a line break, which is why a "\n" is prepended to the
// rendered table in the final assertion.
let expected = "
+----------------+--------+-------+
| Header1 | Header | Head |
| | 2 | |
+=================================+
| This is a very | This | smol |
| long line with | is ano | |
| a lot of text | therve | |
| | rylon… | |
|----------------+--------+-------|
| Now let's | This | smol |
| add a really | is ano | |
| long line in | ther | |
| the middle of… | text | |
+----------------+--------+-------+";
println!("{expected}");
// Check the line width against the configured table width of 35, then compare the
// rendered output with the expected string.
assert_table_line_width(&table, 35);
assert_eq!(expected, "\n".to_string() + &table.to_string());
}

/// This table checks the scenario, where a column has a big max_width, but a lot of the assigned
/// space doesn't get used after splitting the lines. This happens mostly when there are
/// many long words in a single column.
Expand Down
1 change: 1 addition & 0 deletions tests/all/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ mod property_test;
mod simple_test;
#[cfg(feature = "tty")]
mod styling_test;
mod truncation;
mod utf_8_characters;

pub fn assert_table_line_width(table: &Table, count: usize) {
Expand Down
Loading

0 comments on commit 61dcad0

Please sign in to comment.