Nukesor · Nukesor · Feb 7, 2025 · Feb 2, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,9 +4,16 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## Unreleased
+## [7.1.4] - 2025-02-07
 
-## Changed
+### Fix
+
+- Handle UTF-8 graphemes when truncating cells. [#167](https://github.com/Nukesor/comfy-table/pull/167)
+- Respect UTF-8 zero-width joiner and variation selection characters when splitting words. [#168](https://github.com/Nukesor/comfy-table/pull/168) by [tisonkun](https://github.com/tisonkun)
+
+### Change
+
+- Remove strum dependency. [#169](https://github.com/Nukesor/comfy-table/pull/169) by [tisonkun](https://github.com/tisonkun)
 
 ## Chore
 

diff --git a/Cargo.toml b/Cargo.toml
@@ -53,8 +53,8 @@ debug = []
 integration_test = []
 
 [dependencies]
-unicode-segmentation = { version = "1" }
-unicode-width = { version = "0.2" }
+unicode-segmentation = "1"
+unicode-width = "0.2"
 
 # Optional dependencies
 ansi-str = { version = "0.9", optional = true }

diff --git a/benches/build_large_table.rs b/benches/build_large_table.rs
@@ -17,7 +17,6 @@ fn build_huge_table() {
         .set_header(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
 
     let mut rng = rand::rng();
-    // Create a 10x10 grid
     for _ in 0..500 {
         let mut row = Vec::new();
         for _ in 0..10 {

diff --git a/src/utils/formatting/content_format.rs b/src/utils/formatting/content_format.rs
@@ -1,5 +1,6 @@
 #[cfg(feature = "tty")]
 use crossterm::style::{style, Stylize};
+use unicode_segmentation::UnicodeSegmentation;
 use unicode_width::UnicodeWidthStr;
 
 use super::content_split::measure_text_width;
@@ -102,39 +103,115 @@ pub fn format_row(
 
         // Remove all unneeded lines of this cell, if the row's height is capped to a certain
         // amount of lines and there're too many lines in this cell.
-        // This then inserts a '...' string at the end to indicate that the cell has been truncated.
+        // This then truncates and inserts a '...' string at the end of the last line to indicate
+        // that the cell has been truncated.
         if let Some(lines) = row.max_height {
             if cell_lines.len() > lines {
+                // We already have to many lines. Cut off the surplus lines.
                 let _ = cell_lines.split_off(lines);
-                // Direct access.
+
+                // Directly access the last line.
                 let last_line = cell_lines
                     .get_mut(lines - 1)
                     .expect("We know it's this long.");
 
-                // Truncate any ansi codes, as the following cutoff might break an ansi code
-                // otherwise. This could be handled smarter, but works for now.
+                // Truncate any ansi codes, as the following cutoff might break ansi code
+                // otherwise anyway. This could be handled smarter, but it's simple and just works.
                 #[cfg(feature = "custom_styling")]
                 {
                     let stripped = console::strip_ansi_codes(last_line).to_string();
                     *last_line = stripped;
                 }
 
-                // Only show the `...` indicator if the column is smaller then 6 characters.
-                // Otherwise it feels like it doesn't make a lot of sense to show it, as it
-                // might cover up too much important content on such a small column.
-                //
-                // That's questionable though, should we really keep that limitation as users
-                // won't have an indicator that truncation is taking place?
-                let width: usize = info.content_width.into();
-                if width >= 6 {
-                    let indicator_width = table.truncation_indicator.width();
-                    // Truncate the line if indicator doesn't fit
-                    if last_line.width() >= width - indicator_width {
-                        let surplus = (last_line.width() + indicator_width) - width;
-                        last_line.truncate(last_line.width() - surplus);
+                let max_width: usize = info.content_width.into();
+                let indicator_width = table.truncation_indicator.width();
+
+                let mut truncate_at = 0;
+                // Start the accumulated_width with the indicator_width, which is the minimum width
+                // we may show anyway.
+                let mut accumulated_width = indicator_width;
+                let mut full_string_fits = false;
+
+                // Leave these print statements in here in case we ever have to debug this annoying
+                // stuff again.
+                //println!("\nSTART:");
+                //println!("\nMax width: {max_width}, Indicator width: {indicator_width}");
+                //println!("Full line hex: {last_line}");
+                //println!(
+                //    "Full line hex: {}",
+                //    last_line
+                //        .as_bytes()
+                //        .iter()
+                //        .map(|byte| format!("{byte:02x}"))
+                //        .collect::<Vec<String>>()
+                //        .join(", ")
+                //);
+
+                // Iterate through the UTF-8 graphemes.
+                // Check the `split_long_word` inline function docs to see why we're using
+                // graphemes.
+                // **Note:** The `index` here is the **byte** index. So we cannot just
+                //    String::truncate afterwards. We have to convert to a byte vector to perform
+                //    the truncation first.
+                let mut grapheme_iter = last_line.grapheme_indices(true).peekable();
+                while let Some((index, grapheme)) = grapheme_iter.next() {
+                    // Leave these print statements in here in case we ever have to debug this
+                    // annoying stuff again
+                    //println!(
+                    //    "Current index: {index}, Next grapheme: {grapheme} (width: {})",
+                    //    grapheme.width()
+                    //);
+                    //println!(
+                    //    "Next grapheme hex: {}",
+                    //    grapheme
+                    //        .as_bytes()
+                    //        .iter()
+                    //        .map(|byte| format!("{byte:02x}"))
+                    //        .collect::<Vec<String>>()
+                    //        .join(", ")
+                    //);
+
+                    // Immediately save where to truncate in case this grapheme doesn't fit.
+                    // The index is just before the current grapheme actually starts.
+                    truncate_at = index;
+                    // Check if the next grapheme would break the boundary of the allowed line
+                    // length.
+                    let new_width = accumulated_width + grapheme.width();
+                    //println!(
+                    //    "Next width: {new_width}/{max_width} ({accumulated_width} + {})",
+                    //    grapheme.width()
+                    //);
+                    if new_width > max_width {
+                        //println!(
+                        //    "Breaking: {:?}",
+                        //    accumulated_width + grapheme.width() > max_width
+                        //);
+                        break;
+                    }
+
+                    // The grapheme seems to fit. Save the index and check the next one.
+                    accumulated_width += grapheme.width();
+
+                    // This is a special case.
+                    // We reached the last char, meaning that full last line + the indicator fit.
+                    if grapheme_iter.peek().is_none() {
+                        full_string_fits = true
                     }
-                    last_line.push_str(&table.truncation_indicator);
                 }
+
+                // Only do any truncation logic if the line doesn't fit.
+                if !full_string_fits {
+                    // Truncate the string at the byte index just behind the last valid grapheme
+                    // and overwrite the last line with the new truncated string.
+                    let mut last_line_bytes = last_line.clone().into_bytes();
+                    last_line_bytes.truncate(truncate_at);
+                    let new_last_line = String::from_utf8(last_line_bytes)
+                        .expect("We cut at an exact char boundary");
+                    *last_line = new_last_line;
+                }
+
+                // Push the truncation indicator.
+                last_line.push_str(&table.truncation_indicator);
             }
         }
 

diff --git a/tests/all/content_arrangement_test.rs b/tests/all/content_arrangement_test.rs
@@ -2,9 +2,7 @@ use comfy_table::ColumnConstraint;
 use comfy_table::Width;
 use pretty_assertions::assert_eq;
 
-use comfy_table::ColumnConstraint::*;
-use comfy_table::Width::*;
-use comfy_table::{ContentArrangement, Row, Table};
+use comfy_table::{ContentArrangement, Table};
 
 use super::assert_table_line_width;
 
@@ -68,117 +66,6 @@ fn simple_dynamic_table() {
     assert_eq!(expected, "\n".to_string() + &table.to_string());
 }
 
-/// Individual rows can be configured to have a max height.
-/// Everything beyond that line height should be truncated.
-#[test]
-fn table_with_truncate() {
-    let mut table = Table::new();
-    let mut first_row: Row = Row::from(vec![
-        "This is a very long line with a lot of text",
-        "This is anotherverylongtextwithlongwords text",
-        "smol",
-    ]);
-    first_row.max_height(4);
-
-    let mut second_row = Row::from(vec![
-        "Now let's\nadd a really long line in the middle of the cell \n and add more multi line stuff",
-        "This is another text",
-        "smol",
-    ]);
-    second_row.max_height(4);
-
-    table
-        .set_header(vec!["Header1", "Header2", "Head"])
-        .set_content_arrangement(ContentArrangement::Dynamic)
-        .set_width(35)
-        .add_row(first_row)
-        .add_row(second_row);
-
-    // The first column will be wider than 6 chars.
-    // The second column's content is wider than 6 chars. There should be a '...'.
-    let second_column = table.column_mut(1).unwrap();
-    second_column.set_constraint(Absolute(Fixed(8)));
-
-    // The third column's content is less than 6 chars width. There shouldn't be a '...'.
-    let third_column = table.column_mut(2).unwrap();
-    third_column.set_constraint(Absolute(Fixed(7)));
-
-    println!("{table}");
-    let expected = "
-+----------------+--------+-------+
-| Header1        | Header | Head  |
-|                | 2      |       |
-+=================================+
-| This is a very | This   | smol  |
-| long line with | is ano |       |
-| a lot of text  | therve |       |
-|                | ryl... |       |
-|----------------+--------+-------|
-| Now let's      | This   | smol  |
-| add a really   | is ano |       |
-| long line in   | ther   |       |
-| the middle ... | text   |       |
-+----------------+--------+-------+";
-    println!("{expected}");
-    assert_table_line_width(&table, 35);
-    assert_eq!(expected, "\n".to_string() + &table.to_string());
-}
-
-#[test]
-fn table_with_truncate_indicator() {
-    let mut table = Table::new();
-    let mut first_row: Row = Row::from(vec![
-        "This is a very long line with a lot of text",
-        "This is anotherverylongtextwithlongwords text",
-        "smol",
-    ]);
-    first_row.max_height(4);
-
-    let mut second_row = Row::from(vec![
-        "Now let's\nadd a really long line in the middle of the cell \n and add more multi line stuff",
-        "This is another text",
-        "smol",
-    ]);
-    second_row.max_height(4);
-
-    table
-        .set_header(vec!["Header1", "Header2", "Head"])
-        .set_content_arrangement(ContentArrangement::Dynamic)
-        .set_truncation_indicator("…")
-        .set_width(35)
-        .add_row(first_row)
-        .add_row(second_row);
-
-    // The first column will be wider than 6 chars.
-    // The second column's content is wider than 6 chars. There should be a '…'.
-    let second_column = table.column_mut(1).unwrap();
-    second_column.set_constraint(Absolute(Fixed(8)));
-
-    // The third column's content is less than 6 chars width. There shouldn't be a '…'.
-    let third_column = table.column_mut(2).unwrap();
-    third_column.set_constraint(Absolute(Fixed(7)));
-
-    println!("{table}");
-    let expected = "
-+----------------+--------+-------+
-| Header1        | Header | Head  |
-|                | 2      |       |
-+=================================+
-| This is a very | This   | smol  |
-| long line with | is ano |       |
-| a lot of text  | therve |       |
-|                | rylon… |       |
-|----------------+--------+-------|
-| Now let's      | This   | smol  |
-| add a really   | is ano |       |
-| long line in   | ther   |       |
-| the middle of… | text   |       |
-+----------------+--------+-------+";
-    println!("{expected}");
-    assert_table_line_width(&table, 35);
-    assert_eq!(expected, "\n".to_string() + &table.to_string());
-}
-
 /// This table checks the scenario, where a column has a big max_width, but a lot of the assigned
 /// space doesn't get used after splitting the lines. This happens mostly when there are
 /// many long words in a single column.

diff --git a/tests/all/mod.rs b/tests/all/mod.rs
@@ -20,6 +20,7 @@ mod property_test;
 mod simple_test;
 #[cfg(feature = "tty")]
 mod styling_test;
+mod truncation;
 mod utf_8_characters;
 
 pub fn assert_table_line_width(table: &Table, count: usize) {