From 63072bbe29a1657d82cd3deb5db45070404ec7a1 Mon Sep 17 00:00:00 2001 From: leyoda Date: Thu, 23 Apr 2026 23:20:40 +0200 Subject: [PATCH] yoda: snap monospace cell width via Unicode East Asian Width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the previous heuristic (font_monospace_em_width.is_none() ⇒ 2 cells) which was reviewed as unsound: Arabic, dingbats, math symbols and other narrow scripts pulled from non-monospace fallback fonts would all have been forced to 2 cells. It also didn't handle ZWJ emoji clusters or ambiguous-width chars correctly. Proper fix, computed at shape time when `line: &str` is in scope: - new ShapeGlyph.terminal_cells: u8 (0, 1 or 2) - populated via unicode-width crate applied to the cluster text line[start..end] (harfrust path, uses UnicodeWidthStr) or to the single codepoint (no-font fallback path, UnicodeWidthChar) - layout_to_buffer consumes it when match_mono_width is Some: x_advance = cells * mono_width instead of the previous round(x_advance / mono_width) * mono_width which produced variable cell counts for fallback glyphs. Covers: - ASCII + Latin → width 1 (unchanged visual) - CJK + fullwidth → width 2 ✓ - Emoji (incl. ZWJ) → width 2 ✓ (cluster text handles the ZWJ case) - Arabic / Hebrew → width 1 ✓ (was wrongly snapped to 2 before) - Combining marks → width 0 ✓ (zero-advance, matches terminals) - Variation selectors → width 0 ✓ Limitations: ambiguous-width chars (EAW=A) resolve to 1 via unicode-width default; a 'cjk' ambiguous mode (unicode-width::UnicodeWidthChar::width_cjk) could be exposed later as a Buffer flag if needed — not needed for typical terminal use, matching most wcwidth implementations. Based on review feedback from lionel@wopr.io on the initial heuristic patch. --- Cargo.toml | 2 ++ src/shape.rs | 49 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 67e9485..227b603 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,8 @@ sys-locale = { version = "0.3.2", optional = true } unicode-linebreak = "0.1.5" unicode-script = "0.5.8" unicode-segmentation = "1.12.0" +# Yoda: EAW-aware cell width for monospace/terminal rendering. +unicode-width = "0.2" [dependencies.swash] version = "0.2.6" diff --git a/src/shape.rs b/src/shape.rs index 9153a62..5e76c73 100644 --- a/src/shape.rs +++ b/src/shape.rs @@ -237,6 +237,8 @@ fn shape_fallback( metadata: attrs.metadata, cache_key_flags: override_fake_italic(attrs.cache_key_flags, font, &attrs), metrics_opt: attrs.metrics_opt.map(Into::into), + // Set by the post-loop pass below once `end` is finalized. + terminal_cells: 1, }); } @@ -265,6 +267,19 @@ fn shape_fallback( } } + // Yoda: compute EAW terminal width per glyph now that `end` is finalized. + // We use the cluster's text (byte range into `line`) rather than just the + // first char so ZWJ emoji sequences collapse to a single cluster width. + // `unicode-width` returns 0 for combining marks / variation selectors, 2 + // for CJK & most emoji, 1 for everything else including Arabic. + { + use unicode_width::UnicodeWidthStr; + for glyph in &mut glyphs[glyph_start..] { + let cluster = line.get(glyph.start..glyph.end).unwrap_or(""); + glyph.terminal_cells = UnicodeWidthStr::width(cluster).min(2) as u8; + } + } + // Restore the buffer to save an allocation. scratch.harfrust_buffer = Some(glyph_buffer.clear()); @@ -590,6 +605,12 @@ fn shape_skip_glyphs( .letter_spacing_opt .map_or(0.0, |spacing| spacing.0); let attrs = attrs_list.get_span(start_run + chr_idx); + // Yoda: EAW width for terminal-aware monospace snap (see + // ShapeGlyph.terminal_cells doc). + let terminal_cells = { + use unicode_width::UnicodeWidthChar; + UnicodeWidthChar::width(codepoint).unwrap_or(1).min(2) as u8 + }; ShapeGlyph { start: chr_idx + start_run, @@ -608,6 +629,7 @@ fn shape_skip_glyphs( metadata: attrs.metadata, cache_key_flags: override_fake_italic(attrs.cache_key_flags, font, &attrs), metrics_opt: attrs.metrics_opt.map(Into::into), + terminal_cells, } }), ); @@ -644,6 +666,14 @@ pub struct ShapeGlyph { pub metadata: usize, pub cache_key_flags: CacheKeyFlags, pub metrics_opt: Option, + /// Yoda: Unicode East Asian Width of the source text covered by this glyph, + /// in terminal cells (0, 1 or 2 per `unicode_width::UnicodeWidthStr::width`). + /// Populated at shape time from `line[start..end]`. Consumed by + /// `layout_to_buffer` when `match_mono_width` is Some — wide chars (emoji, + /// CJK, fullwidth forms) snap to 2 cells instead of rounding based on the + /// fallback font's natural glyph advance, which was the upstream bug + /// breaking terminal column alignment. + pub terminal_cells: u8, } impl ShapeGlyph { @@ -2904,11 +2934,20 @@ impl ShapeLine { }, ); if let Some(mono_width) = match_mono_width { - // Round to nearest multiple of the monospace cell width - let cells = (x_advance / mono_width).round(); - // Ensure visible glyphs occupy at least one cell - let cells = if x_advance > 0.0 { cells.max(1.0) } else { cells }; - x_advance = cells * mono_width; + // Yoda: use Unicode East Asian Width, stored at + // shape time, as the authoritative cell count. + // `terminal_cells` is 0 for combining marks / + // variation selectors (they stay 0-advance), + // 2 for CJK & emoji clusters, 1 for everything + // else (including Arabic, Latin, etc.). + // + // This replaces the previous round-to-nearest + // logic which produced variable cell widths + // for fallback glyphs because it depended on + // the glyph's natural advance in the fallback + // font rather than on the source character's + // terminal width spec. + x_advance = f32::from(glyph.terminal_cells) * mono_width; } if hinting == Hinting::Enabled { x_advance = x_advance.round();