yoda: snap monospace cell width via Unicode East Asian Width

Replaces the previous heuristic (font_monospace_em_width.is_none() ⇒ 2
cells) which was reviewed as unsound: Arabic, dingbats, math symbols and
other narrow scripts pulled from non-monospace fallback fonts would all
have been forced to 2 cells. It also didn't handle ZWJ emoji clusters
or ambiguous-width chars correctly.

Proper fix, computed at shape time when `line: &str` is in scope:
- new ShapeGlyph.terminal_cells: u8 (0, 1 or 2)
- populated via unicode-width crate applied to the cluster text
  line[start..end] (harfrust path, uses UnicodeWidthStr) or to the
  single codepoint (no-font fallback path, UnicodeWidthChar)
- layout_to_buffer consumes it when match_mono_width is Some:
      x_advance = cells * mono_width
  instead of the previous round(x_advance / mono_width) * mono_width
  which produced variable cell counts for fallback glyphs.

Covers:
- ASCII + Latin        → width 1  (unchanged visual)
- CJK + fullwidth      → width 2  ✓
- Emoji (incl. ZWJ)    → width 2  ✓  (cluster text handles the ZWJ case)
- Arabic / Hebrew      → width 1  ✓  (was wrongly snapped to 2 before)
- Combining marks      → width 0  ✓  (zero-advance, matches terminals)
- Variation selectors  → width 0  ✓

Limitations: ambiguous-width chars (EAW=A) resolve to 1 via unicode-width
default; a 'cjk' ambiguous mode (unicode-width::UnicodeWidthChar::width_cjk)
could be exposed later as a Buffer flag if needed — not needed for typical
terminal use, matching most wcwidth implementations.

Based on review feedback from lionel@wopr.io on the initial heuristic patch.
This commit is contained in:
Lionel DARNIS 2026-04-23 23:20:40 +02:00
parent 0e15681adb
commit 63072bbe29
2 changed files with 46 additions and 5 deletions

View file

@ -30,6 +30,8 @@ sys-locale = { version = "0.3.2", optional = true }
unicode-linebreak = "0.1.5"
unicode-script = "0.5.8"
unicode-segmentation = "1.12.0"
# Yoda: EAW-aware cell width for monospace/terminal rendering.
unicode-width = "0.2"
[dependencies.swash]
version = "0.2.6"

View file

@ -237,6 +237,8 @@ fn shape_fallback(
metadata: attrs.metadata,
cache_key_flags: override_fake_italic(attrs.cache_key_flags, font, &attrs),
metrics_opt: attrs.metrics_opt.map(Into::into),
// Set by the post-loop pass below once `end` is finalized.
terminal_cells: 1,
});
}
@ -265,6 +267,19 @@ fn shape_fallback(
}
}
// Yoda: compute EAW terminal width per glyph now that `end` is finalized.
// We use the cluster's text (byte range into `line`) rather than just the
// first char so ZWJ emoji sequences collapse to a single cluster width.
// `unicode-width` returns 0 for combining marks / variation selectors, 2
// for CJK & most emoji, 1 for everything else including Arabic.
{
use unicode_width::UnicodeWidthStr;
for glyph in &mut glyphs[glyph_start..] {
let cluster = line.get(glyph.start..glyph.end).unwrap_or("");
glyph.terminal_cells = UnicodeWidthStr::width(cluster).min(2) as u8;
}
}
// Restore the buffer to save an allocation.
scratch.harfrust_buffer = Some(glyph_buffer.clear());
@ -590,6 +605,12 @@ fn shape_skip_glyphs(
.letter_spacing_opt
.map_or(0.0, |spacing| spacing.0);
let attrs = attrs_list.get_span(start_run + chr_idx);
// Yoda: EAW width for terminal-aware monospace snap (see
// ShapeGlyph.terminal_cells doc).
let terminal_cells = {
use unicode_width::UnicodeWidthChar;
UnicodeWidthChar::width(codepoint).unwrap_or(1).min(2) as u8
};
ShapeGlyph {
start: chr_idx + start_run,
@ -608,6 +629,7 @@ fn shape_skip_glyphs(
metadata: attrs.metadata,
cache_key_flags: override_fake_italic(attrs.cache_key_flags, font, &attrs),
metrics_opt: attrs.metrics_opt.map(Into::into),
terminal_cells,
}
}),
);
@ -644,6 +666,14 @@ pub struct ShapeGlyph {
pub metadata: usize,
pub cache_key_flags: CacheKeyFlags,
pub metrics_opt: Option<Metrics>,
/// Yoda: Unicode East Asian Width of the source text covered by this glyph,
/// in terminal cells (0, 1 or 2 per `unicode_width::UnicodeWidthStr::width`).
/// Populated at shape time from `line[start..end]`. Consumed by
/// `layout_to_buffer` when `match_mono_width` is Some — wide chars (emoji,
/// CJK, fullwidth forms) snap to 2 cells instead of rounding based on the
/// fallback font's natural glyph advance, which was the upstream bug
/// breaking terminal column alignment.
pub terminal_cells: u8,
}
impl ShapeGlyph {
@ -2904,11 +2934,20 @@ impl ShapeLine {
},
);
if let Some(mono_width) = match_mono_width {
// Round to nearest multiple of the monospace cell width
let cells = (x_advance / mono_width).round();
// Ensure visible glyphs occupy at least one cell
let cells = if x_advance > 0.0 { cells.max(1.0) } else { cells };
x_advance = cells * mono_width;
// Yoda: use Unicode East Asian Width, stored at
// shape time, as the authoritative cell count.
// `terminal_cells` is 0 for combining marks /
// variation selectors (they stay 0-advance),
// 2 for CJK & emoji clusters, 1 for everything
// else (including Arabic, Latin, etc.).
//
// This replaces the previous round-to-nearest
// logic which produced variable cell widths
// for fallback glyphs because it depended on
// the glyph's natural advance in the fallback
// font rather than on the source character's
// terminal width spec.
x_advance = f32::from(glyph.terminal_cells) * mono_width;
}
if hinting == Hinting::Enabled {
x_advance = x_advance.round();