fix: improve dynamic ligature probing to handle contextual alternates

This commit is contained in:
Adam Kowalski 2026-01-16 21:35:53 -08:00 committed by Jeremy Soller
parent c6ce5e69e5
commit 723841f934
2 changed files with 29 additions and 9 deletions

View file

@ -863,10 +863,30 @@ impl ShapeSpan {
false,
);
// If we get fewer glyphs than characters, it's likely a ligature.
if glyphs.len() == 1 {
// 1. If we have fewer glyphs than chars, it's definitely a ligature (e.g. -> becoming 1 arrow).
if glyphs.len() < probe_text.chars().count() {
continue;
}
// 2. If we have the same number of glyphs, they might be contextual alternates (e.g. |> becoming 2 special glyphs).
// Check if the glyphs match the standard "cmap" (character to glyph) mapping.
// If they differ, the shaper substituted them, so we should keep them together.
if glyphs.len() == probe_text.chars().count() {
let charmap = font.as_swash().charmap();
let mut is_modified = false;
for (i, c) in probe_text.chars().enumerate() {
let std_id = charmap.map(c);
if glyphs[i].glyph_id != std_id {
is_modified = true;
break;
}
}
if is_modified {
// Ligature/Contextual Alternate detected!
continue;
}
}
}
}
}

View file

@ -94,12 +94,12 @@ fn test_ligature_segmentation() {
let shape = line.shape_opt().expect("ShapeLine not found");
let span = &shape.spans[0];
// Inter-Regular does NOT have a ligature for |>, so we expect it to be split.
// This confirms that we didn't break valid wrapping for non-ligatures.
// Inter-Regular HAS a contextual alternate for |> (changing the glyph ID),
// so our probe detects it and keeps them together.
assert_eq!(
span.words.len(),
2,
"Expected '|>' to be 2 words (no ligature in Inter), but found {} words.",
1,
"Expected '|>' to be 1 word (contextual alternate in Inter), but found {} words.",
span.words.len()
);
@ -121,11 +121,11 @@ fn test_ligature_segmentation() {
buffer.shape_until_scroll(false);
let line = &buffer.lines[0];
let shape = line.shape_opt().expect("ShapeLine not found");
// Inter-Regular does not have a != ligature.
// Inter has a contextual alternate for != too.
assert_eq!(
shape.spans[0].words.len(),
2,
"Expected '!=' to be 2 words (no ligature), but found {} words.",
1,
"Expected '!=' to be 1 word (contextual alternate), but found {} words.",
shape.spans[0].words.len()
);