fix: use dynamic font probing to preserve ligatures across break opportunities

This commit is contained in:
Adam Kowalski 2026-01-16 21:21:00 -08:00 committed by Jeremy Soller
parent 5d1db4992a
commit c6ce5e69e5
2 changed files with 68 additions and 23 deletions

View file

@ -826,19 +826,49 @@ impl ShapeSpan {
let mut start_word = 0;
for (end_lb, _) in unicode_linebreak::linebreaks(span) {
// The unicode-linebreak crate treats the pipe character '|' as a break opportunity (BA/AL class).
// This causes ShapeSpan::build to split text like '|>' into separate ShapeWords.
// When these words are shaped independently, the font shaping engine cannot form ligatures that cross the word boundary.
// We manually check for known ligature sequences during segmentation and skip the break opportunity
// to ensure they remain in the same shaping run.
// Check if this break opportunity splits a likely ligature (e.g. "|>" or "!=")
if end_lb > 0 && end_lb < span.len() {
let b = span.as_bytes();
match (b[end_lb - 1], b[end_lb]) {
(b'|', b'>') | // |>
(b'!', b'=') | // !=
(b'+', b'+') // ++
=> continue,
_ => {}
let start_idx = span_range.start;
let pre_char = span[..end_lb].chars().last();
let post_char = span[end_lb..].chars().next();
if let (Some(c1), Some(c2)) = (pre_char, post_char) {
// Only probe when both characters are ASCII punctuation (keeps the font probe limited to likely coding ligatures)
if c1.is_ascii_punctuation() && c2.is_ascii_punctuation() {
let probe_text = format!("{}{}", c1, c2);
let attrs = attrs_list.get_span(start_idx + end_lb);
let fonts = font_system.get_font_matches(&attrs);
let default_families = [&attrs.family];
let mut font_iter = FontFallbackIter::new(
font_system,
&fonts,
&default_families,
&[],
&probe_text,
attrs.weight,
);
if let Some(font) = font_iter.next() {
let mut glyphs = Vec::new();
let scratch = font_iter.shape_caches();
shape_fallback(
scratch,
&mut glyphs,
&font,
&probe_text,
attrs_list,
0,
probe_text.len(),
false,
);
// The probe text is exactly two characters, so a single shaped glyph means the pair was likely merged into a ligature; skip this break.
if glyphs.len() == 1 {
continue;
}
}
}
}
}

View file

@ -78,7 +78,8 @@ fn test_english_mixed_with_arabic_paragraph_rendering() {
fn test_ligature_segmentation() {
use cosmic_text::{Buffer, FontSystem, Metrics, Shaping};
let mut font_system = FontSystem::new_with_locale_and_db("en-US".into(), fontdb::Database::new());
let mut font_system =
FontSystem::new_with_locale_and_db("en-US".into(), fontdb::Database::new());
let font = std::fs::read("fonts/Inter-Regular.ttf").unwrap();
font_system.db_mut().load_font_data(font);
let metrics = Metrics::new(14.0, 20.0);
@ -93,25 +94,38 @@ fn test_ligature_segmentation() {
let shape = line.shape_opt().expect("ShapeLine not found");
let span = &shape.spans[0];
// The pipe character | is typically a line break opportunity.
// This test ensures that our patch prevents splitting |> into separate words,
// which would break ligature formation in fonts that support it.
// Inter-Regular does NOT have a ligature for |>, so we expect it to be split.
// This confirms that we didn't break valid wrapping for non-ligatures.
assert_eq!(
span.words.len(),
1,
"Expected '|>' to be a single word (preserved for ligature), but found {} words.",
2,
"Expected '|>' to be 2 words (no ligature in Inter), but found {} words.",
span.words.len()
);
// Test -> (Arrow), which is a common ligature.
buffer.set_text("->", &Attrs::new(), Shaping::Advanced, None);
buffer.shape_until_scroll(false);
let line = &buffer.lines[0];
let shape = line.shape_opt().expect("ShapeLine not found");
assert_eq!(
shape.spans[0].words.len(),
1,
"Expected '->' to be a single word (ligature), but found {} words.",
shape.spans[0].words.len()
);
// Test !=
buffer.set_text("!=", &Attrs::new(), Shaping::Advanced, None);
buffer.shape_until_scroll(false);
let line = &buffer.lines[0];
let shape = line.shape_opt().expect("ShapeLine not found");
// Inter-Regular does not have a != ligature.
assert_eq!(
shape.spans[0].words.len(),
1,
"Expected '!=' to be a single word, but found {} words.",
2,
"Expected '!=' to be 2 words (no ligature), but found {} words.",
shape.spans[0].words.len()
);
@ -120,10 +134,11 @@ fn test_ligature_segmentation() {
buffer.shape_until_scroll(false);
let line = &buffer.lines[0];
let shape = line.shape_opt().expect("ShapeLine not found");
// Inter does not have a ++ ligature.
assert_eq!(
shape.spans[0].words.len(),
1,
"Expected '++' to be a single word, but found {} words.",
2,
"Expected '++' to be 2 words, but found {} words.",
shape.spans[0].words.len()
);
}
}