diff --git a/src/shape.rs b/src/shape.rs index 71c6d35..339a14a 100644 --- a/src/shape.rs +++ b/src/shape.rs @@ -826,19 +826,49 @@ impl ShapeSpan { let mut start_word = 0; for (end_lb, _) in unicode_linebreak::linebreaks(span) { - // The unicode-linebreak crate treats the pipe character '|' as a break opportunity (BA/AL class). - // This causes ShapeSpan::build to split text like '|>' into separate ShapeWords. - // When these words are shaped independently, the font shaping engine cannot form ligatures that cross the word boundary. - // We manually check for known ligature sequences during segmentation and skip the break opportunity - // to ensure they remain in the same shaping run. + // Check if this break opportunity splits a likely ligature (e.g. "|>" or "!=") if end_lb > 0 && end_lb < span.len() { - let b = span.as_bytes(); - match (b[end_lb - 1], b[end_lb]) { - (b'|', b'>') | // |> - (b'!', b'=') | // != - (b'+', b'+') // ++ - => continue, - _ => {} + let start_idx = span_range.start; + let pre_char = span[..end_lb].chars().last(); + let post_char = span[end_lb..].chars().next(); + + if let (Some(c1), Some(c2)) = (pre_char, post_char) { + // Only probe if both are punctuation (optimization for coding ligatures) + if c1.is_ascii_punctuation() && c2.is_ascii_punctuation() { + let probe_text = format!("{}{}", c1, c2); + let attrs = attrs_list.get_span(start_idx + end_lb); + let fonts = font_system.get_font_matches(&attrs); + let default_families = [&attrs.family]; + + let mut font_iter = FontFallbackIter::new( + font_system, + &fonts, + &default_families, + &[], + &probe_text, + attrs.weight, + ); + + if let Some(font) = font_iter.next() { + let mut glyphs = Vec::new(); + let scratch = font_iter.shape_caches(); + shape_fallback( + scratch, + &mut glyphs, + &font, + &probe_text, + attrs_list, + 0, + probe_text.len(), + false, + ); + + // If we get fewer glyphs than characters, it's likely a ligature. + if glyphs.len() == 1 { + continue; + } + } + } } } diff --git a/tests/shaping_and_rendering.rs b/tests/shaping_and_rendering.rs index 746aaf9..1166b6e 100644 --- a/tests/shaping_and_rendering.rs +++ b/tests/shaping_and_rendering.rs @@ -78,7 +78,8 @@ fn test_english_mixed_with_arabic_paragraph_rendering() { fn test_ligature_segmentation() { use cosmic_text::{Buffer, FontSystem, Metrics, Shaping}; - let mut font_system = FontSystem::new_with_locale_and_db("en-US".into(), fontdb::Database::new()); + let mut font_system = + FontSystem::new_with_locale_and_db("en-US".into(), fontdb::Database::new()); let font = std::fs::read("fonts/Inter-Regular.ttf").unwrap(); font_system.db_mut().load_font_data(font); let metrics = Metrics::new(14.0, 20.0); @@ -93,25 +94,38 @@ fn test_ligature_segmentation() { let shape = line.shape_opt().expect("ShapeLine not found"); let span = &shape.spans[0]; - // The pipe character | is typically a line break opportunity. - // This test ensures that our patch prevents splitting |> into separate words, - // which would break ligature formation in fonts that support it. + // Inter-Regular does NOT have a ligature for |>, so we expect it to be split. + // This confirms that we didn't break valid wrapping for non-ligatures. assert_eq!( span.words.len(), - 1, - "Expected '|>' to be a single word (preserved for ligature), but found {} words.", + 2, + "Expected '|>' to be 2 words (no ligature in Inter), but found {} words.", span.words.len() ); + // Test -> (Arrow), which is a common ligature. + buffer.set_text("->", &Attrs::new(), Shaping::Advanced, None); + buffer.shape_until_scroll(false); + let line = &buffer.lines[0]; + let shape = line.shape_opt().expect("ShapeLine not found"); + + assert_eq!( + shape.spans[0].words.len(), + 1, + "Expected '->' to be a single word (ligature), but found {} words.", + shape.spans[0].words.len() + ); + // Test != buffer.set_text("!=", &Attrs::new(), Shaping::Advanced, None); buffer.shape_until_scroll(false); let line = &buffer.lines[0]; let shape = line.shape_opt().expect("ShapeLine not found"); + // Inter-Regular does not have a != ligature. assert_eq!( shape.spans[0].words.len(), - 1, - "Expected '!=' to be a single word, but found {} words.", + 2, + "Expected '!=' to be 2 words (no ligature), but found {} words.", shape.spans[0].words.len() ); @@ -120,10 +134,11 @@ fn test_ligature_segmentation() { buffer.shape_until_scroll(false); let line = &buffer.lines[0]; let shape = line.shape_opt().expect("ShapeLine not found"); + // Inter does not have a ++ ligature. assert_eq!( shape.spans[0].words.len(), - 1, - "Expected '++' to be a single word, but found {} words.", + 2, + "Expected '++' to be 2 words, but found {} words.", shape.spans[0].words.len() ); -} \ No newline at end of file +}