fix: prevent line break splitting |> ligature sequence

The unicode-linebreak crate treats the pipe character '|' as a break opportunity (BA/AL class). As a result, ShapeSpan::build splits text such as '|>' into separate ShapeWords. Because each word is shaped independently, the font shaping engine cannot form a ligature that crosses the word boundary. This patch checks for the '|>' sequence during segmentation and skips the break opportunity between the two characters, so that both stay in the same shaping run. It also adds a test, 'ligature_segmentation', which verifies that '|>' remains a single word.
2026-01-08 21:13:59 -08:00 · 2026-01-08 21:13:59 -08:00 · 8c8c41b05b
commit 8c8c41b05b
parent ee702e5090
2 changed files with 38 additions and 0 deletions
--- a/src/shape.rs
+++ b/src/shape.rs
@ -826,6 +826,14 @@ impl ShapeSpan {

        let mut start_word = 0;
        for (end_lb, _) in unicode_linebreak::linebreaks(span) {
+            // Skip a break between '|' and '>' so the |> ligature is shaped as one word
+            if end_lb > 0 && end_lb < span.len() {
+                let b = span.as_bytes();
+                if b[end_lb - 1] == b'|' && b[end_lb] == b'>' {
+                    continue;
+                }
+            }
+
            let mut start_lb = end_lb;
            for (i, c) in span[start_word..end_lb].char_indices().rev() {
                // TODO: Not all whitespace characters are linebreakable, e.g. 00A0 (No-break
--- a/tests/ligature_segmentation.rs
+++ b/tests/ligature_segmentation.rs
@ -0,0 +1,30 @@
+use cosmic_text::{Attrs, Buffer, FontSystem, Metrics, Shaping};
+
+/// Checks that shaping "|>" yields a single word in the first span of the first line.
+#[test]
+fn ligature_segmentation() {
+    let mut font_system =
+        FontSystem::new_with_locale_and_db("en-US".into(), fontdb::Database::new());
+    let font = std::fs::read("fonts/Inter-Regular.ttf").unwrap(); // panics if the file is missing
+    font_system.db_mut().load_font_data(font);
+    let metrics = Metrics::new(14.0, 20.0);
+
+    let mut buffer = Buffer::new(&mut font_system, metrics);
+    let mut buffer = buffer.borrow_with(&mut font_system);
+
+    buffer.set_text("|>", &Attrs::new(), Shaping::Advanced, None);
+    buffer.shape_until_scroll(false);
+
+    let line = &buffer.lines[0];
+    let shape = line.shape_opt().expect("ShapeLine not found");
+    let span = &shape.spans[0];
+    
+    // A break opportunity after '|' would otherwise split "|>" into two words,
+    // and separately shaped words cannot form a ligature across their boundary.
+    assert_eq!(
+        span.words.len(),
+        1,
+        "Expected '|>' to be a single word (preserved for ligature), but found {} words.",
+        span.words.len()
+    );
+}