From 8c8c41b05b7357c34b5d635e8e52159ba512a8c1 Mon Sep 17 00:00:00 2001
From: Adam Kowalski
Date: Thu, 8 Jan 2026 21:13:59 -0800
Subject: [PATCH] fix: prevent line break splitting |> ligature sequence

The unicode-linebreak crate reports a break opportunity after the pipe
character '|' (line breaking class BA, "Break After") when it is
followed by an ordinary character such as '>' (class AL). This causes
ShapeSpan::build to split text like '|>' into separate ShapeWords. When
these words are shaped independently, the font shaping engine cannot
form ligatures that cross the word boundary.

This patch manually checks for the '|>' sequence during segmentation and
skips the break opportunity, ensuring both characters remain in the same
shaping run.

Added a unit test 'ligature_segmentation' to verify that '|>' remains a
single word.
---
 src/shape.rs                   |  8 ++++++++
 tests/ligature_segmentation.rs | 30 ++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 tests/ligature_segmentation.rs

diff --git a/src/shape.rs b/src/shape.rs
index 523ec70..491c69b 100644
--- a/src/shape.rs
+++ b/src/shape.rs
@@ -826,6 +826,14 @@ impl ShapeSpan {
 
         let mut start_word = 0;
         for (end_lb, _) in unicode_linebreak::linebreaks(span) {
+            // `unicode_linebreak` reports a break opportunity after '|' (class BA). Taking
+            // it between '|' and '>' would place the two characters in separate words,
+            // which are shaped independently, so a `|>` ligature could never form. Skip
+            // that one opportunity to keep the pair in the same word.
+            if span[..end_lb].ends_with('|') && span[end_lb..].starts_with('>') {
+                continue;
+            }
+
             let mut start_lb = end_lb;
             for (i, c) in span[start_word..end_lb].char_indices().rev() {
                 // TODO: Not all whitespace characters are linebreakable, e.g. 00A0 (No-break
diff --git a/tests/ligature_segmentation.rs b/tests/ligature_segmentation.rs
new file mode 100644
index 0000000..c1936ac
--- /dev/null
+++ b/tests/ligature_segmentation.rs
@@ -0,0 +1,30 @@
+use cosmic_text::{Attrs, Buffer, FontSystem, Metrics, Shaping};
+
+/// `unicode_linebreak` offers a break after `|`. Shaping `|>` must still yield a
+/// single word, because characters in separate words are shaped independently and
+/// can never be joined into a ligature.
+#[test]
+fn ligature_segmentation() {
+    let mut font_system =
+        FontSystem::new_with_locale_and_db("en-US".into(), fontdb::Database::new());
+    let font = std::fs::read("fonts/Inter-Regular.ttf").unwrap();
+    font_system.db_mut().load_font_data(font);
+    let metrics = Metrics::new(14.0, 20.0);
+
+    let mut buffer = Buffer::new(&mut font_system, metrics);
+    let mut buffer = buffer.borrow_with(&mut font_system);
+
+    buffer.set_text("|>", &Attrs::new(), Shaping::Advanced, None);
+    buffer.shape_until_scroll(false);
+
+    let line = &buffer.lines[0];
+    let shape = line.shape_opt().expect("ShapeLine not found");
+    let span = &shape.spans[0];
+
+    assert_eq!(
+        span.words.len(),
+        1,
+        "Expected '|>' to be a single word (preserved for ligature), but found {} words.",
+        span.words.len()
+    );
+}