fix: prevent line break splitting |> ligature sequence
The unicode-linebreak crate treats the pipe character '|' as a break opportunity (BA/AL class). As a result, ShapeSpan::build splits text such as '|>' into separate ShapeWords. Because each word is shaped independently, the font shaping engine cannot form ligatures that cross a word boundary. This patch checks for the '|>' sequence during segmentation and skips the break opportunity between the two characters, so that both remain in the same shaping run. It also adds a test, 'ligature_segmentation', to verify that '|>' remains a single word.
This commit is contained in:
parent
ee702e5090
commit
8c8c41b05b
2 changed files with 38 additions and 0 deletions
|
|
@ -826,6 +826,14 @@ impl ShapeSpan {
|
|||
|
||||
let mut start_word = 0;
|
||||
for (end_lb, _) in unicode_linebreak::linebreaks(span) {
|
||||
// Workaround for broken |> ligature in code fonts
|
||||
if end_lb > 0 && end_lb < span.len() {
|
||||
let b = span.as_bytes();
|
||||
if b[end_lb - 1] == b'|' && b[end_lb] == b'>' {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let mut start_lb = end_lb;
|
||||
for (i, c) in span[start_word..end_lb].char_indices().rev() {
|
||||
// TODO: Not all whitespace characters are linebreakable, e.g. 00A0 (No-break
|
||||
|
|
|
|||
30
tests/ligature_segmentation.rs
Normal file
30
tests/ligature_segmentation.rs
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
use cosmic_text::{Attrs, Buffer, FontSystem, Metrics, Shaping};
|
||||
|
||||
/// Shapes the two-character text `|>` and checks that the first span of the
/// first buffer line contains exactly one word.
///
/// The font is read from `fonts/Inter-Regular.ttf` and loaded into an
/// otherwise empty font database, so the test panics if that file is missing.
#[test]
fn ligature_segmentation() {
    // Start from an empty database and register only the font read below.
    let db = fontdb::Database::new();
    let mut font_system = FontSystem::new_with_locale_and_db("en-US".into(), db);
    let font_bytes = std::fs::read("fonts/Inter-Regular.ttf").unwrap();
    font_system.db_mut().load_font_data(font_bytes);

    let metrics = Metrics::new(14.0, 20.0);
    let mut raw_buffer = Buffer::new(&mut font_system, metrics);
    let mut buffer = raw_buffer.borrow_with(&mut font_system);

    buffer.set_text("|>", &Attrs::new(), Shaping::Advanced, None);
    buffer.shape_until_scroll(false);

    let first_line = &buffer.lines[0];
    let shaped = first_line.shape_opt().expect("ShapeLine not found");
    let first_span = &shaped.spans[0];
    let word_count = first_span.words.len();

    // `|` is normally a line break opportunity. If segmentation split `|>`
    // there, the two characters would land in separate words, and a font
    // that provides a `|>` ligature could not form it.
    assert_eq!(
        word_count,
        1,
        "Expected '|>' to be a single word (preserved for ligature), but found {} words.",
        word_count
    );
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue