From e80dbc3607ac4b2e78886d6f3c862eb30e0a0853 Mon Sep 17 00:00:00 2001 From: romanstingler Date: Mon, 11 Aug 2025 21:15:09 +0200 Subject: [PATCH] Optimize BidiParagraphs with ASCII fast path (#408) * Optimize BidiParagraphs with ASCII fast path - Added fast path for ASCII text that avoids BidiInfo allocation - Added some text shaping benchmarks * refactor: fix clippy warnings and cleanup imports --- Cargo.toml | 4 + benches/text_shaping_benchmarks.rs | 121 +++++++++++++++++++++++++++++ src/bidi_para.rs | 45 +++++++++-- src/buffer.rs | 8 +- src/edit/editor.rs | 19 ++--- src/edit/syntect.rs | 6 +- src/edit/vi.rs | 17 ++-- src/font/fallback/mod.rs | 2 +- src/font/system.rs | 4 +- 9 files changed, 196 insertions(+), 30 deletions(-) create mode 100644 benches/text_shaping_benchmarks.rs diff --git a/Cargo.toml b/Cargo.toml index 2d9b75f..091c306 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,6 +68,10 @@ warn_on_missing_glyphs = [] name = "layout" harness = false +[[bench]] +name = "text_shaping_benchmarks" +harness = false + [workspace] members = ["examples/*"] diff --git a/benches/text_shaping_benchmarks.rs b/benches/text_shaping_benchmarks.rs new file mode 100644 index 0000000..df02d2c --- /dev/null +++ b/benches/text_shaping_benchmarks.rs @@ -0,0 +1,121 @@ +use cosmic_text as ct; +use cosmic_text::BidiParagraphs; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +fn bench_ascii_fast_path(c: &mut Criterion) { + let mut fs = ct::FontSystem::new(); + let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0)); + buffer.set_size(&mut fs, Some(500.0), None); + + let ascii_text = "Pure ASCII text for BidiParagraphs optimization testing.\n".repeat(50); + + c.bench_function("ShapeLine/ASCII Fast Path", |b| { + b.iter(|| { + buffer.set_text( + &mut fs, + black_box(&ascii_text), + &ct::Attrs::new(), + ct::Shaping::Advanced, + ); + buffer.shape_until_scroll(&mut fs, false); + }); + }); +} + +fn bench_bidi_processing(c: &mut Criterion) { + let mut fs = ct::FontSystem::new(); + let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0)); + buffer.set_size(&mut fs, Some(500.0), None); + + let bidi_text = "Mixed English and العربية النص العربي text for BiDi testing.\nThis tests adjust_levels and combined BiDi optimizations.\n".repeat(30); + + c.bench_function("ShapeLine/BiDi Processing", |b| { + b.iter(|| { + buffer.set_text( + &mut fs, + black_box(&bidi_text), + &ct::Attrs::new(), + ct::Shaping::Advanced, + ); + buffer.shape_until_scroll(&mut fs, false); + }); + }); +} + +fn bench_layout_heavy(c: &mut Criterion) { + let mut fs = ct::FontSystem::new(); + let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0)); + buffer.set_size(&mut fs, Some(500.0), None); + + let layout_text = "This is a very long line that will wrap multiple times and stress the reorder optimization through intensive layout processing with comprehensive buffer reuse testing. ".repeat(30); + + c.bench_function("ShapeLine/Layout Heavy", |b| { + b.iter(|| { + buffer.set_text( + &mut fs, + black_box(&layout_text), + &ct::Attrs::new(), + ct::Shaping::Advanced, + ); + buffer.shape_until_scroll(&mut fs, false); + }); + }); +} + +fn bench_combined_stress(c: &mut Criterion) { + let mut fs = ct::FontSystem::new(); + let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0)); + buffer.set_size(&mut fs, Some(500.0), None); + + let stress_text = format!("{}\n{}\n{}\n{}\n", + "ASCII line for BidiParagraphs optimization. ".repeat(15), + "Mixed English + العربية for BiDi optimizations. ".repeat(12), + "Very long wrapping line that will trigger reorder optimizations multiple times through intensive layout processing. ".repeat(8), + "Cache key generation line for ShapeRunKey optimization testing. ".repeat(10) + ).repeat(10); + + c.bench_function("ShapeLine/Combined Stress", |b| { + b.iter(|| { + buffer.set_text( + &mut fs, + black_box(&stress_text), + &ct::Attrs::new(), + ct::Shaping::Advanced, + ); + buffer.shape_until_scroll(&mut fs, false); + }); + }); +} + +fn bench_bidi_paragraphs_ascii(c: &mut Criterion) { + let ascii_text = "Simple ASCII text\nwith multiple lines\n".repeat(50); + + c.bench_function("BidiParagraphs/ASCII", |b| { + b.iter(|| { + let paras = BidiParagraphs::new(black_box(&ascii_text)); + black_box(paras.count()); + }); + }); +} + +fn bench_bidi_paragraphs_mixed(c: &mut Criterion) { + let mixed_text = "Mixed English and العربية text\nwith multiple lines\n".repeat(30); + + c.bench_function("BidiParagraphs/Mixed", |b| { + b.iter(|| { + let paras = BidiParagraphs::new(black_box(&mixed_text)); + black_box(paras.count()); + }); + }); +} + +criterion_group!( + benches, + bench_ascii_fast_path, + bench_bidi_processing, + bench_layout_heavy, + bench_combined_stress, + bench_bidi_paragraphs_ascii, + bench_bidi_paragraphs_mixed +); +criterion_main!(benches); diff --git a/src/bidi_para.rs b/src/bidi_para.rs index 5a7e595..fefdc1c 100644 --- a/src/bidi_para.rs +++ b/src/bidi_para.rs @@ -1,5 +1,6 @@ // SPDX-License-Identifier: MIT OR Apache-2.0 +use alloc::vec::Vec; use unicode_bidi::{bidi_class, BidiClass, BidiInfo, ParagraphInfo}; /// An iterator over the paragraphs in the input text. @@ -11,12 +12,46 @@ pub struct BidiParagraphs<'text> { } impl<'text> BidiParagraphs<'text> { - /// Create an iterator to split the input text into paragraphs - /// in accordance with `unicode-bidi` behaviour. + /// Create an iterator with optimized paragraph detection. + /// This version avoids `BidiInfo` allocation for simple ASCII text. pub fn new(text: &'text str) -> Self { - let info = BidiInfo::new(text, None); - let info = info.paragraphs.into_iter(); - Self { text, info } + // Fast path for simple ASCII text - just split on newlines + if text.is_ascii() + && !text + .chars() + .any(|c| c.is_ascii_control() && c != '\n' && c != '\r' && c != '\t') + { + // For simple ASCII, we can avoid `BidiInfo` entirely + // Create minimal ParagraphInfo entries for each line + let mut paragraphs = Vec::new(); + let mut start = 0; + + for (i, c) in text.char_indices() { + if c == '\n' { + paragraphs.push(ParagraphInfo { + range: start..i, + level: unicode_bidi::Level::ltr(), + }); + start = i + 1; + } + } + + // Add final paragraph if text doesn't end with newline + if start < text.len() { + paragraphs.push(ParagraphInfo { + range: start..text.len(), + level: unicode_bidi::Level::ltr(), + }); + } + + let info = paragraphs.into_iter(); + Self { text, info } + } else { + // Complex text - fall back to full `BidiInfo` analysis + let info = BidiInfo::new(text, None); + let info = info.paragraphs.into_iter(); + Self { text, info } + } } } diff --git a/src/buffer.rs b/src/buffer.rs index 3dda7a4..b13e96d 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -5,10 +5,12 @@ use alloc::{string::String, vec::Vec}; use core::{cmp, fmt}; use unicode_segmentation::UnicodeSegmentation; +#[cfg(feature = "swash")] +use crate::Color; use crate::{ - Affinity, Align, Attrs, AttrsList, BidiParagraphs, BorrowedWithFontSystem, BufferLine, Color, - Cursor, FontSystem, LayoutCursor, LayoutGlyph, LayoutLine, LineEnding, LineIter, Motion, - Scroll, ShapeLine, Shaping, Wrap, + Affinity, Align, Attrs, AttrsList, BidiParagraphs, BorrowedWithFontSystem, BufferLine, Cursor, + FontSystem, LayoutCursor, LayoutGlyph, LayoutLine, LineEnding, LineIter, Motion, Scroll, + ShapeLine, Shaping, Wrap, }; /// A line of visible text for rendering diff --git a/src/edit/editor.rs b/src/edit/editor.rs index 36fb92d..8060b6d 100644 --- a/src/edit/editor.rs +++ b/src/edit/editor.rs @@ -1,20 +1,21 @@ // SPDX-License-Identifier: MIT OR Apache-2.0 -#[cfg(not(feature = "std"))] -use alloc::{ - string::{String, ToString}, - vec::Vec, -}; -use core::{cmp, iter::once}; -use unicode_segmentation::UnicodeSegmentation; - #[cfg(feature = "swash")] use crate::Color; use crate::{ Action, Attrs, AttrsList, BorrowedWithFontSystem, BufferLine, BufferRef, Change, ChangeItem, Cursor, Edit, FontSystem, LayoutRun, Selection, Shaping, }; +#[cfg(feature = "no_std")] +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; +#[cfg(feature = "swash")] +use std::cmp; +use core::iter::once; +use unicode_segmentation::UnicodeSegmentation; /// A wrapper of [`Buffer`] for easy editing #[derive(Debug, Clone)] pub struct Editor<'buffer> { @@ -583,7 +584,7 @@ impl<'buffer> Edit<'buffer> for Editor<'buffer> { Action::Insert(character) => { if character.is_control() && !['\t', '\n', '\u{92}'].contains(&character) { // Filter out special chars (except for tab), use Action instead - log::debug!("Refusing to insert control character {:?}", character); + log::debug!("Refusing to insert control character {character:?}"); } else if character == '\n' { self.action(font_system, Action::Enter); } else { diff --git a/src/edit/syntect.rs b/src/edit/syntect.rs index 57eb9c7..8f70942 100644 --- a/src/edit/syntect.rs +++ b/src/edit/syntect.rs @@ -132,11 +132,11 @@ impl<'syntax_system, 'buffer> SyntaxEditor<'syntax_system, 'buffer> { self.syntax = match self.syntax_system.syntax_set.find_syntax_for_file(path) { Ok(Some(some)) => some, Ok(None) => { - log::warn!("no syntax found for {:?}", path); + log::warn!("no syntax found for {path:?}"); self.syntax_system.syntax_set.find_syntax_plain_text() } Err(err) => { - log::warn!("failed to determine syntax for {:?}: {:?}", path, err); + log::warn!("failed to determine syntax for {path:?}: {err:?}"); self.syntax_system.syntax_set.find_syntax_plain_text() } }; @@ -156,7 +156,7 @@ impl<'syntax_system, 'buffer> SyntaxEditor<'syntax_system, 'buffer> { { Some(some) => some, None => { - log::warn!("no syntax found for {}", extension); + log::warn!("no syntax found for {extension:?}"); self.syntax_system.syntax_set.find_syntax_plain_text() } }; diff --git a/src/edit/vi.rs b/src/edit/vi.rs index 0719e7b..9540234 100644 --- a/src/edit/vi.rs +++ b/src/edit/vi.rs @@ -1,7 +1,8 @@ -use alloc::{collections::BTreeMap, string::String}; +#[cfg(feature = "no_std")] use core::cmp; + +use alloc::{collections::BTreeMap, string::String}; use modit::{Event, Key, Parser, TextObject, WordIter}; -use unicode_segmentation::UnicodeSegmentation; use crate::{ Action, AttrsList, BorrowedWithFontSystem, BufferRef, Change, Color, Cursor, Edit, FontSystem, @@ -9,6 +10,8 @@ use crate::{ }; pub use modit::{ViMode, ViParser}; +#[cfg(feature = "swash")] +use unicode_segmentation::UnicodeSegmentation; fn undo_2_action<'buffer, E: Edit<'buffer>>( editor: &mut E, @@ -599,7 +602,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> { } fn action(&mut self, font_system: &mut FontSystem, action: Action) { - log::debug!("Action {:?}", action); + log::debug!("Action {action:?}"); let editor = &mut self.editor; @@ -636,7 +639,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> { Action::Unindent => Key::Backtab, Action::Motion(Motion::Up) => Key::Up, _ => { - log::debug!("Pass through action {:?}", action); + log::debug!("Pass through action {action:?}"); editor.action(font_system, action); // Always finish change when passing through (TODO: group changes) finish_change( @@ -652,7 +655,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> { let has_selection = !matches!(editor.selection(), Selection::None); self.parser.parse(key, has_selection, |event| { - log::debug!(" Event {:?}", event); + log::debug!(" Event {event:?}"); let action = match event { Event::AutoIndent => { log::info!("TODO: AutoIndent"); @@ -813,7 +816,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> { editor.set_cursor(cursor); } _ => { - log::info!("TODO: {:?}", text_object); + log::info!("TODO: {text_object:?}"); } } return; @@ -1006,7 +1009,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> { } None }) - .last() + .next_back() { cursor.index = i; } diff --git a/src/font/fallback/mod.rs b/src/font/fallback/mod.rs index 5a7be2f..05c392b 100644 --- a/src/font/fallback/mod.rs +++ b/src/font/fallback/mod.rs @@ -454,7 +454,7 @@ impl<'a> FontFallbackIter<'a> { } } } - log::debug!("failed to find family '{}'", common_family); + log::debug!("failed to find family '{common_family}'"); } //TODO: do we need to do this? diff --git a/src/font/system.rs b/src/font/system.rs index b1896e2..8a47e4a 100644 --- a/src/font/system.rs +++ b/src/font/system.rs @@ -147,7 +147,7 @@ impl FontSystem { /// Create a new [`FontSystem`] with a pre-specified set of fonts. pub fn new_with_fonts(fonts: impl IntoIterator) -> Self { let locale = Self::get_locale(); - log::debug!("Locale: {}", locale); + log::debug!("Locale: {locale}"); let mut db = fontdb::Database::new(); @@ -370,7 +370,7 @@ impl FontSystem { #[cfg(all(feature = "std", not(target_arch = "wasm32")))] { let elapsed = now.elapsed(); - log::debug!("font matches for {:?} in {:?}", attrs, elapsed); + log::debug!("font matches for {attrs:?} in {elapsed:?}"); } Arc::new(font_match_keys)