Optimize BidiParagraphs with ASCII fast path (#408)

* Optimize BidiParagraphs with ASCII fast path - Added fast path for ASCII text that avoids BidiInfo allocation - Added some text shaping benchmarks

* refactor: fix clippy warnings and cleanup imports
This commit is contained in:
romanstingler 2025-08-11 21:15:09 +02:00 committed by GitHub
parent de355a1fd9
commit e80dbc3607
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 196 additions and 30 deletions

View file

@ -68,6 +68,10 @@ warn_on_missing_glyphs = []
name = "layout"
harness = false
[[bench]]
name = "text_shaping_benchmarks"
harness = false
[workspace]
members = ["examples/*"]

View file

@ -0,0 +1,121 @@
use cosmic_text as ct;
use cosmic_text::BidiParagraphs;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
fn bench_ascii_fast_path(c: &mut Criterion) {
let mut fs = ct::FontSystem::new();
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
buffer.set_size(&mut fs, Some(500.0), None);
let ascii_text = "Pure ASCII text for BidiParagraphs optimization testing.\n".repeat(50);
c.bench_function("ShapeLine/ASCII Fast Path", |b| {
b.iter(|| {
buffer.set_text(
&mut fs,
black_box(&ascii_text),
&ct::Attrs::new(),
ct::Shaping::Advanced,
);
buffer.shape_until_scroll(&mut fs, false);
});
});
}
fn bench_bidi_processing(c: &mut Criterion) {
let mut fs = ct::FontSystem::new();
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
buffer.set_size(&mut fs, Some(500.0), None);
let bidi_text = "Mixed English and العربية النص العربي text for BiDi testing.\nThis tests adjust_levels and combined BiDi optimizations.\n".repeat(30);
c.bench_function("ShapeLine/BiDi Processing", |b| {
b.iter(|| {
buffer.set_text(
&mut fs,
black_box(&bidi_text),
&ct::Attrs::new(),
ct::Shaping::Advanced,
);
buffer.shape_until_scroll(&mut fs, false);
});
});
}
fn bench_layout_heavy(c: &mut Criterion) {
let mut fs = ct::FontSystem::new();
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
buffer.set_size(&mut fs, Some(500.0), None);
let layout_text = "This is a very long line that will wrap multiple times and stress the reorder optimization through intensive layout processing with comprehensive buffer reuse testing. ".repeat(30);
c.bench_function("ShapeLine/Layout Heavy", |b| {
b.iter(|| {
buffer.set_text(
&mut fs,
black_box(&layout_text),
&ct::Attrs::new(),
ct::Shaping::Advanced,
);
buffer.shape_until_scroll(&mut fs, false);
});
});
}
fn bench_combined_stress(c: &mut Criterion) {
let mut fs = ct::FontSystem::new();
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
buffer.set_size(&mut fs, Some(500.0), None);
let stress_text = format!("{}\n{}\n{}\n{}\n",
"ASCII line for BidiParagraphs optimization. ".repeat(15),
"Mixed English + العربية for BiDi optimizations. ".repeat(12),
"Very long wrapping line that will trigger reorder optimizations multiple times through intensive layout processing. ".repeat(8),
"Cache key generation line for ShapeRunKey optimization testing. ".repeat(10)
).repeat(10);
c.bench_function("ShapeLine/Combined Stress", |b| {
b.iter(|| {
buffer.set_text(
&mut fs,
black_box(&stress_text),
&ct::Attrs::new(),
ct::Shaping::Advanced,
);
buffer.shape_until_scroll(&mut fs, false);
});
});
}
fn bench_bidi_paragraphs_ascii(c: &mut Criterion) {
let ascii_text = "Simple ASCII text\nwith multiple lines\n".repeat(50);
c.bench_function("BidiParagraphs/ASCII", |b| {
b.iter(|| {
let paras = BidiParagraphs::new(black_box(&ascii_text));
black_box(paras.count());
});
});
}
fn bench_bidi_paragraphs_mixed(c: &mut Criterion) {
let mixed_text = "Mixed English and العربية text\nwith multiple lines\n".repeat(30);
c.bench_function("BidiParagraphs/Mixed", |b| {
b.iter(|| {
let paras = BidiParagraphs::new(black_box(&mixed_text));
black_box(paras.count());
});
});
}
criterion_group!(
benches,
bench_ascii_fast_path,
bench_bidi_processing,
bench_layout_heavy,
bench_combined_stress,
bench_bidi_paragraphs_ascii,
bench_bidi_paragraphs_mixed
);
criterion_main!(benches);

View file

@ -1,5 +1,6 @@
// SPDX-License-Identifier: MIT OR Apache-2.0
use alloc::vec::Vec;
use unicode_bidi::{bidi_class, BidiClass, BidiInfo, ParagraphInfo};
/// An iterator over the paragraphs in the input text.
@ -11,12 +12,46 @@ pub struct BidiParagraphs<'text> {
}
impl<'text> BidiParagraphs<'text> {
/// Create an iterator to split the input text into paragraphs
/// in accordance with `unicode-bidi` behaviour.
/// Create an iterator with optimized paragraph detection.
/// This version avoids `BidiInfo` allocation for simple ASCII text.
pub fn new(text: &'text str) -> Self {
let info = BidiInfo::new(text, None);
let info = info.paragraphs.into_iter();
Self { text, info }
// Fast path for simple ASCII text - just split on newlines
if text.is_ascii()
&& !text
.chars()
.any(|c| c.is_ascii_control() && c != '\n' && c != '\r' && c != '\t')
{
// For simple ASCII, we can avoid `BidiInfo` entirely
// Create minimal ParagraphInfo entries for each line
let mut paragraphs = Vec::new();
let mut start = 0;
for (i, c) in text.char_indices() {
if c == '\n' {
paragraphs.push(ParagraphInfo {
range: start..i,
level: unicode_bidi::Level::ltr(),
});
start = i + 1;
}
}
// Add final paragraph if text doesn't end with newline
if start < text.len() {
paragraphs.push(ParagraphInfo {
range: start..text.len(),
level: unicode_bidi::Level::ltr(),
});
}
let info = paragraphs.into_iter();
Self { text, info }
} else {
// Complex text - fall back to full `BidiInfo` analysis
let info = BidiInfo::new(text, None);
let info = info.paragraphs.into_iter();
Self { text, info }
}
}
}

View file

@ -5,10 +5,12 @@ use alloc::{string::String, vec::Vec};
use core::{cmp, fmt};
use unicode_segmentation::UnicodeSegmentation;
#[cfg(feature = "swash")]
use crate::Color;
use crate::{
Affinity, Align, Attrs, AttrsList, BidiParagraphs, BorrowedWithFontSystem, BufferLine, Color,
Cursor, FontSystem, LayoutCursor, LayoutGlyph, LayoutLine, LineEnding, LineIter, Motion,
Scroll, ShapeLine, Shaping, Wrap,
Affinity, Align, Attrs, AttrsList, BidiParagraphs, BorrowedWithFontSystem, BufferLine, Cursor,
FontSystem, LayoutCursor, LayoutGlyph, LayoutLine, LineEnding, LineIter, Motion, Scroll,
ShapeLine, Shaping, Wrap,
};
/// A line of visible text for rendering

View file

@ -1,20 +1,21 @@
// SPDX-License-Identifier: MIT OR Apache-2.0
#[cfg(not(feature = "std"))]
use alloc::{
string::{String, ToString},
vec::Vec,
};
use core::{cmp, iter::once};
use unicode_segmentation::UnicodeSegmentation;
#[cfg(feature = "swash")]
use crate::Color;
use crate::{
Action, Attrs, AttrsList, BorrowedWithFontSystem, BufferLine, BufferRef, Change, ChangeItem,
Cursor, Edit, FontSystem, LayoutRun, Selection, Shaping,
};
#[cfg(feature = "no_std")]
use alloc::{
string::{String, ToString},
vec::Vec,
};
#[cfg(feature = "swash")]
use std::cmp;
use core::iter::once;
use unicode_segmentation::UnicodeSegmentation;
/// A wrapper of [`Buffer`] for easy editing
#[derive(Debug, Clone)]
pub struct Editor<'buffer> {
@ -583,7 +584,7 @@ impl<'buffer> Edit<'buffer> for Editor<'buffer> {
Action::Insert(character) => {
if character.is_control() && !['\t', '\n', '\u{92}'].contains(&character) {
// Filter out special chars (except for tab), use Action instead
log::debug!("Refusing to insert control character {:?}", character);
log::debug!("Refusing to insert control character {character:?}");
} else if character == '\n' {
self.action(font_system, Action::Enter);
} else {

View file

@ -132,11 +132,11 @@ impl<'syntax_system, 'buffer> SyntaxEditor<'syntax_system, 'buffer> {
self.syntax = match self.syntax_system.syntax_set.find_syntax_for_file(path) {
Ok(Some(some)) => some,
Ok(None) => {
log::warn!("no syntax found for {:?}", path);
log::warn!("no syntax found for {path:?}");
self.syntax_system.syntax_set.find_syntax_plain_text()
}
Err(err) => {
log::warn!("failed to determine syntax for {:?}: {:?}", path, err);
log::warn!("failed to determine syntax for {path:?}: {err:?}");
self.syntax_system.syntax_set.find_syntax_plain_text()
}
};
@ -156,7 +156,7 @@ impl<'syntax_system, 'buffer> SyntaxEditor<'syntax_system, 'buffer> {
{
Some(some) => some,
None => {
log::warn!("no syntax found for {}", extension);
log::warn!("no syntax found for {extension:?}");
self.syntax_system.syntax_set.find_syntax_plain_text()
}
};

View file

@ -1,7 +1,8 @@
use alloc::{collections::BTreeMap, string::String};
#[cfg(feature = "no_std")]
use core::cmp;
use alloc::{collections::BTreeMap, string::String};
use modit::{Event, Key, Parser, TextObject, WordIter};
use unicode_segmentation::UnicodeSegmentation;
use crate::{
Action, AttrsList, BorrowedWithFontSystem, BufferRef, Change, Color, Cursor, Edit, FontSystem,
@ -9,6 +10,8 @@ use crate::{
};
pub use modit::{ViMode, ViParser};
#[cfg(feature = "swash")]
use unicode_segmentation::UnicodeSegmentation;
fn undo_2_action<'buffer, E: Edit<'buffer>>(
editor: &mut E,
@ -599,7 +602,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
}
fn action(&mut self, font_system: &mut FontSystem, action: Action) {
log::debug!("Action {:?}", action);
log::debug!("Action {action:?}");
let editor = &mut self.editor;
@ -636,7 +639,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
Action::Unindent => Key::Backtab,
Action::Motion(Motion::Up) => Key::Up,
_ => {
log::debug!("Pass through action {:?}", action);
log::debug!("Pass through action {action:?}");
editor.action(font_system, action);
// Always finish change when passing through (TODO: group changes)
finish_change(
@ -652,7 +655,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
let has_selection = !matches!(editor.selection(), Selection::None);
self.parser.parse(key, has_selection, |event| {
log::debug!(" Event {:?}", event);
log::debug!(" Event {event:?}");
let action = match event {
Event::AutoIndent => {
log::info!("TODO: AutoIndent");
@ -813,7 +816,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
editor.set_cursor(cursor);
}
_ => {
log::info!("TODO: {:?}", text_object);
log::info!("TODO: {text_object:?}");
}
}
return;
@ -1006,7 +1009,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
}
None
})
.last()
.next_back()
{
cursor.index = i;
}

View file

@ -454,7 +454,7 @@ impl<'a> FontFallbackIter<'a> {
}
}
}
log::debug!("failed to find family '{}'", common_family);
log::debug!("failed to find family '{common_family}'");
}
//TODO: do we need to do this?

View file

@ -147,7 +147,7 @@ impl FontSystem {
/// Create a new [`FontSystem`] with a pre-specified set of fonts.
pub fn new_with_fonts(fonts: impl IntoIterator<Item = fontdb::Source>) -> Self {
let locale = Self::get_locale();
log::debug!("Locale: {}", locale);
log::debug!("Locale: {locale}");
let mut db = fontdb::Database::new();
@ -370,7 +370,7 @@ impl FontSystem {
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
{
let elapsed = now.elapsed();
log::debug!("font matches for {:?} in {:?}", attrs, elapsed);
log::debug!("font matches for {attrs:?} in {elapsed:?}");
}
Arc::new(font_match_keys)