Optimize BidiParagraphs with ASCII fast path (#408)
* Optimize BidiParagraphs with ASCII fast path
  - Added a fast path for ASCII text that avoids the BidiInfo allocation
  - Added some text shaping benchmarks
* refactor: fix clippy warnings and clean up imports
This commit is contained in:
parent
de355a1fd9
commit
e80dbc3607
9 changed files with 196 additions and 30 deletions
|
|
@ -68,6 +68,10 @@ warn_on_missing_glyphs = []
|
|||
name = "layout"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "text_shaping_benchmarks"
|
||||
harness = false
|
||||
|
||||
[workspace]
|
||||
members = ["examples/*"]
|
||||
|
||||
|
|
|
|||
121
benches/text_shaping_benchmarks.rs
Normal file
121
benches/text_shaping_benchmarks.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
use cosmic_text as ct;
|
||||
use cosmic_text::BidiParagraphs;
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
|
||||
fn bench_ascii_fast_path(c: &mut Criterion) {
|
||||
let mut fs = ct::FontSystem::new();
|
||||
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
|
||||
buffer.set_size(&mut fs, Some(500.0), None);
|
||||
|
||||
let ascii_text = "Pure ASCII text for BidiParagraphs optimization testing.\n".repeat(50);
|
||||
|
||||
c.bench_function("ShapeLine/ASCII Fast Path", |b| {
|
||||
b.iter(|| {
|
||||
buffer.set_text(
|
||||
&mut fs,
|
||||
black_box(&ascii_text),
|
||||
&ct::Attrs::new(),
|
||||
ct::Shaping::Advanced,
|
||||
);
|
||||
buffer.shape_until_scroll(&mut fs, false);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_bidi_processing(c: &mut Criterion) {
|
||||
let mut fs = ct::FontSystem::new();
|
||||
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
|
||||
buffer.set_size(&mut fs, Some(500.0), None);
|
||||
|
||||
let bidi_text = "Mixed English and العربية النص العربي text for BiDi testing.\nThis tests adjust_levels and combined BiDi optimizations.\n".repeat(30);
|
||||
|
||||
c.bench_function("ShapeLine/BiDi Processing", |b| {
|
||||
b.iter(|| {
|
||||
buffer.set_text(
|
||||
&mut fs,
|
||||
black_box(&bidi_text),
|
||||
&ct::Attrs::new(),
|
||||
ct::Shaping::Advanced,
|
||||
);
|
||||
buffer.shape_until_scroll(&mut fs, false);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_layout_heavy(c: &mut Criterion) {
|
||||
let mut fs = ct::FontSystem::new();
|
||||
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
|
||||
buffer.set_size(&mut fs, Some(500.0), None);
|
||||
|
||||
let layout_text = "This is a very long line that will wrap multiple times and stress the reorder optimization through intensive layout processing with comprehensive buffer reuse testing. ".repeat(30);
|
||||
|
||||
c.bench_function("ShapeLine/Layout Heavy", |b| {
|
||||
b.iter(|| {
|
||||
buffer.set_text(
|
||||
&mut fs,
|
||||
black_box(&layout_text),
|
||||
&ct::Attrs::new(),
|
||||
ct::Shaping::Advanced,
|
||||
);
|
||||
buffer.shape_until_scroll(&mut fs, false);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_combined_stress(c: &mut Criterion) {
|
||||
let mut fs = ct::FontSystem::new();
|
||||
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
|
||||
buffer.set_size(&mut fs, Some(500.0), None);
|
||||
|
||||
let stress_text = format!("{}\n{}\n{}\n{}\n",
|
||||
"ASCII line for BidiParagraphs optimization. ".repeat(15),
|
||||
"Mixed English + العربية for BiDi optimizations. ".repeat(12),
|
||||
"Very long wrapping line that will trigger reorder optimizations multiple times through intensive layout processing. ".repeat(8),
|
||||
"Cache key generation line for ShapeRunKey optimization testing. ".repeat(10)
|
||||
).repeat(10);
|
||||
|
||||
c.bench_function("ShapeLine/Combined Stress", |b| {
|
||||
b.iter(|| {
|
||||
buffer.set_text(
|
||||
&mut fs,
|
||||
black_box(&stress_text),
|
||||
&ct::Attrs::new(),
|
||||
ct::Shaping::Advanced,
|
||||
);
|
||||
buffer.shape_until_scroll(&mut fs, false);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_bidi_paragraphs_ascii(c: &mut Criterion) {
|
||||
let ascii_text = "Simple ASCII text\nwith multiple lines\n".repeat(50);
|
||||
|
||||
c.bench_function("BidiParagraphs/ASCII", |b| {
|
||||
b.iter(|| {
|
||||
let paras = BidiParagraphs::new(black_box(&ascii_text));
|
||||
black_box(paras.count());
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_bidi_paragraphs_mixed(c: &mut Criterion) {
|
||||
let mixed_text = "Mixed English and العربية text\nwith multiple lines\n".repeat(30);
|
||||
|
||||
c.bench_function("BidiParagraphs/Mixed", |b| {
|
||||
b.iter(|| {
|
||||
let paras = BidiParagraphs::new(black_box(&mixed_text));
|
||||
black_box(paras.count());
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_ascii_fast_path,
|
||||
bench_bidi_processing,
|
||||
bench_layout_heavy,
|
||||
bench_combined_stress,
|
||||
bench_bidi_paragraphs_ascii,
|
||||
bench_bidi_paragraphs_mixed
|
||||
);
|
||||
criterion_main!(benches);
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
// SPDX-License-Identifier: MIT OR Apache-2.0
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use unicode_bidi::{bidi_class, BidiClass, BidiInfo, ParagraphInfo};
|
||||
|
||||
/// An iterator over the paragraphs in the input text.
|
||||
|
|
@ -11,12 +12,46 @@ pub struct BidiParagraphs<'text> {
|
|||
}
|
||||
|
||||
impl<'text> BidiParagraphs<'text> {
|
||||
/// Create an iterator to split the input text into paragraphs
|
||||
/// in accordance with `unicode-bidi` behaviour.
|
||||
/// Create an iterator with optimized paragraph detection.
|
||||
/// This version avoids `BidiInfo` allocation for simple ASCII text.
|
||||
pub fn new(text: &'text str) -> Self {
|
||||
let info = BidiInfo::new(text, None);
|
||||
let info = info.paragraphs.into_iter();
|
||||
Self { text, info }
|
||||
// Fast path for simple ASCII text - just split on newlines
|
||||
if text.is_ascii()
|
||||
&& !text
|
||||
.chars()
|
||||
.any(|c| c.is_ascii_control() && c != '\n' && c != '\r' && c != '\t')
|
||||
{
|
||||
// For simple ASCII, we can avoid `BidiInfo` entirely
|
||||
// Create minimal ParagraphInfo entries for each line
|
||||
let mut paragraphs = Vec::new();
|
||||
let mut start = 0;
|
||||
|
||||
for (i, c) in text.char_indices() {
|
||||
if c == '\n' {
|
||||
paragraphs.push(ParagraphInfo {
|
||||
range: start..i,
|
||||
level: unicode_bidi::Level::ltr(),
|
||||
});
|
||||
start = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Add final paragraph if text doesn't end with newline
|
||||
if start < text.len() {
|
||||
paragraphs.push(ParagraphInfo {
|
||||
range: start..text.len(),
|
||||
level: unicode_bidi::Level::ltr(),
|
||||
});
|
||||
}
|
||||
|
||||
let info = paragraphs.into_iter();
|
||||
Self { text, info }
|
||||
} else {
|
||||
// Complex text - fall back to full `BidiInfo` analysis
|
||||
let info = BidiInfo::new(text, None);
|
||||
let info = info.paragraphs.into_iter();
|
||||
Self { text, info }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,10 +5,12 @@ use alloc::{string::String, vec::Vec};
|
|||
use core::{cmp, fmt};
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
#[cfg(feature = "swash")]
|
||||
use crate::Color;
|
||||
use crate::{
|
||||
Affinity, Align, Attrs, AttrsList, BidiParagraphs, BorrowedWithFontSystem, BufferLine, Color,
|
||||
Cursor, FontSystem, LayoutCursor, LayoutGlyph, LayoutLine, LineEnding, LineIter, Motion,
|
||||
Scroll, ShapeLine, Shaping, Wrap,
|
||||
Affinity, Align, Attrs, AttrsList, BidiParagraphs, BorrowedWithFontSystem, BufferLine, Cursor,
|
||||
FontSystem, LayoutCursor, LayoutGlyph, LayoutLine, LineEnding, LineIter, Motion, Scroll,
|
||||
ShapeLine, Shaping, Wrap,
|
||||
};
|
||||
|
||||
/// A line of visible text for rendering
|
||||
|
|
|
|||
|
|
@ -1,20 +1,21 @@
|
|||
// SPDX-License-Identifier: MIT OR Apache-2.0
|
||||
|
||||
#[cfg(not(feature = "std"))]
|
||||
use alloc::{
|
||||
string::{String, ToString},
|
||||
vec::Vec,
|
||||
};
|
||||
use core::{cmp, iter::once};
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
#[cfg(feature = "swash")]
|
||||
use crate::Color;
|
||||
use crate::{
|
||||
Action, Attrs, AttrsList, BorrowedWithFontSystem, BufferLine, BufferRef, Change, ChangeItem,
|
||||
Cursor, Edit, FontSystem, LayoutRun, Selection, Shaping,
|
||||
};
|
||||
#[cfg(feature = "no_std")]
|
||||
use alloc::{
|
||||
string::{String, ToString},
|
||||
vec::Vec,
|
||||
};
|
||||
#[cfg(feature = "swash")]
|
||||
use std::cmp;
|
||||
|
||||
use core::iter::once;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
/// A wrapper of [`Buffer`] for easy editing
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Editor<'buffer> {
|
||||
|
|
@ -583,7 +584,7 @@ impl<'buffer> Edit<'buffer> for Editor<'buffer> {
|
|||
Action::Insert(character) => {
|
||||
if character.is_control() && !['\t', '\n', '\u{92}'].contains(&character) {
|
||||
// Filter out special chars (except for tab), use Action instead
|
||||
log::debug!("Refusing to insert control character {:?}", character);
|
||||
log::debug!("Refusing to insert control character {character:?}");
|
||||
} else if character == '\n' {
|
||||
self.action(font_system, Action::Enter);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -132,11 +132,11 @@ impl<'syntax_system, 'buffer> SyntaxEditor<'syntax_system, 'buffer> {
|
|||
self.syntax = match self.syntax_system.syntax_set.find_syntax_for_file(path) {
|
||||
Ok(Some(some)) => some,
|
||||
Ok(None) => {
|
||||
log::warn!("no syntax found for {:?}", path);
|
||||
log::warn!("no syntax found for {path:?}");
|
||||
self.syntax_system.syntax_set.find_syntax_plain_text()
|
||||
}
|
||||
Err(err) => {
|
||||
log::warn!("failed to determine syntax for {:?}: {:?}", path, err);
|
||||
log::warn!("failed to determine syntax for {path:?}: {err:?}");
|
||||
self.syntax_system.syntax_set.find_syntax_plain_text()
|
||||
}
|
||||
};
|
||||
|
|
@ -156,7 +156,7 @@ impl<'syntax_system, 'buffer> SyntaxEditor<'syntax_system, 'buffer> {
|
|||
{
|
||||
Some(some) => some,
|
||||
None => {
|
||||
log::warn!("no syntax found for {}", extension);
|
||||
log::warn!("no syntax found for {extension:?}");
|
||||
self.syntax_system.syntax_set.find_syntax_plain_text()
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
use alloc::{collections::BTreeMap, string::String};
|
||||
#[cfg(feature = "no_std")]
|
||||
use core::cmp;
|
||||
|
||||
use alloc::{collections::BTreeMap, string::String};
|
||||
use modit::{Event, Key, Parser, TextObject, WordIter};
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
use crate::{
|
||||
Action, AttrsList, BorrowedWithFontSystem, BufferRef, Change, Color, Cursor, Edit, FontSystem,
|
||||
|
|
@ -9,6 +10,8 @@ use crate::{
|
|||
};
|
||||
|
||||
pub use modit::{ViMode, ViParser};
|
||||
#[cfg(feature = "swash")]
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
fn undo_2_action<'buffer, E: Edit<'buffer>>(
|
||||
editor: &mut E,
|
||||
|
|
@ -599,7 +602,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
|
|||
}
|
||||
|
||||
fn action(&mut self, font_system: &mut FontSystem, action: Action) {
|
||||
log::debug!("Action {:?}", action);
|
||||
log::debug!("Action {action:?}");
|
||||
|
||||
let editor = &mut self.editor;
|
||||
|
||||
|
|
@ -636,7 +639,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
|
|||
Action::Unindent => Key::Backtab,
|
||||
Action::Motion(Motion::Up) => Key::Up,
|
||||
_ => {
|
||||
log::debug!("Pass through action {:?}", action);
|
||||
log::debug!("Pass through action {action:?}");
|
||||
editor.action(font_system, action);
|
||||
// Always finish change when passing through (TODO: group changes)
|
||||
finish_change(
|
||||
|
|
@ -652,7 +655,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
|
|||
let has_selection = !matches!(editor.selection(), Selection::None);
|
||||
|
||||
self.parser.parse(key, has_selection, |event| {
|
||||
log::debug!(" Event {:?}", event);
|
||||
log::debug!(" Event {event:?}");
|
||||
let action = match event {
|
||||
Event::AutoIndent => {
|
||||
log::info!("TODO: AutoIndent");
|
||||
|
|
@ -813,7 +816,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
|
|||
editor.set_cursor(cursor);
|
||||
}
|
||||
_ => {
|
||||
log::info!("TODO: {:?}", text_object);
|
||||
log::info!("TODO: {text_object:?}");
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
|
@ -1006,7 +1009,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
|
|||
}
|
||||
None
|
||||
})
|
||||
.last()
|
||||
.next_back()
|
||||
{
|
||||
cursor.index = i;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -454,7 +454,7 @@ impl<'a> FontFallbackIter<'a> {
|
|||
}
|
||||
}
|
||||
}
|
||||
log::debug!("failed to find family '{}'", common_family);
|
||||
log::debug!("failed to find family '{common_family}'");
|
||||
}
|
||||
|
||||
//TODO: do we need to do this?
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@ impl FontSystem {
|
|||
/// Create a new [`FontSystem`] with a pre-specified set of fonts.
|
||||
pub fn new_with_fonts(fonts: impl IntoIterator<Item = fontdb::Source>) -> Self {
|
||||
let locale = Self::get_locale();
|
||||
log::debug!("Locale: {}", locale);
|
||||
log::debug!("Locale: {locale}");
|
||||
|
||||
let mut db = fontdb::Database::new();
|
||||
|
||||
|
|
@ -370,7 +370,7 @@ impl FontSystem {
|
|||
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
|
||||
{
|
||||
let elapsed = now.elapsed();
|
||||
log::debug!("font matches for {:?} in {:?}", attrs, elapsed);
|
||||
log::debug!("font matches for {attrs:?} in {elapsed:?}");
|
||||
}
|
||||
|
||||
Arc::new(font_match_keys)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue