Optimize BidiParagraphs with ASCII fast path (#408)
* Optimize BidiParagraphs with ASCII fast path - Added fast path for ASCII text that avoids BidiInfo allocation - Added some text shaping benchmarks * refactor: fix clippy warnings and cleanup imports
This commit is contained in:
parent
de355a1fd9
commit
e80dbc3607
9 changed files with 196 additions and 30 deletions
|
|
@ -68,6 +68,10 @@ warn_on_missing_glyphs = []
|
||||||
name = "layout"
|
name = "layout"
|
||||||
harness = false
|
harness = false
|
||||||
|
|
||||||
|
[[bench]]
|
||||||
|
name = "text_shaping_benchmarks"
|
||||||
|
harness = false
|
||||||
|
|
||||||
[workspace]
|
[workspace]
|
||||||
members = ["examples/*"]
|
members = ["examples/*"]
|
||||||
|
|
||||||
|
|
|
||||||
121
benches/text_shaping_benchmarks.rs
Normal file
121
benches/text_shaping_benchmarks.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
||||||
|
use cosmic_text as ct;
|
||||||
|
use cosmic_text::BidiParagraphs;
|
||||||
|
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||||
|
|
||||||
|
fn bench_ascii_fast_path(c: &mut Criterion) {
|
||||||
|
let mut fs = ct::FontSystem::new();
|
||||||
|
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
|
||||||
|
buffer.set_size(&mut fs, Some(500.0), None);
|
||||||
|
|
||||||
|
let ascii_text = "Pure ASCII text for BidiParagraphs optimization testing.\n".repeat(50);
|
||||||
|
|
||||||
|
c.bench_function("ShapeLine/ASCII Fast Path", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
buffer.set_text(
|
||||||
|
&mut fs,
|
||||||
|
black_box(&ascii_text),
|
||||||
|
&ct::Attrs::new(),
|
||||||
|
ct::Shaping::Advanced,
|
||||||
|
);
|
||||||
|
buffer.shape_until_scroll(&mut fs, false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_bidi_processing(c: &mut Criterion) {
|
||||||
|
let mut fs = ct::FontSystem::new();
|
||||||
|
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
|
||||||
|
buffer.set_size(&mut fs, Some(500.0), None);
|
||||||
|
|
||||||
|
let bidi_text = "Mixed English and العربية النص العربي text for BiDi testing.\nThis tests adjust_levels and combined BiDi optimizations.\n".repeat(30);
|
||||||
|
|
||||||
|
c.bench_function("ShapeLine/BiDi Processing", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
buffer.set_text(
|
||||||
|
&mut fs,
|
||||||
|
black_box(&bidi_text),
|
||||||
|
&ct::Attrs::new(),
|
||||||
|
ct::Shaping::Advanced,
|
||||||
|
);
|
||||||
|
buffer.shape_until_scroll(&mut fs, false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_layout_heavy(c: &mut Criterion) {
|
||||||
|
let mut fs = ct::FontSystem::new();
|
||||||
|
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
|
||||||
|
buffer.set_size(&mut fs, Some(500.0), None);
|
||||||
|
|
||||||
|
let layout_text = "This is a very long line that will wrap multiple times and stress the reorder optimization through intensive layout processing with comprehensive buffer reuse testing. ".repeat(30);
|
||||||
|
|
||||||
|
c.bench_function("ShapeLine/Layout Heavy", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
buffer.set_text(
|
||||||
|
&mut fs,
|
||||||
|
black_box(&layout_text),
|
||||||
|
&ct::Attrs::new(),
|
||||||
|
ct::Shaping::Advanced,
|
||||||
|
);
|
||||||
|
buffer.shape_until_scroll(&mut fs, false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_combined_stress(c: &mut Criterion) {
|
||||||
|
let mut fs = ct::FontSystem::new();
|
||||||
|
let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0));
|
||||||
|
buffer.set_size(&mut fs, Some(500.0), None);
|
||||||
|
|
||||||
|
let stress_text = format!("{}\n{}\n{}\n{}\n",
|
||||||
|
"ASCII line for BidiParagraphs optimization. ".repeat(15),
|
||||||
|
"Mixed English + العربية for BiDi optimizations. ".repeat(12),
|
||||||
|
"Very long wrapping line that will trigger reorder optimizations multiple times through intensive layout processing. ".repeat(8),
|
||||||
|
"Cache key generation line for ShapeRunKey optimization testing. ".repeat(10)
|
||||||
|
).repeat(10);
|
||||||
|
|
||||||
|
c.bench_function("ShapeLine/Combined Stress", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
buffer.set_text(
|
||||||
|
&mut fs,
|
||||||
|
black_box(&stress_text),
|
||||||
|
&ct::Attrs::new(),
|
||||||
|
ct::Shaping::Advanced,
|
||||||
|
);
|
||||||
|
buffer.shape_until_scroll(&mut fs, false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_bidi_paragraphs_ascii(c: &mut Criterion) {
|
||||||
|
let ascii_text = "Simple ASCII text\nwith multiple lines\n".repeat(50);
|
||||||
|
|
||||||
|
c.bench_function("BidiParagraphs/ASCII", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
let paras = BidiParagraphs::new(black_box(&ascii_text));
|
||||||
|
black_box(paras.count());
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bench_bidi_paragraphs_mixed(c: &mut Criterion) {
|
||||||
|
let mixed_text = "Mixed English and العربية text\nwith multiple lines\n".repeat(30);
|
||||||
|
|
||||||
|
c.bench_function("BidiParagraphs/Mixed", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
let paras = BidiParagraphs::new(black_box(&mixed_text));
|
||||||
|
black_box(paras.count());
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(
|
||||||
|
benches,
|
||||||
|
bench_ascii_fast_path,
|
||||||
|
bench_bidi_processing,
|
||||||
|
bench_layout_heavy,
|
||||||
|
bench_combined_stress,
|
||||||
|
bench_bidi_paragraphs_ascii,
|
||||||
|
bench_bidi_paragraphs_mixed
|
||||||
|
);
|
||||||
|
criterion_main!(benches);
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
// SPDX-License-Identifier: MIT OR Apache-2.0
|
// SPDX-License-Identifier: MIT OR Apache-2.0
|
||||||
|
|
||||||
|
use alloc::vec::Vec;
|
||||||
use unicode_bidi::{bidi_class, BidiClass, BidiInfo, ParagraphInfo};
|
use unicode_bidi::{bidi_class, BidiClass, BidiInfo, ParagraphInfo};
|
||||||
|
|
||||||
/// An iterator over the paragraphs in the input text.
|
/// An iterator over the paragraphs in the input text.
|
||||||
|
|
@ -11,12 +12,46 @@ pub struct BidiParagraphs<'text> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'text> BidiParagraphs<'text> {
|
impl<'text> BidiParagraphs<'text> {
|
||||||
/// Create an iterator to split the input text into paragraphs
|
/// Create an iterator with optimized paragraph detection.
|
||||||
/// in accordance with `unicode-bidi` behaviour.
|
/// This version avoids `BidiInfo` allocation for simple ASCII text.
|
||||||
pub fn new(text: &'text str) -> Self {
|
pub fn new(text: &'text str) -> Self {
|
||||||
let info = BidiInfo::new(text, None);
|
// Fast path for simple ASCII text - just split on newlines
|
||||||
let info = info.paragraphs.into_iter();
|
if text.is_ascii()
|
||||||
Self { text, info }
|
&& !text
|
||||||
|
.chars()
|
||||||
|
.any(|c| c.is_ascii_control() && c != '\n' && c != '\r' && c != '\t')
|
||||||
|
{
|
||||||
|
// For simple ASCII, we can avoid `BidiInfo` entirely
|
||||||
|
// Create minimal ParagraphInfo entries for each line
|
||||||
|
let mut paragraphs = Vec::new();
|
||||||
|
let mut start = 0;
|
||||||
|
|
||||||
|
for (i, c) in text.char_indices() {
|
||||||
|
if c == '\n' {
|
||||||
|
paragraphs.push(ParagraphInfo {
|
||||||
|
range: start..i,
|
||||||
|
level: unicode_bidi::Level::ltr(),
|
||||||
|
});
|
||||||
|
start = i + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add final paragraph if text doesn't end with newline
|
||||||
|
if start < text.len() {
|
||||||
|
paragraphs.push(ParagraphInfo {
|
||||||
|
range: start..text.len(),
|
||||||
|
level: unicode_bidi::Level::ltr(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let info = paragraphs.into_iter();
|
||||||
|
Self { text, info }
|
||||||
|
} else {
|
||||||
|
// Complex text - fall back to full `BidiInfo` analysis
|
||||||
|
let info = BidiInfo::new(text, None);
|
||||||
|
let info = info.paragraphs.into_iter();
|
||||||
|
Self { text, info }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,12 @@ use alloc::{string::String, vec::Vec};
|
||||||
use core::{cmp, fmt};
|
use core::{cmp, fmt};
|
||||||
use unicode_segmentation::UnicodeSegmentation;
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
|
|
||||||
|
#[cfg(feature = "swash")]
|
||||||
|
use crate::Color;
|
||||||
use crate::{
|
use crate::{
|
||||||
Affinity, Align, Attrs, AttrsList, BidiParagraphs, BorrowedWithFontSystem, BufferLine, Color,
|
Affinity, Align, Attrs, AttrsList, BidiParagraphs, BorrowedWithFontSystem, BufferLine, Cursor,
|
||||||
Cursor, FontSystem, LayoutCursor, LayoutGlyph, LayoutLine, LineEnding, LineIter, Motion,
|
FontSystem, LayoutCursor, LayoutGlyph, LayoutLine, LineEnding, LineIter, Motion, Scroll,
|
||||||
Scroll, ShapeLine, Shaping, Wrap,
|
ShapeLine, Shaping, Wrap,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// A line of visible text for rendering
|
/// A line of visible text for rendering
|
||||||
|
|
|
||||||
|
|
@ -1,20 +1,21 @@
|
||||||
// SPDX-License-Identifier: MIT OR Apache-2.0
|
// SPDX-License-Identifier: MIT OR Apache-2.0
|
||||||
|
|
||||||
#[cfg(not(feature = "std"))]
|
|
||||||
use alloc::{
|
|
||||||
string::{String, ToString},
|
|
||||||
vec::Vec,
|
|
||||||
};
|
|
||||||
use core::{cmp, iter::once};
|
|
||||||
use unicode_segmentation::UnicodeSegmentation;
|
|
||||||
|
|
||||||
#[cfg(feature = "swash")]
|
#[cfg(feature = "swash")]
|
||||||
use crate::Color;
|
use crate::Color;
|
||||||
use crate::{
|
use crate::{
|
||||||
Action, Attrs, AttrsList, BorrowedWithFontSystem, BufferLine, BufferRef, Change, ChangeItem,
|
Action, Attrs, AttrsList, BorrowedWithFontSystem, BufferLine, BufferRef, Change, ChangeItem,
|
||||||
Cursor, Edit, FontSystem, LayoutRun, Selection, Shaping,
|
Cursor, Edit, FontSystem, LayoutRun, Selection, Shaping,
|
||||||
};
|
};
|
||||||
|
#[cfg(feature = "no_std")]
|
||||||
|
use alloc::{
|
||||||
|
string::{String, ToString},
|
||||||
|
vec::Vec,
|
||||||
|
};
|
||||||
|
#[cfg(feature = "swash")]
|
||||||
|
use std::cmp;
|
||||||
|
|
||||||
|
use core::iter::once;
|
||||||
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
/// A wrapper of [`Buffer`] for easy editing
|
/// A wrapper of [`Buffer`] for easy editing
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Editor<'buffer> {
|
pub struct Editor<'buffer> {
|
||||||
|
|
@ -583,7 +584,7 @@ impl<'buffer> Edit<'buffer> for Editor<'buffer> {
|
||||||
Action::Insert(character) => {
|
Action::Insert(character) => {
|
||||||
if character.is_control() && !['\t', '\n', '\u{92}'].contains(&character) {
|
if character.is_control() && !['\t', '\n', '\u{92}'].contains(&character) {
|
||||||
// Filter out special chars (except for tab), use Action instead
|
// Filter out special chars (except for tab), use Action instead
|
||||||
log::debug!("Refusing to insert control character {:?}", character);
|
log::debug!("Refusing to insert control character {character:?}");
|
||||||
} else if character == '\n' {
|
} else if character == '\n' {
|
||||||
self.action(font_system, Action::Enter);
|
self.action(font_system, Action::Enter);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -132,11 +132,11 @@ impl<'syntax_system, 'buffer> SyntaxEditor<'syntax_system, 'buffer> {
|
||||||
self.syntax = match self.syntax_system.syntax_set.find_syntax_for_file(path) {
|
self.syntax = match self.syntax_system.syntax_set.find_syntax_for_file(path) {
|
||||||
Ok(Some(some)) => some,
|
Ok(Some(some)) => some,
|
||||||
Ok(None) => {
|
Ok(None) => {
|
||||||
log::warn!("no syntax found for {:?}", path);
|
log::warn!("no syntax found for {path:?}");
|
||||||
self.syntax_system.syntax_set.find_syntax_plain_text()
|
self.syntax_system.syntax_set.find_syntax_plain_text()
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
log::warn!("failed to determine syntax for {:?}: {:?}", path, err);
|
log::warn!("failed to determine syntax for {path:?}: {err:?}");
|
||||||
self.syntax_system.syntax_set.find_syntax_plain_text()
|
self.syntax_system.syntax_set.find_syntax_plain_text()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
@ -156,7 +156,7 @@ impl<'syntax_system, 'buffer> SyntaxEditor<'syntax_system, 'buffer> {
|
||||||
{
|
{
|
||||||
Some(some) => some,
|
Some(some) => some,
|
||||||
None => {
|
None => {
|
||||||
log::warn!("no syntax found for {}", extension);
|
log::warn!("no syntax found for {extension:?}");
|
||||||
self.syntax_system.syntax_set.find_syntax_plain_text()
|
self.syntax_system.syntax_set.find_syntax_plain_text()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
use alloc::{collections::BTreeMap, string::String};
|
#[cfg(feature = "no_std")]
|
||||||
use core::cmp;
|
use core::cmp;
|
||||||
|
|
||||||
|
use alloc::{collections::BTreeMap, string::String};
|
||||||
use modit::{Event, Key, Parser, TextObject, WordIter};
|
use modit::{Event, Key, Parser, TextObject, WordIter};
|
||||||
use unicode_segmentation::UnicodeSegmentation;
|
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
Action, AttrsList, BorrowedWithFontSystem, BufferRef, Change, Color, Cursor, Edit, FontSystem,
|
Action, AttrsList, BorrowedWithFontSystem, BufferRef, Change, Color, Cursor, Edit, FontSystem,
|
||||||
|
|
@ -9,6 +10,8 @@ use crate::{
|
||||||
};
|
};
|
||||||
|
|
||||||
pub use modit::{ViMode, ViParser};
|
pub use modit::{ViMode, ViParser};
|
||||||
|
#[cfg(feature = "swash")]
|
||||||
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
|
|
||||||
fn undo_2_action<'buffer, E: Edit<'buffer>>(
|
fn undo_2_action<'buffer, E: Edit<'buffer>>(
|
||||||
editor: &mut E,
|
editor: &mut E,
|
||||||
|
|
@ -599,7 +602,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn action(&mut self, font_system: &mut FontSystem, action: Action) {
|
fn action(&mut self, font_system: &mut FontSystem, action: Action) {
|
||||||
log::debug!("Action {:?}", action);
|
log::debug!("Action {action:?}");
|
||||||
|
|
||||||
let editor = &mut self.editor;
|
let editor = &mut self.editor;
|
||||||
|
|
||||||
|
|
@ -636,7 +639,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
|
||||||
Action::Unindent => Key::Backtab,
|
Action::Unindent => Key::Backtab,
|
||||||
Action::Motion(Motion::Up) => Key::Up,
|
Action::Motion(Motion::Up) => Key::Up,
|
||||||
_ => {
|
_ => {
|
||||||
log::debug!("Pass through action {:?}", action);
|
log::debug!("Pass through action {action:?}");
|
||||||
editor.action(font_system, action);
|
editor.action(font_system, action);
|
||||||
// Always finish change when passing through (TODO: group changes)
|
// Always finish change when passing through (TODO: group changes)
|
||||||
finish_change(
|
finish_change(
|
||||||
|
|
@ -652,7 +655,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
|
||||||
let has_selection = !matches!(editor.selection(), Selection::None);
|
let has_selection = !matches!(editor.selection(), Selection::None);
|
||||||
|
|
||||||
self.parser.parse(key, has_selection, |event| {
|
self.parser.parse(key, has_selection, |event| {
|
||||||
log::debug!(" Event {:?}", event);
|
log::debug!(" Event {event:?}");
|
||||||
let action = match event {
|
let action = match event {
|
||||||
Event::AutoIndent => {
|
Event::AutoIndent => {
|
||||||
log::info!("TODO: AutoIndent");
|
log::info!("TODO: AutoIndent");
|
||||||
|
|
@ -813,7 +816,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
|
||||||
editor.set_cursor(cursor);
|
editor.set_cursor(cursor);
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
log::info!("TODO: {:?}", text_object);
|
log::info!("TODO: {text_object:?}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
|
@ -1006,7 +1009,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
})
|
})
|
||||||
.last()
|
.next_back()
|
||||||
{
|
{
|
||||||
cursor.index = i;
|
cursor.index = i;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -454,7 +454,7 @@ impl<'a> FontFallbackIter<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
log::debug!("failed to find family '{}'", common_family);
|
log::debug!("failed to find family '{common_family}'");
|
||||||
}
|
}
|
||||||
|
|
||||||
//TODO: do we need to do this?
|
//TODO: do we need to do this?
|
||||||
|
|
|
||||||
|
|
@ -147,7 +147,7 @@ impl FontSystem {
|
||||||
/// Create a new [`FontSystem`] with a pre-specified set of fonts.
|
/// Create a new [`FontSystem`] with a pre-specified set of fonts.
|
||||||
pub fn new_with_fonts(fonts: impl IntoIterator<Item = fontdb::Source>) -> Self {
|
pub fn new_with_fonts(fonts: impl IntoIterator<Item = fontdb::Source>) -> Self {
|
||||||
let locale = Self::get_locale();
|
let locale = Self::get_locale();
|
||||||
log::debug!("Locale: {}", locale);
|
log::debug!("Locale: {locale}");
|
||||||
|
|
||||||
let mut db = fontdb::Database::new();
|
let mut db = fontdb::Database::new();
|
||||||
|
|
||||||
|
|
@ -370,7 +370,7 @@ impl FontSystem {
|
||||||
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
|
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
|
||||||
{
|
{
|
||||||
let elapsed = now.elapsed();
|
let elapsed = now.elapsed();
|
||||||
log::debug!("font matches for {:?} in {:?}", attrs, elapsed);
|
log::debug!("font matches for {attrs:?} in {elapsed:?}");
|
||||||
}
|
}
|
||||||
|
|
||||||
Arc::new(font_match_keys)
|
Arc::new(font_match_keys)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue