Optimize BidiParagraphs with ASCII fast path (#408)

* Optimize BidiParagraphs with ASCII fast path
  - Added a fast path for ASCII text that avoids the BidiInfo allocation
  - Added some text shaping benchmarks

* refactor: fix clippy warnings and clean up imports
This commit is contained in:
romanstingler 2025-08-11 21:15:09 +02:00 committed by GitHub
parent de355a1fd9
commit e80dbc3607
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 196 additions and 30 deletions

View file

@ -1,5 +1,6 @@
// SPDX-License-Identifier: MIT OR Apache-2.0
use alloc::vec::Vec;
use unicode_bidi::{bidi_class, BidiClass, BidiInfo, ParagraphInfo};
/// An iterator over the paragraphs in the input text.
@ -11,12 +12,46 @@ pub struct BidiParagraphs<'text> {
}
impl<'text> BidiParagraphs<'text> {
    /// Create an iterator to split the input text into paragraphs
    /// in accordance with `unicode-bidi` behaviour.
    ///
    /// Text made up solely of printable ASCII, tabs and `\n` takes a fast path
    /// that splits on `\n` directly and never builds a `BidiInfo`. All other
    /// text is handed to `BidiInfo::new`. That includes text containing `\r`:
    /// like `\n`, it is a Unicode paragraph separator (Bidi_Class `B`), so
    /// keeping it on the fast path as ordinary content would yield different
    /// paragraphs than `unicode-bidi` does.
    pub fn new(text: &'text str) -> Self {
        // Single byte scan: every byte must be ASCII, and the only control
        // characters tolerated are `\n` (handled below) and `\t`.
        let is_simple_ascii = text
            .bytes()
            .all(|b| b.is_ascii() && (!b.is_ascii_control() || matches!(b, b'\n' | b'\t')));

        let paragraphs = if is_simple_ascii {
            // ASCII has no right-to-left characters, so every paragraph is
            // given the left-to-right level without running the bidi algorithm.
            let mut paragraphs = Vec::new();
            let mut start = 0;
            for (newline, _) in text.match_indices('\n') {
                // The `\n` itself is left out of the paragraph range.
                paragraphs.push(ParagraphInfo {
                    range: start..newline,
                    level: unicode_bidi::Level::ltr(),
                });
                start = newline + 1;
            }
            // Trailing paragraph when the text does not end with a newline.
            // Empty text therefore produces no paragraphs at all.
            if start < text.len() {
                paragraphs.push(ParagraphInfo {
                    range: start..text.len(),
                    level: unicode_bidi::Level::ltr(),
                });
            }
            paragraphs
        } else {
            // Complex text - fall back to full `BidiInfo` analysis.
            BidiInfo::new(text, None).paragraphs
        };

        Self {
            text,
            info: paragraphs.into_iter(),
        }
    }
}

View file

@ -5,10 +5,12 @@ use alloc::{string::String, vec::Vec};
use core::{cmp, fmt};
use unicode_segmentation::UnicodeSegmentation;
#[cfg(feature = "swash")]
use crate::Color;
use crate::{
Affinity, Align, Attrs, AttrsList, BidiParagraphs, BorrowedWithFontSystem, BufferLine, Color,
Cursor, FontSystem, LayoutCursor, LayoutGlyph, LayoutLine, LineEnding, LineIter, Motion,
Scroll, ShapeLine, Shaping, Wrap,
Affinity, Align, Attrs, AttrsList, BidiParagraphs, BorrowedWithFontSystem, BufferLine, Cursor,
FontSystem, LayoutCursor, LayoutGlyph, LayoutLine, LineEnding, LineIter, Motion, Scroll,
ShapeLine, Shaping, Wrap,
};
/// A line of visible text for rendering

View file

@ -1,20 +1,21 @@
// SPDX-License-Identifier: MIT OR Apache-2.0
#[cfg(not(feature = "std"))]
use alloc::{
string::{String, ToString},
vec::Vec,
};
use core::{cmp, iter::once};
use unicode_segmentation::UnicodeSegmentation;
#[cfg(feature = "swash")]
use crate::Color;
use crate::{
Action, Attrs, AttrsList, BorrowedWithFontSystem, BufferLine, BufferRef, Change, ChangeItem,
Cursor, Edit, FontSystem, LayoutRun, Selection, Shaping,
};
#[cfg(feature = "no_std")]
use alloc::{
string::{String, ToString},
vec::Vec,
};
#[cfg(feature = "swash")]
use std::cmp;
use core::iter::once;
use unicode_segmentation::UnicodeSegmentation;
/// A wrapper of [`Buffer`] for easy editing
#[derive(Debug, Clone)]
pub struct Editor<'buffer> {
@ -583,7 +584,7 @@ impl<'buffer> Edit<'buffer> for Editor<'buffer> {
Action::Insert(character) => {
if character.is_control() && !['\t', '\n', '\u{92}'].contains(&character) {
// Filter out special chars (except for tab), use Action instead
log::debug!("Refusing to insert control character {:?}", character);
log::debug!("Refusing to insert control character {character:?}");
} else if character == '\n' {
self.action(font_system, Action::Enter);
} else {

View file

@ -132,11 +132,11 @@ impl<'syntax_system, 'buffer> SyntaxEditor<'syntax_system, 'buffer> {
self.syntax = match self.syntax_system.syntax_set.find_syntax_for_file(path) {
Ok(Some(some)) => some,
Ok(None) => {
log::warn!("no syntax found for {:?}", path);
log::warn!("no syntax found for {path:?}");
self.syntax_system.syntax_set.find_syntax_plain_text()
}
Err(err) => {
log::warn!("failed to determine syntax for {:?}: {:?}", path, err);
log::warn!("failed to determine syntax for {path:?}: {err:?}");
self.syntax_system.syntax_set.find_syntax_plain_text()
}
};
@ -156,7 +156,7 @@ impl<'syntax_system, 'buffer> SyntaxEditor<'syntax_system, 'buffer> {
{
Some(some) => some,
None => {
log::warn!("no syntax found for {}", extension);
log::warn!("no syntax found for {extension:?}");
self.syntax_system.syntax_set.find_syntax_plain_text()
}
};

View file

@ -1,7 +1,8 @@
use alloc::{collections::BTreeMap, string::String};
#[cfg(feature = "no_std")]
use core::cmp;
use alloc::{collections::BTreeMap, string::String};
use modit::{Event, Key, Parser, TextObject, WordIter};
use unicode_segmentation::UnicodeSegmentation;
use crate::{
Action, AttrsList, BorrowedWithFontSystem, BufferRef, Change, Color, Cursor, Edit, FontSystem,
@ -9,6 +10,8 @@ use crate::{
};
pub use modit::{ViMode, ViParser};
#[cfg(feature = "swash")]
use unicode_segmentation::UnicodeSegmentation;
fn undo_2_action<'buffer, E: Edit<'buffer>>(
editor: &mut E,
@ -599,7 +602,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
}
fn action(&mut self, font_system: &mut FontSystem, action: Action) {
log::debug!("Action {:?}", action);
log::debug!("Action {action:?}");
let editor = &mut self.editor;
@ -636,7 +639,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
Action::Unindent => Key::Backtab,
Action::Motion(Motion::Up) => Key::Up,
_ => {
log::debug!("Pass through action {:?}", action);
log::debug!("Pass through action {action:?}");
editor.action(font_system, action);
// Always finish change when passing through (TODO: group changes)
finish_change(
@ -652,7 +655,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
let has_selection = !matches!(editor.selection(), Selection::None);
self.parser.parse(key, has_selection, |event| {
log::debug!(" Event {:?}", event);
log::debug!(" Event {event:?}");
let action = match event {
Event::AutoIndent => {
log::info!("TODO: AutoIndent");
@ -813,7 +816,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
editor.set_cursor(cursor);
}
_ => {
log::info!("TODO: {:?}", text_object);
log::info!("TODO: {text_object:?}");
}
}
return;
@ -1006,7 +1009,7 @@ impl<'buffer> Edit<'buffer> for ViEditor<'_, 'buffer> {
}
None
})
.last()
.next_back()
{
cursor.index = i;
}

View file

@ -454,7 +454,7 @@ impl<'a> FontFallbackIter<'a> {
}
}
}
log::debug!("failed to find family '{}'", common_family);
log::debug!("failed to find family '{common_family}'");
}
//TODO: do we need to do this?

View file

@ -147,7 +147,7 @@ impl FontSystem {
/// Create a new [`FontSystem`] with a pre-specified set of fonts.
pub fn new_with_fonts(fonts: impl IntoIterator<Item = fontdb::Source>) -> Self {
let locale = Self::get_locale();
log::debug!("Locale: {}", locale);
log::debug!("Locale: {locale}");
let mut db = fontdb::Database::new();
@ -370,7 +370,7 @@ impl FontSystem {
#[cfg(all(feature = "std", not(target_arch = "wasm32")))]
{
let elapsed = now.elapsed();
log::debug!("font matches for {:?} in {:?}", attrs, elapsed);
log::debug!("font matches for {attrs:?} in {elapsed:?}");
}
Arc::new(font_match_keys)