From 2610c869f677f2cf0093877f1fc0511abe0ab6bf Mon Sep 17 00:00:00 2001 From: valadaptive <79560998+valadaptive@users.noreply.github.com> Date: Mon, 8 Sep 2025 23:15:27 -0400 Subject: [PATCH] Replace rustybuzz with HarfRust (#417) * Use HarfRust for shaping * Replace ttf-parser with skrifa entirely * Fix clippy lints * Add shape plan cache * Bump harfrust and skrifa * Fix no_std build * Simplify the shape plan cache * Please the paperclip * Cache font ID with plan * Tune shape plan cache for "BiDi Processing" bench --- Cargo.toml | 12 +- README.md | 4 +- benches/text_shaping_benchmarks.rs | 23 ++++ src/font/mod.rs | 193 +++++++++++++++++------------ src/font/system.rs | 32 ++--- src/lib.rs | 2 +- src/shape.rs | 76 ++++++++---- 7 files changed, 221 insertions(+), 121 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a43caac..31a9c5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,25 +7,25 @@ edition = "2021" license = "MIT OR Apache-2.0" documentation = "https://docs.rs/cosmic-text/latest/cosmic_text/" repository = "https://github.com/pop-os/cosmic-text" -rust-version = "1.75" +rust-version = "1.80" [dependencies] bitflags = "2.4.1" core_maths = { version = "0.1.1", optional = true } cosmic_undo_2 = { version = "0.2.0", optional = true } fontdb = { version = "0.23", default-features = false } +harfrust = { version = "0.2.0", default-features = false } hashbrown = { version = "0.14.1", optional = true, default-features = false } libm = { version = "0.2.8", optional = true } log = "0.4.20" modit = { version = "0.1.4", optional = true } rangemap = "1.4.0" rustc-hash = { version = "1.1.0", default-features = false } -rustybuzz = { version = "0.14", default-features = false, features = ["libm"] } self_cell = "1.0.1" +skrifa = { version = "0.36.0", default-features = false } smol_str = { version = "0.2.2", default-features = false } syntect = { version = "5.1.0", optional = true } sys-locale = { version = "0.3.1", optional = true } -ttf-parser = { version = "0.21", default-features = false } unicode-linebreak = "0.1.5" unicode-script = "0.5.5" unicode-segmentation = "1.10.1" @@ -49,16 +49,16 @@ optional = true default = ["std", "swash", "fontconfig"] fontconfig = ["fontdb/fontconfig", "std"] monospace_fallback = [] -no_std = ["rustybuzz/libm", "hashbrown", "dep:libm", "core_maths"] +no_std = ["hashbrown", "dep:libm", "skrifa/libm", "core_maths"] peniko = ["dep:peniko"] shape-run-cache = [] std = [ "fontdb/memmap", "fontdb/std", - "rustybuzz/std", + "harfrust/std", + "skrifa/std", "swash?/std", "sys-locale", - "ttf-parser/std", "unicode-bidi/std", ] vi = ["modit", "syntect", "cosmic_undo_2"] diff --git a/README.md b/README.md index 979a333..adb5ff8 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Pure Rust multi-line text handling. COSMIC Text provides advanced text shaping, layout, and rendering wrapped up -into a simple abstraction. Shaping is provided by rustybuzz, and supports a +into a simple abstraction. Shaping is provided by HarfRust, and supports a wide variety of advanced shaping operations. Rendering is provided by swash, which supports ligatures and color emoji. Layout is implemented custom, in safe Rust, and supports bidirectional text. Font fallback is also a custom @@ -37,7 +37,7 @@ The following features must be supported before this is "ready": - [x] Text styles (bold, italic, etc.) - [x] Per-buffer - [x] Per-span -- [x] Font shaping (using rustybuzz) +- [x] Font shaping (using HarfRust) - [x] Cache results - [x] RTL - [x] Bidirectional rendering diff --git a/benches/text_shaping_benchmarks.rs b/benches/text_shaping_benchmarks.rs index 723cc7f..f259182 100644 --- a/benches/text_shaping_benchmarks.rs +++ b/benches/text_shaping_benchmarks.rs @@ -44,6 +44,28 @@ fn bench_bidi_processing(c: &mut Criterion) { }); } +fn bench_lang_mixed(c: &mut Criterion) { + let mut fs = ct::FontSystem::new(); + let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0)); + buffer.set_size(&mut fs, Some(500.0), None); + + let bidi_text = include_str!("../sample/hello.txt"); + + c.benchmark_group("bench_lang_mixed") + .sample_size(10) + .bench_function("ShapeLine/Mixed-Language Text", |b| { + b.iter(|| { + buffer.set_text( + &mut fs, + black_box(&bidi_text), + &ct::Attrs::new(), + ct::Shaping::Advanced, + ); + buffer.shape_until_scroll(&mut fs, false); + }); + }); +} + fn bench_layout_heavy(c: &mut Criterion) { let mut fs = ct::FontSystem::new(); let mut buffer = ct::Buffer::new(&mut fs, ct::Metrics::new(14.0, 20.0)); @@ -117,6 +139,7 @@ criterion_group!( benches, bench_ascii_fast_path, bench_bidi_processing, + bench_lang_mixed, bench_layout_heavy, bench_combined_stress, bench_bidi_paragraphs_ascii, diff --git a/src/font/mod.rs b/src/font/mod.rs index 2f97704..3b7d53b 100644 --- a/src/font/mod.rs +++ b/src/font/mod.rs @@ -1,7 +1,10 @@ // SPDX-License-Identifier: MIT OR Apache-2.0 -// re-export ttf_parser -pub use ttf_parser; +use harfrust::Shaper; +use skrifa::raw::{ReadError, TableProvider as _}; +use skrifa::{metrics::Metrics, prelude::*}; +// re-export skrifa +pub use skrifa; // re-export peniko::Font; #[cfg(feature = "peniko")] pub use peniko::Font as PenikoFont; @@ -12,7 +15,6 @@ use alloc::sync::Arc; #[cfg(not(feature = "std"))] use alloc::vec::Vec; -use rustybuzz::Face as RustybuzzFace; use self_cell::self_cell; pub mod fallback; @@ -21,12 +23,19 @@ pub use fallback::{Fallback, PlatformFallback}; pub use self::system::*; mod system; +struct OwnedFaceData { + data: Arc + Send + Sync>, + shaper_data: harfrust::ShaperData, + shaper_instance: harfrust::ShaperInstance, + metrics: Metrics, +} + self_cell!( struct OwnedFace { - owner: Arc + Send + Sync>, + owner: OwnedFaceData, #[covariant] - dependent: RustybuzzFace, + dependent: Shaper, } ); @@ -40,7 +49,7 @@ struct FontMonospaceFallback { pub struct Font { #[cfg(feature = "swash")] swash: (u32, swash::CacheKey), - rustybuzz: OwnedFace, + harfrust: OwnedFace, #[cfg(not(feature = "peniko"))] data: Arc + Send + Sync>, #[cfg(feature = "peniko")] @@ -89,8 +98,16 @@ impl Font { } } - pub fn rustybuzz(&self) -> &RustybuzzFace<'_> { - self.rustybuzz.borrow_dependent() + pub fn shaper(&self) -> &harfrust::Shaper<'_> { + self.harfrust.borrow_dependent() + } + + pub(crate) fn shaper_instance(&self) -> &harfrust::ShaperInstance { + &self.harfrust.borrow_owner().shaper_instance + } + + pub fn metrics(&self) -> &Metrics { + &self.harfrust.borrow_owner().metrics } #[cfg(feature = "peniko")] @@ -113,59 +130,6 @@ impl Font { pub fn new(db: &fontdb::Database, id: fontdb::ID, weight: fontdb::Weight) -> Option { let info = db.face(id)?; - let monospace_fallback = if cfg!(feature = "monospace_fallback") { - db.with_face_data(id, |font_data, face_index| { - let face = ttf_parser::Face::parse(font_data, face_index).ok()?; - let monospace_em_width = info - .monospaced - .then(|| { - let hor_advance = face.glyph_hor_advance(face.glyph_index(' ')?)?; - let upem = face.units_per_em(); - Some(f32::from(hor_advance) / f32::from(upem)) - }) - .flatten(); - - if info.monospaced && monospace_em_width.is_none() { - None?; - } - - let scripts = face - .tables() - .gpos - .into_iter() - .chain(face.tables().gsub) - .flat_map(|table| table.scripts) - .map(|script| script.tag.to_bytes()) - .collect(); - - let mut unicode_codepoints = Vec::new(); - - face.tables() - .cmap? - .subtables - .into_iter() - .filter(ttf_parser::cmap::Subtable::is_unicode) - .for_each(|subtable| { - unicode_codepoints.reserve(1024); - subtable.codepoints(|code_point| { - if subtable.glyph_index(code_point).is_some() { - unicode_codepoints.push(code_point); - } - }); - }); - - unicode_codepoints.shrink_to_fit(); - - Some(FontMonospaceFallback { - monospace_em_width, - scripts, - unicode_codepoints, - }) - })? - } else { - None - }; - let data = match &info.source { fontdb::Source::Binary(data) => Arc::clone(data), #[cfg(feature = "std")] @@ -177,6 +141,77 @@ impl Font { fontdb::Source::SharedFile(_path, data) => Arc::clone(data), }; + // It's a bit unfortunate but we need to parse the data into a `FontRef` + // twice--once to construct the HarfRust `ShaperInstance` and + // `ShaperData`, and once to create the persistent `FontRef` tied to the + // lifetime of the face data. + let font_ref = FontRef::from_index((*data).as_ref(), info.index).ok()?; + let location = font_ref + .axes() + .location([(Tag::new(b"wght"), weight.0 as f32)]); + let metrics = font_ref.metrics(Size::unscaled(), &location); + + let monospace_fallback = if cfg!(feature = "monospace_fallback") { + (|| { + let glyph_metrics = font_ref.glyph_metrics(Size::unscaled(), &location); + let charmap = font_ref.charmap(); + let monospace_em_width = info + .monospaced + .then(|| { + let hor_advance = glyph_metrics.advance_width(charmap.map(' ')?)?; + let upem = metrics.units_per_em; + Some(hor_advance / f32::from(upem)) + }) + .flatten(); + + if info.monospaced && monospace_em_width.is_none() { + None?; + } + + let scripts = font_ref + .gpos() + .ok()? + .script_list() + .ok()? + .script_records() + .iter() + .chain( + font_ref + .gsub() + .ok()? + .script_list() + .ok()? + .script_records() + .iter(), + ) + .map(|script| script.script_tag().into_bytes()) + .collect(); + + let mut unicode_codepoints = Vec::new(); + + for (code_point, _) in charmap.mappings() { + unicode_codepoints.push(code_point); + } + + unicode_codepoints.shrink_to_fit(); + + Some(FontMonospaceFallback { + monospace_em_width, + scripts, + unicode_codepoints, + }) + })() + } else { + None + }; + + let (shaper_instance, shaper_data) = { + ( + harfrust::ShaperInstance::from_coords(&font_ref, location.coords().iter().copied()), + harfrust::ShaperData::new(&font_ref), + ) + }; + Some(Self { id: info.id, monospace_fallback, @@ -185,21 +220,27 @@ impl Font { let swash = swash::FontRef::from_index((*data).as_ref(), info.index as usize)?; (swash.offset, swash.key) }, - rustybuzz: OwnedFace::try_new(Arc::clone(&data), |data| { - RustybuzzFace::from_slice((**data).as_ref(), info.index) - .ok_or(()) - .map(|mut face| { - if let Some(axis) = face - .variation_axes() - .into_iter() - .find(|axis| axis.tag == ttf_parser::Tag::from_bytes(b"wght")) - { - let wght = f32::from(weight.0).clamp(axis.min_value, axis.max_value); - let _ = face.set_variation(ttf_parser::Tag::from_bytes(b"wght"), wght); - } - face - }) - }) + harfrust: OwnedFace::try_new( + OwnedFaceData { + data: Arc::clone(&data), + shaper_data, + shaper_instance, + metrics, + }, + |OwnedFaceData { + data, + shaper_data, + shaper_instance, + .. + }| { + let font_ref = FontRef::from_index((**data).as_ref(), info.index)?; + let shaper = shaper_data + .shaper(&font_ref) + .instance(Some(shaper_instance)) + .build(); + Ok::<_, ReadError>(shaper) + }, + ) .ok()?, #[cfg(not(feature = "peniko"))] data, diff --git a/src/font/system.rs b/src/font/system.rs index 6ddc24d..7ec0cfe 100644 --- a/src/font/system.rs +++ b/src/font/system.rs @@ -7,10 +7,11 @@ use alloc::vec::Vec; use core::fmt; use core::ops::{Deref, DerefMut}; use fontdb::Query; +use skrifa::raw::{ReadError, TableProvider as _}; -// re-export fontdb and rustybuzz +// re-export fontdb and harfrust pub use fontdb; -pub use rustybuzz; +pub use harfrust; use super::fallback::{Fallback, Fallbacks, MonospaceFallbackInfo, PlatformFallback}; @@ -182,19 +183,20 @@ impl FontSystem { if cfg!(feature = "monospace_fallback") { for &id in &monospace_font_ids { db.with_face_data(id, |font_data, face_index| { - let _ = ttf_parser::Face::parse(font_data, face_index).map(|face| { - face.tables() - .gpos - .into_iter() - .chain(face.tables().gsub) - .flat_map(|table| table.scripts) - .inspect(|script| { - per_script_monospace_font_ids - .entry(script.tag.to_bytes()) - .or_default() - .insert(id); - }) - }); + let face = skrifa::FontRef::from_index(font_data, face_index)?; + for script in face + .gpos()? + .script_list()? + .script_records() + .iter() + .chain(face.gsub()?.script_list()?.script_records().iter()) + { + per_script_monospace_font_ids + .entry(script.script_tag().into_bytes()) + .or_default() + .insert(id); + } + Ok::<_, ReadError>(()) }); } } diff --git a/src/lib.rs b/src/lib.rs index 37ae433..1ce9f14 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,7 @@ //! //! This library provides advanced text handling in a generic way. It provides abstractions for //! shaping, font discovery, font fallback, layout, rasterization, and editing. Shaping utilizes -//! rustybuzz, font discovery utilizes fontdb, and the rasterization is optional and utilizes +//! harfrust, font discovery utilizes fontdb, and the rasterization is optional and utilizes //! swash. The other features are developed internal to this library. //! //! It is recommended that you start by creating a [`FontSystem`], after which you can create a diff --git a/src/shape.rs b/src/shape.rs index 91365a5..ff654b1 100644 --- a/src/shape.rs +++ b/src/shape.rs @@ -10,6 +10,7 @@ use crate::{ #[cfg(not(feature = "std"))] use alloc::vec::Vec; +use alloc::collections::VecDeque; use core::cmp::{max, min}; use core::fmt; use core::mem; @@ -78,11 +79,17 @@ impl Shaping { } } +const NUM_SHAPE_PLANS: usize = 6; + /// A set of buffers containing allocations for shaped text. #[derive(Default)] pub struct ShapeBuffer { + /// Cache for harfrust shape plans. Stores up to [`NUM_SHAPE_PLANS`] plans at once. Inserting a new one past that + /// will remove the one that was least recently added (not least recently used). + shape_plan_cache: VecDeque<(fontdb::ID, harfrust::ShapePlan)>, + /// Buffer for holding unicode text. - rustybuzz_buffer: Option, + harfrust_buffer: Option, /// Temporary buffers for scripts. scripts: Vec