diff --git a/src/bidi_para.rs b/src/bidi_para.rs
new file mode 100644
index 0000000..461665e
--- /dev/null
+++ b/src/bidi_para.rs
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: MIT OR Apache-2.0
+
+use unicode_bidi::{bidi_class, BidiClass, BidiInfo, ParagraphInfo};
+
+/// An iterator over the paragraphs in the input text.
+///
+/// It is equivalent to [`core::str::Lines`] but follows `unicode-bidi` behaviour:
+/// paragraph ranges are the ones reported by [`BidiInfo`], and each yielded slice
+/// has its trailing paragraph separator (if any) removed.
+pub struct BidiParagraphs<'text> {
+    /// The full input text; every yielded paragraph is a sub-slice of it.
+    text: &'text str,
+    /// Remaining paragraphs, as computed by [`BidiInfo::new`].
+    // `alloc` path (the crate root has `extern crate alloc;`), so `std` is not required.
+    info: alloc::vec::IntoIter<ParagraphInfo>,
+}
+
+impl<'text> BidiParagraphs<'text> {
+    /// Create an iterator to split the input text into paragraphs
+    /// in accordance with `unicode-bidi` behaviour.
+    pub fn new(text: &'text str) -> Self {
+        // Only the paragraph list is kept; the rest of the `BidiInfo` is dropped here.
+        let info = BidiInfo::new(text, None).paragraphs.into_iter();
+        Self { text, info }
+    }
+}
+
+impl<'text> Iterator for BidiParagraphs<'text> {
+    type Item = &'text str;
+
+    /// Yield the next paragraph, without its trailing paragraph separator.
+    fn next(&mut self) -> Option<Self::Item> {
+        let para = self.info.next()?;
+        let paragraph = &self.text[para.range];
+        // `para.range` includes the newline that splits the line, so remove it if present.
+        // `BidiClass::B` is a Paragraph_Separator (various newline characters).
+        let trimmed = paragraph.strip_suffix(|c: char| bidi_class(c) == BidiClass::B);
+        Some(trimmed.unwrap_or(paragraph))
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index f4b700e..a07eecf 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -95,6 +95,9 @@ extern crate alloc;
 pub use self::attrs::*;
 mod attrs;
 
+pub use self::bidi_para::*;
+mod bidi_para;
+
 pub use self::buffer::*;
 mod buffer;
 