Fix character counting in document statistics to use graphemes
- Add the unicode-segmentation dependency for proper grapheme cluster support
- Replace chars() iteration with graphemes(true) for accurate character counting
- Fix counting of complex Unicode characters such as emojis, combining characters, and other multi-code-point sequences
- Resolves TODO: 'do graphemes?' in the document_statistics function

This change provides more accurate character counts for international text, emojis with skin tones, combined characters, and other graphemes made up of several code points. Previously each Unicode scalar value was counted as a separate character; now each extended grapheme cluster (what a user perceives as one character) is counted once.

Examples of improved accuracy:
- 👍🏾 (thumbs up + skin-tone modifier) now counts as 1 character instead of 2
- é (e + combining acute accent) counts as 1 character instead of 2
- 🧑‍💻 (person technologist: person + zero-width joiner + laptop) counts as 1 character instead of 3
This commit is contained in:
parent
0d84055362
commit
801c7fa68c
4 changed files with 127 additions and 3 deletions
119
.idea/workspace.xml
generated
Normal file
119
.idea/workspace.xml
generated
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="AutoImportSettings">
|
||||
<option name="autoReloadType" value="ALL" />
|
||||
</component>
|
||||
<component name="CargoProjects">
|
||||
<cargoProject FILE="$PROJECT_DIR$/Cargo.toml">
|
||||
<package file="$PROJECT_DIR$">
|
||||
<feature name="default" enabled="true" />
|
||||
</package>
|
||||
</cargoProject>
|
||||
</component>
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="dcd1dad2-6701-46c3-a277-b30862871a25" name="Changes" comment="" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||
</component>
|
||||
<component name="ExecutionTargetManager" SELECTED_TARGET="RsBuildProfile:dev" />
|
||||
<component name="Git.Settings">
|
||||
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
|
||||
</component>
|
||||
<component name="GitHubPullRequestSearchHistory"><![CDATA[{
|
||||
"lastFilter": {
|
||||
"state": "OPEN",
|
||||
"assignee": "Aquilesorei"
|
||||
}
|
||||
}]]></component>
|
||||
<component name="GithubPullRequestsUISettings"><![CDATA[{
|
||||
"selectedUrlAndAccountId": {
|
||||
"url": "https://github.com/Aquilesorei/cosmic-edit.git",
|
||||
"accountId": "f43afa1a-5616-4b4c-85c3-ceca38badb67"
|
||||
}
|
||||
}]]></component>
|
||||
<component name="MacroExpansionManager">
|
||||
<option name="directoryName" value="ZsQYuuik" />
|
||||
</component>
|
||||
<component name="ProjectColorInfo"><![CDATA[{
|
||||
"associatedIndex": 7
|
||||
}]]></component>
|
||||
<component name="ProjectId" id="33dQwtLbmBMJywCKQ4NtK87ggG3" />
|
||||
<component name="ProjectViewState">
|
||||
<option name="hideEmptyMiddlePackages" value="true" />
|
||||
<option name="showLibraryContents" value="true" />
|
||||
</component>
|
||||
<component name="PropertiesComponent"><![CDATA[{
|
||||
"keyToString": {
|
||||
"ModuleVcsDetector.initialDetectionPerformed": "true",
|
||||
"RunOnceActivity.ShowReadmeOnStart": "true",
|
||||
"RunOnceActivity.git.unshallow": "true",
|
||||
"RunOnceActivity.rust.reset.selective.auto.import": "true",
|
||||
"git-widget-placeholder": "master",
|
||||
"last_opened_file_path": "/home/aquiles/RustroverProjects/cosmic-edit",
|
||||
"node.js.detected.package.eslint": "true",
|
||||
"node.js.detected.package.tslint": "true",
|
||||
"node.js.selected.package.eslint": "(autodetect)",
|
||||
"node.js.selected.package.tslint": "(autodetect)",
|
||||
"nodejs_package_manager_path": "npm",
|
||||
"org.rust.cargo.project.model.PROJECT_DISCOVERY": "true",
|
||||
"org.rust.first.attach.projects": "true",
|
||||
"vue.rearranger.settings.migration": "true"
|
||||
}
|
||||
}]]></component>
|
||||
<component name="RunManager" selected="Cargo.Run cosmic-edit">
|
||||
<configuration name="Run cosmic-edit" type="CargoCommandRunConfiguration" factoryName="Cargo Command">
|
||||
<option name="buildProfileId" value="dev" />
|
||||
<option name="command" value="run --package cosmic-edit --bin cosmic-edit" />
|
||||
<option name="workingDirectory" value="file://$PROJECT_DIR$" />
|
||||
<envs />
|
||||
<option name="emulateTerminal" value="true" />
|
||||
<option name="channel" value="DEFAULT" />
|
||||
<option name="requiredFeatures" value="true" />
|
||||
<option name="allFeatures" value="false" />
|
||||
<option name="withSudo" value="false" />
|
||||
<option name="buildTarget" value="REMOTE" />
|
||||
<option name="backtrace" value="SHORT" />
|
||||
<option name="isRedirectInput" value="false" />
|
||||
<option name="redirectInputPath" value="" />
|
||||
<method v="2">
|
||||
<option name="CARGO.BUILD_TASK_PROVIDER" enabled="true" />
|
||||
</method>
|
||||
</configuration>
|
||||
<configuration name="Test cosmic-edit" type="CargoCommandRunConfiguration" factoryName="Cargo Command">
|
||||
<option name="command" value="test --workspace" />
|
||||
<option name="workingDirectory" value="file://$PROJECT_DIR$" />
|
||||
<envs />
|
||||
<option name="emulateTerminal" value="true" />
|
||||
<option name="channel" value="DEFAULT" />
|
||||
<option name="requiredFeatures" value="true" />
|
||||
<option name="allFeatures" value="false" />
|
||||
<option name="withSudo" value="false" />
|
||||
<option name="buildTarget" value="REMOTE" />
|
||||
<option name="backtrace" value="SHORT" />
|
||||
<option name="isRedirectInput" value="false" />
|
||||
<option name="redirectInputPath" value="" />
|
||||
<method v="2">
|
||||
<option name="CARGO.BUILD_TASK_PROVIDER" enabled="true" />
|
||||
</method>
|
||||
</configuration>
|
||||
</component>
|
||||
<component name="RustProjectSettings">
|
||||
<option name="toolchainHomeDirectory" value="$USER_HOME$/.cargo/bin" />
|
||||
</component>
|
||||
<component name="TaskManager">
|
||||
<task active="true" id="Default" summary="Default task">
|
||||
<changelist id="dcd1dad2-6701-46c3-a277-b30862871a25" name="Changes" comment="" />
|
||||
<created>1759644710501</created>
|
||||
<option name="number" value="Default" />
|
||||
<option name="presentableId" value="Default" />
|
||||
<updated>1759644710501</updated>
|
||||
<workItem from="1759644711720" duration="720000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="TypeScriptGeneratedFilesManager">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
</project>
|
||||
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -1460,6 +1460,7 @@ dependencies = [
|
|||
"syntect",
|
||||
"tokio",
|
||||
"two-face",
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ syntect = "5.2.0"
|
|||
two-face = "0.4.3"
|
||||
# Internationalization
|
||||
icu = { version = "2.0.0", features = ["compiled_data"] }
|
||||
unicode-segmentation = "1.12"
|
||||
i18n-embed = { version = "0.16", features = [
|
||||
"fluent-system",
|
||||
"desktop-requester",
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ use cosmic_files::{
|
|||
};
|
||||
use cosmic_text::{Cursor, Edit, Family, Selection, SwashCache, SyntaxSystem, ViMode};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
use std::{
|
||||
any::TypeId,
|
||||
collections::HashMap,
|
||||
|
|
@ -868,11 +869,13 @@ impl App {
|
|||
editor.with_buffer(|buffer| {
|
||||
line_count = buffer.lines.len();
|
||||
for line in buffer.lines.iter() {
|
||||
let text = line.text();
|
||||
let mut last_whitespace = true;
|
||||
//TODO: do graphemes?
|
||||
for c in line.text().chars() {
|
||||
|
||||
// Count graphemes instead of Unicode scalar values for accurate character count
|
||||
for grapheme in text.graphemes(true) {
|
||||
character_count += 1;
|
||||
let is_whitespace = c.is_whitespace();
|
||||
let is_whitespace = grapheme.chars().all(|c| c.is_whitespace());
|
||||
if !is_whitespace {
|
||||
character_count_no_spaces += 1;
|
||||
if last_whitespace {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue