From 801c7fa68c37d7fa61c9b060b6052c6d9e8780fc Mon Sep 17 00:00:00 2001
From: aquiles <achillezongo07@gmail.com>
Date: Sun, 5 Oct 2025 06:39:31 +0000
Subject: [PATCH] Fix character counting in document statistics to use
 graphemes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add unicode-segmentation dependency for proper grapheme cluster support
- Replace chars() iteration with graphemes(true) for accurate character counting
- Fix counting of complex Unicode characters like emojis, combining characters, and other multi-codepoint sequences
- Resolves TODO: 'do graphemes?' in document_statistics function

This change provides more accurate character counts for international text,
emojis with skin tones, combined characters, and other multi-codepoint graphemes.

Examples of improved accuracy:
- 👍🏾 now counts as 1 character instead of 2
- é (e + combining acute) counts as 1 character instead of 2
- 🧑‍💻 (person technologist: U+1F9D1 + ZWJ + U+1F4BB) counts as 1 character instead of 3
---
 .idea/workspace.xml | 119 ++++++++++++++++++++++++++++++++++++++++++++
 Cargo.lock          |   1 +
 Cargo.toml          |   1 +
 src/main.rs         |   9 ++--
 4 files changed, 127 insertions(+), 3 deletions(-)
 create mode 100644 .idea/workspace.xml
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
new file mode 100644
index 0000000..8e7a315
--- /dev/null
+++ b/.idea/workspace.xml
@@ -0,0 +1,119 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="AutoImportSettings">
+    <option name="autoReloadType" value="ALL" />
+  </component>
+  <component name="CargoProjects">
+    <cargoProject FILE="$PROJECT_DIR$/Cargo.toml">
+      <package file="$PROJECT_DIR$">
+        <feature name="default" enabled="true" />
+      </package>
+    </cargoProject>
+  </component>
+  <component name="ChangeListManager">
+    <list default="true" id="dcd1dad2-6701-46c3-a277-b30862871a25" name="Changes" comment="" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="ExecutionTargetManager" SELECTED_TARGET="RsBuildProfile:dev" />
+  <component name="Git.Settings">
+    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
+  </component>
+  <component name="GitHubPullRequestSearchHistory"><![CDATA[{
+  "lastFilter": {
+    "state": "OPEN",
+    "assignee": "Aquilesorei"
+  }
+}]]></component>
+  <component name="GithubPullRequestsUISettings"><![CDATA[{
+  "selectedUrlAndAccountId": {
+    "url": "https://github.com/Aquilesorei/cosmic-edit.git",
+    "accountId": "f43afa1a-5616-4b4c-85c3-ceca38badb67"
+  }
+}]]></component>
+  <component name="MacroExpansionManager">
+    <option name="directoryName" value="ZsQYuuik" />
+  </component>
+  <component name="ProjectColorInfo"><![CDATA[{
+  "associatedIndex": 7
+}]]></component>
+  <component name="ProjectId" id="33dQwtLbmBMJywCKQ4NtK87ggG3" />
+  <component name="ProjectViewState">
+    <option name="hideEmptyMiddlePackages" value="true" />
+    <option name="showLibraryContents" value="true" />
+  </component>
+  <component name="PropertiesComponent"><![CDATA[{
+  "keyToString": {
+    "ModuleVcsDetector.initialDetectionPerformed": "true",
+    "RunOnceActivity.ShowReadmeOnStart": "true",
+    "RunOnceActivity.git.unshallow": "true",
+    "RunOnceActivity.rust.reset.selective.auto.import": "true",
+    "git-widget-placeholder": "master",
+    "last_opened_file_path": "/home/aquiles/RustroverProjects/cosmic-edit",
+    "node.js.detected.package.eslint": "true",
+    "node.js.detected.package.tslint": "true",
+    "node.js.selected.package.eslint": "(autodetect)",
+    "node.js.selected.package.tslint": "(autodetect)",
+    "nodejs_package_manager_path": "npm",
+    "org.rust.cargo.project.model.PROJECT_DISCOVERY": "true",
+    "org.rust.first.attach.projects": "true",
+    "vue.rearranger.settings.migration": "true"
+  }
+}]]></component>
+  <component name="RunManager" selected="Cargo.Run cosmic-edit">
+    <configuration name="Run cosmic-edit" type="CargoCommandRunConfiguration" factoryName="Cargo Command">
+      <option name="buildProfileId" value="dev" />
+      <option name="command" value="run --package cosmic-edit --bin cosmic-edit" />
+      <option name="workingDirectory" value="file://$PROJECT_DIR$" />
+      <envs />
+      <option name="emulateTerminal" value="true" />
+      <option name="channel" value="DEFAULT" />
+      <option name="requiredFeatures" value="true" />
+      <option name="allFeatures" value="false" />
+      <option name="withSudo" value="false" />
+      <option name="buildTarget" value="REMOTE" />
+      <option name="backtrace" value="SHORT" />
+      <option name="isRedirectInput" value="false" />
+      <option name="redirectInputPath" value="" />
+      <method v="2">
+        <option name="CARGO.BUILD_TASK_PROVIDER" enabled="true" />
+      </method>
+    </configuration>
+    <configuration name="Test cosmic-edit" type="CargoCommandRunConfiguration" factoryName="Cargo Command">
+      <option name="command" value="test --workspace" />
+      <option name="workingDirectory" value="file://$PROJECT_DIR$" />
+      <envs />
+      <option name="emulateTerminal" value="true" />
+      <option name="channel" value="DEFAULT" />
+      <option name="requiredFeatures" value="true" />
+      <option name="allFeatures" value="false" />
+      <option name="withSudo" value="false" />
+      <option name="buildTarget" value="REMOTE" />
+      <option name="backtrace" value="SHORT" />
+      <option name="isRedirectInput" value="false" />
+      <option name="redirectInputPath" value="" />
+      <method v="2">
+        <option name="CARGO.BUILD_TASK_PROVIDER" enabled="true" />
+      </method>
+    </configuration>
+  </component>
+  <component name="RustProjectSettings">
+    <option name="toolchainHomeDirectory" value="$USER_HOME$/.cargo/bin" />
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="dcd1dad2-6701-46c3-a277-b30862871a25" name="Changes" comment="" />
+      <created>1759644710501</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1759644710501</updated>
+      <workItem from="1759644711720" duration="720000" />
+    </task>
+    <servers />
+  </component>
+  <component name="TypeScriptGeneratedFilesManager">
+    <option name="version" value="3" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index fbe15d1..8722262 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1460,6 +1460,7 @@ dependencies = [
  "syntect",
  "tokio",
  "two-face",
+ "unicode-segmentation",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index b425a5f..b49aaf9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,7 @@ syntect = "5.2.0"
 two-face = "0.4.3"
 # Internationalization
 icu = { version = "2.0.0", features = ["compiled_data"] }
+unicode-segmentation = "1.12"
 i18n-embed = { version = "0.16", features = [
     "fluent-system",
     "desktop-requester",
diff --git a/src/main.rs b/src/main.rs
index 8dc144a..1aa2005 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -27,6 +27,7 @@ use cosmic_files::{
 };
 use cosmic_text::{Cursor, Edit, Family, Selection, SwashCache, SyntaxSystem, ViMode};
 use serde::{Deserialize, Serialize};
+use unicode_segmentation::UnicodeSegmentation;
 use std::{
     any::TypeId,
     collections::HashMap,
@@ -868,11 +869,13 @@ impl App {
             editor.with_buffer(|buffer| {
                 line_count = buffer.lines.len();
                 for line in buffer.lines.iter() {
+                    let text = line.text();
                     let mut last_whitespace = true;
-                    //TODO: do graphemes?
-                    for c in line.text().chars() {
+                    
+                    // Count graphemes instead of Unicode scalar values for accurate character count
+                    for grapheme in text.graphemes(true) {
                         character_count += 1;
-                        let is_whitespace = c.is_whitespace();
+                        let is_whitespace = grapheme.chars().all(|c| c.is_whitespace());
                         if !is_whitespace {
                             character_count_no_spaces += 1;
                             if last_whitespace {