diff --git a/README.md b/README.md
index b492da8..e7b0b19 100644
--- a/README.md
+++ b/README.md
@@ -78,9 +78,10 @@ from durak import process_text
 
 result = process_text(
     "Türkiye'de NLP zor!",
-    steps=["clean", "tokenize", "remove_stopwords"]
+    remove_stopwords=True,
+    attach_suffixes=True,
 )
-# ["türkiye'de", "nlp", "zor", "!"]
+# result.tokens => ["türkiye'de", "nlp", "zor", "!"]
 ```
 
 ### Build Blocks à la Carte
diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md
index e0d44f0..b9f800e 100644
--- a/docs/USER_GUIDE.md
+++ b/docs/USER_GUIDE.md
@@ -77,6 +77,7 @@ config = ProcessorConfig(emoji_mode="remove")
 
 # Extract emojis separately
 config = ProcessorConfig(emoji_mode="extract")
+processor = TextProcessor(config)
 result = processor.process("Harika! 🎉")
 print(result.tokens)   # ['harika']
 print(result.emojis)   # ['🎉']
diff --git a/python/durak/ottoman/processor.py b/python/durak/ottoman/processor.py
index 0dc7f41..fe94364 100644
--- a/python/durak/ottoman/processor.py
+++ b/python/durak/ottoman/processor.py
@@ -335,11 +335,22 @@ def process(self, text: str) -> OttomanProcessingResult:
                 else:
                     output_token = original_token
                 
-                # Create mapping for this token
+                # Create mapping for this token using document-level offsets
+                if orig_start >= 0 and orig_end >= orig_start:
+                    # Use document-level offsets derived from the full mapping
+                    token_char_mappings = [
+                        (orig_start, orig_end, trans_idx, trans_idx + len(token))
+                    ]
+                else:
+                    # Fallback: use token-local offsets if we could not resolve global ones
+                    token_char_mappings = [
+                        (0, len(original_token), 0, len(token))
+                    ]
+
                 token_mapping = TransliterationMapping(
                     original=original_token,
                     transliterated=token,
-                    char_mappings=[(0, len(original_token), 0, len(token))],
+                    char_mappings=token_char_mappings,
                 )
                 
                 processed_tokens.append(output_token)
@@ -363,7 +374,20 @@ def process(self, text: str) -> OttomanProcessingResult:
                 ))
                 script_types.append(script_type)
         
-        # Step 6: Remove stopwords (using processed/modern tokens for matching)
+        # Step 6: Strip custom suffixes from processed tokens
+        if self.suffixes:
+            sorted_suffixes = sorted(self.suffixes, key=len, reverse=True)
+            stripped = []
+            for token in processed_tokens:
+                # Remove the longest matching suffix (at most one per token)
+                for suffix in sorted_suffixes:
+                    if token.endswith(suffix) and len(token) > len(suffix):
+                        token = token[: -len(suffix)]
+                        break
+                stripped.append(token)
+            processed_tokens = stripped
+
+        # Step 7: Remove stopwords (using processed/modern tokens for matching)
         if self.stopwords:
             filtered_indices = [
                 i for i, token in enumerate(processed_tokens)
diff --git a/python/durak/ottoman/transliterator.py b/python/durak/ottoman/transliterator.py
index c30e0e6..0cf777e 100644
--- a/python/durak/ottoman/transliterator.py
+++ b/python/durak/ottoman/transliterator.py
@@ -165,6 +165,7 @@ def arabic_to_latin(self, text: str) -> TransliterationMapping:
         mappings = []
         ambiguous = []
         
+        trans_pos = 0
         i = 0
         while i < len(text):
             char = text[i]
@@ -186,19 +187,21 @@ def arabic_to_latin(self, text: str) -> TransliterationMapping:
                 if char in AMBIGUOUS_MAPPINGS:
                     ambiguous.append((i, char, latin_char))
                 
-                # Record mapping
-                trans_start = len("".join(transliterated_chars))
+                # Record mapping; a running counter avoids an O(n) join per character
+                trans_start = trans_pos
                 transliterated_chars.append(latin_char)
-                trans_end = len("".join(transliterated_chars))
+                trans_pos += len(latin_char)
+                trans_end = trans_pos
                 
                 mappings.append((i, i + 1, trans_start, trans_end))
                 i += 1
             else:
                 # Non-Arabic character (space, punctuation, etc.)
                 # Pass through but record mapping
-                trans_start = len("".join(transliterated_chars))
+                trans_start = trans_pos
                 transliterated_chars.append(char)
-                trans_end = len("".join(transliterated_chars))
+                trans_pos += len(char)
+                trans_end = trans_pos
                 
                 mappings.append((i, i + 1, trans_start, trans_end))
                 i += 1
@@ -225,6 +228,7 @@ def scholarly_to_modern(self, text: str) -> TransliterationMapping:
         result_chars = []
         mappings = []
         
+        trans_pos = 0
         i = 0
         while i < len(text):
             char = text[i]
@@ -237,16 +241,18 @@ def scholarly_to_modern(self, text: str) -> TransliterationMapping:
                     # Keep original if preserving and would be removed
                     modern = char
                 
-                # Record mapping
-                trans_start = len("".join(result_chars))
+                # Record mapping; a running counter avoids an O(n) join per character
+                trans_start = trans_pos
                 result_chars.append(modern)
-                trans_end = len("".join(result_chars))
+                trans_pos += len(modern)
+                trans_end = trans_pos
                 mappings.append((i, i + 1, trans_start, trans_end))
             else:
                 # Pass through unchanged (regular Latin letters, spaces, etc.)
-                trans_start = len("".join(result_chars))
+                trans_start = trans_pos
                 result_chars.append(char)
-                trans_end = len("".join(result_chars))
+                trans_pos += len(char)
+                trans_end = trans_pos
                 mappings.append((i, i + 1, trans_start, trans_end))
             
             i += 1
diff --git a/python/durak/processor.py b/python/durak/processor.py
index 7d4eeb7..a657265 100644
--- a/python/durak/processor.py
+++ b/python/durak/processor.py
@@ -24,7 +24,7 @@
 from dataclasses import dataclass, field
 from typing import Callable, Literal
 
-from durak.cleaning import clean_text, normalize_case
+from durak.cleaning import clean_text, normalize_case, DEFAULT_CLEANING_STEPS
 from durak.exceptions import ConfigurationError
 from durak.lemmatizer import Lemmatizer
 from durak.stopwords import BASE_STOPWORDS, StopwordManager, remove_stopwords
@@ -152,28 +152,37 @@ def process(self, text: str) -> ProcessingResult:
         result = ProcessingResult()
         
         # Step 1: Clean text (with emoji handling)
+        # When lowercase=False, use a custom steps pipeline that omits case normalization
+        if self.config.lowercase:
+            cleaning_steps = None  # use DEFAULT_CLEANING_STEPS (includes lowercase)
+        else:
+            cleaning_steps = tuple(
+                step for step in DEFAULT_CLEANING_STEPS
+                if not (
+                    step is normalize_case
+                    or getattr(step, "func", None) is normalize_case
+                )
+            )
+
         if self.config.emoji_mode == "extract":
-            cleaned, emojis = clean_text(text, emoji_mode="extract")
+            cleaned, emojis = clean_text(text, steps=cleaning_steps, emoji_mode="extract")
             result.emojis = emojis
         else:
-            cleaned = clean_text(text, emoji_mode=self.config.emoji_mode)
-        
-        # Step 2: Additional lowercase normalization if needed
-        # (clean_text already lowercases via DEFAULT_CLEANING_STEPS)
+            cleaned = clean_text(text, steps=cleaning_steps, emoji_mode=self.config.emoji_mode)
         
-        # Step 3: Tokenize
+        # Step 2: Tokenize
         tokens = tokenize(cleaned, strip_punct=self.config.remove_punctuation)
         
-        # Step 4: Reattach detached suffixes
+        # Step 3: Reattach detached suffixes
         if self.config.attach_suffixes:
             tokens = attach_detached_suffixes(tokens)
         
-        # Step 5: Lemmatize (before stopword removal to help with matching)
+        # Step 4: Lemmatize (before stopword removal to help with matching)
         if self.config.lemmatize and self.config.lemmatizer:
             lemmas = [self.config.lemmatizer(token) for token in tokens]
             result.lemmas = lemmas
         
-        # Step 6: Remove stopwords
+        # Step 5: Remove stopwords
         if self.config.remove_stopwords and self.config.stopword_manager:
             # Filter both tokens and lemmas together
             filtered_indices = [
diff --git a/python/durak/stats/frequencies.py b/python/durak/stats/frequencies.py
index a0fb9dc..bb59a3c 100644
--- a/python/durak/stats/frequencies.py
+++ b/python/durak/stats/frequencies.py
@@ -8,7 +8,7 @@
 
 Example:
     >>> from durak.stats import FrequencyCounter, ngrams
-    >>> from durak import TextProcessor
+    >>> from durak import TextProcessor, ProcessorConfig
     >>> 
     >>> processor = TextProcessor(ProcessorConfig(lemmatize=True))
     >>> texts = ["Kitap okuyorum.", "Kitap yazıyorum."]