-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhandwriting_processor.py
More file actions
290 lines (242 loc) · 9.53 KB
/
handwriting_processor.py
File metadata and controls
290 lines (242 loc) · 9.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
"""
Handwriting Processor - Integration layer between text processing and handwriting synthesis.
This module combines the enhanced text processor with the handwriting synthesis
engine to provide a complete solution for converting text to handwritten pages.
It handles text cleaning, formatting, pagination, and delegation to the handwriting
synthesis model.
"""
import os
from typing import List, Optional, Dict, Any, Tuple
from handwriting_synthesis import Hand
from text_processor import (
TextProcessor,
TextProcessingConfig,
ParagraphStyle,
create_alphabet_set
)
class HandwritingProcessor:
    """
    High-level interface for converting text to handwriting with intelligent
    text processing (line wrapping, paragraph handling, pagination).

    The instance owns a ``TextProcessor`` that splits the input into pages of
    lines, and a ``Hand`` model whose ``write()`` method is called once per
    non-empty page.
    """

    # Friendly color names understood by process_and_write(). Keys are lower
    # case so that the lookup can be case-insensitive.
    _COLOR_HEX = {
        "black": "#000000",
        "blue": "#0000FF",
        "red": "#FF0000",
        "green": "#008000",
    }

    # Fallbacks applied when a caller explicitly passes None for a rendering
    # option. They mirror the keyword defaults of process_and_write().
    _DEFAULT_BIAS = 0.95
    _DEFAULT_STYLE = 1
    _DEFAULT_STROKE_COLOR = "Black"
    _DEFAULT_STROKE_WIDTH = 1.0

    def __init__(self, text_config: Optional["TextProcessingConfig"] = None):
        """
        Initialize the handwriting processor.

        Args:
            text_config: Configuration for text processing. If None (or any
                falsy value), a default ``TextProcessingConfig()`` is used.
        """
        self.text_processor = TextProcessor(text_config or TextProcessingConfig())
        self.hand = Hand()

    @classmethod
    def _resolve_color(cls, color: Any) -> Any:
        """Map a known color name to its hex code; return anything else as-is."""
        if isinstance(color, str):
            # Hex codes and unknown names fall through unchanged.
            return cls._COLOR_HEX.get(color.strip().lower(), color)
        return color

    def process_and_write(
        self,
        input_text: str,
        output_dir: str,
        alphabet: Optional[List[str]] = None,
        biases: Optional[float] = 0.95,
        styles: Optional[int] = 1,
        stroke_colors: Optional[str] = "Black",
        stroke_widths: Optional[float] = 1.0,
        page_params: Optional[List[Any]] = None,
        file_prefix: str = "result_page",
    ) -> Dict[str, Any]:
        """
        Process text and generate handwriting SVG files.

        The raw input text is split into pages of lines by the TextProcessor,
        then every non-empty page is passed to ``Hand.write()`` together with
        per-line copies of the rendering options.

        Args:
            input_text: The text to convert to handwriting.
            output_dir: Directory to save generated SVG files. It is created
                if it does not exist.
            alphabet: List of allowed characters. When empty or None, no
                alphabet set is passed to the text processor.
            biases: Handwriting consistency (0.0 to 1.0). Higher values usually
                mean more legible but less variable handwriting.
                None falls back to 0.95.
            styles: Handwriting style ID (1-12). None falls back to 1.
            stroke_colors: Color name ("Black", "Blue", "Red", "Green",
                matched case-insensitively) or any other value such as a hex
                code, which is passed through unchanged. None falls back to
                "Black".
            stroke_widths: Pen thickness in pixels/units. None falls back
                to 1.0.
            page_params: Page layout parameters list, forwarded unchanged as
                the ``page`` argument of ``Hand.write()``:
                [line_height, total_lines, height, width, margin_left,
                 margin_top, page_color, margin_color, line_color].
            file_prefix: Prefix for output filenames (e.g., "result_page").

        Returns:
            The metadata dictionary returned by ``TextProcessor.get_pages()``
            (expected to carry num_pages, num_lines and num_paragraphs),
            extended in place with:
                - output_dir: Directory where files were saved.
                - generated_files: List of paths to generated files.
                - settings: Dictionary of the rendering settings used.
        """
        # The signature advertises Optional values, so an explicit None must
        # not leak into the per-line lists handed to the synthesis model.
        if biases is None:
            biases = self._DEFAULT_BIAS
        if styles is None:
            styles = self._DEFAULT_STYLE
        if stroke_colors is None:
            stroke_colors = self._DEFAULT_STROKE_COLOR
        if stroke_widths is None:
            stroke_widths = self._DEFAULT_STROKE_WIDTH

        # Create alphabet set
        alphabet_set = create_alphabet_set(alphabet) if alphabet else None

        # Process text into pages
        pages, metadata = self.text_processor.get_pages(input_text, alphabet_set)

        # Create output directory
        os.makedirs(output_dir, exist_ok=True)

        # Convert color names to hex
        stroke_color_hex = self._resolve_color(stroke_colors)

        # Generate handwriting for each page
        generated_files = []
        for page_number, page_lines in enumerate(pages, start=1):
            if not page_lines:  # Skip empty pages
                # The page number is still consumed, so file numbering can
                # contain gaps that match the skipped pages.
                continue

            filename = os.path.join(output_dir, f"{file_prefix}_{page_number}.svg")

            # Hand.write() takes one value per line for each option.
            num_lines = len(page_lines)
            self.hand.write(
                filename=filename,
                lines=page_lines,
                biases=[biases] * num_lines,
                styles=[styles] * num_lines,
                stroke_colors=[stroke_color_hex] * num_lines,
                stroke_widths=[stroke_widths] * num_lines,
                page=page_params,
            )
            generated_files.append(filename)
            print(f"Page {page_number} written to {filename}")

        # Update metadata with generation info. The color is recorded as the
        # caller supplied it (name or hex), not as the resolved hex code.
        metadata['output_dir'] = output_dir
        metadata['generated_files'] = generated_files
        metadata['settings'] = {
            'biases': biases,
            'styles': styles,
            'stroke_colors': stroke_colors,
            'stroke_widths': stroke_widths,
        }
        return metadata
def batch_process_texts(
    texts: List[str],
    output_base_dir: str,
    **kwargs
) -> List[Dict[str, Any]]:
    """
    Convert several texts to handwriting in one call.

    A single HandwritingProcessor is built up front and reused for every
    text. The n-th text (counting from 1) is written to the subdirectory
    'text_<n>' of output_base_dir.

    Args:
        texts: List of text strings to process.
        output_base_dir: Base directory under which the per-text
            subdirectories ('text_1', 'text_2', ...) are placed.
        **kwargs: 'text_config', if present, is passed to the
            HandwritingProcessor constructor. Every other keyword is
            forwarded to process_and_write() (e.g., styles, biases,
            page_params).

    Returns:
        List of metadata dictionaries, one per processed text, in input order.
    """
    # 'text_config' configures the processor itself and is not an argument of
    # process_and_write(), so it is filtered out of the forwarded options.
    write_options = {
        option: value for option, value in kwargs.items() if option != 'text_config'
    }
    shared_processor = HandwritingProcessor(kwargs.get('text_config'))

    return [
        shared_processor.process_and_write(
            input_text=body,
            output_dir=os.path.join(output_base_dir, f"text_{position}"),
            **write_options
        )
        for position, body in enumerate(texts, start=1)
    ]
def process_from_file(
    input_file: str,
    output_dir: str,
    **kwargs
) -> Dict[str, Any]:
    """
    Convert the contents of a text file to handwriting.

    The file is read in full as UTF-8 before the processor is created, so an
    unreadable input file fails before any processor is built.

    Args:
        input_file: Path to input text file.
        output_dir: Directory to save SVG files.
        **kwargs: 'text_config', if present, is passed to the
            HandwritingProcessor constructor. Every other keyword is
            forwarded to process_and_write().

    Returns:
        Processing metadata dictionary returned by process_and_write().
    """
    with open(input_file, 'r', encoding='utf-8') as source:
        contents = source.read()

    # Separate the constructor-only option from the options to forward.
    text_config = kwargs.get('text_config')
    forwarded = {
        option: value for option, value in kwargs.items() if option != 'text_config'
    }

    return HandwritingProcessor(text_config).process_and_write(
        input_text=contents,
        output_dir=output_dir,
        **forwarded
    )
# Example usage: render a short multi-paragraph sample into ./img as SVG pages.
if __name__ == '__main__':
    # Sample text with multiple paragraphs
    sample_text = """Hello there! This is a test of the improved text processing system.
This system can handle multiple paragraphs intelligently. It wraps text at word boundaries, preserves paragraph breaks, and creates proper pagination.
The old system had issues with:
- Poor paragraph detection
- Basic word wrapping that didn't handle edge cases
- No smart pagination
This new system solves all of those problems! It provides:
- Intelligent paragraph detection and preservation
- Advanced word wrapping with configurable options
- Smart pagination to avoid orphans and widows
- Flexible configuration for different use cases
Try it out with your own text and see the difference!"""

    # Configure text processing
    config = TextProcessingConfig(
        max_line_length=60,
        lines_per_page=24,
        paragraph_style=ParagraphStyle.PRESERVE_BREAKS,
        preserve_empty_lines=True,
    )

    # Default alphabet (from the original code)
    alphabet = [
        '\x00', ' ', '!', '"', '@', '#', "'", '(', ')', ',', '-', '.',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
        '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
        'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'Y',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
        'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
        'y', 'z'
    ]

    # Page parameters (A4-like dimensions)
    page = [
        32,           # line_height
        24,           # total_lines_per_page
        896,          # view_height
        633.472,      # view_width
        -64,          # margin_left
        -96,          # margin_top
        "white",      # page_color
        "red",        # margin_color
        "lightgray"   # line_color
    ]

    # Process the text
    processor = HandwritingProcessor(text_config=config)
    try:
        result = processor.process_and_write(
            input_text=sample_text,
            output_dir='img',
            alphabet=alphabet,
            biases=0.95,
            styles=1,
            stroke_colors="Black",
            stroke_widths=1.0,
            page_params=page,
            file_prefix="sample_page"
        )
        print("\n=== Processing Complete ===")
        print(f"Pages generated: {result['num_pages']}")
        print(f"Total lines: {result['num_lines']}")
        print(f"Paragraphs processed: {result['num_paragraphs']}")
        print("\nFiles created:")
        for path in result['generated_files']:
            print(f" - {path}")
    except Exception as e:
        # Top-level boundary of the demo script: report the failure with its
        # traceback, then exit non-zero so a calling shell or CI job can tell
        # that the run failed instead of seeing a successful exit status.
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        raise SystemExit(1)