Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Declarative setuptools configuration for the pytessy distribution.
# Continuation lines of multi-line values MUST be indented; otherwise the
# INI parser treats them as separate keys (or rejects them outright).

[metadata]
name = pytessy
author = hyperrixel
url = https://github.com/hyperrixel/pytessy
project_urls =
    Bug Tracker = https://github.com/hyperrixel/pytessy/issues
    Documentation = https://pytessy.readthedocs.io/
    Source Code = https://github.com/hyperrixel/pytessy
license = Boost Software License 1.0
long_description = file: README.md
long_description_content_type = text/markdown; charset=UTF-8
platform = any
license_files =
    LICENSE

[options]
# The distribution name is declared once, in [metadata]; it is not an
# [options] key, so the duplicate entry was dropped from this section.
python_requires = >=3.8
packages = pytessy
# The "pytessy" package is loaded from the "source" directory.
package_dir =
    pytessy=source
setup_requires =
    setuptools >=38.3.0
    pip >= 20.0
    setuptools_scm
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Build script for pytessy; static metadata is declared in setup.cfg."""
import setuptools

# The version number is not hard-coded: it is taken from source-control
# metadata through the setuptools_scm integration.
setuptools.setup(use_scm_version=True)
32 changes: 26 additions & 6 deletions source/pytessy.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import __main__
import ctypes
import ctypes.util
from os import chdir, environ
from os import chdir, environ, getcwd
from os.path import abspath, dirname, isabs, isdir, isfile, join
from sys import platform

Expand Down Expand Up @@ -102,6 +102,8 @@ def get_text(self):
result = self._lib.TessBaseAPIGetUTF8Text(self._api)
if result:
return result.decode('utf-8')
else:
return ""



Expand Down Expand Up @@ -137,6 +139,17 @@ def set_image(self, imagedata, width, height, bytes_per_pixel, bytes_per_line,
imagedata, width, height,
bytes_per_pixel, bytes_per_line)
self._lib.TessBaseAPISetSourceResolution(self._api, resolution)


def set_variable(self, key, val):
    """
    Sets a variable in Tesseract
    ----------------------------
    @Params: key    (bytes or string)   Name of the Tesseract variable.
             val    (bytes, string or   Value to assign. Anything that is
                    other object)       not bytes is converted with str()
                                        and encoded as UTF-8, so 7 becomes
                                        b"7" (bytes(7) would instead give
                                        seven NUL bytes).
    @Return: (bool) True if the library reported success, False otherwise.
    """

    def _to_c_string(item):
        # The C API expects NUL-terminated char* arguments; ctypes only
        # accepts bytes for c_char_p, so normalise everything to bytes.
        if isinstance(item, (bytes, bytearray)):
            return bytes(item)
        return str(item).encode('utf-8')

    self._check_setup()
    status = self._lib.TessBaseAPISetVariable(self._api,
                                              _to_c_string(key),
                                              _to_c_string(val))
    # Surface the library's status flag instead of silently discarding it.
    return bool(status)



Expand Down Expand Up @@ -172,7 +185,11 @@ def setup_lib(cls, lib_path=None):
ctypes.c_int, # height
ctypes.c_int, # bytes_per_pixel
ctypes.c_int) # bytes_per_line


lib.TessBaseAPISetVariable.argtypes = (cls.TessBaseAPI,
ctypes.c_char_p,
ctypes.c_char_p)

lib.TessBaseAPIGetUTF8Text.restype = ctypes.c_char_p # text
lib.TessBaseAPIGetUTF8Text.argtypes = (cls.TessBaseAPI, ) # handle

Expand Down Expand Up @@ -227,7 +244,8 @@ class PyTessy(object):


def __init__(self, tesseract_path=None, api_version=None, lib_path=None,
data_path=None, language='eng', verbose_search=False):
data_path=None, language='eng', verbose_search=False,
oem=1, psm=7, char_whitelist=None):
"""
Initializes PyTessy instance
----------------------------
Expand Down Expand Up @@ -258,7 +276,6 @@ def __init__(self, tesseract_path=None, api_version=None, lib_path=None,
search process.
FileNotFoundError If cannot found "tessdata" directory.
"""

run_path = dirname(abspath(__main__.__file__))
no_lib = True
if lib_path is not None:
Expand Down Expand Up @@ -317,10 +334,13 @@ def __init__(self, tesseract_path=None, api_version=None, lib_path=None,
break
if data_path is None:
raise FileNotFoundError('PyTessy: Couldn\'t find "tessdata" directory.')
chdir(tess_path)
self._tess = TesseractHandler(lib_path=lib_path, data_path=data_path,
language=language)
chdir(run_path)
self._tess.set_variable(b"tessedit_pageseg_mode", bytes(psm))
self._tess.set_variable(b"tessedit_ocr_engine_mode", bytes(oem))
if char_whitelist:
self._tess.set_variable(b"tessedit_char_whitelist", char_whitelist)




Expand Down