diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9f171f6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/.idea
+deploy_key
diff --git a/Examples/.gitignore b/Examples/.gitignore
new file mode 100644
index 0000000..b83d222
--- /dev/null
+++ b/Examples/.gitignore
@@ -0,0 +1 @@
+/target/
diff --git a/Examples/.idea/.gitignore b/Examples/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/Examples/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/Examples/.idea/Examples.iml b/Examples/.idea/Examples.iml
new file mode 100644
index 0000000..d6ebd48
--- /dev/null
+++ b/Examples/.idea/Examples.iml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Examples/.idea/compiler.xml b/Examples/.idea/compiler.xml
new file mode 100644
index 0000000..73f79e9
--- /dev/null
+++ b/Examples/.idea/compiler.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Examples/.idea/jarRepositories.xml b/Examples/.idea/jarRepositories.xml
new file mode 100644
index 0000000..57791ca
--- /dev/null
+++ b/Examples/.idea/jarRepositories.xml
@@ -0,0 +1,65 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Examples/.idea/misc.xml b/Examples/.idea/misc.xml
new file mode 100644
index 0000000..d5cd614
--- /dev/null
+++ b/Examples/.idea/misc.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Examples/.idea/vcs.xml b/Examples/.idea/vcs.xml
new file mode 100644
index 0000000..6c0b863
--- /dev/null
+++ b/Examples/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Examples/Data/Attachments/License Agreement.doc b/Examples/Data/Attachments/License Agreement.doc
deleted file mode 100644
index 3c6f4ed..0000000
Binary files a/Examples/Data/Attachments/License Agreement.doc and /dev/null differ
diff --git a/Examples/Data/Attachments/sample.msg b/Examples/Data/Attachments/sample.msg
deleted file mode 100644
index bc72cb7..0000000
Binary files a/Examples/Data/Attachments/sample.msg and /dev/null differ
diff --git a/Examples/Data/Attachmentssample.msg b/Examples/Data/Attachmentssample.msg
deleted file mode 100644
index bc72cb7..0000000
Binary files a/Examples/Data/Attachmentssample.msg and /dev/null differ
diff --git a/Examples/Data/Output/sample.docx b/Examples/Data/Output/sample.docx
deleted file mode 100644
index 10fe0d4..0000000
Binary files a/Examples/Data/Output/sample.docx and /dev/null differ
diff --git a/Examples/Data/Output/sample.gif b/Examples/Data/Output/sample.gif
deleted file mode 100644
index 0597dab..0000000
Binary files a/Examples/Data/Output/sample.gif and /dev/null differ
diff --git a/Examples/Data/Output/sample.msg b/Examples/Data/Output/sample.msg
deleted file mode 100644
index 760fad6..0000000
Binary files a/Examples/Data/Output/sample.msg and /dev/null differ
diff --git a/Examples/Data/Output/sample.pdf b/Examples/Data/Output/sample.pdf
deleted file mode 100644
index 22c2e78..0000000
Binary files a/Examples/Data/Output/sample.pdf and /dev/null differ
diff --git a/Examples/Data/Output/sample.png b/Examples/Data/Output/sample.png
deleted file mode 100644
index 5bbc560..0000000
Binary files a/Examples/Data/Output/sample.png and /dev/null differ
diff --git a/Examples/Data/Output/sample.pptx b/Examples/Data/Output/sample.pptx
deleted file mode 100644
index b53ed81..0000000
Binary files a/Examples/Data/Output/sample.pptx and /dev/null differ
diff --git a/Examples/Data/Output/sample.vsdx b/Examples/Data/Output/sample.vsdx
deleted file mode 100644
index cc53cfc..0000000
Binary files a/Examples/Data/Output/sample.vsdx and /dev/null differ
diff --git a/Examples/Data/Output/sample.xlsx b/Examples/Data/Output/sample.xlsx
deleted file mode 100644
index 978fc4d..0000000
Binary files a/Examples/Data/Output/sample.xlsx and /dev/null differ
diff --git a/Examples/Data/Screenshots/Files.png b/Examples/Data/Screenshots/Files.png
deleted file mode 100644
index 5078410..0000000
Binary files a/Examples/Data/Screenshots/Files.png and /dev/null differ
diff --git a/Examples/Data/Screenshots/ProjectInEclipse.png b/Examples/Data/Screenshots/ProjectInEclipse.png
deleted file mode 100644
index b4a3678..0000000
Binary files a/Examples/Data/Screenshots/ProjectInEclipse.png and /dev/null differ
diff --git a/Examples/Data/Source/Sample_with_Annotation_watermark.pdf b/Examples/Data/Source/Sample_with_Annotation_watermark.pdf
deleted file mode 100644
index 238ad41..0000000
Binary files a/Examples/Data/Source/Sample_with_Annotation_watermark.pdf and /dev/null differ
diff --git a/Examples/Data/Source/sample.docx b/Examples/Data/Source/sample.docx
deleted file mode 100644
index ce8ac56..0000000
Binary files a/Examples/Data/Source/sample.docx and /dev/null differ
diff --git a/Examples/Data/Source/sample.gif b/Examples/Data/Source/sample.gif
deleted file mode 100644
index cc85865..0000000
Binary files a/Examples/Data/Source/sample.gif and /dev/null differ
diff --git a/Examples/Data/Source/sample.jpg b/Examples/Data/Source/sample.jpg
deleted file mode 100644
index c24a0d3..0000000
Binary files a/Examples/Data/Source/sample.jpg and /dev/null differ
diff --git a/Examples/Data/Source/sample.msg b/Examples/Data/Source/sample.msg
deleted file mode 100644
index 81f0e00..0000000
Binary files a/Examples/Data/Source/sample.msg and /dev/null differ
diff --git a/Examples/Data/Source/sample.pdf b/Examples/Data/Source/sample.pdf
deleted file mode 100644
index b240cab..0000000
Binary files a/Examples/Data/Source/sample.pdf and /dev/null differ
diff --git a/Examples/Data/Source/sample.png b/Examples/Data/Source/sample.png
deleted file mode 100644
index e561f94..0000000
Binary files a/Examples/Data/Source/sample.png and /dev/null differ
diff --git a/Examples/Data/Source/sample.pptx b/Examples/Data/Source/sample.pptx
deleted file mode 100644
index abb766a..0000000
Binary files a/Examples/Data/Source/sample.pptx and /dev/null differ
diff --git a/Examples/Data/Source/sample.sqlite b/Examples/Data/Source/sample.sqlite
deleted file mode 100644
index 00a750d..0000000
Binary files a/Examples/Data/Source/sample.sqlite and /dev/null differ
diff --git a/Examples/Data/Source/sample.vsdx b/Examples/Data/Source/sample.vsdx
deleted file mode 100644
index 88fb4f2..0000000
Binary files a/Examples/Data/Source/sample.vsdx and /dev/null differ
diff --git a/Examples/Data/Source/sample.xlsx b/Examples/Data/Source/sample.xlsx
deleted file mode 100644
index 3e67df6..0000000
Binary files a/Examples/Data/Source/sample.xlsx and /dev/null differ
diff --git a/Examples/Data/Source/sample_with_Artifact_watermark.pdf b/Examples/Data/Source/sample_with_Artifact_watermark.pdf
deleted file mode 100644
index be55d34..0000000
Binary files a/Examples/Data/Source/sample_with_Artifact_watermark.pdf and /dev/null differ
diff --git a/Examples/Data/Source/sample_with_watermark.docx b/Examples/Data/Source/sample_with_watermark.docx
deleted file mode 100644
index eb17e63..0000000
Binary files a/Examples/Data/Source/sample_with_watermark.docx and /dev/null differ
diff --git a/Examples/Data/Source/samplewithattachments.msg b/Examples/Data/Source/samplewithattachments.msg
deleted file mode 100644
index bc72cb7..0000000
Binary files a/Examples/Data/Source/samplewithattachments.msg and /dev/null differ
diff --git a/Examples/Data/watermark.jpg b/Examples/Data/watermark.jpg
deleted file mode 100644
index 35ecc51..0000000
Binary files a/Examples/Data/watermark.jpg and /dev/null differ
diff --git a/Examples/README.md b/Examples/README.md
index da5882b..ca22cca 100644
--- a/Examples/README.md
+++ b/Examples/README.md
@@ -1,4 +1,4 @@
-## GroupDocs.Parser for Java Examples
+# GroupDocs.Parser for Java Examples
This package contains Example Project for [GroupDocs.Parser for Java](https://products.groupdocs.com/parser/java) and sample input documents used in the examples.
@@ -9,9 +9,11 @@ This package contains Example Project for [GroupDocs.Parser for Java](https://pr
-## How to Run the Examples?
+# How to Run the Examples?
-It is assumed that maven is installed and configured in your system path. You can edit the project by opening in your favorite IDE like NetBeans, Eclipse, and IntelliJ IDEA.
+It is assumed that maven is installed and configured in your system path. You can edit the project by opening in your favorite IDE like NetBeans, Eclipse, and IntelliJ IDEA. Please visit our [documentation website](https://docs.groupdocs.com/display/parserjava/How+to+Run+Examples) for more details.
+
+## Install and Configure Maven
You may find following resources helpful:
diff --git a/Examples/Resources/SampleFiles/Barcodes.pdf b/Examples/Resources/SampleFiles/Barcodes.pdf
new file mode 100644
index 0000000..43d2049
Binary files /dev/null and b/Examples/Resources/SampleFiles/Barcodes.pdf differ
diff --git a/Examples/Resources/SampleFiles/Embedded Image And Attachment.eml b/Examples/Resources/SampleFiles/Embedded Image And Attachment.eml
new file mode 100644
index 0000000..4f23ca2
--- /dev/null
+++ b/Examples/Resources/SampleFiles/Embedded Image And Attachment.eml
@@ -0,0 +1,171 @@
+Return-Path:
+To: Manuel Lemos
+Subject: Testing Manuel Lemos' MIME E-mail composing and sending PHP class: HTML message
+From: mlemos
+Reply-To: mlemos
+Sender: mlemos@acm.org
+X-Mailer: http://www.phpclasses.org/mimemessage $Revision: 1.63 $ (mail)
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="652b8c4dcb00cdcdda1e16af36781caf"
+Message-ID: <20050430192829.0489.mlemos@acm.org>
+Date: Sat, 30 Apr 2005 19:28:29 -0300
+
+
+--652b8c4dcb00cdcdda1e16af36781caf
+Content-Type: multipart/related; boundary="6a82fb459dcaacd40ab3404529e808dc"
+
+
+--6a82fb459dcaacd40ab3404529e808dc
+Content-Type: multipart/alternative; boundary="69c1683a3ee16ef7cf16edd700694a2f"
+
+
+--69c1683a3ee16ef7cf16edd700694a2f
+Content-Type: text/plain; charset=ISO-8859-1
+Content-Transfer-Encoding: quoted-printable
+
+This is an HTML message. Please use an HTML capable mail program to read
+this message.
+
+--69c1683a3ee16ef7cf16edd700694a2f
+Content-Type: text/html; charset=ISO-8859-1
+Content-Transfer-Encoding: quoted-printable
+
+
+
+Testing Manuel Lemos' MIME E-mail composing and sending PHP class: H=
+TML message
+
+
+
+
+
+
+Testing Manuel Lemos' MIME E-mail composing and sending PHP cla=
+ss: HTML message
+
+Hello Manuel,
+This message is just to let you know that the MIME E-mail message composing and sending PHP class is working as expected.
+
Here is an image embedded in a message as a separate part: =
+
+ Than=
+k you,
+mlemos
+
+
+
+
+
+--69c1683a3ee16ef7cf16edd700694a2f--
+
+--6a82fb459dcaacd40ab3404529e808dc
+Content-Type: image/gif; name="logo.gif"
+Content-Transfer-Encoding: base64
+Content-Disposition: inline; filename="logo.gif"
+Content-ID:
+
+R0lGODlhlgAjAPMJAAAAAAAA/y8vLz8/P19fX19f339/f4+Pj4+Pz7+/v///////////////////
+/////yH5BAEAAAkALAAAAACWACMAQwT+MMlJq7046827/2AoHYChGAChAkBylgKgKClFyEl6xDMg
+qLFBj3C5uXKplVAxIOxkA8BhdFCpDlMK1urMTrZWbAV8tVS5YsxtxmZHBVOSCcW9zaXyNhslVcto
+RBp5NQYxLAYGLi8oSwoJBlE+BiSNj5E/PDQsmy4pAJWQLAKJY5+hXhZ2dDYldFWtNSFPiXssXnZR
+k5+1pjpBiDMJUXG/Jo7DI4eKfMSmxsJ9GAUB1NXW19jZ2tvc3d7f4OHi2AgZN5vom1kk6F7s6u/p
+m3Ab7AOIiCxOyZuBIv8AOeTJIaYQjiR/kKTr5GQNE3pYSjCJ9mUXClRUsLxaZGciC0X+OlpoOuQo
+ZKdNJnIoKfnxRUQh6FLG0iLxIoYnJd0JEKISJyAQDodp3EUDC48oDnUY7HFI3wEDRjzycQJVZCQT
+Ol7NK+G0qgtkAcOKHUu2rNmzYTVqRMt2bB49bHompSchqg6HcGeANSMxr8sEa2y2HexnSEUTuWri
+SSbkYh7BgGVAnhB1b2REibESYaRoBgqIMYx59tFM9AvQffVG49P5NMZkMlHKhJPJb0knmSKZ6kSX
+JtbeF3Am7ocok6c7cM7pU5xcXiJJETUz16qPrzEfaFgZpvzn7h86YV5r/1mxXeAUMVyEIpnVUGpN
+RlG2ka9b3lP3pm2l6u7P+l/YLj3+RlEHbz1C0kRxSITQaAcilVBMEzmkkEQO8oSOBNg9SN+AX6hV
+z1pjgJiAhwCRsY8ZIp6xj1ruqCgeGeKNGEZwLnIwzTg45qjjjjz2GEA5hAUp5JBEFmnkkSCoWEcZ
+X8yohZNK1pFGPQS4hx0qNSLJlk9wCQORYu5QiMd7bUzGVyNlRiOHSlpuKdGEItHQ3HZ18beRRyws
+YSY/waDTiHf/tWlWUBAJiMJ1/Z0XXU7N0FnREpKM4NChCgbyRDq9XYpOplaKopN9NMkDnBbG+UMC
+QwLWIeaiglES6AjGARcPHCWoVAiatcTnGTABZoLPaPG1phccPv366mEvWEFSLnj+2QaonECwcJt/
+e1Zw3lJvVMmftBdVNQS3UngLCA85YHIQOy6JO9N4eZW7KJwtOUZmGwOMWqejwVW6RQzaikRHX3yI
+osKhDAq8wmnKSmdMwNidSOof9ZG2DoV0RfTVmLFtGmNk+CoZna0HQnPHS3AhRbIeDpqmR09E0bsu
+soeaw994z+rwQVInvqLenBftYjLOVphLFHhV9qsnez8AEUbQRgO737AxChjmyANxuEFHSGi7hFCV
+4jxLst2N8sRJYU+SHiAKjlmCgz2IffbLI5aaQR71hnkxq1ZfHSfKata6YDCJDMAQwY7wOgzhjxgj
+VFQnKB5uX4mr9qJ79pann+VcfcSzsSCd2mw5scqRRvlQ6TgcUelYhu75iPE4JejrsJOFQAG01277
+7bjnrvvuvPfu++/ABy887hfc6OPxyCevPDdAVoDA89BHL/301Fdv/fXYZ6/99tx3Pz0FEQAAOw==
+
+--6a82fb459dcaacd40ab3404529e808dc
+Content-Type: image/gif; name="background.gif"
+Content-Transfer-Encoding: base64
+Content-Disposition: inline; filename="background.gif"
+Content-ID: <4c837ed463ad29c820668e835a270e8a.gif>
+
+R0lGODlh+wHCAPMAAKPFzKLEy6HDyqHCyaDByJ/Ax56/xp2+xZ28xJy7w5u6wpq5wZm4wJm3v5i2
+vpe1vSwAAAAA+wHCAEME/hDISau9OOvNu/9gKI5kaZ5oqq5s675wLM90bd94ru987//AoHBILBqP
+yKRyyWw6n9CodEqtWq+gwSHReHgfjobY8X00FIc019tIHAYS7dqcQCDm3vC4fD4QAhUBBFsMZF8O
+hnkLCAYFW11tb1iTlJWWOXJdZZtmC24Eg3hgYntfbXainJ2fgBSZbG5wFAG0E6+RoAZ3CbwJCgya
+p3cMbAyevQcFAgMGCcRmxr1uyszOxQq+wF4MdcPFx7zJApfk5eYhr3SSGemRsu3dc+4iAqELhZwO
+0X6hkHUHCBRoGtUg0RkEAAUeKhhGAcICBQIODIPooIEBzCTmKcjGYSNd/go3VvQo65zJkyhTqlzJ
+sqXLlzBjypxJs6bNmzhz6tzJs6fPn0CDCh1KtKjRo0iTKl3KtKnTp1CXBhhAwECaq1gPNCIwANDU
+qmkMcG311apWULmyZt3alcPXAma1FgAlgCxVq2LbRt3LF0Y7hwWoEjLEDZUmff8AOjMkTB5gwYu3
+JbhIQUDEZw+4+aE1aNc0R2vcDYjoDBgpBoUDj95yzzRqbH7qgW4t5vUnAfVAoj7NwOOf1QloN7Ad
+u1Xf41b+IlCNsa6rR7DWwTPccTnG5sYvCEKwgPGiZI64A9OsK/Q/BM/0YfuFz13VOwsULLhHps+f
+98Hl0zeDRk0X9Qih/vLPWPjFN197aPyB3IJVBLDMdc5t4OB1A0QowYQQ0vIgdilgyGEgG1roYV0j
+GufhhyBSWGF2s2yIYosqWsjgjDTWaOONOOao44489ujjj0AGKeSQRBZp5JFIJqnkkkw26eSTUMJU
+llpYseXVXWGNdSGWZ6EVF5VWukUVXFdtRUCEU+bFYpRslqNcYKHgk1k8hxWWxjCM0VkdnINJRtkE
+lqH3hWZ/CKJYOBBBJxppu/FWh2qzNUrcmQRE6lpvt+UWUKPD9cbIb5bWhmlxbbL5JoUywiMddHRQ
+x591GWqwXXdsfJeoeMO5UZ4/AaaHKXv1xVKgfghuNuyB9fUHHYAA/u2CEIHlGbiffWuWyuSJMmKA
+bXbbbtuhi9kCUOIEJY57oYsraoduuOfGWO2J6Vor77z01mvvvfjmq+++/Pbr778AByzwwAQXbPDB
+CCfcZDobldLRVfLEEgerjQ1EEEemJMiioZEdkggYizSiqMQKl5wCw6qswg+rDTvc6h0Wq9KAJ5tV
+oGpJF9YysXn8lCfNL8HE88xw4EyzTDNDR4MMNUhfk40mhXkDTdHimHzjzRpgDcB0MEeHswf1sCZn
+GfrQDMrIAYZEkEEOJTQRQweBp5FIDTGCEUiHYWwRXHOPMpLdVgcu+OCEF2744YgnrvjijDfu+OOQ
+Ry755JRXbvnl/phnrvnmnHfu+eegZ57RAqSUzptv75E+M+Bb66L6InZwZ7rpr31aLQBhb2pap548
+e7TsIX8dOr/pIIZQQphFHfGqEbtq/J2/DDrZ13Ga0jt8h/XX9TxvfRmmuPVUatb34INCplxakjtm
+XOQ7aP74c+k1fE4MD7fefvxBbLEeLldsyq/4o9ZzHOOHylBFS7f4RJxQMx/8MeB4ggIDA02ziLno
+wlfGoOByKnUAhZQNWfkzwAXzMEExVFB+86NJ/TDVC4SIZRzFs5Ni5OQ/p7XwLOOwQDXSswgFiYuD
+Z4GMP8AjtvGgJk9aYU2davdCeyzRU2LpBwkb2KjvWCU4T/TN/u1S+BKtYUBrXFue8DYQKFoVAzXa
+eJh/XiYPpZEOFhAMTnzkk8aQWQU+c7yHJkIGkGd4SkDhMJ9i5qMAOu4RAWfiYk1yxwvfaYCRA8oh
+JF14x0bGhgSyaZY07JCMRDLyWWnxTOyc1UmweMaSL5zSKf/xQgnk5lA3TCWWVunCRCrylrjMpS53
+ycte+vKXwAymMIdJzGIa85jITKYyl8nMZjrzmdCMpjSnSc1qWvOa2MymvkY3u9IxMReyW92fuLm6
+2Kmum53SIgZyxx7e9C423AyeNnkUw8RsSnqumsfWKKYnCdozen6iHiGsF483gkF7PIND96oUP7KE
+73zteyj8/tK3JfGVqaHkkmhYMDrPJqzwfjRUlij4hzE4ds1pdGSMxgYYjAQZEBRtSeDKSmMMEGYG
+ghjU4+osGEF9ZNCEG3SEB2s6LTSIsKcl3CkKO2qEj24Sh/ucw/NmmCdXQQMbsbSlzZoGMkSSBYh5
+kWIkEhWc3aARiVc0qE+hSCklkvCbUpQgFTWYRCy+la1bZGoQvHgBMPIznyT7QBkNgsY05m+NNSQa
+Lwx6ijvJsZB69IIdB5nHOjKij9twCCAVGJ7HGlKyiMyhXo0wyUtmoLS2LK0ID+XIEWRys5ycyzg+
+yQ9TtjB2lpyLbZ8qy91mVZK+ReWZVCkNVmp1tMhNrnKX/svc5jr3udCNrnSnS93qWve62M2udrfL
+3e5697vgDa94x0ve8pr3vOhNr3rXy972uve98I2vfOdLXxrBS0Uv8lZGUaUh/OKXXRmAV7jMVV+X
+QLK4vD0TaoHLWq1UEsEJFu0FXknLh3iyM5EssEtQlrK98ZN5QbNqyl71pwqEza752MfZEqrhljg1
+pYMKkBh3FuKTXtUX+LupMkwcETNCA40D6QNiA3tfdunXAkdOEX+1Ba68tjiqLbVOnKp60oNAam6J
+fcyUvTYLAnDHOw8Jjx7Js71YTKWzxX1IV76iyayuWTCwDSIgKJxmqLI5zmp6sg5ZNdV7bkPGQWYh
+0EzR/s8+A1THEt6hIrx6IbByRawKHKjfpEfExVREpUEdzKX3dJe5UaQ6UdT0p18VGCfPF2X8S4QD
+QgaamI24hi1TtTxZyuVZ6AzK6gBnIbE66DmhImlzxAYouUq0XQ+oUhG039P+rAZgG7u1erYFyy6W
+Tt85ddkmHak3PWVaWuePAC9F4Mh6dgdjB/A8tCqbscUxWLmumxp8jsa5A5RuY7xbwtHGtT+Phz69
+nGo0WC60DPt9u0AljxWG8kylh9hsRKw1jbiwx24cDsUKSRwYFPdIq2347NoWkSEAKnG++brnGes7
+sYH1QPVqVdDsOZZXUlN2WYO1soCA9JBoScjNQdvs/n3fKXaxYefOH9BDfD+Z5Db78Dv+WuWUd4Bj
+YwPDx1bNiI03BoO7yRi9CzJBBLlQdj5tTbKIOFQqikHjruN6Bovlw5GnXZxjtMXbZ01O2NnhdawL
+ASOFw8BIxpOSuutUYWfmBjW0U1S+gczhqy0Wzuhmd7Ur5RYW/01Tz3dKcpYVl/Isrs2jBSyZJ4H7
+LIq+4VYUL2NZaCMgQiY1LXSjFH09wWexvovGvvawX2q+d8/73vv+98APvvCHT/ziG//4yE++8pfP
+/OY7//nQj770p0/96lv/+tjPvva3z/3ue//74A+/+MdP/vKb//zoT7/6e3Lf/3KryTDKUPvdBQIB
+/q+JwOuPwYEhbFzcYDjDuPN/lARL/FdLRlcZwdUNnTRbGAZt+fcCHCYzGqd0NJZtrsYJFjFGJ2ZQ
+m1A2kcZiD+gXLKNsMMZsTQdiFvg/IJUID7RjldFjhAVkGaM/6lASRfYu8KcuS6aDO4hkOfh7p7Jl
+bBRlVxYSWSZlfVKDXfZltRJmADFmulJmb3BmBJhbb9YZp1RLV9hmwtUWdBZhnYeFCaZ7Rxdv/5Q8
+gKaCvNBrQ0hCZxhjLhgHXEV1PiQIjhBEkDZT6VFSmkFWhbBppMZBljZqVtZpIUGIqCNqevMYlhdf
+qEYKslZ10zZibbgQDkN1IndyTkcLxiFTulZI/muYRsrjbKA4bNYwNR1nPsn2K6J4PKdYbKXYbSM3
+bSQVeWdybWwIa9Rmi0b3FwUEKAcUU+MGTr4AivP2hGSgbqDIbjDobssIb1IlbzSEbslob894gGUY
+jYkxeyf3GABnhAK3jeTDYxE0J5uRcEtjdYUnaoMXHStGGxlnNxs4cYgARRt3Y8UobB5XVhhXjyTR
+e0jnbfoURkGzDh+wcquACmqFUDD3iiw0LZFmczhmWTknkZ9FdK5IDH0GdArWGaB4kUXHewEpbSZH
+kLX2AVA3dVPHamgjNQ8XZG0Ddl2XLF9HOmF3RPmTKGV3IGdXdWl3k2zXiPBVd3nXV3PHOkRpgk5A
+lYlgg2F8Fw3WlnZW9HiCB2Q0Y3ic8k2Kl5V4JQhUiXgWFgqUh1e9h3mcpy2epxdm+XnjQ1EiMHoQ
+pVtogiWuV3urBxGod4Xnw41huJfjKHvtg3t8GYKEWZiGeZiImZiKuZiM2ZiO+ZiQGZmSOZmUWZmW
+eZmYmZmauZmc2ZlCEQEAOw==
+
+--6a82fb459dcaacd40ab3404529e808dc--
+
+--652b8c4dcb00cdcdda1e16af36781caf
+Content-Type: text/plain; name="attachment.txt"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; filename="attachment.txt"
+
+VGhpcyBpcyBqdXN0IGEgcGxhaW4gdGV4dCBhdHRhY2htZW50IGZpbGUgbmFtZWQgYXR0YWNobWVu
+dC50eHQgLg==
+
+--652b8c4dcb00cdcdda1e16af36781caf--
+
diff --git a/Examples/Resources/SampleFiles/Forms.pdf b/Examples/Resources/SampleFiles/Forms.pdf
new file mode 100644
index 0000000..bd20cdd
Binary files /dev/null and b/Examples/Resources/SampleFiles/Forms.pdf differ
diff --git a/Examples/Resources/SampleFiles/Hyperlinks.docx b/Examples/Resources/SampleFiles/Hyperlinks.docx
new file mode 100644
index 0000000..cc5e62b
Binary files /dev/null and b/Examples/Resources/SampleFiles/Hyperlinks.docx differ
diff --git a/Examples/Resources/SampleFiles/Hyperlinks.pdf b/Examples/Resources/SampleFiles/Hyperlinks.pdf
new file mode 100644
index 0000000..ecac0e9
Binary files /dev/null and b/Examples/Resources/SampleFiles/Hyperlinks.pdf differ
diff --git a/Examples/Resources/SampleFiles/Portfolio.pdf b/Examples/Resources/SampleFiles/Portfolio.pdf
new file mode 100644
index 0000000..283bd52
Binary files /dev/null and b/Examples/Resources/SampleFiles/Portfolio.pdf differ
diff --git a/Examples/Resources/SampleFiles/PortfolioWithFolder.pdf b/Examples/Resources/SampleFiles/PortfolioWithFolder.pdf
new file mode 100644
index 0000000..b73f4a9
Binary files /dev/null and b/Examples/Resources/SampleFiles/PortfolioWithFolder.pdf differ
diff --git a/Examples/Resources/SampleFiles/SampleCarWash.pdf b/Examples/Resources/SampleFiles/SampleCarWash.pdf
new file mode 100644
index 0000000..8a24e97
Binary files /dev/null and b/Examples/Resources/SampleFiles/SampleCarWash.pdf differ
diff --git a/Examples/Resources/SampleFiles/SampleScan.jpg b/Examples/Resources/SampleFiles/SampleScan.jpg
new file mode 100644
index 0000000..9382183
Binary files /dev/null and b/Examples/Resources/SampleFiles/SampleScan.jpg differ
diff --git a/Examples/Resources/SampleFiles/SampleWithImages.docx b/Examples/Resources/SampleFiles/SampleWithImages.docx
new file mode 100644
index 0000000..da63675
Binary files /dev/null and b/Examples/Resources/SampleFiles/SampleWithImages.docx differ
diff --git a/Examples/Resources/SampleFiles/SampleWithToc.docx b/Examples/Resources/SampleFiles/SampleWithToc.docx
new file mode 100644
index 0000000..6b31b1d
Binary files /dev/null and b/Examples/Resources/SampleFiles/SampleWithToc.docx differ
diff --git a/Examples/Resources/SampleFiles/SampleWithToc.pdf b/Examples/Resources/SampleFiles/SampleWithToc.pdf
new file mode 100644
index 0000000..0020510
Binary files /dev/null and b/Examples/Resources/SampleFiles/SampleWithToc.pdf differ
diff --git a/Examples/Resources/SampleFiles/The butterfly effect.msg b/Examples/Resources/SampleFiles/The butterfly effect.msg
new file mode 100644
index 0000000..00a6b19
Binary files /dev/null and b/Examples/Resources/SampleFiles/The butterfly effect.msg differ
diff --git a/Examples/Resources/SampleFiles/corrupted.png b/Examples/Resources/SampleFiles/corrupted.png
new file mode 100644
index 0000000..d35e3ea
Binary files /dev/null and b/Examples/Resources/SampleFiles/corrupted.png differ
diff --git a/Examples/Resources/SampleFiles/images.pdf b/Examples/Resources/SampleFiles/images.pdf
new file mode 100644
index 0000000..36147c9
Binary files /dev/null and b/Examples/Resources/SampleFiles/images.pdf differ
diff --git a/Examples/Resources/SampleFiles/images.pptx b/Examples/Resources/SampleFiles/images.pptx
new file mode 100644
index 0000000..37620b8
Binary files /dev/null and b/Examples/Resources/SampleFiles/images.pptx differ
diff --git a/Examples/Resources/SampleFiles/images.xlsx b/Examples/Resources/SampleFiles/images.xlsx
new file mode 100644
index 0000000..9f1565f
Binary files /dev/null and b/Examples/Resources/SampleFiles/images.xlsx differ
diff --git a/Examples/Resources/SampleFiles/installation.html b/Examples/Resources/SampleFiles/installation.html
new file mode 100644
index 0000000..766fe94
--- /dev/null
+++ b/Examples/Resources/SampleFiles/installation.html
@@ -0,0 +1,45 @@
+
+
+
+
+
+ Installation
+ Install from Nuget
+
+ NuGet is the easiest way to download and install GroupDocs.Parser for .NET. There are ways to install it in your
+ project.
+ Install via Package Manager GUI
+
+
+ Follow these steps to reference GroupDocs.Parser using Package Manager GUI:
+
+
+ Open your solution/project in Visual Studio.
+ Click Tools -> NuGet Package Manager -> Manage NuGet Packages for Solution.
+ You can also access the same option through the Solution Explorer. Right-click the solution or project and select
+ Manage NuGet Packages from the context menu
+ Select Browse tab and type “GroupDocs.Parser” in the search text box.
+ Click the Install button to install the latest version of the API into your project as shown in the following
+ screenshot.
+
+
+
+ Using Package Manager Console
+
+ You can follow the steps below to reference GroupDocs.Parser for .NET using the Package Manager Console:
+
+
+ Open your solution/project in Visual Studio.
+ Select Tools -> NuGet Package Manager -> Package Manager Console from the menu to open package manager console.
+ Type the command “Install-Package GroupDocs.Parser” and press enter to install the latest release into your
+ application.
+ After successful installation, GroupDocs.Parser will be referenced in your application.
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Examples/Resources/SampleFiles/installation.png b/Examples/Resources/SampleFiles/installation.png
new file mode 100644
index 0000000..a8e93c8
Binary files /dev/null and b/Examples/Resources/SampleFiles/installation.png differ
diff --git a/Examples/Resources/SampleFiles/installation_1.png b/Examples/Resources/SampleFiles/installation_1.png
new file mode 100644
index 0000000..fb0bc7e
Binary files /dev/null and b/Examples/Resources/SampleFiles/installation_1.png differ
diff --git a/Examples/Resources/SampleFiles/installation_2.png b/Examples/Resources/SampleFiles/installation_2.png
new file mode 100644
index 0000000..be464fb
Binary files /dev/null and b/Examples/Resources/SampleFiles/installation_2.png differ
diff --git a/Examples/Resources/SampleFiles/invoice.pdf b/Examples/Resources/SampleFiles/invoice.pdf
new file mode 100644
index 0000000..956f370
Binary files /dev/null and b/Examples/Resources/SampleFiles/invoice.pdf differ
diff --git a/Examples/Resources/SampleFiles/invoice_pages.pdf b/Examples/Resources/SampleFiles/invoice_pages.pdf
new file mode 100644
index 0000000..e23ed3a
Binary files /dev/null and b/Examples/Resources/SampleFiles/invoice_pages.pdf differ
diff --git a/Examples/Resources/SampleFiles/sample.docx b/Examples/Resources/SampleFiles/sample.docx
new file mode 100644
index 0000000..e9e3a91
Binary files /dev/null and b/Examples/Resources/SampleFiles/sample.docx differ
diff --git a/Examples/Resources/SampleFiles/sample.epub b/Examples/Resources/SampleFiles/sample.epub
new file mode 100644
index 0000000..78db0d7
Binary files /dev/null and b/Examples/Resources/SampleFiles/sample.epub differ
diff --git a/Examples/Resources/SampleFiles/sample.htm b/Examples/Resources/SampleFiles/sample.htm
new file mode 100644
index 0000000..4ff8005
--- /dev/null
+++ b/Examples/Resources/SampleFiles/sample.htm
@@ -0,0 +1,1153 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Test
+
+
Text for
+test:
+
+
1.
+ One
+
+
2.
+ Two
+
+
a.
+ Sub1
+
+
b.
+ Sub2
+
+
3.
+ Three
+
+
Bullets:
+
+
+ A
+
+
o
+ AA
+
+
+ B
+
+
+ C
+
+
+
+
+ iidqd
+
+
+
+
+
+
+
+
+
+
+ 1. Aa
+ 2. Bb
+ 3. Cc
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Examples/Resources/SampleFiles/sample.md b/Examples/Resources/SampleFiles/sample.md
new file mode 100644
index 0000000..c515227
--- /dev/null
+++ b/Examples/Resources/SampleFiles/sample.md
@@ -0,0 +1,16 @@
+# Test
+
+Text for test:
+
+1. One
+2. Two
+ 1. Sub1
+ 2. Sub2
+3. Three
+
+Bullets:
+
+* A
+* AA
+* B
+* C
diff --git a/Examples/Resources/SampleFiles/sample.one b/Examples/Resources/SampleFiles/sample.one
new file mode 100644
index 0000000..71b1fd9
Binary files /dev/null and b/Examples/Resources/SampleFiles/sample.one differ
diff --git a/Examples/Resources/SampleFiles/sample.ost b/Examples/Resources/SampleFiles/sample.ost
new file mode 100644
index 0000000..3a00f9b
Binary files /dev/null and b/Examples/Resources/SampleFiles/sample.ost differ
diff --git a/Examples/Resources/SampleFiles/sample.pdf b/Examples/Resources/SampleFiles/sample.pdf
new file mode 100644
index 0000000..bab54da
Binary files /dev/null and b/Examples/Resources/SampleFiles/sample.pdf differ
diff --git a/Examples/Resources/SampleFiles/sample.pptx b/Examples/Resources/SampleFiles/sample.pptx
new file mode 100644
index 0000000..7e970a1
Binary files /dev/null and b/Examples/Resources/SampleFiles/sample.pptx differ
diff --git a/Examples/Resources/SampleFiles/sample.xlsx b/Examples/Resources/SampleFiles/sample.xlsx
new file mode 100644
index 0000000..cd26b14
Binary files /dev/null and b/Examples/Resources/SampleFiles/sample.xlsx differ
diff --git a/Examples/Resources/SampleFiles/sample.zip b/Examples/Resources/SampleFiles/sample.zip
new file mode 100644
index 0000000..c17d6c6
Binary files /dev/null and b/Examples/Resources/SampleFiles/sample.zip differ
diff --git a/Examples/Resources/SampleFiles/samplePassword.pdf b/Examples/Resources/SampleFiles/samplePassword.pdf
new file mode 100644
index 0000000..f79643b
Binary files /dev/null and b/Examples/Resources/SampleFiles/samplePassword.pdf differ
diff --git a/Examples/Resources/SampleFiles/sqlite.db b/Examples/Resources/SampleFiles/sqlite.db
new file mode 100644
index 0000000..1d64699
Binary files /dev/null and b/Examples/Resources/SampleFiles/sqlite.db differ
diff --git a/Examples/Resources/SampleFiles/utf8.txt b/Examples/Resources/SampleFiles/utf8.txt
new file mode 100644
index 0000000..ed9233d
--- /dev/null
+++ b/Examples/Resources/SampleFiles/utf8.txt
@@ -0,0 +1,2 @@
+Привет™
+Testing for plain text… ©
\ No newline at end of file
diff --git a/Examples/pom.xml b/Examples/pom.xml
index 79373d1..23f7092 100644
--- a/Examples/pom.xml
+++ b/Examples/pom.xml
@@ -1,70 +1,78 @@
- 4.0.0
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ 4.0.0
- GroupDocs.Text-for-Java
- GroupDocs.Text-for-Java
- 1.0-SNAPSHOT
+ GroupDocs.Parser-for-Java
+ GroupDocs.Parser-for-Java
+ 24.6
-
- 8
- 8
-
-
-
-
- commons-net
- commons-net
- 3.5
-
-
- org.apache.commons
- commons-lang3
- 3.4
-
-
- com.microsoft.azure
- azure-keyvault
- 0.9.4
-
-
- com.microsoft.azure
- azure-storage
- 4.3.0
-
-
-
- org.apache.commons
- commons-io
- 1.3.2
-
-
- com.groupdocs
- groupdocs-parser
- 18.12
-
-
- org.xerial
- sqlite-jdbc
- 3.23.1
-
-
-
-
- GroupDocsJavaAPI
- GroupDocs Java API
- http://artifact.groupdocs.com/repo/
-
-
-
-
-
- src/main/resources
- true
-
-
-
-
+
+ 8
+ 8
+
+
+
+ commons-net
+ commons-net
+ 3.5
+
+
+ org.apache.commons
+ commons-lang3
+ 3.4
+
+
+ com.microsoft.azure
+ azure-keyvault
+ 0.9.4
+
+
+ com.microsoft.azure
+ azure-storage
+ 4.3.0
+
+
+
+ org.apache.commons
+ commons-io
+ 1.3.2
+
+
+ com.groupdocs
+ groupdocs-parser
+ 24.6
+
+
+ org.xerial
+ sqlite-jdbc
+ 3.23.1
+
+
+ com.aspose
+ aspose-ocr
+ 22.11
+
+
+
+
+ GroupDocsJavaAPI
+ GroupDocs Java API
+ https://releases.groupdocs.com/java/repo/
+
+
+ AsposeJavaAPI
+ Aspose Java API
+ https://releases.aspose.com/java/repo/
+
+
+
+
+
+ src/main/resources
+ true
+
+
+
\ No newline at end of file
diff --git a/Examples/src/main/java/com/groupdocs/parser/examples/BusinessCases.java b/Examples/src/main/java/com/groupdocs/parser/examples/BusinessCases.java
deleted file mode 100644
index 39e28ce..0000000
--- a/Examples/src/main/java/com/groupdocs/parser/examples/BusinessCases.java
+++ /dev/null
@@ -1,106 +0,0 @@
-package com.groupdocs.parser.examples;
-
-import com.groupdocs.parser.ExtractorFactory;
-import com.groupdocs.parser.TextExtractor;
-
-public class BusinessCases {
- //ExStart:WordStatistic
- class WordStatistic {
- public WordStatistic(String fileName, int minWordlength) throws java.lang.Exception {
- ExtractorFactory factory = new ExtractorFactory();
- java.util.Map statistic = new java.util.HashMap();
-
- TextExtractor extractor = factory.createTextExtractor(fileName);
- if (extractor == null) {
- System.out.println("The document's format is not supported");
- return;
- }
-
- try {
- String line = null;
- do {
- line = extractor.extractLine();
- if (line != null) {
- String[] words = line.split("[ ,;.]");
- for (String w : words) {
- String word = w.trim().toLowerCase();
- if (word.length() > minWordlength) {
- int value = !statistic.containsKey(word) ? 0 : statistic.get(word);
- statistic.put(word, value + 1);
- }
- }
- }
- }
- while (line != null);
- } finally {
- if (extractor != null) {
- extractor.close();
- }
- }
-
- System.out.println("Top words:");
-
- for (int i = 0; i < 10; i++) {
- int count = -1;
- String maxKey = null;
- for (String key : statistic.keySet()) {
- if (statistic.get(key) > count) {
- count = statistic.get(key);
- maxKey = key;
- }
- }
-
- if (maxKey == null) {
- break;
- }
-
- System.out.println(String.format("%s: %d", maxKey, count));
- statistic.remove(maxKey);
- }
- }
- }
- //ExEnd:WordStatistic
-
- //ExStart:FileViewer
- class FileViewer {
- public FileViewer(String fileName, boolean formatted) throws java.lang.Exception {
- int linesPerPage = 25;
- ExtractorFactory factory = new ExtractorFactory();
-
- TextExtractor extractor = formatted
- ? factory.createFormattedTextExtractor(fileName)
- : factory.createTextExtractor(fileName);
-
- if (extractor == null) {
- System.out.println("The document's format is not supported");
- return;
- }
-
- try {
- String line = null;
- do {
- System.out.println(fileName);
-
- int lineNumber = 0;
- do {
- line = extractor.extractLine();
- lineNumber++;
- if (line != null) {
- System.out.println(line);
- }
- }
- while (line != null && lineNumber < linesPerPage);
-
- System.out.println();
- System.out.println("Press q and Enter to exit or Enter to move to the next page");
- }
- while (line != null && System.in.read() != 'q');
- } finally {
- extractor.close();
- }
- }
- }
- //ExEnd:FileViewer
-
-
-}
diff --git a/Examples/src/main/java/com/groupdocs/parser/examples/Common.java b/Examples/src/main/java/com/groupdocs/parser/examples/Common.java
deleted file mode 100644
index a05af37..0000000
--- a/Examples/src/main/java/com/groupdocs/parser/examples/Common.java
+++ /dev/null
@@ -1,123 +0,0 @@
-package com.groupdocs.parser.examples;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.file.FileSystems;
-import java.nio.file.Path;
-import java.util.Properties;
-
-import com.groupdocs.parser.License;
-import com.groupdocs.parser.Metered;
-
-public class Common {
-
- public static final Path STORAGE_PATH = getProjectBaseDir().resolve("Data/Source");
- public static final Path OUTPUT_PATH = getProjectBaseDir().resolve("Data/Output");
-
- // public static final String ATTACHMENTS_PATH =
- // getProjectBaseDir().resolve("Data/Attachments/").toString();
- public static final String LICENSE_PATH = "D:\\GroupDocs.Total.Java.lic";
- public static final String PUBLIC_KEY = "Public key for your account";
- public static final String PRIVATE_KEY = "Private key for your account";
-
- // applies product license
- public static void applyLicenseFromFile() {
- try {
- // ExStart:ApplyLicenseFromFile
- // Setup license
- License lic = new License();
- lic.setLicense(LICENSE_PATH);
- // ExEnd:ApplyLicenseFromFile
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- // applies product license
- public static void applyLicenseFromStream() {
- try {
- // ExStart:ApplyLicenseFromStream
- // Setup license
- License lic = new License();
- lic.setLicense(new java.io.FileInputStream(LICENSE_PATH));
- // ExEnd:ApplyLicenseFromStream
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- // returns project base directory
- public static Path getProjectBaseDir() {
- Properties props = new Properties();
- try {
- InputStream i = Common.class.getResourceAsStream("/project.properties");
- props.load(i);
- } catch (IOException x) {
- throw new RuntimeException(x);
- }
- return FileSystems.getDefault().getPath(props.getProperty("project.basedir"));
- }
-
- // returns source file path
- public static String mapSourceFilePath(String inputFileName) {
- try {
- return STORAGE_PATH + "/" + inputFileName;
- } catch (Exception e) {
- e.printStackTrace();
- return e.getMessage();
- }
- }
-
- // returns output file path
- public static String mapOutputFilePath(String outputFileName) {
- try {
- return OUTPUT_PATH + "/" + outputFileName;
- } catch (Exception e) {
- e.printStackTrace();
- return e.getMessage();
- }
- }
-
- // shows how to use library in licensed mode using Dynabic.Metered account
- public static void useDynabicMeteredAccount() {
- // ExStart:ApplyMeteredLicense
- // initialize Metered API
- Metered metered = new Metered();
- // set-up credentials
- try {
- metered.setMeteredKey(PUBLIC_KEY, PRIVATE_KEY);
- } catch (Exception e) {
- e.printStackTrace();
- }
- // do some work:
- // ExEnd:ApplyMeteredLicense
-
- }
-
- // gets connection string
- public static String getConnectionString(String inputFileName) {
- try {
- String connectionString = "jdbc:sqlite:" + STORAGE_PATH + "/" + inputFileName;
- return connectionString;
- } catch (Exception e) {
- e.printStackTrace();
- return e.getMessage();
- }
- }
-
- // saves the source stream into destination stream
- public static void copyStream(java.io.InputStream source, java.io.OutputStream dest) throws java.io.IOException {
- byte[] buffer = new byte[4096];
-
- int r = 0;
- do {
- r = source.read(buffer);
- if (r > 0) {
- dest.write(buffer);
- }
- } while (r > 0);
- }
-
-}
diff --git a/Examples/src/main/java/com/groupdocs/parser/examples/Constants.java b/Examples/src/main/java/com/groupdocs/parser/examples/Constants.java
new file mode 100644
index 0000000..9f320d9
--- /dev/null
+++ b/Examples/src/main/java/com/groupdocs/parser/examples/Constants.java
@@ -0,0 +1,117 @@
+//
+// Copyright (C) 2011-2024 GroupDocs. All Rights Reserved.
+//
+
+package com.groupdocs.parser.examples;
+
+import java.io.IOException;
+
+/**
+ * Shared locations used by the examples: license files, sample documents and the output folder.
+ *
+ * <p>Sample document paths are built from the {@code user.dir} system property, while
+ * {@link #OutputPath} is a relative path. Both use Windows-style backslash separators.
+ */
+public class Constants {
+    public static final String LicensePath = "C:\\licenses\\GroupDocs.Parser.Java.lic";
+    public static final String SamplesPath = "..\\Examples\\Resources\\SampleFiles\\";
+    public static final String OutputPath = "..\\Examples\\Output\\";
+
+    public static final String AsposeOcrLicensePath = "C:\\Licenses\\Aspose.OCR.Java.lic";
+
+    public static final String SampleDocx = getFilePath("sample.docx");
+
+    public static final String SampleWithImagesDocx = getFilePath("SampleWithImages.docx");
+
+    public static final String SampleHyperlinksDocx = getFilePath("Hyperlinks.docx");
+
+    public static final String SampleXlsx = getFilePath("sample.xlsx");
+
+    public static final String SampleWithImagesXlsx = getFilePath("images.xlsx");
+
+    public static final String SamplePptx = getFilePath("sample.pptx");
+
+    public static final String SampleWithImagesPptx = getFilePath("images.pptx");
+
+    public static final String SamplePdf = getFilePath("sample.pdf");
+
+    public static final String SampleImagesPdf = getFilePath("images.pdf");
+
+    public static final String HyperlinksPdf = getFilePath("Hyperlinks.pdf");
+
+    public static final String SampleFormsPdf = getFilePath("forms.pdf");
+
+    public static final String SampleCarWashPdf = getFilePath("SampleCarWash.pdf");
+
+    public static final String SampleInvoicePdf = getFilePath("invoice.pdf");
+
+    public static final String SampleInvoicePagesPdf = getFilePath("invoice_pages.pdf");
+
+    public static final String SamplePassword = getFilePath("samplePassword.pdf");
+
+    public static final String SampleMd = getFilePath("sample.md");
+
+    public static final String SampleHtml = getFilePath("sample.htm");
+
+    public static final String SampleEpub = getFilePath("sample.epub");
+
+    public static final String SampleOne = getFilePath("sample.one");
+
+    public static final String SampleZip = getFilePath("sample.zip");
+
+    public static final String SampleText = getFilePath("utf8.txt");
+
+    public static final String SampleDatabase = getFilePath("sqlite.db");
+
+    public static final String SampleMsg = getFilePath("The butterfly effect.msg");
+
+    public static final String SampleOutlook = getFilePath("sample.ost");
+
+    public static final String SamplePdfPortfolio = getFilePath("PortfolioWithFolder.pdf");
+
+    public static final String SampleDocxWithToc = getFilePath("samplewithtoc.docx");
+
+    public static final String SamplePdfWithToc = getFilePath("samplewithtoc.pdf");
+
+    public static final String SamplePdfWithBarcodes = getFilePath("Barcodes.pdf");
+
+    public static final String SampleScan = getFilePath("SampleScan.jpg");
+
+    public static final String SampleHtmlWithImages = getFilePath("installation.html");
+
+    public static final String InlineImages = getFilePath("Embedded Image And Attachment.eml");
+
+    public static final String SampleCorruptedBarcodes = getFilePath("corrupted.png");
+
+    /**
+     * Builds the full path of a sample document.
+     *
+     * @param fileName file name inside {@link #SamplesPath}
+     * @return the {@code user.dir} directory, {@link #SamplesPath} and {@code fileName} joined
+     */
+    private static String getFilePath(String fileName) {
+        return System.getProperty("user.dir") + "\\" + SamplesPath + fileName;
+    }
+
+    /**
+     * Returns the path for an output file and makes sure its parent folder exists.
+     *
+     * @param fileName file name, optionally with sub-folders, relative to {@link #OutputPath}
+     * @return {@link #OutputPath} followed by {@code fileName}
+     * @throws IOException if the parent folder does not exist and cannot be created
+     */
+    public static String getOutputFilePath(String fileName) throws IOException {
+        String outputFilePath = OutputPath + fileName;
+
+        // getParentFile() is null when the path has no parent component (for example on
+        // platforms where a backslash is not a separator); nothing to create in that case.
+        java.io.File directory = new java.io.File(outputFilePath).getParentFile();
+        // mkdirs() also creates missing intermediate folders; isDirectory() is re-checked
+        // because mkdirs() returns false when the folder already exists.
+        if (directory != null && !directory.mkdirs() && !directory.isDirectory()) {
+            throw new IOException("Unable to create output directory: " + directory);
+        }
+
+        return outputFilePath;
+    }
+}
diff --git a/Examples/src/main/java/com/groupdocs/parser/examples/Containers.java b/Examples/src/main/java/com/groupdocs/parser/examples/Containers.java
deleted file mode 100644
index 64a58b2..0000000
--- a/Examples/src/main/java/com/groupdocs/parser/examples/Containers.java
+++ /dev/null
@@ -1,400 +0,0 @@
-package com.groupdocs.parser.examples;
-
-import java.io.BufferedReader;
-import java.io.InputStream;
-
-import com.groupdocs.parser.CellsMediaTypeDetector;
-import com.groupdocs.parser.Container;
-import com.groupdocs.parser.ContainerEnumerator;
-import com.groupdocs.parser.DbContainer;
-import com.groupdocs.parser.EmailConnectionInfo;
-import com.groupdocs.parser.EmailContainer;
-import com.groupdocs.parser.EmailTextExtractor;
-import com.groupdocs.parser.ExtractorFactory;
-import com.groupdocs.parser.GroupDocsParserException;
-import com.groupdocs.parser.IContainerFactory;
-import com.groupdocs.parser.LoadOptions;
-import com.groupdocs.parser.MediaTypeDetector;
-import com.groupdocs.parser.MetadataNames;
-import com.groupdocs.parser.PersonalStorageContainer;
-import com.groupdocs.parser.TextExtractor;
-import com.groupdocs.parser.ZipContainer;
-
-public class Containers {
- // ExStart:SourceOSTDocumentFilePath
- private final static String OST_FILE_PATH = "sample.ost";
- private final static String ZIP_FILE_PATH = "sample.zip";
- private final static String DB_FILE_PATH = "sample.sqlite";
- // ExEnd:SourceOSTZIPDocumentFilePath
- /**
- * Creates containers
- *
- */
- public static void createContainer() {
- try {
- // ExStart:createContainer
- // Create a factory
- ExtractorFactory factory = new ExtractorFactory(null, new CellsMediaTypeDetector());
-
- // Try to create a container from the file
- Container container = factory.createContainer(Common.mapSourceFilePath(OST_FILE_PATH));
- if (container == null) {
- System.out.println("The document format is not supported");
- }
-
- // Try to create a container from the stream
- Container container2 = factory.createContainer(Common.mapSourceFilePath(OST_FILE_PATH));
- if (container == null) {
- System.out.println("The document format is not supported");
- }
-
- // Create load options
- LoadOptions loadOptions = new LoadOptions("text/plain", java.nio.charset.Charset.forName("UTF-8"));
- // Try to create a container from the stream with load options
- Container container3 = factory.createContainer(Common.mapSourceFilePath(OST_FILE_PATH), loadOptions);
- if (container == null) {
- System.out.println("The document format is not supported");
- }
- // ExEnd:createContainer
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts messages from OST-container.
- *
- */
- public static void extractMessagesFromOST() {
- try {
- // ExStart:extractMessagesFromOST
- // Create an extractor factory
- ExtractorFactory factory = new ExtractorFactory();
- // Create a container
- try (PersonalStorageContainer container = new PersonalStorageContainer(
- Common.mapSourceFilePath(OST_FILE_PATH))) {
- // Iterate over container's entities
- for (int i = 0; i < container.getEntities().size(); i++) {
- System.out.println("Name: " + container.getEntities().get(i).getName()); // name
- // of
- // the
- // file
- System.out.println("Path: " + container.getEntities().get(i).getPath().toString()); // path
- // of
- // the
- // file
- System.out.println("MediaType :" + container.getEntities().get(i).getMediaType()); // media
- // type
- // of
- // the
- // file
- System.out.println("Date: " + container.getEntities().get(i).getDate().toString()); // date
- // when
- // the
- // file
- // was
- // added
- // to
- // the
- // archive
- System.out.println("Size: " + container.getEntities().get(i).getSize()); // uncompressed
- // size
- // of
- // the
- // file
- System.out.println("Subject: " + container.getEntities().get(i).get_Item(MetadataNames.SUBJECT)); // subject
- // of
- // the
- // email
- System.out.println("From: " + container.getEntities().get(i).get_Item(MetadataNames.EMAIL_FROM)); // "from"
- // addresses
- // of
- // the
- // email
- System.out.println("To: " + container.getEntities().get(i).get_Item(MetadataNames.EMAIL_TO)); // "to"
- // addresses
- // of
- // the
- // email
-
- // Try to create a text extractor for the file of the
- // container
- TextExtractor extractor = factory.createTextExtractor(container.getEntities().get(i).openStream());
- System.out.println("Content:");
- // If the text extractor is supported (extractor != null),
- // then extract a text from the document
- System.out.println(
- extractor != null ? extractor.extractAll() : "The document format is not supported");
- }
- }
- // ExEnd:extractMessagesFromOST
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Enumerates all the entities of the group of containers.
- *
- */
- public static void enumerateAllEntitiesOfGroupOfContainers() {
- try {
- // ExStart:enumerateAllEntitiesOfGroupOfContainers
- // Create an extractor factory
- ExtractorFactory factory = new ExtractorFactory();
- // Create a container
- try (PersonalStorageContainer container = new PersonalStorageContainer(
- Common.mapSourceFilePath(OST_FILE_PATH))) {
- IContainerFactory containerFactory = null;
- MediaTypeDetector containerMediaTypeDetector = null;
- // Create a container enumerator
- ContainerEnumerator enumerator = new ContainerEnumerator(containerFactory, containerMediaTypeDetector,
- container);
- // Get the entity
- Container.Entity entity = enumerator.nextElement();
- // Iterate over files
- while (entity != null) {
- // Try to create a text extractor
- TextExtractor extractor = factory.createTextExtractor(entity.openStream());
- // If the text extractor is supported (extractor != null),
- // then
- // extract a text from the document
- System.out.println(extractor == null ? "document isn't supported" : extractor.extractAll());
- // Get the entity for the next iteration
- entity = enumerator.nextElement();
- }
- }
- // ExEnd:enumerateAllEntitiesOfGroupOfContainers
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Shows the usage of EmailConnectionInfo class for Exchange, POP, and IMAP
- * email servers
- *
- */
- public static void createEmailConnectionInfo() {
- try {
- // ExStart:createEmailConnectionInfo
- // Exchange Web Service:
- EmailConnectionInfo ewsInfo = EmailConnectionInfo.createEwsConnectionInfo(
- "https://outlook.office365.com/ews/exchange.asmx", "username", "password", "domain");
- // or if domain is not required:
- EmailConnectionInfo ewsInfoNoDomain = EmailConnectionInfo
- .createEwsConnectionInfo("https://outlook.office365.com/ews/exchange.asmx", "username", "password");
- // POP:
- EmailConnectionInfo popInfo = EmailConnectionInfo.createPopConnectionInfo("pop-mail.outlook.com", 995,
- "username", "password");
- // IMAP:
- EmailConnectionInfo imapInfo = EmailConnectionInfo.createImapConnectionInfo("imap-mail.outlook.com", 993,
- "username", "password");
-
- // ExEnd:createEmailConnectionInfo
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Retrieves list of all email from Exchange Web Service
- *
- */
- public static void getListOfEmailsFromEWS() {
- try {
- // ExStart:getListOfEmailsFromEWS
- // Exchange Web Service:
- EmailConnectionInfo ewsInfo = EmailConnectionInfo.createEwsConnectionInfo(
- "https://outlook.office365.com/ews/exchange.asmx", "username", "password", "domain");
- // Create an email container
- try (EmailContainer container = new EmailContainer(ewsInfo)) {
- // Iterate over emails
- for (Container.Entity entity : container.getEntities()) {
- System.out.println("Folder: " + entity.getPath().toString()); // A
- // folder
- // at
- // server
- System.out.println("Subject: " + entity.get_Item(MetadataNames.SUBJECT)); // A
- // subject
- // of
- // email
- System.out.println("From: " + entity.get_Item(MetadataNames.EMAIL_FROM)); // "From"
- // address
- System.out.println("To: " + entity.get_Item(MetadataNames.EMAIL_TO)); // "To"
- // addresses
- }
- }
- // ExEnd:getListOfEmailsFromEWS
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Retrieves an email from Exchange Web Service
- *
- */
- public static void retrieveAnEmailFromEWS() {
- try {
- // ExStart:retrieveAnEmailFromEWS
- // Exchange Web Service:
- EmailConnectionInfo ewsInfo = EmailConnectionInfo.createEwsConnectionInfo(
- "https://outlook.office365.com/ews/exchange.asmx", "username", "password", "domain");
- // Create an email container
- try (EmailContainer container = new EmailContainer(ewsInfo)) {
- // Iterate over emails
- for (Container.Entity entity : container.getEntities()) {
- // Create a stream with the content of email
- java.io.InputStream stream = entity.openStream();
- // Create a text extractor for email
- try (TextExtractor extractor = new EmailTextExtractor(stream)) {
- // Extract all the text from email
- System.out.println(extractor.extractAll());
- }
- }
- }
- // ExEnd:retrieveAnEmailFromEWS
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Enumerates all entities in a ZIP archive
- *
- */
- public static void enumerateFileInZIPArchive() {
- try {
- // ExStart:enumerateFileInZIPArchive
- try (ZipContainer container = new ZipContainer(Common.mapSourceFilePath(ZIP_FILE_PATH))) {
-
- for (int i = 0; i < container.getEntities().size(); i++) {
- System.out.println("Name: " + container.getEntities().get(i).getName());
- System.out.println("Path: " + container.getEntities().get(i).getPath().toString());
- System.out.println("Media type: " + container.getEntities().get(i).getMediaType());
- }
- }
- // ExEnd:enumerateFileInZIPArchive
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Reads file in a ZIP archive
- *
- */
- public static void readConcreteFileInZIPArchive() {
- try {
- // ExStart:readConcreteFileInZIPArchive
- try (ZipContainer container = new ZipContainer(Common.mapSourceFilePath(ZIP_FILE_PATH))) {
-
- // Create a factory
- ExtractorFactory extractorFactory = new ExtractorFactory();
-
- try (TextExtractor extractor = extractorFactory
- .createTextExtractor(container.getEntities().get(0).openStream())) {
- System.out.println(extractor.extractAll());
- }
- }
- // ExEnd:readConcreteFileInZIPArchive
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Reads file in a ZIP archive
- *
- */
- public static void retrieveAllEntitiesInZIPArchiveByName() {
- try {
- // ExStart:retrieveAllEntitiesInZIPArchiveByName
- try (ZipContainer container = new ZipContainer(Common.mapSourceFilePath(ZIP_FILE_PATH))) {
-
- // Create a factory
- ExtractorFactory extractorFactory = new ExtractorFactory();
-
- // Try to get "container.xml" entity from "META-INF" folder
- Container.Entity containerEntry = container.getEntity("META-INF\\container.xml");
- // If the entity isn't found
- if (containerEntry == null) {
- throw new GroupDocsParserException("File not found");
- }
-
- // Try to create a text extractor
- TextExtractor extractor = extractorFactory.createTextExtractor(containerEntry.openStream());
- try {
- // Extract a text (if the document type is supported)
- System.out.println(extractor == null ? "Document type isn't supported" : extractor.extractAll());
- } finally {
- // Cleanup
- if (extractor != null) {
- extractor.dispose();
- }
- }
- }
- // ExEnd:retrieveAllEntitiesInZIPArchiveByName
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Reads text from database
- *
- */
- public static void extractTextFromDatabase() {
- try {
- // ExStart:extractTextFromDatabase_18.9
- String connectionString = Common.getConnectionString(DB_FILE_PATH);
- DbContainer container = new DbContainer(java.sql.DriverManager.getConnection(connectionString));
- try {
- // Iterate over entities
- for (Container.Entity entity : container.getEntities()) {
- // Print a table name
- System.out.println(entity.getName());
- // Print a media type
- System.out.println(entity.getMediaType());
- // Create a stream reader for CSV document: OpenStream
- // method converts a table to the CSV file and returns it as
- // Stream
- java.io.InputStreamReader reader = new java.io.InputStreamReader(entity.openStream());
- try {
- BufferedReader br = new BufferedReader(reader);
-
- // Read a line
- String line = br.readLine();
- // Loop to the end of the file
- while (line != null) {
- // Print a line from the document
- System.out.println(line);
- // Read the next line
- line = br.readLine();
- }
- } finally {
- reader.close();
- }
-
- }
- } finally {
- container.dispose();
- }
- // ExEnd:extractTextFromDatabase_18.9
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
-}
diff --git a/Examples/src/main/java/com/groupdocs/parser/examples/MainClass.java b/Examples/src/main/java/com/groupdocs/parser/examples/MainClass.java
deleted file mode 100644
index 95996de..0000000
--- a/Examples/src/main/java/com/groupdocs/parser/examples/MainClass.java
+++ /dev/null
@@ -1,188 +0,0 @@
-package com.groupdocs.parser.examples;
-
-import com.groupdocs.parser.examples.StructuredHandlers.Headers;
-import com.groupdocs.parser.examples.TextExtractors.CHMDocuments;
-import com.groupdocs.parser.examples.TextExtractors.EPUBDocuments;
-import com.groupdocs.parser.examples.TextExtractors.EmailMessages;
-import com.groupdocs.parser.examples.TextExtractors.FictionBookDocuments;
-import com.groupdocs.parser.examples.TextExtractors.MarkdownDocuments;
-import com.groupdocs.parser.examples.TextExtractors.OneNoteDocuments;
-import com.groupdocs.parser.examples.TextExtractors.PDFDocuments;
-import com.groupdocs.parser.examples.TextExtractors.PresentationDocuments;
-import com.groupdocs.parser.examples.TextExtractors.SpreadsheetDocuments;
-import com.groupdocs.parser.examples.TextExtractors.TextDocuments;
-import com.groupdocs.parser.examples.TextFormatters.HTML;
-import com.groupdocs.parser.examples.TextFormatters.Markdown;
-import com.groupdocs.parser.examples.TextFormatters.PlainText;
-import com.groupdocs.parser.examples.Tools.Detector;
-import com.groupdocs.parser.examples.Tools.EncodingDetector;
-import com.groupdocs.parser.examples.Tools.Indexer;
-import com.groupdocs.parser.examples.Tools.Loggers;
-import com.groupdocs.parser.examples.Tools.MediaTypeDetectors;
-
-public class MainClass {
- public static void main(String[] args) throws Throwable {
-
- // Uncomment following code if you have license file
- // Common.applyLicenseFromStream();
-
- // Using metered license
- // Common.useDynabicMeteredAccount();
-
- //// Extract plain text
- // TextExtraction.extractTextUsingExtractorClass();
- // TextExtraction.extractTextWithMediaTypeUsingExtractorClass();
- // TextExtraction.extractTextUsingExtractorClassWithConstructor();
- // TextExtraction.extractTextUsingTextExtractor();
- // TextExtraction.extractTextWithMediaTypeDetection();
- // TextExtraction.extractTextWithCompositeMediaTypeDetection();
- // TextExtraction.extractTextUsingExtractorFactory();
- // TextExtraction.extractTextFromDocumentsContainedInOSTContainer();
- // TextExtraction.extractTextInSimpleExtractMode();
-
- //// Extract formatted text
- // TextExtraction.extractFormattedTextUsingExtractorClass();
- // TextExtraction.extractFormattedTextUsingFormattedTextExtractor();
- // TextExtraction.extractFormattedTextUsingExtractorFactory();
- // TextExtraction.extractFormattedTextWithTextFormatter();
- // TextExtraction.extractFormattedTextWithTextFormatterUsingITextExtractorWithFormatter();
- // TextExtraction.extractFormattedTextWithTextFormatterUsingExtractorFactory();
- // TextExtraction.extractFormattedTextUsingExtractorClassWithConstructor();
- // TextExtraction.extractFormattedTextWithMediaTypeUsingExtractorClass();
-
- //// Extract highlights
- // TextExtraction.extractHighlights();
- // TextExtraction.extractHighlightsWithLineOptions();
- // TextExtraction.extractHighlightsWithWordsCountOptions();
- // TextExtraction.extractHighlightsWithSearchHighlightOptions();
-
- //// Extract document pages
- // TextExtraction.extractDocumentPagesUsingIPageTextExtractor();
- // Extract text from password protected document
- // TextExtraction.extractTextFromPasswordProtectedDocument();
-
- //// Text search
- // TextSearch.searchText();
- // TextSearch.searchWholeWord();
- // TextSearch.searchWithRegularExpression();
-
- //// Metadata extraction
- // MetadataExtraction.createMetadataExtractor();
- // MetadataExtraction.extractMetadataUsingComplexMetadataExtractor();
- // MetadataExtraction.extractMetadataUsingExtractorFactory();
- // MetadataExtraction.extractMetadataUsingMetadataExtractor();
- // MetadataExtraction.extractMetadataUsingDefaultExtractor();
-
- //// region Text extractors
- //// Working with text documents
- // TextDocuments.extractFormattedText();
- // TextDocuments.extractTextAsWhole();
- // TextDocuments.extractTextByLines();
- // TextDocuments.extractImages();
-
- //// Working with spreadsheet documents
- // SpreadsheetDocuments.extractFormattedText();
- // SpreadsheetDocuments.extractSelectedColumns();
- // SpreadsheetDocuments.extractSelectedColumnsByRows();
- // SpreadsheetDocuments.extractSheetByRows();
- // SpreadsheetDocuments.extractTextAsWhole();
- // SpreadsheetDocuments.extractTextByLines();
- // SpreadsheetDocuments.extractImages();
-
- //// Working with presentation documents
- // PresentationDocuments.extractFormattedText();
- // PresentationDocuments.extractTextAsWhole();
- // PresentationDocuments.extractTextByLines();
- // PresentationDocuments.extractImages();
-
- //// Working with PDF documents
- // PDFDocuments.extractTextAsWhole();
- // PDFDocuments.extractTextByLines();
- // PDFDocuments.extractDataFromPDFForms();
- // PDFDocuments.extractImages();
- // PDFDocuments.extractTablesManually();
- // PDFDocuments.extractTablesUsingTableAreaDetector();
-
- //// Working with OneNote documents
- // OneNoteDocuments.extractTextAsWhole();
- // OneNoteDocuments.extractTextByLines();
-
- //// Working with FB2 documents
- // FictionBookDocuments.extractFormattedText();
- // FictionBookDocuments.extractTextAsWhole();
- // FictionBookDocuments.extractTextByLines();
-
- //// Working with EPUB documents
- // EPUBDocuments.extractFormattedText();
- // EPUBDocuments.extractTextAsWhole();
- // EPUBDocuments.extractTextByLines();
- // EPUBDocuments.extractTextFromContentDocument();
- // EPUBDocuments.getContentDocument();
- // EPUBDocuments.getEPUBPackages();
- // EmailMessages.extractAttachments();
-
- //// Working with CHM documents
- // CHMDocuments.extractTextAsWhole();
- // CHMDocuments.extractTextByLines();
- // CHMDocuments.extractFormattedText();
- // CHMDocuments.extractFormattedTextByLines();
- // CHMDocuments.extractFormattedTextUsingTextFormatter();
- // CHMDocuments.extractTableOfContent();
- // CHMDocuments.extractTextOfItemInTOC();
-
- //// Working with markdown documents
- // MarkdownDocuments.extractSingleLineAsRawText();
- // MarkdownDocuments.extractAllCharactersAsRawText();
- // MarkdownDocuments.extractSingleLineAsFormattedText();
- // MarkdownDocuments.extractAllCharactersAsFormattedText();
- // MarkdownDocuments.extractFormattedTextUsingDocumentFormatter();
- // MarkdownDocuments.extractStructuredText();
-
- //// Text formatters
- // HTML.extractTextUsingHTMLDocumentFormatter();
- // Markdown.extractTextUsingMarkdownDocumentFormatter();
- // PlainText.extractTextUsingPlainTextDocumentFormatter();
- // PlainText.extractTextUsingPlainTextDocumentFormatterWithPlainTableFrame();
-
- //// Tools
- // EncodingDetector.detectEncoding();
- // Loggers.extractTextWithNotificationReceiver();
- // Loggers.extractTextWithNotificationReceiverAllNotifications();
- // Loggers.extractTextWithNotificationReceiverManualExceptionHandling();
- // Loggers.extractTextWithNotificationReceiverUsingExtractFactory();
- // MediaTypeDetectors.detectMediaTypeByContent();
- // MediaTypeDetectors.detectMediaTypeByExtension();
- // MediaTypeDetectors.IsMediaTypeSupportedByDetector();
- // ExStart:Detector_usage_18.12
- // Detector detector = new Detector();
- // detector.detect(new java.io.File(Common.STORAGE_PATH.toString()).list(null));
- // ExEnd:Detector_usage_18.12
-
- // This feature allows providing a password for protected documents
- // on-demand
- // ExStart:requestPasswordForProtectedDocument_usage_18.9
- // Indexer indexer = new Indexer();
- // indexer.process(new java.io.File(Common.STORAGE_PATH.toString()));
- // ExEnd:requestPasswordForProtectedDocument_usage_18.9
-
- //// Containers
- // Containers.createEmailConnectionInfo();
- // Containers.enumerateAllEntitiesOfGroupOfContainers();
- // Containers.extractMessagesFromOST();
- // Containers.getListOfEmailsFromEWS();
- // Containers.retrieveAnEmailFromEWS();
- // Containers.extractTextFromDatabase();
-
- //// Structured handlers
- // StructuredHandlers.Headers hd = new Headers();
- // hd.extractHeader(new
- //// java.io.FileInputStream(Common.mapSourceFilePath("sample.docx")));
-
- // StructuredHandlers.Hyperlinks hl = new
- // StructuredHandlers.Hyperlinks();
- // hl.extract(new
- // java.io.FileInputStream(Common.mapSourceFilePath("sample.docx")));
-
- System.out.println("Operation Completed...");
- }
-}
diff --git a/Examples/src/main/java/com/groupdocs/parser/examples/MetadataExtraction.java b/Examples/src/main/java/com/groupdocs/parser/examples/MetadataExtraction.java
deleted file mode 100644
index 364e3c8..0000000
--- a/Examples/src/main/java/com/groupdocs/parser/examples/MetadataExtraction.java
+++ /dev/null
@@ -1,162 +0,0 @@
-package com.groupdocs.parser.examples;
-
-import com.groupdocs.parser.EpubMetadataExtractor;
-import com.groupdocs.parser.Extractor;
-import com.groupdocs.parser.ExtractorFactory;
-import com.groupdocs.parser.LoadOptions;
-import com.groupdocs.parser.MetadataCollection;
-import com.groupdocs.parser.MetadataExtractor;
-import com.groupdocs.parser.WordsMetadataExtractor;
-
-public class MetadataExtraction {
- // ExStart:SourceDocumentFilePath
- private final static String FILE_PATH = "sample.docx";
- private final static String EPUB_FILE_PATH = "sample.epub";
- // ExEnd:SourceDocumentFilePath
-
- /**
- * Shows how to create metadata extractor using ExtractFactory
- *
- */
- public static void createMetadataExtractor() {
- try {
- // ExStart:createMetadataExtractorUsingExtractorFactory
- // Create a factory
- ExtractorFactory factory = new ExtractorFactory();
-
- // Try to create a metadata extractor from the file
- MetadataExtractor extractor = factory.createMetadataExtractor(Common.mapSourceFilePath(FILE_PATH));
- if (extractor == null) {
- System.out.println("The document format is not supported");
- }
-
- // Try to create a metadata extractor from the stream
- MetadataExtractor extractor2 = factory.createMetadataExtractor(new java.io.FileInputStream(Common.mapSourceFilePath(FILE_PATH)));
- if (extractor == null) {
- System.out.println("The document format is not supported");
- }
-
- // Create load options
- LoadOptions loadOptions = new LoadOptions("text/plain", java.nio.charset.Charset.forName("UTF-8"));
- // Try to create a metadata extractor from the stream with load options
- MetadataExtractor extractor3 = factory.createMetadataExtractor(new java.io.FileInputStream(Common.mapSourceFilePath(FILE_PATH)), loadOptions);
- if (extractor == null) {
- System.out.println("The document format is not supported");
- }
- // ExEnd:createMetadataExtractorUsingExtractorFactory
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts metadata from a document using MetadataExtractor.
- *
- */
- public static void extractMetadataUsingMetadataExtractor() {
- try {
- // ExStart:extractMetadataUsingMetadataExtractor
- String fileName = Common.mapSourceFilePath(FILE_PATH);
- // Create a metadata extractor
- WordsMetadataExtractor extractor = new WordsMetadataExtractor();
- // Extract metadata from the file
- MetadataCollection metadata = extractor.extractMetadata(fileName);
- // Iterate over metadata keys
- for (String key : metadata.getKeys()) {
- // Write a value of the metadata key
- System.out.println(String.format("%s = %s", key, metadata.get_Item(key)));
- }
- // ExEnd:extractMetadataUsingMetadataExtractor
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts metadata from a document using ExtractorFactory.
- *
- */
- public static void extractMetadataUsingExtractorClass() {
- try {
- // ExStart:extractMetadataUsingExtractorClass
- String fileName = Common.mapSourceFilePath(FILE_PATH);
- // Create an extractor
- Extractor extractor = new Extractor();
- // Try to extract metadata
- MetadataCollection metadata = extractor.extractMetadata(fileName);
- // If metadata extractor is supported
- if (metadata != null) {
- // Iterate over metadata keys
- for (String key : metadata.getKeys()) {
- // Write a value of the metadata key
- System.out.println(String.format("%s = %s", key, metadata.get_Item(key)));
- }
- } else {
- System.out.println("The document format is not supported");
- }
- // ExEnd:extractMetadataUsingExtractorClass
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts metadata from EPUB document using ComplexMetadataExtractor.
- *
- */
- public static void extractMetadataUsingComplexMetadataExtractor() {
- try {
- // ExStart:extractMetadataUsingComplexMetadataExtractor
- // Create an extractor
- EpubMetadataExtractor metadataExtractor = new EpubMetadataExtractor();
- // Get an enumerator for all metadata collections of the document
- java.util.Enumeration enumerator = metadataExtractor
- .extractComplexMetadata(Common.mapSourceFilePath(EPUB_FILE_PATH));
- // Get the metadata collection
- MetadataCollection metadata = enumerator.nextElement();
- // Iterate over metadata collections
- while (metadata != null) {
- // Iterate over metadata keys
- for (String key : metadata.getKeys()) {
- // Write a value of the metadata key
- System.out.println(String.format("%s = %s", key, metadata.get_Item(key)));
- }
-
- // Get the metadata collection for the next iteration
- metadata = enumerator.nextElement();
- }
- // ExEnd:extractMetadataUsingComplexMetadataExtractor
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts metadata using Default Extractor
- *
- */
- public static void extractMetadataUsingDefaultExtractor() {
- try {
- // ExStart:extractMetadataUsingDefaultExtractor_18.12
- // Extract metadata from the file
- MetadataCollection metadata = Extractor.DEFAULT.extractMetadata(Common.mapSourceFilePath(FILE_PATH));
- // Print extracted metadata
- for (String key : metadata.getKeys()) {
- // Print a metadata key
- System.out.print(key);
- System.out.print(": ");
- // Print a metadata value
- System.out.println(metadata.get_Item(key));
- }
- // ExEnd:extractMetadataUsingDefaultExtractor_18.12
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
-}
diff --git a/Examples/src/main/java/com/groupdocs/parser/examples/RunExamples.java b/Examples/src/main/java/com/groupdocs/parser/examples/RunExamples.java
new file mode 100644
index 0000000..8eea754
--- /dev/null
+++ b/Examples/src/main/java/com/groupdocs/parser/examples/RunExamples.java
@@ -0,0 +1,258 @@
+//
+// Copyright (C) 2011-2024 GroupDocs. All Rights Reserved.
+//
+package com.groupdocs.parser.examples;
+
+import com.groupdocs.parser.examples.advanced_usage.*;
+import com.groupdocs.parser.examples.advanced_usage.loading.*;
+import com.groupdocs.parser.examples.advanced_usage.using_ocr.*;
+import com.groupdocs.parser.examples.advanced_usage.working_with_hyperlinks.*;
+import com.groupdocs.parser.examples.advanced_usage.working_with_tables.*;
+import com.groupdocs.parser.examples.advanced_usage.working_with_templates.ParsePagesByTemplate;
+import com.groupdocs.parser.examples.advanced_usage.working_with_zip_archives_and_attachments.*;
+import com.groupdocs.parser.examples.advanced_usage.working_with_data_extracted_by_template.*;
+import com.groupdocs.parser.examples.advanced_usage.working_with_images.*;
+import com.groupdocs.parser.examples.advanced_usage.working_with_templates.template_fields.*;
+import com.groupdocs.parser.examples.advanced_usage.working_with_templates.template_tables.*;
+import com.groupdocs.parser.examples.advanced_usage.working_with_templates.template_barcodes.*;
+import com.groupdocs.parser.examples.advanced_usage.working_with_text.*;
+import com.groupdocs.parser.examples.advanced_usage.working_with_barcodes.*;
+import com.groupdocs.parser.examples.advanced_usage.working_with_text.working_with_formatted_text.*;
+import com.groupdocs.parser.examples.quick_start.*;
+import com.groupdocs.parser.examples.basic_usage.*;
+
+import java.io.Console;
+
+public class RunExamples {
+ public static void main(String[] args) throws Exception {
+ System.out.println("Open RunExamples.java. \n In main() method uncomment the example that you want to run.");
+ System.out.println("=====================================================");
+
+ // NOTE: Please uncomment the example you want to try out
+
+ // Quick Start
+
+ SetLicenseFromFile.run();
+// SetLicenseFromStream.run();
+// SetMeteredLicense.run();
+// HelloWorld.run();
+
+ // Basic Usage
+
+// ExtractDataFromAttachmentsAndZipArchives.run();
+// ExtractFormattedTextFromDocuments.run();
+// ExtractImagesFromDocuments.run();
+// ExtractMetadataFromDocuments.run();
+// ExtractTableOfContents.run();
+// ExtractTextFromDocuments.run();
+// GetDocumentInfo.run();
+// GetSupportedFeatures.run();
+// GetSupportedFileFormats.run();
+// ParseDataFromDocuments.run();
+
+ // Advanced Usage
+
+// Export.run();
+// Logging.run();
+// ExtractDataFromDatabases.run();
+// try {
+// ExtractDataFromDatabasesWithDbConnection.Run();
+// } catch (Exception ex) {
+// System.out.println(ex.getMessage());
+// }
+
+// GenerateDocumentPagesPreview.run();
+// GenerateSpreadsheetPagesPreview.run();
+
+ // Loading
+
+// PasswordProtectedDocuments.run();
+// LoadingSpecificFileFormats.run();
+// LoadDocumentFromStream.run();
+// LoadDocumentFromLocalDisk.run();
+// HandleLoadingOfExternalResources.run();
+// LoadDocumentFromUrl.run();
+
+ // Working With Zip Archives and Attachments
+
+// DetectFileType.run();
+// IterateThroughContainerItems.run();
+// DistinguishInlineImagesInEmail.run();
+
+ // Working With Images
+
+// ExtractImagesFormDocumentPageArea.run();
+// ExtractImagesFromDocument.run();
+// ExtractImagesFromDocumentPage.run();
+// ExtractImagesToFiles.run();
+
+ // Working With Tables
+
+// ExtractTablesFromDocument.Run();
+// ExtractTablesFromDocumentPage.Run();
+
+ // Working With Hyperlinks
+
+// ExtractHyperlinksFromDocument.Run();
+// ExtractHyperlinksFromDocumentPage.Run();
+// ExtractHyperlinksFromDocumentPageArea.Run();
+
+ // Working With Barcodes
+
+// ExtractBarcodesFromDocument.run();
+// ExtractBarcodesFromDocumentCorrupted.run();
+// ExtractBarcodesFromDocumentPage.run();
+// ExtractBarcodesFromDocumentPageArea.run();
+// ExtractBarcodesFromDocumentWithOptions.run();
+
+ // Working With Text
+
+// DetectEncoding.run();
+// ExtractHighlight.run();
+// ExtractTextAreas.run();
+// ExtractTextAreasFromPage.run();
+// ExtractTextAreasWithOptions.run();
+// ExtractTextFromPageInAccurateMode.run();
+// ExtractTextInRawMode.run();
+// ExtractTextInAccurateMode.run();
+// ExtractTextFromPageInRawMode.run();
+// ExtractTextStructure.run();
+// SearchTextByKeyword.run();
+// SearchTextByRegex.run();
+// SearchTextWithHighlights.run();
+// SearchTextByPages.run();
+// ExtractTextByTocItem.run();
+
+ // Working With Formatted Text
+
+// ExtractFormattedTextFromDocument.run();
+// ExtractFormattedTextFromDocumentPage.run();
+// Html.run();
+// Markdown.run();
+// PlainText.run();
+
+ // Working With Templates
+
+// ParsePagesByTemplate.run();
+
+ // Template Fields
+
+// WorkingWithTemplateFixedPosition.run();
+// WorkingWithTemplateRegexPosition.run();
+// WorkingWithTemplateLinkedPosition.run();
+
+ // Template Tables
+
+// WorkingWithTableLayout.run();
+// WorkingWithTableParameters.run();
+
+ // Template Barcodes
+
+// WorkingWithBarcodes.run();
+
+ // Working with data extracted by template
+
+// GetFieldByName.run();
+// IterateThroughFields.run();
+// WorkingWithTables.run();
+
+ // Using OCR
+
+// OcrUsageBasicsText.run();
+// OcrUsageBasicsTextAreas.run();
+// OcrUsageBasicsRectangle.run();
+// OcrUsageBasicsHandler.run();
+
+ // Extract data from various formats
+
+ // Word
+
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.word.ExtractHyperlinks.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.word.ExtractImages.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.word.ExtractMetadata.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.word.ExtractTableOfContents.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.word.ExtractTables.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.word.ExtractText.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.word.ExtractTextAsHtml.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.word.ExtractTextFromPage.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.word.SearchTextByKeyword.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.word.SearchTextByRegularExpression.run();
+
+ // Excel
+
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.excel.ExtractImages.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.excel.ExtractMetadata.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.excel.ExtractText.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.excel.ExtractTextAsHtml.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.excel.ExtractTextFromSheet.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.excel.ExtractTextFromSheetInRawMode.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.excel.SearchTextByKeyword.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.excel.SearchTextByRegularExpression.run();
+
+ // PowerPoint
+
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.power_point.ExtractImages.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.power_point.ExtractMetadata.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.power_point.ExtractText.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.power_point.ExtractTextAsHtml.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.power_point.ExtractTextFromSlide.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.power_point.ExtractTextFromSlideInRawMode.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.power_point.SearchTextByKeyword.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.power_point.SearchTextByRegularExpression.run();
+
+ // Pdf
+
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.pdf.ExtractImages.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.pdf.ExtractMetadata.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.pdf.ExtractText.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.pdf.ParseDataFromDocuments.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.pdf.ExtractTextFromPage.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.pdf.ExtractTextFromPageInRawMode.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.pdf.SearchTextByKeyword.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.pdf.SearchTextByRegularExpression.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.pdf.ExtractAttachmentsFromPdfPortfolios.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.pdf.ExtractDataFromPdfForms.run();
+
+ // Email
+
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.email.ExtractImages.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.email.ExtractMetadata.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.email.ExtractText.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.email.ExtractTextAsHtml.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.email.ExtractAttachmentsFromEmails.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.email.SearchTextByKeyword.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.email.SearchTextByRegularExpression.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.email.ExtractEmailsFromOutlookStorage.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.email.ExtractEmailsFromRemoveServer.run();
+
+ // Epub
+
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.epub.ExtractMetadata.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.epub.ExtractText.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.epub.ExtractTextAsHtml.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.epub.ExtractTextFromPage.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.epub.SearchTextByKeyword.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.epub.SearchTextByRegularExpression.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.epub.ExtractTableOfContents.run();
+
+ // Html
+
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.html.ExtractText.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.html.SearchTextByKeyword.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.html.SearchTextByRegularExpression.run();
+
+ // OneNote
+
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.one_note.ExtractText.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.one_note.ExtractTextFromPage.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.one_note.SearchTextByKeyword.run();
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.one_note.SearchTextByRegularExpression.run();
+
+ // Zip
+
+// com.groupdocs.parser.examples.advanced_usage.extract_data_from_various_formats.zip.ExtractTextFromZipArchiveFiles.run();
+
+ System.out.println();
+ System.out.println("All done.");
+ }
+}
diff --git a/Examples/src/main/java/com/groupdocs/parser/examples/StructuredHandlers.java b/Examples/src/main/java/com/groupdocs/parser/examples/StructuredHandlers.java
deleted file mode 100644
index 260f279..0000000
--- a/Examples/src/main/java/com/groupdocs/parser/examples/StructuredHandlers.java
+++ /dev/null
@@ -1,131 +0,0 @@
-package com.groupdocs.parser.examples;
-
-import com.groupdocs.parser.HyperlinkProperties;
-import com.groupdocs.parser.IStructuredExtractor;
-import com.groupdocs.parser.ListProperties;
-import com.groupdocs.parser.ParagraphProperties;
-import com.groupdocs.parser.ParagraphStyle;
-import com.groupdocs.parser.StructuredHandler;
-import com.groupdocs.parser.TableProperties;
-import com.groupdocs.parser.TextProperties;
-import com.groupdocs.parser.WordsTextExtractor;
-
-public class StructuredHandlers {
- // ExStart:extractHeadersFromDocumentUsingStructuredHandler
- public static class Headers {
- public Headers()
- {}
- private class Handler extends StructuredHandler {
-
- // Handle List event to prevent processing of lists
- @Override
- protected void onStartList(ListProperties properties) {
- properties.setSkipElement(true); // ignore lists
- }
-
- // Handle Table event to prevent processing of tables
- @Override
- protected void onStartTable(TableProperties properties) {
- properties.setSkipElement(true); // ignore tables
- }
-
- // Handle ElementText event to process a text
- @Override
- protected void onText(TextProperties properties, String value) {
- sb.append(value);
- }
-
- // Handle Paragraph event to process a paragraph
- @Override
- protected void onStartParagraph(ParagraphProperties properties) {
- int h1 = (int) ParagraphStyle.Heading1;
- int h6 = (int) ParagraphStyle.Heading6;
-
- int style = properties.getStyle();
- if (h1 <= style && style <= h6) {
- if (sb.length() > 0) {
- sb.append("\r\n");
- }
-
- // make an indention for the header (h1 - no indention)
- sb.append(new String(new char[style - h1]).replace('\0', ' '));
- } else {
- // skip paragraph if it's not a header or a title
- properties.setSkipElement(properties.getStyle() != ParagraphStyle.Title);
- }
- }
- }
-
- private StringBuilder sb = new StringBuilder();
- /**
- * Extracts headers from a document
- *
- */
- public void extractHeader(java.io.InputStream stream) {
- IStructuredExtractor extractor = new WordsTextExtractor(stream);
- Handler handler = new Handler();
-
- // Extract a text with its structure
- extractor.extractStructured(handler);
-
- System.out.println(sb.toString());
- }
-
- }
-
- // ExEnd:extractHeadersFromDocumentUsingStructuredHandler
-
- // ExStart:extractHyperlinksFromDocumentUsingStructuredHandler
- public static class Hyperlinks {
- public Hyperlinks(){}
- private class Handler extends StructuredHandler {
- // Handle Hyperlink event to process a starting of a hyperlink
- @Override
- protected void onStartHyperlink(HyperlinkProperties properties) {
- sb = new StringBuilder();
- currentLink = properties.getLink();
- }
-
- // Handle ElementClose event to process a closing of a hyperlink
- @Override
- protected void onEndElement() {
- if (get_Item(0).getClass() == HyperlinkProperties.class) // closing
- // of
- // hyperlink
- {
- if (sb != null) {
- hyperlinks.add(String.format("%s (%s)", sb.toString(), currentLink));
- }
- sb = null;
- currentLink = null;
- }
- }
-
- // Handle ElementText event to process a text
- @Override
- protected void onText(TextProperties properties, String value) {
- if (sb != null) // if hyperlink is open
- {
- sb.append(value);
- }
- }
- }
-
- java.util.List hyperlinks = new java.util.ArrayList();
- StringBuilder sb = null;
- String currentLink = null;
-
- public void extract(java.io.InputStream stream) {
- IStructuredExtractor extractor = new WordsTextExtractor(stream);
- Handler handler = new Handler();
-
- // Extract a text with its structure
- extractor.extractStructured(handler);
-
- for (String hl : hyperlinks) {
- System.out.println(hl);
- }
- }
- }
- // ExEnd:extractHyperlinksFromDocumentUsingStructuredHandler
-}
diff --git a/Examples/src/main/java/com/groupdocs/parser/examples/TextExtraction.java b/Examples/src/main/java/com/groupdocs/parser/examples/TextExtraction.java
deleted file mode 100644
index ec58e07..0000000
--- a/Examples/src/main/java/com/groupdocs/parser/examples/TextExtraction.java
+++ /dev/null
@@ -1,966 +0,0 @@
-package com.groupdocs.parser.examples;
-
-import java.io.FileNotFoundException;
-
-import com.groupdocs.parser.CellsMediaTypeDetector;
-import com.groupdocs.parser.CellsTextExtractor;
-import com.groupdocs.parser.CompositeMediaTypeDetector;
-import com.groupdocs.parser.DocumentInfo;
-import com.groupdocs.parser.EncodingDetector;
-import com.groupdocs.parser.ExtractMode;
-import com.groupdocs.parser.Extractor;
-import com.groupdocs.parser.ExtractorFactory;
-import com.groupdocs.parser.HighlightDirection;
-import com.groupdocs.parser.HighlightOptions;
-import com.groupdocs.parser.IDocumentContentExtractor;
-import com.groupdocs.parser.IFastTextExtractor;
-import com.groupdocs.parser.IPageTextExtractor;
-import com.groupdocs.parser.ITextExtractorWithFormatter;
-import com.groupdocs.parser.InvalidPasswordException;
-import com.groupdocs.parser.ListSearchHandler;
-import com.groupdocs.parser.LoadOptions;
-import com.groupdocs.parser.MarkdownDocumentFormatter;
-import com.groupdocs.parser.MediaTypeDetector;
-import com.groupdocs.parser.MediaTypeNames;
-import com.groupdocs.parser.MetadataNames;
-import com.groupdocs.parser.PdfTextExtractor;
-import com.groupdocs.parser.PersonalStorageContainer;
-import com.groupdocs.parser.Rectangle;
-import com.groupdocs.parser.SearchHighlightOptions;
-import com.groupdocs.parser.SearchOptions;
-import com.groupdocs.parser.TextArea;
-import com.groupdocs.parser.TextAreaSearchOptions;
-import com.groupdocs.parser.TextExtractor;
-import com.groupdocs.parser.WordsFormattedTextExtractor;
-import com.groupdocs.parser.WordsMediaTypeDetector;
-import com.groupdocs.parser.WordsTextExtractor;
-
-public class TextExtraction {
- // ExStart:SourceDocumentFilePath
- private final static String DOC_FILE_PATH = "sample.docx";
- private final static String EXCEL_FILE_PATH = "sample.xlsx";
- private final static String OST_FILE_PATH = "sample.ost";
- private final static String PDF_FILE_PATH = "sample.pdf";
- // ExEnd:SourceDocumentFilePath
-
- /**
- * Shows how to create text extractor using ExtractFactory
- *
- */
- public static void createTextExtractor() {
- try {
- // ExStart:createTextExtractor
- // Create a factory
- ExtractorFactory factory = new ExtractorFactory();
-
- // Try to create a text extractor from the file
- try (TextExtractor extractor = factory.createTextExtractor(Common.mapSourceFilePath(DOC_FILE_PATH))) {
- if (extractor == null) {
- System.out.println("The document format is not supported");
- }
- }
- // Try to create a text extractor from the stream
- try (TextExtractor extractor2 = factory
- .createTextExtractor(new java.io.FileInputStream(Common.mapSourceFilePath(DOC_FILE_PATH)))) {
- if (extractor2 == null) {
- System.out.println("The document format is not supported");
- }
- }
- // Create load options
- LoadOptions loadOptions = new LoadOptions("text/plain", java.nio.charset.Charset.forName("UTF-8"));
- // Try to create a text extractor with load options
- try (TextExtractor extractor3 = factory.createTextExtractor(
- new java.io.FileInputStream(Common.mapSourceFilePath(DOC_FILE_PATH)), loadOptions)) {
- if (extractor3 == null) {
- System.out.println("The document format is not supported");
- }
- }
- // ExEnd:createTextExtractor
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Shows how to create formatted text extractor using ExtractFactory
- *
- */
- public static void createFormattedTextExtractor() {
- try {
- // ExStart:createFormattedTextExtractor
- // Create a factory
- ExtractorFactory factory = new ExtractorFactory();
-
- // Try to create a formatted text extractor from the file
- try (TextExtractor extractor = factory
- .createFormattedTextExtractor(Common.mapSourceFilePath(DOC_FILE_PATH))) {
- if (extractor == null) {
- System.out.println("The document format is not supported");
- }
- }
- // Try to create a formatted text extractor from the stream
- try (TextExtractor extractor2 = factory.createFormattedTextExtractor(
- new java.io.FileInputStream(Common.mapSourceFilePath(DOC_FILE_PATH)))) {
- if (extractor2 == null) {
- System.out.println("The document format is not supported");
- }
- }
- // Create load options
- LoadOptions loadOptions = new LoadOptions("text/plain", java.nio.charset.Charset.forName("UTF-8"));
- // Try to create a formatted text extractor with load options
- try (TextExtractor extractor3 = factory.createFormattedTextExtractor(
- new java.io.FileInputStream(Common.mapSourceFilePath(DOC_FILE_PATH)), loadOptions)) {
- if (extractor3 == null) {
- System.out.println("The document format is not supported");
- }
- }
- // ExEnd:createFormattedTextExtractor
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Shows how to create text extractor using ExtractFactory
- *
- */
- public static void createConcreteTextExtractor() {
- try {
- // ExStart:createConcreteTextExtractor
- // Create a text extractor from the file
- CellsTextExtractor extractor = new CellsTextExtractor(Common.mapSourceFilePath(EXCEL_FILE_PATH));
- // Extract a text
- System.out.println(extractor.extractAll());
-
- // Create load options
- LoadOptions loadOptions = new LoadOptions("text/plain", java.nio.charset.Charset.forName("UTF-8"));
- // Create a text extractor from the stream with load options
- CellsTextExtractor extractor2 = new CellsTextExtractor(Common.mapSourceFilePath(EXCEL_FILE_PATH),
- loadOptions);
- // Extract a text
- System.out.println(extractor.extractAll());
-
- // ExEnd:createConcreteTextExtractor
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts text from a document using Extractor class.
- *
- * @throws FileNotFoundException
- */
- public static void extractTextUsingExtractorClass() throws FileNotFoundException {
- // ExStart:extractTextUsingExtractorClass
- // Extract a text from the stream
- System.out.println(
- Extractor.DEFAULT.extractText(new java.io.FileInputStream(Common.mapSourceFilePath(DOC_FILE_PATH))));
-
- // Extract a text from the file
- System.out.println(Extractor.DEFAULT.extractText(Common.mapSourceFilePath(DOC_FILE_PATH)));
- // ExEnd:extractTextUsingExtractorClass
- }
-
- /**
- * Extracts formatted text from a document using Extractor class.
- *
- * @throws FileNotFoundException
- */
- public static void extractFormattedTextUsingExtractorClass() throws FileNotFoundException {
- // ExStart:extractFormattedTextUsingExtractorClass
- // Extract a text from the stream
- System.out.println(Extractor.DEFAULT
- .extractFormattedText(new java.io.FileInputStream(Common.mapSourceFilePath(DOC_FILE_PATH))));
-
- // Extract a text from the file
- System.out.println(Extractor.DEFAULT.extractFormattedText(Common.mapSourceFilePath(DOC_FILE_PATH)));
- // ExEnd:extractFormattedTextUsingExtractorClass
- }
-
- /**
- * Extracts text from a document with media type using Extractor class.
- * Setting {loadOptions}} will increase text extraction (because detecting
- * media type is skipped).
- *
- * @throws FileNotFoundException
- */
- public static void extractTextWithMediaTypeUsingExtractorClass() throws FileNotFoundException {
- // ExStart:extractTextWithMediaTypeUsingExtractorClass
- // Create load options
- LoadOptions loadOptions = new LoadOptions(MediaTypeNames.Application.WORD_OPEN_XML);
- // Extract a text from the file
- System.out.println(Extractor.DEFAULT.extractText(Common.mapSourceFilePath(DOC_FILE_PATH), loadOptions));
- // ExEnd:extractTextWithMediaTypeUsingExtractorClass
- }
-
- /**
- * Extracts formatted text from a document with media type using Extractor
- * class. Setting {loadOptions}} will increase text extraction (because
- * detecting media type is skipped).
- *
- */
- public static void extractFormattedTextWithMediaTypeUsingExtractorClass() {
- // ExStart:extractFormattedTextWithMediaTypeUsingExtractorClass
- // Create load options
- LoadOptions loadOptions = new LoadOptions(MediaTypeNames.Application.WORD_OPEN_XML);
- // Extract a text from the file
- System.out
- .println(Extractor.DEFAULT.extractFormattedText(Common.mapSourceFilePath(DOC_FILE_PATH), loadOptions));
- // ExEnd:extractFormattedTextWithMediaTypeUsingExtractorClass
- }
-
- /**
- * Extracts text from a document defining Extractor constructor.
- *
- * @throws FileNotFoundException
- */
- public static void extractTextUsingExtractorClassWithConstructor() throws FileNotFoundException {
- // ExStart:extractTextUsingExtractorClassWithConstructor
- WordsMediaTypeDetector detector = new WordsMediaTypeDetector();
- // Create an encoding detector
- EncodingDetector encodingDetector = new EncodingDetector(java.nio.charset.Charset.forName("windows-1251"));
- // Create an instance of Extractor
- Extractor extractor = new Extractor(detector, encodingDetector, null);
- // Extract a text from the stream
- System.out.println(extractor.extractText(Common.mapSourceFilePath(DOC_FILE_PATH)));
- // ExEnd:extractTextUsingExtractorClassWithConstructor
- }
-
- /**
- * Extracts formatted text from a document defining Extractor constructor.
- *
- * @throws FileNotFoundException
- */
- public static void extractFormattedTextUsingExtractorClassWithConstructor() throws FileNotFoundException {
- // ExStart:extractTextUsingExtractorClassWithConstructor
- // Create an instance of Extractor with a custom document formatter
- Extractor extractor = new Extractor(null, null, null, new MarkdownDocumentFormatter());
- // Extract a Markdown-formatted text
- System.out.println(extractor.extractFormattedText(Common.mapSourceFilePath(DOC_FILE_PATH)));
- // ExEnd:extractFormattedTextUsingExtractorClassWithConstructor
- }
-
- /**
- * Extracts text from a document using Text Extractor.
- *
- */
- public static void extractTextUsingTextExtractor() {
- try {
- // ExStart:extractTextUsingWordTextExtractor
- // Create a text extractor
- try (WordsTextExtractor extractor = new WordsTextExtractor(Common.mapSourceFilePath(DOC_FILE_PATH))) {
- // Extract a text from the document
- System.out.println(extractor.extractAll());
- }
- // ExEnd:extractTextUsingWordTextExtractor
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts text from a document using text mode.
- *
- */
- public static void extractTextInSimpleExtractMode() {
- try {
- // ExStart:extractTextInSimpleExtractMode
- // Create a text extractor
- try (CellsTextExtractor extractor = new CellsTextExtractor(Common.mapSourceFilePath(EXCEL_FILE_PATH))) {
- // Set ExtractMode for the faster text extraction
- extractor.setExtractMode(ExtractMode.Simple);
- // Extract a text from the document
- System.out.println(extractor.extractAll());
- }
- // ExEnd:extractTextInSimpleExtractMode
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Detects source document's media type and extracts text.
- *
- */
- public static void extractTextWithMediaTypeDetection() {
- try {
- // ExStart:extractTextWithMediaTypeDetection
- // Create a media type detector
- WordsMediaTypeDetector detector = new WordsMediaTypeDetector();
- String fileName = Common.mapSourceFilePath(DOC_FILE_PATH);
- // Detect a media type of the file
- String mediaType = detector.detect(fileName);
- // If media type is docx
- if (mediaType.equals(MediaTypeNames.Application.WORD_OPEN_XML)) {
- // Create a text extractor
- try (WordsTextExtractor extractor = new WordsTextExtractor(fileName)) {
- // Extract a text from the document
- System.out.println(extractor.extractAll());
- }
- }
- // ExEnd:extractTextWithMediaTypeDetection
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Detects source document's media type using CompositeMediaTypeDetector and
- * extracts text using appropriate text extractor.
- *
- */
- public static void extractTextWithCompositeMediaTypeDetection() {
- try {
- // ExStart:extractTextWithCompositeMediaTypeDetection
- // Create a composite media type detector
- CompositeMediaTypeDetector detector = new CompositeMediaTypeDetector(
- new MediaTypeDetector[] { new WordsMediaTypeDetector(), // Detector
- // for
- // Words
- // documents
- new CellsMediaTypeDetector(), // Detector for Cells
- // documents
- });
- String fileName = Common.mapSourceFilePath(DOC_FILE_PATH);
- // Detect a media type of the file
- String mediaType = detector.detect(fileName);
- // If media type is docx
- if (mediaType.equals(MediaTypeNames.Application.WORD_OPEN_XML)) {
- // Create a text extractor
- try (WordsTextExtractor extractor = new WordsTextExtractor(fileName)) {
- // Extract a text from the document
- System.out.println(extractor.extractAll());
- }
- }
- // If media type is xlxs
- else if (mediaType.equals(MediaTypeNames.Application.EXCEL_OPEN_XML)) {
- // Create a text extractor
- try (CellsTextExtractor extractor = new CellsTextExtractor(fileName)) {
- // Extract a text from the document
- System.out.println(extractor.extractAll());
- }
- }
- // ExEnd:extractTextWithCompositeMediaTypeDetection
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts text from a document using ExtractorFactory. ExtractorFactory
- * automatically detects the document's format and creates the proper text
- * extractor.
- *
- */
- public static void extractTextUsingExtractorFactory() {
- try {
- // ExStart:extractTextUsingExtractorFactor
- String fileName = Common.mapSourceFilePath(DOC_FILE_PATH);
- // Create an extractor factory
- ExtractorFactory factory = new ExtractorFactory();
- // Try to create a text extractor
- try (TextExtractor extractor = factory.createTextExtractor(fileName)) {
- // If text extractor is supported (extractor != null), then
- // extract
- // a text from the document
- System.out.println(extractor != null ? extractor.extractAll() : "The document format is not supported");
- }
- // ExEnd:extractTextUsingExtractorFactory
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts text and metadata information from the documents contained by
- * OST container
- *
- */
- public static void extractTextFromDocumentsContainedInOSTContainer() {
- try {
- // ExStart:extractTextFromDocumentsContainedInOSTContainer
- // Create an extractor factory
- ExtractorFactory factory = new ExtractorFactory();
- // Create a container
- try (PersonalStorageContainer container = new PersonalStorageContainer(
- Common.mapSourceFilePath(OST_FILE_PATH))) {
- // Iterate over container's entities
- for (int i = 0; i < container.getEntities().size(); i++) {
- System.out.println("Name: " + container.getEntities().get(i).getName()); // name
- // of
- // the
- // file
- System.out.println("Path: " + container.getEntities().get(i).getPath().toString()); // path
- // of
- // the
- // file
- System.out.println("MediaType :" + container.getEntities().get(i).getMediaType()); // media
- // type
- // of
- // the
- // file
- System.out.println("Date: " + container.getEntities().get(i).getDate().toString()); // date
- // when
- // the
- // file
- // was
- // added
- // to
- // the
- // archive
- System.out.println("Size: " + container.getEntities().get(i).getSize()); // uncompressed
- // size
- // of
- // the
- // file
- System.out.println("Subject: " + container.getEntities().get(i).get_Item(MetadataNames.SUBJECT)); // subject
- // of
- // the
- // email
- System.out.println("From: " + container.getEntities().get(i).get_Item(MetadataNames.EMAIL_FROM)); // "from"
- // addresses
- // of
- // the
- // email
- System.out.println("To: " + container.getEntities().get(i).get_Item(MetadataNames.EMAIL_TO)); // "to"
- // addresses
- // of
- // the
- // email
-
- // Try to create a text extractor for the file of the
- // container
- try (TextExtractor extractor = factory
- .createTextExtractor(container.getEntities().get(i).openStream())) {
- System.out.println("Content:");
- // If text extractor is supported (extractor != null)
- // then extract a text from the document
- System.out.println(
- extractor != null ? extractor.extractAll() : "The document format is not supported");
- }
- }
- }
- // ExEnd:extractTextFromDocumentsContainedInOSTContainer
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts formatted text from a document using Formatted Text Extractor.
- *
- */
- public static void extractFormattedTextUsingFormattedTextExtractor() {
- try {
- // ExStart:extractFormattedTextUsingFormattedTextExtractor
- // Create a formatted text extractor
- try (WordsFormattedTextExtractor extractor = new WordsFormattedTextExtractor(
- Common.mapSourceFilePath(DOC_FILE_PATH))) {
- // Extract a formatted text from the document
- System.out.println(extractor.extractAll());
- }
- // ExEnd:extractFormattedTextUsingFormattedTextExtractor
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts formatted text from a document using ExtractorFactory.
- *
- */
- public static void extractFormattedTextUsingExtractorFactory() {
- try {
- // ExStart:extractFormattedTextUsingExtractorFactory
- String fileName = Common.mapSourceFilePath(DOC_FILE_PATH);
- // Create an extractor factory
- ExtractorFactory factory = new ExtractorFactory();
- // Try to create a formatted text extractor
- try (TextExtractor extractor = factory.createFormattedTextExtractor(fileName)) {
- // If text extractor is supported (extractor != null), then
- // extract a formatted text from the document
- System.out.println(extractor != null ? extractor.extractAll() : "The document format is not supported");
- }
- // ExEnd:extractFormattedTextUsingExtractorFactory
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts formatted text from a document with text formatter.
- *
- */
- public static void extractFormattedTextWithTextFormatter() {
- try {
- // ExStart:extractFormattedTextUsingTextFormatter
- // Create a formatted text extractor
- try (WordsFormattedTextExtractor extractor = new WordsFormattedTextExtractor(
- Common.mapSourceFilePath(DOC_FILE_PATH))) {
- // Set a Markdown text formatter (now the text is formatted as
- // Markdown)
- extractor.setDocumentFormatter(new MarkdownDocumentFormatter());
- // Extract a formatted text from the document
- System.out.println(extractor.extractAll());
- }
- // ExEnd:extractFormattedTextUsingTextFormatter
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts formatted text from a document with text formatter using
- * ITextExtractorWithFormatter. This is helpful you want to check if the
- * extractor supports ITextExtractorWithFormatter on runtime.
- *
- */
- public static void extractFormattedTextWithTextFormatterUsingITextExtractorWithFormatter() {
- try {
- // ExStart:extractFormattedTextWithTextFormatterUsingITextExtractorWithFormatter
- // Create a formatted text extractor
- try (WordsFormattedTextExtractor extractor = new WordsFormattedTextExtractor(
- Common.mapSourceFilePath(DOC_FILE_PATH))) {
- // If the extractor supports ITextExtractorWithFormatter
- // interface
- if (extractor instanceof ITextExtractorWithFormatter) {
- // Set MarkdownDocumentFormatter formatter
- ((ITextExtractorWithFormatter) extractor).setDocumentFormatter(new MarkdownDocumentFormatter());
- }
- // Extract a formatted text from the document
- System.out.println(extractor.extractAll());
- }
- // ExEnd:extractFormattedTextWithTextFormatterUsingITextExtractorWithFormatter
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts formatted text from a document with text formatter using
- * Extractor Factory.
- *
- */
- public static void extractFormattedTextWithTextFormatterUsingExtractorFactory() {
- try {
- // ExStart:extractFormattedTextWithTextFormatterUsingExtractorFactory
- String fileName = Common.mapSourceFilePath(DOC_FILE_PATH);
- // Create an extractor factory with a markdown text formatter as
- // default
- ExtractorFactory factory = new ExtractorFactory(new MarkdownDocumentFormatter());
- // Try to create a formatted text extractor
- try (TextExtractor extractor = factory.createFormattedTextExtractor(fileName)) {
- // If text extractor is supported (extractor != null), then
- // extract
- // a formatted text from the document
- System.out.println(extractor != null ? extractor.extractAll() : "The document format is not supported");
- }
- // ExEnd:extractFormattedTextWithTextFormatterUsingExtractorFactory
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts highlights.
- *
- */
- public static void extractHighlights() {
- try {
- // ExStart:extractHighlights
- // Create a text extractor
- try (WordsTextExtractor extractor = new WordsTextExtractor(Common.mapSourceFilePath(DOC_FILE_PATH))) {
- // Extract highlights
- java.util.List highlights = extractor.extractHighlights(
- HighlightOptions.createFixedLengthOptions(HighlightDirection.Left, 15, 10), // highlight
- // from
- // position
- // 15
- // to
- // position
- // 5
- // (15
- // -
- // 10)
- HighlightOptions.createFixedLengthOptions(HighlightDirection.Right, 20, 10) // highlight
- // from
- // position
- // 20
- // to
- // position
- // 30
- // (20
- // +
- // 10)
- );
-
- // Iterate over highlights
- for (int i = 0; i < highlights.size(); i++) {
- // Print the highlight
- System.out.println(highlights.get(i));
- }
- }
- // ExEnd:extractHighlights
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts highlights with CreateLineOptions. This way, highlight can be
- * limited to line's start/end.
- *
- */
- public static void extractHighlightsWithLineOptions() {
- try {
- // ExStart:extractHighlightsWithLineOptions
- // Create a text extractor
- try (WordsTextExtractor extractor = new WordsTextExtractor(Common.mapSourceFilePath(DOC_FILE_PATH))) {
- // Extract highlights
- java.util.List highlights = extractor.extractHighlights(
- HighlightOptions.createLineOptions(HighlightDirection.Left, 15), // highlight
- // from
- // position
- // 15
- // to
- // the
- // beginning
- // of
- // the
- // line
- HighlightOptions.createLineOptions(HighlightDirection.Right, 20) // highlight
- // from
- // position
- // 20
- // to
- // the
- // end
- // of
- // the
- // line
- );
-
- // Iterate over highlights
- for (int i = 0; i < highlights.size(); i++) {
- // Print the highlight
- System.out.println(highlights.get(i));
- }
- }
- // ExEnd:extractHighlightsWithLineOptions
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts highlights with CreateWordsCountOptions. This way, highlight can
- * be limited to the words count.
- *
- */
- public static void extractHighlightsWithWordsCountOptions() {
- try {
- // ExStart:extractHighlightsWithWordsCountOptions
- // Create a text extractor
- try (WordsTextExtractor extractor = new WordsTextExtractor(Common.mapSourceFilePath(DOC_FILE_PATH))) {
- // Extract highlights
- java.util.List highlights = extractor.extractHighlights(
- HighlightOptions.createWordsCountOptions(HighlightDirection.Left, 15, 5), // highlight
- // with
- // no
- // more
- // than
- // 5
- // words
- // before
- // position
- // 15
- HighlightOptions.createWordsCountOptions(HighlightDirection.Right, 20, 5) // highlight
- // with
- // no
- // more
- // than
- // 5
- // words
- // after
- // position
- // 20
- );
-
- // Iterate over highlights
- for (int i = 0; i < highlights.size(); i++) {
- // Print the highlight
- System.out.println(highlights.get(i));
- }
- }
- // ExEnd:extractHighlightsWithWordsCountOptions
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts highlights with SearchHighlightOptions. This way, you can find
- * the left highlight (a text on the left side of the found text) and the
- * right highlight (a text on the left side of the found text).
- *
- */
- public static void extractHighlightsWithSearchHighlightOptions() {
- try {
- // ExStart:extractHighlightsWithSearchHighlightOptions
- // Create a text extractor
- try (WordsTextExtractor extractor = new WordsTextExtractor(Common.mapSourceFilePath(DOC_FILE_PATH))) {
- // Create a search handler. It is used to store search results
- ListSearchHandler handler = new ListSearchHandler();
- // Create highlight options to extract a highlight with a search
- // result
- SearchHighlightOptions highlightOptions = SearchHighlightOptions.createLineOptions(100, 100);
- // Perform the search for two keywords
- extractor.search(new SearchOptions(highlightOptions), handler,
- java.util.Arrays.asList(new String[] { "test", "keyword" }));
-
- // If the handler contains data
- if (handler.getList().size() > 0) {
- /// Iterate over search results
- for (int i = 0; i < handler.getList().size(); i++) {
- System.out.println(handler.getList().get(i).getLeftText()); // text
- // before
- // the
- // found
- // text
- System.out.println("_");
- System.out.println(handler.getList().get(i).getFoundText()); // found
- // text
- // ("test
- // text"
- // or
- // "keyword")
- System.out.println("_");
- System.out.println(handler.getList().get(i).getRightText()); // text
- // after
- // the
- // found
- // text
- System.out.println("---");
- }
- } else {
- System.out.println("Not found");
- }
- }
- // ExEnd:extractHighlightsWithSearchHighlightOptions
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts document pages using IPageTextExtractor
- *
- */
- public static void extractDocumentPagesUsingIPageTextExtractor() {
- try {
- // ExStart:extractDocumentPagesUsingIPageTextExtractor
- // Create an extractor factory
- ExtractorFactory factory = new ExtractorFactory();
- // Create an instance of text extractor class
- try (TextExtractor extractor = factory
- .createFormattedTextExtractor(Common.mapSourceFilePath(DOC_FILE_PATH))) {
- // Check if IPageTextExtractor is supported
- IPageTextExtractor pageTextExtractor = (IPageTextExtractor) extractor;
- if (pageTextExtractor != null) {
- // Iterate over all pages
- for (int i = 0; i < pageTextExtractor.getPageCount(); i++) {
- // Print a page number
- System.out.println(String.format("%d/%d", i, pageTextExtractor.getPageCount()));
- // Extract a text from the page
- System.out.println(pageTextExtractor.extractPage(i));
- }
- }
- }
- // ExEnd:extractDocumentPagesUsingIPageTextExtractor
- } catch (Exception exp) {
- System.out.println("Exception: " + exp.getMessage());
- exp.printStackTrace();
- }
- }
-
- /**
- * Extracts text from password protected document. If the password is not
- * set or incorrect InvalidPasswordException is thrown.
- *
- */
- public static void extractTextFromPasswordProtectedDocument() {
- // ExStart:extractTextFromPasswordProtectedDocument
- // Create an instance of LoadOptions
- LoadOptions loadOptions = new LoadOptions();
- // Set the wrong document password
- loadOptions.setPassword("4321"); // or comment this line
-
- WordsTextExtractor extractor = null;
- try {
- // Create a text extractor for the password-protected document
- extractor = new WordsTextExtractor(Common.mapSourceFilePath(DOC_FILE_PATH), loadOptions);
- // Extract all the text from the document
- System.out.println(extractor.extractAll());
- } catch (InvalidPasswordException ex) {
- // Print the message if the password is incorrect (or empty)
- System.out.println("Invalid password.");
- }
- // ExEnd:extractTextFromPasswordProtectedDocument
- }
-
- /**
- * Extracts text area from PDF document.
- *
- */
- public static void extractTextAreaFromDocument() {
- // ExStart:extractTextAreaFromDocument_18.7
- try {
- // Create a text extractor
- PdfTextExtractor extractor = new PdfTextExtractor(Common.mapSourceFilePath(PDF_FILE_PATH));
-
- // Create search options
- TextAreaSearchOptions searchOptions = new TextAreaSearchOptions();
- // Set a regular expression to search 'Invoice # XXX' text
- searchOptions.setExpression("\\s?INVOICE\\s?#\\s?[0-9]+");
- // Limit the search with a rectangle
- searchOptions.setRectangle(new Rectangle(10, 10, 300, 150));
-
- // Get text areas
- java.util.List