File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -18,7 +18,8 @@ pinecone-client = "^2.2.2"
1818beautifulsoup4 = " 4.12.2"
1919markdown = " >=3.4.4,<3.5.0"
2020loguru = " >=0.7.0,<0.8.0"
21- pdfplumber = " 0.10.3"
21+ pypdf2 = " ^3.0.1"
22+
2223
2324
2425[tool .poetry .group .test .dependencies ]
Original file line number Diff line number Diff line change 1414import sherpa_ai .config as cfg
1515from sherpa_ai .models .sherpa_base_model import SherpaOpenAI
1616
17- import pdfplumber
18-
17+ import PyPDF2
1918def load_files (files : List [str ]) -> List [Document ]:
2019 documents = []
2120 loader = None
@@ -247,7 +246,13 @@ def show_commands_only(logs):
247246
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_path: Path of the PDF file to read; it is opened in binary mode.

    Returns:
        A single string made of each page's extracted text, in page order,
        with no separator inserted between pages.
    """
    # The context manager guarantees the handle is closed even if parsing or
    # text extraction raises part-way through the document.
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Defensive: treat a falsy extraction result as empty text so one
        # unreadable page cannot break the join.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
You can’t perform that action at this time.
0 commit comments