Chan-Y committed on
Commit
c97532f
1 Parent(s): 28f0884

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import warnings
2
  warnings.simplefilter(action='ignore', category=FutureWarning)
3
 
4
- import PyPDF2
5
  import gradio as gr
6
  from langchain.prompts import PromptTemplate
7
  from pathlib import Path
@@ -22,13 +22,15 @@ llm_engine_hf = ChatHuggingFace(llm=llm)
22
  def read_pdf(file_path):
23
  print("It is a PDF file")
24
  try:
25
- pdf_reader = PyPDF2.PdfReader(file_path)
26
  text = ""
27
- for page in range(len(pdf_reader.pages)):
28
- text += pdf_reader.pages[page].extract_text()
 
 
29
  return text
30
  except Exception as e:
31
- print("Error reading file, ", e)
32
 
33
  def read_txt(file_path):
34
  print("It is not a PDF file")
 
1
  import warnings
2
  warnings.simplefilter(action='ignore', category=FutureWarning)
3
 
4
+ import fitz
5
  import gradio as gr
6
  from langchain.prompts import PromptTemplate
7
  from pathlib import Path
 
22
  def read_pdf(file_path):
23
  print("It is a PDF file")
24
  try:
25
+ pdf_document = fitz.open(file_path)
26
  text = ""
27
+ for page_num in range(len(pdf_document)):
28
+ page = pdf_document[page_num]
29
+ text += page.get_text()
30
+
31
  return text
32
  except Exception as e:
33
+ print("Error reading file,", e)
34
 
35
  def read_txt(file_path):
36
  print("It is not a PDF file")