srinivas-mushroom committed on
Commit
f33afb3
1 Parent(s): 4af8357

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -16
app.py CHANGED
@@ -10,7 +10,16 @@ model_name = "distilbert-base-cased-distilled-squad"
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
  model = AutoModelForQuestionAnswering.from_pretrained(model_name)
12
 
13
- def answer_questions(pdf_file, questions):
 
 
 
 
 
 
 
 
 
14
  # Load PDF file and extract text
15
  pdf_reader = PyPDF2.PdfFileReader(io.BytesIO(pdf_file.read()))
16
  text = ""
@@ -19,27 +28,23 @@ def answer_questions(pdf_file, questions):
19
  text += page.extractText()
20
  text = text.strip()
21
 
22
- answers = []
23
- for question in questions:
24
- # Tokenize question and text
25
- input_ids = tokenizer.encode(question, text)
26
 
27
- # Perform question answering
28
- outputs = model(torch.tensor([input_ids]), return_dict=True)
29
- answer_start = outputs.start_logits.argmax().item()
30
- answer_end = outputs.end_logits.argmax().item()
31
- answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end+1]))
32
 
33
- answers.append(answer)
34
-
35
- return answers
36
 
37
  inputs = [
38
  gr.inputs.File(label="PDF document"),
39
- gr.inputs.Textbox(label="Questions (one per line)", type="text")
40
  ]
41
 
42
- outputs = gr.outputs.Textarea(label="Answers")
43
 
44
  gr.Interface(fn=answer_questions, inputs=inputs, outputs=outputs, title="PDF Question Answering Tool",
45
- description="Upload a PDF document and ask multiple questions. The app will use a pre-trained model to find the answers.").launch()
 
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
  model = AutoModelForQuestionAnswering.from_pretrained(model_name)
12
 
13
+ # Define a list of pre-defined questions
14
+ predefined_questions = [
15
+ "What is the purpose of this document?",
16
+ "What is the main topic of the document?",
17
+ "Who is the target audience?",
18
+ "What is the author's main argument?",
19
+ "What is the conclusion of the document?",
20
+ ]
21
+
22
+ def answer_questions(pdf_file, question):
23
  # Load PDF file and extract text
24
  pdf_reader = PyPDF2.PdfFileReader(io.BytesIO(pdf_file.read()))
25
  text = ""
 
28
  text += page.extractText()
29
  text = text.strip()
30
 
31
+ # Tokenize question and text
32
+ input_ids = tokenizer.encode(question, text)
 
 
33
 
34
+ # Perform question answering
35
+ outputs = model(torch.tensor([input_ids]), return_dict=True)
36
+ answer_start = outputs.start_logits.argmax().item()
37
+ answer_end = outputs.end_logits.argmax().item()
38
+ answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end+1]))
39
 
40
+ return answer
 
 
41
 
42
  inputs = [
43
  gr.inputs.File(label="PDF document"),
44
+ gr.inputs.Dropdown(label="Question", choices=predefined_questions),
45
  ]
46
 
47
+ outputs = gr.outputs.Textbox(label="Answer")
48
 
49
  gr.Interface(fn=answer_questions, inputs=inputs, outputs=outputs, title="PDF Question Answering Tool",
50
+ description="Upload a PDF document and select a question from the dropdown. The app will use a pre-trained model to find the answer.").launch()