import base64
import io
import json

import gradio as gr
import requests
from PIL import Image


def analyze_image(image, question):
    # Gradio hands us the uploaded image as a numpy array; re-encode it as a
    # base64 PNG for the API payload.
    im = Image.fromarray(image)
    in_mem_file = io.BytesIO()
    im.save(in_mem_file, format="PNG")

    payload = {
        "model": "Baseline",
        "tasktype": "Extraction",
        "questions": [{"Pages": [1], "Text": question}],
        "image": base64.b64encode(in_mem_file.getvalue()).decode(),
    }
    url = "https://ky8mfb27dj.execute-api.us-east-1.amazonaws.com/dev/analyzedocument/submit"
    headers = {"Content-Type": "application/json"}

    response = requests.post(url, headers=headers, data=json.dumps(payload))
    jsonresponse = json.loads(response.text)

    # Pull the first answer and its confidence score out of the nested response.
    result = jsonresponse["body"][0]["result"][0]
    return "Answer: {0}\nConfidence: {1}".format(result["answer"][0], result["score"])
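# For reference, a sketch of the response shape the parsing above assumes
# (the field names are taken from the code; the sample values are made up):
#
#   {
#     "body": [
#       {"result": [{"answer": ["EXAMPLE ANSWER"], "score": 0.97}]}
#     ]
#   }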

description = (
    "Hyland demo for document question answering, fine-tuned on DocVQA "
    "(Document Visual Question Answering). To use it, upload an image, "
    "type a question, and click 'Submit', or click one of the examples to "
    "load it. Read more at the link below."
)
title = "DocVQA"
article = "<p><a href='https://www.docvqa.org/' target='_blank'>DocVQA: Challenge | Overview - Document Visual Question Answering</a></p>"

" examples =[['publaynet_example.jpeg']] css = ".output-image, .input-image, .image-preview {height: 600px !important}" demo = gr.Interface(fn=analyze_image, inputs=[gr.inputs.Image(type="numpy", label="Document image"),"text"], outputs=gr.outputs.Textbox(type="auto", label="Answer"), title=title, description=description, article=article, css=css, enable_queue=True) demo.launch(debug=True)