Build

Paused

App Files Files Community

ManishThota committed on Feb 12

Commit

89e1517

•

1 Parent(s): 8323cfd

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -19

app.py CHANGED Viewed

@@ -13,31 +13,48 @@ model = AutoModelForCausalLM.from_pretrained("ManishThota/Sparrow",  torch_dtype
     trust_remote_code=True).to(device)
 tokenizer = AutoTokenizer.from_pretrained("ManishThota/Sparrow", trust_remote_code=True)
-def predict_answer(image, question):
-    # Convert PIL image to RGB if not already
-    image = image.convert("RGB")
-    # # Format the text input for the model
-    # text = f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\n{question} ASSISTANT:"
-    # Tokenize the text input
-    encoding = tokenizer(image, question, return_tensors='pt').to(device)
-    out = model.generate(**encoding)
-    # Preprocess the image for the model
-    generated_text = tokenizer.decode(out[0], skip_special_tokens=True)
-    # # Generate the answer
-    # output_ids = model.generate(
-    #     input_ids,
-    #     max_new_tokens=100,
-    #     images=image_tensor,
-    #     use_cache=True)[0]
-    # # Decode the generated tokens to get the answer
-    # answer = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
-    return generated_text
 def gradio_predict(image, question):
     answer = predict_answer(image, question)

     trust_remote_code=True).to(device)
 tokenizer = AutoTokenizer.from_pretrained("ManishThota/Sparrow", trust_remote_code=True)
+# def predict_answer(image, question):
+#     # Convert PIL image to RGB if not already
+#     image = image.convert("RGB")
+#     # # Format the text input for the model
+#     # text = f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\n{question} ASSISTANT:"
+#     # Tokenize the text input
+#     encoding = tokenizer(image, question, return_tensors='pt').to(device)
+#     out = model.generate(**encoding)
+#     # Preprocess the image for the model
+#     generated_text = tokenizer.decode(out[0], skip_special_tokens=True)
+#     # # Generate the answer
+#     # output_ids = model.generate(
+#     #     input_ids,
+#     #     max_new_tokens=100,
+#     #     images=image_tensor,
+#     #     use_cache=True)[0]
+#     # # Decode the generated tokens to get the answer
+#     # answer = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
+#     return generated_text
+def predict_answer(image, question):
+    """Answer a question about an image using the loaded model and tokenizer.
+
+    Args:
+        image: Whatever ``Image.open`` accepts.
+            NOTE(review): presumably a file path handed over by the Gradio
+            image input -- confirm against the interface definition.
+        question: The user's question. It is inserted into the chat prompt
+            and followed by a literal "?".
+
+    Returns:
+        The decoded text of the tokens generated after the prompt, with
+        special tokens skipped and surrounding whitespace stripped.
+    """
+    # Build the chat-style prompt around the user's question.
+    text = f"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\n{question}? ASSISTANT:"
+    image = Image.open(image)
+    # The model is moved to `device` when it is loaded, so the prompt ids have
+    # to live on the same device; `.to(device)` is a no-op if they already do.
+    input_ids = tokenizer(text, return_tensors='pt').input_ids.to(device)
+    # NOTE(review): the device of `image_tensor` is decided by the model's own
+    # `image_preprocess` helper -- confirm it matches `device`.
+    image_tensor = model.image_preprocess(image)
+    # Generate at most 25 new tokens; [0] takes the first returned sequence.
+    output_ids = model.generate(
+        input_ids,
+        max_new_tokens=25,
+        images=image_tensor,
+        use_cache=True)[0]
+    # Skip the first `input_ids.shape[1]` positions (the prompt) so that only
+    # the generated continuation is decoded.
+    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
 def gradio_predict(image, question):
     answer = predict_answer(image, question)