jeremyarancio committed on
Commit
2df6b4b
1 Parent(s): 36477d8

Update app name

Browse files
Files changed (1) hide show
  1. app.py +108 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ import spaces
5
+
6
+
7
# Example images and their (intentionally noisy) OCR-extracted ingredient
# lists, shown as clickable examples in the UI below.
EXAMPLES = [
    ["images/ingredients_1.jpg", "24.36% chocolat noir 63% origine non UE (cacao, sucre, beurre de cacao, émulsifiant léci - thine de colza, vanille bourbon gousse), œuf, farine de blé, beurre, sucre, miel, sucre perlé, levure chimique, zeste de citron."],
    ["images/ingredients_2.jpg", "farine de froment, œufs, lait entier pasteurisé Aprigine: France), sucre, sel, extrait de vanille naturelle Conditi( 35."],
    ["images/ingredients_3.jpg", "tural basmati rice - cooked (98%), rice bran oil, salt"],
    ["images/ingredients_4.jpg", "Eau de noix de coco 93.9%, Arôme natutel de fruit"],
    ["images/ingredients_5.jpg", "Sucre, pâte de cacao, beurre de cacao, émulsifiant: léci - thines (soja). Peut contenir des traces de lait. Chocolat noir: cacao: 50% minimum. À conserver à l'abri de la chaleur et de l'humidité. Élaboré en France."],
]
# Hugging Face Hub id of the fine-tuned spellcheck model.
MODEL_ID = "openfoodfacts/spellcheck-mistral-7b"


# CPU/GPU device
# Dummy CUDA tensor: only its `.device` is used later (in `process`) as the
# target device for the model inputs.
zero = torch.Tensor([0]).cuda()

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# The base model presumably has no dedicated pad token, so EOS is reused for
# padding — TODO confirm against the model's tokenizer config.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

# Model
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",                        # let accelerate place weights on available devices
    attn_implementation="flash_attention_2",  # requires flash-attn to be installed
    torch_dtype=torch.bfloat16,               # half-precision weights to reduce GPU memory
)
33
+
34
@spaces.GPU
def process(text: str) -> str:
    """Run the spellcheck model on a raw list of ingredients.

    Args:
        text (str): Possibly misspelled / OCR-garbled list of ingredients.

    Returns:
        str: The model's corrected list of ingredients, stripped of
        surrounding whitespace.
    """
    prompt = prepare_instruction(text)
    input_ids = tokenizer(
        prompt,
        add_special_tokens=True,
        return_tensors="pt"
    ).input_ids
    output = model.generate(
        input_ids.to(zero.device),  # move inputs to the GPU (device of the module-level `zero` tensor)
        do_sample=False,            # greedy decoding: deterministic corrections
        max_new_tokens=512,
    )
    # Strip the prompt at the *token* level before decoding. Slicing the
    # decoded string by len(prompt) (the previous approach) is fragile:
    # detokenization does not always reproduce the prompt text byte-for-byte
    # (special-token and whitespace handling), which can shift the cut point
    # and corrupt the returned correction.
    generated_ids = output[0][input_ids.shape[1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
49
+
50
+
51
def prepare_instruction(text: str) -> str:
    """Wrap a list of ingredients in the spellcheck instruction prompt.

    Args:
        text (str): List of ingredients to correct.

    Returns:
        str: Full instruction prompt, used both for fine-tuning and inference.
    """
    return f"###Correct the list of ingredients:\n{text}\n\n###Correction:\n"
66
+
67
+
68
##########################
# GRADIO SETUP
##########################

# Creating the Gradio interface
with gr.Blocks() as demo:

    gr.Markdown("# Ingredients Spellcheck")
    gr.Markdown("")

    with gr.Row():
        # Left column: example image (illustrative only), editable ingredient
        # text, and the button that triggers the spellcheck.
        with gr.Column():
            image = gr.Image(type="pil", label="image_input")
            ingredients = gr.Textbox(label="List of ingredients")
            spellcheck_button = gr.Button(value='Spellcheck')

        # Right column: read-only output holding the model's correction.
        with gr.Column():
            correction = gr.Textbox(label="Correction", interactive=False)

    with gr.Row():
        # Clickable examples that pre-fill the image and text inputs.
        # NOTE(review): `fn=process` accepts a single text argument but two
        # inputs are wired here; with run_on_click=False the function is not
        # called when an example is selected, so the mismatch appears inert —
        # confirm before enabling run_on_click or example caching.
        gr.Examples(
            fn=process,
            examples=EXAMPLES,
            inputs=[
                image,
                ingredients,
            ],
            outputs=[correction],
            run_on_click=False,
        )

    # Run the model on the text box content only; the image is never passed
    # to the model.
    spellcheck_button.click(
        fn=process,
        inputs=[ingredients],
        outputs=[correction]
    )
104
+
105
+
106
+ if __name__ == "__main__":
107
+ # Launch the demo
108
+ demo.launch()