jeremyarancio committed on
Commit
36477d8
1 Parent(s): 31657d5

Initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.jpg filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Ingredients Spellcheck
3
- emoji: 🦀
4
  colorFrom: green
5
  colorTo: indigo
6
  sdk: gradio
 
1
  ---
2
  title: Ingredients Spellcheck
3
+ emoji: 🍊
4
  colorFrom: green
5
  colorTo: indigo
6
  sdk: gradio
demo.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ import spaces
5
+
6
+
# Example images and texts
# Each entry is [image path, ingredient list text]; the pair is fed to the
# image and text inputs of the demo's examples widget, in that order.
# NOTE(review): the texts look like raw OCR output whose typos are presumably
# left in on purpose as spellcheck input -- do not "fix" them; confirm.
EXAMPLES = [
    ["images/ingredients_1.jpg", "24.36% chocolat noir 63% origine non UE (cacao, sucre, beurre de cacao, émulsifiant léci - thine de colza, vanille bourbon gousse), œuf, farine de blé, beurre, sucre, miel, sucre perlé, levure chimique, zeste de citron."],
    ["images/ingredients_2.jpg", "farine de froment, œufs, lait entier pasteurisé Aprigine: France), sucre, sel, extrait de vanille naturelle Conditi( 35."],
    ["images/ingredients_3.jpg", "tural basmati rice - cooked (98%), rice bran oil, salt"],
    ["images/ingredients_4.jpg", "Eau de noix de coco 93.9%, Arôme natutel de fruit"],
    ["images/ingredients_5.jpg", "Sucre, pâte de cacao, beurre de cacao, émulsifiant: léci - thines (soja). Peut contenir des traces de lait. Chocolat noir: cacao: 50% minimum. À conserver à l'abri de la chaleur et de l'humidité. Élaboré en France."],
]
# Model identifier passed to both the tokenizer and the model loaders below.
MODEL_ID = "openfoodfacts/spellcheck-mistral-7b"
16
+
17
+
# CPU/GPU device
# Dummy tensor placed on CUDA; process() reads `zero.device` to move the
# tokenized prompt onto the same device before generation.
zero = torch.Tensor([0]).cuda()

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Reuse the end-of-sequence token as the padding token.
# NOTE(review): presumably the checkpoint defines no pad token -- confirm.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

# Model
# Loaded once at import time in bfloat16 with FlashAttention 2
# (the `flash-attn` package is listed in requirements.txt).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
)
33
+
@spaces.GPU
def process(text: str) -> str:
    """Generate the corrected version of a list of ingredients.

    The text is wrapped in the instruction prompt, tokenized, and passed to
    the causal model with greedy decoding. Only the newly generated tokens
    are decoded and returned.

    Args:
        text (str): List of ingredients to correct.

    Returns:
        str: The model's correction, stripped of surrounding whitespace.
    """
    prompt = prepare_instruction(text)
    encoded = tokenizer(
        prompt,
        add_special_tokens=True,
        return_tensors="pt",
    )
    input_ids = encoded.input_ids.to(zero.device)  # GPU
    output = model.generate(
        input_ids,
        # The pad token is the EOS token, so pass the mask explicitly instead
        # of letting generate() infer it from the pad token id.
        attention_mask=encoded.attention_mask.to(zero.device),
        do_sample=False,
        max_new_tokens=512,
    )
    # Drop the prompt by token count rather than by `len(prompt)` characters:
    # the decoded prompt is not guaranteed to be character-identical to the
    # raw prompt, so a character offset can cut the answer in the wrong place.
    generated_ids = output[0][input_ids.shape[1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
49
+
50
+
def prepare_instruction(text: str) -> str:
    """Wrap a list of ingredients in the instruction prompt.

    The same template is used for fine-tuning and inference.

    Args:
        text (str): List of ingredients

    Returns:
        str: Instruction.
    """
    header = "###Correct the list of ingredients:\n"
    footer = "\n\n###Correction:\n"
    return "".join((header, text, footer))
66
+
67
+
##########################
# GRADIO SETUP
##########################

# Creating the Gradio interface
with gr.Blocks() as demo:

    gr.Markdown("# Ingredients Spellcheck")
    gr.Markdown("")

    with gr.Row():
        with gr.Column():
            image = gr.Image(type="pil", label="image_input")
            ingredients = gr.Textbox(label="List of ingredients")
            spellcheck_button = gr.Button(value='Spellcheck')

        with gr.Column():
            correction = gr.Textbox(label="Correction", interactive=False)

    with gr.Row():
        # Examples only pre-fill the image and text inputs; the correction is
        # produced when the user presses the button. No `fn` is attached here:
        # process() takes the text alone, while the examples carry two inputs
        # (image, text), so running or caching them through process() would
        # fail with a TypeError. Caching is disabled explicitly so the
        # environment default cannot turn it back on.
        gr.Examples(
            examples=EXAMPLES,
            inputs=[
                image,
                ingredients,
            ],
            cache_examples=False,
            run_on_click=False,
        )

    # Only the text box feeds the model; the image is display-only.
    spellcheck_button.click(
        fn=process,
        inputs=[ingredients],
        outputs=[correction],
    )


if __name__ == "__main__":
    # Launch the demo
    demo.launch()
images/ingredients_1.jpg ADDED

Git LFS Details

  • SHA256: 1ac1b875dd8451a81b653526fc3cfbcbee7ec3e14318ed699d29e1376bf67877
  • Pointer size: 131 Bytes
  • Size of remote file: 293 kB
images/ingredients_2.jpg ADDED

Git LFS Details

  • SHA256: ce43533db8a55812e97e5021c3844527d7f1552df9d6aa26e4c644fc38b6ce01
  • Pointer size: 132 Bytes
  • Size of remote file: 1.01 MB
images/ingredients_3.jpg ADDED

Git LFS Details

  • SHA256: dc84dd2e7e74f8ce0edc79bcb1fb64354ebf9b6948dd98971977b68f01d229e4
  • Pointer size: 131 Bytes
  • Size of remote file: 759 kB
images/ingredients_4.jpg ADDED

Git LFS Details

  • SHA256: ecbddc19d6897baaecf2193866cf82495b943eece6eadb5649e790bd434d93e3
  • Pointer size: 131 Bytes
  • Size of remote file: 155 kB
images/ingredients_5.jpg ADDED

Git LFS Details

  • SHA256: 69e4c64079b338cb475f220fe23597cceeac942426b5db1d22ff71c258caa1b6
  • Pointer size: 130 Bytes
  • Size of remote file: 87.7 kB
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ flash-attn
3
+ transformers
4
+ spaces
5
+ sentencepiece