jeremyarancio committed on
Commit
17306ce
1 Parent(s): 32de7fa

Change app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -10
app.py CHANGED
@@ -1,9 +1,19 @@
 
 
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
  import spaces
5
 
6
 
 
 
 
 
 
 
 
 
7
  # Example images and texts
8
  EXAMPLES = [
9
  ["images/ingredients_1.jpg", "24.36% chocolat noir 63% origine non UE (cacao, sucre, beurre de cacao, émulsifiant léci - thine de colza, vanille bourbon gousse), œuf, farine de blé, beurre, sucre, miel, sucre perlé, levure chimique, zeste de citron."],
@@ -12,18 +22,47 @@ EXAMPLES = [
12
  ["images/ingredients_4.jpg", "Eau de noix de coco 93.9%, Arôme natutel de fruit"],
13
  ["images/ingredients_5.jpg", "Sucre, pâte de cacao, beurre de cacao, émulsifiant: léci - thines (soja). Peut contenir des traces de lait. Chocolat noir: cacao: 50% minimum. À conserver à l'abri de la chaleur et de l'humidité. Élaboré en France."],
14
  ]
 
15
  MODEL_ID = "openfoodfacts/spellcheck-mistral-7b"
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  # CPU/GPU device
19
  zero = torch.Tensor([0]).cuda()
20
 
 
 
 
 
 
 
 
21
  # Tokenizer
 
22
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
23
  tokenizer.pad_token = tokenizer.eos_token
24
  tokenizer.pad_token_id = tokenizer.eos_token_id
25
 
26
  # Model
 
27
  model = AutoModelForCausalLM.from_pretrained(
28
  MODEL_ID,
29
  device_map="auto",
@@ -31,6 +70,10 @@ model = AutoModelForCausalLM.from_pretrained(
31
  # torch_dtype=torch.bfloat16,
32
  )
33
 
 
 
 
 
34
  @spaces.GPU
35
  def process(text: str) -> str:
36
  """Take the text, the tokenizer and the causal model and generate the correction."""
@@ -50,6 +93,7 @@ def process(text: str) -> str:
50
 
51
  def prepare_instruction(text: str) -> str:
52
  """Prepare instruction prompt for fine-tuning and inference.
 
53
 
54
  Args:
55
  text (str): List of ingredients
@@ -68,20 +112,17 @@ def prepare_instruction(text: str) -> str:
68
  ##########################
69
  # GRADIO SETUP
70
  ##########################
71
-
72
- # Creating the Gradio interface
73
  with gr.Blocks() as demo:
74
 
75
- gr.Markdown("# Ingredients Spellcheck")
76
- gr.Markdown("")
77
 
78
  with gr.Row():
79
  with gr.Column():
80
- image = gr.Image(type="pil", label="image_input")
81
- spellcheck_button = gr.Button(value='Spellcheck')
82
 
83
  with gr.Column():
84
  ingredients = gr.Textbox(label="List of ingredients")
 
85
  correction = gr.Textbox(label="Correction", interactive=False)
86
 
87
  with gr.Row():
@@ -92,10 +133,7 @@ with gr.Blocks() as demo:
92
  image,
93
  ingredients,
94
  ],
95
- outputs=[correction],
96
- run_on_click=False,
97
  )
98
-
99
  spellcheck_button.click(
100
  fn=process,
101
  inputs=[ingredients],
 
1
+ import logging
2
+
3
  import gradio as gr
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
5
  import torch
6
  import spaces
7
 
8
 
9
+ ##########################
10
+ # CONFIGURATION
11
+ ##########################
12
+ logging.basicConfig(
13
+ level=logging.getLevelName("INFO"),
14
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
15
+ )
16
+
17
  # Example images and texts
18
  EXAMPLES = [
19
  ["images/ingredients_1.jpg", "24.36% chocolat noir 63% origine non UE (cacao, sucre, beurre de cacao, émulsifiant léci - thine de colza, vanille bourbon gousse), œuf, farine de blé, beurre, sucre, miel, sucre perlé, levure chimique, zeste de citron."],
 
22
  ["images/ingredients_4.jpg", "Eau de noix de coco 93.9%, Arôme natutel de fruit"],
23
  ["images/ingredients_5.jpg", "Sucre, pâte de cacao, beurre de cacao, émulsifiant: léci - thines (soja). Peut contenir des traces de lait. Chocolat noir: cacao: 50% minimum. À conserver à l'abri de la chaleur et de l'humidité. Élaboré en France."],
24
  ]
25
+
26
  MODEL_ID = "openfoodfacts/spellcheck-mistral-7b"
27
 
28
+ PRESENTATION = """# 🍊 Ingredients Spellcheck - Open Food Facts
29
+
30
+ Open Food Facts is a non-profit organization building the largest open food database in the world. 🌎
31
+
32
+ When a product is added to the database, all its details, such as allergens, additives, or nutritional values, are either written down by the contributor,
33
+ or automatically extracted from the product pictures using OCR.
34
+
35
+ However, it often happens that the information extracted by OCR contains typos and errors due to bad quality pictures: low-definition, curved product, light reflection, etc.
36
+
37
+ To solve this problem, we developed an 🍊 **Ingredient Spellcheck** 🍊, a model capable of correcting typos in a list of ingredients following a defined guideline.
38
+ The model, based on Mistral-7B-v0.3, was fine-tuned on thousands of corrected lists of ingredients extracted from the database. More information in the model card.
39
+
40
+ ## 👇 Links
41
+
42
+ * Open Food Facts website: https://world.openfoodfacts.org/discover
43
+ * Open Food Facts Github: https://github.com/openfoodfacts
44
+ * Spellcheck project: https://github.com/openfoodfacts/openfoodfacts-ai/tree/develop/spellcheck
45
+ * Model card: https://huggingface.co/openfoodfacts/spellcheck-mistral-7b
46
+ """
47
 
48
  # CPU/GPU device
49
  zero = torch.Tensor([0]).cuda()
50
 
51
+ # Seed Transformers so that generation is as reproducible as possible (seeding alone does not guarantee 100% reproducibility)
52
+ set_seed(42)
53
+
54
+
55
+ ##########################
56
+ # LOADING
57
+ ##########################
58
  # Tokenizer
59
+ logging.info(f"Load tokenizer from {MODEL_ID}.")
60
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
61
  tokenizer.pad_token = tokenizer.eos_token
62
  tokenizer.pad_token_id = tokenizer.eos_token_id
63
 
64
  # Model
65
+ logging.info(f"Load model from {MODEL_ID}.")
66
  model = AutoModelForCausalLM.from_pretrained(
67
  MODEL_ID,
68
  device_map="auto",
 
70
  # torch_dtype=torch.bfloat16,
71
  )
72
 
73
+
74
+ ##########################
75
+ # FUNCTIONS
76
+ ##########################
77
  @spaces.GPU
78
  def process(text: str) -> str:
79
  """Take the text, the tokenizer and the causal model and generate the correction."""
 
93
 
94
  def prepare_instruction(text: str) -> str:
95
  """Prepare instruction prompt for fine-tuning and inference.
96
+ Identical to instruction during training.
97
 
98
  Args:
99
  text (str): List of ingredients
 
112
  ##########################
113
  # GRADIO SETUP
114
  ##########################
 
 
115
  with gr.Blocks() as demo:
116
 
117
+ gr.Markdown(PRESENTATION)
 
118
 
119
  with gr.Row():
120
  with gr.Column():
121
+ image = gr.Image(type="pil", label="image_input", interactive=False)
 
122
 
123
  with gr.Column():
124
  ingredients = gr.Textbox(label="List of ingredients")
125
+ spellcheck_button = gr.Button(value='Run spellcheck')
126
  correction = gr.Textbox(label="Correction", interactive=False)
127
 
128
  with gr.Row():
 
133
  image,
134
  ingredients,
135
  ],
 
 
136
  )
 
137
  spellcheck_button.click(
138
  fn=process,
139
  inputs=[ingredients],