MohamedRashad committed on
Commit
6a52aab
1 Parent(s): 62d4a12

Refactor code and optimize dataframe sorting

Browse files
Files changed (1) hide show
  1. app.py +8 -16
app.py CHANGED
@@ -6,10 +6,6 @@ from datasets import load_dataset
6
  import random
7
  from pathlib import Path
8
 
9
- # tokenizer = AutoTokenizer.from_pretrained("Xenova/gpt-4o")
10
- # token_ids = tokenizer.encode("السلام عليكم ورحمة الله")
11
- # exit()
12
-
13
  initial_list_of_models = [
14
  "Xenova/gpt-4o",
15
  "NousResearch/Meta-Llama-3-8B",
@@ -48,16 +44,12 @@ for model_name in tqdm(initial_list_of_models):
48
  df = df.sort_values(by="➕ Total Number of Tokens", ascending=True)
49
 
50
  # Save the dataframe to a csv file
51
- df.to_json(dataframe_path, lines=True, orient="records")
52
-
53
- # Gradio Functions
54
- def refresh():
55
- global df
56
- df = df.sort_values(by="➕ Total Number of Tokens", ascending=True)
57
- return gr.Dataframe(df), gr.BarPlot(df), gr.Dropdown(choices=df["📛 Models"].tolist())
58
 
59
  def submit(model_name):
60
  global df
 
 
61
  tokenizer = AutoTokenizer.from_pretrained(
62
  model_name, use_fast=True, trust_remote_code=True
63
  )
@@ -72,6 +64,9 @@ def submit(model_name):
72
  },
73
  ignore_index=True,
74
  )
 
 
 
75
 
76
  def generate_distinct_colors(n):
77
  """Generate n visually distinct colors in hexadecimal format."""
@@ -196,9 +191,7 @@ with gr.Blocks() as demo:
196
  model_name = gr.Textbox(
197
  label="Model Name from Hugging Face (e.g. Xenova/gpt-4o)"
198
  )
199
- with gr.Row():
200
- refresh_btn = gr.Button(value="Refresh")
201
- submit_new_model_btn = gr.Button(value="Submit", variant="primary")
202
  with gr.Tab(label="Try tokenizers"):
203
  text = gr.Textbox(label="Enter a text", lines=5, value="السلام عليكم ورحمة الله", rtl=True, text_align="right")
204
  dropdown = gr.Dropdown(
@@ -209,8 +202,7 @@ with gr.Blocks() as demo:
209
  submit_text_btn = gr.Button(value="Submit", variant="primary")
210
  tokenized_textbox = gr.HighlightedText(label="Tokenized text")
211
 
212
- submit_new_model_btn.click(submit, model_name)
213
- refresh_btn.click(refresh, outputs=[dataframe, barplot, dropdown])
214
  submit_text_btn.click(tokenize_text, inputs=[text, dropdown], outputs=[tokenized_textbox])
215
 
216
 
 
6
  import random
7
  from pathlib import Path
8
 
 
 
 
 
9
  initial_list_of_models = [
10
  "Xenova/gpt-4o",
11
  "NousResearch/Meta-Llama-3-8B",
 
44
  df = df.sort_values(by="➕ Total Number of Tokens", ascending=True)
45
 
46
  # Save the dataframe to a csv file
47
+ df.to_json(dataframe_path, lines=True, orient="records", force_ascii=False)
 
 
 
 
 
 
48
 
49
  def submit(model_name):
50
  global df
51
+ if model_name in df["📛 Models"].values:
52
+ return gr.Dataframe(df), gr.BarPlot(df), gr.Dropdown(choices=df["📛 Models"].tolist())
53
  tokenizer = AutoTokenizer.from_pretrained(
54
  model_name, use_fast=True, trust_remote_code=True
55
  )
 
64
  },
65
  ignore_index=True,
66
  )
67
+ df = df.sort_values(by="➕ Total Number of Tokens", ascending=True)
68
+ df.to_json(dataframe_path, lines=True, orient="records", force_ascii=False)
69
+ return gr.Dataframe(df), gr.BarPlot(df), gr.Dropdown(choices=df["📛 Models"].tolist())
70
 
71
  def generate_distinct_colors(n):
72
  """Generate n visually distinct colors in hexadecimal format."""
 
191
  model_name = gr.Textbox(
192
  label="Model Name from Hugging Face (e.g. Xenova/gpt-4o)"
193
  )
194
+ submit_new_model_btn = gr.Button(value="Submit", variant="primary")
 
 
195
  with gr.Tab(label="Try tokenizers"):
196
  text = gr.Textbox(label="Enter a text", lines=5, value="السلام عليكم ورحمة الله", rtl=True, text_align="right")
197
  dropdown = gr.Dropdown(
 
202
  submit_text_btn = gr.Button(value="Submit", variant="primary")
203
  tokenized_textbox = gr.HighlightedText(label="Tokenized text")
204
 
205
+ submit_new_model_btn.click(submit, model_name, outputs=[dataframe, barplot, dropdown])
 
206
  submit_text_btn.click(tokenize_text, inputs=[text, dropdown], outputs=[tokenized_textbox])
207
 
208