donb-hf committed
Commit ad6330a
1 Parent(s): a6d3ba4

refactor app

Files changed (4)
  1. .gitignore +8 -0
  2. app.py +11 -60
  3. requirements.txt +3 -1
  4. utils.py +73 -0
.gitignore CHANGED
@@ -1,2 +1,10 @@
+ # Ignore Python cache files
+ __pycache__/
+ *.py[cod]
+
+ # Ignore virtual environment
  .venv/
+
+ # Ignore environment-specific files
+ .env
  .python-version
app.py CHANGED
@@ -1,65 +1,13 @@
+ import os
  import gradio as gr
- import os, requests
- import torch, torchvision, einops
- import spaces
- import subprocess
- from transformers import AutoModelForCausalLM, AutoModel, AutoModelForVision2Seq, PaliGemmaForConditionalGeneration, LlavaForConditionalGeneration, LlavaNextForConditionalGeneration
- from huggingface_hub import login
+ from utils import get_model_summary, install_flash_attn, authenticate_hf

  # Install required package
- subprocess.run(
-     "pip install flash-attn --no-build-isolation",
-     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-     shell=True,
- )
+ install_flash_attn()

+ # Authenticate with Hugging Face
  HF_TOKEN = os.getenv("HF_TOKEN")
- login(token=HF_TOKEN, add_to_git_credential=True)
+ authenticate_hf(HF_TOKEN)
-
- # Cache for storing loaded models and their summaries
- model_cache = {}
-
- # Function to get the model summary
- @spaces.GPU
- def get_model_summary(model_name):
-     if model_name in model_cache:
-         return model_cache[model_name], ""
-
-     try:
-         # Fetch the config.json file
-         config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
-         headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-         response = requests.get(config_url, headers=headers)
-         response.raise_for_status()
-         config = response.json()
-         architecture = config["architectures"][0]
-
-         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-         # Select the correct model class based on the architecture
-         if architecture == "LlavaNextForConditionalGeneration":
-             from transformers import LlavaNextForConditionalGeneration
-             model = LlavaNextForConditionalGeneration.from_pretrained(model_name, trust_remote_code=True).to(device)
-         elif architecture == "LlavaForConditionalGeneration":
-             from transformers import LlavaForConditionalGeneration
-             model = LlavaForConditionalGeneration.from_pretrained(model_name, trust_remote_code=True).to(device)
-         elif architecture == "PaliGemmaForConditionalGeneration":
-             from transformers import PaliGemmaForConditionalGeneration
-             model = PaliGemmaForConditionalGeneration.from_pretrained(model_name, trust_remote_code=True).to(device)
-         elif architecture == "Idefics2ForConditionalGeneration":
-             from transformers import Idefics2ForConditionalGeneration
-             model = Idefics2ForConditionalGeneration.from_pretrained(model_name, trust_remote_code=True).to(device)
-         elif architecture == "MiniCPMV":
-             from transformers import MiniCPMV
-             model = MiniCPMV.from_pretrained(model_name, trust_remote_code=True).to(device)
-         else:
-             model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device)
-
-         model_summary = str(model)
-         model_cache[model_name] = model_summary
-         return model_summary, ""
-     except Exception as e:
-         return "", str(e)

  # Create the Gradio Blocks interface
  with gr.Blocks() as demo:
@@ -69,13 +17,14 @@ with gr.Blocks() as demo:
      gr.Markdown("### Vision Models")
      vision_examples = gr.Examples(
          examples=[
+             ["google/paligemma-3b-mix-224"],
+             ["google/paligemma-3b-ft-refcoco-seg-224"],
              ["llava-hf/llava-v1.6-mistral-7b-hf"],
              ["xtuner/llava-phi-3-mini-hf"],
              ["xtuner/llava-llama-3-8b-v1_1-transformers"],
              ["vikhyatk/moondream2"],
              ["openbmb/MiniCPM-Llama3-V-2_5"],
              ["microsoft/Phi-3-vision-128k-instruct"],
-             ["google/paligemma-3b-mix-224"],
              ["HuggingFaceM4/idefics2-8b-chatty"],
              ["microsoft/llava-med-v1.5-mistral-7b"]
          ],
@@ -85,10 +34,12 @@
      gr.Markdown("### Other Models")
      other_examples = gr.Examples(
          examples=[
+             ["dwb2023/mistral-7b-instruct-quantized"],
+             ["mistralai/Mistral-7B-Instruct-v0.2"],
+             ["mistralai/Mistral-7B-Instruct-v0.3"],
              ["google/gemma-7b"],
              ["microsoft/Phi-3-mini-4k-instruct"],
-             ["meta-llama/Meta-Llama-3-8B"],
-             ["mistralai/Mistral-7B-Instruct-v0.3"]
+             ["meta-llama/Meta-Llama-3-8B"]
          ],
          inputs=textbox
      )
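
The hunks above stop at the example lists, so the wiring that actually calls get_model_summary is outside the diff context. Below is a minimal sketch of how the unchanged remainder of app.py presumably connects the pieces; textbox appears in the hunks (inputs=textbox), but the other widget names are illustrative assumptions, not taken from the commit.

# Hypothetical wiring sketch (not part of this commit); only get_model_summary comes from utils.py.
import gradio as gr
from utils import get_model_summary

with gr.Blocks() as demo:
    textbox = gr.Textbox(label="Model Name")        # filled by typing or by clicking an example
    submit_button = gr.Button("Get Model Summary")
    output = gr.Textbox(label="Model Summary")      # receives str(model)
    error_output = gr.Textbox(label="Error")        # receives the error message, empty on success

    # get_model_summary returns (summary, error), so it maps onto two output components
    submit_button.click(get_model_summary, inputs=textbox, outputs=[output, error_output])

demo.launch()
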
requirements.txt CHANGED
@@ -1,4 +1,6 @@
  git+https://github.com/huggingface/transformers.git
  spaces
  torchvision
- einops
+ einops
+ accelerate
+ bitsandbytes
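
The two new dependencies back the 4-bit path added in utils.py: bitsandbytes provides the 4-bit quantization kernels and accelerate handles device placement for quantized weights. For reference, a minimal sketch of 4-bit loading with the transformers API, which accepts a BitsAndBytesConfig through the quantization_config argument; the model id is just an example taken from the Space's list.

# Minimal 4-bit load (assumes a CUDA GPU plus bitsandbytes and accelerate installed).
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype used for the dequantized matmuls
)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",  # example id; any causal LM on the Hub works
    quantization_config=bnb_config,
    device_map="auto",                     # accelerate decides where the quantized weights live
)
print(model)  # the same kind of architecture summary the Space displays
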
utils.py ADDED
@@ -0,0 +1,73 @@
+ import subprocess
+ import os, requests
+ import torch, torchvision
+ from huggingface_hub import login
+ from transformers import BitsAndBytesConfig, AutoModelForCausalLM, LlavaNextForConditionalGeneration, LlavaForConditionalGeneration, PaliGemmaForConditionalGeneration, Idefics2ForConditionalGeneration
+
+ # Install required package
+ def install_flash_attn():
+     subprocess.run(
+         "pip install flash-attn --no-build-isolation",
+         env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+         shell=True,
+     )
+
+ # Authenticate with Hugging Face
+ def authenticate_hf(token):
+     login(token=token, add_to_git_credential=True)
+
+ # Function to get the model summary
+ model_cache = {}
+
+ def get_model_summary(model_name):
+     if model_name in model_cache:
+         return model_cache[model_name], ""
+
+     try:
+         # Fetch the config.json file
+         config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
+         headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
+         response = requests.get(config_url, headers=headers)
+         response.raise_for_status()
+         config = response.json()
+         architecture = config["architectures"][0]
+
+         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+         # Check if the model is quantized
+         is_quantized = "quantized" in model_name.lower()
+
+         # Set up BitsAndBytesConfig if the model is quantized
+         bnb_config = BitsAndBytesConfig(load_in_4bit=True) if is_quantized else None
+
+         # Load the model based on its architecture and quantization status
+         if architecture == "LlavaNextForConditionalGeneration":
+             model = LlavaNextForConditionalGeneration.from_pretrained(
+                 model_name, config=bnb_config, trust_remote_code=True
+             )
+         elif architecture == "LlavaForConditionalGeneration":
+             model = LlavaForConditionalGeneration.from_pretrained(
+                 model_name, config=bnb_config, trust_remote_code=True
+             )
+         elif architecture == "PaliGemmaForConditionalGeneration":
+             model = PaliGemmaForConditionalGeneration.from_pretrained(
+                 model_name, config=bnb_config, trust_remote_code=True
+             )
+         elif architecture == "Idefics2ForConditionalGeneration":
+             model = Idefics2ForConditionalGeneration.from_pretrained(
+                 model_name, config=bnb_config, trust_remote_code=True
+             )
+         else:
+             model = AutoModelForCausalLM.from_pretrained(
+                 model_name, config=bnb_config, trust_remote_code=True
+             )
+
+         # Move to device only if the model is not quantized
+         if not is_quantized:
+             model = model.to(device)
+
+         model_summary = str(model)
+         model_cache[model_name] = model_summary
+         return model_summary, ""
+     except Exception as e:
+         return "", str(e)