sandz7 committed
Commit d364219 • 1 Parent(s): 58f26ad

start of krypton

Files changed (3)
  1. .gitignore +1 -0
  2. app.py +92 -0
  3. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ venv/
app.py ADDED
@@ -0,0 +1,92 @@
+ # import torch
+ # import gradio as gr
+ # from transformers import pipeline, TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
+ # from PIL import Image
+ # import requests
+ # import threading
+
+ DESCRIPTION = '''
+ <div>
+ <h1 style="text-align: center;">Krypton 🕋</h1>
+ <p>This uses an open-source model from <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>xtuner/llava-llama-3-8b-v1_1-transformers</b></a></p>
+ </div>
+ '''
+
+ # model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
+ # pipe = pipeline("image-to-text", model=model_id, device_map="auto")
+ # # Place the models on hardware in preparation for processing and generation
+ # llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+ # llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.float16).to('cuda')
+ # terminators = [
+ #     llama_tokenizer.eos_token_id,
+ #     llama_tokenizer.convert_tokens_to_ids("<|eot_id|>")
+ # ]
+
+ # def krypton(prompt,
+ #             history,
+ #             input_image,
+ #             max_new_tokens,
+ #             temperature,
+ #             num_beams,
+ #             do_sample: bool=True):
+ #     """
+ #     Takes an image as input, runs it through the pipeline
+ #     for generation, and returns the output. This is multimodal.
+ #     """
+ #     conversation = []
+ #     for user, assistant in history:
+ #         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+ #     conversation.append({"role": "user", "content": prompt})
+
+ #     input_ids = llama_tokenizer.apply_chat_template(conversation, return_tensors='pt').to(llama_model.device)
+
+ #     streamer = TextIteratorStreamer(llama_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+
+ #     llava_generation_kwargs = dict(
+ #         input_ids=input_ids,
+ #         streamer=streamer,
+ #         max_new_tokens=max_new_tokens,
+ #         num_beams=num_beams,
+ #         do_sample=do_sample
+ #     )
+
+ #     if temperature == 0.0:
+ #         do_sample = False
+
+ #     pil_image = Image.fromarray(input_image.astype('uint8'), 'RGB')
+
+ #     # Pipeline generation
+ #     outputs = pipeline()
+
+
+ from transformers import pipeline
+ from PIL import Image
+ import requests
+ import torch
+ import subprocess
+ import gradio as gr
+
+ model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
+ pipe = pipeline("image-to-text", model=model_id, torch_dtype=torch.float16, device=0)
+
+ def krypton(input_image):
+     # Gradio delivers the image as a numpy array; convert it to PIL for the pipeline.
+     pil_image = Image.fromarray(input_image.astype('uint8'), 'RGB')
+     # image = Image.open(requests.get(url, stream=True).raw)
+     prompt = ("<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|>"
+               "<|start_header_id|>assistant<|end_header_id|>\n\n")
+     outputs = pipe(pil_image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
+     subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE)  # capture GPU state for debugging
+     return outputs[0]["generated_text"]
+
+ with gr.Blocks(fill_height=True) as demo:
+     gr.Markdown(DESCRIPTION)
+     gr.Interface(
+         fn=krypton,
+         inputs="image",
+         outputs="text",
+         fill_height=True
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
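
For quick verification outside Gradio, the same pipeline can be smoke-tested directly. The sketch below is illustrative only, not part of this commit: it assumes a CUDA device at index 0 and a local image file ("test.jpg" is a placeholder), and it shows the [{"generated_text": ...}] shape the pipeline returns.

# Smoke test for the image-to-text pipeline (illustrative sketch, not part of this commit).
# Assumes a CUDA device at index 0; "test.jpg" is a placeholder path.
import torch
from PIL import Image
from transformers import pipeline

pipe = pipeline(
    "image-to-text",
    model="xtuner/llava-llama-3-8b-v1_1-transformers",
    torch_dtype=torch.float16,
    device=0,
)

image = Image.open("test.jpg").convert("RGB")
prompt = ("<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|>"
          "<|start_header_id|>assistant<|end_header_id|>\n\n")

# The pipeline returns a list of dicts, e.g. [{"generated_text": "..."}],
# which is why app.py indexes outputs[0]["generated_text"].
outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
print(outputs[0]["generated_text"])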
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ torch
+ transformers
+ gradio
+ numpy
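
One observation, not a change in this commit: app.py imports PIL (provided by the Pillow package) and requests, neither of which is pinned here; in practice they typically arrive transitively via gradio and transformers. A hypothetical pre-flight check for the modules app.py actually needs:

# Hypothetical pre-flight check (not part of this commit): verifies that every
# module app.py imports is importable before launching the demo.
import importlib

for module in ("torch", "transformers", "gradio", "numpy", "PIL", "requests"):
    try:
        importlib.import_module(module)
        print(f"ok: {module}")
    except ImportError:
        hint = " (install the Pillow package)" if module == "PIL" else ""
        print(f"missing: {module}{hint}")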