#!/usr/bin/env python
import os
from threading import Thread
from typing import Iterator
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# Generation limits; MAX_INPUT_TOKEN_LENGTH can be overridden via the environment.
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "2048"))

model_id = "pints-ai/1.5-Pints-2K-v0.1"
# The tokenizer is needed regardless of hardware, so load it unconditionally.
tokenizer = AutoTokenizer.from_pretrained(model_id)
if torch.cuda.is_available():
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cuda")
else:
    # Bug fix: previously `model` (and `tokenizer`) were only bound inside the CUDA
    # branch, so `generate` raised NameError on CPU-only hosts. Fall back to a
    # float32 CPU load so the demo still starts (slowly) without a GPU.
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)
@spaces.GPU(duration=60)
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    max_new_tokens: int = 1024,
    temperature: float = 0.06,
    top_p: float = 0.95,
    top_k: int = 40,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    """Stream a chat completion for ``message`` given prior ``chat_history``.

    Yields the accumulated response text after every new token so the Gradio
    chatbot can render incremental output. Sampling parameters are forwarded
    to ``model.generate``; the Gradio sliders supply the live values, so the
    keyword defaults here only matter for direct calls.
    """
    # Rebuild the conversation in the chat-template message format.
    conversation = []
    for user, assistant in chat_history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
    # Keep only the most recent tokens when the prompt exceeds the context budget.
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    # Generation runs on a worker thread; the streamer hands tokens back here.
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,  # was dict({"input_ids": ...}, ...) — plain kwargs are clearer
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        num_beams=1,
        repetition_penalty=repetition_penalty,
    )
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
    # The streamer is exhausted only after generation finishes; reap the worker
    # thread so it is not leaked (the original never joined it).
    thread.join()
# Chat UI wired to `generate`. NOTE(review): the slider `value=` settings below
# (temperature 0.2, top-p 0.9, top-k 50) are what users actually get — they
# override the keyword defaults on `generate` itself.
chat_interface = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(
        height=450,
        label="pints-ai/1.5-Pints-2K-v0.1",
        show_share_button=True,
    ),
    cache_examples=False,
    additional_inputs=[
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=MAX_MAX_NEW_TOKENS,
            step=1,
            value=DEFAULT_MAX_NEW_TOKENS,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.05,
            maximum=1.2,
            step=0.05,
            value=0.2,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.05,
            maximum=1.0,
            step=0.05,
            value=0.9,
        ),
        gr.Slider(
            label="Top-k",
            minimum=1,
            maximum=1000,
            step=1,
            value=50,
        ),
        gr.Slider(
            label="Repetition penalty",
            minimum=1.0,
            maximum=2.0,
            step=0.05,
            value=1.2,
        ),
    ],
    examples=[
        ["""What word does not belong in this sequence: "car, airplane, goat, bus"?"""],
        ["Write a news article for The Onion about the application of a herd of goats by the Netherlands Forensic Institute"],
        ["What are 3 nice things to do when I visit Amsterdam for a weekend?"],
        ["Who is the partner of Batman?"],
        ["Can you cycle to the moon?"],
        ["What is the importance of open source language models?"],
        ["""```
Auckland Zoo otter Jin has been found alive
Monday, July 10, 2006
New Zealand
Related articles
9 May 2023: First NASA TROPICS satellites launch to monitor tropical storms
31 January 2023: "Energized and excited": Chris Hipkins becomes 41st Prime Minister of New Zealand
12 February 2022: US warns its citizens to leave Ukraine as Russia could invade 'anytime'
25 November 2021: New Zealand raises interest rates in second straight month to 0.75%
5 September 2021: At least six injured after stabbing in New Zealand supermarket
Location of New Zealand
Collaborate!
Pillars of Wikinews writing
Writing an article
New Zealand
Jin, the short-clawed Asiatic otter who escaped from the Auckland Zoo almost a month ago, has been found alive.
Jin escaped from the zoo by digging through two walls and scaling a 1.8 metre high barrier around her enclosure.
She successfully evaded capture despite numerous sightings around Auckland's inner harbours.
The New Zealand Department of Conservation says a tip-off from a yachtie helped bring an escaped zoo otter's days of freedom to an end this morning, the yachtie spotted her at Islington Bay, at the causeway between Rangitoto and Motutapu island.
Auckland Zoo senior vet Dr Richard Jakob-Hoff said Jin has “…got to take her time and relax in her own time. She is in quite good condition and is well hydrated... She's lost a lot of weight but given that she is really in very good condition."
She has abrasions on her head, another under her tail and on her back paws.
When Jin first left the enclosure she weighed 3.6-kilograms (kg) and when she was caught she weighed 2.5 kg.
She is currently in quarantine for 30 days to make sure she is free of any infections that she could have caught from the outside world. The infection she would most likely get would be salmonella from the raw food she has been eating.
"The most common thing in these types of animals is that toxoplasmosis can cause blindness and can get into the brain and cause fits... She has been very stressed and that is what I am concerned about." said Dr Richard Jakob-Hoff.
```
Summarize the above article"""]
    ],
    title="pints-ai/1.5-Pints-2K-v0.1",
    description="""pints-ai/1.5-Pints-2K-v0.1 quick demo""",
    submit_btn="Generate",
    stop_btn="Stop",
    retry_btn="🔄 Retry",
    undo_btn="↩️ Undo",
    clear_btn="🗑️ Clear",
)
# Wrap the chat interface in a Blocks layout so the Space's custom CSS applies.
with gr.Blocks(css="style.css") as demo:
    chat_interface.render()

if __name__ == "__main__":
    # Bound the request queue so a burst of visitors can't pile up unbounded work.
    queued = demo.queue(max_size=20)
    queued.launch()