import gradio as gr
import spaces
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

def get_device():
    """Devuelve 'cuda' si CUDA está disponible (GPU disponible), de lo contrario devuelve 'cpu'."""
    return "cuda" if torch.cuda.is_available() else "cpu"


model_name = "microsoft/Phi-3-mini-128k-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, trust_remote_code=True)

# Pick the device according to GPU availability
device = get_device()
model.to(device)
#model = torch.quantization.quantize_dynamic(
#    model, {torch.nn.Linear}, dtype=torch.qint8
#)
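# (A note on the disabled block above: torch.quantization.quantize_dynamic is a
# CPU-inference optimization that expects float32 Linear weights, so it would
# likely not combine cleanly with the bfloat16 weights loaded here.)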

pipe = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
    trust_remote_code=True
)
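
# A minimal sketch of an alternative prompt builder using the tokenizer's
# built-in chat template (Phi-3 ships one), assuming history entries are
# {"role": ..., "content": ...} dicts as produced by type="messages" below:
#
#   messages = (
#       [{"role": "system", "content": system_prompt}]
#       + history
#       + [{"role": "user", "content": message}]
#   )
#   prompt = tokenizer.apply_chat_template(
#       messages, tokenize=False, add_generation_prompt=True
#   )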

@spaces.GPU
def chat_function(message, history, system_prompt, max_new_tokens, temperature):
    # Build a plain-text prompt from the system prompt plus the chat history
    # (history entries are {"role": ..., "content": ...} dicts).
    prompt = system_prompt
    for msg in history:
        prompt += f'{msg["role"]}: {msg["content"]}\n'
    prompt += f'user: {message}\n'

    # Offset the slider value so sampling never runs at exactly 0.0,
    # which is invalid when do_sample=True.
    temp = temperature + 0.1
    outputs = pipe(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temp,
        top_p=0.9,
    )
    # Drop the echoed prompt so only the newly generated text is returned.
    generated_text = outputs[0]['generated_text']
    new_text = generated_text[len(prompt):]
    return new_text

chat_interface = gr.ChatInterface(
    fn=chat_function,
    chatbot=gr.Chatbot(height=400, type="messages"),
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
    title="Chat with AI Model",
    description="""
Phi-3-Mini-128K-Instruct is a 3.8-billion-parameter, lightweight, state-of-the-art open model trained on the Phi-3 datasets, which include both synthetic data and filtered publicly available website data with an emphasis on high-quality, reasoning-dense content. The model belongs to the Phi-3 family; the Mini version comes in two variants, 4K and 128K, which is the context length (in tokens) it can support.
https://huggingface.co/microsoft/Phi-3-mini-128k-instruct
    """,
    theme="soft",
    additional_inputs=[
        gr.Textbox(value="Hello!", label="System Prompt", placeholder="Enter a system prompt"),
        gr.Slider(minimum=50, maximum=1000, step=50, value=150, label="Max New Tokens"),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.7, label="Temperature")
    ],
)

chat_interface.launch()