# NOTE(review): removed stray "Spaces:" / "Runtime error" lines — they were
# Hugging Face Spaces page-scrape residue, not Python, and broke the module.
from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
# Initialize the Mistral model with appropriate quantization settings.
# NOTE(review): loading happens at import time, so the app blocks until the
# GGUF file is fully loaded; the file must exist at this relative path.
llm = AutoModelForCausalLM.from_pretrained(
    "mistral-7b-v0.1.Q4_K_M.gguf",  # path to the quantized model file
    model_type="mistral",           # architecture hint for ctransformers
    max_new_tokens=1092,            # cap on tokens generated per call
    threads=3,                      # CPU threads; tune to host resources
)

# Upper bound on accepted prompt size — presumably measured in characters,
# since it is compared against len(prompt) rather than a token count.
MAX_CONTEXT_LENGTH = 2500

app = FastAPI()
# Define the input structure using Pydantic.
class RequestData(BaseModel):
    """Request body for the generation endpoint."""

    # The raw user instruction forwarded to the model.
    prompt: str
@app.post("/generate")
async def generate_response(request_data: RequestData):
    """Run model inference on the submitted prompt and return the raw text.

    Fixes:
      * The handler was never registered with the FastAPI app (no route
        decorator), so the endpoint was unreachable — `@app.post` exposes it.
      * `MAX_CONTEXT_LENGTH` was defined but never enforced; oversized
        prompts are now rejected up front.

    Raises:
        HTTPException: 413 if the prompt exceeds MAX_CONTEXT_LENGTH,
            500 if model inference fails.
    """
    # Reject oversized prompts before spending CPU time on inference.
    if len(request_data.prompt) > MAX_CONTEXT_LENGTH:
        raise HTTPException(status_code=413, detail="Prompt too long")

    system_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    E_INST = "</s>"
    user, assistant = "<|user|>", "<|assistant|>"

    # Construct the full prompt using the model's chat-style turn markers.
    prompt = f"{system_prompt}{E_INST}\n{user}\n{request_data.prompt}{E_INST}\n{assistant}\n"

    try:
        # Generate the response using the model (synchronous, blocking call).
        response = llm(prompt)
        return response
    except Exception as e:
        # Surface inference failures as a 500 with the underlying cause chained.
        raise HTTPException(status_code=500, detail=f"Model inference failed: {str(e)}") from e
# Local-run entry point (disabled on Spaces, where the platform runs the app):
# if __name__ == "__main__":
#     import uvicorn
#     # Run the FastAPI app with Uvicorn
#     uvicorn.run(app, host="0.0.0.0", port=7860)