# FastAPI service exposing a local ctransformers Mistral-7B GGUF model at POST /generate.
from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
# Initialize the Mistral model with appropriate quantization settings.
# NOTE(review): this runs at import time and blocks startup until the
# GGUF file is loaded from disk — confirm the path is valid at deploy time.
llm = AutoModelForCausalLM.from_pretrained(
    "mistral-7b-v0.1.Q4_K_M.gguf",  # Path to your quantized model file (Q4_K_M GGUF)
    model_type='mistral',           # Specify the model architecture for ctransformers
    max_new_tokens=1092,            # Cap on generated tokens per call; adjust to a safe value
    threads=3                       # CPU threads used for inference; adjust to your hardware
)
# Maximum allowed length (in characters) of the fully assembled prompt.
MAX_CONTEXT_LENGTH = 2500
app = FastAPI()
# Define the input structure using Pydantic
class RequestData(BaseModel):
    """Request body schema for POST /generate."""
    prompt: str  # raw user instruction; the handler wraps it into the chat template
@app.post("/generate")
async def generate_response(request_data: RequestData):
    """Generate a completion for the supplied prompt.

    Wraps ``request_data.prompt`` in the chat template, runs the model,
    and returns the raw generated string.

    Raises:
        HTTPException 400: assembled prompt exceeds MAX_CONTEXT_LENGTH.
        HTTPException 500: model inference failed.
    """
    system_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    E_INST = "</s>"
    user, assistant = "<|user|>", "<|assistant|>"
    # Construct the full prompt: system text, user turn, then the assistant cue.
    prompt = f"{system_prompt}{E_INST}\n{user}\n{request_data.prompt}{E_INST}\n{assistant}\n"
    # Enforce the context budget up front. MAX_CONTEXT_LENGTH was previously
    # declared but never checked, so oversized prompts reached the model and
    # could overflow its context window; reject them with a client error.
    if len(prompt) > MAX_CONTEXT_LENGTH:
        raise HTTPException(
            status_code=400,
            detail=f"Prompt too long: {len(prompt)} characters exceeds the {MAX_CONTEXT_LENGTH}-character limit",
        )
    try:
        # Generate the response using the model (synchronous CPU inference).
        response = llm(prompt)
    except Exception as e:
        # Surface inference failures as a 500 with the underlying message.
        raise HTTPException(status_code=500, detail=f"Model inference failed: {str(e)}")
    return response
# if __name__ == "__main__":
# import uvicorn
# # Run the FastAPI app with Uvicorn
# uvicorn.run(app, host="0.0.0.0", port=7860)