# main.py — FastAPI service wrapping a quantized Mistral-7B GGUF model
# (originally published on Hugging Face; web-page chrome from the scrape
# removed so the file is valid Python).
from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
# Load the quantized Mistral GGUF weights through ctransformers.
# NOTE(review): the model path is relative — assumes the .gguf file sits
# next to this script; confirm against the deployment layout.
_MODEL_KWARGS = {
    "model_type": "mistral",  # architecture family of the GGUF file
    "max_new_tokens": 1092,   # cap on tokens generated per request
    "threads": 3,             # CPU threads for inference; tune per host
}
llm = AutoModelForCausalLM.from_pretrained(
    "mistral-7b-v0.1.Q4_K_M.gguf",
    **_MODEL_KWARGS,
)

# Maximum accepted prompt size.
# NOTE(review): defined here but not obviously enforced anywhere in this
# module — verify callers respect it.
MAX_CONTEXT_LENGTH = 2500
# Create the API and register CORS. CORSMiddleware was imported at the top
# of the file but never attached, so browser front-ends on other origins
# had their preflight requests rejected — attach it here.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # TODO: restrict to the known front-end origin(s)
    allow_methods=["*"],
    allow_headers=["*"],
)
class RequestData(BaseModel):
    """Request body schema for POST /generate.

    Attributes:
        prompt: Raw user instruction text; wrapped into the chat
            template by the endpoint before inference.
    """

    # No length or content validation is applied at this layer.
    prompt: str
@app.post("/generate")
async def generate_response(request_data: RequestData):
    """Generate a model completion for the submitted prompt.

    Wraps the user prompt in the chat template, runs the LLM, and
    returns the generated output unchanged.

    Raises:
        HTTPException(413): if the prompt exceeds MAX_CONTEXT_LENGTH.
        HTTPException(500): if model inference fails.
    """
    # Enforce the module-level limit up front. MAX_CONTEXT_LENGTH was
    # previously defined but never checked, so oversized prompts went
    # straight to the model.
    if len(request_data.prompt) > MAX_CONTEXT_LENGTH:
        raise HTTPException(
            status_code=413,
            detail=(
                f"Prompt too long: {len(request_data.prompt)} characters "
                f"(limit {MAX_CONTEXT_LENGTH})."
            ),
        )

    system_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    E_INST = "</s>"  # used here as a turn separator
    user, assistant = "<|user|>", "<|assistant|>"
    # NOTE(review): this mixes Zephyr-style role tags (<|user|>/<|assistant|>)
    # with </s> separators against what looks like a *base* (non-instruct)
    # Mistral GGUF — confirm the template matches the deployed model.
    prompt = f"{system_prompt}{E_INST}\n{user}\n{request_data.prompt}{E_INST}\n{assistant}\n"

    try:
        # Run inference on the module-level model instance.
        response = llm(prompt)
        return response
    except Exception as e:
        # Surface inference failures as a 500 instead of an unhandled crash.
        raise HTTPException(status_code=500, detail=f"Model inference failed: {str(e)}")
# if __name__ == "__main__":
# import uvicorn
# # Run the FastAPI app with Uvicorn
# uvicorn.run(app, host="0.0.0.0", port=7860)