# NOTE(review): removed stray "Spaces:" / "Runtime error" lines — they were
# Hugging Face Spaces page-scrape residue, not Python, and broke the module.
from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
# Initialize the Mistral model with appropriate quantization settings.
# NOTE(review): loading happens at import time, so the app blocks until the
# GGUF file is fully loaded; the file must exist at this relative path.
llm = AutoModelForCausalLM.from_pretrained(
    "mistral-7b-v0.1.Q4_K_M.gguf",  # path to the quantized model file
    model_type="mistral",           # architecture hint for ctransformers
    max_new_tokens=1092,            # cap on tokens generated per call
    threads=3,                      # CPU threads; tune to host resources
)

# Upper bound on accepted prompt size — presumably measured in characters,
# since it is compared against len(prompt) rather than a token count.
MAX_CONTEXT_LENGTH = 2500

app = FastAPI()
# Define the input structure using Pydantic.
class RequestData(BaseModel):
    """Request body for the generation endpoint."""

    # The raw user instruction forwarded to the model.
    prompt: str
@app.post("/generate")
async def generate_response(request_data: RequestData):
    """Run model inference on the submitted prompt and return the raw text.

    Fixes:
      * The handler was never registered with the FastAPI app (no route
        decorator), so the endpoint was unreachable — `@app.post` exposes it.
      * `MAX_CONTEXT_LENGTH` was defined but never enforced; oversized
        prompts are now rejected up front.

    Raises:
        HTTPException: 413 if the prompt exceeds MAX_CONTEXT_LENGTH,
            500 if model inference fails.
    """
    # Reject oversized prompts before spending CPU time on inference.
    if len(request_data.prompt) > MAX_CONTEXT_LENGTH:
        raise HTTPException(status_code=413, detail="Prompt too long")

    system_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    E_INST = "</s>"
    user, assistant = "<|user|>", "<|assistant|>"

    # Construct the full prompt using the model's chat-style turn markers.
    prompt = f"{system_prompt}{E_INST}\n{user}\n{request_data.prompt}{E_INST}\n{assistant}\n"

    try:
        # Generate the response using the model (synchronous, blocking call).
        response = llm(prompt)
        return response
    except Exception as e:
        # Surface inference failures as a 500 with the underlying cause chained.
        raise HTTPException(status_code=500, detail=f"Model inference failed: {str(e)}") from e
# Local-run entry point (disabled on Spaces, where the platform runs the app):
# if __name__ == "__main__":
#     import uvicorn
#     # Run the FastAPI app with Uvicorn
#     uvicorn.run(app, host="0.0.0.0", port=7860)