from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Maximum context window (prompt plus generated tokens) for the model
MAX_CONTEXT_LENGTH = 2500

# Initialize the quantized Mistral model from a local GGUF file
llm = AutoModelForCausalLM.from_pretrained(
    "mistral-7b-v0.1.Q4_K_M.gguf",      # Path to your model file
    model_type="mistral",               # Model architecture
    context_length=MAX_CONTEXT_LENGTH,  # Context window size
    max_new_tokens=1092,                # Upper bound on generated tokens
    threads=3,                          # Adjust based on your CPU cores
)

app = FastAPI()

# Enable CORS so browser-based clients on other origins can call the API
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Define the input structure using Pydantic
class RequestData(BaseModel):
    prompt: str
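# Example request body (the prompt text is illustrative):
#   {"prompt": "Summarize the plot of Hamlet in two sentences."}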

@app.post("/generate")
async def generate_response(request_data: RequestData):
    system_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    E_INST = "</s>"
    user, assistant = "<|user|>", "<|assistant|>"
    
    # Construct the full prompt
    prompt = f"{system_prompt}{E_INST}\n{user}\n{request_data.prompt}{E_INST}\n{assistant}\n"
    
    try:
        # Generate the response using the model
        response = llm(prompt)
        return response
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Model inference failed: {str(e)}")

# if __name__ == "__main__":
#     import uvicorn
#     # Run the FastAPI app with Uvicorn
#     uvicorn.run(app, host="0.0.0.0", port=7860)
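
# A minimal usage sketch (assumes the server is running locally on port 7860,
# e.g. via `uvicorn app:app --host 0.0.0.0 --port 7860`; the module name "app"
# and the example prompt are assumptions):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/generate",
#       json={"prompt": "Write a haiku about quantized models."},
#   )
#   print(resp.json()["response"])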