import concurrent.futures

import gradio as gr

# Build a callable Gradio demo for the Hugging Face Hub model.
# NOTE(review): with a "models/..." name, gr.load presumably wraps the hosted
# model behind a remote inference endpoint rather than loading the weights
# into local RAM -- confirm against the installed Gradio version.
model = gr.load("models/TheBloke/SOLAR-10.7B-Instruct-v1.0-uncensored-GGUF")


def interact(input):
    """Return the loaded model's response to the user's text.

    Args:
        input: The text entered by the user. The parameter name shadows the
            ``input`` builtin but is kept so existing keyword callers
            continue to work.

    Returns:
        Whatever the loaded ``model`` callable returns for ``input``.
    """
    return model(input)


# Text-in / text-out UI around ``interact``.
interface = gr.Interface(fn=interact, inputs="text", outputs="text")

# The previous version wrapped this call in a ThreadPoolExecutor that was
# never given any work (no submit/map), so it neither served nor limited
# requests. It has been dropped; request handling is left to Gradio's server.
interface.launch()