import os
#from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
import langchain.globals
from transformers import AutoModelForCausalLM, AutoTokenizer
import streamlit as st
from abc import ABC, abstractmethod


class Runnable(ABC):
    @abstractmethod
    def run(self, input_text):
        """
        Accept input text and return the processed result.
        Any class inheriting from Runnable must implement this method.
        """
        pass


class HuggingFaceModelWrapper(Runnable):  # Assuming Runnable is the required interface
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def run(self, input_text):
        # Convert the input text to token ids
        input_ids = self.tokenizer.encode(input_text, return_tensors="pt")

        # Generate a response from the model
        output = self.model.generate(input_ids, max_length=100, num_return_sequences=1)

        # Decode the generated tokens back to a string
        response_text = self.tokenizer.decode(output[0], skip_special_tokens=True)

        return response_text


@st.cache_resource
def load_model():
    # Cache the tokenizer and model so Streamlit reruns do not reload them from disk.
    tokenizer = AutoTokenizer.from_pretrained("KvrParaskevi/Hotel-Assistant-Attempt4-Llama-2-7b")
    model = AutoModelForCausalLM.from_pretrained("KvrParaskevi/Hotel-Assistant-Attempt4-Llama-2-7b")
    return tokenizer, model


def demo_miny_memory(model):
    # llm_data = get_Model(hugging_face_key)
    # ConversationBufferMemory stores the raw transcript; llm and max_token_limit only
    # apply to summary-style memories (e.g. ConversationSummaryBufferMemory), so they are
    # not passed here. The model parameter is kept for compatibility with existing callers.
    memory = ConversationBufferMemory()
    return memory


def demo_chain(input_text, memory, model):
    # llm_data = get_Model(hugging_face_key)
    # Note: ConversationChain expects a LangChain-compatible LLM, not the raw
    # transformers model returned by load_model (see the usage sketch below).
    llm_conversation = ConversationChain(
        llm=model,
        memory=memory,
        verbose=langchain.globals.get_verbose(),
    )

    chat_reply = llm_conversation.predict(input=input_text)
    return chat_reply
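

# --- Usage sketch (not part of the original file; assumptions noted inline) ---
# The helpers above are never wired into a UI, and ConversationChain needs a
# LangChain-compatible LLM rather than a raw AutoModelForCausalLM. Below is a
# minimal sketch of how the pieces could fit together in a Streamlit chat page,
# assuming HuggingFacePipeline is importable from langchain.llms (it lives in
# langchain_community.llms in newer releases).
if __name__ == "__main__":
    from transformers import pipeline
    from langchain.llms import HuggingFacePipeline

    st.title("Hotel Assistant")

    tokenizer, model = load_model()

    # Wrap the raw model in a text-generation pipeline so LangChain can call it.
    generation_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=100,
    )
    llm = HuggingFacePipeline(pipeline=generation_pipeline)

    # Keep one memory object per session so the conversation survives Streamlit reruns.
    if "memory" not in st.session_state:
        st.session_state.memory = demo_miny_memory(llm)

    user_input = st.chat_input("Ask the hotel assistant something")
    if user_input:
        reply = demo_chain(user_input, st.session_state.memory, llm)
        with st.chat_message("user"):
            st.write(user_input)
        with st.chat_message("assistant"):
            st.write(reply)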