import os
#from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
import langchain.globals
from transformers import AutoModelForCausalLM, AutoTokenizer
import streamlit as st
from abc import ABC, abstractmethod


class Runnable(ABC):
    @abstractmethod
    def run(self, input_text):
        """
        Accept input text and return the processed result.
        Any class inheriting from Runnable must implement this method.
        """
        pass


class HuggingFaceModelWrapper(Runnable):  # Assuming Runnable is the required interface
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def run(self, input_text):
        # Convert the input text to token ids
        input_ids = self.tokenizer.encode(input_text, return_tensors="pt")

        # Generate a response from the model
        output = self.model.generate(input_ids, max_length=100, num_return_sequences=1)

        # Decode the generated tokens back to a string
        response_text = self.tokenizer.decode(output[0], skip_special_tokens=True)

        return response_text


@st.cache_resource
def load_model():
    # Cache the tokenizer and model so Streamlit reruns do not reload them from disk.
    tokenizer = AutoTokenizer.from_pretrained("KvrParaskevi/Hotel-Assistant-Attempt4-Llama-2-7b")
    model = AutoModelForCausalLM.from_pretrained("KvrParaskevi/Hotel-Assistant-Attempt4-Llama-2-7b")
    return tokenizer, model


def demo_miny_memory(model):
    # llm_data = get_Model(hugging_face_key)
    # ConversationBufferMemory stores the raw transcript; llm and max_token_limit only
    # apply to summary-style memories (e.g. ConversationSummaryBufferMemory), so they are
    # not passed here. The model parameter is kept for compatibility with existing callers.
    memory = ConversationBufferMemory()
    return memory


def demo_chain(input_text, memory, model):
    # llm_data = get_Model(hugging_face_key)
    # Note: ConversationChain expects a LangChain-compatible LLM, not the raw
    # transformers model returned by load_model (see the usage sketch below).
    llm_conversation = ConversationChain(
        llm=model,
        memory=memory,
        verbose=langchain.globals.get_verbose(),
    )

    chat_reply = llm_conversation.predict(input=input_text)
    return chat_reply
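

# --- Usage sketch (not part of the original file; assumptions noted inline) ---
# The helpers above are never wired into a UI, and ConversationChain needs a
# LangChain-compatible LLM rather than a raw AutoModelForCausalLM. Below is a
# minimal sketch of how the pieces could fit together in a Streamlit chat page,
# assuming HuggingFacePipeline is importable from langchain.llms (it lives in
# langchain_community.llms in newer releases).
if __name__ == "__main__":
    from transformers import pipeline
    from langchain.llms import HuggingFacePipeline

    st.title("Hotel Assistant")

    tokenizer, model = load_model()

    # Wrap the raw model in a text-generation pipeline so LangChain can call it.
    generation_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=100,
    )
    llm = HuggingFacePipeline(pipeline=generation_pipeline)

    # Keep one memory object per session so the conversation survives Streamlit reruns.
    if "memory" not in st.session_state:
        st.session_state.memory = demo_miny_memory(llm)

    user_input = st.chat_input("Ask the hotel assistant something")
    if user_input:
        reply = demo_chain(user_input, st.session_state.memory, llm)
        with st.chat_message("user"):
            st.write(user_input)
        with st.chat_message("assistant"):
            st.write(reply)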