# Hotel-Booking-Assistant / chatbot_bedrock.py
import os
#from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
import langchain.globals
from transformers import AutoModelForCausalLM, AutoTokenizer
import streamlit as st
from abc import ABC, abstractmethod


class Runnable(ABC):
    @abstractmethod
    def run(self, input_text):
        """
        This method should be implemented by any class that inherits from Runnable.
        It should accept input text and return processed text or any other result
        based on the implementation.
        """
        pass


class HuggingFaceModelWrapper(Runnable):  # Runnable is the simple interface defined above
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def run(self, input_text):
        # Convert the input text to tokens
        input_ids = self.tokenizer.encode(input_text, return_tensors="pt")
        # Generate a response from the model
        output = self.model.generate(input_ids, max_length=100, num_return_sequences=1)
        # Decode the generated tokens back to a string
        response_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
        return response_text
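
# Usage sketch (illustrative, not part of the original flow): the wrapper exposes the
# model through the simple run() interface defined above. The prompt string below is
# an assumption for demonstration; load_model() is defined further down.
#
#   tokenizer, model = load_model()
#   assistant = HuggingFaceModelWrapper(model, tokenizer)
#   reply = assistant.run("Hi, I'd like to book a double room for this weekend.")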


@st.cache_resource
def load_model():
    # Load the fine-tuned hotel-assistant model once and cache it across Streamlit reruns.
    tokenizer = AutoTokenizer.from_pretrained("KvrParaskevi/Hotel-Assistant-Attempt4-Llama-2-7b")
    model = AutoModelForCausalLM.from_pretrained("KvrParaskevi/Hotel-Assistant-Attempt4-Llama-2-7b")
    return tokenizer, model


def demo_miny_memory(model):
    # llm_data = get_Model(hugging_face_key)
    # Note: llm/max_token_limit are only used by summary-style memories such as
    # ConversationSummaryBufferMemory; ConversationBufferMemory just stores the raw history.
    memory = ConversationBufferMemory(llm=model, max_token_limit=512)
    return memory


def demo_chain(input_text, memory, model):
    # llm_data = get_Model(hugging_face_key)
    # Build a ConversationChain around the supplied LLM and memory, then generate a reply.
    llm_conversation = ConversationChain(llm=model, memory=memory, verbose=langchain.globals.get_verbose())
    chat_reply = llm_conversation.predict(input=input_text)
    return chat_reply
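
# End-to-end sketch (assumption, not part of the original file): ConversationChain expects a
# LangChain-compatible LLM, so one option is to wrap the raw transformers model in a
# text-generation pipeline before handing it to the helpers above. The pipeline settings
# and prompt below are illustrative.
#
#   from transformers import pipeline
#   from langchain.llms import HuggingFacePipeline
#
#   tokenizer, model = load_model()
#   pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100)
#   llm = HuggingFacePipeline(pipeline=pipe)
#   memory = demo_miny_memory(llm)
#   print(demo_chain("Do you have any rooms available tomorrow?", memory, llm))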