"""Fine-tune DialoGPT-medium on resume Q&A pairs and serve it via Streamlit.

Two phases:
1. Training — builds a question/answer dataset from ``datafile``, fine-tunes
   DialoGPT as a causal LM, and saves the result to ``./resume_bot``. Skipped
   when a saved model already exists, because Streamlit re-executes this whole
   script on every user interaction.
2. Serving — a minimal Streamlit chat UI over the fine-tuned model.
"""

import os

from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datafile import QL, resume_data_dict
import streamlit as st
from tensorflow import keras  # NOTE(review): unused in this file — confirm and remove
import tensorflow as tf       # NOTE(review): unused in this file — confirm and remove

MODEL_DIR = "./resume_bot"
BASE_MODEL = "microsoft/DialoGPT-medium"
MAX_LENGTH = 128

# Pair each query with the resume answer selected by its label.
# Assumes QL["queries"] and QL["labels"] are parallel lists — TODO confirm in datafile.
data = [
    {"question": QL["queries"][i], "answer": resume_data_dict[QL["labels"][i]]}
    for i in range(len(QL["labels"]))
]
dataset = Dataset.from_list(data)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# GPT-2-family tokenizers ship without a pad token; reuse EOS for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def preprocess_function(examples):
    """Tokenize question/answer pairs for causal-LM fine-tuning.

    DialoGPT is decoder-only: ``labels`` must align token-for-token with
    ``input_ids`` (the model shifts them internally). We therefore concatenate
    question and answer into one sequence used as both input and label, and
    mask padding positions with -100 so the loss ignores them.

    (The original tokenized answers separately under ``as_target_tokenizer`` —
    a seq2seq pattern that yields misaligned labels for a causal LM, and also
    left pad tokens in the loss.)
    """
    texts = [
        f"Question: {q}{tokenizer.eos_token}{a}{tokenizer.eos_token}"
        for q, a in zip(examples["question"], examples["answer"])
    ]
    model_inputs = tokenizer(
        texts, padding="max_length", truncation=True, max_length=MAX_LENGTH
    )
    # labels = input_ids with pad positions replaced by -100 (ignored by CE loss).
    model_inputs["labels"] = [
        [tok if mask == 1 else -100 for tok, mask in zip(ids, attn)]
        for ids, attn in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs


# Fine-tune only when no saved model exists yet. Without this guard, every
# Streamlit rerun (i.e. every chat message) would retrain from scratch.
if not os.path.exists(os.path.join(MODEL_DIR, "config.json")):
    tokenized_dataset = dataset.map(
        preprocess_function, batched=True, remove_columns=dataset.column_names
    )

    model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
    model.resize_token_embeddings(len(tokenizer))

    training_args = TrainingArguments(
        output_dir=MODEL_DIR,
        num_train_epochs=3,
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        warmup_steps=10,
        weight_decay=0.01,
        logging_dir="./logs",
        logging_steps=10,
        save_steps=500,
        evaluation_strategy="steps",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        # NOTE(review): evaluating on the training set measures memorization,
        # not generalization — consider a held-out split.
        eval_dataset=tokenized_dataset,
        tokenizer=tokenizer,
    )
    trainer.train()
    # The original never saved the final model, so from_pretrained(MODEL_DIR)
    # below failed: only checkpoint-* subdirectories existed, no root config.
    trainer.save_model(MODEL_DIR)
    tokenizer.save_pretrained(MODEL_DIR)

# ---- Streamlit chat UI over the fine-tuned model ----
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForCausalLM.from_pretrained(MODEL_DIR)

st.title("Resume Chatbot")

if 'history' not in st.session_state:
    st.session_state.history = []

user_input = st.text_input("You: ", "")

if user_input:
    # Terminate the prompt with EOS, matching DialoGPT's turn format.
    input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors='pt')
    try:
        response_ids = model.generate(
            input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id
        )
        # Decode only the newly generated tokens — decoding the full sequence
        # (as the original did) echoes the user's question back in the reply.
        bot_response = tokenizer.decode(
            response_ids[0][input_ids.shape[-1]:], skip_special_tokens=True
        )

        # Update the chat history
        st.session_state.history.append(f"You: {user_input}")
        st.session_state.history.append(f"Bot: {bot_response}")

        # Display the bot response
        st.write(f"Bot: {bot_response}")
    except Exception as e:
        st.error(f"Error generating response: {e}")

# Add a button to clear the conversation
if st.button("Reset Conversation"):
    st.session_state.history = []