"""Gradio app that generates LLM book blurbs and logs would-read votes to a Hugging Face dataset."""

import json
import os
import random
import uuid
from datetime import datetime
from pathlib import Path

import gradio as gr
from huggingface_hub import CommitScheduler, get_token, login, snapshot_download
from openai import OpenAI

from prompts import basic_prompt, detailed_genre_description_prompt
from theme import TufteInspired

# Ensure you're logged in to Hugging Face
login(get_token())

# Define available models
MODELS = [
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
]

# Track which model produced the current blurb (set in get_random_model)
CHOSEN_MODEL = None

# Set up dataset storage
dataset_folder = Path("dataset")
dataset_folder.mkdir(exist_ok=True)


def get_latest_dataset_file():
    """Return the most recently created local dataset file, if any."""
    files = list(dataset_folder.glob("data_*.jsonl"))
    return max(files, key=os.path.getctime) if files else None


# Check for an existing dataset file and append to it, or create a new one
latest_file = get_latest_dataset_file()
if latest_file:
    dataset_file = latest_file
    print(f"Appending to existing dataset file: {dataset_file}")
else:
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")

# Set up CommitScheduler for dataset uploads
repo_id = (
    "davanstrien/summer-reading-preference"  # Replace with your desired dataset repo
)
scheduler = CommitScheduler(
    repo_id=repo_id,
    repo_type="dataset",
    folder_path=dataset_folder,
    path_in_repo="data",
    every=1,  # Upload every minute
)


def download_existing_dataset():
    """Download existing dataset files from the Hub into the local folder."""
    try:
        # hf_hub_download fetches single files only, so use snapshot_download
        # to pull every JSONL file under data/ in one call
        snapshot_path = snapshot_download(
            repo_id=repo_id, repo_type="dataset", allow_patterns="data/*.jsonl"
        )
        for file in (Path(snapshot_path) / "data").glob("*.jsonl"):
            dest_file = dataset_folder / file.name
            if not dest_file.exists():
                dest_file.write_bytes(file.read_bytes())
                print(f"Downloaded existing dataset file: {dest_file}")
    except Exception as e:
        print(f"Error downloading existing dataset: {e}")


# Download existing dataset files at startup
download_existing_dataset()


def get_random_model():
    global CHOSEN_MODEL
    model = random.choice(MODELS)
    CHOSEN_MODEL = model
    return model


def create_client(model_id):
    """Create an OpenAI-compatible client for a Hugging Face Inference API model."""
    return OpenAI(
        base_url=f"https://api-inference.huggingface.co/models/{model_id}/v1",
        api_key=get_token(),
    )


# Default client (unused at runtime: generate_blurb builds a per-model client)
client = OpenAI(
    base_url="https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-70B-Instruct/v1",
    api_key=get_token(),
)


def generate_prompt():
    """Pick one of the two prompt styles at random."""
    if random.choice([True, False]):
        return detailed_genre_description_prompt()
    else:
        return basic_prompt()


def get_and_store_prompt():
    prompt = generate_prompt()
    print(prompt)  # Keep this for debugging
    return prompt


def generate_blurb(prompt):
    """Stream a blurb from a randomly chosen model, yielding the partial text."""
    model_id = get_random_model()
    client = create_client(model_id)
    max_tokens = random.randint(100, 1000)
    chat_completion = client.chat.completions.create(
        model="tgi",
        messages=[
            {"role": "user", "content": prompt},
        ],
        stream=True,
        max_tokens=max_tokens,
    )
    full_text = ""
    for message in chat_completion:
        # The final streamed chunk can carry a None delta, so guard against it
        full_text += message.choices[0].delta.content or ""
        yield full_text


def log_blurb_and_vote(
    prompt, blurb, vote, user_info: gr.OAuthProfile | None, has_voted
):
    """Append the blurb and vote to the local dataset file picked up by the scheduler."""
    if has_voted:
        return (
            "You've already voted on this response.",
            has_voted,
            gr.update(visible=True),
        )
    user_id = user_info.username if user_info is not None else str(uuid.uuid4())
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "prompt": prompt,
        "blurb": blurb,
        "vote": vote,
        "user_id": user_id,
        "model": CHOSEN_MODEL,
    }
    with scheduler.lock:
        with dataset_file.open("a") as f:
            f.write(json.dumps(log_entry) + "\n")
    gr.Info("Thank you for voting!")
    return f"Logged: {vote} by user {user_id}", True, gr.update(visible=False)


tufte_theme = TufteInspired()

# Create Gradio interface
with gr.Blocks(theme=tufte_theme) as demo:
    gr.Markdown(
        "# Would you read this book?"
    )
    gr.Markdown(
        """
Looking for your next summer read? Would you read a book based on this LLM-generated blurb?

Your vote will be added to [this Hugging Face dataset](https://huggingface.co/datasets/davanstrien/summer-reading-preference).
""" ) login_btn = gr.LoginButton() with gr.Row(): generate_btn = gr.Button("Create a book", variant="primary") prompt_state = gr.State() blurb_output = gr.Markdown(label="Book blurb") has_voted = gr.State(False) with gr.Row() as voting_row: upvote_btn = gr.Button("👍 would read") downvote_btn = gr.Button("👎 wouldn't read") vote_output = gr.Textbox(label="Vote Status", interactive=False, visible=False) def generate_and_show(prompt): return "Generating...", False, gr.Row.update(visible=False) def show_voting_buttons(blurb): return blurb, False, gr.Row.update(visible=True) generate_btn.click(get_and_store_prompt, outputs=prompt_state).then( generate_and_show, inputs=prompt_state, outputs=[blurb_output, has_voted, voting_row], ).then(generate_blurb, inputs=prompt_state, outputs=blurb_output).then( show_voting_buttons, inputs=blurb_output, outputs=[blurb_output, has_voted, voting_row], ) upvote_btn.click( log_blurb_and_vote, inputs=[ prompt_state, blurb_output, gr.Textbox(value="upvote", visible=False), login_btn, has_voted, ], outputs=[vote_output, has_voted, voting_row], ) downvote_btn.click( log_blurb_and_vote, inputs=[ prompt_state, blurb_output, gr.Textbox(value="downvote", visible=False), login_btn, has_voted, ], outputs=[vote_output, has_voted, voting_row], ) if __name__ == "__main__": demo.launch(debug=True)