Spaces:
Running
Running
File size: 5,369 Bytes
b2e385a 88d9c40 2655ad8 b2e385a 5a3f658 b2e385a a328dd2 2655ad8 19dd1b9 dce9e22 5a3f658 b2e385a 5a3f658 b2e385a b0ec887 dce9e22 b0ec887 f921425 dce9e22 f921425 dce9e22 88d9c40 b0ec887 b2e385a f921425 b0ec887 b2e385a 2655ad8 f921425 2655ad8 b0ec887 2655ad8 5a3f658 2655ad8 4e8ec3f b0ec887 88d9c40 dce9e22 4e8ec3f dce9e22 5a3f658 4e8ec3f b0ec887 2655ad8 203a7f7 5a3f658 88d9c40 dce9e22 f921425 dce9e22 b2e385a f921425 dce9e22 88d9c40 f921425 dce9e22 b2e385a f921425 dce9e22 b4fc14a 4e8ec3f f921425 b2e385a f921425 dce9e22 f921425 4e8ec3f 2655ad8 f921425 b2e385a f921425 4e8ec3f 2655ad8 f921425 b2e385a f921425 4e8ec3f 88d9c40 b0ec887 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
import hashlib
import json
import os
import random
import uuid
from datetime import datetime
from pathlib import Path
import gradio as gr
from huggingface_hub import CommitScheduler, InferenceClient, get_token, login
from openai import OpenAI
from prompts import basic_prompt, detailed_genre_description_prompt
from theme import TufteInspired
# Authenticate this process with the Hugging Face Hub using the token found
# in the HF_TOKEN environment variable (None is passed if it is unset).
login(os.getenv("HF_TOKEN"))
# OpenAI-compatible client pointed at the hosted Meta-Llama-3-70B-Instruct
# inference endpoint; whatever token get_token() returns is used as API key.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-70B-Instruct/v1",
    api_key=get_token(),
)
# Set up dataset storage: local folder that holds the JSONL vote logs.
# exist_ok=True makes repeated start-ups safe when the folder already exists.
dataset_folder = Path("dataset")
dataset_folder.mkdir(exist_ok=True)
def get_latest_dataset_file():
    """Return the most recently created ``data_*.jsonl`` file, if any.

    Candidates are looked up directly inside ``dataset_folder`` and ranked
    by ``os.path.getctime``.

    Returns:
        The ``Path`` with the greatest ctime, or ``None`` when the folder
        contains no matching file.
    """
    candidates = dataset_folder.glob("data_*.jsonl")
    # ``default`` covers the empty-folder case without a separate check.
    return max(candidates, key=os.path.getctime, default=None)
# Reuse the newest existing dataset file when there is one; otherwise start
# a fresh, uniquely named JSONL file inside the dataset folder.
latest_file = get_latest_dataset_file()
if latest_file is None:
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")
else:
    dataset_file = latest_file
    print(f"Appending to existing dataset file: {dataset_file}")
# Set up CommitScheduler for dataset uploads: the contents of dataset_folder
# are committed to the "data" directory of the dataset repository below.
repo_id = "davanstrien/summer-reading-preferences"
scheduler = CommitScheduler(
    repo_id=repo_id,
    repo_type="dataset",
    folder_path=dataset_folder,
    path_in_repo="data",
    every=1,  # Upload every minute (`every` is expressed in minutes)
)
# In-memory map of vote_id -> vote, used to reject a repeated vote on the
# same blurb by the same user id.
votes = {}
def generate_prompt():
    """Build a prompt using one of the two prompt builders, chosen at random.

    Returns:
        The result of ``detailed_genre_description_prompt()`` or
        ``basic_prompt()``, each picked with equal probability.
    """
    # Choose the builder itself rather than branching on a random boolean.
    build_prompt = random.choice([detailed_genre_description_prompt, basic_prompt])
    return build_prompt()
def get_and_store_prompt():
    """Generate a new prompt, echo it to stdout, and return it.

    Returns:
        The prompt produced by ``generate_prompt()``.
    """
    new_prompt = generate_prompt()
    # Keep this for debugging
    print(new_prompt)
    return new_prompt
def generate_blurb(prompt):
    """Stream a blurb for ``prompt`` from the chat-completions endpoint.

    The completion is requested in streaming mode with a random token budget
    between 100 and 1000 (inclusive).

    Args:
        prompt: Text sent as the single user message.

    Yields:
        The text accumulated so far, once for every chunk that carries new
        text, so a consumer can progressively re-render the blurb.
    """
    max_tokens = random.randint(100, 1000)
    chat_completion = client.chat.completions.create(
        model="tgi",
        messages=[
            {"role": "user", "content": prompt},
        ],
        stream=True,
        max_tokens=max_tokens,
    )
    full_text = ""
    for message in chat_completion:
        # Some chunks carry no choices at all; there is nothing to append.
        if not message.choices:
            continue
        piece = message.choices[0].delta.content
        # Role-only and final chunks report ``content`` as None (or empty);
        # concatenating None to a str would raise TypeError mid-stream.
        if not piece:
            continue
        full_text += piece
        yield full_text
def generate_vote_id(user_id, blurb):
    """Return an identifier for one (user, blurb) vote opportunity.

    Args:
        user_id: Identifier of the voter; formatted into the hashed key.
        blurb: Blurb text being voted on; formatted into the hashed key.

    Returns:
        The hexadecimal MD5 digest of ``"<user_id>:<blurb>"``.
    """
    vote_key = f"{user_id}:{blurb}"
    digest = hashlib.md5()
    digest.update(vote_key.encode())
    return digest.hexdigest()
def log_blurb_and_vote(prompt, blurb, vote, user_info: gr.OAuthProfile | None, *args):
    """Record one vote on a blurb in the local dataset file.

    Args:
        prompt: Prompt that produced the blurb.
        blurb: Blurb text the vote applies to.
        vote: Vote value stored in the log entry.
        user_info: Profile of the voter, or ``None``. When it is ``None`` a
            fresh random UUID is used as the user id for this call.
        *args: Extra positional inputs; accepted and ignored.

    Returns:
        A status message: either a notice that this user id has already
        voted on this blurb, or a confirmation naming the vote and user id.
    """
    if user_info is None:
        user_id = str(uuid.uuid4())
    else:
        user_id = user_info.username

    vote_id = generate_vote_id(user_id, blurb)
    # Refuse a second vote for the same (user id, blurb) pair.
    if vote_id in votes:
        return "You've already voted on this blurb!"
    votes[vote_id] = vote

    log_entry = dict(
        timestamp=datetime.now().isoformat(),
        prompt=prompt,
        blurb=blurb,
        vote=vote,
        user_id=user_id,
    )
    # Append under the scheduler's lock, one JSON object per line.
    with scheduler.lock, dataset_file.open("a") as f:
        f.write(f"{json.dumps(log_entry)}\n")

    gr.Info("Thank you for voting! Your feedback will be synced to the dataset.")
    return f"Logged: {vote} by user {user_id}"
# Create custom theme
tufte_theme = TufteInspired()

# Create Gradio interface: a "Create a book" button streams a generated blurb
# into the page, after which the voting buttons are revealed.
with gr.Blocks(theme=tufte_theme) as demo:
    gr.Markdown("<h1 style='text-align: center;'>Would you read this book?</h1>")
    # Link to the dataset repository the votes are uploaded to (repo_id),
    # rather than a placeholder URL.
    gr.Markdown(
        f"""<p style='text-align: center;'>Looking for your next summer read?
Would you read a book based on this LLM generated blurb? <br> Your vote will be added to <a href="https://huggingface.co/datasets/{repo_id}">this</a> Hugging Face dataset</p>"""
    )

    # Add the login button
    with gr.Row():
        login_btn = gr.LoginButton(size="sm")

    with gr.Row():
        generate_btn = gr.Button("Create a book", variant="primary")

    prompt_state = gr.State()
    blurb_output = gr.Markdown(label="Book blurb")
    user_state = gr.State()

    # Hidden until a blurb has finished generating.
    with gr.Row(visible=False) as voting_row:
        upvote_btn = gr.Button("👍 would read")
        downvote_btn = gr.Button("👎 wouldn't read")

    vote_output = gr.Textbox(label="Vote Status", interactive=False, visible=False)

    def generate_and_show(prompt, user_info):
        """Reset vote tracking and show a placeholder while generating.

        Returns the placeholder text for the blurb, an update hiding the
        voting row, and ``user_info`` unchanged for storage in state.
        """
        # Reset votes for new blurb
        votes.clear()
        # gr.update is the component-agnostic update helper; the per-class
        # ``gr.Row.update`` is not available in Gradio 4.
        return "Generating...", gr.update(visible=False), user_info

    def show_voting_buttons(blurb):
        """Pass the finished blurb through and reveal the voting row."""
        return blurb, gr.update(visible=True)

    # Pipeline: pick a prompt -> show placeholder -> stream blurb -> show votes.
    generate_btn.click(get_and_store_prompt, outputs=prompt_state).then(
        generate_and_show,
        inputs=[prompt_state, login_btn],
        outputs=[blurb_output, voting_row, user_state],
    ).then(generate_blurb, inputs=prompt_state, outputs=blurb_output).then(
        show_voting_buttons, inputs=blurb_output, outputs=[blurb_output, voting_row]
    )

    # Each vote button passes its vote value through a hidden textbox.
    upvote_btn.click(
        log_blurb_and_vote,
        inputs=[
            prompt_state,
            blurb_output,
            gr.Textbox(value="upvote", visible=False),
            user_state,
        ],
        outputs=vote_output,
    )
    downvote_btn.click(
        log_blurb_and_vote,
        inputs=[
            prompt_state,
            blurb_output,
            gr.Textbox(value="downvote", visible=False),
            user_state,
        ],
        outputs=vote_output,
    )
# Launch the app only when this file is executed as a script, with Gradio's
# debug mode switched on.
if __name__ == "__main__":
    demo.launch(debug=True)
|