Spaces:
Sleeping
Sleeping
from io import StringIO | |
import gradio as gr | |
import pandas as pd | |
from datasets import ClassLabel, Dataset, Image | |
from httpx import Client | |
# Shared httpx client used for the Label Studio export request.
client = Client()

# Settings most recently stored by update_user_data, keyed by field name
# ("api_key", "space_url", "hub_api_key", "hub_dataset_id").
USER_DATA = dict()
def update_user_data(api_key, space_url, hub_api_key, hub_dataset_id):
    """Store the submitted Label Studio and Hub settings in ``USER_DATA``.

    Each argument is saved under the key matching its parameter name,
    overwriting whatever an earlier call stored there.

    NOTE(review): ``USER_DATA`` is a module-level dict, so presumably every
    session served by this process shares these values -- confirm that is
    acceptable for credentials.
    """
    USER_DATA.update(
        {
            "api_key": api_key,
            "space_url": space_url,
            "hub_api_key": hub_api_key,
            "hub_dataset_id": hub_dataset_id,
        }
    )
def check_user_data():
    """Report whether the Label Studio connection settings are present.

    Returns:
        ``True`` only when ``USER_DATA`` holds truthy ``"api_key"`` and
        ``"space_url"`` entries; ``False`` otherwise.
    """
    required_keys = ("api_key", "space_url")
    return all(USER_DATA.get(key) for key in required_keys)
# def list_projects(): | |
# headers = {"Authorization": f'Token {USER_DATA["api_key"]}'} | |
# resp = client.get( | |
# "https://davanstrien-label-studio.hf.space/api/projects/", headers=headers | |
# ) | |
# return resp.json() | |
# def get_column_names(): | |
# headers = {"Authorization": f'Token {USER_DATA["api_key"]}'} | |
# print(headers) | |
# # resp = client.get( | |
# # "http://davanstrien-label-studio.hf.space/api/projects/1/export?exportType=CSV", | |
# # headers=headers, | |
# # ) | |
# resp = requests.get( | |
# "http://davanstrien-label-studio.hf.space/api/projects/1/export?exportType=CSV", | |
# headers=headers, | |
# ) | |
# return pd.read_csv(StringIO(resp.text)).columns.tolist() | |
def push_annotations_to_hub(project_id, input_column, input_column_type, label_column):
    """Export a Label Studio project as CSV and push it to the Hugging Face Hub.

    Connection details and the target dataset ID are read from ``USER_DATA``.

    Args:
        project_id: Numeric Label Studio project ID (converted with ``int``).
        input_column: Name of the CSV column holding the annotated input.
        input_column_type: ``"image"`` casts ``input_column`` to an ``Image``
            feature; any other value leaves the column as exported.
        label_column: Name of the CSV column holding the labels; it is cast to
            a ``ClassLabel`` feature built from its distinct non-null values.

    Returns:
        The first five rows of the pushed dataset as a pandas DataFrame.

    Raises:
        gr.Error: If required details have not been submitted, the export
            request fails, the export is empty, or a needed column is absent.
    """
    # Local import: only ``Client`` is imported from httpx at module level.
    from httpx import HTTPError

    missing = [
        key
        for key in ("api_key", "space_url", "hub_dataset_id")
        if not USER_DATA.get(key)
    ]
    if missing:
        raise gr.Error("Submit your details first; missing: " + ", ".join(missing))
    if project_id is None:
        raise gr.Error("Please enter a project ID.")

    # A pasted Space URL often ends in "/", which would produce "//api/...".
    base_url = USER_DATA["space_url"].strip().rstrip("/")
    headers = {"Authorization": f'Token {USER_DATA["api_key"]}'}
    try:
        resp = client.get(
            f"{base_url}/api/projects/{int(project_id)}/export?exportType=CSV",
            headers=headers,
        )
        # Fail on 4xx/5xx instead of parsing an error page as CSV.
        resp.raise_for_status()
    except HTTPError as err:
        raise gr.Error(f"Label Studio export failed: {err}") from err

    try:
        df = pd.read_csv(StringIO(resp.text))
    except pd.errors.EmptyDataError as err:
        raise gr.Error("The Label Studio export contained no data.") from err

    # The input column is only touched when it has to be cast to images.
    needed_columns = [label_column]
    if input_column_type == "image":
        needed_columns.append(input_column)
    absent = [name for name in needed_columns if name not in df.columns]
    if absent:
        raise gr.Error(
            "Column(s) not found in the export: "
            + ", ".join(map(str, absent))
            + ". Available columns: "
            + ", ".join(map(str, df.columns))
        )

    # Rows without a label show up as NaN; NaN is not a valid class name.
    labels = df[label_column].dropna().unique().tolist()
    ds = Dataset.from_pandas(df)
    ds = ds.cast_column(label_column, ClassLabel(names=labels))
    if input_column_type == "image":
        ds = ds.cast_column(input_column, Image())
    ds.push_to_hub(USER_DATA["hub_dataset_id"], token=USER_DATA.get("hub_api_key"))
    return ds.to_pandas().head(5)
# Gradio UI: a form for Label Studio / Hub details, export options, and a
# preview table filled by push_annotations_to_hub.
with gr.Blocks() as demo:
    gr.Markdown("# Push label studio datasets to the hub")
    gr.Markdown(
        "This is a proof of concept app which provides a GUI for exporting data from"
        " Label Studio and pushing the loaded dataset to the Hugging Face Hub"
    )
    with gr.Row():
        # Left column: how to reach the Label Studio instance.
        with gr.Column():
            with gr.Row():
                gr.Markdown("## Label Studio details")
            with gr.Row():
                gr.Markdown(
                    "Enter your Label Studio API key, you can find this under settings."
                )
            with gr.Row():
                API_KEY = gr.Textbox(
                    type="password",
                    label="Label Studio API Key",
                )
            # NOTE(review): the nesting of the next two rows is reconstructed --
            # confirm the description and textbox are meant to share one row.
            with gr.Row():
                with gr.Row():
                    gr.Markdown(
                        "Space URL, this can be found by clicking on the three dots"
                        " button on your space and copying the URL shown after clicking"
                        " the Embed Space button"
                    )
                with gr.Row():
                    # The example is a placeholder, not the initial value, so it
                    # can never be submitted as the actual URL.
                    SPACE_URL = gr.Textbox(
                        label="Space URL",
                        placeholder="e.g. https://davanstrien-label-studio.hf.space/",
                    )
        # Right column: where to push the dataset on the Hub.
        with gr.Column():
            gr.Markdown("## Hub Dataset info")
            gr.Markdown(
                """Enter a Hub [API key](https://huggingface.co/settings/tokens) with write access and the name you would like to use for your dataset"""
            )
            HUB_API_KEY = gr.Textbox(
                type="password",
                label="Hub API Key",
            )
            with gr.Row():
                gr.Markdown("Name of the dataset you would like to create")
            with gr.Row():
                # Same fix as the Space URL: show the example as a placeholder
                # (the previous placeholder was a copy-pasted URL).
                HUB_DATASET_ID = gr.Textbox(
                    label="Dataset name",
                    placeholder="e.g. davanstrien/dataset_name",
                )
    # Stores the four form values via update_user_data; no output component.
    submit_button = gr.Button("Submit details")
    submit_button.click(
        update_user_data, [API_KEY, SPACE_URL, HUB_API_KEY, HUB_DATASET_ID]
    )
    with gr.Row():
        project_id = gr.Number(1, label="Project ID")
        input_column = gr.Textbox("text", type="text", label="Input column")
        input_column_type = gr.Dropdown(
            choices=["text", "image"], label="Input column type", value="text"
        )
        label_column = gr.Textbox("choice", type="text", label="Label column")
    push_button = gr.Button("Push annotations to Hub")
    with gr.Row():
        gr.Markdown("## Preview of your dataset")
    with gr.Row():
        preview = gr.DataFrame()
    # The DataFrame returned by push_annotations_to_hub is shown in `preview`.
    push_button.click(
        push_annotations_to_hub,
        [
            project_id,
            input_column,
            input_column_type,
            label_column,
        ],
        preview,
    )
demo.launch(debug=True)