Spaces:
Runtime error
Runtime error
from typing import Any, List | |
import gradio as gr | |
from toolz import concat | |
import httpx | |
import plotly.express as px | |
import polars as pl | |
from pathlib import Path | |
from datasets import load_dataset | |
from cachetools import TTLCache, cached | |
from datetime import datetime, timedelta | |
from datasets import Dataset | |
import os | |
# Hub access token, read from the environment at import time.
# A missing HUGGINGFACE_TOKEN variable raises KeyError here.
token = os.environ["HUGGINGFACE_TOKEN"]

# Avatar image URL for librarian-bot (served through an image-resizing proxy).
librarian_bot_avatar = (
    "https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/"
    "uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg?w=200&h=200&f=face"
)
def get_hub_community_activity(user: str) -> List[Any]:
    """Fetch a user's recent discussion activity from the Hugging Face Hub.

    Pages through the ``recent-activity`` endpoint 100 items at a time, up to
    2000 items, and stops early once a page comes back empty.

    Args:
        user: Hub username whose discussion activity is requested.

    Returns:
        A flat list of the raw ``recentActivity`` entries, in the order the
        API returned them.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
        KeyError: If a response body has no ``recentActivity`` key.
    """
    page_size = 100
    max_items = 2000
    all_data: List[Any] = []
    # Offsets start at 0 so the most recent activity item is included.
    for skip in range(0, max_items, page_size):
        r = httpx.get(
            "https://huggingface.co/api/recent-activity",
            # Passing params lets httpx URL-encode the user name.
            params={
                "limit": page_size,
                "type": "discussion",
                "skip": skip,
                "user": user,
            },
        )
        # Fail loudly on an error response instead of a confusing JSON/KeyError.
        r.raise_for_status()
        activity = r.json()["recentActivity"]
        if not activity:
            # An empty page means there is nothing further to fetch.
            break
        all_data.extend(activity)
    return all_data
def parse_date_time(date_time: str) -> datetime:
    """Parse an ISO-8601 ``...Z`` timestamp string into a naive ``datetime``.

    Accepts timestamps with fractional seconds
    (``2023-05-01T12:34:56.789Z``) and, as a fallback, timestamps without
    them (``2023-05-01T12:34:56Z``).

    Args:
        date_time: Timestamp string ending in a literal ``Z``.

    Returns:
        The parsed ``datetime`` with no ``tzinfo`` attached.

    Raises:
        ValueError: If the string matches neither supported layout.
    """
    try:
        return datetime.strptime(date_time, "%Y-%m-%dT%H:%M:%S.%fZ")
    except ValueError:
        # Fall back to whole-second timestamps; this re-raises ValueError
        # when the input matches neither layout.
        return datetime.strptime(date_time, "%Y-%m-%dT%H:%M:%SZ")
def parse_pr_data(data):
    """Flatten one activity entry into the fields kept for the stats table.

    Reads the entry's ``discussionData`` mapping and returns a dict with the
    parsed creation time, discussion number, status, repository name and
    type, and the pull-request flag.
    """
    discussion = data["discussionData"]
    return {
        "createdAt": parse_date_time(discussion["createdAt"]),
        "pr_number": discussion["num"],
        "status": discussion["status"],
        "repo_id": discussion["repo"]["name"],
        "type": discussion["repo"]["type"],
        "isPullRequest": discussion["isPullRequest"],
    }
def update_data():
    """Merge freshly fetched activity into the stored stats and re-publish them.

    Loads the ``train`` split of the ``librarian-bot/stats`` dataset, appends
    the parsed recent activity of ``librarian-bot``, drops duplicate rows,
    pushes the combined table back to the Hub and returns it as a polars
    DataFrame.
    """
    stored = load_dataset("librarian-bot/stats", split="train")
    previous_df = pl.DataFrame(stored.data.table)

    fetched = get_hub_community_activity("librarian-bot")
    update_df = pl.DataFrame([parse_pr_data(entry) for entry in fetched])

    # unique() removes rows present in both the stored and the fresh data.
    merged = pl.concat([previous_df, update_df]).unique()
    Dataset(merged.to_arrow()).push_to_hub("librarian-bot/stats", token=token)
    return merged
# def get_pr_status(): | |
# df = update_data() | |
# df = df.filter(pl.col("isPullRequest") is True) | |
# return df.select(pl.col("status").value_counts()) | |
# # return frequencies(x["status"] for x in pr_data) | |
def create_pie():
    """Build a pie chart of pull-request statuses from the refreshed stats.

    Refreshes the stats via ``update_data``, keeps only rows flagged as pull
    requests, counts rows per ``status`` and wraps the resulting Plotly pie
    chart in a ``gr.Plot`` component.

    Returns:
        A ``gr.Plot`` holding the pie chart.
    """
    df = update_data()
    # Filter with the boolean column expression itself. Writing
    # `pl.col(...) is True` is a Python identity test that evaluates to the
    # plain bool False and never selects the pull-request rows.
    df = df.filter(pl.col("isPullRequest"))
    counts_df = df["status"].value_counts().to_pandas()
    # The count column's name differs between polars releases
    # ("counts" vs "count"), so take whichever column is not "status".
    count_column = next(c for c in counts_df.columns if c != "status")
    fig = px.pie(counts_df, values=count_column, names="status", template="seaborn")
    return gr.Plot(fig)
def group_status_by_pr_number():
    """Return the mean ``pr_number`` per ``status`` as a pandas DataFrame.

    The data is the parsed recent activity of ``librarian-bot`` fetched from
    the Hub at call time.
    """
    fetched = get_hub_community_activity("librarian-bot")
    records = [parse_pr_data(entry) for entry in fetched]
    frame = pl.DataFrame(records)
    mean_by_status = frame.groupby("status").agg(pl.mean("pr_number"))
    return mean_by_status.to_pandas()
def plot_over_time():
    """Return a Plotly line chart of cumulative status counts by creation date.

    Fetches and parses the recent activity of ``librarian-bot``, counts
    entries per creation date and status, and plots the running totals of
    every column except the helper ``sum`` column.
    """
    fetched = get_hub_community_activity("librarian-bot")
    records = [parse_pr_data(entry) for entry in fetched]

    # Truncate timestamps to calendar dates so entries group per day.
    frame = pl.DataFrame(records).with_columns(pl.col("createdAt").cast(pl.Date))

    # One row per date, one column per status, cell = number of entries.
    per_day = frame.pivot(
        values=["status"],
        index=["createdAt"],
        columns=["status"],
        aggregate_function="count",
    )
    per_day = per_day.fill_null(0)
    per_day = per_day.with_columns(pl.sum(["open", "closed", "merged"]))
    per_day = per_day.sort("createdAt")

    cumulative = per_day.to_pandas().set_index("createdAt").cumsum()
    plotted_columns = [name for name in cumulative.columns if name != "sum"]
    return px.line(cumulative, x=cumulative.index, y=plotted_columns)
# Page layout: description header, status pie chart, cumulative line chart,
# and a table of mean PR number per status.
with gr.Blocks() as demo:
    # frequencies = get_pr_status("librarian-bot")
    description_html = Path("description.html").read_text()
    gr.HTML(description_html)
    # gr.Markdown(f"Total PRs opened: {sum(frequencies.values())}")

    with gr.Column():
        gr.Markdown("## Pull requests Status")
        pie_caption = (
            "The below pie chart shows the percentage of pull requests made by"
            " librarian bot that are open, closed or merged"
        )
        gr.Markdown(pie_caption)
        # create_pie() instantiates its gr.Plot itself, so it must run here
        # inside the column context.
        create_pie()

    with gr.Column():
        gr.Markdown("Pull requests opened, closed and merged over time (cumulative)")
        over_time_figure = plot_over_time()
        gr.Plot(over_time_figure)

    with gr.Column():
        gr.Markdown("## Pull requests status by PR number")
        status_table = group_status_by_pr_number()
        gr.DataFrame(status_table)

demo.launch(debug=True)