librarian-bot committed on
Commit
b9692b3
1 Parent(s): 1722c11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -24
app.py CHANGED
@@ -1,18 +1,22 @@
1
  from typing import Any, List
2
  import gradio as gr
3
- from toolz import concat, frequencies
4
  import httpx
5
- from functools import lru_cache
6
- import pandas as pd
7
  import plotly.express as px
8
  import polars as pl
9
  from pathlib import Path
10
- from datetime import datetime
 
 
 
 
 
 
11
 
12
  librarian_bot_avatar = "https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg?w=200&h=200&f=face"
13
 
14
 
15
- @lru_cache(maxsize=512)
16
  def get_hub_community_activity(user: str) -> List[Any]:
17
  all_data = []
18
  for i in range(1, 2000, 100):
@@ -24,22 +28,6 @@ def get_hub_community_activity(user: str) -> List[Any]:
24
  return list(concat(all_data))
25
 
26
 
27
- @lru_cache(maxsize=512)
28
- def get_pr_status(user: str):
29
- all_data = get_hub_community_activity(user)
30
- pr_data = (
31
- x["discussionData"] for x in all_data if x["discussionData"]["isPullRequest"]
32
- )
33
- return frequencies(x["status"] for x in pr_data)
34
-
35
-
36
- def create_pie():
37
- frequencies = get_pr_status("librarian-bot")
38
- df = pd.DataFrame({"status": frequencies.keys(), "number": frequencies.values()})
39
- fig = px.pie(df, values="number", names="status", template="seaborn")
40
- return gr.Plot(fig)
41
-
42
-
43
  def parse_date_time(date_time: str) -> datetime:
44
  return datetime.strptime(date_time, "%Y-%m-%dT%H:%M:%S.%fZ")
45
 
@@ -50,14 +38,46 @@ def parse_pr_data(data):
50
  pr_number = data["num"]
51
  status = data["status"]
52
  repo_id = data["repo"]["name"]
 
 
53
  return {
54
  "createdAt": createdAt,
55
  "pr_number": pr_number,
56
  "status": status,
57
  "repo_id": repo_id,
 
 
58
  }
59
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def group_status_by_pr_number():
62
  all_data = get_hub_community_activity("librarian-bot")
63
  all_data = [parse_pr_data(d) for d in all_data]
@@ -83,13 +103,14 @@ def plot_over_time():
83
 
84
 
85
  with gr.Blocks() as demo:
86
- frequencies = get_pr_status("librarian-bot")
87
  gr.HTML(Path("description.html").read_text())
88
- gr.Markdown(f"Total PRs opened: {sum(frequencies.values())}")
89
  with gr.Column():
90
  gr.Markdown("## Pull requests Status")
91
  gr.Markdown(
92
- "The below pie chart shows the percentage of pull requests made by librarian bot that are open, closed or merged"
 
93
  )
94
  create_pie()
95
  with gr.Column():
 
1
  from typing import Any, List
2
  import gradio as gr
3
+ from toolz import concat
4
  import httpx
 
 
5
  import plotly.express as px
6
  import polars as pl
7
  from pathlib import Path
8
+ from datasets import load_dataset
9
+ from cachetools import TTLCache, cached
10
+ from datetime import datetime, timedelta
11
+ from datasets import Dataset
12
+ import os
13
+
14
+ token = os.environ["HUGGINGFACE_TOKEN"]
15
 
16
  librarian_bot_avatar = "https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg?w=200&h=200&f=face"
17
 
18
 
19
+ @cached(cache=TTLCache(maxsize=1000, ttl=timedelta(minutes=10), timer=datetime.now))
20
  def get_hub_community_activity(user: str) -> List[Any]:
21
  all_data = []
22
  for i in range(1, 2000, 100):
 
28
  return list(concat(all_data))
29
 
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def parse_date_time(date_time: str) -> datetime:
32
  return datetime.strptime(date_time, "%Y-%m-%dT%H:%M:%S.%fZ")
33
 
 
38
  pr_number = data["num"]
39
  status = data["status"]
40
  repo_id = data["repo"]["name"]
41
+ repo_type = data["repo"]["type"]
42
+ isPullRequest = data["isPullRequest"]
43
  return {
44
  "createdAt": createdAt,
45
  "pr_number": pr_number,
46
  "status": status,
47
  "repo_id": repo_id,
48
+ "type": repo_type,
49
+ "isPullRequest": isPullRequest,
50
  }
51
 
52
 
53
@cached(cache=TTLCache(maxsize=1000, ttl=timedelta(minutes=30), timer=datetime.now))
def update_data():
    """Merge fresh librarian-bot activity into the stored stats dataset.

    Loads the ``train`` split of ``librarian-bot/stats``, appends the rows
    parsed from the current Hub community activity, drops exact duplicate
    rows, pushes the merged table back to the Hub and returns it as a polars
    DataFrame. The result is cached for 30 minutes.
    """
    # NOTE(review): ``unique()`` with no subset compares whole rows, so a PR
    # whose status changed between runs presumably stays in the frame once
    # per status -- confirm that this is intended.
    stored = load_dataset("librarian-bot/stats", split="train")
    previous_df = pl.DataFrame(stored.data.table)
    fresh_rows = [
        parse_pr_data(item) for item in get_hub_community_activity("librarian-bot")
    ]
    merged = pl.concat([previous_df, pl.DataFrame(fresh_rows)]).unique()
    Dataset(merged.to_arrow()).push_to_hub("librarian-bot/stats", token=token)
    return merged
64
+
65
+
66
+ # def get_pr_status():
67
+ # df = update_data()
68
+ # df = df.filter(pl.col("isPullRequest") is True)
69
+ # return df.select(pl.col("status").value_counts())
70
+ # # return frequencies(x["status"] for x in pr_data)
71
+
72
+
73
def create_pie():
    """Build a pie chart of pull-request statuses from the stats data.

    Returns:
        A ``gr.Plot`` wrapping a plotly pie figure with one slice per status.
    """
    df = update_data()
    # Filter with the boolean column expression itself. The previous
    # `pl.col("isPullRequest") is True` was a Python identity test between an
    # expression object and True, which is always False, so the predicate
    # never looked at the column.
    df = df.filter(pl.col("isPullRequest"))
    # NOTE(review): "counts" is the value_counts column name in older polars
    # releases (newer ones presumably use "count") -- confirm against the
    # pinned polars version.
    df = df["status"].value_counts().to_pandas()
    fig = px.pie(df, values="counts", names="status", template="seaborn")
    return gr.Plot(fig)
79
+
80
+
81
  def group_status_by_pr_number():
82
  all_data = get_hub_community_activity("librarian-bot")
83
  all_data = [parse_pr_data(d) for d in all_data]
 
103
 
104
 
105
  with gr.Blocks() as demo:
106
+ # frequencies = get_pr_status("librarian-bot")
107
  gr.HTML(Path("description.html").read_text())
108
+ # gr.Markdown(f"Total PRs opened: {sum(frequencies.values())}")
109
  with gr.Column():
110
  gr.Markdown("## Pull requests Status")
111
  gr.Markdown(
112
+ "The below pie chart shows the percentage of pull requests made by"
113
+ " librarian bot that are open, closed or merged"
114
  )
115
  create_pie()
116
  with gr.Column():