davanstrien HF staff committed on
Commit
58e96b2
1 Parent(s): 55e6e0d
Files changed (2) hide show
  1. app.py +29 -27
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,23 +1,20 @@
 
 
 
1
  from typing import Any, List
 
2
  import gradio as gr
3
- from toolz import concat
 
4
  import plotly.express as px
5
  import polars as pl
6
- from datasets import load_dataset
7
  from cachetools import TTLCache, cached
8
- from datetime import datetime, timedelta
9
- from datasets import Dataset
10
- import os
11
- from functools import lru_cache
12
- import pandas as pd
13
- from toolz import frequencies
14
  from dotenv import load_dotenv
15
- from typing import List, Any
16
- from toolz import concat
17
  from httpx import Client
 
18
  from tqdm.auto import tqdm
19
 
20
-
21
  load_dotenv()
22
  token = os.environ["HUGGINGFACE_TOKEN"]
23
  user_agent = os.environ["USER_AGENT"]
@@ -28,27 +25,32 @@ assert user_agent
28
  assert user
29
 
30
  headers = {"user-agent": user_agent, "authorization": f"Bearer {token}"}
31
-
32
- client = Client(headers=headers, http2=True)
33
 
34
 
35
- def get_hub_community_activity(user: str) -> List[Any]:
 
36
  with tqdm() as pbar:
37
  all_data = []
38
  i = 1
39
- while True:
40
- r = client.get(
41
- f"https://huggingface.co/api/recent-activity?limit=100&type=discussion&skip={i}&user={user}",
42
- )
43
- activity = r.json()["recentActivity"]
44
- if not activity:
45
- break
46
- all_data.append(activity)
47
- if len(all_data) % 1000 == 0:
48
- # print(f"Length of all_data: {len(all_data)}")
49
- pbar.write(f"Length of all_data: {len(all_data)}")
50
- i += 100
51
- pbar.update(100)
 
 
 
 
52
 
53
  return list(concat(all_data))
54
 
 
1
+ import os
2
+ from datetime import datetime, timedelta
3
+ from functools import lru_cache
4
  from typing import Any, List
5
+
6
  import gradio as gr
7
+ import httpx
8
+ import pandas as pd
9
  import plotly.express as px
10
  import polars as pl
 
11
  from cachetools import TTLCache, cached
12
+ from datasets import Dataset, load_dataset
 
 
 
 
 
13
  from dotenv import load_dotenv
 
 
14
  from httpx import Client
15
+ from toolz import concat, frequencies
16
  from tqdm.auto import tqdm
17
 
 
18
  load_dotenv()
19
  token = os.environ["HUGGINGFACE_TOKEN"]
20
  user_agent = os.environ["USER_AGENT"]
 
25
  assert user
26
 
27
  headers = {"user-agent": user_agent, "authorization": f"Bearer {token}"}
28
+ limits = httpx.Limits(max_keepalive_connections=10, max_connections=20)
29
+ client = Client(headers=headers, http2=True, limits=limits, timeout=60.0)
30
 
31
 
32
@lru_cache(maxsize=None)
def get_hub_community_activity(user: str, max: int = 35_000) -> List[Any]:
    """Collect a user's discussion activity from the Hub recent-activity API.

    Pages of up to 100 items are requested from
    ``https://huggingface.co/api/recent-activity`` (``type=discussion``),
    advancing the ``skip`` offset by 100 per page, until the API returns an
    empty ``recentActivity`` list or the offset exceeds ``max``.

    Fetching is best-effort: a failing page is retried a bounded number of
    times with a short back-off, after which paging stops and whatever has
    been collected so far is returned. Because the function is memoised with
    ``lru_cache``, the returned list (including a partial one) is reused for
    later calls with the same arguments for the lifetime of the process.

    Args:
        user: Hub username whose activity is requested.
        max: Upper bound on the ``skip`` offset; paging stops once the
            offset exceeds this value.

    Returns:
        A flat list containing the items of every page that was fetched.
    """
    import time  # local import: only needed for the retry back-off below

    max_retries = 5  # consecutive failures tolerated for a single page
    pages = []
    fetched = 0  # number of individual activity items collected so far
    failures = 0  # consecutive failures for the current page

    # NOTE(review): paging starts at skip=1, as in the original code. If the
    # API's `skip` is a zero-based item offset this drops the most recent
    # item -- confirm against the API before changing it.
    i = 1

    with tqdm() as pbar:
        while i <= max:
            try:
                r = client.get(
                    f"https://huggingface.co/api/recent-activity?limit=100&type=discussion&skip={i}&user={user}",
                )
                # Surface 4xx/5xx responses here instead of as confusing
                # JSON/KeyError failures further down.
                r.raise_for_status()
                activity = r.json()["recentActivity"]
            except (httpx.HTTPError, ValueError, KeyError, TypeError) as e:
                # The original handler retried the same offset forever; a
                # persistent error (bad token, rate limit, changed payload)
                # therefore never terminated. Bound the retries instead.
                failures += 1
                print(e)
                if failures >= max_retries:
                    pbar.write(
                        f"Giving up at skip={i} after {failures} consecutive failures"
                    )
                    break
                time.sleep(min(2**failures, 30))
                continue

            failures = 0
            if not activity:
                break

            pages.append(activity)
            fetched += len(activity)
            # Report roughly every 1000 items (10 full pages); the previous
            # check counted pages rather than items and so never fired.
            if len(pages) % 10 == 0:
                pbar.write(f"Length of all_data: {fetched}")
            i += 100
            pbar.update(100)

    return list(concat(pages))
56
 
requirements.txt CHANGED
@@ -25,6 +25,8 @@ attrs==22.2.0
25
  # via
26
  # aiohttp
27
  # jsonschema
 
 
28
  cachetools==5.3.0
29
  # via -r requirements.in
30
  certifi==2022.12.7
 
25
  # via
26
  # aiohttp
27
  # jsonschema
28
+ backoff==2.2.1
29
+ # via -r requirements.in
30
  cachetools==5.3.0
31
  # via -r requirements.in
32
  certifi==2022.12.7