smellslikeml commited on
Commit
a74e89c
1 Parent(s): 0cf2065

initial commit

Browse files
Files changed (5) hide show
  1. README.md +8 -0
  2. app.py +4 -0
  3. requirements.txt +9 -0
  4. sheet_extractor.py +92 -0
  5. tool_config.json +6 -0
README.md CHANGED
@@ -10,4 +10,12 @@ pinned: false
10
  license: mit
11
  ---
12
 
 
 
 
 
 
 
 
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
10
  license: mit
11
  ---
12
 
13
+ ## Getting Started
14
+
15
+ Please configure your OpenAI API key as an environment variable:
16
+ ```
17
+ export OPENAI_API_KEY="your-key-here"
18
+ ```
19
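+ Also make sure your `credentials.json` is in your working directory; see the [Google Sheets API Python quickstart](https://developers.google.com/sheets/api/quickstart/python) for details. Note that `sheet_extractor.py` loads the authorized-user token from `token.json`, so that file must be present in the working directory as well.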
20
+
21
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Script entry point: hands the Google Sheets extractor tool to the
# transformers Gradio demo launcher.
from transformers.tools.base import launch_gradio_demo
from sheet_extractor import GoogleSheetExtractorTool

# The tool class itself (not an instance) is passed to the launcher.
launch_gradio_demo(GoogleSheetExtractorTool)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ openai
2
+ ortools
3
+ requests
4
+ gspread
5
+ huggingface_hub
6
+ google-api-python-client
7
+ google-auth-httplib2
8
+ google-auth-oauthlib
9
+ transformers
sheet_extractor.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import json
3
+ import regex
4
+ import inspect
5
+ import gspread
6
+ import guidance
7
+ from ast import literal_eval
8
+ from transformers import Tool
9
+ from oauth2client.service_account import ServiceAccountCredentials
10
+ from googleapiclient.discovery import build
11
+ from google.oauth2.credentials import Credentials
12
+
13
+ from google.auth.transport.requests import Request
14
+ from google.oauth2.credentials import Credentials
15
+ from google_auth_oauthlib.flow import InstalledAppFlow
16
+ from googleapiclient.discovery import build
17
+ from googleapiclient.errors import HttpError
18
+
19
+ from ortools.linear_solver import pywraplp
20
+
21
# Module-level side effect: assigns an OpenAI "gpt-4" LLM object to
# `guidance.llm` at import time.
# NOTE(review): presumably this becomes the default model for every guidance
# program and needs OPENAI_API_KEY to be set (see README) -- confirm.
guidance.llm = guidance.llms.OpenAI("gpt-4")


# Chat-style guidance prompt template.
#   * user turn: a free-form `description`, the `code` of the algorithm whose
#     arguments are wanted, a list of `examples` (each rendered from its
#     `input` / `output` fields as a "Data Blob" / "Result" pair), and finally
#     the `data_blob` to extract values from as JSON.
#   * assistant turn: the generated text is captured under the name 'output'.
# Nothing in the visible part of this file calls `structure_program`.
structure_program = guidance(
'''
{{#user~}}
{{description}}
Help me extract args from the data blob to apply the following algorithm:
{{code}}

----

{{~#each examples}}
Data Blob: {{this.input}}
Result: {{this.output}}
---
{{~/each}}

Please help me extract the input values from a given data blob into a JSON.
Data Blob: {{data_blob}}
Result:
{{~/user}}

{{#assistant~}}
{{gen 'output'}}
{{~/assistant}}
''')
48
+
49
+
50
class DataExtractorTool(Tool):
    """Base class for tools that pull raw text out of an external source.

    Subclasses override :meth:`fetch_data`; calling the tool instance simply
    forwards the link to that method and returns whatever it produces.
    """

    def __init__(self):
        super().__init__()

    def fetch_data(self, source_link: str) -> str:
        """Fetch the raw data behind ``source_link``.

        Args:
            source_link: Location of the data source; its exact form is
                defined by the concrete subclass.

        Returns:
            The raw data as a string.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement this method.")

    def __call__(self, source_link: str) -> str:
        """Run the tool: return the raw data fetched from ``source_link``.

        The return annotation is ``str`` (it used to say ``dict``) because
        the value comes straight from :meth:`fetch_data`, which returns a
        string.
        """
        return self.fetch_data(source_link)
63
+
64
+
65
class GoogleSheetExtractorTool(DataExtractorTool):
    """Tool that reads a Google Sheet and returns its cells as text.

    Rows are joined with newlines and the cells of a row with commas. The
    OAuth scope, the token file and the range that is read are class
    attributes so a subclass can override them without touching
    :meth:`fetch_data`.
    """

    name = "google_sheet_extractor_tool"
    description = """
    Tool to extract data from Google Sheets.
    Input: source_link - str - a url or google sheets id
    e.x. source_link='https://docs.google.com/spreadsheets/d/SHEETS_ID/'.
    Output is a string.
    """

    # Read-only access is enough: the tool never writes to the sheet.
    scopes = ["https://www.googleapis.com/auth/spreadsheets.readonly"]
    # Authorized-user token file, resolved relative to the working directory.
    token_file = 'token.json'
    # A1-notation range fetched from the spreadsheet.
    range_name = 'Sheet1!A1:Z1000'

    @staticmethod
    def _extract_spreadsheet_id(source_link: str) -> str:
        """Return the spreadsheet ID contained in a URL or bare ID.

        Handles the usual browser URL shapes, e.g.
        ``.../spreadsheets/d/<ID>/edit#gid=0`` or
        ``.../spreadsheets/d/<ID>?usp=sharing``, for which taking the last
        path segment would return ``edit#gid=0`` instead of the ID.

        Raises:
            ValueError: If no ID can be found in ``source_link``.
        """
        link = source_link.strip()
        marker = "/spreadsheets/d/"
        if marker in link:
            candidate = link.split(marker, 1)[1]
            # The ID ends at the next path, query or fragment delimiter.
            for delimiter in "/?#":
                candidate = candidate.split(delimiter, 1)[0]
        else:
            # Bare ID (or an unrecognised link): keep the last path segment,
            # which is what this tool has always done.
            candidate = link.rstrip("/").split("/")[-1]
        if not candidate:
            raise ValueError(
                f"Could not find a spreadsheet id in {source_link!r}"
            )
        return candidate

    def fetch_data(self, source_link: str) -> str:
        """Download the configured range of a sheet as comma-separated text.

        Args:
            source_link: Google Sheets URL or bare spreadsheet ID.

        Returns:
            One line per row, cells separated by commas. Cell text is not
            quoted or escaped, so a cell containing a comma or a newline is
            indistinguishable from a separator.

        Raises:
            ValueError: If ``source_link`` does not contain a spreadsheet ID.
        """
        spreadsheet_id = self._extract_spreadsheet_id(source_link)

        # Set up the credentials
        creds = Credentials.from_authorized_user_file(
            self.token_file, self.scopes
        )
        service = build('sheets', 'v4', credentials=creds)

        # Open the spreadsheet and get all values in the configured range
        request = service.spreadsheets().values().get(
            spreadsheetId=spreadsheet_id,
            range=self.range_name,
        )
        rows = request.execute().get('values', [])

        # Convert the data to a string representation; str() keeps the join
        # safe if a cell ever arrives as a non-string value.
        raw_data = '\n'.join(
            ','.join(str(cell) for cell in row) for row in rows
        )
        print(raw_data)
        return raw_data
tool_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "description": "Tool to extract data from Google Sheets. Input: source_link - str - a url or google sheets id e.x. source_link='https://docs.google.com/spreadsheets/d/SHEETS_ID/'. Output is a string",
3
+ "name": "google_sheet_extractor_tool",
4
+ "tool_class": "sheet_extractor.GoogleSheetExtractorTool"
5
+ }
6
+