Spaces:
Runtime error
Runtime error
File size: 1,441 Bytes
65976bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from datasets import load_dataset
# Relative path of the raw JSON file handed to transform_raw_data() when this
# module is run as a script. NOTE(review): the space before "_Balance" looks
# unusual -- confirm it matches the file name on disk before "fixing" it.
DATA_PATH = "Account_Balance-main/Account Balance Queries/Account _Balance.json"
def dash_line():
    """Print a horizontal separator made of 100 hyphens to stdout."""
    separator = "-" * 100
    print(separator)
def _parse_conversation(raw_text):
    """Split a raw transcript into a list of ``{"role", "content"}`` turns.

    The text is split on newlines. A line containing ``[|User|]`` becomes a
    ``"user"`` turn and a line containing ``[|AI|]`` becomes an ``"AI"`` turn;
    the marker is removed and surrounding whitespace stripped. Lines with
    neither marker are dropped.
    """
    keyword_user = "[|User|]"
    keyword_ai = "[|AI|]"
    turns = []
    for line in raw_text.split("\n"):
        # The user marker is tested first, so a line carrying both markers is
        # recorded as a user turn (with only the user marker removed).
        if keyword_user in line:
            turns.append(
                {
                    "role": "user",
                    "content": line.replace(keyword_user, "").strip(),
                }
            )
        elif keyword_ai in line:
            turns.append(
                {
                    "role": "AI",
                    "content": line.replace(keyword_ai, "").strip(),
                }
            )
    return turns


def transform_raw_data(file_path: str) -> list:
    """Load a JSON/JSONL dataset and reshape it into per-topic conversations.

    Each record is read for its ``'topic'`` and ``'input'`` fields. The
    ``'input'`` text is split into speaker turns (see ``_parse_conversation``).
    Progress banners are printed to stdout along the way.

    Args:
        file_path: Path to the data file; must end with ``.json`` or
            ``.jsonl``.

    Returns:
        A list with one dict per record, each of the form
        ``{"topic": ..., "conversation": [{"role": ..., "content": ...}, ...]}``.

    Raises:
        ValueError: If ``file_path`` does not end with ``.json`` or ``.jsonl``.
    """
    # Validate before doing any work: previously an unsupported extension left
    # ``data`` unbound and the function crashed later with UnboundLocalError.
    if not file_path.endswith((".json", ".jsonl")):
        raise ValueError(
            f"Unsupported data file {file_path!r}: expected a .json or .jsonl file"
        )

    dash_line()
    print("Loading Data")
    data = load_dataset("json", data_files=file_path, split="train")

    dash_line()
    print("Transforming Data")
    formatted_data = [
        {
            "topic": feature["topic"],
            "conversation": _parse_conversation(feature["input"]),
        }
        for feature in data
    ]

    dash_line()
    print("Data transformation completed!")
    dash_line()
    return formatted_data
if __name__ == "__main__":
    # Script entry point: run the transformation on the default data file.
    transform_raw_data(file_path=DATA_PATH)
|