# Spaces:
# Runtime error
# Runtime error
from datasets import load_dataset
# Default dataset file passed to transform_raw_data() when run as a script.
DATA_PATH = "Account_Balance-main/Account Balance Queries/Account _Balance.json"
def dash_line(width: int = 100) -> None:
    """Print a horizontal separator made of ``width`` dash characters.

    Args:
        width: Number of dashes to print. Defaults to 100, the width the
            script has always used.
    """
    print("-" * width)
def transform_raw_data(file_path: str) -> list:
    """Load a JSON/JSONL dataset and convert each record into chat turns.

    Every record is read for its ``topic`` and ``input`` fields. The
    ``input`` text is split on newlines; a line containing ``[|User|]``
    becomes a ``{"role": "user", ...}`` turn and a line containing
    ``[|AI|]`` becomes a ``{"role": "AI", ...}`` turn. The marker is removed
    and the remaining text is stripped. Lines with neither marker are
    dropped.

    Args:
        file_path: Path to a ``.json`` or ``.jsonl`` file, passed to
            ``load_dataset`` as ``data_files``.

    Returns:
        A list of dicts shaped like
        ``{"topic": ..., "conversation": [{"role": ..., "content": ...}, ...]}``,
        one per record in the ``train`` split.

    Raises:
        ValueError: If ``file_path`` does not end in ``.json`` or ``.jsonl``.
    """
    dash_line()
    print("Loading Data")
    # Fail early with a clear message; previously an unsupported extension
    # left ``data`` unbound and crashed later with an UnboundLocalError.
    if not file_path.endswith((".json", ".jsonl")):
        raise ValueError(
            f"Unsupported data file {file_path!r}: expected a .json or .jsonl file"
        )
    data = load_dataset("json", data_files=file_path, split="train")

    dash_line()
    print("Transforming Data")
    keyword_AI = "[|AI|]"
    keyword_user = "[|User|]"
    # Checked in order: the user marker takes precedence when a line
    # contains both markers.
    role_markers = ((keyword_user, "user"), (keyword_AI, "AI"))

    formatted_data = []
    for feature in data:
        row = {"topic": feature["topic"], "conversation": []}
        for line in feature["input"].split("\n"):
            for marker, role in role_markers:
                if marker in line:
                    row["conversation"].append(
                        {
                            "role": role,
                            "content": line.replace(marker, "").strip(),
                        }
                    )
                    break
        formatted_data.append(row)

    dash_line()
    print("Data transformation completed!")
    dash_line()
    return formatted_data
if __name__ == "__main__":
    # Script entry point: transform the default dataset file.
    transform_raw_data(DATA_PATH)