Spaces:
Runtime error
Runtime error
""" | |
Module for transforming the raw data to training data. | |
""" | |
import json | |
import os | |
# Default location of the raw JSON input read when this module runs.
DATA_PATH = "./data/raw_data.json"


def load_data(data_path: str):
    """Load and return the parsed contents of a JSON file.

    Args:
        data_path: Path of the JSON file to read. The file is decoded as
            UTF-8.

    Returns:
        Whatever ``json.load`` produces for the file's top-level JSON value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Keep the file handle and the parsed payload under separate names so the
    # handle is never shadowed while it is still open.
    with open(file=data_path, mode="r", encoding="utf-8") as raw_file:
        return json.load(raw_file)
# Raw records parsed from the input JSON file.
original_data = load_data(data_path=DATA_PATH)

# Group the records by intent. Every record is read through its "intent",
# "user" and "response" keys; a record missing any of them raises KeyError.
intent_data = {}
for entry in original_data:
    intent = entry["intent"]
    # Create the bucket the first time an intent is seen, then reuse it.
    bucket = intent_data.setdefault(
        intent, {"intent": intent, "patterns": [], "responses": []}
    )
    bucket["patterns"].append(entry["user"])
    bucket["responses"].append(entry["response"])

# One object per intent, in the order each intent first appeared
# (dicts preserve insertion order).
formatted_data = list(intent_data.values())

# Write the grouped data to a JSON file, creating the target directory if it
# does not exist yet.
OUTPUT_DIR = "./intent-recognition/data/"
OUTPUT_FILE_NAME = "formatted_data.json"
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_FILE_PATH = os.path.join(OUTPUT_DIR, OUTPUT_FILE_NAME)
with open(file=OUTPUT_FILE_PATH, mode="w", encoding="utf-8") as json_file:
    json.dump(formatted_data, json_file, indent=4)
print(f"Formatted data has been written to {OUTPUT_FILE_PATH}")