Spaces:
Runtime error
Runtime error
File size: 1,359 Bytes
65976bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
"""
Module for transforming the raw data to training data.
"""
import json
import os
# Location of the raw JSON input handed to load_data() below. The path is
# relative, so it resolves against the current working directory.
DATA_PATH = "./data/raw_data.json"
def load_data(data_path: str):
    """Load and return the parsed contents of a JSON file.

    Args:
        data_path: Path of the JSON file to read. The file is decoded
            as UTF-8.

    Returns:
        The deserialized JSON document, exactly as produced by
        ``json.load`` (a list, dict, or scalar depending on the file).

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    # Keep the file handle and the parsed payload under separate names so
    # the handle is never shadowed while the context manager is active.
    with open(file=data_path, mode="r", encoding="utf-8") as raw_file:
        return json.load(raw_file)
# Raw records, loaded when the module runs. The loop below reads the
# "intent", "user" and "response" keys of every record.
original_data = load_data(data_path=DATA_PATH)

# Group the records by intent: each distinct intent gets a single entry that
# collects all of its user utterances ("patterns") and all of its responses.
intent_data = {}
for entry in original_data:
    intent = entry["intent"]
    if intent not in intent_data:
        intent_data[intent] = {
            "intent": intent,
            "patterns": [],
            "responses": [],
        }
    intent_data[intent]["patterns"].append(entry["user"])
    intent_data[intent]["responses"].append(entry["response"])

# Flatten the grouping into a list; dicts keep insertion order, so intents
# appear in the order they were first seen in the raw data.
formatted_data = list(intent_data.values())

# Write the grouped data to a JSON file, creating the output directory first.
# The directory is named once so makedirs() and the final path cannot drift.
OUTPUT_DIR = "./intent-recognition/data/"
OUTPUT_FILE_NAME = "formatted_data.json"
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_FILE_PATH = os.path.join(OUTPUT_DIR, OUTPUT_FILE_NAME)
with open(file=OUTPUT_FILE_PATH, mode="w", encoding="utf-8") as json_file:
    json.dump(formatted_data, json_file, indent=4)
print(f"Formatted data has been written to {OUTPUT_FILE_PATH}")
|