""" Module for transforming the raw data to training data. """ import json import os DATA_PATH = "./data/raw_data.json" def load_data(data_path: str): """route for loading the raw data""" with open( file=data_path, mode="r", encoding="utf-8", ) as data: data = json.load(data) return data # Original JSON data original_data = load_data(data_path=DATA_PATH) # Convert to desired format formatted_data = [] # Create a dictionary to store data for each intent intent_data = {} for entry in original_data: # print(intent_data) intent = entry["intent"] if intent not in intent_data: intent_data[intent] = {"intent": intent, "patterns": [], "responses": []} intent_data[intent]["patterns"].append(entry["user"]) intent_data[intent]["responses"].append(entry["response"]) # Convert dictionary values to a list formatted_data = list(intent_data.values()) # Output the formatted data to a JSON file OUTPUT_FILE_NAME = "formatted_data.json" os.makedirs("./intent-recognition/data/", exist_ok=True) OUTPUT_FILE_PATH = os.path.join("./intent-recognition/data/", OUTPUT_FILE_NAME) with open( file=OUTPUT_FILE_PATH, mode="w", encoding="utf-8" ) as json_file: json.dump(formatted_data, json_file, indent=4) print(f"Formatted data has been written to {OUTPUT_FILE_PATH}")