xtrade_bot / archived /intent_recognition /data_transformation.py
Josh-Ola's picture
Upload folder using huggingface_hub
65976bc verified
raw
history blame contribute delete
No virus
1.36 kB
"""
Module for transforming the raw data to training data.
"""
import json
import os
# Path of the raw JSON input file; the leading "./" makes it relative to the
# current working directory of the process, not to this module's location.
DATA_PATH = "./data/raw_data.json"
def load_data(data_path: str):
    """Read a UTF-8 encoded JSON file and return its parsed content.

    Args:
        data_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Keep the file handle and the parsed payload under separate names so the
    # handle is never shadowed while the ``with`` block is still open.
    with open(file=data_path, mode="r", encoding="utf-8") as raw_file:
        return json.load(raw_file)
# Raw records as loaded from disk. Each record is indexed below by the
# "intent", "user" and "response" keys; a record missing one of them raises
# KeyError.
original_data = load_data(data_path=DATA_PATH)

# Group the records by intent: every user utterance becomes a pattern and
# every reply becomes a response of that intent's bucket.
intent_data = {}
for entry in original_data:
    intent = entry["intent"]
    # setdefault creates the bucket the first time an intent is seen and
    # returns the existing one afterwards.
    bucket = intent_data.setdefault(
        intent,
        {"intent": intent, "patterns": [], "responses": []},
    )
    bucket["patterns"].append(entry["user"])
    bucket["responses"].append(entry["response"])

# One {"intent", "patterns", "responses"} object per distinct intent.
formatted_data = list(intent_data.values())

# Write the grouped data out as indented JSON.
# NOTE(review): the output directory is spelled "intent-recognition" (with a
# hyphen) and is resolved relative to the current working directory — confirm
# this is the intended location.
OUTPUT_FILE_NAME = "formatted_data.json"
OUTPUT_DIR = "./intent-recognition/data/"
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_FILE_PATH = os.path.join(OUTPUT_DIR, OUTPUT_FILE_NAME)
with open(file=OUTPUT_FILE_PATH, mode="w", encoding="utf-8") as json_file:
    json.dump(formatted_data, json_file, indent=4)

print(f"Formatted data has been written to {OUTPUT_FILE_PATH}")