File size: 1,359 Bytes
65976bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
"""
Module for transforming the raw data to training data.
"""
import json
import os

DATA_PATH = "./data/raw_data.json"

def load_data(data_path: str):
    """Read the JSON file at ``data_path`` and return its parsed content.

    The file is opened in text mode and decoded as UTF-8. Whatever
    ``json.load`` produces for the document (list, dict, ...) is returned
    unchanged.
    """
    with open(data_path, "r", encoding="utf-8") as raw_file:
        return json.load(raw_file)

# Raw records parsed from the JSON file at DATA_PATH
original_data = load_data(data_path=DATA_PATH)

# Group the records by intent: one bucket per distinct intent, collecting
# every "user" text as a pattern and every "response" text as a response.
# Buckets are created the first time an intent is seen.
intent_data = {}
for record in original_data:
    label = record["intent"]
    bucket = intent_data.setdefault(
        label,
        {"intent": label, "patterns": [], "responses": []},
    )
    bucket["patterns"].append(record["user"])
    bucket["responses"].append(record["response"])

# Flatten the per-intent buckets into the list that gets serialized
formatted_data = [*intent_data.values()]

# Make sure the target directory exists, then write the result as
# indented JSON
OUTPUT_FILE_NAME = "formatted_data.json"
os.makedirs("./intent-recognition/data/", exist_ok=True)
OUTPUT_FILE_PATH = os.path.join("./intent-recognition/data/", OUTPUT_FILE_NAME)

with open(OUTPUT_FILE_PATH, "w", encoding="utf-8") as json_file:
    json.dump(formatted_data, json_file, indent=4)

print(f"Formatted data has been written to {OUTPUT_FILE_PATH}")