File size: 925 Bytes
471bf00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import glob
import json
import os

# Merge per-speaker dataset lists into dataset-wide lists.
#
# Layout read by this script (one sub-directory per speaker):
#     <data_root>/<speaker>/transcriptions.txt
#     <data_root>/<speaker>/train.list
#     <data_root>/<speaker>/test.list
# Every line of those files is copied into the file of the same name directly
# under <data_root>, with "<speaker>/" prepended. Each speaker is also assigned
# an integer id, and the resulting mapping is printed as JSON at the end.
#
# Raises FileNotFoundError (from open) if <data_root> does not exist or if a
# speaker directory is missing its train.list / test.list.
#
# NOTE(review): files are opened with the platform default encoding, exactly
# as before. Consider pinning encoding="utf-8" once the encoding of the
# per-speaker files has been confirmed.
data_root = "data"


def _copy_with_speaker_prefix(src_path, dst, spk):
    """Append each non-blank line of *src_path* to *dst* as "<spk>/<line>".

    A trailing newline is always written, so a source file whose last line is
    not newline-terminated cannot run into the next speaker's first entry.
    Blank lines are skipped because they would yield a bare "<spk>/" entry.
    """
    with open(src_path) as src:
        for line in src:
            entry = line.rstrip("\n")
            if not entry.strip():
                continue
            dst.write(f"{spk}/{entry}\n")


# Sorted so that speaker ids are reproducible: glob's result order depends on
# the underlying filesystem.
transcriptions = sorted(glob.glob(f"{data_root}/*/transcriptions.txt"))
spk2id = {}
with open(f"{data_root}/transcriptions.txt", "w") as ms_transcriptions, \
        open(f"{data_root}/train.list", "w") as ms_train_set, \
        open(f"{data_root}/test.list", "w") as ms_test_set:
    for spk_id, transcription in enumerate(transcriptions):
        # The speaker name is the directory that holds transcriptions.txt.
        # os.path is used instead of splitting on "/" because glob returns
        # OS-native separators (backslashes on Windows).
        spk_dir = os.path.dirname(transcription)
        spk = os.path.basename(spk_dir)
        spk2id[spk] = spk_id

        _copy_with_speaker_prefix(transcription, ms_transcriptions, spk)
        _copy_with_speaker_prefix(
            os.path.join(spk_dir, "train.list"), ms_train_set, spk
        )
        _copy_with_speaker_prefix(
            os.path.join(spk_dir, "test.list"), ms_test_set, spk
        )

# The message below tells the user to paste the speaker-to-id mapping into the
# config file by hand.
print("请手动将说话人与id的映射粘贴至config文件中")
print(json.dumps(spk2id))