Commit 506917e: Full code commit
1 parent ae26881

11 files changed: +136,154 −0 lines

.gitignore

+2
@@ -0,0 +1,2 @@
*.pyc
.DS_Store

SpeechDataGenerator.py

+35
@@ -0,0 +1,35 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Jul 20 14:09:31 2019

@author: Krishna
"""
import numpy as np
import torch
from utils import utils


class SpeechDataGenerator():
    """Speech dataset."""

    def __init__(self, manifest, mode):
        """Read the manifest file and collect audio paths and labels."""
        self.mode = mode
        # Read the manifest once; each line is "<wav path> <class id>".
        with open(manifest) as fid:
            lines = [line.rstrip('\n').split(' ') for line in fid]
        self.audio_links = [line[0] for line in lines]
        self.labels = [int(line[1]) for line in lines]

    def __len__(self):
        return len(self.audio_links)

    def __getitem__(self, idx):
        audio_link = self.audio_links[idx]
        class_id = self.labels[idx]
        spec = utils.load_data(audio_link, mode=self.mode)
        sample = {'features': torch.from_numpy(np.ascontiguousarray(spec)),
                  'labels': torch.from_numpy(np.ascontiguousarray(class_id))}
        return sample
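A minimal usage sketch for this loader, assuming a manifest such as meta/training.txt (produced by datasets.py below) and that utils.load_data returns fixed-size feature arrays so default batching works:

import torch
from torch.utils.data import DataLoader
from SpeechDataGenerator import SpeechDataGenerator

dataset = SpeechDataGenerator(manifest='meta/training.txt', mode='train')
loader = DataLoader(dataset, batch_size=32, shuffle=True)
batch = next(iter(loader))
print(batch['features'].shape, batch['labels'].shape)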

datasets.py

+85
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Sat May 30 19:09:44 2020
5+
6+
@author: krishna
7+
"""
8+
9+
import os
10+
import numpy as np
11+
import glob
12+
import argparse
13+
14+
15+
16+
class_ids ={'English':0,'Hindi':1,'Kannada':2,'Tamil':3,'Telugu':4,'Malayalam':5,'Marathi':6,'Gujarathi':7}
17+
def create_meta(files_list,store_loc,mode='train'):
18+
if not os.path.exists(store_loc):
19+
os.makedirs(store_loc)
20+
21+
if mode=='train':
22+
meta_store = store_loc+'/training.txt'
23+
fid = open(meta_store,'w')
24+
for filepath in files_list:
25+
fid.write(filepath+'\n')
26+
fid.close()
27+
elif mode=='test':
28+
meta_store = store_loc+'/testing.txt'
29+
fid = open(meta_store,'w')
30+
for filepath in files_list:
31+
fid.write(filepath+'\n')
32+
fid.close()
33+
elif mode=='validation':
34+
meta_store = store_loc+'/validation.txt'
35+
fid = open(meta_store,'w')
36+
for filepath in files_list:
37+
fid.write(filepath+'\n')
38+
fid.close()
39+
else:
40+
print('Error in creating meta files')
41+
42+
def extract_files(folder_path):
43+
all_lang_folders = sorted(glob.glob(folder_path+'/*/'))
44+
train_lists=[]
45+
test_lists = []
46+
val_lists=[]
47+
48+
for lang_folderpath in all_lang_folders:
49+
language = lang_folderpath.split('/')[-2]
50+
sub_folders = sorted(glob.glob(lang_folderpath+'/*/'))
51+
train_nums = len(sub_folders)-int(len(sub_folders)*0.1)-int(len(sub_folders)*0.05)
52+
for i in range(train_nums):
53+
sub_folder = sub_folders[i]
54+
all_files = sorted(glob.glob(sub_folder+'/*.wav'))
55+
for audio_filepath in all_files:
56+
to_write = audio_filepath+' '+str(class_ids[language])
57+
train_lists.append(to_write)
58+
59+
for i in range(train_nums,train_nums+int(len(sub_folders)*0.05)):
60+
sub_folder = sub_folders[i]
61+
all_files = sorted(glob.glob(sub_folder+'/*.wav'))
62+
for audio_filepath in all_files:
63+
to_write = audio_filepath+' '+str(class_ids[language])
64+
val_lists.append(to_write)
65+
66+
for i in range(train_nums+int(len(sub_folders)*0.05),len(sub_folders)):
67+
sub_folder = sub_folders[i]
68+
all_files = sorted(glob.glob(sub_folder+'/*.wav'))
69+
for audio_filepath in all_files:
70+
to_write = audio_filepath+' '+str(class_ids[language])
71+
test_lists.append(to_write)
72+
return train_lists,test_lists,val_lists
73+
74+
75+
if __name__ == '__main__':
76+
parser = argparse.ArgumentParser("Configuration for data preparation")
77+
parser.add_argument("--processed_data", default="/media/newhd/youtube_lid_data/download_data", type=str,help='Dataset path')
78+
parser.add_argument("--meta_store_path", default="meta/", type=str,help='Save directory after processing')
79+
config = parser.parse_args()
80+
train_list, test_list,val_lists = extract_files(config.processed_data)
81+
82+
create_meta(train_list,config.meta_store_path,mode='train')
83+
create_meta(test_list,config.meta_store_path,mode='test')
84+
create_meta(val_lists,config.meta_store_path,mode='validation')
85+
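How the script is run and what it emits (the dataset path and file names below are illustrative); each manifest line pairs a wav path with its class id:

# python datasets.py --processed_data /path/to/download_data --meta_store_path meta/
#
# Example line in meta/training.txt (hypothetical path):
# /path/to/download_data/Hindi/speaker_001/utt_0001.wav 1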

meta/testing.txt

+12,193

meta/training.txt

+116,509

meta/validation.txt

+6,981

models/tdnn.py

+84
@@ -0,0 +1,84 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: cvqluu
repo: https://github.com/cvqluu/TDNN
"""

import torch.nn as nn
import torch.nn.functional as F


class TDNN(nn.Module):

    def __init__(
        self,
        input_dim=23,
        output_dim=512,
        context_size=5,
        stride=1,
        dilation=1,
        batch_norm=True,
        dropout_p=0.2
    ):
        '''
        TDNN as defined by https://www.danielpovey.com/files/2015_interspeech_multisplice.pdf

        The affine transformation is not applied globally to all frames, but to
        smaller windows with local context.

        batch_norm: True to include batch normalisation after the non-linearity

        Context size and dilation determine the frames selected
        (although context size is not really defined in the traditional sense).
        For example:
            context size 5 and dilation 1 is equivalent to [-2,-1,0,1,2]
            context size 3 and dilation 2 is equivalent to [-2, 0, 2]
            context size 1 and dilation 1 is equivalent to [0]
        '''
        super(TDNN, self).__init__()
        self.context_size = context_size
        self.stride = stride
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.dilation = dilation
        self.dropout_p = dropout_p
        self.batch_norm = batch_norm

        self.kernel = nn.Linear(input_dim * context_size, output_dim)
        self.nonlinearity = nn.ReLU()
        if self.batch_norm:
            self.bn = nn.BatchNorm1d(output_dim)
        if self.dropout_p:
            self.drop = nn.Dropout(p=self.dropout_p)

    def forward(self, x):
        '''
        input: size (batch, seq_len, input_features)
        output: size (batch, new_seq_len, output_features)
        '''
        _, _, d = x.shape
        assert (d == self.input_dim), 'Input dimension was wrong. Expected ({}), got ({})'.format(self.input_dim, d)
        x = x.unsqueeze(1)

        # Unfold input into smaller temporal contexts.
        # NOTE: the temporal stride is hard-coded to 1 here; self.stride is
        # stored but never used.
        x = F.unfold(
            x,
            (self.context_size, self.input_dim),
            stride=(1, self.input_dim),
            dilation=(self.dilation, 1)
        )

        # x has shape (N, input_dim * context_size, new_seq_len)
        x = x.transpose(1, 2)
        x = self.kernel(x)
        x = self.nonlinearity(x)

        if self.dropout_p:
            x = self.drop(x)

        if self.batch_norm:
            # BatchNorm1d expects (batch, channels, seq_len)
            x = x.transpose(1, 2)
            x = self.bn(x)
            x = x.transpose(1, 2)

        return x
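A quick standalone shape check (a sketch; the batch-norm layer is put in eval mode so it runs without training statistics). Each TDNN layer shortens the sequence by dilation * (context_size - 1) frames:

import torch
from models.tdnn import TDNN

layer = TDNN(input_dim=23, output_dim=512, context_size=5, dilation=1)
layer.eval()                  # use BatchNorm running stats for a one-off check
x = torch.randn(4, 100, 23)   # (batch, seq_len, input_features)
y = layer(x)
print(y.shape)                # torch.Size([4, 96, 512]): 100 - 1*(5-1) = 96 frames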

models/x_vector.py

+42
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May 30 19:59:45 2020

@author: krishna
"""

import torch
import torch.nn as nn
from models.tdnn import TDNN


class X_vector(nn.Module):
    def __init__(self, input_dim=40, num_classes=8):
        super(X_vector, self).__init__()
        #### Frame-level layers
        self.tdnn1 = TDNN(input_dim=input_dim, output_dim=512, context_size=5, dilation=1)
        self.tdnn2 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=1)
        self.tdnn3 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=2)
        self.tdnn4 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=1)
        self.tdnn5 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=3)
        #### Segment-level layers (input is mean+std of frame-level output, hence 2*512)
        self.segment6 = nn.Linear(1024, 512)
        self.segment7 = nn.Linear(512, 512)
        self.output = nn.Linear(512, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, inputs):
        tdnn1_out = self.tdnn1(inputs)
        tdnn2_out = self.tdnn2(tdnn1_out)
        tdnn3_out = self.tdnn3(tdnn2_out)
        tdnn4_out = self.tdnn4(tdnn3_out)
        tdnn5_out = self.tdnn5(tdnn4_out)
        ### Statistics pooling over the time axis
        mean = torch.mean(tdnn5_out, 1)
        std = torch.std(tdnn5_out, 1)
        stat_pooling = torch.cat((mean, std), 1)
        segment6_out = self.segment6(stat_pooling)
        x_vec = self.segment7(segment6_out)
        predictions = self.softmax(self.output(x_vec))
        return predictions, x_vec
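A forward-pass sketch with random features (shapes only; the 40-dim input matches the default input_dim). Note the model returns softmax probabilities, so a training loop would pair it with a loss that expects probabilities or log-probabilities (e.g. NLLLoss on their log), not CrossEntropyLoss, which applies softmax internally:

import torch
from models.x_vector import X_vector

model = X_vector(input_dim=40, num_classes=8)
model.eval()
feats = torch.randn(4, 300, 40)        # (batch, frames, features)
predictions, x_vec = model(feats)
print(predictions.shape, x_vec.shape)  # torch.Size([4, 8]) torch.Size([4, 512])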

models/x_vector_Indian_LID.py

+42
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May 30 19:59:45 2020

@author: krishna
"""

import torch
import torch.nn as nn
from models.tdnn import TDNN


class X_vector(nn.Module):
    # Same architecture as models/x_vector.py apart from the tdnn3/tdnn4
    # context sizes.
    def __init__(self, input_dim=40, num_classes=8):
        super(X_vector, self).__init__()
        #### Frame-level layers
        self.tdnn1 = TDNN(input_dim=input_dim, output_dim=512, context_size=5, dilation=1)
        self.tdnn2 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=1)
        self.tdnn3 = TDNN(input_dim=512, output_dim=512, context_size=2, dilation=2)
        self.tdnn4 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=1)
        self.tdnn5 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=3)
        #### Segment-level layers (input is mean+std of frame-level output, hence 2*512)
        self.segment6 = nn.Linear(1024, 512)
        self.segment7 = nn.Linear(512, 512)
        self.output = nn.Linear(512, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, inputs):
        tdnn1_out = self.tdnn1(inputs)
        tdnn2_out = self.tdnn2(tdnn1_out)
        tdnn3_out = self.tdnn3(tdnn2_out)
        tdnn4_out = self.tdnn4(tdnn3_out)
        tdnn5_out = self.tdnn5(tdnn4_out)
        ### Statistics pooling over the time axis
        mean = torch.mean(tdnn5_out, 1)
        std = torch.std(tdnn5_out, 1)
        stat_pooling = torch.cat((mean, std), 1)
        segment6_out = self.segment6(stat_pooling)
        x_vec = self.segment7(segment6_out)
        predictions = self.softmax(self.output(x_vec))
        return predictions, x_vec
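The same kind of sketch applies to this variant; since only the frame-level context sizes differ, the x-vector and class outputs have identical dimensions:

import torch
from models.x_vector_Indian_LID import X_vector

model = X_vector(input_dim=40, num_classes=8)
model.eval()
predictions, x_vec = model(torch.randn(2, 200, 40))
print(predictions.shape, x_vec.shape)  # torch.Size([2, 8]) torch.Size([2, 512])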
