Commit 506917e: Full code commit
1 parent ae26881

11 files changed: +136,154 −0 lines

.gitignore

+2
@@ -0,0 +1,2 @@
*.pyc
.DS_Store

SpeechDataGenerator.py

+35
@@ -0,0 +1,35 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Jul 20 14:09:31 2019

@author: Krishna
"""
import numpy as np
import torch
from utils import utils


class SpeechDataGenerator():
    """Speech dataset."""

    def __init__(self, manifest, mode):
        """Read the manifest file and collect audio paths and labels."""
        self.mode = mode
        # Read the manifest once; each line is "<wav path> <class id>".
        with open(manifest) as fid:
            lines = [line.rstrip('\n').split(' ') for line in fid]
        self.audio_links = [line[0] for line in lines]
        self.labels = [int(line[1]) for line in lines]

    def __len__(self):
        return len(self.audio_links)

    def __getitem__(self, idx):
        audio_link = self.audio_links[idx]
        class_id = self.labels[idx]
        spec = utils.load_data(audio_link, mode=self.mode)
        sample = {'features': torch.from_numpy(np.ascontiguousarray(spec)),
                  'labels': torch.from_numpy(np.ascontiguousarray(class_id))}
        return sample
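A minimal usage sketch for this loader, assuming a manifest such as meta/training.txt (produced by datasets.py below) and that utils.load_data returns fixed-size feature arrays so default batching works:

import torch
from torch.utils.data import DataLoader
from SpeechDataGenerator import SpeechDataGenerator

dataset = SpeechDataGenerator(manifest='meta/training.txt', mode='train')
loader = DataLoader(dataset, batch_size=32, shuffle=True)
batch = next(iter(loader))
print(batch['features'].shape, batch['labels'].shape)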

datasets.py

+85
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Sat May 30 19:09:44 2020
5+
6+
@author: krishna
7+
"""
8+
9+
import os
10+
import numpy as np
11+
import glob
12+
import argparse
13+
14+
15+
16+
class_ids ={'English':0,'Hindi':1,'Kannada':2,'Tamil':3,'Telugu':4,'Malayalam':5,'Marathi':6,'Gujarathi':7}
17+
def create_meta(files_list,store_loc,mode='train'):
18+
if not os.path.exists(store_loc):
19+
os.makedirs(store_loc)
20+
21+
if mode=='train':
22+
meta_store = store_loc+'/training.txt'
23+
fid = open(meta_store,'w')
24+
for filepath in files_list:
25+
fid.write(filepath+'\n')
26+
fid.close()
27+
elif mode=='test':
28+
meta_store = store_loc+'/testing.txt'
29+
fid = open(meta_store,'w')
30+
for filepath in files_list:
31+
fid.write(filepath+'\n')
32+
fid.close()
33+
elif mode=='validation':
34+
meta_store = store_loc+'/validation.txt'
35+
fid = open(meta_store,'w')
36+
for filepath in files_list:
37+
fid.write(filepath+'\n')
38+
fid.close()
39+
else:
40+
print('Error in creating meta files')
41+
42+
def extract_files(folder_path):
43+
all_lang_folders = sorted(glob.glob(folder_path+'/*/'))
44+
train_lists=[]
45+
test_lists = []
46+
val_lists=[]
47+
48+
for lang_folderpath in all_lang_folders:
49+
language = lang_folderpath.split('/')[-2]
50+
sub_folders = sorted(glob.glob(lang_folderpath+'/*/'))
51+
train_nums = len(sub_folders)-int(len(sub_folders)*0.1)-int(len(sub_folders)*0.05)
52+
for i in range(train_nums):
53+
sub_folder = sub_folders[i]
54+
all_files = sorted(glob.glob(sub_folder+'/*.wav'))
55+
for audio_filepath in all_files:
56+
to_write = audio_filepath+' '+str(class_ids[language])
57+
train_lists.append(to_write)
58+
59+
for i in range(train_nums,train_nums+int(len(sub_folders)*0.05)):
60+
sub_folder = sub_folders[i]
61+
all_files = sorted(glob.glob(sub_folder+'/*.wav'))
62+
for audio_filepath in all_files:
63+
to_write = audio_filepath+' '+str(class_ids[language])
64+
val_lists.append(to_write)
65+
66+
for i in range(train_nums+int(len(sub_folders)*0.05),len(sub_folders)):
67+
sub_folder = sub_folders[i]
68+
all_files = sorted(glob.glob(sub_folder+'/*.wav'))
69+
for audio_filepath in all_files:
70+
to_write = audio_filepath+' '+str(class_ids[language])
71+
test_lists.append(to_write)
72+
return train_lists,test_lists,val_lists
73+
74+
75+
if __name__ == '__main__':
76+
parser = argparse.ArgumentParser("Configuration for data preparation")
77+
parser.add_argument("--processed_data", default="/media/newhd/youtube_lid_data/download_data", type=str,help='Dataset path')
78+
parser.add_argument("--meta_store_path", default="meta/", type=str,help='Save directory after processing')
79+
config = parser.parse_args()
80+
train_list, test_list,val_lists = extract_files(config.processed_data)
81+
82+
create_meta(train_list,config.meta_store_path,mode='train')
83+
create_meta(test_list,config.meta_store_path,mode='test')
84+
create_meta(val_lists,config.meta_store_path,mode='validation')
85+
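How the script is run and what it emits (the dataset path and file names below are illustrative); each manifest line pairs a wav path with its class id:

# python datasets.py --processed_data /path/to/download_data --meta_store_path meta/
#
# Example line in meta/training.txt (hypothetical path):
# /path/to/download_data/Hindi/speaker_001/utt_0001.wav 1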

meta/testing.txt

+12,193

meta/training.txt

+116,509

meta/validation.txt

+6,981

models/tdnn.py

+84
@@ -0,0 +1,84 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: cvqluu
repo: https://github.com/cvqluu/TDNN
"""

import torch.nn as nn
import torch.nn.functional as F


class TDNN(nn.Module):

    def __init__(
        self,
        input_dim=23,
        output_dim=512,
        context_size=5,
        stride=1,
        dilation=1,
        batch_norm=True,
        dropout_p=0.2
    ):
        '''
        TDNN as defined by https://www.danielpovey.com/files/2015_interspeech_multisplice.pdf

        The affine transformation is not applied globally to all frames, but to
        smaller windows with local context.

        batch_norm: True to include batch normalisation after the non-linearity

        Context size and dilation determine the frames selected
        (although context size is not really defined in the traditional sense).
        For example:
            context size 5 and dilation 1 is equivalent to [-2,-1,0,1,2]
            context size 3 and dilation 2 is equivalent to [-2, 0, 2]
            context size 1 and dilation 1 is equivalent to [0]
        '''
        super(TDNN, self).__init__()
        self.context_size = context_size
        self.stride = stride
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.dilation = dilation
        self.dropout_p = dropout_p
        self.batch_norm = batch_norm

        self.kernel = nn.Linear(input_dim * context_size, output_dim)
        self.nonlinearity = nn.ReLU()
        if self.batch_norm:
            self.bn = nn.BatchNorm1d(output_dim)
        if self.dropout_p:
            self.drop = nn.Dropout(p=self.dropout_p)

    def forward(self, x):
        '''
        input: size (batch, seq_len, input_features)
        output: size (batch, new_seq_len, output_features)
        '''
        _, _, d = x.shape
        assert (d == self.input_dim), 'Input dimension was wrong. Expected ({}), got ({})'.format(self.input_dim, d)
        x = x.unsqueeze(1)

        # Unfold input into smaller temporal contexts.
        # NOTE: the temporal stride is hard-coded to 1 here; self.stride is
        # stored but never used.
        x = F.unfold(
            x,
            (self.context_size, self.input_dim),
            stride=(1, self.input_dim),
            dilation=(self.dilation, 1)
        )

        # x has shape (N, input_dim * context_size, new_seq_len)
        x = x.transpose(1, 2)
        x = self.kernel(x)
        x = self.nonlinearity(x)

        if self.dropout_p:
            x = self.drop(x)

        if self.batch_norm:
            # BatchNorm1d expects (batch, channels, seq_len)
            x = x.transpose(1, 2)
            x = self.bn(x)
            x = x.transpose(1, 2)

        return x
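A quick standalone shape check (a sketch; the batch-norm layer is put in eval mode so it runs without training statistics). Each TDNN layer shortens the sequence by dilation * (context_size - 1) frames:

import torch
from models.tdnn import TDNN

layer = TDNN(input_dim=23, output_dim=512, context_size=5, dilation=1)
layer.eval()                  # use BatchNorm running stats for a one-off check
x = torch.randn(4, 100, 23)   # (batch, seq_len, input_features)
y = layer(x)
print(y.shape)                # torch.Size([4, 96, 512]): 100 - 1*(5-1) = 96 frames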

models/x_vector.py

+42
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May 30 19:59:45 2020

@author: krishna
"""

import torch
import torch.nn as nn
from models.tdnn import TDNN


class X_vector(nn.Module):
    def __init__(self, input_dim=40, num_classes=8):
        super(X_vector, self).__init__()
        #### Frame-level layers
        self.tdnn1 = TDNN(input_dim=input_dim, output_dim=512, context_size=5, dilation=1)
        self.tdnn2 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=1)
        self.tdnn3 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=2)
        self.tdnn4 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=1)
        self.tdnn5 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=3)
        #### Segment-level layers (input is mean+std of frame-level output, hence 2*512)
        self.segment6 = nn.Linear(1024, 512)
        self.segment7 = nn.Linear(512, 512)
        self.output = nn.Linear(512, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, inputs):
        tdnn1_out = self.tdnn1(inputs)
        tdnn2_out = self.tdnn2(tdnn1_out)
        tdnn3_out = self.tdnn3(tdnn2_out)
        tdnn4_out = self.tdnn4(tdnn3_out)
        tdnn5_out = self.tdnn5(tdnn4_out)
        ### Statistics pooling over the time axis
        mean = torch.mean(tdnn5_out, 1)
        std = torch.std(tdnn5_out, 1)
        stat_pooling = torch.cat((mean, std), 1)
        segment6_out = self.segment6(stat_pooling)
        x_vec = self.segment7(segment6_out)
        predictions = self.softmax(self.output(x_vec))
        return predictions, x_vec
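A forward-pass sketch with random features (shapes only; the 40-dim input matches the default input_dim). Note the model returns softmax probabilities, so a training loop would pair it with a loss that expects probabilities or log-probabilities (e.g. NLLLoss on their log), not CrossEntropyLoss, which applies softmax internally:

import torch
from models.x_vector import X_vector

model = X_vector(input_dim=40, num_classes=8)
model.eval()
feats = torch.randn(4, 300, 40)        # (batch, frames, features)
predictions, x_vec = model(feats)
print(predictions.shape, x_vec.shape)  # torch.Size([4, 8]) torch.Size([4, 512])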

models/x_vector_Indian_LID.py

+42
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May 30 19:59:45 2020

@author: krishna
"""

import torch
import torch.nn as nn
from models.tdnn import TDNN


class X_vector(nn.Module):
    # Same architecture as models/x_vector.py apart from the tdnn3/tdnn4
    # context sizes.
    def __init__(self, input_dim=40, num_classes=8):
        super(X_vector, self).__init__()
        #### Frame-level layers
        self.tdnn1 = TDNN(input_dim=input_dim, output_dim=512, context_size=5, dilation=1)
        self.tdnn2 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=1)
        self.tdnn3 = TDNN(input_dim=512, output_dim=512, context_size=2, dilation=2)
        self.tdnn4 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=1)
        self.tdnn5 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=3)
        #### Segment-level layers (input is mean+std of frame-level output, hence 2*512)
        self.segment6 = nn.Linear(1024, 512)
        self.segment7 = nn.Linear(512, 512)
        self.output = nn.Linear(512, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, inputs):
        tdnn1_out = self.tdnn1(inputs)
        tdnn2_out = self.tdnn2(tdnn1_out)
        tdnn3_out = self.tdnn3(tdnn2_out)
        tdnn4_out = self.tdnn4(tdnn3_out)
        tdnn5_out = self.tdnn5(tdnn4_out)
        ### Statistics pooling over the time axis
        mean = torch.mean(tdnn5_out, 1)
        std = torch.std(tdnn5_out, 1)
        stat_pooling = torch.cat((mean, std), 1)
        segment6_out = self.segment6(stat_pooling)
        x_vec = self.segment7(segment6_out)
        predictions = self.softmax(self.output(x_vec))
        return predictions, x_vec
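The same kind of sketch applies to this variant; since only the frame-level context sizes differ, the x-vector and class outputs have identical dimensions:

import torch
from models.x_vector_Indian_LID import X_vector

model = X_vector(input_dim=40, num_classes=8)
model.eval()
predictions, x_vec = model(torch.randn(2, 200, 40))
print(predictions.shape, x_vec.shape)  # torch.Size([2, 8]) torch.Size([2, 512])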
