@@ -1,7 +1,6 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import torch.optim as optim
 
 def topk_mask(input, dim, K = 10):
 	index = input.topk(max(1, min(K, input.size(dim))), dim = dim)[1]
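Note: criterion() in the last hunk calls a pdist helper that is not part of this diff. A minimal sketch, assuming it returns the full N x N matrix of pairwise Euclidean distances (the eps stabilizer is a hypothetical detail, not taken from the repo):

def pdist(A, eps = 1e-4):
	# squared distances via the Gram matrix: ||a||^2 + ||b||^2 - 2 * <a, b>
	prod = torch.mm(A, A.t())
	norm = prod.diag().unsqueeze(1).expand_as(prod)
	# clamp guards against tiny negatives from floating-point error; eps keeps sqrt() differentiable at zero
	return (norm + norm.t() - 2 * prod).clamp(min = 0).add(eps).sqrt()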
@@ -27,7 +26,7 @@ def forward(self, input):
 		return self.embedder(self.base_model(input).view(len(input), -1))
 
 criterion = None
-optim_algo = optim.SGD
+optim_algo = torch.optim.SGD
 optim_params = dict(lr = 1e-5, momentum = 0.9, weight_decay = 2e-4, dampening = 0.9)
 optim_params_annealed = dict(epoch = float('inf'), gamma = 0.1)
 
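Note: the training loop that consumes optim_params_annealed is not shown in this diff; presumably the learning rate is multiplied by gamma once every `epoch` epochs, so epoch = float('inf') effectively disables annealing. A hypothetical wiring via torch.optim.lr_scheduler.StepLR (model is a placeholder):

optimizer = optim_algo(model.parameters(), **optim_params)
# stepped once per epoch; multiplies lr by gamma each time `epoch` epochs elapse
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = optim_params_annealed['epoch'], gamma = optim_params_annealed['gamma'])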
@@ -101,20 +100,25 @@ def criterion(self, embeddings, labels, Alpha = 0.5, Beta = 1.0, Lambda = 0.5):
 optim_params_annealed = dict(epoch = 10, gamma = 0.1)
 
 class Margin(Model):
-	def __init__(self, base_model, num_classes, beta = 1.2):
+	def __init__(self, base_model, num_classes, beta = 1.2, gamma = 1.0):
 		Model.__init__(self, base_model, num_classes)
-		self.beta_bias = nn.Parameter(torch.Tensor([beta] * num_classes))
+		self.beta_bias = nn.Parameter(torch.Tensor([beta]))
+		self.gamma_bias = nn.Parameter(torch.Tensor([gamma]))
 
 	def forward(self, input):
 		return F.normalize(Model.forward(self, input))
 
-	def criterion(self, embeddings, labels, alpha = 0.1):
+	def criterion(self, embeddings, labels, alpha = 0.2, distance_threshold = 1.0):
 		d = pdist(embeddings)
 		pos = torch.eq(*[labels.unsqueeze(dim).expand_as(d) for dim in [0, 1]]).type_as(d) - torch.autograd.Variable(torch.eye(len(d))).type_as(d)
-		prob = (pos.sum(1) / (len(pos) - pos.sum(1))).unsqueeze(1).expand_as(pos).masked_fill_((pos > 0) + (d < 0.5), 0.0)
+		prob = (pos.sum(1) / (len(pos) - pos.sum(1))).unsqueeze(1).expand_as(pos).masked_fill_((pos > 0) + (d < distance_threshold), 0.0)
 		neg = torch.autograd.Variable(torch.bernoulli(prob.data)).type_as(d)
 		M = (pos + neg > 0).float()
-		return (M * F.relu(alpha + (pos * 2 - 1) * (d - self.beta_bias[labels].unsqueeze(1).expand_as(d)))).sum() / M.sum()
+		# print('beta', self.beta_bias.data[0])
+		# print('gamma', self.gamma_bias.data[0])
+		return (M * F.relu(alpha + (pos * 2 - 1) * (d - self.beta_bias.data[0]))).sum() / M.sum()
 
+#optim_algo = torch.optim.Adam
+#optim_params = dict(lr = 1e-4, weight_decay = 5e-4)
 optim_params = dict(lr = 1e-3, momentum = 0.9, weight_decay = 5e-4)
-optim_params_annealed = dict(epoch = 30, gamma = 0.1)
+# optim_params_annealed = dict(epoch = 10, gamma = 0.5)
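Note: the loss returned above is the margin loss relu(alpha + y * (d - beta)), with y = +1 for positive pairs and y = -1 for the sampled negatives, averaged over the pairs selected in M. Since the new code reads self.beta_bias.data[0], beta is detached from the autograd graph and no longer receives gradients from this loss. A hypothetical call (base_model, images, labels, and the class count are placeholders):

model = Margin(base_model, num_classes = 100)
embeddings = model(images)                  # L2-normalized embeddings
loss = model.criterion(embeddings, labels)  # scalar margin loss over sampled pairs
loss.backward()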