
Commit bd88727

clean up
1 parent 512ab92 commit bd88727


46 files changed (+700, -835 lines)

README.md (+12 -9)
````diff
@@ -1,11 +1,11 @@
 # Overview
-PyTorch version: 0.4.1 | Python 3.6.5
+PyTorch 0.4.1 | Python 3.6.5
 
 Annotated implementations with comparative introductions for minimax, non-saturating, wasserstein, wasserstein gradient penalty, least squares, deep regret analytic, bounded equilibrium, relativistic, f-divergence, Fisher, and information generative adversarial networks (GANs), and standard, variational, and bounded information rate variational autoencoders (VAEs).
 
-Paper links are supplied at the beginning of each file with a short summary of the paper. See src folder for files to run via terminal, or notebooks folder for Jupyter notebook visualizations via your local browser. The main file changes can be see in the train, train_D, and train_G of the Trainer class, although changes are not completely limited to only these two areas (e.g. Wasserstein GAN clamps weight in the train function, BEGAN gives multiple outputs from train_D, fGAN has a slight modification in viz_loss function to indicate method used in title).
+Paper links are supplied at the beginning of each file with a short summary of the paper. See src folder for files to run via terminal, or notebooks folder for Jupyter notebook visualizations via your local browser. The main file changes can be see in the ```train```, ```train_D```, and ```train_G``` of the Trainer class, although changes are not completely limited to only these two areas (e.g. Wasserstein GAN clamps weight in the train function, BEGAN gives multiple outputs from train_D, fGAN has a slight modification in viz_loss function to indicate method used in title).
 
-All code in this repository operates in a generative, unsupervised manner on binary (black and white) MNIST. The architectures are compatible with a variety of datatypes (1D, 2D, 3D) and plotting functions work with binary/RGB images too. If a GPU is detected, the models use it. Otherwise, they default to CPU.
+All code in this repository operates in a generative, unsupervised manner on binary (black and white) MNIST. The architectures are compatible with a variety of datatypes (1D, 2D, square 3D images). Plotting functions work with binary/RGB images. If a GPU is detected, the models use it. Otherwise, they default to CPU. VAE Trainer classes contain methods to visualize latent space representations (see ```make_all``` function).
 
 # Usage
 To initialize an environment:
@@ -20,7 +20,7 @@ For playing around in Jupyer notebooks:
 jupyter notebook
 ```
 
-To run from Terminal / Bash:
+To run from Terminal:
 ```
 cd src
 python bir_vae.py
@@ -34,9 +34,10 @@ Suppose we have a non-saturating GAN and we wanted to implement a least-squares
 
 [Original](https://github.com/shayneobrien/generative-models/blob/master/src/ns_gan.py#L166-L208) (NSGAN)
 ```
-def train_D(self):
-
+def train_D(self, images):
+    ...
     D_loss = -torch.mean(torch.log(DX_score + 1e-8) + torch.log(1 - DG_score + 1e-8))
+
     return D_loss
 ```
 ```
@@ -49,15 +50,17 @@ def train_G(self, images):
 
 [New](https://github.com/shayneobrien/generative-models/blob/master/src/ls_gan.py#L166-L209) (LSGAN)
 ```
-def train_D(self, images, a=0, b=1):
+def train_D(self, images):
     ...
-    D_loss = (0.50 * torch.mean((DX_score - b)**2)) + (0.50 * torch.mean((DG_score - a)**2))
+    D_loss = (0.50 * torch.mean((DX_score - 1.)**2)) + (0.50 * torch.mean((DG_score - 0.)**2))
+
     return D_loss
 ```
 ```
 def train_G(self, images):
     ...
     G_loss = 0.50 * torch.mean((DG_score - 1.)**2)
+
     return G_loss
 ```
 
@@ -72,7 +75,7 @@ All models were trained for 25 epochs with hidden dimension 400, latent dimensio
 [MMGAN](https://arxiv.org/abs/1406.2661) | <img src = 'viz/MMGAN/reconst_1.png' height = '150px'> | <img src = 'viz/MMGAN/reconst_25.png' height = '150px'> | <img src = 'viz/gifs/MMGAN_gif.gif' height = '150px'> | <img src = 'viz/losses/MMGAN_loss.png' height = '150px'>
 [NSGAN](https://arxiv.org/abs/1406.2661) | <img src = 'viz/NSGAN/reconst_1.png' height = '150px'> | <img src = 'viz/NSGAN/reconst_25.png' height = '150px'> | <img src = 'viz/gifs/NSGAN_gif.gif' height = '150px'> | <img src = 'viz/losses/NSGAN_loss.png' height = '150px'>
 [WGAN](https://arxiv.org/abs/1701.07875) | <img src = 'viz/WGAN/reconst_1.png' height = '150px'> | <img src = 'viz/WGAN/reconst_25.png' height = '150px'> | <img src = 'viz/gifs/WGAN_gif.gif' height = '150px'> | <img src = 'viz/losses/WGAN_loss.png' height = '150px'>
-[WGANGP](https://arxiv.org/abs/1704.00028) | <img src = 'viz/WGANGP/reconst_1.png' height = '150px'> | <img src = 'viz/WGANGP/reconst_25.png' height = '150px'> | <img src = 'viz/gifs/WGANGP_gif.gif' height = '150px'> | <img src = 'viz/losses/WGANGP_loss.png' height = '150px'>
+[WGPGAN](https://arxiv.org/abs/1704.00028) | <img src = 'viz/WGPGAN/reconst_1.png' height = '150px'> | <img src = 'viz/WGPGAN/reconst_25.png' height = '150px'> | <img src = 'viz/gifs/WGPGAN_gif.gif' height = '150px'> | <img src = 'viz/losses/WGPGAN_loss.png' height = '150px'>
 [DRAGAN](https://arxiv.org/abs/1705.07215) | <img src = 'viz/DRAGAN/reconst_1.png' height = '150px'> | <img src = 'viz/DRAGAN/reconst_25.png' height = '150px'> | <img src = 'viz/gifs/DRAGAN_gif.gif' height = '150px'> | <img src = 'viz/losses/DRAGAN_loss.png' height = '150px'>
 [BEGAN](https://arxiv.org/abs/1703.10717) | <img src = 'viz/BEGAN/reconst_1.png' height = '150px'> | <img src = 'viz/BEGAN/reconst_25.png' height = '150px'> | <img src = 'viz/gifs/BEGAN_gif.gif' height = '150px'> | <img src = 'viz/losses/BEGAN_loss.png' height = '150px'>
 [LSGAN](https://arxiv.org/abs/1611.04076) | <img src = 'viz/LSGAN/reconst_1.png' height = '150px'> | <img src = 'viz/LSGAN/reconst_25.png' height = '150px'> | <img src = 'viz/gifs/LSGAN_gif.gif' height = '150px'> | <img src = 'viz/losses/LSGAN_loss.png' height = '150px'>
````
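To make the swap concrete, here is a self-contained comparison of the two discriminator objectives from the README snippets: `DX_score` is D(x) on a real batch and `DG_score` is D(G(z)) on a generated batch, with random tensors standing in for actual model outputs. This is a minimal sketch; the helper function names are illustrative, not the repository's API.

```python
import torch

def nsgan_D_loss(DX_score, DG_score):
    """ Non-saturating GAN discriminator loss (the 'Original' snippet);
    the 1e-8 terms guard against log(0). """
    return -torch.mean(torch.log(DX_score + 1e-8)
                       + torch.log(1 - DG_score + 1e-8))

def lsgan_D_loss(DX_score, DG_score):
    """ LSGAN discriminator loss (the 'New' snippet): push scores on
    real images toward 1. and scores on generated images toward 0. """
    return (0.50 * torch.mean((DX_score - 1.)**2)) \
         + (0.50 * torch.mean((DG_score - 0.)**2))

# Hypothetical scores for a batch of 64 images
DX_score, DG_score = torch.rand(64, 1), torch.rand(64, 1)
print(nsgan_D_loss(DX_score, DG_score).item(),
      lsgan_D_loss(DX_score, DG_score).item())
```

Note that the commit also hard-codes LSGAN's targets (the `a=0, b=1` keyword arguments become literal `0.` and `1.`); this only removes unused flexibility, since the computed loss is identical for the default arguments.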

src/ae.py (+6 -6)
```diff
@@ -164,17 +164,17 @@ def evaluate(self, iterator):
         return np.mean([self.compute_batch(batch).item() for batch in iterator])
 
     def reconstruct_images(self, images, epoch, save=True):
-        """Reconstruct a fixed input at each epoch for progress visualization"""
+        """ Reconstruct a fixed input at each epoch for progress viz """
         # Reshape images, pass through model, reshape reconstructed output
         batch = to_cuda(images.view(images.shape[0], -1))
         reconst_images = self.model(batch)
         reconst_images = reconst_images.view(images.shape).squeeze()
 
         # Plot
         plt.close()
-        size_figure_grid, k = int(reconst_images.shape[0]**0.5), 0
-        fig, ax = plt.subplots(size_figure_grid, size_figure_grid, figsize=(5, 5))
-        for i, j in product(range(size_figure_grid), range(size_figure_grid)):
+        grid_size, k = int(reconst_images.shape[0]**0.5), 0
+        fig, ax = plt.subplots(grid_size, grid_size, figsize=(5, 5))
+        for i, j in product(range(grid_size), range(grid_size)):
             ax[i,j].get_xaxis().set_visible(False)
             ax[i,j].get_yaxis().set_visible(False)
             ax[i,j].imshow(reconst_images[k].data.numpy(), cmap='gray')
@@ -187,10 +187,10 @@ def reconstruct_images(self, images, epoch, save=True):
             os.makedirs(outname)
         torchvision.utils.save_image(self.debugging_image.data,
                                      outname + 'real.png',
-                                     nrow=size_figure_grid)
+                                     nrow=grid_size)
         torchvision.utils.save_image(reconst_images.unsqueeze(1).data,
                                      outname + 'reconst_%d.png' %(epoch),
-                                     nrow=size_figure_grid)
+                                     nrow=grid_size)
 
     def viz_loss(self):
         """ Visualize reconstruction loss """
```

src/bayes_gan.py (+5 -6)
```diff
@@ -1,9 +1,10 @@
 # TODO
-""" (BayesGAN)
+""" (BayesGAN) https://arxiv.org/abs/1705.09558
+Bayesian GAN
 
 From the authors:
-"
-Bayesian GAN (Saatchi and Wilson, 2017) is a Bayesian formulation of Generative
+
+"Bayesian GAN (Saatchi and Wilson, 2017) is a Bayesian formulation of Generative
 Adversarial Networks (Goodfellow, 2014) where we learn the distributions of the
 generator parameters $\theta_g$ and the discriminator parameters $\theta_d$
 instead of optimizing for point estimates. The benefits of the Bayesian approach
@@ -13,7 +14,7 @@
 
 We learn Bayesian GAN via an approximate inference algorithm called Stochastic
 Gradient Hamiltonian Monte Carlo (SGHMC) which is a gradient-based MCMC methods
-whose samples approximate the true posterior distributions of $\theta_g$ and 
+whose samples approximate the true posterior distributions of $\theta_g$ and
 $\theta_d$. The Bayesian GAN training process starts from sampling noise $z$
 from a fixed distribution(typically standard d-dim normal). The noise is fed
 to the generator where the parameters $\theta_g$ are sampled from the posterior
@@ -27,6 +28,4 @@
 
 SGHMC is fancy for using point estimates (as in most GANs) to infer the
 posteriors.
-
-https://arxiv.org/pdf/1705.09558.pdf
 """
```

src/be_gan.py (+50 -33)
```diff
@@ -1,8 +1,6 @@
-""" (BEGAN)
+""" (BEGAN) https://arxiv.org/abs/1703.10717
 Boundary Equilibrium GAN
 
-https://arxiv.org/abs/1703.10717
-
 BEGAN uses an autoencoder as a discriminator and optimizes a lower bound of the
 Wasserstein distance between auto-encoder loss distributions on real and fake
 data (as opposed to the sample distributions of the generator and real data).
@@ -44,7 +42,7 @@
 from itertools import product
 from tqdm import tqdm
 
-from utils import *
+from .utils import *
 
 
 class Generator(nn.Module):
@@ -89,6 +87,8 @@ def __init__(self, image_size, hidden_dim, z_dim):
         self.G = Generator(image_size, hidden_dim, z_dim)
         self.D = Discriminator(image_size, hidden_dim)
 
+        self.shape = int(image_size ** 0.5)
+
 
 class BEGANTrainer:
     def __init__(self, model, train_iter, val_iter, test_iter, viz=False):
@@ -114,21 +114,26 @@ def train(self, num_epochs, G_lr=1e-4, D_lr=1e-4, D_steps=1,
 
         Inputs:
             num_epochs: int, number of epochs to train for
-            G_lr: float, learning rate for generator's optimizer (default 1e-4)
-            D_lr: float, learning rate for discriminator's optimizer (default 1e-4)
-            D_steps: int, ratio for how often to train D compared to G (default 1)
-            GAMMA: float, balance equilibrium between G and D objectives (default 0.50)
-            LAMBDA: float, weight D loss for updating K (default 1e-3)
-            K: float, how much to initially emphasize loss(D(G(z))) in total D loss (default 0.00)
+            G_lr: float, learning rate for generator's optimizer
+            D_lr: float, learning rate for discriminator's optimizer
+            D_steps: int, ratio for how often to train D compared to G
+            GAMMA: float, balance equilibrium between G and D objectives
+            LAMBDA: float, weight D loss for updating K
+            K: float, how much to emphasize loss(D(G(z))) in initial D loss
         """
 
         # Adam optimizers
-        G_optimizer = optim.Adam(params=[p for p in self.model.G.parameters() if p.requires_grad], lr=G_lr)
-        D_optimizer = optim.Adam(params=[p for p in self.model.D.parameters() if p.requires_grad], lr=D_lr)
-
-        # Reduce learning rate by factor of 2 if convergence_metric stops decreasing by a threshold for last five epochs
-        G_scheduler = ReduceLROnPlateau(G_optimizer, factor=0.50, threshold=0.01, patience=5*len(self.train_iter))
-        D_scheduler = ReduceLROnPlateau(D_optimizer, factor=0.50, threshold=0.01, patience=5*len(self.train_iter))
+        G_optimizer = optim.Adam(params=[p for p in self.model.G.parameters()
+                                 if p.requires_grad], lr=G_lr)
+        D_optimizer = optim.Adam(params=[p for p in self.model.D.parameters()
+                                 if p.requires_grad], lr=D_lr)
+
+        # Reduce learning rate by factor of 2 if convergence_metric stops
+        # decreasing by a threshold for last five epochs
+        G_scheduler = ReduceLROnPlateau(G_optimizer, factor=0.50, threshold=0.01,
+                                        patience=5*len(self.train_iter))
+        D_scheduler = ReduceLROnPlateau(D_optimizer, factor=0.50, threshold=0.01,
+                                        patience=5*len(self.train_iter))
 
         # Approximate steps/epoch given D_steps per epoch
         # --> roughly train in the same way as if D_step (1) == G_step (1)
@@ -179,14 +184,15 @@ def train(self, num_epochs, G_lr=1e-4, D_lr=1e-4, D_steps=1,
                 # Save relevant output for progress logging
                 G_losses.append(G_loss.item())
 
-                # PROPORTIONAL CONTROL THEORY: Dynamically update K, log convergence measure
-                convergence_measure = (DX_loss + torch.abs(GAMMA*DX_loss - DG_loss)).item()
+                # PROPORTIONAL CONTROL THEORY: Dynamically update K,
+                # log convergence measure
+                convergence = (DX_loss+torch.abs(GAMMA*DX_loss-DG_loss)).item()
                 K_update = (K + LAMBDA*(GAMMA*DX_loss - DG_loss)).item()
                 K = min(max(0, K_update), 1)
 
                 # Learning rate scheduler
-                D_scheduler.step(convergence_measure)
-                G_scheduler.step(convergence_measure)
+                D_scheduler.step(convergence)
+                G_scheduler.step(convergence)
 
             # Save losses
             self.Glosses.extend(G_losses)
@@ -195,7 +201,7 @@ def train(self, num_epochs, G_lr=1e-4, D_lr=1e-4, D_steps=1,
             # Progress logging
             print ("Epoch[%d/%d], G Loss: %.4f, D Loss: %.4f, K: %.4f, Convergence Measure: %.4f"
                    %(epoch, num_epochs, np.mean(G_losses),
-                     np.mean(D_losses), K, convergence_measure))
+                     np.mean(D_losses), K, convergence))
             self.num_epochs += 1
 
             # Visualize generator progress
@@ -207,10 +213,11 @@ def train_D(self, images, K):
         """ Run 1 step of training for discriminator
 
         Input:
-            images: batch of images (reshaped to [batch_size, 784])
+            images: batch of images (reshaped to [batch_size, -1])
             K: how much to emphasize loss(D(G(z))) in total D loss
         Output:
-            D_loss: BEGAN loss for discriminator, E[||x - AE(x)||1] - K*E[G(z) - AE(G(z))]
+            D_loss: BEGAN loss for discriminator,
+                    E[||x-AE(x)||1] - K*E[G(z) - AE(G(z))]
         """
 
         # Reconstruct the images using D (autoencoder), get reconstruction loss
@@ -239,7 +246,8 @@ def train_G(self, images):
             G_loss: BEGAN loss for G, E[||G(z) - AE(G(Z))||1]
         """
 
-        # Get noise, classify it using G, then reconstruct the output of G using D (autoencoder).
+        # Get noise, classify it using G, then reconstruct the output of G
+        # using D (autoencoder).
         noise = self.compute_noise(images.shape[0], self.model.z_dim) # z
         G_output = self.model.G(noise) # G(z)
         DG_reconst = self.model.D(G_output) # D(G(z))
@@ -250,7 +258,7 @@ def train_G(self, images):
         return G_loss
 
     def compute_noise(self, batch_size, z_dim):
-        """ Compute random noise for the generator to learn to make images from """
+        """ Compute random noise for input into Generator G """
         return to_cuda(torch.randn(batch_size, z_dim))
 
     def process_batch(self, iterator):
@@ -272,13 +280,16 @@ def generate_images(self, epoch, num_outputs=36, save=True):
         images = self.model.G(noise)
 
         # Reshape to proper image size
-        images = images.view(images.shape[0], 28, 28)
+        images = images.view(images.shape[0],
+                             self.model.shape,
+                             self.model.shape,
+                             -1).squeeze()
 
         # Plot
         plt.close()
-        size_figure_grid = int(num_outputs**0.5)
-        fig, ax = plt.subplots(size_figure_grid, size_figure_grid, figsize=(5, 5))
-        for i, j in product(range(size_figure_grid), range(size_figure_grid)):
+        grid_size = int(num_outputs**0.5)
+        fig, ax = plt.subplots(grid_size, grid_size, figsize=(5, 5))
+        for i, j in product(range(grid_size), range(grid_size)):
             ax[i,j].get_xaxis().set_visible(False)
             ax[i,j].get_yaxis().set_visible(False)
             ax[i,j].cla()
@@ -291,7 +302,7 @@ def generate_images(self, epoch, num_outputs=36, save=True):
             os.makedirs(outname)
         torchvision.utils.save_image(images.unsqueeze(1).data,
                                      outname + 'reconst_%d.png'
-                                     %(epoch), nrow=size_figure_grid)
+                                     %(epoch), nrow=grid_size)
 
     def viz_loss(self):
         """ Visualize loss for the generator, discriminator """
@@ -300,9 +311,15 @@ def viz_loss(self):
         plt.style.use('ggplot')
         plt.rcParams["figure.figsize"] = (8,6)
 
-        # Plot Discriminator loss in red, Generator loss in green
-        plt.plot(np.linspace(1, self.num_epochs, len(self.Dlosses)), self.Dlosses, 'r')
-        plt.plot(np.linspace(1, self.num_epochs, len(self.Dlosses)), self.Glosses, 'g')
+        # Plot Discriminator loss in red
+        plt.plot(np.linspace(1, self.num_epochs, len(self.Dlosses)),
+                 self.Dlosses,
+                 'r')
+
+        # Plot Generator loss in green
+        plt.plot(np.linspace(1, self.num_epochs, len(self.Dlosses)),
+                 self.Glosses,
+                 'g')
 
         # Add legend, title
         plt.legend(['Discriminator', 'Generator'])
```
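The renamed `convergence` variable implements BEGAN's proportional control, visible in the hunk above: each step nudges K by LAMBDA*(GAMMA*DX_loss - DG_loss) and clamps it to [0, 1], while the convergence measure is DX_loss + |GAMMA*DX_loss - DG_loss|. A self-contained sketch of one update with made-up loss values (not repository output):

```python
# Illustrative autoencoder losses on real / generated batches
DX_loss, DG_loss = 0.80, 0.30
GAMMA, LAMBDA, K = 0.50, 1e-3, 0.0

# Equilibrium target: DG_loss == GAMMA * DX_loss (here 0.40)
balance = GAMMA * DX_loss - DG_loss        # 0.10 -> fakes reconstructed too well
K = min(max(0, K + LAMBDA * balance), 1)   # K rises; D presses harder on fakes
convergence = DX_loss + abs(balance)       # lower is better
print(K, convergence)                      # 0.0001 0.9
```

The measure is what both `ReduceLROnPlateau` schedulers step on, so the learning rates halve whenever it plateaus.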
