I am using the standard packages numpy, theano, and matplotlib. I have built Theano with GPU support.
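If you want to run the code on the GPU yourself, Theano is usually configured through the THEANO_FLAGS environment variable (or an equivalent .theanorc file). A minimal sketch, assuming the older CUDA backend (on newer Theano versions the device is called cuda instead of gpu, and the script name below is just a placeholder):

THEANO_FLAGS='device=gpu,floatX=float32' python gan_toy.py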
import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt
%matplotlib inline
Define auxiliary variables:
ADAGRAD_EPS = 1e-10
rng = np.random.RandomState(1234)
floatX = theano.config.floatX
The building block of the neural network is the class Layer, which contains the weight matrix W and the bias vector b.
class Layer(object):
    def __init__(self, n_input, n_output, activation, x):
        # Glorot/Xavier uniform initialization of the weights
        W_init = np.asarray(
            rng.uniform(
                low=-np.sqrt(6. / (n_input + n_output)),
                high=np.sqrt(6. / (n_input + n_output)),
                size=(n_input, n_output)
            ),
            dtype=floatX
        )
        self.W = theano.shared(value=W_init.astype(floatX), name='W', borrow=True)
        self.b = theano.shared(value=np.zeros((n_output,), dtype=floatX), name='b', borrow=True)
        self.activation = activation
        self.params = [self.W, self.b]
        lin_output = T.dot(x, self.W) + self.b
        self.output = lin_output if self.activation is None else self.activation(lin_output)

    def output2(self, x):
        # apply the same weights and biases to a different symbolic input
        lin_output = T.dot(x, self.W) + self.b
        return lin_output if self.activation is None else self.activation(lin_output)
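To make the interface concrete, here is a minimal usage sketch (not part of the model; the input numbers are made up for illustration) showing that a Layer turns a symbolic (batch, n_input) matrix into a (batch, n_output) matrix of activations:

x_sym = T.matrix('x_sym', dtype=floatX)
layer = Layer(n_input=2, n_output=3, activation=T.nnet.relu, x=x_sym)
f = theano.function([x_sym], layer.output)
print f(np.asarray([[0.1, 0.2], [0.3, 0.4]], dtype=floatX)).shape  # (2, 3)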
The class MLP will be used to build the generative adversarial network.
class MLP(object):
    def __init__(self, layer_sizes, activations, z, x=None):
        # for the discriminator, the generated samples (z) are stacked on top of the real samples (x)
        if x is not None:
            inp = T.concatenate([z, x], axis=0)
        else:
            inp = z
        # add layers to the network
        self.layers = []
        for n_input, n_output, activation in zip(layer_sizes[:-1], layer_sizes[1:], activations[1:]):
            self.layers.append(Layer(n_input, n_output, activation, inp))
            inp = self.layers[-1].output
        self.params = []
        for layer in self.layers:
            self.params += layer.params
        self.output = self.layers[-1].output
        if x is not None:
            # cost1: generated samples should be classified as fake
            # cost2: real samples should be classified as real
            # cost3: non-saturating generator cost (generated samples classified as real)
            self.cost1 = -T.sum(T.log(1.0 - self.output[:z.shape[0], :]))
            self.cost2 = -T.sum(T.log(self.output[z.shape[0]:, :]))
            self.cost3 = -T.sum(T.log(self.output[:z.shape[0], :]))

    def output2(self, x):
        # propagate a different symbolic input through the already built layers
        for layer in self.layers:
            x = layer.output2(x)
        return x
The generative adversarial network is an object of the class GAN that contains two objects of the class MLP: the first represents the generative network, the second the discriminative network.
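To make the costs defined in MLP explicit: with m samples per batch, the discriminative network D is trained to minimize

L_D = -(1/(2m)) * sum_i [ log D(x_i) + log(1 - D(G(z_i))) ]

(cost2 plus cost1 above), and the generative network G is trained to minimize the non-saturating loss

L_G = -(1/m) * sum_i log D(G(z_i))

(cost3 above), where x_i are real samples and z_i are the noise inputs of the generator.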
class GAN(object):
    def __init__(self, gen_layer_sizes, disc_layer_sizes, gen_activations, disc_activations, args):
        self.x = T.matrix('x', dtype=floatX)
        self.z = T.matrix('z', dtype=floatX)
        # the generator maps noise z to samples; the discriminator sees generated and real samples
        self.gen_mlp = MLP(gen_layer_sizes, gen_activations, self.z)
        self.disc_mlp = MLP(disc_layer_sizes, disc_activations, self.gen_mlp.output, self.x)
        self.disc_cost = (self.disc_mlp.cost1 + self.disc_mlp.cost2) / (2.0 * args['batch_size'])
        self.gen_cost = self.disc_mlp.cost3 / args['batch_size']
        # gradients with L2 weight decay and AdaGrad accumulators for the discriminator
        self.disc_gparams = [T.grad(self.disc_cost, p) + args['lmbda'] * p for p in self.disc_mlp.params]
        self.disc_gaccums = [theano.shared(value=np.zeros(p.get_value().shape, dtype=floatX)) for p in self.disc_mlp.params]
        # define updates for the optimization
        self.disc_updates = [
            (param, param - args['lr'] * gparam / T.sqrt(gaccum + T.square(gparam) + ADAGRAD_EPS))
            for param, gparam, gaccum in zip(self.disc_mlp.params, self.disc_gparams, self.disc_gaccums)
        ]
        self.disc_updates += [
            (gaccum, gaccum + T.square(gparam))
            for gaccum, gparam in zip(self.disc_gaccums, self.disc_gparams)
        ]
        # the same gradients, accumulators and updates for the generator
        self.gen_gparams = [T.grad(self.gen_cost, p) + args['lmbda'] * p for p in self.gen_mlp.params]
        self.gen_gaccums = [theano.shared(value=np.zeros(p.get_value().shape, dtype=floatX)) for p in self.gen_mlp.params]
        self.gen_updates = [
            (param, param - args['lr'] * gparam / T.sqrt(gaccum + T.square(gparam) + ADAGRAD_EPS))
            for param, gparam, gaccum in zip(self.gen_mlp.params, self.gen_gparams, self.gen_gaccums)
        ]
        self.gen_updates += [
            (gaccum, gaccum + T.square(gparam))
            for gaccum, gparam in zip(self.gen_gaccums, self.gen_gparams)
        ]
        # compiled function for one discriminator update
        self.disc_train = theano.function(
            inputs=[self.x, self.z],
            outputs=self.disc_cost,
            updates=self.disc_updates
        )
        # compiled function for one generator update
        self.gen_train = theano.function(
            inputs=[self.x, self.z],
            outputs=self.gen_cost,
            updates=self.gen_updates
        )
        self.generate_samples = theano.function(
            inputs=[self.z],
            outputs=self.gen_mlp.output
        )
        self.get_output = theano.function(
            inputs=[self.x, self.z],
            outputs=self.disc_mlp.output
        )
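The updates above implement AdaGrad with an L2 penalty lmbda added to the gradients: each parameter keeps an accumulator of squared gradients, and the step size is scaled by the inverse square root of that accumulator. In the notation of the code, for a parameter p with gradient g = dL/dp + lmbda * p:

acc <- acc + g^2
p   <- p - lr * g / sqrt(acc + ADAGRAD_EPS)

where the parameter update already uses the accumulator that includes the current g^2.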
The main code starts here. First, I create a toy dataset: points on the lower half of a circle of radius 0.5 centered at (0.5, 0.5), with a small amount of Gaussian noise added to the second coordinate.
np.random.seed(0)
N = 1000
X = np.random.rand(N)
X = np.transpose(np.vstack((X, -1.0*np.sqrt(0.25 - (X - 0.5)**2) + 0.5 + 0.02*np.random.randn(N))).astype(floatX))
plt.figure(figsize = (8,4))
plt.scatter(X[:, 0], X[:, 1], lw=.3, s=3, cmap=plt.cm.cool)
plt.axis([0, 1, 0, 0.5])
plt.show()
We define the parameters of the training algorithm and the network configuration. The goal of training is to obtain a generative network that takes uniformly distributed input and produces samples that follow the distribution of our toy dataset.
args = {}
args['batch_size'] = 1000
args['lmbda'] = 0.0
args['lr'] = 0.01
args['epochs'] = 3000
num_batches = X.shape[0] / args['batch_size']
gen_layer_sizes = [1, 30, 30, 50, X.shape[1]]
gen_activations = [None, T.nnet.relu, T.nnet.relu, T.tanh, None]
disc_layer_sizes = [X.shape[1], 30, 30, 30, 30, 1]
disc_activations = [None, T.nnet.relu, T.nnet.relu, T.nnet.relu, T.nnet.relu, T.nnet.sigmoid]
gan = GAN(gen_layer_sizes, disc_layer_sizes, gen_activations, disc_activations, args)
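Before training, a quick sanity check (optional, not part of the original pipeline) that the untrained generator already produces points of the right shape:

z_test = 4.0*np.random.rand(5, gen_layer_sizes[0]).astype(floatX)
print gan.generate_samples(z_test).shape  # expected: (5, 2)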
I am using the nnvisual.py code to show what the generative and discriminative networks look like. It is a modified version of the code from https://github.com/miloharper/visualise-neural-network (thanks to miloharper for it); I have changed the orientation of the neural network in the original file.
from nnvisual import *
print "Generative network:"
network = NeuralNetwork()
for l in gen_layer_sizes:
network.add_layer(l)
network.draw()
print "Discriminative network:"
network = NeuralNetwork()
for l in disc_layer_sizes:
network.add_layer(l)
network.draw()
The main training loop alternates two steps: 5 optimization steps for the discriminative network followed by 1 optimization step for the generative network.
np.random.seed(232)
for epoch in xrange(args['epochs']*num_batches):
    # 5 optimization steps for the discriminative network
    for i in range(5):
        k = epoch % num_batches
        x = X[k * args['batch_size']:(k+1) * args['batch_size'], :]
        z = 4.0*np.random.rand(x.shape[0], gen_layer_sizes[0]).astype(floatX)
        cost = gan.disc_train(x, z)
    # 1 optimization step for the generative network
    for i in range(1):
        z = 4.0*np.random.rand(x.shape[0], gen_layer_sizes[0]).astype(floatX)
        cost = gan.gen_train(x, z)
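The compiled get_output function is not used in the rest of the notebook, but it offers a quick way to sanity-check the result: for a well-trained GAN the discriminator should output values close to 0.5 on both generated and real points. A minimal sketch (the variable names here are mine):

z_check = 4.0*np.random.rand(args['batch_size'], gen_layer_sizes[0]).astype(floatX)
d_out = gan.get_output(X[:args['batch_size'], :], z_check)
print "mean D on generated samples: %.3f" % d_out[:args['batch_size'], 0].mean()
print "mean D on real samples: %.3f" % d_out[args['batch_size']:, 0].mean()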
Finally, we generate samples resembling our toy dataset from uniformly distributed inputs (red points in the plot).
z = 4.0*np.random.rand(1000, gen_layer_sizes[0]).astype(floatX)
X_generated = gan.generate_samples(z)
plt.figure(figsize = (8,4))
plt.scatter(X[:, 0], X[:, 1], c='blue', lw=.3, s=3)
plt.scatter(X_generated[:, 0], X_generated[:, 1], c='red', lw=.3, s=3)
plt.axis([0, 1, 0, 0.5])
plt.show()