I am using the standard packages numpy, theano, and matplotlib. I have compiled Theano with GPU support.
import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt
%matplotlib inline
The building block of the neural network is the class Layer, which contains the matrix of input weights W and the vector of biases b.
class Layer(object):
    def __init__(self, W_init, b_init, activation):
        # W_init has shape (n_output, n_input); b_init has shape (n_output,).
        n_output, n_input = W_init.shape
        assert b_init.shape == (n_output,)
        self.W = theano.shared(value=W_init.astype(theano.config.floatX),
                               name='W',
                               borrow=True)
        # Store b as a column vector, broadcastable across the batch dimension.
        self.b = theano.shared(value=b_init.reshape(n_output, 1).astype(theano.config.floatX),
                               name='b',
                               borrow=True,
                               broadcastable=(False, True))
        self.activation = activation
        self.params = [self.W, self.b]

    def output(self, x):
        # Affine transform followed by the (optional) nonlinearity.
        lin_output = T.dot(self.W, x) + self.b
        return lin_output if self.activation is None else self.activation(lin_output)
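As a quick sanity check (a minimal sketch of my own, not part of the original post; it assumes the imports and the Layer class above), a single layer can be compiled and applied to a batch, where columns are samples to match T.dot(W, x):
# Hypothetical example: a 3-unit tanh layer applied to a batch of five
# 2-dimensional inputs (one sample per column).
W0 = np.random.randn(3, 2)
b0 = np.random.randn(3)
layer = Layer(W0, b0, T.tanh)

x = T.matrix('x')
f = theano.function([x], layer.output(x))

batch = np.random.randn(2, 5).astype(theano.config.floatX)
print(f(batch).shape)  # (3, 5): one 3-dimensional output per column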
The following class AutoEncoder combines objects of the class Layer into a network.
class AutoEncoder(object):
    def __init__(self, W_init, b_init, activations):
        assert len(W_init) == len(b_init) == len(activations)
        self.layers = []
        for W, b, activation in zip(W_init, b_init, activations):
            self.layers.append(Layer(W, b, activation))
        # Collect the parameters of all layers for the optimizer.
        self.params = []
        for layer in self.layers:
            self.params += layer.params

    def output(self, x):
        # Propagate x through the whole network.
        for layer in self.layers:
            x = layer.output(x)
        return x

    def layer_output(self, x):
        # Output of the encoder half: for the architecture used below,
        # the first three layers end at the one-unit bottleneck.
        for layer in self.layers[0:3]:
            x = layer.output(x)
        return x

    def squared_error(self, x):
        # Reconstruction cost: sum of squared differences over the batch.
        return T.sum((self.output(x) - x)**2)
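To illustrate the composition (again a sketch with made-up sizes, not code from the original post), a tiny 2-1-2 autoencoder maps the input down to one dimension and back:
# Hypothetical tiny autoencoder: 2 -> 1 -> 2, tanh encoder, linear decoder.
W_toy = [np.random.randn(1, 2), np.random.randn(2, 1)]
b_toy = [np.random.randn(1), np.random.randn(2)]
toy = AutoEncoder(W_toy, b_toy, [T.tanh, None])

x = T.matrix('x')
reconstruct = theano.function([x], toy.output(x))
error = theano.function([x], toy.squared_error(x))

batch = np.random.randn(2, 4).astype(theano.config.floatX)
print(reconstruct(batch).shape)  # (2, 4): same shape as the input
print(error(batch))              # scalar: sum of squared differences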
The optimization function gradient_descent_momentum could be made a method of the class AutoEncoder, but I have decided to keep it separate.
def gradient_descent_momentum(cost, params, learning_rate, momentum):
    assert 0 <= momentum <= 1
    updates = []
    for param in params:
        # Velocity: a shared variable of the same shape as the parameter,
        # initialized to zero.
        param_update = theano.shared(param.get_value()*0.,
                                     broadcastable=param.broadcastable)
        # Theano applies all updates simultaneously, so the parameter step
        # below uses the velocity from the previous iteration.
        updates.append((param, param - learning_rate*param_update))
        updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)))
    return updates
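One subtlety worth spelling out: Theano evaluates all updates in the list using the old values on the right-hand sides, so the parameter step uses the velocity from the previous iteration before the velocity itself is refreshed. In plain NumPy, one iteration would look roughly like this (a sketch of my own; grad stands in for T.grad(cost, param)):
# Hypothetical NumPy equivalent of one update step for a single parameter.
def momentum_step(param, velocity, grad, learning_rate, momentum):
    new_param = param - learning_rate * velocity                 # uses the OLD velocity
    new_velocity = momentum * velocity + (1. - momentum) * grad  # then refresh it
    return new_param, new_velocity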
The main code starts here. First, I create the toy dataset:
np.random.seed(0)
N = 1000
X = np.random.rand(N)
# Stack x-coordinates with noisy y-coordinates on the lower semicircle
# of radius 0.5 centered at (0.5, 0.5).
X = np.vstack((X, -1.0*np.sqrt(0.25 - (X - 0.5)**2) + 0.5 + 0.05*np.random.randn(N))).astype(theano.config.floatX)
plt.figure(figsize=(8, 4))
plt.scatter(X[0, :], X[1, :], lw=.3, s=3)
plt.axis([0, 1, 0, 0.5])
plt.show()
The desired output of our autoencoder is the projection of the original points onto the semicircle. The next step is to define the autoencoder:
np.random.seed(232)
layer_sizes = [X.shape[0], 10, 10, 1, 10, 10, X.shape[0]]
W_init = []
b_init = []
activations = []
for n_input, n_output in zip(layer_sizes[:-1], layer_sizes[1:]):
    W_init.append(0.1*np.random.randn(n_output, n_input) - 0.05)
    b_init.append(0.1*np.random.randn(n_output) - 0.05)
# One activation per layer: tanh on the first and fifth layers, linear
# (None) elsewhere, including the bottleneck and the output layer.
activations.append(T.tanh)
activations.append(None)
activations.append(None)
activations.append(None)
activations.append(T.tanh)
activations.append(None)
ae = AutoEncoder(W_init, b_init, activations)
ae_input = T.matrix('ae_input')
learning_rate = 0.0001
momentum = 0.0
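Before compiling anything, it can be useful to check that the weight shapes and activations line up with layer_sizes (a quick inspection of my own, not part of the original code):
# Print the shape of each weight matrix together with its activation.
for W, activation in zip(W_init, activations):
    print(W.shape, activation)
# Expected: (10, 2), (10, 10), (1, 10), (10, 1), (10, 10), (2, 10),
# with tanh on the first and fifth layers and None (linear) elsewhere.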
I am using the nnvisual.py code to show what the autoencoder looks like. This code is a modified version of the code from https://github.com/miloharper/visualise-neural-network (thanks to miloharper for it). I have changed the orientation of the neural network from the original file.
from nnvisual import *
network = NeuralNetwork()
for l in layer_sizes:
    network.add_layer(l)
network.draw()
Next, I define additional Theano functions to calculate the cost and the outputs. In addition, I compile the Theano function train that will train our autoencoder.
cost = ae.squared_error(ae_input)
train = theano.function([ae_input], cost,
                        updates=gradient_descent_momentum(cost, ae.params, learning_rate, momentum))
ae_output = theano.function([ae_input], ae.output(ae_input))
ae_layer_output = theano.function([ae_input], ae.layer_output(ae_input))
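A quick shape check of the compiled functions (a sketch of my own; the shapes assume the dataset created above):
# Neither call updates any parameters.
print(ae_output(X).shape)        # (2, 1000): reconstructions of the inputs
print(ae_layer_output(X).shape)  # (1, 1000): the 1-D bottleneck code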
The main training loop:
iteration = 0
max_iteration = 10000
while iteration < max_iteration:
    current_cost = train(X)
    current_output = ae_output(X)
    if (iteration+1) % 2000 == 0:
        # Report the mean squared reconstruction error every 2000 iterations.
        print(np.mean((X - current_output)**2))
    iteration += 1
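The compiled ae_layer_output function is not used by the loop above; as an extra visualization of my own (not in the original post), it can color the input points by their 1-D bottleneck code, which should vary smoothly along the semicircle if training succeeded:
# Color each input point by its 1-D bottleneck code.
code = ae_layer_output(X)  # shape (1, N)
plt.figure(figsize=(8, 4))
plt.scatter(X[0, :], X[1, :], c=code[0, :], lw=.3, s=3, cmap=plt.cm.cool)
plt.colorbar(label='bottleneck code')
plt.axis([0, 1, 0, 0.5])
plt.show()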
Finally, I compare the input values with the output of the trained autoencoder:
plt.figure(figsize=(8,4))
plt.scatter(X[0, :], X[1, :], c='blue', lw=.3, s=3)
plt.scatter(current_output[0, :], current_output[1, :], c='red', lw=.3, s=3)
plt.axis([0, 1, 0, 0.5])
plt.title('Cost: {:.3f}'.format(float(np.mean((X - current_output)**2))))
plt.show()