### Training a Multilayer CNN on MNIST data
Based on tutorial originally included with TF1 documentation
and on https://www.tensorflow.org/tutorials/quickstart/advanced

In [11]:
# Load TF and the necessary Keras models
import tensorflow as tf

from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model

In [12]:
# Load, rescale and reshape the data.
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Add a dimension, which makes some downstream operations easier.
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")

In [13]:
train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(32)

test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

In [39]:
# The model from the TF2 tutorial
# https://www.tensorflow.org/tutorials/quickstart/advanced
class DefaultModel(Model):
    def __init__(self):
        super(DefaultModel, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

In [40]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

optimizer = tf.keras.optimizers.Adam()

In [41]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [42]:
@tf.function
def train_step(model, images, labels):
    with tf.GradientTape() as tape:
        # training=True is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)


In [43]:
@tf.function
def test_step(model, images, labels):
    # training=False is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = model(images, training=False)
    t_loss = loss_object(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [44]:
mynn = DefaultModel()

EPOCHS = 5

for epoch in range(EPOCHS):
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for images, labels in train_ds:
        train_step(mynn, images, labels)

    for test_images, test_labels in test_ds:
        test_step(mynn, test_images, test_labels)

    print( f'Epoch {epoch + 1}, '
        f'Loss: {train_loss.result()}, '
        f'Accuracy: {train_accuracy.result() * 100}, '
        f'Test Loss: {test_loss.result()}, '
        f'Test Accuracy: {test_accuracy.result() * 100}')


Epoch 1, Loss: 0.13652484118938446, Accuracy: 95.92333221435547, Test Loss: 0.06616833806037903, Test Accuracy: 97.82999420166016
Epoch 2, Loss: 0.04271334782242775, Accuracy: 98.68333435058594, Test Loss: 0.05195489525794983, Test Accuracy: 98.3699951171875
Epoch 3, Loss: 0.02405359037220478, Accuracy: 99.22666931152344, Test Loss: 0.050795115530490875, Test Accuracy: 98.38999938964844
Epoch 4, Loss: 0.015074114315211773, Accuracy: 99.5183334350586, Test Loss: 0.060854196548461914, Test Accuracy: 98.29999542236328
Epoch 5, Loss: 0.009638654999434948, Accuracy: 99.68499755859375, Test Loss: 0.06737694144248962, Test Accuracy: 98.29999542236328


In [71]:
# Now let's build a slightly more interesting CNN with dropout and such.

# This is the model described in the slides
# We'll use Keras layers, rather than pipelining the layers by hand.
from tensorflow.keras import layers, models
from tensorflow.keras import Model

class CNNwithDropout(Model):
    def __init__(self):
        super(CNNwithDropout, self).__init__()
        self.model = models.Sequential()
        self.model.add(layers.Conv2D(28, (3,3), activation='relu', input_shape=(28, 28, 1)))
        self.model.add(layers.MaxPooling2D((2, 2)))
        self.model.add(layers.Conv2D(64, (3,3), activation='relu'))
        self.model.add(layers.MaxPooling2D((2, 2)))
        self.model.add(layers.Conv2D(64, (3,3), activation='relu'))

        # Add dense layers on top
        self.model.add(layers.Flatten())
        self.model.add(layers.Dense(64, activation='relu'))
        self.model.add(layers.Dropout(0.2)) # Dropout only runs in training.
        self.model.add(layers.Dense(64, activation='relu'))
        self.model.add(layers.Dense(10))

    def call(self, x, training=False):
        return self.model(x, training=training)

mycnn = CNNwithDropout()
mycnn.model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_38 (Conv2D)           (None, 26, 26, 28)        280       
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 13, 13, 28)        0         
_________________________________________________________________
conv2d_39 (Conv2D)           (None, 11, 11, 64)        16192     
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_40 (Conv2D)           (None, 3, 3, 64)          36928     
_________________________________________________________________
flatten_12 (Flatten)         (None, 576)               0         
_________________________________________________________________
dense_28 (Dense)             (None, 64)              

In [None]:
mycnn = CNNwithDropout()

mycnn.compile(optimizer='adagrad',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

history = mycnn.fit(x_train, y_train, epochs=2, 
                    validation_data=(x_test, y_test))

Epoch 1/2

In [61]:
model(x_test[1:10,:,:])

<tf.Tensor: shape=(9, 10), dtype=float32, numpy=
array([[ -8.717808  ,  -5.269855  ,  16.26317   ,  -7.071536  ,
        -11.583947  ,  -9.626197  ,  -5.1650305 ,  -4.932588  ,
         -5.6855364 , -12.496085  ],
       [-10.303463  ,  15.49356   ,  -4.9540277 , -13.120219  ,
         -4.2703443 ,  -5.0333347 ,  -2.420862  ,  -2.503156  ,
         -2.804342  ,  -9.269139  ],
       [ 18.821453  , -12.320669  ,  -8.074598  ,  -9.992606  ,
         -4.230956  ,  -2.6524231 ,   0.37573662, -10.530609  ,
         -7.9318852 ,  -4.313835  ],
       [ -9.477926  ,  -6.698996  ,  -3.2251656 , -13.828509  ,
         17.443289  ,  -9.997732  ,  -4.287447  ,  -5.801468  ,
         -7.7274976 ,   2.7738173 ],
       [-12.478697  ,  18.877573  ,  -6.203577  , -14.508036  ,
         -5.871354  ,  -6.4136605 ,  -3.7606971 ,  -1.0272393 ,
         -4.099281  ,  -9.900334  ],
       [ -9.651112  ,  -3.705072  ,  -5.8738174 , -12.7914295 ,
         12.636612  ,  -8.2450485 ,  -3.7551363 ,  -3.7301729 