# import necessary packages
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets


# ensure the same random numbers appear every time
np.random.seed(0)

# set a larger default figure size for the plots below
plt.rcParams['figure.figsize'] = (12, 12)


# load the scikit-learn digits dataset: 1797 grayscale images of 8x8 pixels
# (a small built-in dataset, not the full 28x28 MNIST set)
digits = datasets.load_digits()

# define inputs and labels
inputs = digits.images
labels = digits.target

# RGB images have a depth of 3
# our images are grayscale so they should have a depth of 1
inputs = inputs[:, :, :, np.newaxis]

print("inputs = (n_inputs, pixel_width, pixel_height, depth) = " + str(inputs.shape))
print("labels = (n_inputs) = " + str(labels.shape))
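
# note: the digits pixels take values from 0 to 16; the range check below is a
# quick sanity test. Scaling to [0, 1] (e.g. inputs = inputs / 16.0) may help SGD
# converge, but the pipeline here trains on the raw values, so that step is optional
print("pixel value range: [%g, %g]" % (inputs.min(), inputs.max()))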


# choose some random images to display
n_inputs = len(inputs)
indices = np.arange(n_inputs)
# sample without replacement so the five displayed images are distinct
random_indices = np.random.choice(indices, size=5, replace=False)

for i, image in enumerate(digits.images[random_indices]):
    plt.subplot(1, 5, i+1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title("Label: %d" % digits.target[random_indices[i]])
plt.show()

from tensorflow.keras import layers                # layer building blocks (Conv2D, MaxPooling2D, Dense, ...)
from tensorflow.keras.models import Sequential     # lets us append layers to a model one at a time
from tensorflow.keras import optimizers            # optimisers such as SGD, Adam and RMSprop
from tensorflow.keras import regularizers          # weight penalties such as l1, l2 and l1_l2
from tensorflow.keras.utils import to_categorical  # one-hot labels for categorical cross-entropy

from sklearn.model_selection import train_test_split

# one-hot representation of the labels
labels = to_categorical(labels)

# split into train and test data
# one-liner from the scikit-learn library
train_size = 0.8
test_size = 1 - train_size
X_train, X_test, Y_train, Y_test = train_test_split(inputs, labels, train_size=train_size,
                                                    test_size=test_size)
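
# quick sanity check (optional): to_categorical turns each digit into a length-10
# one-hot row, e.g. 3 -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], and the split should
# preserve the (8, 8, 1) image shape
print("X_train:", X_train.shape, " Y_train:", Y_train.shape)
print("X_test: ", X_test.shape, " Y_test: ", Y_test.shape)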

def create_convolutional_neural_network_keras(input_shape, receptive_field,
                                              n_filters, n_neurons_connected, n_categories,
                                              eta, lmbd):
    model = Sequential()
    # convolution with ReLU activation; 'same' padding keeps the 8x8 spatial size
    model.add(layers.Conv2D(n_filters, (receptive_field, receptive_field), input_shape=input_shape, padding='same',
                            activation='relu', kernel_regularizer=regularizers.l2(lmbd)))
    # 2x2 max pooling halves the spatial dimensions
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(n_neurons_connected, activation='relu', kernel_regularizer=regularizers.l2(lmbd)))
    # softmax output layer, one neuron per digit class
    model.add(layers.Dense(n_categories, activation='softmax', kernel_regularizer=regularizers.l2(lmbd)))

    sgd = optimizers.SGD(learning_rate=eta)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return model

epochs = 100
batch_size = 100
input_shape = X_train.shape[1:4]
receptive_field = 3
n_filters = 10
n_neurons_connected = 50
n_categories = 10

eta_vals = np.logspace(-5, 1, 7)
lmbd_vals = np.logspace(-5, 1, 7)
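
# optional sanity check (not part of the grid search itself): build one model with
# placeholder hyperparameter values and print its layer shapes and parameter counts
create_convolutional_neural_network_keras(input_shape, receptive_field,
                                          n_filters, n_neurons_connected, n_categories,
                                          eta_vals[0], lmbd_vals[0]).summary()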

CNN_keras = np.zeros((len(eta_vals), len(lmbd_vals)), dtype=object)

# grid search over the learning rate eta and the l2 parameter lambda
for i, eta in enumerate(eta_vals):
    for j, lmbd in enumerate(lmbd_vals):
        CNN = create_convolutional_neural_network_keras(input_shape, receptive_field,
                                                        n_filters, n_neurons_connected, n_categories,
                                                        eta, lmbd)
        CNN.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, verbose=0)
        scores = CNN.evaluate(X_test, Y_test, verbose=0)

        CNN_keras[i][j] = CNN

        print("Learning rate = ", eta)
        print("Lambda = ", lmbd)
        print("Test accuracy: %.3f" % scores[1])
        print()

# visual representation of the grid search
# uses a seaborn heatmap, could probably do this in matplotlib
import seaborn as sns

sns.set()

train_accuracy = np.zeros((len(eta_vals), len(lmbd_vals)))
test_accuracy = np.zeros((len(eta_vals), len(lmbd_vals)))

# re-evaluate every stored model on the train and test sets
for i in range(len(eta_vals)):
    for j in range(len(lmbd_vals)):
        CNN = CNN_keras[i][j]

        train_accuracy[i][j] = CNN.evaluate(X_train, Y_train, verbose=0)[1]
        test_accuracy[i][j] = CNN.evaluate(X_test, Y_test, verbose=0)[1]
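
# optional summary (added step): report the best (eta, lambda) pair on the test set
best_i, best_j = np.unravel_index(np.argmax(test_accuracy), test_accuracy.shape)
print("Best test accuracy: %.3f at eta = %g, lambda = %g"
      % (test_accuracy[best_i, best_j], eta_vals[best_i], lmbd_vals[best_j]))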

fig, ax = plt.subplots(figsize=(10, 10))
# label the ticks with the actual eta and lambda values rather than array indices
sns.heatmap(train_accuracy, xticklabels=lmbd_vals, yticklabels=eta_vals,
            annot=True, ax=ax, cmap="viridis")
ax.set_title("Training Accuracy")
ax.set_ylabel(r"$\eta$")
ax.set_xlabel(r"$\lambda$")
plt.show()

fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(test_accuracy, xticklabels=lmbd_vals, yticklabels=eta_vals,
            annot=True, ax=ax, cmap="viridis")
ax.set_title("Test Accuracy")
ax.set_ylabel(r"$\eta$")
ax.set_xlabel(r"$\lambda$")
plt.show()