Import Module
import os

import cv2
import numpy as np
import tensorflow as tf
from PIL import Image

# Directory intended for exported models.
# NOTE(review): not referenced by the code shown in this tutorial — confirm it is still needed.
model_path = './tf_model/'
# Number of epochs passed to model.fit() during training.
epoch_num = 30
CNN Model Code
The first half of the model definition mainly uses the Conv2D (convolution) and MaxPooling2D (pooling) layers provided by keras.layers.
The CNN input is a tensor of shape (image_height, image_width, color_channels). The MNIST dataset is black and white, so there is only one color channel; an ordinary color picture has three (R, G, B). Readers familiar with web front-end development may know that some pictures have four channels (R, G, B, A), where A stands for transparency. For the MNIST dataset the input tensor shape is (28, 28, 1), which is passed to the first layer of the network through the input_shape parameter.
class CNN(object):
    """Convolutional classifier for 28x28 single-channel digit images.

    The network is a convolution/pooling feature extractor followed by a
    small fully connected head ending in a 10-way softmax. The built Keras
    model is exposed as ``self.model``.
    """

    def __init__(self):
        layers = tf.keras.layers

        # Feature extractor: three 3x3 convolutions with 2x2 max pooling
        # after the first two.
        feature_layers = [
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
        ]

        # Classifier head: flatten the feature maps, then two dense layers.
        head_layers = [
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(10, activation='softmax'),
        ]

        network = tf.keras.models.Sequential(feature_layers + head_layers)

        # Print the layer-by-layer structure of the model.
        network.summary()

        self.model = network
model.summary() is used to print the structure of the model that we defined.
We can see that the output of each Conv2D and MaxPooling2D layer is a three-dimensional tensor (height, width, channels). The height and width decrease gradually. The number of output channels is controlled by the first parameter (for example, 32 or 64), and as the height and width decrease, the number of channels can be increased (from a computational-cost point of view).
The second half of the model defines the output tensor. layers.Flatten converts a three-dimensional tensor into a one-dimensional vector: the tensor before flattening has shape (3, 3, 64), which becomes a vector of length 576 (3 × 3 × 64). It is followed by two layers.Dense (fully connected) layers, which gradually reduce the length of the vector from 576 to 64 and then to 10.
The second half corresponds to an ordinary neural network with an input layer of 576 units, a hidden layer of 64 units, and an output layer of 10 units. The activation function of the last layer is softmax, and its 10 outputs correspond exactly to the 10 digits 0-9.
data processing
60,000 training pictures, 10,000 test pictures
# Add a trailing channel axis so every image has shape (28, 28, 1).
train_images = train_images.reshape((60000, 28, 28, 1))
test_images = test_images.reshape((10000, 28, 28, 1))
Pixel value mapped between 0 - 1
# Dividing by 255.0 converts the pixel values to floats.
train_images, test_images = train_images / 255.0, test_images / 255.0
# Excerpt from a constructor: `self` is the data-source instance that keeps
# the prepared images together with their labels.
self.train_images, self.train_labels = train_images, train_labels
self.test_images, self.test_labels = test_images, test_labels
So the data-processing class is defined as follows:
class DataSource(object):
    """Load the MNIST dataset and prepare it as input for the CNN.

    After construction the instance exposes ``train_images``,
    ``train_labels``, ``test_images`` and ``test_labels``. Images are
    reshaped to (N, 28, 28, 1) and divided by 255.0; labels are stored
    exactly as returned by Keras.
    """

    def __init__(self):
        # Keras downloads the MNIST dataset automatically if it is not
        # already available locally.
        mnist = tf.keras.datasets.mnist
        (train_x, train_y), (test_x, test_y) = mnist.load_data()

        # 60,000 training pictures and 10,000 test pictures, each given an
        # explicit single-channel axis.
        train_x = train_x.reshape((60000, 28, 28, 1))
        test_x = test_x.reshape((10000, 28, 28, 1))

        # Map pixel values into the range 0 - 1.
        self.train_images = train_x / 255.0
        self.test_images = test_x / 255.0

        self.train_labels = train_y
        self.test_labels = test_y
train
class Train:
    """Train the CNN on MNIST, checkpoint its weights and report test accuracy."""

    def __init__(self):
        self.cnn = CNN()
        self.data = DataSource()

    def train(self):
        """Compile and fit the model, then evaluate it on the test set.

        Weight checkpoints are written to ``./ckpt`` while training runs, and
        the final test accuracy is printed.
        """
        # Checkpoints must go to ./ckpt because that is the directory Predict
        # restores from; the previous './checkpoint/' path meant the saved
        # weights were never found at prediction time.
        check_path = './ckpt/cp-{epoch:04d}.ckpt'

        # period=2 saves the weights every 2 epochs.
        # NOTE: TensorFlow reports `period` as deprecated in favour of
        # `save_freq` (counted in batches); it is kept here so the saving
        # schedule stays unchanged.
        save_model_cb = tf.keras.callbacks.ModelCheckpoint(
            check_path, save_weights_only=True, verbose=1, period=2)

        # Labels are integer class ids, hence the sparse cross-entropy loss.
        self.cnn.model.compile(optimizer='adam',
                               loss='sparse_categorical_crossentropy',
                               metrics=['accuracy'])
        self.cnn.model.fit(self.data.train_images, self.data.train_labels,
                           epochs=epoch_num, callbacks=[save_model_cb])

        test_loss, test_acc = self.cnn.model.evaluate(
            self.data.test_images, self.data.test_labels)
        print("accuracy rate: %.4f,Total Tested%d Picture " % (test_acc, len(self.data.test_labels)))
After executing python train.py, you get the following results:
_________________________________________________________________ WARNING:tensorflow:`period` argument is deprecated. Please use `save_freq` to specify the frequency in number of batches seen. Epoch 1/5 1875/1875 [==============================] - 60s 32ms/step - loss: 0.1528 - accuracy: 0.9538 Epoch 2/5 1875/1875 [==============================] - 54s 29ms/step - loss: 0.0485 - accuracy: 0.9849 Epoch 3/5 1875/1875 [==============================] - 54s 29ms/step - loss: 0.0356 - accuracy: 0.9890 Epoch 4/5 1875/1875 [==============================] - 55s 29ms/step - loss: 0.0269 - accuracy: 0.9922 Epoch 5/5 1874/1875 [============================>.] - ETA: 0s - loss: 0.0211 - accuracy: 0.9931 Epoch 5: saving model to ./ckpt/cp-0005.ckpt 1875/1875 [==============================] - 56s 30ms/step - loss: 0.0211 - accuracy: 0.9931 313/313 [==============================] - 3s 8ms/step - loss: 0.0317 - accuracy: 0.9913 accuracy rate: 0.9913,A total of 10,000 pictures were tested
You can see that after the first round of training the recognition accuracy reaches 0.9538, and after 5 rounds the training accuracy reaches 0.9931; verification on the test set gives an accuracy of 0.9913.
In the fifth round, the model parameters were saved successfully to ./ckpt/cp-0005.ckpt. Next, we can load the saved model parameters, restore the entire convolutional neural network, and predict on real pictures.
Picture Prediction
class Predict(object):
    """Restore the trained CNN from the latest checkpoint and classify images."""

    def __init__(self):
        """Build the network and load the newest weights found in ``./ckpt``.

        Raises:
            FileNotFoundError: if ``./ckpt`` contains no checkpoint.
        """
        latest = tf.train.latest_checkpoint('./ckpt')
        if latest is None:
            # latest_checkpoint() returns None when nothing has been saved;
            # fail with a clear message instead of inside load_weights().
            raise FileNotFoundError(
                "No checkpoint found in './ckpt'; run the training step first.")
        self.cnn = CNN()
        # Restore network weights
        self.cnn.model.load_weights(latest)

    def predict(self, image_path):
        """Predict the digit shown in the image at *image_path*.

        The image is converted to grayscale and resized to 28x28, so inputs
        of any size are accepted. The path, the raw class probabilities and
        the predicted digit are printed.

        Returns:
            The predicted digit (0-9) as an ``int``.
        """
        # Read the picture in black and white and bring it to the 28x28
        # resolution the network was trained on.
        with Image.open(image_path) as picture:
            gray = picture.convert('L').resize((28, 28))

        img = np.asarray(gray, dtype=np.float64).reshape(28, 28, 1) / 255.0

        # Invert the intensities and wrap the image in a batch of one.
        # NOTE(review): the inversion presumably expects dark digits on a
        # light background — confirm against the test images.
        x = np.array([1 - img])
        y = self.cnn.model.predict(x)

        # Because x only passes in one picture, take y[0];
        # np.argmax() gives the index of the maximum value, which is the
        # digit it represents.
        digit = int(np.argmax(y[0]))

        print(image_path)
        print(y[0])
        print(' -> Predict digit', digit)
        return digit
Main function
if __name__ == "__main__":
    # Train first so that a checkpoint exists for Predict to restore.
    app = Train()
    app.train()

    predict = Predict()

    # Classify every entry of ./test. The loop variable names avoid shadowing
    # the builtin `list`, and os.path.join builds the path from the bare
    # names returned by os.listdir().
    test_dir = "./test"
    for file_name in os.listdir(test_dir):
        predict.predict(os.path.join(test_dir, file_name))
Put the pictures to be predicted in the test folder to run digit prediction on them.