本次主題是以Colab的環境進行學習的。在本篇文章中,我將講解影像辨識的物件追蹤技術,並依照進度每個禮拜記錄不同的影像辨識方法,基本順序如下:
資料集準備:
跟前一篇主題相同,本次使用的資料集是Kaggle上面的Road Segmentation Dataset - vehicle dataset,這也是存放在本地端的資料集。由於使用的是Colab,所以要先把資料集上傳到Google雲端硬碟上,方便Colab抓取資料集內容。
資料集的資料夾格式如下:
如圖:
資料夾內容:
images資料夾裡面裝要辨識的照片:
masks資料夾裡面裝每張照片對應的標註遮罩(mask)圖片:
雲端硬碟掛載:
# Mount Google Drive inside the Colab runtime at /content/drive; the dataset
# and output paths used later in this file all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')
模型訓練:
將雲端硬碟掛載好之後,我們就可以開始訓練模型了。後面文章會再補充模型的介紹以及模型的堆疊。在訓練好模型之後我們會將模型儲存到雲端硬碟,方便下次直接使用模型。
import os
import datetime
from PIL import Image
from IPython.display import display
import PIL
from PIL import ImageOps
import matplotlib.pyplot as plt
import cv2 as cv
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Activation, ReLU
from tensorflow.keras.layers import BatchNormalization, Conv2DTranspose, Concatenate
from tensorflow.keras.models import Model, Sequential
import tensorflow as tf
import numpy as np
import tarfile
from matplotlib import gridspec
from keras.preprocessing.image import load_img
from IPython.display import clear_output
import matplotlib.pyplot as plt
from tensorflow.keras import layers
import matplotlib.image
# Dataset locations on the mounted Google Drive.
input_dir = "/content/drive/MyDrive/Segmentation/images"
target_dir = "/content/drive/MyDrive/Segmentation/masks"
# NOTE(review): `num_num_class` is not read anywhere in the visible code and
# looks like a typo for a class-count setting -- confirm before relying on it.
num_num_class = 5
batch_size = 1
img_size = (160, 160)


def _list_png_files(directory):
    """Return the sorted full paths of the non-hidden ``.png`` files in *directory*."""
    return sorted(
        os.path.join(directory, fname)
        for fname in os.listdir(directory)
        if fname.endswith(".png") and not fname.startswith(".")
    )


# Both lists go through the same filter so that the i-th image and the i-th
# mask stay paired after sorting (hidden files would otherwise shift one list).
input_img_paths = _list_png_files(input_dir)
target_img_paths = _list_png_files(target_dir)

# Images and masks are paired purely by position, so a count mismatch would
# silently train on wrong pairs; fail early instead.
if len(input_img_paths) != len(target_img_paths):
    raise ValueError(
        "Found {} images but {} masks; every image needs exactly one mask.".format(
            len(input_img_paths), len(target_img_paths)
        )
    )

print("樣本數:", len(input_img_paths))
for input_path, target_path in zip(input_img_paths[:10], target_img_paths[:10]):
    print(input_path, "|", target_path)
def convolution_operation(entered_input, filters=64):
    """Run the input through two Conv2D -> BatchNormalization -> ReLU stages.

    Args:
        entered_input: Tensor fed into the first convolution.
        filters: Number of filters used by both 3x3, same-padded convolutions.

    Returns:
        The output tensor of the second ReLU.
    """
    features = entered_input
    # Two identical stages; each one builds fresh layers in the order
    # convolution, batch normalisation, activation.
    for _ in range(2):
        features = Conv2D(filters, kernel_size=(3, 3), padding="same")(features)
        features = BatchNormalization()(features)
        features = ReLU()(features)
    return features
class data_read(keras.utils.Sequence):
    """Keras ``Sequence`` that loads (image batch, mask batch) pairs from file paths.

    Images and masks are paired by position: the i-th entry of
    ``input_img_paths`` belongs to the i-th entry of ``target_img_paths``.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths,
                 num_classes=10):
        """Store the batching configuration and the paired path lists.

        Args:
            batch_size: Number of samples per batch.
            img_size: ``(height, width)`` tuple every image and mask is resized to.
            input_img_paths: Paths of the input images.
            target_img_paths: Paths of the mask images, in the same order.
            num_classes: Mask values are clipped to ``[0, num_classes - 1]``;
                the default of 10 keeps the original upper bound of 9.
        """
        super().__init__()
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths
        self.num_classes = num_classes

    def __len__(self):
        """Return the number of complete batches (a trailing partial batch is dropped)."""
        return len(self.target_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Return batch number ``idx`` as an ``(x, y)`` tuple.

        ``x`` is a float32 array of shape ``(batch_size, height, width, 3)`` and
        ``y`` is a uint8 array of shape ``(batch_size, height, width, 1)``.
        """
        i = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
        batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]

        # Size the arrays from this instance's batch size, not from a module
        # global that may have been reassigned since the generator was built.
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_input_img_paths):
            x[j] = load_img(path, target_size=self.img_size)

        y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")
        for j, path in enumerate(batch_target_img_paths):
            mask = load_img(path, target_size=self.img_size, color_mode="grayscale")
            # Keep every label inside the valid class-index range.
            mask = np.clip(mask, 0, self.num_classes - 1)
            y[j] = np.expand_dims(mask, 2)
        return x, y
def decoder(entered_input, skip, filters=64):
    """Upsample the input, merge it with a skip connection and convolve the result.

    Args:
        entered_input: Tensor coming from the previous (lower-resolution) stage.
        skip: Encoder tensor concatenated with the upsampled input.
        filters: Number of filters for the transposed convolution and the
            convolution block that follows.

    Returns:
        The output tensor of the convolution block.
    """
    # A 2x2 transposed convolution with stride 2 doubles the spatial size.
    upsampled = Conv2DTranspose(filters, (2, 2), strides=2, padding="same")(entered_input)
    merged = Concatenate()([upsampled, skip])
    return convolution_operation(merged, filters)
def encoder(entered_input, filters=64):
    """Convolve the input, then downsample it with max pooling.

    Args:
        entered_input: Tensor fed into the convolution block.
        filters: Number of filters used by the convolution block.

    Returns:
        A ``(features, pooled)`` tuple: the convolution block output (used as
        the skip connection) and its max-pooled version (input of the next stage).
    """
    features = convolution_operation(entered_input, filters)
    pooled = MaxPooling2D(strides=(2, 2))(features)
    return features, pooled
def get_model(img_size, num_classes=10, batch_size=1):
    """Build the encoder/decoder segmentation model.

    Args:
        img_size: ``(height, width)`` of the RGB input images. Both values must
            be divisible by 16 because the four pooling stages each halve the
            resolution and the decoder has to restore matching shapes for the
            skip connections.
        num_classes: Number of output channels (one per class).
        batch_size: Fixed batch size of the input layer; ``None`` leaves it
            variable.

    Returns:
        An uncompiled ``Model`` mapping an image batch to per-pixel class
        probabilities.

    Raises:
        ValueError: If ``img_size`` is not divisible by 16 in both dimensions.
    """
    height, width = img_size
    if height % 16 or width % 16:
        raise ValueError(
            "img_size must be divisible by 16 in both dimensions, got {}".format(
                (height, width)
            )
        )

    # Honour the requested image size instead of a hard-coded 160x160 input.
    input1 = Input((height, width, 3), batch_size=batch_size)

    # Contracting path: each stage doubles the filter count (8 -> 64).
    skip1, encoder_1 = encoder(input1, 8)
    skip2, encoder_2 = encoder(encoder_1, 8*2)
    skip3, encoder_3 = encoder(encoder_2, 8*4)
    skip4, encoder_4 = encoder(encoder_3, 8*8)

    # Bottleneck.
    conv_block = convolution_operation(encoder_4, 8*8)

    # Expanding path: mirror of the encoder, reusing the skip tensors.
    decoder_1 = decoder(conv_block, skip4, 8*8)
    decoder_2 = decoder(decoder_1, skip3, 8*4)
    decoder_3 = decoder(decoder_2, skip2, 8*2)
    decoder_4 = decoder(decoder_3, skip1, 8)

    # Softmax yields one probability distribution per pixel, which is what a
    # sparse categorical cross-entropy loss on integer masks expects.
    out = Conv2D(num_classes, 1, padding="same", activation="softmax")(decoder_4)
    return Model(input1, out)
# Drop any graph state left over from a previous run of this cell.
keras.backend.clear_session()
model = get_model(img_size)
model.summary()

# Hold out `val_samples` image/mask pairs for validation so the validation
# metrics are not measured on the very data the model is trained on.
val_samples = 10
pair_count = min(len(input_img_paths), len(target_img_paths))
# Fixed seed keeps the split reproducible between runs.
shuffled_order = np.random.default_rng(1337).permutation(pair_count)
shuffled_input_paths = [input_img_paths[k] for k in shuffled_order]
shuffled_target_paths = [target_img_paths[k] for k in shuffled_order]

if pair_count > val_samples:
    train_input_img_paths = shuffled_input_paths[:-val_samples]
    train_target_img_paths = shuffled_target_paths[:-val_samples]
    val_input_img_paths = shuffled_input_paths[-val_samples:]
    val_target_img_paths = shuffled_target_paths[-val_samples:]
else:
    # Too few samples for a hold-out set: reuse all data for both roles and
    # make it obvious that the validation numbers are not independent.
    print("Warning: not enough samples for a separate validation split; "
          "validating on the training data.")
    train_input_img_paths = input_img_paths
    train_target_img_paths = target_img_paths
    val_input_img_paths = input_img_paths
    val_target_img_paths = target_img_paths

train_gen = data_read(
    batch_size, img_size, train_input_img_paths, train_target_img_paths
)
val_gen = data_read(batch_size, img_size, val_input_img_paths, val_target_img_paths)

# Runs tf.function code eagerly; handy for debugging but slower than graph mode.
tf.config.run_functions_eagerly(True)
model.compile(optimizer="nAdam", loss="sparse_categorical_crossentropy",
              metrics=['accuracy'])
class DisplayCallback(tf.keras.callbacks.Callback):
    """Keras callback that redraws sample predictions at the end of every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Clear the notebook output, show predictions and report the epoch number.

        Args:
            epoch: Zero-based epoch index supplied by Keras.
            logs: Metric dictionary supplied by Keras (unused).
        """
        # NOTE(review): show_predictions is not defined in the visible part of
        # this file -- confirm it exists before registering this callback.
        clear_output(wait=True)
        show_predictions()
        finished_epoch = epoch + 1  # report epochs 1-based
        print('\nSample Prediction after epoch {}\n'.format(finished_epoch))
# All training artefacts (TensorBoard logs and the saved model) go here.
fit_dir = "/content/drive/MyDrive/fit/"
os.makedirs(fit_dir, exist_ok=True)

# One timestamped log directory per run keeps TensorBoard runs separate.
log_dir = fit_dir + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
callbacks = [tensorboard_callback]

print(train_gen)
epochs = 10
# Pass the callbacks to fit(); without this the TensorBoard callback is
# created but never receives any training events.
model.fit(train_gen, validation_data=val_gen, epochs=epochs, callbacks=callbacks)
model.save('/content/drive/MyDrive/fit/model_1.h5')
訓練完成後成果圖:
模型載入及預測:
# Reload the model file written by the training section so predictions can be
# made without retraining.
model = keras.models.load_model("/content/drive/MyDrive/fit/model_1.h5")
#model.fit(train_gen,validation_data=val_gen, epochs=epochs)
model.summary()
# Rebuild the validation generator; this reads batch_size, img_size and the
# val_* path lists as they were defined in the training section.
val_gen = data_read(batch_size, img_size, val_input_img_paths,val_target_img_paths)
# NOTE(review): the constants below are assigned after val_gen is created, so
# they do not affect it. num_classes = 21 and batch_size = 8 also differ from
# the values used when the model was built (10 output channels, batch size 1)
# -- confirm whether they are still needed.
num_classes = 21
batch_size = 8
img_size = (160, 160)
def run_test(imagefile, test_dir="/content/drive/MyDrive/fit/test/"):
    """Predict a segmentation mask for one image and display it.

    The image is read from ``test_dir``, resized to 160x160 and passed to the
    module-level ``model``. The per-pixel argmax mask is saved next to the
    image as ``mask_<imagefile>``, then reloaded, enlarged to 1920x1080 and
    shown with matplotlib.

    Args:
        imagefile: File name of the test image inside ``test_dir``.
        test_dir: Directory (with trailing slash) holding the test image; the
            mask file is written to the same directory.

    Raises:
        FileNotFoundError: If the test image cannot be read.
    """
    source_path = test_dir + imagefile
    image_np = cv.imread(source_path)
    # cv.imread signals failure by returning None rather than raising.
    if image_np is None:
        raise FileNotFoundError("Could not read test image: {}".format(source_path))

    # OpenCV loads BGR; reverse the channel axis to get RGB before resizing.
    rgb_image = np.ascontiguousarray(image_np[:, :, ::-1])
    image_np = cv.resize(rgb_image, (160, 160))
    image_np = np.expand_dims(image_np, axis=0)  # add the batch dimension

    val_preds = model.predict(image_np)
    # Most likely class per pixel, with a trailing channel axis for array_to_img.
    mask = np.argmax(val_preds[0], axis=-1)
    mask = np.expand_dims(mask, axis=-1)

    # autocontrast stretches the small class indices over the full grey range
    # so the classes are distinguishable when viewed.
    img = PIL.ImageOps.autocontrast(keras.preprocessing.image.array_to_img(mask))
    img = img.resize((540, 540))
    mask_path = test_dir + "mask_" + imagefile
    img.save(mask_path)

    img2 = cv.imread(mask_path)
    # interpolation must be passed by keyword: the third positional parameter
    # of cv.resize is `dst`, not the interpolation flag.
    img2 = cv.resize(img2, (1920, 1080), interpolation=cv.INTER_CUBIC)
    img2 = np.around(img2)
    print(*np.array(img2))
    plt.imshow(img2, cmap="gray")
    plt.show()
# Run the prediction on a single sample image from the test directory.
run_test("images_car.jpg")
print('Done.')
原始圖片:
實際預測結果:
fit的資料夾共享:
https://drive.google.com/drive/folders/1_I7ze_S5Sg3-ACmFOB2nXhudWex0hd1L?usp=sharing
如果不想自己建立資料集的話我這邊提供了一個可以下載我的資料集的連結:
https://drive.google.com/drive/folders/1icz4woL3DdWhR-qQ2p4g5ES_1QpjqCY4?usp=sharing
如果實際預測上遇到甚麼問題或是error的話歡迎丟到留言區討論喔!
文章主題一覽:
粗體字為額外更新的文章。