本次主題是以colab的環境進行學習的。在本篇文章中,我將講解影像辨識的基礎技能;這些技能在接下來的文章中將多次出現,先讀過這些語法再繼續看後面的文章,會比較能快速上手喔。依照進度每個禮拜都會記錄不同的影像辨識方法,基本順序會從:
文章順序有更改,主要原因是因為模型訓練部分內容有點多,所以我先講解實際應用,如果最後還有時間我會再將模型訓練補上。
一開始先去下載已訓練好的模型,模型載好之後,就可以開始預測了。這是我自己訓練的模型,能預測汽車、摩托車、腳踏車還有人。
在原來的程式碼中,我們只有把資料裡的物件框出來;本篇將會延伸應用,將預測出來的物件框標記到csv的表格裡。
模型載入及預測:
import sys
import datetime
from PIL import Image
from IPython.display import display
import PIL
from PIL import ImageOps
import matplotlib.pyplot as plt
import cv2 as cv
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Activation, ReLU
from tensorflow.keras.layers import BatchNormalization, Conv2DTranspose, Concatenate
from tensorflow.keras.models import Model, Sequential
import tensorflow as tf
import numpy as np
import os
import tarfile
from skimage import io
import shutil
import zipfile
import pathlib
import glob
import pandas as pd
from matplotlib import gridspec
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import urllib
from IPython.display import clear_output
import matplotlib.pyplot as plt
import matplotlib.image
# `import urllib` alone does not guarantee that the `urllib.request`
# submodule is loaded, so import it explicitly before using it.
import urllib.request

# Ask for the picture to analyse; anything that is not an http(s) URL falls
# back to a sample picture.
image_url = input('enter URL').strip()
if not image_url.startswith(('http://', 'https://')):
    image_url = 'https://robbreport.com/wp-content/uploads/2024/04/RR_50_Most_Expensive_Cars_Update_Lead.jpg?w=1000'

# URL split on "/"; the last element is later used to derive a local file name.
filenames = image_url.split("/")

# Download the raw bytes and close the HTTP response as soon as they are read.
with urllib.request.urlopen(image_url) as url_response:
    img_array = np.frombuffer(url_response.read(), dtype=np.uint8)

# IMREAD_COLOR always yields a 3-channel 8-bit BGR array, which is what the
# later `[:, :, ::-1]` channel reversal relies on (IMREAD_UNCHANGED could
# return a 2-D grayscale or a 4-channel BGRA array instead).
img = cv.imdecode(img_array, cv.IMREAD_COLOR)
if img is None:
    raise ValueError("Could not decode an image from {!r}".format(image_url))
# ---- Model / dataset settings -------------------------------------------
num_classes, batch_size, img_size = 6, 10, (160, 160)
image_list = "/content/drive/MyDrive/submission_end/Public_Testing_Dataset_Only_for_detection"
save_dir = "."

# ---- Per-detection accumulators -------------------------------------------
# One entry is appended to each list for every detected object; together they
# become the columns of the submission table.
filename, filesc, fileclass, filex, filey, filew, fileh = ([] for _ in range(7))

# ---- Miscellaneous settings and counters ----------------------------------
img_height = img_width = 180
batch_size = 32  # overrides the value of 10 assigned above
j = 0
submit2 = pd.DataFrame()
y = 0

# Detections scoring below this value are discarded.
DETECTION_THRESHOLD = 0.3
import time
import numpy as np
# `import urllib` alone does not guarantee that the `urllib.request`
# submodule is loaded, so import it explicitly before using it.
import urllib.request

# Download the pre-trained TFLite detector to ./model.tflite.
# urlretrieve returns a (local_filename, headers) tuple, which is what
# `model_path` holds afterwards.
model_path = 'https://github.com/max106051231/nspo/raw/cbf6cb739cc241af8038b9c5d664f20004569c7a/model.tflite'
model_path = urllib.request.urlretrieve(model_path, "model.tflite")

# Download the label map (one class name per line) to ./labelmap.txt.
label_map = 'https://raw.githubusercontent.com/max106051231/nspo/cbf6cb739cc241af8038b9c5d664f20004569c7a/labelmap.txt'
label_map = urllib.request.urlretrieve(label_map, "labelmap.txt")
with open("labelmap.txt", 'r', encoding="utf-8") as label_map:
    label_line = label_map.readlines()

# Strip the line terminator from every label: it would otherwise be embedded
# in the text drawn next to each bounding box.
labels = [label_name.strip() for label_name in label_line]

# `classes` is indexed by class id; ids without a label read '???'. The table
# keeps at least 1000 entries and grows if the label map is longer than that.
classes = labels + ['???'] * max(0, 1000 - len(labels))
t = len(labels)  # number of labels read

# One random colour per class id, used when drawing boxes and captions.
COLORS = np.random.randint(0, 255, size=(len(classes), 3), dtype=np.uint8)
# NOTE: an identical `preprocess_image` is defined again further down in this
# file; that later definition is the one in effect when the script runs.
def preprocess_image(image_path, input_size):
    """Load an image file and prepare it as model input.

    Args:
        image_path: Path of the image file to read.
        input_size: Target size passed to ``tf.image.resize``.

    Returns:
        A ``(resized_img, original_image)`` tuple: the resized float32 image
        with a leading batch axis added, and the decoded image before
        resizing (already converted to float32).
    """
    img = tf.io.read_file(image_path)
    # expand_animations=False keeps the result 3-D even for GIF input, so the
    # batch axis added below always produces a 4-D tensor.
    img = tf.io.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.convert_image_dtype(img, np.float32)
    original_image = img
    resized_img = tf.image.resize(img, input_size)
    resized_img = resized_img[tf.newaxis, :]
    resized_img = tf.cast(resized_img, dtype=np.float32)
    return resized_img, original_image
def detect_objects(interpreter, image, threshold):
    """Run the detector on one image and keep the confident detections.

    Args:
        interpreter: Object whose ``get_signature_runner()`` returns a
            callable accepting ``input=`` and returning a mapping with the
            keys ``output_0`` .. ``output_3``.
        image: Preprocessed image passed to the signature runner.
        threshold: Minimum score a detection needs to be kept.

    Returns:
        A list of dicts with the keys ``bounding_box``, ``class_id`` and
        ``score``, one per detection whose score is at least ``threshold``.
    """
    signature_fn = interpreter.get_signature_runner()
    output = signature_fn(input=image)

    count = int(np.squeeze(output['output_0']))
    # Flatten with reshape rather than squeeze: squeeze would also drop the
    # detection axis when there is exactly one detection slot, making the
    # indexing below fail.
    scores = np.reshape(output['output_1'], -1)
    class_ids = np.reshape(output['output_2'], -1)
    boxes = np.reshape(output['output_3'], (-1, 4))

    # Never index past the arrays, even if the reported count is larger.
    usable = min(count, len(scores), len(class_ids), len(boxes))
    return [
        {
            'bounding_box': boxes[i],
            'class_id': class_ids[i],
            'score': scores[i],
        }
        for i in range(usable)
        if scores[i] >= threshold
    ]
# Detector class id -> label_id recorded in the submission table.
# Class ids not listed here are recorded as 'x'.
_SUBMISSION_LABEL_IDS = {0: 2, 1: 4, 3: 3, 7: 1, 5: 1, 2: 1}


def run_odt_and_draw_results(image_path, interpreter, threshold=0.5):
    """Detect objects in an image, record them and draw them.

    For every detection kept by ``detect_objects`` this function:
      * appends the box (x, y, w, h in pixels of ``image_nn``), the mapped
        label id, the score and the current ``line`` value to the
        module-level lists ``filex``, ``filey``, ``filew``, ``fileh``,
        ``fileclass``, ``filesc`` and ``filename``;
      * draws the box and a "<label>: <score>%" caption onto the
        module-level image ``image_nn`` (modified in place).

    Args:
        image_path: Path of the image file fed to the model.
        interpreter: Interpreter passed on to ``detect_objects``; its first
            input's shape is used to size the model input.
        threshold: Minimum score for a detection to be kept.

    Returns:
        ``image_nn`` with the drawings, converted to ``np.uint8``.
    """
    # The model input size comes from the interpreter itself.
    # assumes the first input is laid out as (batch, height, width, channels)
    # -- TODO confirm for other models.
    _, input_height, input_width, _ = interpreter.get_input_details()[0]['shape']

    preprocessed_image, _ = preprocess_image(
        image_path,
        (input_height, input_width)
    )
    results = detect_objects(interpreter, preprocessed_image, threshold=threshold)

    # Boxes are scaled to, and drawn on, the module-level display image.
    original_image_np = image_nn
    img_h, img_w = original_image_np.shape[0], original_image_np.shape[1]

    for obj in results:
        # Box coordinates are fractions of the image size; convert to pixels.
        ymin, xmin, ymax, xmax = obj['bounding_box']
        xmin = int(xmin * img_w)
        xmax = int(xmax * img_w)
        ymin = int(ymin * img_h)
        ymax = int(ymax * img_h)

        class_id = int(obj['class_id'])

        # Record the detection for the submission table.
        filex.append(xmin)
        filey.append(ymin)
        fileh.append(ymax - ymin)
        filew.append(xmax - xmin)
        filename.append(line)
        fileclass.append(_SUBMISSION_LABEL_IDS.get(class_id, 'x'))
        filesc.append(obj['score'])

        color = [int(c) for c in COLORS[class_id]]
        cv.rectangle(original_image_np, (xmin, ymin), (xmax, ymax), color, 2)

        # Put the caption above the box, or below its top edge when the box
        # is too close to the top of the image.
        y = ymin - 15 if ymin - 15 > 15 else ymin + 15
        label = "{}: {:.0f}%".format(classes[class_id], obj['score'] * 100)
        cv.putText(original_image_np, label, (xmin, y),
                   cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return original_image_np.astype(np.uint8)
# NOTE: this redefines the `preprocess_image` already defined earlier in this
# file with the same logic; being the later definition, it is the one in
# effect when the script runs. Keep the two in sync, or remove one of them.
def preprocess_image(image_path, input_size):
    """Load an image file and prepare it as model input.

    Args:
        image_path: Path of the image file to read.
        input_size: Target size passed to ``tf.image.resize``.

    Returns:
        A ``(resized_img, original_image)`` tuple: the resized float32 image
        with a leading batch axis added, and the decoded image before
        resizing (already converted to float32).
    """
    img = tf.io.read_file(image_path)
    # expand_animations=False keeps the result 3-D even for GIF input, so the
    # batch axis added below always produces a 4-D tensor.
    img = tf.io.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.convert_image_dtype(img, np.float32)
    original_image = img
    resized_img = tf.image.resize(img, input_size)
    resized_img = resized_img[tf.newaxis, :]
    resized_img = tf.cast(resized_img, dtype=np.float32)
    return resized_img, original_image
# ---- Run detection on the downloaded picture and export the results --------
# The script handles exactly one picture (the one downloaded above).

# Local file name of the picture: last URL segment without the query string.
# (Iterating over `filenames[-1]` would walk the characters of that string,
# so the name is taken as a whole instead.)
line = filenames[-1].split("?")[0]
if not os.path.splitext(line)[1]:
    # cv.imwrite selects the encoder from the file extension, so make sure
    # the name has one.
    line = (line or "input") + ".jpg"
images = os.path.splitext(line)[0]  # name without extension, for the output

image_np = img

# The decoded picture is in OpenCV's BGR order; reverse the channel axis to
# get RGB for matplotlib and bring the picture to a 1920x1080 frame. The
# detection helper draws on this image and scales the boxes to it.
image_nn = cv.resize(image_np[:, :, ::-1], (1920, 1080))
plt.imshow(image_nn)
plt.title("Before identification is complete")
plt.show()

# Keep a local copy of the picture: the preprocessing step reads from a path.
cv.imwrite(line, image_np)

interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
# Model input size, exposed at module level because the detection helper may
# look these names up as globals.
# assumes the first input is (batch, height, width, channels) -- TODO confirm.
_, input_height, input_width, _ = input_details[0]['shape']

detection_result_image = run_odt_and_draw_results(
    line,
    interpreter,
    threshold=DETECTION_THRESHOLD
)

# Make sure the output folder exists; cv.imwrite does not create it.
output_dir = os.path.join(save_dir, "object_detections")
os.makedirs(output_dir, exist_ok=True)

# The annotated picture is RGB, while cv.imwrite expects BGR.
cv.imwrite(
    os.path.join(output_dir, images + '.png'),
    cv.cvtColor(detection_result_image, cv.COLOR_RGB2BGR)
)

# The interpolation flag must be passed by keyword: the third positional
# parameter of cv.resize is the destination array, not the interpolation.
detection_result_image = cv.resize(
    detection_result_image, (1920, 1080), interpolation=cv.INTER_CUBIC
)
plt.imshow(detection_result_image)
plt.title("After identification is complete")
plt.show()

# Build the submission table in one go, so re-running this part does not
# fail on columns that already exist.
submit2 = pd.DataFrame({
    "image_filename": filename,
    "label_id": fileclass,
    "x": filex,
    "y": filey,
    "w": filew,
    "h": fileh,
    "confidence": filesc,
})
submit2.to_csv(os.path.join(output_dir, "submission.csv"), index=False)
原始圖片:
實際預測結果:
預測結果感覺不佳,主要原因是訓練次數不夠;可以自行增加訓練次數,效果應該會更好。
如果實際預測上遇到甚麼問題或是error的話歡迎丟到留言區討論喔!
文章主題一覽:
粗體字為額外更新的文章。