day 3 yolo 停車場車牌辨識

2024 iThome 鐵人賽

DAY 3

AI/ ML & Data

基於人工智慧與深度學習對斑馬魚做行為分析系列第 3 篇

16th鐵人賽

neilsu02

2024-08-05 22:03:49

1850 瀏覽

分享至

第三天，我們以大家日常都會接觸到的停車場車牌智慧辨識為例來練習使用 YOLO，以下是完整程式碼：

import cv2
import numpy as np
import pytesseract

# Load the YOLO network (Darknet weights + config) through OpenCV's DNN module.
net = cv2.dnn.readNet("yolov4.weights", "yolov4.cfg")
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer ids. Depending on the OpenCV
# version each id is either wrapped in its own array (shape (N, 1)) or returned
# as a plain scalar (shape (N,)); flattening first handles both layouts, whereas
# indexing ``i[0]`` fails on the flat layout.
output_layers = [
    layer_names[i - 1]
    for i in np.array(net.getUnconnectedOutLayers()).flatten()
]

# Load the class names the model can predict, one name per line. The list index
# of a name is the class id used when labelling detections.
with open("coco.names", "r", encoding="utf-8") as f:
    classes = [line.strip() for line in f]

# Tell pytesseract which Tesseract-OCR executable to run (Tesseract must be
# installed separately). This is a Windows install location; adjust as needed.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Detect objects in an image and run OCR on each detected region.
def detect_license_plate(image_path):
    """Detect objects in an image with YOLO and OCR every kept detection.

    The image is passed through the module-level ``net``. Detections whose
    best class score exceeds 0.5 are filtered with non-maximum suppression,
    drawn on the image with their class label, and the pixels inside each box
    are handed to Tesseract. The recognised text is printed and the annotated
    image is displayed in a window until a key is pressed.

    Args:
        image_path: Path of the image file to analyse.

    Raises:
        OSError: If the image cannot be read or decoded.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Cannot read image: {image_path}")
    height, width = img.shape[:2]

    # Convert the image into the network's input blob: scale pixel values by
    # 0.00392 (about 1/255), resize to 416x416, swap the R and B channels.
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Collect every detection whose best class score clears the threshold.
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            # Entries from index 5 onward are the per-class scores.
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # The first four entries are the box centre and size, scaled
                # here by the image width/height to get pixel units.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Top-left corner of the box.
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    # NMSBoxes may return an empty tuple, an (N, 1) array or a flat array
    # depending on the OpenCV version; normalise to sorted plain ints so the
    # boxes are visited in their original order.
    kept = sorted(int(k) for k in np.array(indexes).flatten())

    font = cv2.FONT_HERSHEY_PLAIN
    color = (0, 255, 0)
    # Crop OCR regions from an unannotated copy so the rectangle and label
    # drawn below never end up inside the pixels given to Tesseract.
    clean = img.copy()
    for i in kept:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.putText(img, label, (x, y - 10), font, 1, color, 2)

        # Clamp the box to the image: a negative start index would wrap
        # around when slicing and yield a wrong or empty crop.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, width), min(y + h, height)
        if x1 <= x0 or y1 <= y0:
            continue  # box lies entirely outside the image; nothing to OCR

        # NOTE: the crop is the whole detected box. With a general-purpose
        # object model that is e.g. the entire car, not only its plate.
        license_plate = clean[y0:y1, x0:x1]
        # OpenCV stores pixels as BGR while pytesseract expects RGB arrays.
        license_plate = cv2.cvtColor(license_plate, cv2.COLOR_BGR2RGB)
        # --psm 8 asks Tesseract to treat the crop as a single word.
        text = pytesseract.image_to_string(license_plate, config='--psm 8').strip()
        print(f"辨識出的車牌號碼: {text}")

    cv2.imshow("Image", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Usage example: run the detector only when this file is executed as a script,
# so importing the module does not open a display window as a side effect.
if __name__ == "__main__":
    detect_license_plate("car_image.jpg")

載入 YOLO 模型

首先，我們用 OpenCV 載入 YOLO 模型的權重和配置文件，並讀入類別名稱檔（coco.names）；這些名稱是 YOLO 模型能辨識的物體（像車子、行人等等）。要注意的是，COCO 的 80 個類別中並沒有「車牌」，若想直接框出車牌本身，需要換成針對車牌訓練的權重與對應的類別檔。

# Load the YOLO network (Darknet weights + config) through OpenCV's DNN module.
net = cv2.dnn.readNet("yolov4.weights", "yolov4.cfg")
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer ids, either as an (N, 1) array
# or as a flat array depending on the OpenCV version; flatten to support both.
output_layers = [
    layer_names[i - 1]
    for i in np.array(net.getUnconnectedOutLayers()).flatten()
]

設置 Tesseract

接著，我們設置 Tesseract 的路徑。Tesseract 是一個光學字元辨識（OCR）工具，我們用它來讀取車牌上的文字。

# Tell pytesseract which Tesseract-OCR executable to run. The path below is a
# Windows install location; change it to wherever Tesseract is installed.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

車牌辨識函數

這個函數用來辨識圖像中的車牌：

def detect_license_plate(image_path):
    # Read the image; cv2.imread returns None (it does not raise) on failure.
    img = cv2.imread(image_path)
    if img is None:
        raise OSError(f"Cannot read image: {image_path}")
    # Only the height and width are needed to scale the detections later.
    height, width = img.shape[:2]

首先，我們讀取圖片並獲取它的尺寸。

圖像預處理

然後，我們把圖片轉換成 YOLO 模型能接受的格式，並進行前向傳播得到檢測結果。

# Build the network input: scale pixel values by 0.00392 (about 1/255), resize
# to 416x416, subtract no mean, and swap the R and B channels.
blob = cv2.dnn.blobFromImage(
    img,
    scalefactor=0.00392,
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)
# Feed the blob in and run a forward pass up to the output layers.
net.setInput(blob)
outs = net.forward(output_layers)

解析 YOLO 輸出

接下來，我們解析 YOLO 模型的輸出，找出每個偵測到的物體的座標、類別和置信度。如果置信度超過 0.5，我們就把這些框的信息存起來。

# Parallel lists: one entry per detection that passes the score threshold.
boxes, confidences, class_ids = [], [], []
for out in outs:
    for detection in out:
        # Entries from index 5 onward are the per-class scores.
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if not confidence > 0.5:
            continue  # best class score too low; ignore this detection

        # The first four entries are the box centre and size, scaled here by
        # the image width/height to get pixel units.
        center_x = int(detection[0] * width)
        center_y = int(detection[1] * height)
        w = int(detection[2] * width)
        h = int(detection[3] * height)

        # Convert the centre point to the top-left corner.
        x = int(center_x - w / 2)
        y = int(center_y - h / 2)

        boxes.append([x, y, w, h])
        confidences.append(float(confidence))
        class_ids.append(class_id)

非極大值抑制

我們用非極大值抑制（NMS）來過濾掉互相重疊的多餘框，只保留最有信心的檢測結果；其中 0.5 是置信度門檻，0.4 是 NMS 的重疊（IoU）門檻。

# Keep only the best box among heavily overlapping ones; returns the indices
# (into ``boxes``) of the detections that survive.
indexes = cv2.dnn.NMSBoxes(
    boxes,
    confidences,
    score_threshold=0.5,
    nms_threshold=0.4,
)

繪製結果並進行 OCR

在這裡，我們在圖片上畫出檢測到的框和標籤，提取車牌區域並使用 Tesseract 進行 OCR，最後輸出車牌號碼。

# Font for the class labels drawn by cv2.putText.
font = cv2.FONT_HERSHEY_PLAIN
color = (0, 255, 0)
img_h, img_w = img.shape[:2]
# Crop OCR regions from an unannotated copy so the rectangle and label drawn
# below never end up inside the pixels given to Tesseract.
clean = img.copy()
# NMSBoxes may return an empty tuple, an (N, 1) array or a flat array depending
# on the OpenCV version; normalise to sorted plain ints.
for i in sorted(int(k) for k in np.array(indexes).flatten()):
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
    cv2.putText(img, label, (x, y - 10), font, 1, color, 2)

    # Clamp the box to the image: a negative start index would wrap around
    # when slicing and yield a wrong or empty crop.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + w, img_w), min(y + h, img_h)
    if x1 <= x0 or y1 <= y0:
        continue  # box lies entirely outside the image; nothing to OCR

    license_plate = clean[y0:y1, x0:x1]
    # OpenCV stores pixels as BGR while pytesseract expects RGB arrays.
    license_plate = cv2.cvtColor(license_plate, cv2.COLOR_BGR2RGB)
    # --psm 8 asks Tesseract to treat the crop as a single word.
    text = pytesseract.image_to_string(license_plate, config='--psm 8').strip()
    print(f"辨識出的車牌號碼: {text}")

# Show the annotated image until a key is pressed, then close the window.
cv2.imshow("Image", img)
cv2.waitKey(0)
cv2.destroyAllWindows()