範例
範例
COCO數據集是一個具有大規模目標檢測、影像分割和語意標註的數據集。
COCO數據集有5種標註類型,以json檔儲存,如下。
2.1 Object Instances(物件偵測)
2.2 Keypoint Detection(關鍵點檢測)
2.3 Image Captioning(圖像描述)
2.4 Stuff Segmentation(語意分割)
2.5 Panoptic Segmentation(全景分割)
YOLO模型的主要任務為Object Detection(物件偵測),因此我們採用Object Instances標註格式來建立COCO資料集的json檔。
Object Instance標註格式
4.1 以JSON排版工具呈現
4.2 info:對於資料集的描述。如:中英數OCR資料集、創立年份、提供者名稱、資料集版本。
4.3 images:對於圖片的描述。如:圖片編號、檔案名稱、圖片尺寸。
4.4 annotations:圖片標籤資訊。如:bbox的索引0~3,依序代表bounding box左上角的x座標、y座標,以及寬度與高度。
4.5 categories:紀錄物件的標籤類別。
流程與Python函式
1.1 xml轉換為annos.txt
def get(root, name):
    """Return every element under ``root`` that matches ``name``.

    Args:
        root: An ElementTree element (any object exposing ``findall``).
        name: Tag name or path expression handed to ``root.findall``.

    Returns:
        The list produced by ``root.findall(name)``; empty when nothing
        matches.
    """
    return root.findall(name)


def get_and_check(root, name, length):
    """Look up ``name`` under ``root`` and validate the number of matches.

    Args:
        root: An ElementTree element (any object exposing ``findall`` and
            ``tag``).
        name: Tag name or path expression to search for.
        length: Expected number of matches. A value of ``0`` or less skips
            the count check; ``1`` additionally unwraps the single match.

    Returns:
        The single matching element when ``length == 1``, otherwise the
        list of all matches.

    Raises:
        NotImplementedError: If nothing matches, or if ``length`` is
            positive and the number of matches differs from it.
    """
    found = get(root, name)
    if not found:
        raise NotImplementedError(
            'Can not find {} in {}.'.format(name, root.tag))
    if length > 0 and len(found) != length:
        # The message is kept on one logical line: a string literal broken
        # across physical lines is a syntax error.
        raise NotImplementedError(
            'The size of {} is supposed to be {}, but is {}.'
            .format(name, length, len(found)))
    if length == 1:
        return found[0]
    return found
def transfer_xml_to_annos(xmlPath, saveDir, classes=None):
    """Append the bounding boxes found in XML annotation files to a text file.

    Each ``object`` element of each XML file produces one output line:
    ``<filename> <label> <xmin> <ymin> <xmax> <ymax>``, where ``label`` is
    the 1-based position of the object's ``name`` in ``classes`` and
    ``xmin``/``ymin`` are the XML values minus one (presumably converting
    1-based pixel coordinates to 0-based; confirm against the labelling
    tool that produced the XML).

    Args:
        xmlPath: Sequence of paths to the XML files to convert.
        saveDir: Path of the text file to append to (despite the name this
            is a file, not a directory).
        classes: Ordered list of class names. When omitted, the
            module-level ``classes`` variable is used, which is what the
            original two-argument call relied on.

    Raises:
        NameError: If ``classes`` is omitted and no module-level ``classes``
            exists.
        ValueError: If an object's name is not listed in ``classes``.
        NotImplementedError: Propagated from ``get_and_check`` when a
            required element is missing or duplicated.
    """
    if classes is None:
        try:
            classes = globals()['classes']
        except KeyError:
            raise NameError("name 'classes' is not defined") from None

    total = len(xmlPath)
    # Open the output once instead of once per XML file. Append mode is
    # kept, so earlier content survives; the file is created even when
    # there is nothing to convert, so later steps can always open it.
    with open(saveDir, "a") as bbox:
        for n, xml_file in enumerate(xmlPath, 1):
            root = ET.parse(xml_file).getroot()
            # Image file name recorded in the annotation.
            filename = get_and_check(root, 'filename', 1).text
            # One output line per annotated box.
            for obj in get(root, 'object'):
                category = get_and_check(obj, 'name', 1).text
                label_index = classes.index(category) + 1
                bndbox = get_and_check(obj, 'bndbox', 1)
                xmin = int(get_and_check(bndbox, 'xmin', 1).text) - 1
                ymin = int(get_and_check(bndbox, 'ymin', 1).text) - 1
                xmax = int(get_and_check(bndbox, 'xmax', 1).text)
                ymax = int(get_and_check(bndbox, 'ymax', 1).text)
                bbox.write('{} {} {} {} {} {}\n'.format(
                    filename, label_index, xmin, ymin, xmax, ymax))
            print('※ 第{:3d}個xml檔案完成'.format(n))
            print('※ 剩{:3d}個需轉換'.format(total - n))
            print("-" * 35)
1.2 將圖片依照比例分配train與val資料集
def train_val_split(source, ratio, seed=None):
    """Randomly split the files of ``<source>/images`` into train and val.

    Args:
        source: Dataset root folder containing an ``images`` sub-folder.
        ratio: Fraction of the files assigned to the training list.
        seed: Optional seed. When given, the shuffle uses a private
            ``random.Random(seed)`` so the split is reproducible; when
            ``None`` the global ``random`` state is used.

    Returns:
        A ``(train_list, val_list)`` tuple of file names (not full paths).
    """
    image_dir = os.path.join(source, 'images')
    # Keep regular files only: a sub-directory in the listing would later be
    # handed to the image reader. Sorting makes a seeded shuffle independent
    # of the order os.listdir happens to return.
    indexes = sorted(
        name for name in os.listdir(image_dir)
        if os.path.isfile(os.path.join(image_dir, name))
    )
    if seed is None:
        random.shuffle(indexes)
    else:
        random.Random(seed).shuffle(indexes)

    pic_num = len(indexes)
    # Clamp so a negative ratio yields an empty training list rather than a
    # slice counted from the end.
    train_num = min(max(int(pic_num * ratio), 0), pic_num)
    return indexes[:train_num], indexes[train_num:]
1.3 將標籤轉換成coco格式,並以json格式存檔。資料夾需包含images(圖片資料夾)、annos.txt(bbox標記)與classes.txt(類別清單),轉換後的json會存放於annotations資料夾。
def _group_annos_by_image(lines):
    """Index annotation lines by image file name.

    Returns a dict mapping file name to a list of
    ``(line_index, [cls_id, x_min, y_min, x_max, y_max])`` entries in file
    order; ``line_index`` is the 0-based position of the line.
    """
    grouped = {}
    for line_no, line in enumerate(lines):
        # Split from the right so a file name containing spaces stays whole.
        fields = line.strip().rsplit(None, 5)
        if not fields:
            continue  # blank line
        if len(fields) != 6:
            raise ValueError('Malformed annotation on line {}: {!r}'
                             .format(line_no + 1, line))
        grouped.setdefault(fields[0], []).append((line_no, fields[1:]))
    return grouped


def transfer_and_save_coco(source, split_list, dataset, phase):
    """Fill ``dataset`` with COCO image/annotation records and save it as JSON.

    Reads ``<source>/annos.txt`` (one box per line: file name, class id,
    x_min, y_min, x_max, y_max), reads every image of ``split_list`` from
    ``<source>/images`` to obtain its size, appends the records to
    ``dataset['images']`` and ``dataset['annotations']`` in place, and
    writes ``dataset`` to ``<source>/annotations/<phase>.json``.

    Args:
        source: Dataset root folder.
        split_list: Image file names belonging to this split.
        dataset: Dict with ``'images'`` and ``'annotations'`` lists; mutated.
        phase: Base name of the JSON file to write (without extension).

    Raises:
        OSError: If an image cannot be read.
        ValueError: If a non-blank line of annos.txt does not hold six
            fields.
    """
    # Parse the annotation file once instead of rescanning it per image.
    with open(os.path.join(source, 'annos.txt')) as tr:
        annos_by_image = _group_annos_by_image(tr)

    # Number of images processed.
    count = 0
    for image_id, file_name in enumerate(split_list):
        count += 1
        # The image is read only to obtain its width and height.
        image_path = os.path.join(source, 'images', file_name)
        im = cv2.imread(image_path)
        if im is None:
            raise OSError('Can not read image {}'.format(image_path))
        img_height, img_width = im.shape[:2]
        dataset['images'].append({'file_name': file_name,
                                  'id': image_id,
                                  'width': img_width,
                                  'height': img_height})
        for anno_id, fields in annos_by_image.get(file_name, ()):
            cls_id, x_min, y_min, x_max, y_max = fields
            x1 = float(x_min)
            y1 = float(y_min)
            x2 = float(x_max)
            y2 = float(y_max)
            box_width = max(0, x2 - x1)
            box_height = max(0, y2 - y1)
            dataset['annotations'].append({
                'area': box_width * box_height,
                'bbox': [x1, y1, box_width, box_height],
                'category_id': int(cls_id),
                # NOTE(review): ids are 0-based line indices of annos.txt;
                # some COCO tooling reportedly expects ids >= 1. Confirm
                # before running evaluation on this file.
                'id': anno_id,
                'image_id': image_id,
                'iscrowd': 0,
                # Only needed for segmentation: the rectangle drawn
                # clockwise from the top-left corner would be
                # [[x1, y1, x2, y1, x2, y2, x1, y2]] (plus 'ignore': 0).
                'segmentation': []
            })
    print(' {} images handled'.format(count))

    # Save the JSON file.
    folder = os.path.join(source, 'annotations')
    os.makedirs(folder, exist_ok=True)
    json_name = os.path.join(folder, '{}.json'.format(phase))
    with open(json_name, 'w') as f:
        json.dump(dataset, f)
def txt_to_coco_json(source, classes, split_list, phase):
    """Create the COCO skeleton for one split and hand it off for saving.

    Builds the ``info`` and ``categories`` sections (category ids start at
    1, in the order of ``classes``) with empty ``annotations``/``images``
    lists, then calls ``transfer_and_save_coco`` to fill and write it.

    Args:
        source: Dataset root folder.
        classes: Ordered class names.
        split_list: Image file names of this split.
        phase: Base name of the JSON file, also used in progress messages.
    """
    # Map each label to its 1-based id.
    categories = [
        {'id': cat_id, 'name': label, 'supercategory': 'mark'}
        for cat_id, label in enumerate(classes, 1)
    ]
    info = {
        'description': '',
        'url': '',
        'version': '1.0',
        'year': 2022,
        'contributor': 'James',
        'date_created': '',
    }
    # Container for image records and labels of an object-detection set.
    dataset = {
        'info': info,
        'categories': categories,
        'annotations': [],
        'images': [],
        'type': 'instances',
    }
    # Convert this split to COCO format and store it as JSON.
    print('※ 開始轉換{}'.format(phase))
    transfer_and_save_coco(source, split_list, dataset, phase)
    print('※ {}.json Done'.format(phase))
1.4 移動圖片到train與val資料夾
def split_images_to_train_and_val(source, train_list, val_list):
    """Move the images of each split into its own folder under ``source``.

    Files named in ``train_list`` are moved from ``<source>/images`` to
    ``<source>/train2017`` and those in ``val_list`` to
    ``<source>/val2017``; both folders are created when missing.

    Args:
        source: Dataset root folder.
        train_list: File names of the training images.
        val_list: File names of the validation images.
    """
    plan = (('train2017', train_list), ('val2017', val_list))
    # Create both destination folders before anything is moved.
    for sub_folder, _ in plan:
        target = os.path.join(source, sub_folder)
        if not os.path.exists(target):
            os.makedirs(target)
    # Move every image into the folder of its split.
    for sub_folder, names in plan:
        destination = os.path.join(source, sub_folder, '')
        for name in names:
            shutil.move(source + '/images/' + name, destination)
    print('移動圖片到train與val資料夾 Done')
完整程式碼
import shutil
import random
import json
import cv2
import os
import xml.etree.ElementTree as ET
# ----------------------------------Step1----------------------------------
def get(root, name):
    """Return every element under ``root`` that matches ``name``.

    Args:
        root: An ElementTree element (any object exposing ``findall``).
        name: Tag name or path expression handed to ``root.findall``.

    Returns:
        The list produced by ``root.findall(name)``; empty when nothing
        matches.
    """
    return root.findall(name)


def get_and_check(root, name, length):
    """Look up ``name`` under ``root`` and validate the number of matches.

    Args:
        root: An ElementTree element (any object exposing ``findall`` and
            ``tag``).
        name: Tag name or path expression to search for.
        length: Expected number of matches. A value of ``0`` or less skips
            the count check; ``1`` additionally unwraps the single match.

    Returns:
        The single matching element when ``length == 1``, otherwise the
        list of all matches.

    Raises:
        NotImplementedError: If nothing matches, or if ``length`` is
            positive and the number of matches differs from it.
    """
    found = get(root, name)
    if not found:
        raise NotImplementedError(
            'Can not find {} in {}.'.format(name, root.tag))
    if length > 0 and len(found) != length:
        # The message is kept on one logical line: a string literal broken
        # across physical lines is a syntax error.
        raise NotImplementedError(
            'The size of {} is supposed to be {}, but is {}.'
            .format(name, length, len(found)))
    if length == 1:
        return found[0]
    return found
def transfer_xml_to_annos(xmlPath, saveDir, classes=None):
    """Append the bounding boxes found in XML annotation files to a text file.

    Each ``object`` element of each XML file produces one output line:
    ``<filename> <label> <xmin> <ymin> <xmax> <ymax>``, where ``label`` is
    the 1-based position of the object's ``name`` in ``classes`` and
    ``xmin``/``ymin`` are the XML values minus one (presumably converting
    1-based pixel coordinates to 0-based; confirm against the labelling
    tool that produced the XML).

    Args:
        xmlPath: Sequence of paths to the XML files to convert.
        saveDir: Path of the text file to append to (despite the name this
            is a file, not a directory).
        classes: Ordered list of class names. When omitted, the
            module-level ``classes`` variable is used, which is what the
            original two-argument call relied on.

    Raises:
        NameError: If ``classes`` is omitted and no module-level ``classes``
            exists.
        ValueError: If an object's name is not listed in ``classes``.
        NotImplementedError: Propagated from ``get_and_check`` when a
            required element is missing or duplicated.
    """
    if classes is None:
        try:
            classes = globals()['classes']
        except KeyError:
            raise NameError("name 'classes' is not defined") from None

    total = len(xmlPath)
    # Open the output once instead of once per XML file. Append mode is
    # kept, so earlier content survives; the file is created even when
    # there is nothing to convert, so later steps can always open it.
    with open(saveDir, "a") as bbox:
        for n, xml_file in enumerate(xmlPath, 1):
            root = ET.parse(xml_file).getroot()
            # Image file name recorded in the annotation.
            filename = get_and_check(root, 'filename', 1).text
            # One output line per annotated box.
            for obj in get(root, 'object'):
                category = get_and_check(obj, 'name', 1).text
                label_index = classes.index(category) + 1
                bndbox = get_and_check(obj, 'bndbox', 1)
                xmin = int(get_and_check(bndbox, 'xmin', 1).text) - 1
                ymin = int(get_and_check(bndbox, 'ymin', 1).text) - 1
                xmax = int(get_and_check(bndbox, 'xmax', 1).text)
                ymax = int(get_and_check(bndbox, 'ymax', 1).text)
                bbox.write('{} {} {} {} {} {}\n'.format(
                    filename, label_index, xmin, ymin, xmax, ymax))
            print('※ 第{:3d}個xml檔案完成'.format(n))
            print('※ 剩{:3d}個需轉換'.format(total - n))
            print("-" * 35)
# ----------------------------------Step2----------------------------------
def train_val_split(source, ratio, seed=None):
    """Randomly split the files of ``<source>/images`` into train and val.

    Args:
        source: Dataset root folder containing an ``images`` sub-folder.
        ratio: Fraction of the files assigned to the training list.
        seed: Optional seed. When given, the shuffle uses a private
            ``random.Random(seed)`` so the split is reproducible; when
            ``None`` the global ``random`` state is used.

    Returns:
        A ``(train_list, val_list)`` tuple of file names (not full paths).
    """
    image_dir = os.path.join(source, 'images')
    # Keep regular files only: a sub-directory in the listing would later be
    # handed to the image reader. Sorting makes a seeded shuffle independent
    # of the order os.listdir happens to return.
    indexes = sorted(
        name for name in os.listdir(image_dir)
        if os.path.isfile(os.path.join(image_dir, name))
    )
    if seed is None:
        random.shuffle(indexes)
    else:
        random.Random(seed).shuffle(indexes)

    pic_num = len(indexes)
    # Clamp so a negative ratio yields an empty training list rather than a
    # slice counted from the end.
    train_num = min(max(int(pic_num * ratio), 0), pic_num)
    return indexes[:train_num], indexes[train_num:]
def _group_annos_by_image(lines):
    """Index annotation lines by image file name.

    Returns a dict mapping file name to a list of
    ``(line_index, [cls_id, x_min, y_min, x_max, y_max])`` entries in file
    order; ``line_index`` is the 0-based position of the line.
    """
    grouped = {}
    for line_no, line in enumerate(lines):
        # Split from the right so a file name containing spaces stays whole.
        fields = line.strip().rsplit(None, 5)
        if not fields:
            continue  # blank line
        if len(fields) != 6:
            raise ValueError('Malformed annotation on line {}: {!r}'
                             .format(line_no + 1, line))
        grouped.setdefault(fields[0], []).append((line_no, fields[1:]))
    return grouped


def transfer_and_save_coco(source, split_list, dataset, phase):
    """Fill ``dataset`` with COCO image/annotation records and save it as JSON.

    Reads ``<source>/annos.txt`` (one box per line: file name, class id,
    x_min, y_min, x_max, y_max), reads every image of ``split_list`` from
    ``<source>/images`` to obtain its size, appends the records to
    ``dataset['images']`` and ``dataset['annotations']`` in place, and
    writes ``dataset`` to ``<source>/annotations/<phase>.json``.

    Args:
        source: Dataset root folder.
        split_list: Image file names belonging to this split.
        dataset: Dict with ``'images'`` and ``'annotations'`` lists; mutated.
        phase: Base name of the JSON file to write (without extension).

    Raises:
        OSError: If an image cannot be read.
        ValueError: If a non-blank line of annos.txt does not hold six
            fields.
    """
    # Parse the annotation file once instead of rescanning it per image.
    with open(os.path.join(source, 'annos.txt')) as tr:
        annos_by_image = _group_annos_by_image(tr)

    # Number of images processed.
    count = 0
    for image_id, file_name in enumerate(split_list):
        count += 1
        # The image is read only to obtain its width and height.
        image_path = os.path.join(source, 'images', file_name)
        im = cv2.imread(image_path)
        if im is None:
            raise OSError('Can not read image {}'.format(image_path))
        img_height, img_width = im.shape[:2]
        dataset['images'].append({'file_name': file_name,
                                  'id': image_id,
                                  'width': img_width,
                                  'height': img_height})
        for anno_id, fields in annos_by_image.get(file_name, ()):
            cls_id, x_min, y_min, x_max, y_max = fields
            x1 = float(x_min)
            y1 = float(y_min)
            x2 = float(x_max)
            y2 = float(y_max)
            box_width = max(0, x2 - x1)
            box_height = max(0, y2 - y1)
            dataset['annotations'].append({
                'area': box_width * box_height,
                'bbox': [x1, y1, box_width, box_height],
                'category_id': int(cls_id),
                # NOTE(review): ids are 0-based line indices of annos.txt;
                # some COCO tooling reportedly expects ids >= 1. Confirm
                # before running evaluation on this file.
                'id': anno_id,
                'image_id': image_id,
                'iscrowd': 0,
                # Only needed for segmentation: the rectangle drawn
                # clockwise from the top-left corner would be
                # [[x1, y1, x2, y1, x2, y2, x1, y2]] (plus 'ignore': 0).
                'segmentation': []
            })
    print(' {} images handled'.format(count))

    # Save the JSON file.
    folder = os.path.join(source, 'annotations')
    os.makedirs(folder, exist_ok=True)
    json_name = os.path.join(folder, '{}.json'.format(phase))
    with open(json_name, 'w') as f:
        json.dump(dataset, f)
def txt_to_coco_json(source, classes, split_list, phase):
    """Create the COCO skeleton for one split and hand it off for saving.

    Builds the ``info`` and ``categories`` sections (category ids start at
    1, in the order of ``classes``) with empty ``annotations``/``images``
    lists, then calls ``transfer_and_save_coco`` to fill and write it.

    Args:
        source: Dataset root folder.
        classes: Ordered class names.
        split_list: Image file names of this split.
        phase: Base name of the JSON file, also used in progress messages.
    """
    # Map each label to its 1-based id.
    categories = [
        {'id': cat_id, 'name': label, 'supercategory': 'mark'}
        for cat_id, label in enumerate(classes, 1)
    ]
    info = {
        'description': '',
        'url': '',
        'version': '1.0',
        'year': 2022,
        'contributor': 'James',
        'date_created': '',
    }
    # Container for image records and labels of an object-detection set.
    dataset = {
        'info': info,
        'categories': categories,
        'annotations': [],
        'images': [],
        'type': 'instances',
    }
    # Convert this split to COCO format and store it as JSON.
    print('※ 開始轉換{}'.format(phase))
    transfer_and_save_coco(source, split_list, dataset, phase)
    print('※ {}.json Done'.format(phase))
def split_images_to_train_and_val(source, train_list, val_list):
    """Move the images of each split into its own folder under ``source``.

    Files named in ``train_list`` are moved from ``<source>/images`` to
    ``<source>/train2017`` and those in ``val_list`` to
    ``<source>/val2017``; both folders are created when missing.

    Args:
        source: Dataset root folder.
        train_list: File names of the training images.
        val_list: File names of the validation images.
    """
    plan = (('train2017', train_list), ('val2017', val_list))
    # Create both destination folders before anything is moved.
    for sub_folder, _ in plan:
        target = os.path.join(source, sub_folder)
        if not os.path.exists(target):
            os.makedirs(target)
    # Move every image into the folder of its split.
    for sub_folder, names in plan:
        destination = os.path.join(source, sub_folder, '')
        for name in names:
            shutil.move(source + '/images/' + name, destination)
    print('移動圖片到train與val資料夾 Done')
if __name__ == '__main__':
    # Dataset root; must contain classes.txt, images/ and xmls/.
    source = './dataests3'

    # Read the label names (whitespace separated). The list stays a
    # module-level name because transfer_xml_to_annos, when called with two
    # arguments, looks up ``classes`` globally.
    with open(os.path.join(source, 'classes.txt')) as f:
        classes = f.read().strip().split()

    # [Step 1] Convert the XML files to annos.txt. Each line holds
    # imageName, classId, xMin, yMin, xMax, yMax; one line per bounding box
    # (class ids are counted from 1).
    print('【Step1】xml轉annos.txt')
    # Output path of annos.txt.
    saveDir = os.path.join(source, 'annos.txt')
    # The converter appends to this file, so a file left over from an
    # earlier run would duplicate every box. Start from a clean slate.
    if os.path.exists(saveDir):
        os.remove(saveDir)
    # Folder holding the XML annotations.
    xmlDir = os.path.join(source, 'xmls/')
    # Paths of the XML files only, in a stable order; other files in the
    # folder would fail to parse.
    xmlPath = [xmlDir + name
               for name in sorted(os.listdir(xmlDir))
               if name.lower().endswith('.xml')]
    # Convert the XML files to annos.txt.
    transfer_xml_to_annos(xmlPath, saveDir)
    print('=' * 60)

    # [Step 2] Convert the labels to COCO format and save them as JSON. The
    # dataset folder then contains images (pictures), annos.txt (boxes),
    # classes.txt (label list) and annotations (the JSON files).
    print('【Step2】annos.txt轉coco,並以json格式儲存')
    # Assign the images to train and val by ratio.
    train_list, val_list = train_val_split(source, 0.9)
    # Write one COCO JSON file per split.
    txt_to_coco_json(source, classes, train_list, 'instances_train2017')
    print('-' * 35)
    txt_to_coco_json(source, classes, val_list, 'instances_val2017')
    print('-' * 35)
    # Move the images into the train and val folders.
    split_images_to_train_and_val(source, train_list, val_list)
    print('程式執行結束')
執行程式
3.1 執行前
資料夾結構
images
xmls
classes.txt
3.2 執行後
執行結果
資料夾結構
annotations
train2017
val2017
讓我們繼續看下去...