【2018.05.21 更新】------------
训练时发现 loss 正常下降,但验证阶段的 eval 指标一直很低。我猜测是数据集的问题,咨询之后确认确实如此:这是由 WIDER 中大量模糊不清的超小人脸标注导致的。因此我在转换代码中增加了过滤超小人脸的功能(宽或高小于 20 像素的标注会被丢弃)。
我习惯用 Caffe 做开发,但各个开源数据集的标注格式并不统一。
今天写了一个将 WIDER 人脸数据集的标注格式转换为 VOC 格式的工具:
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed May 16 14:28:19 2018
@author: hans
"""
import os
import cv2
from lxml.etree import Element, SubElement, tostring, ElementTree
# Minimum face size (in annotation units, presumably pixels): annotations
# whose width or height is below this value are skipped during conversion.
# NOTE(review): the original comment also mentioned padding, but no padding
# step is visible in the conversion loop -- confirm whether one was intended.
minsize2select=20
def saveINFO(img_info,box_info,face_num):
    """Write one Pascal VOC style XML annotation file for a single image.

    Args:
        img_info: sequence ``[folder, name, height, width, channels]``,
            where ``name`` is the image file name without its extension.
        box_info: sequence of ``[xmin, ymin, xmax, ymax]`` face boxes.
        face_num: number of leading entries of ``box_info`` to export.

    The file is written to ``xml_root_path + folder + '/' + name + '.xml'``.
    ``xml_root_path`` is a module-level global that must be assigned before
    this function is called. The sub-directory is created when missing and
    the path of the written file is printed.
    """
    folder, name, height, width, depth = img_info[:5]

    node_root = Element('annotation')
    SubElement(node_root, 'folder').text = folder
    SubElement(node_root, 'filename').text = name

    node_size = SubElement(node_root, 'size')
    SubElement(node_size, 'width').text = str(width)
    SubElement(node_size, 'height').text = str(height)
    SubElement(node_size, 'depth').text = str(depth)

    # Every exported box becomes an <object> with the single class 'face'.
    for box in box_info[:face_num]:
        xmin, ymin, xmax, ymax = box[:4]
        node_object = SubElement(node_root, 'object')
        SubElement(node_object, 'name').text = 'face'
        node_bndbox = SubElement(node_object, 'bndbox')
        SubElement(node_bndbox, 'xmin').text = str(xmin)
        SubElement(node_bndbox, 'ymin').text = str(ymin)
        SubElement(node_bndbox, 'xmax').text = str(xmax)
        SubElement(node_bndbox, 'ymax').text = str(ymax)

    out_dir = xml_root_path + folder + '/'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    xml_path = out_dir + name + '.xml'
    # lxml serialises to bytes, so the file is opened in binary mode; the
    # context manager guarantees the handle is closed after writing.
    with open(xml_path, 'wb') as xml_file:
        ElementTree(node_root).write(xml_file, pretty_print=True)
    print(xml_path)
def _read_face_boxes(handle, face_count, min_size):
    """Read ``face_count`` box lines from ``handle`` and keep the large ones.

    Each line is expected to start with four integers ``x y w h`` (top-left
    corner, width, height); any further fields are ignored. Boxes whose
    width or height is below ``min_size`` are dropped.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` lists for the kept boxes.
    """
    kept = []
    for _ in range(face_count):
        # Split once on any whitespace instead of re-splitting per field.
        fields = handle.readline().split()
        xmin, ymin, width, height = [int(value) for value in fields[:4]]
        if width < min_size or height < min_size:
            continue
        kept.append([xmin, ymin, xmin + width, ymin + height])
    return kept


if __name__=='__main__':
    # Dataset split to convert; set to 'val' for the validation split.
    mode = 'train'
    txt_name = 'wider_face_'+mode+'_bbx_gt.txt'
    img_root_path = '/data/face/WIDER/WIDER_'+mode+'/images/'
    txt_root_path = '/data/face/WIDER/wider_face_split/'
    # Kept at module level on purpose: saveINFO reads it as a global.
    xml_root_path = '/data/face/WIDER/WIDER_'+mode+'/anno_no_small_face/'

    with open(txt_root_path+txt_name,'r') as f:
        # readline() is used throughout (rather than iterating over f)
        # because the box lines are consumed inside the loop body.
        for raw_line in iter(f.readline, ''):
            line = raw_line.strip()
            # Image entries look like '<folder>/<file>' and contain '--';
            # every other line (e.g. the filler row after a zero face
            # count) is ignored.
            if '--' not in line:
                continue

            face_num = int(f.readline())
            box_info = _read_face_boxes(f, face_num, minsize2select)
            if not box_info:
                # No face survived the size filter: write no annotation.
                continue

            img_path = img_root_path+line
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None for a missing or unreadable file.
                print('Skipping unreadable image: ' + img_path)
                continue

            folder, file_name = line.split('/')[:2]
            img_info = [
                folder,
                os.path.splitext(file_name)[0],  # name without extension
                img.shape[0],  # height
                img.shape[1],  # width
                img.shape[2],  # channels
            ]
            saveINFO(img_info,box_info,len(box_info))
    print('Done!')