【SSD】将WIDER人脸数据格式转换为VOC格式的工具

【2018.05.21 更新】------------

训练的时候发现 loss 正常下降，可是验证阶段的 eval 一直很低。我猜测是数据集的问题，咨询之后确认确实如此：这是由 WIDER 里大量模糊不清的超小脸标注导致的。所以我在转换代码中增加了过滤超小脸的功能。

习惯用 caffe 做东西，但开源数据集的格式并不统一。

今儿写了一个将 WIDER 人脸数据格式转换为 VOC 格式的工具

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed May 16 14:28:19 2018


@author: hans
"""


import os
import cv2
from lxml.etree import Element, SubElement, tostring, ElementTree




# Minimum face size in pixels: a box whose width or height is smaller than
# this value is skipped by the conversion loop in this script.
# NOTE(review): the original comment also mentioned padding, but no padding
# step is visible in this file -- confirm whether one was intended.
minsize2select = 20


def saveINFO(img_info, box_info, face_num, xml_root=None):
    """Write one image's face boxes as a Pascal VOC style XML file.

    The file is written to ``<xml_root>/<folder>/<name>.xml``. The folder is
    created when it does not exist, and the written path is printed.

    Args:
        img_info: Sequence ``[folder, name, height, width, depth]``.
        box_info: Sequence of boxes, each ``[xmin, ymin, xmax, ymax]``.
        face_num: Number of leading entries of ``box_info`` to write.
        xml_root: Output root directory. When ``None`` the module-level
            ``xml_root_path`` (set by the script entry point) is used.
    """
    if xml_root is None:
        xml_root = xml_root_path

    folder, name, height, width, depth = img_info[:5]

    node_root = Element('annotation')
    SubElement(node_root, 'folder').text = folder
    SubElement(node_root, 'filename').text = name

    # Element order inside <size> is width, height, depth, although img_info
    # stores height before width.
    node_size = SubElement(node_root, 'size')
    SubElement(node_size, 'width').text = str(width)
    SubElement(node_size, 'height').text = str(height)
    SubElement(node_size, 'depth').text = str(depth)

    for i in range(face_num):
        xmin, ymin, xmax, ymax = box_info[i][:4]
        node_object = SubElement(node_root, 'object')
        SubElement(node_object, 'name').text = 'face'
        node_bndbox = SubElement(node_object, 'bndbox')
        SubElement(node_bndbox, 'xmin').text = str(xmin)
        SubElement(node_bndbox, 'ymin').text = str(ymin)
        SubElement(node_bndbox, 'xmax').text = str(xmax)
        SubElement(node_bndbox, 'ymax').text = str(ymax)

    # os.path.join works whether or not xml_root ends with a separator.
    out_dir = os.path.join(xml_root, folder)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    xml_path = os.path.join(out_dir, name + '.xml')

    # Hand lxml the path instead of an open text-mode handle: lxml then opens
    # and closes the file itself, so no file object is left unclosed.
    ElementTree(node_root).write(xml_path, pretty_print=True)
    print(xml_path)


def parse_box_line(box_line, min_size):
    """Parse one bounding-box line of the WIDER ground-truth text file.

    Only the first four whitespace-separated fields are used; they are read
    as ``xmin ymin width height``.

    Args:
        box_line: Raw annotation line (a trailing newline is allowed).
        min_size: Boxes with width or height below this value are rejected.

    Returns:
        ``[xmin, ymin, xmax, ymax]`` for an accepted box, or ``None`` when the
        box is smaller than ``min_size`` in either dimension.
    """
    xmin, ymin, width, height = [int(v) for v in box_line.split()[:4]]
    if height < min_size or width < min_size:
        return None
    return [xmin, ymin, xmin + width, ymin + height]


if __name__ == '__main__':
    # Switch to 'val' to convert the validation split instead.
    mode = 'train'
    txt_name = 'wider_face_'+mode+'_bbx_gt.txt'
    img_root_path = '/data/face/WIDER/WIDER_'+mode+'/images/'
    txt_root_path = '/data/face/WIDER/wider_face_split/'
    # Must stay a module-level name: saveINFO reads it as a global.
    xml_root_path = '/data/face/WIDER/WIDER_'+mode+'/anno_no_small_face/'

    with open(txt_root_path+txt_name, 'r') as f:
        # Test the raw line for EOF so that a blank line inside the file does
        # not end the conversion early.
        raw_line = f.readline()
        while raw_line:
            line = raw_line.strip()
            # Image entries are recognised by the '--' in their relative
            # path; any other line reached here is skipped.
            if '--' in line:
                face_num = int(f.readline())
                box_info = []
                for _ in range(face_num):
                    box = parse_box_line(f.readline(), minsize2select)
                    if box is not None:
                        box_info.append(box)

                if box_info:
                    folder, img_file = line.split('/')[:2]
                    # splitext keeps dots that are part of the base name.
                    name = os.path.splitext(img_file)[0]
                    img_path = img_root_path+line
                    img = cv2.imread(img_path)
                    if img is None:
                        # imread gave no image; skip this entry instead of
                        # crashing on img.shape.
                        print('Skip unreadable image: ' + img_path)
                    else:
                        height, width, channel = img.shape
                        img_info = [folder, name, height, width, channel]
                        saveINFO(img_info, box_info, len(box_info))
            raw_line = f.readline()
    print('Done!')