[Python] [Caffe] Object Recognition by Matching Image Features Against a Feature Library


Preface

GitHub code repository: https://github.com/HansRen1024/Object-Classification-of-Mapping-Features

I made some changes later on; please check GitHub for the latest code, as I won't keep updating the code in this post.

Save_Feature_cam.py: captures frames from the camera; press the space bar to save the features into the library.

Contrast_Feature_cam.py: captures frames from the camera; press the space bar to compare the features against the library and print the result.

Save_image.py: saves batches of images as raw material for the feature library. Three images are saved per class.

Save_Feature_image.py: extracts features from the images collected above and saves them into the library.

Test_All.py: runs a test over the image paths and class indices listed in test.txt. The format of test.txt is the same as the txt file used to generate LMDB files when training a Caffe model: [image path + space + class index]. A small parsing sketch follows this list.
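
For reference, here is a minimal sketch (not the actual Test_All.py) of reading entries in that format; the file name and the evaluation step inside the loop are placeholders only:

# Read "<image path> <class index>" lines from a list file (placeholder name).
with open('test.txt') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        path, label = line.rsplit(' ', 1)  # split on the last space
        label = int(label)
        # ... load the image at `path`, extract its feature, match it against
        # the feature library, and count it as correct if the predicted class
        # index equals `label` ...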

This approach is actually very similar to face recognition, except that it is applied to everyday objects. Its advantage stands out when the dataset for the objects you want to recognize is very small, or when collecting a dataset is inconvenient; it can also be used for online learning. The underlying idea is not difficult: extract features with a convolutional neural network and store the features of one chosen layer in a library; then take a new photo and compute the Euclidean distance between its features and every feature in the library. Which network to use and which layer to take depends on the situation. I later also implemented this in C++ with both Caffe and ncnn, but I won't post that code; it is not hard to work out yourself if you are interested.
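
To make the matching step concrete, here is a minimal NumPy sketch of it; the function name, the library dict, and the feature shapes are illustrative only and are not taken from the repository:

import numpy as np

def match_feature(query_feat, library):
    # library: dict mapping class name -> stored feature array of the same shape.
    # Similarity is mapped to 1/(1+L2), the same mapping used in the scripts below.
    best_cla, best_sim = None, -1.0
    for cla, feat in library.items():
        l2 = np.linalg.norm(query_feat.ravel() - feat.ravel())  # Euclidean (L2) distance
        sim = 1.0 / (1.0 + l2)
        if sim > best_sim:
            best_cla, best_sim = cla, sim
    return best_cla, best_sim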

If any part of the code below that calls the caffe module is unclear, take a look at my earlier blog posts; they also include the code for converting the mean file into a .npy file.
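
In case you don't want to dig up the old post, this is a minimal sketch of the usual binaryproto-to-.npy conversion; the input and output file names are placeholders:

import numpy as np
import caffe

# Convert a Caffe mean.binaryproto file into the .npy format expected by set_mean().
blob = caffe.proto.caffe_pb2.BlobProto()
with open('mean.binaryproto', 'rb') as f:  # placeholder file name
    blob.ParseFromString(f.read())
mean_arr = np.array(caffe.io.blobproto_to_array(blob))  # shape (1, k, h, w)
np.save('doc/mean_squeezenet.npy', mean_arr[0])         # shape (k, h, w)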

1. Code for saving features:

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 28 13:09:05 2017

@author: hans

http://blog.csdn.net/renhanchi
"""

import caffe
import cv2
import os 
import skimage
import numpy as np
import matplotlib.pyplot as plt
        
prototxt='doc/deploy_squeezenet.prototxt'
caffe_model='doc/squeezenet.caffemodel'
mean_file='doc/mean_squeezenet.npy'
caffe.set_mode_gpu()
net = caffe.Net(prototxt,caffe_model,caffe.TEST)
for name, feature in net.blobs.items(): # print the shape of every blob (layer output)
    print name + '\t' + str(feature.data.shape)

def show(data, padsize=1, padval=0):
    data -= data.min()
    data /= data.max()
    
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = ((0, n ** 2 - data.shape[0]), (0, padsize), (0, padsize)) + ((0, 0),) * (data.ndim - 3)
    data = np.pad(data, padding, mode='constant', constant_values=(padval, padval))
    
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    plt.imshow(data)
    plt.axis('off')

def saveFeat(image):
    global prob
    im = caffe.io.resize_image(image,(227,227,3))
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) # data blob layout is (n, k, h, w)
    transformer.set_transpose('data', (2, 0, 1)) # reorder image dimensions, (h, w, k) -> (k, h, w)
    transformer.set_mean('data', np.load(mean_file).mean(1).mean(1))
    transformer.set_raw_scale('data', 255)

    net.blobs['data'].data[...] = transformer.preprocess('data', im)
    net.forward()

#    labels_filename='card/words_card.txt'
#    labels = np.loadtxt(labels_filename, str, delimiter='\t')
#    prob = net.blobs['prob'].data[0].flatten()
#    order = prob.argsort()[-1]
#    print 'class:', labels[order], 'accuracy: ', prob[order]

    conv1_data = net.blobs['conv10'].data[0] # extract the conv10 feature map
    conv1_data.tofile(claPath+'feat.bin')
    show(conv1_data)

c = cv2.VideoCapture(0)
while 1:
    ret, image = c.read()
    cv2.rectangle(image,(117,37),(522,442),(0,255,0),2)
    cv2.imshow("aaa", image)
    key = cv2.waitKey(10)
    if key == ord(' '):
        cla = str(raw_input("Please enter class name: "))
        claPath = os.path.join(r'features/%s/' %cla)
        if not os.path.exists(claPath):
            os.makedirs(claPath)
        else:
            print "This class has been saved before"
            os._exit(1)
        img = skimage.img_as_float(image[40:440, 120:520]).astype(np.float32) # crop the region inside the green rectangle and convert to float
        saveFeat(img)
    elif key == 27:
        cv2.destroyAllWindows()
        break

2. Code for comparing features:

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 28 15:39:58 2017

@author: hans

http://blog.csdn.net/renhanchi
"""

import caffe
import cv2
import os 
import skimage
import numpy as np
from math import sqrt

dirpath = 'features/'
prototxt='doc/deploy_squeezenet.prototxt'
caffe_model='doc/squeezenet.caffemodel'
mean_file='doc/mean_squeezenet.npy'
caffe.set_mode_gpu()
net = caffe.Net(prototxt,caffe_model,caffe.TEST)

def contrastFeat(image):
    global similarity
    similarity = []
    cla = []
    im = caffe.io.resize_image(image,(227,227,3))
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) # data blob layout is (n, k, h, w)
    transformer.set_transpose('data', (2, 0, 1)) # reorder image dimensions, (h, w, k) -> (k, h, w)
    transformer.set_mean('data', np.load(mean_file).mean(1).mean(1))
    transformer.set_raw_scale('data', 255)
    net.blobs['data'].data[...] = transformer.preprocess('data', im)
    net.forward()
    conv1_data = net.blobs['conv10'].data[0] # extract the conv10 feature map of the query image
    
    for dirname in os.listdir(dirpath):
        if os.path.isdir(r'%s%s/' %(dirpath, dirname)):
            claPath = os.path.join(r'%s%s/' %(dirpath, dirname))
            feat = np.fromfile(claPath+'feat.bin', dtype = np.float32)
            feat = feat.reshape(conv1_data.shape[0],conv1_data.shape[1],conv1_data.shape[2])
            dis = 0 # accumulated squared differences (Euclidean distance)
            for n in range(feat.shape[0]):
                for h in range(feat.shape[1]):
                    for w in range(feat.shape[2]):
                        dis += pow(conv1_data[n,h,w]-feat[n,h,w],2)
            L2 = sqrt(dis)
            similarity.append(1/(1+L2))
            cla.append(dirname)
    similarity = np.array(similarity)
    print similarity
    order = similarity.argsort()[-1]
    print 'class:', cla[order], 'prob:', similarity[order]

c = cv2.VideoCapture(0)
while 1:
    ret, image = c.read()
    cv2.rectangle(image,(117,37),(522,442),(0,255,0),2)
    cv2.imshow("aaa", image)
    key = cv2.waitKey(10)
    if key == ord(' '):
        img = skimage.img_as_float(image[40:440, 120:520]).astype(np.float32) # crop the region inside the green rectangle and convert to float
        contrastFeat(img)
    elif key == 27:
        cv2.destroyAllWindows()
        break
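
A side note, not part of the original scripts: the triple loop in contrastFeat computes a plain Euclidean (L2) distance element by element, so it can be replaced with a single vectorized NumPy call that gives the same result:

L2 = np.linalg.norm(conv1_data - feat)
similarity.append(1 / (1 + L2))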