Adaboost 종류별 핵심(OpenCV에서 가져옴..)

// Discrete AdaBoost:
            //   weak_eval[i] (=f(x_i)) is in {-1,1}
            //   err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
            //   C = log((1-err)/err)
            //   w_i *= exp(C*(f(x_i) != y_i))

// Real AdaBoost:
            //   weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
            //   w_i *= exp(-y_i*f(x_i))

// LogitBoost:
            //   weak_eval[i] = f(x_i) in [-z_max,z_max]
            //   sum_response = F(x_i).
            //   F(x_i) += 0.5*f(x_i)
            //   p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))) = 1/(1+exp(-2*F(x_i)))
            //   reuse weak_eval: weak_eval[i] <- p(x_i)
            //   w_i = p(x_i)*(1 - p(x_i))
            //   z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
            //   store z_i to the data->data_root as the new target responses

// Gentle AdaBoost:
            //   weak_eval[i] = f(x_i) in [-1,1]
            //   w_i *= exp(-y_i*f(x_i))

Boost로 Directory 파일 읽어오기

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include <boost/filesystem.hpp>

#include <vector>
#include <string>
using namespace cv;

int main(int argc, char **argv)
{
namespace fs = boost::filesystem;
fs::path someDir(argv[1]);
fs::directory_iterator end_iter;

typedef std::multimap<std::time_t, fs::path> result_set_t;
result_set_t result_set;

std::vector<std::string> vc;

if ( fs::exists(someDir) && fs::is_directory(someDir))
{
for( fs::directory_iterator dir_iter(someDir) ; dir_iter != end_iter ; ++dir_iter)
{
if (fs::is_regular_file(dir_iter->status()) )
{
//printf("%s\n", dir_iter->path().filename() );
//std::cout << dir_iter->path().filename() << std::endl;
vc.push_back( dir_iter->path().filename().generic_string() );
}
}
}

for(int i=0;i<vc.size();i++)
{
Mat img, luv;
img = imread(cv::format("%s\\%s", argv[1], vc[i].c_str()).c_str());

cvtColor(img, luv, CV_BGR2Luv);

Mat l,u,v;

extractChannel(luv, l, 0);
extractChannel(luv, u, 1);
extractChannel(luv, v, 2);

imshow("l", l);
imshow("u", u);
imshow("v", v);
waitKey(0);
}

return 0;
}

libSVM 윈도우에서 빌드하기

일단 커맨드에서 하면 path나 환경설정 번거롭기 때문에

Visual Studio > Tools > Visual Studio Command Prompt

로 접근해서 커맨드 창 실행시키고

libsvm 폴더에 가서

nmake /f Makefile.win


Opencv Matching to many images type

Feature Detector

Descriptor Extractor
* SIFT
* SURF
* ORB
* OpponentColorDescriptorExtractor
* FREAK
* BRISK
* BriefDescriptorExtractor
* BOWImgDescriptorExtractor ( BOW = Bag Of visual Words )

Descriptor Matcher
* FlannBased
* BruteForce(L2)
* BruteForce(SL2)
* BruteForce(L1)
* BruteForce-Hamming

OpenCV 시간 측정 매크로

// Tick count captured by TIME_START and read back by TIME_END.
double __detectionTimes;

// TIME_START: store the current cvGetTickCount() value as the interval start.
#define TIME_START __detectionTimes = (double)cvGetTickCount()

// TIME_END(str): print `str`, then the tick difference since TIME_START divided
// by cvGetTickFrequency()*1000, then the suffix "ms" and a newline.
#define TIME_END(str) \
{ \
    double ticksNow = (double)cvGetTickCount(); \
    double elapsed = (ticksNow - __detectionTimes) / (cvGetTickFrequency() * 1000); \
    std::cout << str << elapsed << "ms" << std::endl; \
}

SIMD unsigned char 배열 요소 값, 제곱 합 구하는 코드

/**
 * Returns the sum of the n bytes starting at a.
 *
 * Full 16-byte groups are summed with SSE2; the remaining 0..15 bytes are
 * added by a scalar loop. The pointer may have any alignment.
 *
 * @param a  input bytes (no alignment requirement)
 * @param n  number of bytes to sum; values <= 0 yield 0
 * @return   the sum as int; the caller must ensure the total fits in 32 bits
 */
int sum_array(const unsigned char* a, int n)
{
    const __m128i zero = _mm_setzero_si128();   // high halves when widening 8 -> 16 bit
    const __m128i ones = _mm_set1_epi16(1);     // madd with 1 adds adjacent 16-bit lanes into 32-bit lanes
    __m128i vsum = _mm_setzero_si128();         // four partial 32-bit sums
    int i = 0;

    // "n - i >= 16" also consumes the final full block and cannot overflow.
    for (; n - i >= 16; i += 16)
    {
        // Unaligned load: the caller's buffer is not guaranteed to be 16-byte
        // aligned, and an aligned load would fault on such input.
        const __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i *>(a + i));
        const __m128i lo = _mm_unpacklo_epi8(v, zero);
        const __m128i hi = _mm_unpackhi_epi8(v, zero);
        vsum = _mm_add_epi32(vsum, _mm_madd_epi16(lo, ones));
        vsum = _mm_add_epi32(vsum, _mm_madd_epi16(hi, ones));
    }

    // Horizontal add of the four 32-bit partial sums.
    vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
    vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
    int sum = _mm_cvtsi128_si32(vsum);

    // Scalar tail for the bytes that did not fill a whole vector.
    for (; i < n; i++)
    {
        sum += a[i];
    }
    return sum;
}

/**
 * Returns the sum of squares of the n bytes starting at a.
 *
 * Full 16-byte groups are processed with SSE2; the remaining 0..15 bytes are
 * handled by a scalar loop. The pointer may have any alignment.
 *
 * @param a  input bytes (no alignment requirement)
 * @param n  number of bytes; values <= 0 yield 0
 * @return   the sum of a[i]*a[i] as int; the caller must ensure the total
 *           fits in 32 bits (each term can be as large as 255*255)
 */
int sum_square_array(const unsigned char* a, int n)
{
    const __m128i zero = _mm_setzero_si128();   // high halves when widening 8 -> 16 bit
    __m128i vsum = _mm_setzero_si128();         // four partial 32-bit sums
    int i = 0;

    // "n - i >= 16" also consumes the final full block and cannot overflow.
    for (; n - i >= 16; i += 16)
    {
        // Unaligned load: the caller's buffer is not guaranteed to be 16-byte
        // aligned, and an aligned load would fault on such input.
        const __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i *>(a + i));
        const __m128i lo = _mm_unpacklo_epi8(v, zero);
        const __m128i hi = _mm_unpackhi_epi8(v, zero);

        // madd(x, x) squares every 16-bit lane and adds adjacent products
        // into 32-bit lanes.
        vsum = _mm_add_epi32(vsum, _mm_madd_epi16(lo, lo));
        vsum = _mm_add_epi32(vsum, _mm_madd_epi16(hi, hi));
    }

    // Horizontal add of the four 32-bit partial sums.
    vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
    vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
    int sum = _mm_cvtsi128_si32(vsum);

    // Scalar tail for the bytes that did not fill a whole vector.
    for (; i < n; i++)
    {
        sum += (a[i] * a[i]);
    }
    return sum;
}

Bagging과 Boosting

Bagging :

 훈련 집합들 가운데 뽑는 방법을

 균일한 확률 분포에 따라

 반복적으로 샘플링해서

 모델을 여러개 빌드하는 방법

Boosting :

 모델을 빌드할때

 이전 모델에서 오분류한 학습데이터를

 다음 모델이 빌드될 때

 선택되어질 가능성을 높여줘서(reweight)
 
 다음 모델에서 오류에 대한 보완을 유도하게끔 하는 방법
 

BinaryTree preorder 스택사용시

void preorderTraversal(Node root)
{
    NodeStack stack = new NodeStack();
    stack.push(root);
 
    while(true)
    {
        Node cur = stack.pop();
        if(cur == null) break;
        cur.printValue();
    
        // 오른쪽 먼저 넣음(stack이니까)
        Node n = cur.getRight();
        if(n != null) stack.push(n);
 
        // 왼쪽
        n = cur.getLeft();
        if(n != null) stack.push(n);
    }
}

SIMD GAUSSIAN BLUR(3x3)

uchar *src = (uchar *)inputGrayImg->imageData;
uchar *dst = (uchar *)outputGrayImg->imageData;
 
uchar *iterSrc;
uchar *iterDst;
 
unsigned int nStartX, nStartY, nEndX, nEndY;
nStartX = nStartY = 1;
nEndX = w-1;
nEndY = h-1;
 
__m128i ZeroData = _mm_setzero_si128();
__m128i ImageHigh;
__m128i ImageLow;
__m128i ResultHigh;
__m128i ResultLow;
 
for(int xIndex = nStartX; xIndex < nEndX; xIndex+=16 )
{
    for(int yIndex = nStartY; yIndex < nEndY; yIndex++ )
    {
        iterSrc = src+xIndex+yIndex*w;
        iterDst = dst+xIndex+yIndex*w;
 
        ResultHigh = _mm_setzero_si128();
        ResultLow = _mm_setzero_si128();
 
        //#1 (x-1, y-1) Load
        ImageLow = _mm_loadu_si128((__m128i *)(iterSrc-1-w));
 
        //unpacking 
        ImageHigh = _mm_unpackhi_epi8(ImageLow, ZeroData);
        ImageLow = _mm_unpacklo_epi8(ImageLow, ZeroData);
 
        // shift operation
        ImageHigh = _mm_srli_epi16(ImageHigh, 4);
        ImageLow = _mm_srli_epi16(ImageLow, 4);
 
        // add operation
        ResultHigh = _mm_adds_epi16(ResultHigh, ImageHigh);
        ResultLow = _mm_add_epi16(ResultLow, ImageLow);
 
        //#2 (x, y-1) Load
        ImageLow = _mm_loadu_si128((__m128i *)(iterSrc-w));
 
        //unpacking 
        ImageHigh = _mm_unpackhi_epi8(ImageLow, ZeroData);
        ImageLow = _mm_unpacklo_epi8(ImageLow, ZeroData);
 
        // shift operation
        ImageHigh = _mm_srli_epi16(ImageHigh, 3);
        ImageLow = _mm_srli_epi16(ImageLow, 3);
 
        // add operation
        ResultHigh = _mm_adds_epi16(ResultHigh, ImageHigh);
        ResultLow = _mm_add_epi16(ResultLow, ImageLow);
 
        //#3 (x+1, y-1) Load
        ImageLow = _mm_loadu_si128((__m128i *)(iterSrc+1-w));
 
        //unpacking 
        ImageHigh = _mm_unpackhi_epi8(ImageLow, ZeroData);
        ImageLow = _mm_unpacklo_epi8(ImageLow, ZeroData);
 
        // shift operation
        ImageHigh = _mm_srli_epi16(ImageHigh, 4);
        ImageLow = _mm_srli_epi16(ImageLow, 4);
 
        // add operation
        ResultHigh = _mm_adds_epi16(ResultHigh, ImageHigh);
        ResultLow = _mm_add_epi16(ResultLow, ImageLow);
 
        //#4 (x-1, y) Load
        ImageLow = _mm_loadu_si128((__m128i *)(iterSrc-1));
 
        //unpacking 
        ImageHigh = _mm_unpackhi_epi8(ImageLow, ZeroData);
        ImageLow = _mm_unpacklo_epi8(ImageLow, ZeroData);
 
        // shift operation
        ImageHigh = _mm_srli_epi16(ImageHigh, 3);
        ImageLow = _mm_srli_epi16(ImageLow, 3);
 
        // add operation
        ResultHigh = _mm_adds_epi16(ResultHigh, ImageHigh);
        ResultLow = _mm_add_epi16(ResultLow, ImageLow);
 
        //#6 (x+1, y) Load
        ImageLow = _mm_loadu_si128((__m128i *)(iterSrc+1));
 
        //unpacking 
        ImageHigh = _mm_unpackhi_epi8(ImageLow, ZeroData);
        ImageLow = _mm_unpacklo_epi8(ImageLow, ZeroData);
 
        // shift operation
        ImageHigh = _mm_srli_epi16(ImageHigh, 3);
        ImageLow = _mm_srli_epi16(ImageLow, 3);
 
        // add operation
        ResultHigh = _mm_adds_epi16(ResultHigh, ImageHigh);
        ResultLow = _mm_add_epi16(ResultLow, ImageLow);
 
        //#7 (x-1, y+1) Load
        ImageLow = _mm_loadu_si128((__m128i *)(iterSrc-1+w));
 
        //unpacking 
        ImageHigh = _mm_unpackhi_epi8(ImageLow, ZeroData);
        ImageLow = _mm_unpacklo_epi8(ImageLow, ZeroData);
 
        // shift operation
        ImageHigh = _mm_srli_epi16(ImageHigh, 4);
        ImageLow = _mm_srli_epi16(ImageLow, 4);
 
        // add operation
        ResultHigh = _mm_adds_epi16(ResultHigh, ImageHigh);
        ResultLow = _mm_add_epi16(ResultLow, ImageLow);
 
        //#8 (x, y+1) Load
        ImageLow = _mm_loadu_si128((__m128i *)(iterSrc+w));
 
        //unpacking 
        ImageHigh = _mm_unpackhi_epi8(ImageLow, ZeroData);
        ImageLow = _mm_unpacklo_epi8(ImageLow, ZeroData);
 
        // shift operation
        ImageHigh = _mm_srli_epi16(ImageHigh, 3);
        ImageLow = _mm_srli_epi16(ImageLow, 3);
 
        // add operation
        ResultHigh = _mm_adds_epi16(ResultHigh, ImageHigh);
        ResultLow = _mm_add_epi16(ResultLow, ImageLow);
 
        //#9 (x+1, y+1) Load
        ImageLow = _mm_loadu_si128((__m128i *)(iterSrc+1+w));
 
        //unpacking 
        ImageHigh = _mm_unpackhi_epi8(ImageLow, ZeroData);
        ImageLow = _mm_unpacklo_epi8(ImageLow, ZeroData);
 
        // shift operation
        ImageHigh = _mm_srli_epi16(ImageHigh, 4);
        ImageLow = _mm_srli_epi16(ImageLow, 4);
 
        // add operation
        ResultHigh = _mm_adds_epi16(ResultHigh, ImageHigh);
        ResultLow = _mm_add_epi16(ResultLow, ImageLow);
 
        //#5 (x, y) Load
        ImageLow = _mm_loadu_si128((__m128i *)(iterSrc));
 
        //unpacking 
        ImageHigh = _mm_unpackhi_epi8(ImageLow, ZeroData);
        ImageLow = _mm_unpacklo_epi8(ImageLow, ZeroData);
 
        // shift operation
        ImageHigh = _mm_srli_epi16(ImageHigh, 2);
        ImageLow = _mm_srli_epi16(ImageLow, 2);
 
        // add operation
        ResultHigh = _mm_adds_epi16(ResultHigh, ImageHigh);
        ResultLow = _mm_add_epi16(ResultLow, ImageLow);
 
        ResultLow = _mm_packus_epi16(ResultLow, ResultHigh);
        _mm_storeu_si128( (__m128i *)iterDst, ResultLow);
    }
}

참고책1) 을 보고 가우시안 블러 작성한 것, 문제라면 나누기 연산을 하면서 값들의 소수점이 약간씩 사라져서 결국 계단 현상 같은게 좀 나타나긴 함. 이걸 보완하려면 float 연산으로 하는게 맞음.. 좀 더 알아봐야지..


p.s Separable 필터를 SIMD 이용해서 해봤는데 결과의 정확성을 떠나서 일반 SIMD Gaussian 코드와 성능에서 거의 차이가 없음을 확인.. 뭐가 문제일까..


참고 문헌

1) (SSE, AVX를 이용한 고속 프로그래밍) SIMD 병렬 프로그래밍, 정영훈 저, 프리렉 출판, 2012


SIMD max, min 찾기 코드 성능비교

   1: #include <iostream>
   2: #include <ctime>
   3: #include <xmmintrin.h>
   4: #include <opencv/cv.h>
   5:  
   6: using namespace std;
   7:  
   8: double detectionTime;
   9: #define TIME_START detectionTime = (double)cvGetTickCount()
  10: #define TIME_END(str) \
  11:     cout << str << (cvGetTickCount()-detectionTime)/(cvGetTickFrequency()*1000) << "ms" << endl
  12:  
  13:    
  14:int main(int argc, char **argv)  
  15:{
  16:     const int ARRAYSIZE = 640*480;
  17:     __declspec(align(16)) float *m_arr;
  18:     m_arr = (float *)_aligned_malloc(ARRAYSIZE*sizeof(float), 16);
  19:  
  20:     srand(time(0));
  21:  
  22:     for(int i=0;i<ARRAYSIZE;i++)
  23:     {
  24:         m_arr[i] = (rand()%255)/255.0f;
  25:         //printf("[%d]:%f\n", i, m_arr[i]);
  26:     }
  27:  
  28:     TIME_START;
  29:  
  30:     float maxx = 0;
  31:     float minn = 1;
  32:     for(int i=0;i<ARRAYSIZE;i++)
  33:     {
  34:         if(maxx < m_arr[i]) maxx = m_arr[i];
  35:         if(minn > m_arr[i]) minn = m_arr[i];
  36:     }
  37:  
  38:     TIME_END(" SINGLE CPU ");
  39:     
  40:     printf("array max : %f\n", maxx);
  41:     printf("array min : %f\n", minn);
  42:  
  43:     //__m128 
  44:  
  45:     TIME_START;
  46:         
  47:     float simdmin = 0.0f;
  48:     float simdmax = 0.0f;
  49:     __m128 min128 = _mm_set_ps1(FLT_MAX);
  50:     __m128 max128 = _mm_set_ps1(FLT_MIN);
  51:  
  52:     __m128 *pSource = (__m128 *)m_arr;
  53:  
  54:     for(int i=0;i<ARRAYSIZE/4;i++)
  55:     {
  56:         min128 = _mm_min_ps(*pSource, min128);
  57:         max128 = _mm_max_ps(*pSource, max128);
  58:  
  59:         pSource++;
  60:     }
  61:  
  62:     union u
  63:     {
  64:         __m128 m;
  65:         float f[4];
  66:     } xx;
  67:  
  68:     xx.m = min128;
  69:     simdmin = min(xx.f[0], min(xx.f[1], min(xx.f[2], xx.f[3])));
  70:  
  71:     xx.m = max128;
  72:     simdmax = max(xx.f[0], max(xx.f[1], max(xx.f[2], xx.f[3])));
  73:  
  74:     TIME_END(" SIMD ");
  75:  
  76:     printf("simd array max : %f\n", simdmax);
  77:     printf("simd array min : %f\n", simdmin);
  78:  
  79:     _aligned_free(m_arr);
  80:  
  81:     return 0;
  82: }



1 2 3 4 5


Google Analysis