/* The code is adopted from VLFeat with heavily rewrite. * The original code is licenced under 2-clause BSD license, * should be compatible with New BSD Licence used by ccv. * The original Copyright: * * Copyright (C) 2007-12, Andrea Vedaldi and Brian Fulkerson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "ccv.h" #include "ccv_internal.h" const ccv_sift_param_t ccv_sift_default_params = { .noctaves = 3, .nlevels = 6, .up2x = 1, .edge_threshold = 10, .norm_threshold = 0, .peak_threshold = 0, }; inline static double _ccv_keypoint_interpolate(float N9[3][9], int ix, int iy, int is, ccv_keypoint_t* kp) { double Dxx = N9[1][3] - 2 * N9[1][4] + N9[1][5]; double Dyy = N9[1][1] - 2 * N9[1][4] + N9[1][7]; double Dxy = (N9[1][8] - N9[1][6] - N9[1][2] + N9[1][0]) * 0.25; double score = (Dxx + Dyy) * (Dxx + Dyy) / (Dxx * Dyy - Dxy * Dxy); double Dx = (N9[1][5] - N9[1][3]) * 0.5; double Dy = (N9[1][7] - N9[1][1]) * 0.5; double Ds = (N9[2][4] - N9[0][4]) * 0.5; double Dxs = (N9[2][5] + N9[0][3] - N9[2][3] - N9[0][5]) * 0.25; double Dys = (N9[2][7] + N9[0][1] - N9[2][1] - N9[0][7]) * 0.25; double Dss = N9[0][4] - 2 * N9[1][4] + N9[2][4]; double A[3][3] = { { Dxx, Dxy, Dxs }, { Dxy, Dyy, Dys }, { Dxs, Dys, Dss } }; double b[3] = { -Dx, -Dy, -Ds }; /* Gauss elimination */ int i, j, ii, jj; for(j = 0; j < 3; j++) { double maxa = 0; double maxabsa = 0; int maxi = j; double tmp; /* look for the maximally stable pivot */ for (i = j; i < 3; i++) { double a = A[i][j]; double absa = fabs(a); if (absa > maxabsa) { maxa = a; maxabsa = absa; maxi = i; } } /* if singular give up */ if (maxabsa < 1e-10f) { b[0] = b[1] = b[2] = 0; break; } i = maxi; /* swap j-th row with i-th row and normalize j-th row */ for(jj = j; jj < 3; jj++) { tmp = A[i][jj]; A[i][jj] = A[j][jj]; A[j][jj] = tmp; A[j][jj] /= maxa; } tmp = b[j]; b[j] = b[i]; b[i] = tmp; b[j] /= maxa; /* elimination */ for (ii = j + 1; ii < 3; ii++) { double x = A[ii][j]; for (jj = j; jj < 3; jj++) A[ii][jj] -= x * A[j][jj]; b[ii] -= x * b[j]; } } /* backward substitution */ for (i = 2; i > 0; i--) { double x = b[i]; for (ii = i - 1; ii >= 0; ii--) b[ii] -= x * A[ii][i]; } kp->x = ix + ccv_min(ccv_max(b[0], -1), 1); kp->y = iy + ccv_min(ccv_max(b[1], -1), 1); kp->regular.scale = is + b[2]; return score; } static float _ccv_mod_2pi(float x) { while (x > 2 * CCV_PI) x -= 2 * CCV_PI; while (x < 0) x += 2 * CCV_PI; return x; } static int _ccv_floor(float x) { int xi = (int)x; if (x >= 0 || (float)xi == x) return xi; return xi - 1; } #define EXPN_SZ 256 /* fast_expn table size */ #define EXPN_MAX 25.0 /* fast_expn table max */ static double _ccv_expn_tab[EXPN_SZ + 1]; /* fast_expn table */ static int _ccv_expn_init = 0; static inline double _ccv_expn(double x) { double a, b, r; int i; assert(0 <= x && x <= EXPN_MAX); if (x > EXPN_MAX) return 0.0; x *= EXPN_SZ / EXPN_MAX; i = (int)x; r = x - i; a = _ccv_expn_tab[i]; b = _ccv_expn_tab[i + 1]; return a + r * (b - a); } static void _ccv_precomputed_expn() { int i; for(i = 0; i < EXPN_SZ + 1; i++) _ccv_expn_tab[i] = exp(-(double)i * (EXPN_MAX / EXPN_SZ)); _ccv_expn_init = 1; } void ccv_sift(ccv_dense_matrix_t* a, ccv_array_t** _keypoints, ccv_dense_matrix_t** _desc, int type, ccv_sift_param_t params) { assert(CCV_GET_CHANNEL(a->type) == CCV_C1); ccv_dense_matrix_t** g = (ccv_dense_matrix_t**)alloca(sizeof(ccv_dense_matrix_t*) * (params.nlevels + 1) * (params.up2x ? params.noctaves + 1 : params.noctaves)); memset(g, 0, sizeof(ccv_dense_matrix_t*) * (params.nlevels + 1) * (params.up2x ? params.noctaves + 1 : params.noctaves)); ccv_dense_matrix_t** dog = (ccv_dense_matrix_t**)alloca(sizeof(ccv_dense_matrix_t*) * (params.nlevels - 1) * (params.up2x ? params.noctaves + 1 : params.noctaves)); memset(dog, 0, sizeof(ccv_dense_matrix_t*) * (params.nlevels - 1) * (params.up2x ? params.noctaves + 1 : params.noctaves)); ccv_dense_matrix_t** th = (ccv_dense_matrix_t**)alloca(sizeof(ccv_dense_matrix_t*) * (params.nlevels - 3) * (params.up2x ? params.noctaves + 1 : params.noctaves)); memset(th, 0, sizeof(ccv_dense_matrix_t*) * (params.nlevels - 3) * (params.up2x ? params.noctaves + 1 : params.noctaves)); ccv_dense_matrix_t** md = (ccv_dense_matrix_t**)alloca(sizeof(ccv_dense_matrix_t*) * (params.nlevels - 3) * (params.up2x ? params.noctaves + 1 : params.noctaves)); memset(md, 0, sizeof(ccv_dense_matrix_t*) * (params.nlevels - 3) * (params.up2x ? params.noctaves + 1 : params.noctaves)); if (params.up2x) { g += params.nlevels + 1; dog += params.nlevels - 1; th += params.nlevels - 3; md += params.nlevels - 3; } ccv_array_t* keypoints = *_keypoints; int custom_keypoints = 0; if (keypoints == 0) keypoints = *_keypoints = ccv_array_new(sizeof(ccv_keypoint_t), 10, 0); else custom_keypoints = 1; int i, j, k, x, y; double sigma0 = 1.6; double sigmak = pow(2.0, 1.0 / (params.nlevels - 3)); double dsigma0 = sigma0 * sigmak * sqrt(1.0 - 1.0 / (sigmak * sigmak)); if (params.up2x) { ccv_sample_up(a, &g[-(params.nlevels + 1)], 0, 0, 0); /* since there is a gaussian filter in sample_up function already, * the default sigma for upsampled image is sqrt(2) */ double sd = sqrt(sigma0 * sigma0 - 2.0); ccv_blur(g[-(params.nlevels + 1)], &g[-(params.nlevels + 1) + 1], CCV_32F | CCV_C1, sd); ccv_matrix_free(g[-(params.nlevels + 1)]); for (j = 1; j < params.nlevels; j++) { sd = dsigma0 * pow(sigmak, j - 1); ccv_blur(g[-(params.nlevels + 1) + j], &g[-(params.nlevels + 1) + j + 1], 0, sd); ccv_subtract(g[-(params.nlevels + 1) + j + 1], g[-(params.nlevels + 1) + j], (ccv_matrix_t**)&dog[-(params.nlevels - 1) + j - 1], 0); if (j > 1 && j < params.nlevels - 1) ccv_gradient(g[-(params.nlevels + 1) + j], &th[-(params.nlevels - 3) + j - 2], 0, &md[-(params.nlevels - 3) + j - 2], 0, 1, 1); ccv_matrix_free(g[-(params.nlevels + 1) + j]); } ccv_matrix_free(g[-1]); } double sd = sqrt(sigma0 * sigma0 - 0.25); g[0] = a; /* generate gaussian pyramid (g, dog) & gradient pyramid (th, md) */ ccv_blur(g[0], &g[1], CCV_32F | CCV_C1, sd); for (j = 1; j < params.nlevels; j++) { sd = dsigma0 * pow(sigmak, j - 1); ccv_blur(g[j], &g[j + 1], 0, sd); ccv_subtract(g[j + 1], g[j], (ccv_matrix_t**)&dog[j - 1], 0); if (j > 1 && j < params.nlevels - 1) ccv_gradient(g[j], &th[j - 2], 0, &md[j - 2], 0, 1, 1); ccv_matrix_free(g[j]); } ccv_matrix_free(g[params.nlevels]); for (i = 1; i < params.noctaves; i++) { ccv_sample_down(g[(i - 1) * (params.nlevels + 1)], &g[i * (params.nlevels + 1)], 0, 0, 0); if (i - 1 > 0) ccv_matrix_free(g[(i - 1) * (params.nlevels + 1)]); sd = sqrt(sigma0 * sigma0 - 0.25); ccv_blur(g[i * (params.nlevels + 1)], &g[i * (params.nlevels + 1) + 1], CCV_32F | CCV_C1, sd); for (j = 1; j < params.nlevels; j++) { sd = dsigma0 * pow(sigmak, j - 1); ccv_blur(g[i * (params.nlevels + 1) + j], &g[i * (params.nlevels + 1) + j + 1], 0, sd); ccv_subtract(g[i * (params.nlevels + 1) + j + 1], g[i * (params.nlevels + 1) + j], (ccv_matrix_t**)&dog[i * (params.nlevels - 1) + j - 1], 0); if (j > 1 && j < params.nlevels - 1) ccv_gradient(g[i * (params.nlevels + 1) + j], &th[i * (params.nlevels - 3) + j - 2], 0, &md[i * (params.nlevels - 3) + j - 2], 0, 1, 1); ccv_matrix_free(g[i * (params.nlevels + 1) + j]); } ccv_matrix_free(g[i * (params.nlevels + 1) + params.nlevels]); } ccv_matrix_free(g[(params.noctaves - 1) * (params.nlevels + 1)]); if (!custom_keypoints) { /* detect keypoint */ for (i = (params.up2x ? -1 : 0); i < params.noctaves; i++) { double s = pow(2.0, i); int rows = dog[i * (params.nlevels - 1)]->rows; int cols = dog[i * (params.nlevels - 1)]->cols; for (j = 1; j < params.nlevels - 2; j++) { float* bf = dog[i * (params.nlevels - 1) + j - 1]->data.f32 + cols; float* cf = dog[i * (params.nlevels - 1) + j]->data.f32 + cols; float* uf = dog[i * (params.nlevels - 1) + j + 1]->data.f32 + cols; for (y = 1; y < rows - 1; y++) { for (x = 1; x < cols - 1; x++) { float v = cf[x]; #define locality_if(CMP, SGN) \ (v CMP ## = SGN params.peak_threshold && v CMP cf[x - 1] && v CMP cf[x + 1] && \ v CMP cf[x - cols - 1] && v CMP cf[x - cols] && v CMP cf[x - cols + 1] && \ v CMP cf[x + cols - 1] && v CMP cf[x + cols] && v CMP cf[x + cols + 1] && \ v CMP bf[x - 1] && v CMP bf[x] && v CMP bf[x + 1] && \ v CMP bf[x - cols - 1] && v CMP bf[x - cols] && v CMP bf[x - cols + 1] && \ v CMP bf[x + cols - 1] && v CMP bf[x + cols] && v CMP bf[x + cols + 1] && \ v CMP uf[x - 1] && v CMP uf[x] && v CMP uf[x + 1] && \ v CMP uf[x - cols - 1] && v CMP uf[x - cols] && v CMP uf[x - cols + 1] && \ v CMP uf[x + cols - 1] && v CMP uf[x + cols] && v CMP uf[x + cols + 1]) if (locality_if(<, -) || locality_if(>, +)) { ccv_keypoint_t kp; int ix = x, iy = y; double score = -1; int cvg = 0; int offset = ix + (iy - y) * cols; /* iteratively converge to meet subpixel accuracy */ for (k = 0; k < 5; k++) { offset = ix + (iy - y) * cols; float N9[3][9] = { { bf[offset - cols - 1], bf[offset - cols], bf[offset - cols + 1], bf[offset - 1], bf[offset], bf[offset + 1], bf[offset + cols - 1], bf[offset + cols], bf[offset + cols + 1] }, { cf[offset - cols - 1], cf[offset - cols], cf[offset - cols + 1], cf[offset - 1], cf[offset], cf[offset + 1], cf[offset + cols - 1], cf[offset + cols], cf[offset + cols + 1] }, { uf[offset - cols - 1], uf[offset - cols], uf[offset - cols + 1], uf[offset - 1], uf[offset], uf[offset + 1], uf[offset + cols - 1], uf[offset + cols], uf[offset + cols + 1] } }; score = _ccv_keypoint_interpolate(N9, ix, iy, j, &kp); if (kp.x >= 1 && kp.x <= cols - 2 && kp.y >= 1 && kp.y <= rows - 2) { int nx = (int)(kp.x + 0.5); int ny = (int)(kp.y + 0.5); if (ix == nx && iy == ny) break; ix = nx; iy = ny; } else { cvg = -1; break; } } if (cvg == 0 && fabs(cf[offset]) > params.peak_threshold && score >= 0 && score < (params.edge_threshold + 1) * (params.edge_threshold + 1) / params.edge_threshold && kp.regular.scale > 0 && kp.regular.scale < params.nlevels - 1) { kp.x *= s; kp.y *= s; kp.octave = i; kp.level = j; kp.regular.scale = sigma0 * sigmak * pow(2.0, kp.regular.scale / (double)(params.nlevels - 3)); ccv_array_push(keypoints, &kp); } } #undef locality_if } bf += cols; cf += cols; uf += cols; } } } } /* repeatable orientation/angle (p.s. it will push more keypoints (with different angles) to array) */ float const winf = 1.5; double bins[36]; int kpnum = keypoints->rnum; if (!_ccv_expn_init) _ccv_precomputed_expn(); for (i = 0; i < kpnum; i++) { ccv_keypoint_t* kp = (ccv_keypoint_t*)ccv_array_get(keypoints, i); float ds = pow(2.0, kp->octave); float dx = kp->x / ds; float dy = kp->y / ds; int ix = (int)(dx + 0.5); int iy = (int)(dy + 0.5); float const sigmaw = winf * kp->regular.scale; int wz = ccv_max((int)(3.0 * sigmaw + 0.5), 1); ccv_dense_matrix_t* tho = th[kp->octave * (params.nlevels - 3) + kp->level - 1]; ccv_dense_matrix_t* mdo = md[kp->octave * (params.nlevels - 3) + kp->level - 1]; assert(tho->rows == mdo->rows && tho->cols == mdo->cols); if (ix >= 0 && ix < tho->cols && iy >=0 && iy < tho->rows) { float* theta = tho->data.f32 + ccv_max(iy - wz, 0) * tho->cols; float* magnitude = mdo->data.f32 + ccv_max(iy - wz, 0) * mdo->cols; memset(bins, 0, 36 * sizeof(double)); /* oriented histogram with bilinear interpolation */ for (y = ccv_max(iy - wz, 0); y <= ccv_min(iy + wz, tho->rows - 1); y++) { for (x = ccv_max(ix - wz, 0); x <= ccv_min(ix + wz, tho->cols - 1); x++) { float r2 = (x - dx) * (x - dx) + (y - dy) * (y - dy); if (r2 > wz * wz + 0.6) continue; float weight = _ccv_expn(r2 / (2.0 * sigmaw * sigmaw)); float fbin = theta[x] * 0.1; int ibin = _ccv_floor(fbin - 0.5); float rbin = fbin - ibin - 0.5; /* bilinear interpolation */ bins[(ibin + 36) % 36] += (1 - rbin) * magnitude[x] * weight; bins[(ibin + 1) % 36] += rbin * magnitude[x] * weight; } theta += tho->cols; magnitude += mdo->cols; } /* smoothing histogram */ for (j = 0; j < 6; j++) { double first = bins[0]; double prev = bins[35]; for (k = 0; k < 35; k++) { double nb = (prev + bins[k] + bins[k + 1]) / 3.0; prev = bins[k]; bins[k] = nb; } bins[35] = (prev + bins[35] + first) / 3.0; } int maxib = 0; for (j = 1; j < 36; j++) if (bins[j] > bins[maxib]) maxib = j; double maxb = bins[maxib]; double bm = bins[(maxib + 35) % 36]; double bp = bins[(maxib + 1) % 36]; double di = -0.5 * (bp - bm) / (bp + bm - 2 * maxb); kp->regular.angle = 2 * CCV_PI * (maxib + di + 0.5) / 36.0; maxb *= 0.8; for (j = 0; j < 36; j++) if (j != maxib) { bm = bins[(j + 35) % 36]; bp = bins[(j + 1) % 36]; if (bins[j] > maxb && bins[j] > bm && bins[j] > bp) { di = -0.5 * (bp - bm) / (bp + bm - 2 * bins[j]); ccv_keypoint_t nkp = *kp; nkp.regular.angle = 2 * CCV_PI * (j + di + 0.5) / 36.0; ccv_array_push(keypoints, &nkp); } } } } /* calculate descriptor */ if (_desc != 0) { ccv_dense_matrix_t* desc = *_desc = ccv_dense_matrix_new(keypoints->rnum, 128, CCV_32F | CCV_C1, 0, 0); float* fdesc = desc->data.f32; memset(fdesc, 0, sizeof(float) * keypoints->rnum * 128); for (i = 0; i < keypoints->rnum; i++) { ccv_keypoint_t* kp = (ccv_keypoint_t*)ccv_array_get(keypoints, i); float ds = pow(2.0, kp->octave); float dx = kp->x / ds; float dy = kp->y / ds; int ix = (int)(dx + 0.5); int iy = (int)(dy + 0.5); double SBP = 3.0 * kp->regular.scale; int wz = ccv_max((int)(SBP * sqrt(2.0) * 2.5 + 0.5), 1); ccv_dense_matrix_t* tho = th[kp->octave * (params.nlevels - 3) + kp->level - 1]; ccv_dense_matrix_t* mdo = md[kp->octave * (params.nlevels - 3) + kp->level - 1]; assert(tho->rows == mdo->rows && tho->cols == mdo->cols); assert(ix >= 0 && ix < tho->cols && iy >=0 && iy < tho->rows); float* theta = tho->data.f32 + ccv_max(iy - wz, 0) * tho->cols; float* magnitude = mdo->data.f32 + ccv_max(iy - wz, 0) * mdo->cols; float ca = cos(kp->regular.angle); float sa = sin(kp->regular.angle); float sigmaw = 2.0; /* sidenote: NBP = 4, NBO = 8 */ for (y = ccv_max(iy - wz, 0); y <= ccv_min(iy + wz, tho->rows - 1); y++) { for (x = ccv_max(ix - wz, 0); x <= ccv_min(ix + wz, tho->cols - 1); x++) { float nx = (ca * (x - dx) + sa * (y - dy)) / SBP; float ny = (-sa * (x - dx) + ca * (y - dy)) / SBP; float nt = 8.0 * _ccv_mod_2pi(theta[x] * CCV_PI / 180.0 - kp->regular.angle) / (2.0 * CCV_PI); float weight = _ccv_expn((nx * nx + ny * ny) / (2.0 * sigmaw * sigmaw)); int binx = _ccv_floor(nx - 0.5); int biny = _ccv_floor(ny - 0.5); int bint = _ccv_floor(nt); float rbinx = nx - (binx + 0.5); float rbiny = ny - (biny + 0.5); float rbint = nt - bint; int dbinx, dbiny, dbint; /* Distribute the current sample into the 8 adjacent bins*/ for(dbinx = 0; dbinx < 2; dbinx++) for(dbiny = 0; dbiny < 2; dbiny++) for(dbint = 0; dbint < 2; dbint++) if (binx + dbinx >= -2 && binx + dbinx < 2 && biny + dbiny >= -2 && biny + dbiny < 2) fdesc[(2 + biny + dbiny) * 32 + (2 + binx + dbinx) * 8 + (bint + dbint) % 8] += weight * magnitude[x] * fabs(1 - dbinx - rbinx) * fabs(1 - dbiny - rbiny) * fabs(1 - dbint - rbint); } theta += tho->cols; magnitude += mdo->cols; } ccv_dense_matrix_t tm = ccv_dense_matrix(1, 128, CCV_32F | CCV_C1, fdesc, 0); ccv_dense_matrix_t* tmp = &tm; double norm = ccv_normalize(&tm, (ccv_matrix_t**)&tmp, 0, CCV_L2_NORM); int num = (ccv_min(iy + wz, tho->rows - 1) - ccv_max(iy - wz, 0) + 1) * (ccv_min(ix + wz, tho->cols - 1) - ccv_max(ix - wz, 0) + 1); if (params.norm_threshold && norm < params.norm_threshold * num) { for (j = 0; j < 128; j++) fdesc[j] = 0; } else { for (j = 0; j < 128; j++) if (fdesc[j] > 0.2) fdesc[j] = 0.2; ccv_normalize(&tm, (ccv_matrix_t**)&tmp, 0, CCV_L2_NORM); } fdesc += 128; } } for (i = (params.up2x ? -(params.nlevels - 1) : 0); i < (params.nlevels - 1) * params.noctaves; i++) ccv_matrix_free(dog[i]); for (i = (params.up2x ? -(params.nlevels - 3) : 0); i < (params.nlevels - 3) * params.noctaves; i++) { ccv_matrix_free(th[i]); ccv_matrix_free(md[i]); } }