This source file includes following definitions.
- calcPixelCostBT
- computeDisparitySGBM
- compute
- getMinDisparity
- setMinDisparity
- getNumDisparities
- setNumDisparities
- getBlockSize
- setBlockSize
- getSpeckleWindowSize
- setSpeckleWindowSize
- getSpeckleRange
- setSpeckleRange
- getDisp12MaxDiff
- setDisp12MaxDiff
- getPreFilterCap
- setPreFilterCap
- getUniquenessRatio
- setUniquenessRatio
- getP1
- setP1
- getP2
- setP2
- getMode
- setMode
- write
- read
- create
- getValidDisparityROI
- filterSpecklesImpl
- filterSpeckles
- validateDisparity
#include "precomp.hpp"
#include <limits.h>
namespace cv
{
typedef uchar PixType;
typedef short CostType;
typedef short DispType;
enum { NR = 16, NR2 = NR/2 };
struct StereoSGBMParams
{
StereoSGBMParams()
{
minDisparity = numDisparities = 0;
SADWindowSize = 0;
P1 = P2 = 0;
disp12MaxDiff = 0;
preFilterCap = 0;
uniquenessRatio = 0;
speckleWindowSize = 0;
speckleRange = 0;
mode = StereoSGBM::MODE_SGBM;
}
StereoSGBMParams( int _minDisparity, int _numDisparities, int _SADWindowSize,
int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
int _mode )
{
minDisparity = _minDisparity;
numDisparities = _numDisparities;
SADWindowSize = _SADWindowSize;
P1 = _P1;
P2 = _P2;
disp12MaxDiff = _disp12MaxDiff;
preFilterCap = _preFilterCap;
uniquenessRatio = _uniquenessRatio;
speckleWindowSize = _speckleWindowSize;
speckleRange = _speckleRange;
mode = _mode;
}
int minDisparity;
int numDisparities;
int SADWindowSize;
int preFilterCap;
int uniquenessRatio;
int P1;
int P2;
int speckleWindowSize;
int speckleRange;
int disp12MaxDiff;
int mode;
};
static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
int minD, int maxD, CostType* cost,
PixType* buffer, const PixType* tab,
int tabOfs, int )
{
int x, c, width = img1.cols, cn = img1.channels();
int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
int minX2 = std::max(minX1 - maxD, 0), maxX2 = std::min(maxX1 - minD, width);
int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;
tab += tabOfs;
for( c = 0; c < cn*2; c++ )
{
prow1[width*c] = prow1[width*c + width-1] =
prow2[width*c] = prow2[width*c + width-1] = tab[0];
}
int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;
if( cn == 1 )
{
for( x = 1; x < width-1; x++ )
{
prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];
prow1[x+width] = row1[x];
prow2[width-1-x+width] = row2[x];
}
}
else
{
for( x = 1; x < width-1; x++ )
{
prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];
prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];
prow1[x+width*3] = row1[x*3];
prow1[x+width*4] = row1[x*3+1];
prow1[x+width*5] = row1[x*3+2];
prow2[width-1-x+width*3] = row2[x*3];
prow2[width-1-x+width*4] = row2[x*3+1];
prow2[width-1-x+width*5] = row2[x*3+2];
}
}
memset( cost, 0, width1*D*sizeof(cost[0]) );
buffer -= minX2;
cost -= minX1*D + minD;
#if CV_SSE2
volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
#if 1
for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
{
int diff_scale = c < cn ? 0 : 2;
for( x = minX2; x < maxX2; x++ )
{
int v = prow2[x];
int vl = x > 0 ? (v + prow2[x-1])/2 : v;
int vr = x < width-1 ? (v + prow2[x+1])/2 : v;
int v0 = std::min(vl, vr); v0 = std::min(v0, v);
int v1 = std::max(vl, vr); v1 = std::max(v1, v);
buffer[x] = (PixType)v0;
buffer[x + width2] = (PixType)v1;
}
for( x = minX1; x < maxX1; x++ )
{
int u = prow1[x];
int ul = x > 0 ? (u + prow1[x-1])/2 : u;
int ur = x < width-1 ? (u + prow1[x+1])/2 : u;
int u0 = std::min(ul, ur); u0 = std::min(u0, u);
int u1 = std::max(ul, ur); u1 = std::max(u1, u);
#if CV_SSE2
if( useSIMD )
{
__m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0);
__m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128();
__m128i ds = _mm_cvtsi32_si128(diff_scale);
for( int d = minD; d < maxD; d += 16 )
{
__m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d));
__m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d));
__m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2));
__m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u));
__m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v));
__m128i diff = _mm_min_epu8(c0, c1);
c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
_mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds)));
_mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds)));
}
}
else
#endif
{
for( int d = minD; d < maxD; d++ )
{
int v = prow2[width-x-1 + d];
int v0 = buffer[width-x-1 + d];
int v1 = buffer[width-x-1 + d + width2];
int c0 = std::max(0, u - v1); c0 = std::max(c0, v0 - u);
int c1 = std::max(0, v - u1); c1 = std::max(c1, u0 - v);
cost[x*D + d] = (CostType)(cost[x*D+d] + (std::min(c0, c1) >> diff_scale));
}
}
}
}
#else
for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
{
for( x = minX1; x < maxX1; x++ )
{
int u = prow1[x];
#if CV_SSE2
if( useSIMD )
{
__m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128();
for( int d = minD; d < maxD; d += 16 )
{
__m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d));
__m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u));
__m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
__m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
_mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z)));
_mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z)));
}
}
else
#endif
{
for( int d = minD; d < maxD; d++ )
{
int v = prow2[width-1-x + d];
cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v));
}
}
}
}
#endif
}
static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
Mat& disp1, const StereoSGBMParams& params,
Mat& buffer )
{
#if CV_SSE2
static const uchar LSBTab[] =
{
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
const int ALIGN = 16;
const int DISP_SHIFT = StereoMatcher::DISP_SHIFT;
const int DISP_SCALE = (1 << DISP_SHIFT);
const CostType MAX_COST = SHRT_MAX;
int minD = params.minDisparity, maxD = minD + params.numDisparities;
Size SADWindowSize;
SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
int ftzero = std::max(params.preFilterCap, 15) | 1;
int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10;
int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1);
int k, width = disp1.cols, height = disp1.rows;
int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
int D = maxD - minD, width1 = maxX1 - minX1;
int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2;
bool fullDP = params.mode == StereoSGBM::MODE_HH;
int npasses = fullDP ? 2 : 1;
const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
PixType clipTab[TAB_SIZE];
for( k = 0; k < TAB_SIZE; k++ )
clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero);
if( minX1 >= maxX1 )
{
disp1 = Scalar::all(INVALID_DISP_SCALED);
return;
}
CV_Assert( D % 16 == 0 );
int D2 = D+16, NRD2 = NR2*D2;
const int NLR = 2;
const int LrBorder = NLR - 1;
size_t costBufSize = width1*D;
size_t CSBufSize = costBufSize*(fullDP ? height : 1);
size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2;
int hsumBufNRows = SH2*2 + 2;
size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) +
costBufSize*(hsumBufNRows + 1)*sizeof(CostType) +
CSBufSize*2*sizeof(CostType) +
width*16*img1.channels()*sizeof(PixType) +
width*(sizeof(CostType) + sizeof(DispType)) + 1024;
if( buffer.empty() || !buffer.isContinuous() ||
buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
buffer.create(1, (int)totalBufSize, CV_8U);
CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN);
CostType* Sbuf = Cbuf + CSBufSize;
CostType* hsumBuf = Sbuf + CSBufSize;
CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;
CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
DispType* disp2ptr = (DispType*)(disp2cost + width);
PixType* tempBuf = (PixType*)(disp2ptr + width);
for( k = 0; k < width1*D; k++ )
Cbuf[k] = (CostType)P2;
for( int pass = 1; pass <= npasses; pass++ )
{
int x1, y1, x2, y2, dx, dy;
if( pass == 1 )
{
y1 = 0; y2 = height; dy = 1;
x1 = 0; x2 = width1; dx = 1;
}
else
{
y1 = height-1; y2 = -1; dy = -1;
x1 = width1-1; x2 = -1; dx = -1;
}
CostType *Lr[NLR]={0}, *minLr[NLR]={0};
for( k = 0; k < NLR; k++ )
{
Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8;
memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) );
minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder;
memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
}
for( int y = y1; y != y2; y += dy )
{
int x, d;
DispType* disp1ptr = disp1.ptr<DispType>(y);
CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize);
CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize);
if( pass == 1 )
{
int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
for( k = dy1; k <= dy2; k++ )
{
CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;
if( k < height )
{
calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );
memset(hsumAdd, 0, D*sizeof(CostType));
for( x = 0; x <= SW2*D; x += D )
{
int scale = x == 0 ? SW2 + 1 : 1;
for( d = 0; d < D; d++ )
hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);
}
if( y > 0 )
{
const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize;
for( x = D; x < width1*D; x += D )
{
const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
#if CV_SSE2
if( useSIMD )
{
for( d = 0; d < D; d += 8 )
{
__m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d));
__m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d));
hv = _mm_adds_epi16(_mm_subs_epi16(hv,
_mm_load_si128((const __m128i*)(pixSub + d))),
_mm_load_si128((const __m128i*)(pixAdd + d)));
Cx = _mm_adds_epi16(_mm_subs_epi16(Cx,
_mm_load_si128((const __m128i*)(hsumSub + x + d))),
hv);
_mm_store_si128((__m128i*)(hsumAdd + x + d), hv);
_mm_store_si128((__m128i*)(C + x + d), Cx);
}
}
else
#endif
{
for( d = 0; d < D; d++ )
{
int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]);
}
}
}
}
else
{
for( x = D; x < width1*D; x += D )
{
const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
for( d = 0; d < D; d++ )
hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
}
}
}
if( y == 0 )
{
int scale = k == 0 ? SH2 + 1 : 1;
for( x = 0; x < width1*D; x++ )
C[x] = (CostType)(C[x] + hsumAdd[x]*scale);
}
}
for( k = 0; k < width1*D; k++ )
S[k] = 0;
}
memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );
for( x = x1; x != x2; x += dx )
{
int xm = x*NR2, xd = xm*D2;
int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;
CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
CostType* Lr_p2 = Lr[1] + xd + D2*2;
CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;
Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;
CostType* Lr_p = Lr[0] + xd;
const CostType* Cp = C + x*D;
CostType* Sp = S + x*D;
#if CV_SSE2
if( useSIMD )
{
__m128i _P1 = _mm_set1_epi16((short)P1);
__m128i _delta0 = _mm_set1_epi16((short)delta0);
__m128i _delta1 = _mm_set1_epi16((short)delta1);
__m128i _delta2 = _mm_set1_epi16((short)delta2);
__m128i _delta3 = _mm_set1_epi16((short)delta3);
__m128i _minL0 = _mm_set1_epi16((short)MAX_COST);
for( d = 0; d < D; d += 8 )
{
__m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
__m128i L0, L1, L2, L3;
L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));
L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));
L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));
L0 = _mm_min_epi16(L0, _delta0);
L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
L1 = _mm_min_epi16(L1, _delta1);
L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);
L2 = _mm_min_epi16(L2, _delta2);
L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);
L3 = _mm_min_epi16(L3, _delta3);
L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);
_mm_store_si128( (__m128i*)(Lr_p + d), L0);
_mm_store_si128( (__m128i*)(Lr_p + d + D2), L1);
_mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2);
_mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3);
__m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
__m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
_minL0 = _mm_min_epi16(_minL0, t0);
__m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));
L0 = _mm_adds_epi16(L0, L1);
L2 = _mm_adds_epi16(L2, L3);
Sval = _mm_adds_epi16(Sval, L0);
Sval = _mm_adds_epi16(Sval, L2);
_mm_store_si128((__m128i*)(Sp + d), Sval);
}
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
_mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
}
else
#endif
{
int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST;
for( d = 0; d < D; d++ )
{
int Cpd = Cp[d], L0, L1, L2, L3;
L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1;
L2 = Cpd + std::min((int)Lr_p2[d], std::min(Lr_p2[d-1] + P1, std::min(Lr_p2[d+1] + P1, delta2))) - delta2;
L3 = Cpd + std::min((int)Lr_p3[d], std::min(Lr_p3[d-1] + P1, std::min(Lr_p3[d+1] + P1, delta3))) - delta3;
Lr_p[d] = (CostType)L0;
minL0 = std::min(minL0, L0);
Lr_p[d + D2] = (CostType)L1;
minL1 = std::min(minL1, L1);
Lr_p[d + D2*2] = (CostType)L2;
minL2 = std::min(minL2, L2);
Lr_p[d + D2*3] = (CostType)L3;
minL3 = std::min(minL3, L3);
Sp[d] = saturate_cast<CostType>(Sp[d] + L0 + L1 + L2 + L3);
}
minLr[0][xm] = (CostType)minL0;
minLr[0][xm+1] = (CostType)minL1;
minLr[0][xm+2] = (CostType)minL2;
minLr[0][xm+3] = (CostType)minL3;
}
}
if( pass == npasses )
{
for( x = 0; x < width; x++ )
{
disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED;
disp2cost[x] = MAX_COST;
}
for( x = width1 - 1; x >= 0; x-- )
{
CostType* Sp = S + x*D;
int minS = MAX_COST, bestDisp = -1;
if( npasses == 1 )
{
int xm = x*NR2, xd = xm*D2;
int minL0 = MAX_COST;
int delta0 = minLr[0][xm + NR2] + P2;
CostType* Lr_p0 = Lr[0] + xd + NRD2;
Lr_p0[-1] = Lr_p0[D] = MAX_COST;
CostType* Lr_p = Lr[0] + xd;
const CostType* Cp = C + x*D;
#if CV_SSE2
if( useSIMD )
{
__m128i _P1 = _mm_set1_epi16((short)P1);
__m128i _delta0 = _mm_set1_epi16((short)delta0);
__m128i _minL0 = _mm_set1_epi16((short)minL0);
__m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1);
__m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8);
for( d = 0; d < D; d += 8 )
{
__m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0;
L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
L0 = _mm_min_epi16(L0, _delta0);
L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
_mm_store_si128((__m128i*)(Lr_p + d), L0);
_minL0 = _mm_min_epi16(_minL0, L0);
L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d));
_mm_store_si128((__m128i*)(Sp + d), L0);
__m128i mask = _mm_cmpgt_epi16(_minS, L0);
_minS = _mm_min_epi16(_minS, L0);
_bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask));
_d8 = _mm_adds_epi16(_d8, _8);
}
short CV_DECL_ALIGNED(16) bestDispBuf[8];
_mm_store_si128((__m128i*)bestDispBuf, _bestDisp);
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));
__m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));
minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
minS = (CostType)_mm_cvtsi128_si32(qS);
qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
qS = _mm_cmpeq_epi16(_minS, qS);
int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;
bestDisp = bestDispBuf[LSBTab[idx]];
}
else
#endif
{
for( d = 0; d < D; d++ )
{
int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
Lr_p[d] = (CostType)L0;
minL0 = std::min(minL0, L0);
int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0);
if( Sval < minS )
{
minS = Sval;
bestDisp = d;
}
}
minLr[0][xm] = (CostType)minL0;
}
}
else
{
for( d = 0; d < D; d++ )
{
int Sval = Sp[d];
if( Sval < minS )
{
minS = Sval;
bestDisp = d;
}
}
}
for( d = 0; d < D; d++ )
{
if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 )
break;
}
if( d < D )
continue;
d = bestDisp;
int _x2 = x + minX1 - d - minD;
if( disp2cost[_x2] > minS )
{
disp2cost[_x2] = (CostType)minS;
disp2ptr[_x2] = (DispType)(d + minD);
}
if( 0 < d && d < D-1 )
{
int denom2 = std::max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1);
d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2);
}
else
d *= DISP_SCALE;
disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
}
for( x = minX1; x < maxX1; x++ )
{
int d1 = disp1ptr[x];
if( d1 == INVALID_DISP_SCALED )
continue;
int _d = d1 >> DISP_SHIFT;
int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT;
int _x = x - _d, x_ = x - d_;
if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff &&
0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff )
disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
}
}
std::swap( Lr[0], Lr[1] );
std::swap( minLr[0], minLr[1] );
}
}
}
class StereoSGBMImpl : public StereoSGBM
{
public:
StereoSGBMImpl()
{
params = StereoSGBMParams();
}
StereoSGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize,
int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
int _mode )
{
params = StereoSGBMParams( _minDisparity, _numDisparities, _SADWindowSize,
_P1, _P2, _disp12MaxDiff, _preFilterCap,
_uniquenessRatio, _speckleWindowSize, _speckleRange,
_mode );
}
void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )
{
Mat left = leftarr.getMat(), right = rightarr.getMat();
CV_Assert( left.size() == right.size() && left.type() == right.type() &&
left.depth() == CV_8U );
disparr.create( left.size(), CV_16S );
Mat disp = disparr.getMat();
computeDisparitySGBM( left, right, disp, params, buffer );
medianBlur(disp, disp, 3);
if( params.speckleWindowSize > 0 )
filterSpeckles(disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize,
StereoMatcher::DISP_SCALE*params.speckleRange, buffer);
}
int getMinDisparity() const { return params.minDisparity; }
void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; }
int getNumDisparities() const { return params.numDisparities; }
void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; }
int getBlockSize() const { return params.SADWindowSize; }
void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; }
int getSpeckleWindowSize() const { return params.speckleWindowSize; }
void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; }
int getSpeckleRange() const { return params.speckleRange; }
void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; }
int getDisp12MaxDiff() const { return params.disp12MaxDiff; }
void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; }
int getPreFilterCap() const { return params.preFilterCap; }
void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; }
int getUniquenessRatio() const { return params.uniquenessRatio; }
void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; }
int getP1() const { return params.P1; }
void setP1(int P1) { params.P1 = P1; }
int getP2() const { return params.P2; }
void setP2(int P2) { params.P2 = P2; }
int getMode() const { return params.mode; }
void setMode(int mode) { params.mode = mode; }
void write(FileStorage& fs) const
{
fs << "name" << name_
<< "minDisparity" << params.minDisparity
<< "numDisparities" << params.numDisparities
<< "blockSize" << params.SADWindowSize
<< "speckleWindowSize" << params.speckleWindowSize
<< "speckleRange" << params.speckleRange
<< "disp12MaxDiff" << params.disp12MaxDiff
<< "preFilterCap" << params.preFilterCap
<< "uniquenessRatio" << params.uniquenessRatio
<< "P1" << params.P1
<< "P2" << params.P2
<< "mode" << params.mode;
}
void read(const FileNode& fn)
{
FileNode n = fn["name"];
CV_Assert( n.isString() && String(n) == name_ );
params.minDisparity = (int)fn["minDisparity"];
params.numDisparities = (int)fn["numDisparities"];
params.SADWindowSize = (int)fn["blockSize"];
params.speckleWindowSize = (int)fn["speckleWindowSize"];
params.speckleRange = (int)fn["speckleRange"];
params.disp12MaxDiff = (int)fn["disp12MaxDiff"];
params.preFilterCap = (int)fn["preFilterCap"];
params.uniquenessRatio = (int)fn["uniquenessRatio"];
params.P1 = (int)fn["P1"];
params.P2 = (int)fn["P2"];
params.mode = (int)fn["mode"];
}
StereoSGBMParams params;
Mat buffer;
static const char* name_;
};
const char* StereoSGBMImpl::name_ = "StereoMatcher.SGBM";
Ptr<StereoSGBM> StereoSGBM::create(int minDisparity, int numDisparities, int SADWindowSize,
int P1, int P2, int disp12MaxDiff,
int preFilterCap, int uniquenessRatio,
int speckleWindowSize, int speckleRange,
int mode)
{
return Ptr<StereoSGBM>(
new StereoSGBMImpl(minDisparity, numDisparities, SADWindowSize,
P1, P2, disp12MaxDiff,
preFilterCap, uniquenessRatio,
speckleWindowSize, speckleRange,
mode));
}
Rect getValidDisparityROI( Rect roi1, Rect roi2,
int minDisparity,
int numberOfDisparities,
int SADWindowSize )
{
int SW2 = SADWindowSize/2;
int minD = minDisparity, maxD = minDisparity + numberOfDisparities - 1;
int xmin = std::max(roi1.x, roi2.x + maxD) + SW2;
int xmax = std::min(roi1.x + roi1.width, roi2.x + roi2.width - minD) - SW2;
int ymin = std::max(roi1.y, roi2.y) + SW2;
int ymax = std::min(roi1.y + roi1.height, roi2.y + roi2.height) - SW2;
Rect r(xmin, ymin, xmax - xmin, ymax - ymin);
return r.width > 0 && r.height > 0 ? r : Rect();
}
typedef cv::Point_<short> Point2s;
template <typename T>
void filterSpecklesImpl(cv::Mat& img, int newVal, int maxSpeckleSize, int maxDiff, cv::Mat& _buf)
{
using namespace cv;
int width = img.cols, height = img.rows, npixels = width*height;
size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar));
if( !_buf.isContinuous() || _buf.empty() || _buf.cols*_buf.rows*_buf.elemSize() < bufSize )
_buf.create(1, (int)bufSize, CV_8U);
uchar* buf = _buf.ptr();
int i, j, dstep = (int)(img.step/sizeof(T));
int* labels = (int*)buf;
buf += npixels*sizeof(labels[0]);
Point2s* wbuf = (Point2s*)buf;
buf += npixels*sizeof(wbuf[0]);
uchar* rtype = (uchar*)buf;
int curlabel = 0;
memset(labels, 0, npixels*sizeof(labels[0]));
for( i = 0; i < height; i++ )
{
T* ds = img.ptr<T>(i);
int* ls = labels + width*i;
for( j = 0; j < width; j++ )
{
if( ds[j] != newVal )
{
if( ls[j] )
{
if( rtype[ls[j]] )
ds[j] = (T)newVal;
}
else
{
Point2s* ws = wbuf;
Point2s p((short)j, (short)i);
curlabel++;
int count = 0;
ls[j] = curlabel;
while( ws >= wbuf )
{
count++;
T* dpp = &img.at<T>(p.y, p.x);
T dp = *dpp;
int* lpp = labels + width*p.y + p.x;
if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff )
{
lpp[+width] = curlabel;
*ws++ = Point2s(p.x, p.y+1);
}
if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff )
{
lpp[-width] = curlabel;
*ws++ = Point2s(p.x, p.y-1);
}
if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff )
{
lpp[+1] = curlabel;
*ws++ = Point2s(p.x+1, p.y);
}
if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff )
{
lpp[-1] = curlabel;
*ws++ = Point2s(p.x-1, p.y);
}
p = *--ws;
}
if( count <= maxSpeckleSize )
{
rtype[ls[j]] = 1;
ds[j] = (T)newVal;
}
else
rtype[ls[j]] = 0;
}
}
}
}
}
}
void cv::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSize,
double _maxDiff, InputOutputArray __buf )
{
Mat img = _img.getMat();
int type = img.type();
Mat temp, &_buf = __buf.needed() ? __buf.getMatRef() : temp;
CV_Assert( type == CV_8UC1 || type == CV_16SC1 );
int newVal = cvRound(_newval), maxDiff = cvRound(_maxDiff);
#if IPP_VERSION_X100 >= 801
CV_IPP_CHECK()
{
Ipp32s bufsize = 0;
IppiSize roisize = { img.cols, img.rows };
IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp16s;
if (!__buf.needed() && (type == CV_8UC1 || type == CV_16SC1))
{
IppStatus status = ippiMarkSpecklesGetBufferSize(roisize, datatype, CV_MAT_CN(type), &bufsize);
Ipp8u * buffer = ippsMalloc_8u(bufsize);
if ((int)status >= 0)
{
if (type == CV_8UC1)
status = ippiMarkSpeckles_8u_C1IR(img.ptr<Ipp8u>(), (int)img.step, roisize,
(Ipp8u)newVal, maxSpeckleSize, (Ipp8u)maxDiff, ippiNormL1, buffer);
else
status = ippiMarkSpeckles_16s_C1IR(img.ptr<Ipp16s>(), (int)img.step, roisize,
(Ipp16s)newVal, maxSpeckleSize, (Ipp16s)maxDiff, ippiNormL1, buffer);
}
if (status >= 0)
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
setIppErrorStatus();
}
}
#endif
if (type == CV_8UC1)
filterSpecklesImpl<uchar>(img, newVal, maxSpeckleSize, maxDiff, _buf);
else
filterSpecklesImpl<short>(img, newVal, maxSpeckleSize, maxDiff, _buf);
}
void cv::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDisparity,
int numberOfDisparities, int disp12MaxDiff )
{
Mat disp = _disp.getMat(), cost = _cost.getMat();
int cols = disp.cols, rows = disp.rows;
int minD = minDisparity, maxD = minDisparity + numberOfDisparities;
int x, minX1 = std::max(maxD, 0), maxX1 = cols + std::min(minD, 0);
AutoBuffer<int> _disp2buf(cols*2);
int* disp2buf = _disp2buf;
int* disp2cost = disp2buf + cols;
const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT;
int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
int costType = cost.type();
disp12MaxDiff *= DISP_SCALE;
CV_Assert( numberOfDisparities > 0 && disp.type() == CV_16S &&
(costType == CV_16S || costType == CV_32S) &&
disp.size() == cost.size() );
for( int y = 0; y < rows; y++ )
{
short* dptr = disp.ptr<short>(y);
for( x = 0; x < cols; x++ )
{
disp2buf[x] = INVALID_DISP_SCALED;
disp2cost[x] = INT_MAX;
}
if( costType == CV_16S )
{
const short* cptr = cost.ptr<short>(y);
for( x = minX1; x < maxX1; x++ )
{
int d = dptr[x], c = cptr[x];
int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
if( disp2cost[x2] > c )
{
disp2cost[x2] = c;
disp2buf[x2] = d;
}
}
}
else
{
const int* cptr = cost.ptr<int>(y);
for( x = minX1; x < maxX1; x++ )
{
int d = dptr[x], c = cptr[x];
int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
if( disp2cost[x2] < c )
{
disp2cost[x2] = c;
disp2buf[x2] = d;
}
}
}
for( x = minX1; x < maxX1; x++ )
{
int d = dptr[x];
if( d == INVALID_DISP_SCALED )
continue;
int d0 = d >> DISP_SHIFT;
int d1 = (d + DISP_SCALE-1) >> DISP_SHIFT;
int x0 = x - d0, x1 = x - d1;
if( (0 <= x0 && x0 < cols && disp2buf[x0] > INVALID_DISP_SCALED && std::abs(disp2buf[x0] - d) > disp12MaxDiff) &&
(0 <= x1 && x1 < cols && disp2buf[x1] > INVALID_DISP_SCALED && std::abs(disp2buf[x1] - d) > disp12MaxDiff) )
dptr[x] = (short)INVALID_DISP_SCALED;
}
}
}