This source file includes the following definitions.
- deblockCTU
- bsCuEdge
- deblockCU
- calcBsIdx
- setEdgefilterMultiple
- setEdgefilterTU
- setEdgefilterPU
- getBoundaryStrength
- calcDP
- calcDQ
- useStrongFiltering
- pelFilterLuma
- edgeFilterLuma
- edgeFilterChroma
#include "common.h"
#include "deblock.h"
#include "framedata.h"
#include "picyuv.h"
#include "slice.h"
#include "mv.h"
using namespace X265_NS;
#define DEBLOCK_SMALLEST_BLOCK 8
#define DEFAULT_INTRA_TC_OFFSET 2
void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir)
{
uint8_t blockStrength[MAX_NUM_PARTITIONS];
memset(blockStrength, 0, sizeof(uint8_t) * cuGeom.numPartitions);
deblockCU(ctu, cuGeom, dir, blockStrength);
}
static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
{
if (dir == Deblock::EDGE_VER)
{
if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0)
{
uint32_t tempPartIdx;
const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx);
return tempCU ? 2 : 0;
}
}
else
{
if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0)
{
uint32_t tempPartIdx;
const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx);
return tempCU ? 2 : 0;
}
}
return 0;
}
/* Recursive per-CU deblocking pass for one edge direction.
 *
 * cu            - CTU data the geometry refers to
 * cuGeom        - geometry node of the CU being visited
 * dir           - edge direction (EDGE_VER or EDGE_HOR)
 * blockStrength - per-partition strength scratch array, updated in place
 *
 * Nodes that are split further are descended into; a leaf CU first gets its
 * PU, TU and CU boundary flags, then the flags are converted to final
 * strengths, and finally the luma (and, where applicable, chroma) edges are
 * filtered. */
void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[])
{
    const uint32_t absPartIdx = cuGeom.absPartIdx;
    const uint32_t depth = cuGeom.depth;

    if (cu->m_predMode[absPartIdx] == MODE_NONE)
        return;

    if (cu->m_cuDepth[absPartIdx] > depth)
    {
        // The coded CU is deeper than this node: visit the four children
        const CUGeom* const children = &cuGeom + cuGeom.childOffset;
        for (uint32_t quadrant = 0; quadrant < 4; quadrant++)
        {
            const CUGeom& childGeom = children[quadrant];
            if (childGeom.flags & CUGeom::PRESENT)
                deblockCU(cu, childGeom, dir, blockStrength);
        }
        return;
    }

    // Flag edges.  The order matters: each call overwrites what earlier calls
    // stored at the same positions, so the CU boundary (edge 0) has the last word.
    const uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
    setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
    setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength);
    const uint8_t cuEdgeStrength = bsCuEdge(cu, absPartIdx, dir);
    setEdgefilterMultiple(absPartIdx, dir, 0, cuEdgeStrength, blockStrength, numUnits);

    // Turn flags into final strengths.  Only partitions whose z-scan index has
    // bit 'dir' clear are evaluated (presumably those lying on the deblocking
    // grid for this direction -- confirm).
    const uint32_t numParts = cuGeom.numPartitions;
    const uint32_t partEnd = absPartIdx + numParts;
    const uint32_t skipBit = 1 << dir;
    for (uint32_t partIdx = absPartIdx; partIdx < partEnd; partIdx++)
    {
        if (partIdx & skipBit)
            continue;
        if (!blockStrength[partIdx])
            continue;

        blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength);
    }

    // Filter every DEBLOCK_SMALLEST_BLOCK-spaced edge of the CU
    const uint32_t edgeStep = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
    const uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift;
    const uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
    const uint32_t firstEdge = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE;
    const bool hasChroma = cu->m_chromaFormat != X265_CSP_I400;

    for (uint32_t e = 0; e < numUnits; e += edgeStep)
    {
        edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);

        // Chroma is filtered only on edges whose unit position within the CTU
        // is a multiple of (chromaMask + 1)
        const bool onChromaGrid = !((firstEdge + e) & chromaMask);
        if (onChromaGrid && hasChroma)
            edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
    }
}
/* Map a position on an edge to a z-scan partition index.
 *
 * The raster index of absPartIdx is offset by two terms: one index shifted
 * left by LOG2_RASTER_SIZE and one added as-is.  For a non-zero 'dir' the
 * shifted term is edgeIdx and the plain term is baseUnitIdx; for dir == 0 the
 * roles are swapped.  The sum is converted back through g_rasterToZscan. */
static inline uint32_t calcBsIdx(uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
{
    const int32_t shiftedIdx = dir ? edgeIdx : baseUnitIdx;
    const int32_t plainIdx = dir ? baseUnitIdx : edgeIdx;

    return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (shiftedIdx << LOG2_RASTER_SIZE) + plainIdx];
}
/* Store 'value' into blockStrength for numUnits consecutive units along edge
 * 'edgeIdx' (direction 'dir') of the block starting at z-scan index scanIdx.
 * Existing entries at those positions are overwritten. */
void Deblock::setEdgefilterMultiple(uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
{
    X265_CHECK(numUnits > 0, "numUnits edge filter check\n");

    uint32_t unit = 0;
    while (unit < numUnits)
    {
        blockStrength[calcBsIdx(scanIdx, dir, edgeIdx, unit)] = value;
        ++unit;
    }
}
/* Flag the first edge (edge index 0) of every leaf transform unit below
 * absPartIdx with strength 2.
 *
 * tuDepth is the depth of the current node in the transform tree; while the
 * coded TU depth is larger, the node is split into four quadrants and each is
 * visited recursively. */
void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[])
{
    const uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth;

    if (cu->m_tuDepth[absPartIdx] <= tuDepth)
    {
        // Leaf: mark the whole edge of this transform unit
        const uint32_t unitsPerSide = 1 << (log2TrSize - LOG2_UNIT_SIZE);
        setEdgefilterMultiple(absPartIdx, dir, 0, 2, blockStrength, unitsPerSide);
        return;
    }

    // Each quadrant covers the square of half this node's side, in partition units
    const uint32_t partsPerQuadrant = 1 << ((log2TrSize - LOG2_UNIT_SIZE - 1) * 2);
    for (uint32_t quadrant = 0; quadrant < 4; ++quadrant)
        setEdgefilterTU(cu, absPartIdx + quadrant * partsPerQuadrant, tuDepth + 1, dir, blockStrength);
}
/* Flag the internal prediction-partition edge of a CU with strength 1.
 *
 * The partition mode decides where the edge sits (half, one quarter or three
 * quarters of numUnits) and in which direction it runs.  Nothing is written
 * when that direction differs from 'dir', or for SIZE_2Nx2N and unknown
 * modes.  SIZE_NxN has an edge at the half position in whichever direction is
 * being processed. */
void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits)
{
    const uint32_t half = numUnits >> 1;
    const uint32_t quarter = numUnits >> 2;

    uint32_t edgePos;    // edge index of the partition boundary inside the CU
    int32_t  edgeDir;    // direction in which that boundary exists

    switch (cu->m_partSize[absPartIdx])
    {
    case SIZE_2NxN:
        edgeDir = EDGE_HOR;
        edgePos = half;
        break;
    case SIZE_Nx2N:
        edgeDir = EDGE_VER;
        edgePos = half;
        break;
    case SIZE_NxN:
        edgeDir = dir;
        edgePos = half;
        break;
    case SIZE_2NxnU:
        edgeDir = EDGE_HOR;
        edgePos = quarter;
        break;
    case SIZE_nLx2N:
        edgeDir = EDGE_VER;
        edgePos = quarter;
        break;
    case SIZE_2NxnD:
        edgeDir = EDGE_HOR;
        edgePos = numUnits - quarter;
        break;
    case SIZE_nRx2N:
        edgeDir = EDGE_VER;
        edgePos = numUnits - quarter;
        break;
    case SIZE_2Nx2N:
    default:
        return;
    }

    if (edgeDir == dir)
        setEdgefilterMultiple(absPartIdx, dir, edgePos, 1, blockStrength, numUnits);
}
uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[])
{
uint32_t partP;
const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
if (cuP->isIntra(partP) || cuQ->isIntra(partQ))
return 2;
if (blockStrength[partQ] > 1 &&
(cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) ||
cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP])))
return 1;
static const MV zeroMv(0, 0);
const Slice* const sliceQ = cuQ->m_slice;
const Slice* const sliceP = cuP->m_slice;
const Frame* refP0 = sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]];
const Frame* refQ0 = sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]];
const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
if (sliceQ->isInterP() && sliceP->isInterP())
{
return ((refP0 != refQ0) ||
(abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0;
}
const Frame* refP1 = sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]];
const Frame* refQ1 = sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]];
const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0)))
{
if (refP0 != refP1)
{
if (refP0 == refQ0)
return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
(abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0;
else
return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
(abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0;
}
else
{
return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
(abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) &&
((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
(abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0;
}
}
return 1;
}
/* Absolute second difference of the three samples before src, taken at
 * src[-3 * offset], src[-2 * offset] and src[-offset]. */
static inline int32_t calcDP(pixel* src, intptr_t offset)
{
    const int32_t outer = src[-offset * 3];
    const int32_t middle = src[-offset * 2];
    const int32_t inner = src[-offset];

    return abs(outer - 2 * middle + inner);
}
/* Absolute second difference of the three samples starting at src, taken at
 * src[0], src[offset] and src[2 * offset]. */
static inline int32_t calcDQ(pixel* src, intptr_t offset)
{
    const int32_t inner = src[0];
    const int32_t middle = src[offset];
    const int32_t outer = src[offset * 2];

    return abs(inner - 2 * middle + outer);
}
/* Decide, for one line of samples across the edge at src, whether both
 * strong-filter conditions hold:
 *   - |src[-4o] - src[-o]| + |src[3o] - src[0]| is below beta / 8
 *   - |src[-o] - src[0]| is below (5 * tc + 1) / 2
 * where o is 'offset', the distance between neighbouring samples across the
 * edge. */
static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src)
{
    const int16_t nearQ = static_cast<int16_t>(src[0]);
    const int16_t nearP = static_cast<int16_t>(src[-offset]);
    const int16_t farQ = static_cast<int16_t>(src[offset * 3]);
    const int16_t farP = static_cast<int16_t>(src[-offset * 4]);

    const int32_t sideSpread = abs(farP - nearP) + abs(farQ - nearQ);
    if (!(sideSpread < (beta >> 3)))
        return false;

    return abs(nearP - nearQ) < ((tc * 5 + 1) >> 1);
}
/* Normal (non-strong) luma filter applied in place to UNIT_SIZE lines.
 *
 * src            - sample just after the edge on the first line
 * srcStep        - distance from one line to the next
 * offset         - distance between neighbouring samples across the edge
 * tc             - clipping bound for the correction
 * maskP, maskQ   - all-ones to allow modifying the respective side, 0 to
 *                  leave it untouched
 * maskP1, maskQ1 - as above for the second sample of each side; they take
 *                  effect only if the matching side mask is set too
 *
 * A line is skipped entirely when the magnitude of its correction reaches
 * 10 * tc. */
static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ,
                                 int32_t maskP1, int32_t maskQ1)
{
    const int32_t deltaLimit = tc * 10;
    const int32_t halfTc = tc >> 1;
    const int32_t secondP = maskP1 & maskP;
    const int32_t secondQ = maskQ1 & maskQ;

    for (int32_t line = 0; line < UNIT_SIZE; line++, src += srcStep)
    {
        const int16_t m4 = static_cast<int16_t>(src[0]);
        const int16_t m3 = static_cast<int16_t>(src[-offset]);
        const int16_t m5 = static_cast<int16_t>(src[offset]);
        const int16_t m2 = static_cast<int16_t>(src[-offset * 2]);

        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
        if (abs(delta) >= deltaLimit)
            continue;

        delta = x265_clip3(-tc, tc, delta);

        // First sample on each side of the edge
        src[-offset] = x265_clip(m3 + (delta & maskP));
        src[0] = x265_clip(m4 - (delta & maskQ));

        // Second sample on the P side
        if (secondP)
        {
            const int16_t m1 = static_cast<int16_t>(src[-offset * 3]);
            const int32_t deltaP = x265_clip3(-halfTc, halfTc, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
            src[-offset * 2] = x265_clip(m2 + deltaP);
        }

        // Second sample on the Q side
        if (secondQ)
        {
            const int16_t m6 = static_cast<int16_t>(src[offset * 2]);
            const int32_t deltaQ = x265_clip3(-halfTc, halfTc, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
            src[offset] = x265_clip(m5 + deltaQ);
        }
    }
}
void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
{
PicYuv* reconPic = cuQ->m_encData->m_reconPic;
pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
intptr_t stride = reconPic->m_stride;
const PPS* pps = cuQ->m_slice->m_pps;
intptr_t offset, srcStep;
int32_t maskP = -1;
int32_t maskQ = -1;
int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1;
int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
bool bCheckNoFilter = pps->bTransquantBypassEnabled;
if (dir == EDGE_VER)
{
offset = 1;
srcStep = stride;
src += (edge << LOG2_UNIT_SIZE);
}
else
{
offset = stride;
srcStep = 1;
src += (edge << LOG2_UNIT_SIZE) * stride;
}
uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth;
for (uint32_t idx = 0; idx < numUnits; idx++)
{
uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx);
uint32_t bs = blockStrength[partQ];
if (!bs)
continue;
uint32_t partP;
const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
if (bCheckNoFilter)
{
maskP = cuP->m_tqBypass[partP] - 1;
maskQ = cuQ->m_tqBypass[partQ] - 1;
if (!(maskP | maskQ))
continue;
}
int32_t qpQ = cuQ->m_qp[partQ];
int32_t qpP = cuP->m_qp[partP];
int32_t qp = (qpP + qpQ + 1) >> 1;
int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
const int32_t bitdepthShift = X265_DEPTH - 8;
int32_t beta = s_betaTable[indexB] << bitdepthShift;
intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
int32_t dp0 = calcDP(src + unitOffset , offset);
int32_t dq0 = calcDQ(src + unitOffset , offset);
int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset);
int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset);
int32_t d0 = dp0 + dq0;
int32_t d3 = dp3 + dq3;
int32_t d = d0 + d3;
if (d >= beta)
continue;
int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
int32_t tc = s_tcTable[indexTC] << bitdepthShift;
bool sw = (2 * d0 < (beta >> 2) &&
2 * d3 < (beta >> 2) &&
useStrongFiltering(offset, beta, tc, src + unitOffset ) &&
useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3));
if (sw)
{
int32_t tc2 = 2 * tc;
int32_t tcP = (tc2 & maskP);
int32_t tcQ = (tc2 & maskQ);
primitives.pelFilterLumaStrong[dir](src + unitOffset, srcStep, offset, tcP, tcQ);
}
else
{
int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
int32_t dp = dp0 + dp3;
int32_t dq = dq0 + dq3;
int32_t maskP1 = (dp < sideThreshold ? -1 : 0);
int32_t maskQ1 = (dq < sideThreshold ? -1 : 0);
pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1);
}
}
}
void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
{
int32_t chFmt = cuQ->m_chromaFormat, chromaShift;
intptr_t offset, srcStep;
const PPS* pps = cuQ->m_slice->m_pps;
int32_t maskP = -1;
int32_t maskQ = -1;
int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
X265_CHECK(((dir == EDGE_VER)
? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift)
: ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
"invalid edge\n");
PicYuv* reconPic = cuQ->m_encData->m_reconPic;
intptr_t stride = reconPic->m_strideC;
intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx);
bool bCheckNoFilter = pps->bTransquantBypassEnabled;
if (dir == EDGE_VER)
{
chromaShift = cuQ->m_vChromaShift;
srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift));
offset = 1;
srcStep = stride;
}
else
{
chromaShift = cuQ->m_hChromaShift;
srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift);
offset = stride;
srcStep = 1;
}
pixel* srcChroma[2];
srcChroma[0] = reconPic->m_picOrg[1] + srcOffset;
srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
for (uint32_t idx = 0; idx < numUnits; idx++)
{
uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx << chromaShift);
uint32_t bs = blockStrength[partQ];
if (bs <= 1)
continue;
uint32_t partP;
const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
if (bCheckNoFilter)
{
maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
if (!(maskP | maskQ))
continue;
}
int32_t qpQ = cuQ->m_qp[partQ];
int32_t qpP = cuP->m_qp[partP];
int32_t qpA = (qpP + qpQ + 1) >> 1;
intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
{
int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
if (qp >= 30)
qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, QP_MAX_SPEC);
int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
const int32_t bitdepthShift = X265_DEPTH - 8;
int32_t tc = s_tcTable[indexTC] << bitdepthShift;
pixel* srcC = srcChroma[chromaIdx];
primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
}
}
}
/* tc lookup table used by edgeFilterLuma and edgeFilterChroma.
 * Both index it with a value clipped to
 * [0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET] and scale the entry by
 * << (X265_DEPTH - 8).  The table has 54 entries, so that clip range must not
 * exceed index 53 (presumably QP_MAX_SPEC is 51 -- confirm). */
const uint8_t Deblock::s_tcTable[54] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24
};
/* beta lookup table used by edgeFilterLuma.
 * It is indexed with a value clipped to [0, QP_MAX_SPEC] and the entry is
 * scaled by << (X265_DEPTH - 8).  The table has 52 entries, so that clip
 * range must not exceed index 51 (presumably QP_MAX_SPEC is 51 -- confirm). */
const uint8_t Deblock::s_betaTable[52] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
};