#ifndef X265_SEARCH_H
#define X265_SEARCH_H
#include "common.h"
#include "predict.h"
#include "quant.h"
#include "bitcost.h"
#include "framedata.h"
#include "yuv.h"
#include "threadpool.h"
#include "rdcost.h"
#include "entropy.h"
#include "motion.h"
// Profiling helpers. With DETAILED_CU_STATS enabled they update the m_stats
// entry selected by cu.m_encData->m_frameEncoderID; otherwise every macro
// expands to nothing.
#if DETAILED_CU_STATS
// Increments the `count` field and then declares a ScopedElapsedTime object
// called `name`, constructed from the `acc` field of the same m_stats entry.
// The expansion is two statements, so it must not be the sole body of an
// unbraced if/else/loop.
// NOTE(review): ScopedElapsedTime presumably adds the elapsed time to `acc`
// when it goes out of scope - confirm against its definition.
#define ProfileCUScopeNamed(name, cu, acc, count) \
m_stats[cu.m_encData->m_frameEncoderID].count++; \
ScopedElapsedTime name(m_stats[cu.m_encData->m_frameEncoderID].acc)
// Same as ProfileCUScopeNamed with the scope object named `timedScope`.
#define ProfileCUScope(cu, acc, count) ProfileCUScopeNamed(timedScope, cu, acc, count)
// Increments the `count` field only. The expansion already ends with ';'.
#define ProfileCounter(cu, count) m_stats[cu.m_encData->m_frameEncoderID].count++;
#else
// Profiling disabled: the helpers compile away entirely.
#define ProfileCUScopeNamed(name, cu, acc, count)
#define ProfileCUScope(cu, acc, count)
#define ProfileCounter(cu, count)
#endif
// Element count used for the NUM_SUBPART-sized arrays in this header.
// The replacement list is parenthesized so the macro behaves as one value
// inside larger expressions (e.g. `x / NUM_SUBPART`, `x % NUM_SUBPART`).
#define NUM_SUBPART (MAX_TS_SIZE * 4)
namespace X265_NS {
// Forward declarations. Entropy is used by value further down, so its full
// definition must already be visible (entropy.h is included above);
// ThreadLocalData is only referenced through a pointer in this header.
class Entropy;
struct ThreadLocalData;
// Bundle of entropy-coder objects and YUV/coefficient buffers.
// Search keeps an array of these sized NUM_FULL_DEPTH (m_rqt).
// NOTE(review): the per-member roles below are inferred from names only -
// confirm against the implementation before relying on them.
struct RQTData
{
// Entropy coder objects (four separate instances).
Entropy cur;
Entropy rqtRoot;
Entropy rqtTemp;
Entropy rqtTest;
// Three coefficient buffer pointers; presumably one per colour component - TODO confirm.
coeff_t* coeffRQT[3];
// Pixel (Yuv) and residual (ShortYuv) working buffers.
Yuv reconQtYuv;
ShortYuv resiQtYuv;
ShortYuv tmpResiYuv;
Yuv tmpPredYuv;
// Two prediction buffers; presumably one per reference list - TODO confirm.
Yuv bidirPredYuv[2];
};
/* Motion data record: a motion vector, a motion vector predictor with its
 * index, a reference index, and bit/cost figures. Mode stores a 2-D array of
 * these in bestME. */
struct MotionData
{
    MV       mv;
    MV       mvp;
    int      mvpIdx;
    int      ref;
    int      bits;
    uint32_t mvCost;
    uint32_t cost;

    /* A freshly constructed record has every byte of the object cleared. */
    MotionData()
    {
        memset(this, 0, sizeof(*this));
    }
};
/* One candidate coding mode: the CU data, prediction/reconstruction buffers,
 * an entropy coder object, motion data and the accumulated cost figures. */
struct Mode
{
    CUData     cu;
    const Yuv* fencYuv;
    Yuv        predYuv;
    Yuv        reconYuv;
    Entropy    contexts;

    enum { MAX_INTER_PARTS = 2 };

    MotionData       bestME[MAX_INTER_PARTS][2];
    MV               amvpCand[2][MAX_NUM_REF][AMVP_NUM_CANDS];
    InterNeighbourMV interNeighbours[6];

    /* Cost figures; all of them are reset by initCosts() and summed by
     * addSubCosts(). */
    uint64_t   rdCost;
    uint64_t   sa8dCost;
    uint32_t   sa8dBits;
    uint32_t   psyEnergy;
    uint32_t   ssimEnergy;
    sse_t      resEnergy;
    sse_t      lumaDistortion;
    sse_t      chromaDistortion;
    sse_t      distortion;
    uint32_t   totalBits;
    uint32_t   mvBits;
    uint32_t   coeffBits;

    /* Reset every cost/bit/distortion/energy field to zero. Assignments are
     * chained only between fields of the same type. */
    void initCosts()
    {
        rdCost = sa8dCost = 0;
        sa8dBits = psyEnergy = ssimEnergy = 0;
        resEnergy = lumaDistortion = chromaDistortion = distortion = 0;
        totalBits = mvBits = coeffBits = 0;
    }

    /* Add each cost/bit/distortion/energy field of subMode onto the matching
     * field of this mode. No other member is touched. */
    void addSubCosts(const Mode& subMode)
    {
        /* rate-distortion and sa8d figures */
        rdCost   += subMode.rdCost;
        sa8dCost += subMode.sa8dCost;
        sa8dBits += subMode.sa8dBits;

        /* bit counts */
        totalBits += subMode.totalBits;
        mvBits    += subMode.mvBits;
        coeffBits += subMode.coeffBits;

        /* distortions */
        distortion       += subMode.distortion;
        lumaDistortion   += subMode.lumaDistortion;
        chromaDistortion += subMode.chromaDistortion;

        /* energies */
        psyEnergy  += subMode.psyEnergy;
        ssimEnergy += subMode.ssimEnergy;
        resEnergy  += subMode.resEnergy;
    }
};
#if DETAILED_CU_STATS
/* Profiling counters and elapsed-time accumulators, compiled in only when
 * DETAILED_CU_STATS is enabled. Arrays have NUM_CU_DEPTH entries. */
struct CUStats
{
    int64_t  intraRDOElapsedTime[NUM_CU_DEPTH];
    int64_t  interRDOElapsedTime[NUM_CU_DEPTH];
    int64_t  intraAnalysisElapsedTime;
    int64_t  motionEstimationElapsedTime;
    int64_t  loopFilterElapsedTime;
    int64_t  pmeTime;
    int64_t  pmeBlockTime;
    int64_t  pmodeTime;
    int64_t  pmodeBlockTime;
    int64_t  weightAnalyzeTime;
    int64_t  totalCTUTime;
    uint32_t skippedMotionReferences[NUM_CU_DEPTH];
    uint32_t totalMotionReferences[NUM_CU_DEPTH];
    uint32_t skippedIntraCU[NUM_CU_DEPTH];
    uint32_t totalIntraCU[NUM_CU_DEPTH];
    uint64_t countIntraRDO[NUM_CU_DEPTH];
    uint64_t countInterRDO[NUM_CU_DEPTH];
    uint64_t countIntraAnalysis;
    uint64_t countMotionEstimate;
    uint64_t countLoopFilter;
    uint64_t countPMETasks;
    uint64_t countPMEMasters;
    uint64_t countPModeTasks;
    uint64_t countPModeMasters;
    uint64_t countWeightAnalyze;
    uint64_t totalCTUs;

    /* Every statistic starts at zero. */
    CUStats()
    {
        clear();
    }

    /* Zero the whole object. */
    void clear()
    {
        memset(this, 0, sizeof(*this));
    }

    /* Fold `other` into this object and then clear `other`.
     * Array entries are merged for indices 0 .. param.maxCUDepth inclusive;
     * scalar fields are always merged. */
    void accumulate(CUStats& other, x265_param& param)
    {
        for (uint32_t depth = 0; depth <= param.maxCUDepth; ++depth)
        {
            /* elapsed times */
            intraRDOElapsedTime[depth] += other.intraRDOElapsedTime[depth];
            interRDOElapsedTime[depth] += other.interRDOElapsedTime[depth];

            /* event counts */
            countIntraRDO[depth] += other.countIntraRDO[depth];
            countInterRDO[depth] += other.countInterRDO[depth];

            /* skipped / total tallies */
            skippedMotionReferences[depth] += other.skippedMotionReferences[depth];
            totalMotionReferences[depth]   += other.totalMotionReferences[depth];
            skippedIntraCU[depth]          += other.skippedIntraCU[depth];
            totalIntraCU[depth]            += other.totalIntraCU[depth];
        }

        /* scalar elapsed times */
        intraAnalysisElapsedTime    += other.intraAnalysisElapsedTime;
        motionEstimationElapsedTime += other.motionEstimationElapsedTime;
        loopFilterElapsedTime       += other.loopFilterElapsedTime;
        pmeTime                     += other.pmeTime;
        pmeBlockTime                += other.pmeBlockTime;
        pmodeTime                   += other.pmodeTime;
        pmodeBlockTime              += other.pmodeBlockTime;
        weightAnalyzeTime           += other.weightAnalyzeTime;
        totalCTUTime                += other.totalCTUTime;

        /* scalar counts */
        countIntraAnalysis  += other.countIntraAnalysis;
        countMotionEstimate += other.countMotionEstimate;
        countLoopFilter     += other.countLoopFilter;
        countPMETasks       += other.countPMETasks;
        countPMEMasters     += other.countPMEMasters;
        countPModeTasks     += other.countPModeTasks;
        countPModeMasters   += other.countPModeMasters;
        countWeightAnalyze  += other.countWeightAnalyze;
        totalCTUs           += other.totalCTUs;

        /* the source has been consumed; reset it */
        other.clear();
    }
};
#endif
/* Returns idx, plus one when idx is strictly below numIdx - 1.
 * NOTE(review): this looks like the length of a truncated-unary code for
 * idx out of numIdx symbols - confirm with the callers. */
inline int getTUBits(int idx, int numIdx)
{
    const bool needsTerminator = idx < numIdx - 1;
    return needsTerminator ? idx + 1 : idx;
}
/* Search extends Predict. It declares the intra/inter search entry points
 * together with the buffers and helper objects they operate on. Only
 * updateModeCost() and the small nested types are defined in this header. */
class Search : public Predict
{
public:

    static const int16_t zeroShort[MAX_CU_SIZE];

    /* helper objects */
    MotionEstimate  m_me;
    Quant           m_quant;
    RDCost          m_rdCost;
    const x265_param* m_param;
    Frame*          m_frame;
    const Slice*    m_slice;

    Entropy         m_entropyCoder;
    RQTData         m_rqt[NUM_FULL_DEPTH];

    /* raw working buffers (pointers only; no ownership is visible here) */
    uint8_t*        m_qtTempCbf[3];
    uint8_t*        m_qtTempTransformSkipFlag[3];

    pixel*          m_fencScaled;
    pixel*          m_fencTransposed;
    pixel*          m_intraPred;
    pixel*          m_intraPredAngs;

    coeff_t*        m_tsCoeff;
    int16_t*        m_tsResidual;
    pixel*          m_tsRecon;

    /* configuration / limits */
    bool            m_bFrameParallel;
    uint32_t        m_numLayers;
    uint32_t        m_refLagPixels;

    int32_t         m_maxTUDepth;
    uint16_t        m_limitTU;

    int16_t         m_sliceMaxY;
    int16_t         m_sliceMinY;

#if DETAILED_CU_STATS
    /* One statistics block per frame encoder; the Profile* macros index it
     * with cu.m_encData->m_frameEncoderID. */
    CUStats         m_stats[X265_MAX_FRAME_THREADS];
#endif

    Search();
    ~Search();

    bool     initSearch(const x265_param& param, ScalingList& scalingList);
    int      setLambdaFromQP(const CUData& ctu, int qp, int lambdaQP = -1);

    void     invalidateContexts(int fromDepth);

    /* intra search entry points */
    void     checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSizes);
    void     checkIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
    void     encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom);

    /* inter search entry points */
    void     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t masks[2]);
    void     searchMV(Mode& interMode, const PredictionUnit& pu, int list, int ref, MV& outmv);

    /* residual coding entry points */
    void     encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);
    void     encodeResAndCalcRdSkipCU(Mode& interMode);

    void     residualTransformQuantInter(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]);
    void     residualTransformQuantIntra(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]);
    void     residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth);

    void     getBestIntraModeChroma(Mode& intraMode, const CUGeom& cuGeom);

    void     checkDQP(Mode& mode, const CUGeom& cuGeom);
    void     checkDQPForSplitPred(Mode& mode, const CUGeom& cuGeom);

    MV       getLowresMV(const CUData& cu, const PredictionUnit& pu, int list, int ref);

    /* Bonded task group that carries references to the master Search, the
     * Mode, the CU geometry and the prediction unit it was created for, plus
     * the PU index and a table of reference indices (m_jobs). */
    class PME : public BondedTaskGroup
    {
    public:

        Search&               master;
        Mode&                 mode;
        const CUGeom&         cuGeom;
        const PredictionUnit& pu;
        int                   puIdx;

        /* NOTE(review): the outer dimension of 2 is presumably the
         * reference list - confirm against processTasks(). */
        struct {
            int ref[2][MAX_NUM_REF];
            int refCnt[2];
        } m_jobs;

        PME(Search& s, Mode& m, const CUGeom& g, const PredictionUnit& u, int p) : master(s), mode(m), cuGeom(g), pu(u), puIdx(p) {}

        void processTasks(int workerThreadId);

    protected:

        /* Declared without a body in this header.
         * NOTE(review): presumably there to forbid assignment, since the
         * class holds reference members - confirm. */
        PME operator=(const PME&);
    };

    void     processPME(PME& pme, Search& slave);
    void     singleMotionEstimation(Search& master, Mode& interMode, const PredictionUnit& pu, int part, int list, int ref);

protected:

    ThreadLocalData* m_tld;

    uint32_t      m_listSelBits[3];
    Lock          m_meLock;

    void     saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t tuDepth);

    sse_t    estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2]);
    sse_t    estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom);

    void     codeSubdivCbfQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx);
    void     codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t tuDepth, const uint32_t depthRange[2]);
    void     codeCoeffQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx, TextType ttype);

    /* Cost tuple passed to and from the TU coding helpers below. Every field
     * is zero after construction. */
    struct Cost
    {
        uint64_t rdcost;
        uint32_t bits;
        sse_t    distortion;
        uint32_t energy;

        Cost() : rdcost(0), bits(0), distortion(0), energy(0) {}
    };

    /* Cache with NUM_SUBPART entries per array. */
    struct TUInfoCache
    {
        Cost cost[NUM_SUBPART];
        uint32_t bestTransformMode[NUM_SUBPART][MAX_NUM_COMPONENT][2];
        uint8_t cbfFlag[NUM_SUBPART][MAX_NUM_COMPONENT][2];
        Entropy rqtStore[NUM_SUBPART];
    } m_cacheTU;

    uint64_t estimateNullCbfCost(sse_t dist, uint32_t energy, uint32_t tuDepth, TextType compId);
    bool     splitTU(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, ShortYuv& resiYuv, Cost& splitCost, const uint32_t depthRange[2], int32_t splitMore);
    void     estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, const uint32_t depthRange[2], int32_t splitMore = -1);

    void     codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& costs, const uint32_t depthRange[2]);
    void     codeIntraLumaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, Cost& costs);
    void     extractIntraResultQT(CUData& cu, Yuv& reconYuv, uint32_t tuDepth, uint32_t absPartIdx);

    void     codeIntraChromaQt(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, Cost& outCost);
    void     codeIntraChromaTSkip(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t tuDepthC, uint32_t absPartIdx, Cost& outCost);
    void     extractIntraResultChromaQT(CUData& cu, Yuv& reconYuv, uint32_t absPartIdx, uint32_t tuDepth);

    void     offsetSubTUCBFs(CUData& cu, TextType ttype, uint32_t tuDepth, uint32_t absPartIdx);

    /* Output record filled by mergeEstimation(). */
    struct MergeData
    {
        MVField  mvField[2];
        uint32_t dir;
        uint32_t index;
        uint32_t bits;
    };

    int      selectMVP(const CUData& cu, const PredictionUnit& pu, const MV amvp[AMVP_NUM_CANDS], int list, int ref);
    const MV& checkBestMVP(const MV amvpCand[2], const MV& mv, int& mvpIdx, uint32_t& outBits, uint32_t& outCost) const;
    void     setSearchRange(const CUData& cu, const MV& mvp, int merange, MV& mvmin, MV& mvmax) const;
    uint32_t mergeEstimation(CUData& cu, const CUGeom& cuGeom, const PredictionUnit& pu, int puIdx, MergeData& m);
    static void getBlkBits(PartSize cuMode, bool bPSlice, int puIdx, uint32_t lastMode, uint32_t blockBit[3]);

    enum { MAX_RD_INTRA_MODES = 16 };
    static void updateCandList(uint32_t mode, uint64_t cost, int maxCandCount, uint32_t* candModeList, uint64_t* candCostList);

    uint32_t getIntraRemModeBits(CUData & cu, uint32_t absPartIdx, uint32_t mpmModes[3], uint64_t& mpms) const;

    /* Recompute m.rdCost from m.distortion and m.totalBits. The psy cost is
     * used when m_rdCost.m_psyRd is set, otherwise the SSIM cost when
     * m_rdCost.m_ssimRd is set, otherwise the plain RD cost. */
    void updateModeCost(Mode& m) const
    {
        if (m_rdCost.m_psyRd)
            m.rdCost = m_rdCost.calcPsyRdCost(m.distortion, m.totalBits, m.psyEnergy);
        else if (m_rdCost.m_ssimRd)
            m.rdCost = m_rdCost.calcSsimRdCost(m.distortion, m.totalBits, m.ssimEnergy);
        else
            m.rdCost = m_rdCost.calcRdCost(m.distortion, m.totalBits);
    }
};
}
#endif