This source file includes the following definitions.
- ALLOC_ALIGNED
 
- de265_alloc_image_plane
 
- de265_free_image_plane
 
- de265_image_get_buffer
 
- de265_image_release_buffer
 
- set_image_plane
 
- alloc_image
 
- release
 
- fill_image
 
- copy_image
 
- copy_lines_from
 
- exchange_pixel_data_with
 
- thread_start
 
- thread_run
 
- thread_blocks
 
- thread_unblocks
 
- thread_finishes
 
- wait_for_progress
 
- wait_for_progress
 
- wait_for_completion
 
- debug_is_completed
 
- clear_metadata
 
- set_mv_info
 
- available_zscan
 
- available_pred_blk
 
#include "image.h"
#include "decctx.h"
#include "encoder/encoder-context.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#ifdef HAVE_SSE4_1
#define MEMORY_PADDING  8
#else
#define MEMORY_PADDING  0
#endif
#define STANDARD_ALIGNMENT 16
#ifdef HAVE___MINGW_ALIGNED_MALLOC
#define ALLOC_ALIGNED(alignment, size)         __mingw_aligned_malloc((size), (alignment))
#define FREE_ALIGNED(mem)                      __mingw_aligned_free((mem))
#elif _WIN32
#define ALLOC_ALIGNED(alignment, size)         _aligned_malloc((size), (alignment))
#define FREE_ALIGNED(mem)                      _aligned_free((mem))
#elif defined(HAVE_POSIX_MEMALIGN)
// Allocates `size` bytes aligned to `alignment` through posix_memalign().
// Returns the allocated block, or NULL when posix_memalign() reports an error.
// The block is released with free() (see FREE_ALIGNED for this configuration).
static inline void *ALLOC_ALIGNED(size_t alignment, size_t size) {
    void *block = NULL;
    const int rc = posix_memalign(&block, alignment, size);
    return (rc == 0) ? block : NULL;
}
#define FREE_ALIGNED(mem)                      free((mem))
#else
#define ALLOC_ALIGNED(alignment, size)      memalign((alignment), (size))
#define FREE_ALIGNED(mem)                   free((mem))
#endif
#define ALLOC_ALIGNED_16(size)              ALLOC_ALIGNED(16, size)
// File-scope alignment value (16 bytes).
// NOTE(review): no use of this constant is visible in this part of the file;
// de265_alloc_image_plane() declares its own local `alignment` that shadows it — confirm
// whether it is still needed.
static const int alignment = 16;
// Allocates an aligned pixel buffer for plane `cIdx` of `img` and registers it with the
// image through set_image_plane().
//
//   img         - image that receives the plane
//   cIdx        - plane index (passed to get_width()/get_height()/set_image_plane())
//   inputdata   - optional source pixels to copy into the new plane (may be NULL)
//   inputstride - distance in bytes between two rows of `inputdata`
//   userdata    - opaque pointer stored together with the plane
//
// The plane stride is the plane width rounded up to STANDARD_ALIGNMENT.
// Returns the new buffer, or NULL if the allocation failed (the image is left untouched
// in that case).
LIBDE265_API void* de265_alloc_image_plane(struct de265_image* img, int cIdx,
                                           void* inputdata, int inputstride, void *userdata)
{
  int alignment = STANDARD_ALIGNMENT;
  int stride = (img->get_width(cIdx) + alignment-1) / alignment * alignment;
  int height = img->get_height(cIdx);

  uint8_t* p = (uint8_t *)ALLOC_ALIGNED_16(stride * height + MEMORY_PADDING);
  if (p==NULL) { return NULL; }

  img->set_image_plane(cIdx, p, stride, userdata);

  // copy input data if provided

  if (inputdata != NULL) {
    if (inputstride == stride) {
      // identical layout: copy the whole plane at once
      memcpy(p, inputdata, stride*height);
    }
    else {
      // Row-by-row copy. Never copy more than one destination row: an input stride that
      // is larger than our stride would otherwise write past the end of the buffer on
      // the last row.
      const int bytes_per_row = (inputstride < stride) ? inputstride : stride;

      for (int y=0;y<height;y++) {
        memcpy(p+y*stride, ((char*)inputdata) + inputstride*y, bytes_per_row);
      }
    }
  }

  return p;
}
// Frees the pixel buffer of plane `cIdx` of `img` with FREE_ALIGNED().
// The plane must have a buffer attached (checked by assert). The pointer stored in the
// image is not modified by this function.
LIBDE265_API void de265_free_image_plane(struct de265_image* img, int cIdx)
{
  uint8_t* plane = (uint8_t*)img->get_image_plane(cIdx);

  assert(plane);

  FREE_ALIGNED(plane);
}
// Default get_buffer callback: allocates the pixel planes of `img` as described by `spec`.
//
// Strides are expressed in samples (width rounded up to spec->alignment); the number of
// bytes per line additionally accounts for the bit depth (1 byte up to 8 bit, 2 bytes
// above). Chroma planes are only allocated when the image is not monochrome.
//
// `ctx` and `userdata` are not used by this implementation.
// Returns 1 on success, 0 if any allocation failed (nothing stays allocated in that case
// and the image planes are not modified).
static int  de265_image_get_buffer(de265_decoder_context* ctx,
                                   de265_image_spec* spec, de265_image* img, void* userdata)
{
  // Round up, so that odd luma dimensions still cover every chroma sample. alloc_image()
  // derives chroma_width/chroma_height with the same rounding; truncating here would make
  // the chroma planes too small for those dimensions.
  const int rawChromaWidth  = (spec->width  + img->SubWidthC  - 1) / img->SubWidthC;
  const int rawChromaHeight = (spec->height + img->SubHeightC - 1) / img->SubHeightC;

  int luma_stride   = (spec->width    + spec->alignment-1) / spec->alignment * spec->alignment;
  int chroma_stride = (rawChromaWidth + spec->alignment-1) / spec->alignment * spec->alignment;

  assert(img->BitDepth_Y >= 8 && img->BitDepth_Y <= 16);
  assert(img->BitDepth_C >= 8 && img->BitDepth_C <= 16);

  // bytes per line
  int luma_bpl   = luma_stride   * ((img->BitDepth_Y+7)/8);
  int chroma_bpl = chroma_stride * ((img->BitDepth_C+7)/8);

  int luma_height   = spec->height;
  int chroma_height = rawChromaHeight;

  bool alloc_failed = false;

  uint8_t* p[3] = { 0,0,0 };
  p[0] = (uint8_t *)ALLOC_ALIGNED_16(luma_height   * luma_bpl   + MEMORY_PADDING);
  if (p[0]==NULL) { alloc_failed=true; }

  if (img->get_chroma_format() != de265_chroma_mono) {
    p[1] = (uint8_t *)ALLOC_ALIGNED_16(chroma_height * chroma_bpl + MEMORY_PADDING);
    p[2] = (uint8_t *)ALLOC_ALIGNED_16(chroma_height * chroma_bpl + MEMORY_PADDING);

    if (p[1]==NULL || p[2]==NULL) { alloc_failed=true; }
  }
  else {
    // monochrome: no chroma planes
    p[1] = NULL;
    p[2] = NULL;
    chroma_stride = 0;
  }

  if (alloc_failed) {
    // release whatever was successfully allocated
    for (int i=0;i<3;i++)
      if (p[i]) {
        FREE_ALIGNED(p[i]);
      }

    return 0;
  }

  img->set_image_plane(0, p[0], luma_stride, NULL);
  img->set_image_plane(1, p[1], chroma_stride, NULL);
  img->set_image_plane(2, p[2], chroma_stride, NULL);

  return 1;
}
// Default release_buffer callback: frees every plane of `img` that has a buffer attached.
// `ctx` and `userdata` are not used. The plane pointers stored in the image are not
// cleared here.
static void de265_image_release_buffer(de265_decoder_context* ctx,
                                       de265_image* img, void* userdata)
{
  for (int plane=0; plane<3; plane++) {
    uint8_t* mem = (uint8_t*)img->get_image_plane(plane);
    if (mem == NULL) {
      continue;   // plane not allocated
    }

    FREE_ALIGNED(mem);
  }
}
// Built-in allocation callbacks, initialized with de265_image_get_buffer and
// de265_image_release_buffer. alloc_image() copies this into image_allocation_functions
// whenever no custom allocator applies.
de265_image_allocation de265_image::default_image_allocation = {
  de265_image_get_buffer,
  de265_image_release_buffer
};
// Attaches the buffer `mem` (and its associated `userdata`) as plane `cIdx`.
// `stride` is stored as the luma stride for plane 0 and as the shared chroma stride for
// every other plane.
void de265_image::set_image_plane(int cIdx, uint8_t* mem, int stride, void *userdata)
{
  if (cIdx != 0) {
    this->chroma_stride = stride;
  }
  else {
    this->stride = stride;
  }

  plane_user_data[cIdx] = userdata;
  pixels[cIdx] = mem;
}
// Source of image IDs: alloc_image() assigns the current value to ID and then increments it.
uint32_t de265_image::s_next_image_ID = 0;
// Creates an empty image: no pixel planes, no owning context, not decoded, and no
// decoding threads registered. Also initializes the mutex and the condition variable
// used for thread bookkeeping.
de265_image::de265_image()
{
  // identity and ownership
  ID = -1;
  removed_at_picture_id = 0;
  decctx = NULL;
  encctx = NULL;
  encoder_image_release_func = NULL;

  // pixel planes
  for (int plane=0; plane<3; plane++) {
    plane_user_data[plane] = NULL;
    pixels_confwin[plane] = NULL;
    pixels[plane] = NULL;
  }

  width  = 0;
  height = 0;

  // user supplied data
  pts = 0;
  user_data = NULL;

  // decoding state
  ctb_progress = NULL;
  integrity = INTEGRITY_NOT_DECODED;
  picture_order_cnt_lsb = -1;
  PicOrderCntVal = -1;
  PicState = UnusedForReference;
  PicOutputFlag = false;

  // thread counters
  nThreadsTotal    = 0;
  nThreadsQueued   = 0;
  nThreadsRunning  = 0;
  nThreadsBlocked  = 0;
  nThreadsFinished = 0;

  de265_mutex_init(&mutex);
  de265_cond_init(&finished_cond);
}
// (Re)allocates this image.
//
//   w, h               - luma size in samples
//   c                  - chroma format; determines chroma size and SubWidthC/SubHeightC
//   sps                - sequence parameter set; may be empty unless allocMetadata is set.
//                        Supplies the conformance window and the bit depths (8 bit and an
//                        empty window are assumed without it).
//   allocMetadata      - also allocate the per-block metadata arrays (requires sps)
//   dctx, ectx         - owning decoder / encoder context (either may be NULL)
//   pts, user_data     - stored in the image
//   useCustomAllocFunc - use the allocation callbacks of the context instead of the
//                        built-in default allocator
//
// Any buffers held from a previous allocation are released first, and the image gets a
// new ID.
//
// Returns DE265_OK, or DE265_ERROR_OUT_OF_MEMORY if the pixel buffers or the metadata
// arrays could not be allocated.
de265_error de265_image::alloc_image(int w,int h, enum de265_chroma c,
                                     std::shared_ptr<const seq_parameter_set> sps, bool allocMetadata,
                                     decoder_context* dctx,
                                     encoder_context* ectx,
                                     de265_PTS pts, void* user_data,
                                     bool useCustomAllocFunc)
{
  // the metadata arrays are sized from the SPS
  if (allocMetadata) { assert(sps); }

  if (sps) { this->sps = sps; }

  release(); // free buffers of a previous allocation

  ID = s_next_image_ID++;
  removed_at_picture_id = std::numeric_limits<int32_t>::max();

  decctx = dctx;
  encctx = ectx;

  // --- image size and format ---

  chroma_format= c;

  width = w;
  height = h;
  chroma_width = w;
  chroma_height= h;

  this->user_data = user_data;
  this->pts = pts;

  de265_image_spec spec;

  // Units of the conformance window offsets. The defaults keep the values defined in
  // builds where assert() is compiled out and an unknown chroma format is passed.
  int WinUnitX = 1;
  int WinUnitY = 1;

  switch (chroma_format) {
  case de265_chroma_mono: WinUnitX=1; WinUnitY=1; break;
  case de265_chroma_420:  WinUnitX=2; WinUnitY=2; break;
  case de265_chroma_422:  WinUnitX=2; WinUnitY=1; break;
  case de265_chroma_444:  WinUnitX=1; WinUnitY=1; break;
  default:
    assert(0);
  }

  switch (chroma_format) {
  case de265_chroma_420:
    spec.format = de265_image_format_YUV420P8;
    chroma_width  = (chroma_width +1)/2;
    chroma_height = (chroma_height+1)/2;
    SubWidthC  = 2;
    SubHeightC = 2;
    break;

  case de265_chroma_422:
    spec.format = de265_image_format_YUV422P8;
    chroma_width = (chroma_width+1)/2;
    SubWidthC  = 2;
    SubHeightC = 1;
    break;

  case de265_chroma_444:
    spec.format = de265_image_format_YUV444P8;
    SubWidthC  = 1;
    SubHeightC = 1;
    break;

  case de265_chroma_mono:
    spec.format = de265_image_format_mono8;
    chroma_width = 0;
    chroma_height= 0;
    SubWidthC  = 1;
    SubHeightC = 1;
    break;

  default:
    assert(false);
    break;
  }

  if (sps) {
    assert(sps->SubWidthC  == SubWidthC);
    assert(sps->SubHeightC == SubHeightC);
  }

  spec.width  = w;
  spec.height = h;
  spec.alignment = STANDARD_ALIGNMENT;

  // --- conformance window ---

  int left   = sps ? sps->conf_win_left_offset : 0;
  int right  = sps ? sps->conf_win_right_offset : 0;
  int top    = sps ? sps->conf_win_top_offset : 0;
  int bottom = sps ? sps->conf_win_bottom_offset : 0;

  width_confwin = width - (left+right)*WinUnitX;
  height_confwin= height- (top+bottom)*WinUnitY;
  chroma_width_confwin = chroma_width -left-right;
  chroma_height_confwin= chroma_height-top-bottom;

  spec.crop_left  = left *WinUnitX;
  spec.crop_right = right*WinUnitX;
  spec.crop_top   = top   *WinUnitY;
  spec.crop_bottom= bottom*WinUnitY;

  spec.visible_width = width_confwin;
  spec.visible_height= height_confwin;

  // --- bit depth ---

  BitDepth_Y = (sps==NULL) ? 8 : sps->BitDepth_Y;
  BitDepth_C = (sps==NULL) ? 8 : sps->BitDepth_C;

  bpp_shift[0] = (BitDepth_Y <= 8) ? 0 : 1;
  bpp_shift[1] = (BitDepth_C <= 8) ? 0 : 1;
  bpp_shift[2] = bpp_shift[1];

  // --- select the allocation functions ---

  void* alloc_userdata = NULL;
  if (decctx) alloc_userdata = decctx->param_image_allocation_userdata;
  if (encctx) alloc_userdata = encctx->param_image_allocation_userdata; // encoder takes precedence

  if (encctx && useCustomAllocFunc) {
    encoder_image_release_func = encctx->release_func;

    if (encoder_image_release_func == NULL) {
      // encoder has no release function: fall back to the built-in allocator
      image_allocation_functions = de265_image::default_image_allocation;
    }
    else {
      // no get_buffer: nothing is allocated below; the image is released through
      // encoder_image_release_func (see release())
      image_allocation_functions.get_buffer     = NULL;
      image_allocation_functions.release_buffer = NULL;
    }
  }
  else if (decctx && useCustomAllocFunc) {
    image_allocation_functions = decctx->param_image_allocation_functions;
  }
  else {
    image_allocation_functions = de265_image::default_image_allocation;
  }

  // --- allocate the pixel memory ---

  bool mem_alloc_success = true;

  if (image_allocation_functions.get_buffer != NULL) {
    mem_alloc_success = image_allocation_functions.get_buffer(decctx, &spec, this,
                                                              alloc_userdata);

    // Check for memory shortage before touching the planes: deriving the conformance
    // window pointers from planes that were never attached would be pointer arithmetic
    // on NULL and would leave bogus non-NULL pointers behind.
    if (!mem_alloc_success)
      {
        return DE265_ERROR_OUT_OF_MEMORY;
      }

    pixels_confwin[0] = pixels[0] + left*WinUnitX + top*WinUnitY*stride;

    if (chroma_format != de265_chroma_mono) {
      pixels_confwin[1] = pixels[1] + left + top*chroma_stride;
      pixels_confwin[2] = pixels[2] + left + top*chroma_stride;
    }
    else {
      // monochrome images have no chroma planes
      pixels_confwin[1] = NULL;
      pixels_confwin[2] = NULL;
    }
  }

  // --- allocate the decoding metadata ---

  if (allocMetadata) {
    // intra prediction modes

    mem_alloc_success &= intraPredMode.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs,
                                             sps->Log2MinPUSize);

    mem_alloc_success &= intraPredModeC.alloc(sps->PicWidthInMinPUs, sps->PicHeightInMinPUs,
                                              sps->Log2MinPUSize);

    // coding block info

    mem_alloc_success &= cb_info.alloc(sps->PicWidthInMinCbsY, sps->PicHeightInMinCbsY,
                                       sps->Log2MinCbSizeY);

    // prediction block info, stored in units of 4x4 samples

    int puWidth  = sps->PicWidthInMinCbsY  << (sps->Log2MinCbSizeY -2);
    int puHeight = sps->PicHeightInMinCbsY << (sps->Log2MinCbSizeY -2);

    mem_alloc_success &= pb_info.alloc(puWidth,puHeight, 2);

    // transform unit info

    mem_alloc_success &= tu_info.alloc(sps->PicWidthInTbsY, sps->PicHeightInTbsY,
                                       sps->Log2MinTrafoSize);

    // deblocking info, stored in units of 4x4 samples

    int deblk_w = (sps->pic_width_in_luma_samples +3)/4;
    int deblk_h = (sps->pic_height_in_luma_samples+3)/4;

    mem_alloc_success &= deblk_info.alloc(deblk_w, deblk_h, 2);

    // CTB info and progress locks (only reallocated when the number of CTBs changes)

    if (ctb_info.data_size != sps->PicSizeInCtbsY)
      {
        delete[] ctb_progress;
        ctb_progress = NULL;  // do not leave a dangling pointer if the code below throws

        mem_alloc_success &= ctb_info.alloc(sps->PicWidthInCtbsY, sps->PicHeightInCtbsY,
                                            sps->Log2CtbSizeY);

        ctb_progress = new de265_progress_lock[ ctb_info.data_size ];
      }

    // check for memory shortage

    if (!mem_alloc_success)
      {
        return DE265_ERROR_OUT_OF_MEMORY;
      }
  }

  return DE265_OK;
}
// Releases the pixel buffers and slice headers, frees the per-CTB progress locks and
// destroys the synchronization primitives created by the constructor.
de265_image::~de265_image()
{
  release();

  // delete[] on a NULL pointer is a no-op, so no explicit check is required
  delete[] ctb_progress;

  de265_cond_destroy(&finished_cond);
  de265_mutex_destroy(&mutex);
}
// Gives the pixel buffers back to whoever allocated them and deletes all slice headers
// attached to this image.
//
// Buffers are only released when plane 0 is set. The encoder release function is used
// when one is registered; otherwise the release_buffer callback of
// image_allocation_functions is called.
void de265_image::release()
{
  // free the image memory

  const bool has_pixel_buffers = (pixels[0] != NULL);

  if (has_pixel_buffers) {
    if (encoder_image_release_func == NULL) {
      image_allocation_functions.release_buffer(decctx, this,
                                                decctx ?
                                                  decctx->param_image_allocation_userdata :
                                                  NULL);
    }
    else {
      encoder_image_release_func(encctx, this,
                                 encctx->param_image_allocation_userdata);
    }

    // the planes are gone now, forget the pointers
    for (int plane=0; plane<3; plane++) {
      pixels_confwin[plane] = NULL;
      pixels[plane] = NULL;
    }
  }

  // free the slices

  for (size_t n=0; n<slices.size(); n++) {
    delete slices[n];
  }

  slices.clear();
}
// Fills the planes with constant byte values.
//
//   y, cb, cr - fill value for plane 0, 1 and 2; a negative value leaves that plane
//               untouched
//
// Planes without a buffer (e.g. the chroma planes of a monochrome image) are skipped:
// passing a NULL pointer to memset() is undefined behaviour, even with a length of zero.
//
// NOTE(review): the fill length is stride*height bytes, while the default allocator sizes
// planes with more than 8 bits per sample at two bytes per sample — in that case only
// part of the plane is written. Confirm whether callers only use this for 8 bit images.
void de265_image::fill_image(int y,int cb,int cr)
{
  if (y>=0 && pixels[0]!=NULL) {
    memset(pixels[0], y, stride * height);
  }

  if (cb>=0 && pixels[1]!=NULL) {
    memset(pixels[1], cb, chroma_stride * chroma_height);
  }

  if (cr>=0 && pixels[2]!=NULL) {
    memset(pixels[2], cr, chroma_stride * chroma_height);
  }
}
// Makes this image a copy of the pixel data of `src`.
//
// The image is reallocated with the size, chroma format, SPS, contexts, pts and user data
// of `src`. No metadata is allocated and the built-in allocator is used (both flags are
// passed as false). Returns the error of alloc_image() if the allocation fails; nothing
// is copied in that case.
de265_error de265_image::copy_image(const de265_image* src)
{
  const de265_error result = alloc_image(src->width, src->height, src->chroma_format,
                                         src->sps, false,
                                         src->decctx, src->encctx,
                                         src->pts, src->user_data, false);

  if (result == DE265_OK) {
    copy_lines_from(src, 0, src->height);
  }

  return result;
}
void de265_image::copy_lines_from(const de265_image* src, int first, int end)
{
  if (end > src->height) end=src->height;
  assert(first % 2 == 0);
  assert(end   % 2 == 0);
  int luma_bpp   = (sps->BitDepth_Y+7)/8;
  int chroma_bpp = (sps->BitDepth_C+7)/8;
  if (src->stride == stride) {
    memcpy(pixels[0]      + first*stride * luma_bpp,
           src->pixels[0] + first*src->stride * luma_bpp,
           (end-first)*stride * luma_bpp);
  }
  else {
    for (int yp=first;yp<end;yp++) {
      memcpy(pixels[0]+yp*stride * luma_bpp,
             src->pixels[0]+yp*src->stride * luma_bpp,
             src->width * luma_bpp);
    }
  }
  int first_chroma = first / src->SubHeightC;
  int end_chroma   = end   / src->SubHeightC;
  if (src->chroma_format != de265_chroma_mono) {
    if (src->chroma_stride == chroma_stride) {
      memcpy(pixels[1]      + first_chroma*chroma_stride * chroma_bpp,
             src->pixels[1] + first_chroma*chroma_stride * chroma_bpp,
             (end_chroma-first_chroma) * chroma_stride * chroma_bpp);
      memcpy(pixels[2]      + first_chroma*chroma_stride * chroma_bpp,
             src->pixels[2] + first_chroma*chroma_stride * chroma_bpp,
             (end_chroma-first_chroma) * chroma_stride * chroma_bpp);
    }
    else {
      for (int y=first_chroma;y<end_chroma;y++) {
        memcpy(pixels[1]+y*chroma_stride * chroma_bpp,
               src->pixels[1]+y*src->chroma_stride * chroma_bpp,
               src->chroma_width * chroma_bpp);
        memcpy(pixels[2]+y*chroma_stride * chroma_bpp,
               src->pixels[2]+y*src->chroma_stride * chroma_bpp,
               src->chroma_width * chroma_bpp);
      }
    }
  }
}
// Swaps the pixel storage of this image with that of `b`: the plane pointers, the
// conformance window pointers, the per-plane user data, both strides and the allocation
// callbacks. No other member is exchanged.
void de265_image::exchange_pixel_data_with(de265_image& b)
{
  std::swap(image_allocation_functions, b.image_allocation_functions);

  std::swap(chroma_stride, b.chroma_stride);
  std::swap(stride, b.stride);

  for (int plane=0; plane<3; ++plane) {
    std::swap(plane_user_data[plane], b.plane_user_data[plane]);
    std::swap(pixels_confwin[plane], b.pixels_confwin[plane]);
    std::swap(pixels[plane], b.pixels[plane]);
  }
}
// Registers `nThreads` additional tasks for this image: both the queued and the total
// task counters are increased while holding the image mutex.
void de265_image::thread_start(int nThreads)
{
  de265_mutex_lock(&mutex);

  nThreadsTotal  = nThreadsTotal  + nThreads;
  nThreadsQueued = nThreadsQueued + nThreads;

  de265_mutex_unlock(&mutex);
}
// Bookkeeping for a task that starts executing: one task moves from "queued" to
// "running". `task` is not used.
void de265_image::thread_run(const thread_task* task)
{
  de265_mutex_lock(&mutex);

  ++nThreadsRunning;
  --nThreadsQueued;

  de265_mutex_unlock(&mutex);
}
// Bookkeeping for a task that starts waiting: one task moves from "running" to "blocked".
void de265_image::thread_blocks()
{
  de265_mutex_lock(&mutex);

  ++nThreadsBlocked;
  --nThreadsRunning;

  de265_mutex_unlock(&mutex);
}
// Bookkeeping for a task that resumes after waiting: one task moves from "blocked" back
// to "running".
void de265_image::thread_unblocks()
{
  de265_mutex_lock(&mutex);

  ++nThreadsRunning;
  --nThreadsBlocked;

  de265_mutex_unlock(&mutex);
}
// Bookkeeping for a task that has completed: one task moves from "running" to "finished".
// When every registered task has finished, all waiters on finished_cond are woken up
// (see wait_for_completion()). `task` is not used.
void de265_image::thread_finishes(const thread_task* task)
{
  de265_mutex_lock(&mutex);

  --nThreadsRunning;
  ++nThreadsFinished;
  assert(nThreadsRunning >= 0);

  const bool all_tasks_done = (nThreadsFinished==nThreadsTotal);
  if (all_tasks_done) {
    de265_cond_broadcast(&finished_cond, &mutex);
  }

  de265_mutex_unlock(&mutex);
}
// Convenience overload taking CTB coordinates: converts (ctbx, ctby) into a raster-scan
// CTB address using the picture width in CTBs from the SPS and forwards to the
// address-based overload.
void de265_image::wait_for_progress(thread_task* task, int ctbx,int ctby, int progress)
{
  const int ctbAddrRS = ctbx + sps->PicWidthInCtbsY * ctby;

  wait_for_progress(task, ctbAddrRS, progress);
}
// Blocks the calling task until the CTB with raster-scan address `ctbAddrRS` has reached
// at least `progress`.
//
// Returns immediately when `task` is NULL or when the progress has already been reached.
// While waiting, the task is marked as Blocked and the image's running/blocked counters
// are adjusted accordingly.
void de265_image::wait_for_progress(thread_task* task, int ctbAddrRS, int progress)
{
  if (task==NULL) { return; }

  de265_progress_lock& lock = ctb_progress[ctbAddrRS];

  if (lock.get_progress() >= progress) {
    return;   // nothing to wait for
  }

  thread_blocks();

  assert(task!=NULL);
  task->state = thread_task::Blocked;

  lock.wait_for_progress(progress);

  task->state = thread_task::Running;
  thread_unblocks();
}
// Blocks until the number of finished tasks equals the number of registered tasks.
// Waits on finished_cond, which thread_finishes() broadcasts once that happens.
void de265_image::wait_for_completion()
{
  de265_mutex_lock(&mutex);

  for (;;) {
    if (nThreadsFinished==nThreadsTotal) {
      break;
    }

    de265_cond_wait(&finished_cond, &mutex);
  }

  de265_mutex_unlock(&mutex);
}
// Returns true when every registered task has finished. The counters are read without
// taking the image mutex.
bool de265_image::debug_is_completed() const
{
  const bool all_tasks_done = (nThreadsTotal == nThreadsFinished);
  return all_tasks_done;
}
// Clears the coding block, CTB and deblocking metadata arrays and resets the progress of
// every CTB to CTB_PROGRESS_NONE.
void de265_image::clear_metadata()
{
  cb_info.clear();
  ctb_info.clear();
  deblk_info.clear();

  // --- reset CTB progresses ---

  int ctb = 0;
  while (ctb < ctb_info.data_size) {
    ctb_progress[ctb].reset(CTB_PROGRESS_NONE);
    ctb++;
  }
}
// Stores the motion data `mv` for every 4x4 unit of the prediction block with top-left
// luma position (x, y) and size nPbW x nPbH.
void de265_image::set_mv_info(int x,int y, int nPbW,int nPbH, const PBMotion& mv)
{
  const int log2PuSize = 2;   // pb_info is addressed in units of 4x4 samples

  const int firstCol = x >> log2PuSize;
  const int firstRow = y >> log2PuSize;
  const int nCols = nPbW >> log2PuSize;
  const int nRows = nPbH >> log2PuSize;

  const int unitsPerRow = pb_info.width_in_units;

  for (int row=0; row<nRows; row++) {
    const int rowStart = (firstRow+row) * unitsPerRow;

    for (int col=0; col<nCols; col++) {
      pb_info[ firstCol+col + rowStart ] = mv;
    }
  }
}
// Checks whether the luma position (xN, yN) is available as a neighbour of the current
// position (xCurr, yCurr).
//
// Returns false when the neighbour
//   - lies outside of the picture,
//   - has a larger z-scan address (pps->MinTbAddrZS) than the current position,
//   - belongs to a CTB with a different slice address, or
//   - belongs to a CTB in a different tile.
// Returns true otherwise.
bool de265_image::available_zscan(int xCurr,int yCurr, int xN,int yN) const
{
  const bool outsidePicture = (xN<0 || yN<0 ||
                               xN>=sps->pic_width_in_luma_samples ||
                               yN>=sps->pic_height_in_luma_samples);
  if (outsidePicture) return false;

  // compare the z-scan order of the minimum transform blocks containing both positions

  const int tbIdxN    = (xN   >>sps->Log2MinTrafoSize) +
                        (yN   >>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY;
  const int tbIdxCurr = (xCurr>>sps->Log2MinTrafoSize) +
                        (yCurr>>sps->Log2MinTrafoSize) * sps->PicWidthInTbsY;

  const int minBlockAddrN    = pps->MinTbAddrZS[tbIdxN];
  const int minBlockAddrCurr = pps->MinTbAddrZS[tbIdxCurr];

  if (minBlockAddrN > minBlockAddrCurr) return false;

  // both positions have to be in the same slice ...

  const int xCurrCtb = xCurr >> sps->Log2CtbSizeY;
  const int yCurrCtb = yCurr >> sps->Log2CtbSizeY;
  const int xNCtb = xN >> sps->Log2CtbSizeY;
  const int yNCtb = yN >> sps->Log2CtbSizeY;

  if (get_SliceAddrRS(xCurrCtb,yCurrCtb) !=
      get_SliceAddrRS(xNCtb,   yNCtb)) {
    return false;
  }

  // ... and in the same tile

  const int ctbAddrCurr = xCurrCtb + yCurrCtb*sps->PicWidthInCtbsY;
  const int ctbAddrN    = xNCtb    + yNCtb   *sps->PicWidthInCtbsY;

  return pps->TileIdRS[ctbAddrCurr] == pps->TileIdRS[ctbAddrN];
}
// Checks whether the block at luma position (xN, yN) is available for the prediction
// block (xP, yP, nPbW x nPbH, partition partIdx) inside the coding block (xC, yC, nCbS).
//
// A neighbour outside of the current coding block is checked with available_zscan().
// A neighbour inside the current coding block is available unless the coding block is
// split into four partitions, partIdx is 1 and the neighbour lies in the lower-left
// partition. A neighbour coded in intra mode is never available.
bool de265_image::available_pred_blk(int xC,int yC, int nCbS, int xP, int yP,
                                     int nPbW, int nPbH, int partIdx, int xN,int yN) const
{
  logtrace(LogMotion,"C:%d;%d P:%d;%d N:%d;%d size=%d;%d\n",xC,yC,xP,yP,xN,yN,nPbW,nPbH);

  const bool insideCurrentCb = (xC <= xN && xN < xC+nCbS &&
                                yC <= yN && yN < yC+nCbS);

  bool availableN;

  if (insideCurrentCb) {
    const bool fourPartitions   = ((nPbW<<1) == nCbS && (nPbH<<1) == nCbS);
    const bool inLowerLeftPart  = (yN >= yC+nPbH && xN < xC+nPbW);

    availableN = !(fourPartitions && partIdx==1 && inLowerLeftPart);
  }
  else {
    availableN = available_zscan(xP,yP,xN,yN);
  }

  if (!availableN) {
    return false;
  }

  // intra coded neighbours carry no motion data
  return get_pred_mode(xN,yN) != MODE_INTRA;
}