root/modules/cudacodec/src/video_writer.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. load
  2. save
  3. createVideoWriter
  4. createVideoWriter
  5. createVideoWriter
  6. createVideoWriter
  7. cuCtxLock_
  8. cuCtxLock_
  9. initEncoder
  10. setEncodeParams
  11. getEncoderParams
  12. initGpuMemory
  13. initCallBacks
  14. createHWEncoder
  15. copyUYVYorYUY2Frame
  16. copyYV12orIYUVFrame
  17. copyNV12Frame
  18. write
  19. HandleAcquireBitStream
  20. HandleReleaseBitStream
  21. HandleOnBeginFrame
  22. HandleOnEndFrame
  23. init_MediaStream_FFMPEG
  24. isKeyFrame_
  25. acquireBitStream
  26. releaseBitStream
  27. onBeginFrame
  28. onEndFrame
  29. load
  30. save
  31. createVideoWriter
  32. createVideoWriter
  33. createVideoWriter
  34. createVideoWriter

/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;
using namespace cv::cudacodec;

#if !defined(HAVE_NVCUVID) || !defined(WIN32)

cv::cudacodec::EncoderParams::EncoderParams() { throw_no_cuda(); }
cv::cudacodec::EncoderParams::EncoderParams(const String&) { throw_no_cuda(); }
void cv::cudacodec::EncoderParams::load(const String&) { throw_no_cuda(); }
void cv::cudacodec::EncoderParams::save(const String&) const { throw_no_cuda(); }

Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const String&, Size, double, SurfaceFormat) { throw_no_cuda(); return Ptr<VideoWriter>(); }
Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const String&, Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); return Ptr<VideoWriter>(); }

Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const Ptr<EncoderCallBack>&, Size, double, SurfaceFormat) { throw_no_cuda(); return Ptr<VideoWriter>(); }
Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const Ptr<EncoderCallBack>&, Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); return Ptr<VideoWriter>(); }

#else // !defined HAVE_CUDA || !defined WIN32

void RGB_to_YV12(const GpuMat& src, GpuMat& dst);

///////////////////////////////////////////////////////////////////////////
// VideoWriterImpl

namespace
{
    class NVEncoderWrapper
    {
    public:
        NVEncoderWrapper() : encoder_(0)
        {
            int err;

            err = NVGetHWEncodeCaps();
            if (err)
                CV_Error(Error::GpuNotSupported, "No CUDA capability present");

            // Create the Encoder API Interface
            err = NVCreateEncoder(&encoder_);
            CV_Assert( err == 0 );
        }

        ~NVEncoderWrapper()
        {
            if (encoder_)
                NVDestroyEncoder(encoder_);
        }

        operator NVEncoder() const
        {
            return encoder_;
        }

    private:
        NVEncoder encoder_;
    };

    enum CodecType
    {
        MPEG1, // not supported yet
        MPEG2, // not supported yet
        MPEG4, // not supported yet
        H264
    };

    class VideoWriterImpl : public VideoWriter
    {
    public:
        VideoWriterImpl(const Ptr<EncoderCallBack>& callback, Size frameSize, double fps, SurfaceFormat format, CodecType codec = H264);
        VideoWriterImpl(const Ptr<EncoderCallBack>& callback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format, CodecType codec = H264);

        void write(InputArray frame, bool lastFrame = false);

        EncoderParams getEncoderParams() const;

    private:
        void initEncoder(double fps);
        void setEncodeParams(const EncoderParams& params);
        void initGpuMemory();
        void initCallBacks();
        void createHWEncoder();

        Ptr<EncoderCallBack> callback_;
        Size frameSize_;

        CodecType codec_;
        SurfaceFormat inputFormat_;
        NVVE_SurfaceFormat surfaceFormat_;

        NVEncoderWrapper encoder_;

        GpuMat videoFrame_;
        CUvideoctxlock cuCtxLock_;

        // CallBacks

        static unsigned char* NVENCAPI HandleAcquireBitStream(int* pBufferSize, void* pUserdata);
        static void NVENCAPI HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata);
        static void NVENCAPI HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata);
        static void NVENCAPI HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata);
    };

    VideoWriterImpl::VideoWriterImpl(const Ptr<EncoderCallBack>& callback, Size frameSize, double fps, SurfaceFormat format, CodecType codec) :
        callback_(callback),
        frameSize_(frameSize),
        codec_(codec),
        inputFormat_(format),
        cuCtxLock_(0)
    {
        surfaceFormat_ = (inputFormat_ == SF_BGR ? YV12 : static_cast<NVVE_SurfaceFormat>(inputFormat_));

        initEncoder(fps);

        initGpuMemory();

        initCallBacks();

        createHWEncoder();
    }

    VideoWriterImpl::VideoWriterImpl(const Ptr<EncoderCallBack>& callback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format, CodecType codec) :
        callback_(callback),
        frameSize_(frameSize),
        codec_(codec),
        inputFormat_(format),
        cuCtxLock_(0)
    {
        surfaceFormat_ = (inputFormat_ == SF_BGR ? YV12 : static_cast<NVVE_SurfaceFormat>(inputFormat_));

        initEncoder(fps);

        setEncodeParams(params);

        initGpuMemory();

        initCallBacks();

        createHWEncoder();
    }

    void VideoWriterImpl::initEncoder(double fps)
    {
        int err;

        // Set codec

        static const unsigned long codecs_id[] =
        {
            NV_CODEC_TYPE_MPEG1, NV_CODEC_TYPE_MPEG2, NV_CODEC_TYPE_MPEG4, NV_CODEC_TYPE_H264, NV_CODEC_TYPE_VC1
        };
        err = NVSetCodec(encoder_, codecs_id[codec_]);
        if (err)
            CV_Error(Error::StsNotImplemented, "Codec format is not supported");

        // Set default params

        err = NVSetDefaultParam(encoder_);
        CV_Assert( err == 0 );

        // Set some common params

        int inputSize[] = { frameSize_.width, frameSize_.height };
        err = NVSetParamValue(encoder_, NVVE_IN_SIZE, &inputSize);
        CV_Assert( err == 0 );
        err = NVSetParamValue(encoder_, NVVE_OUT_SIZE, &inputSize);
        CV_Assert( err == 0 );

        int aspectRatio[] = { frameSize_.width, frameSize_.height, ASPECT_RATIO_DAR };
        err = NVSetParamValue(encoder_, NVVE_ASPECT_RATIO, &aspectRatio);
        CV_Assert( err == 0 );

        // FPS

        int frame_rate = static_cast<int>(fps + 0.5);
        int frame_rate_base = 1;
        while (fabs(static_cast<double>(frame_rate) / frame_rate_base) - fps > 0.001)
        {
            frame_rate_base *= 10;
            frame_rate = static_cast<int>(fps*frame_rate_base + 0.5);
        }
        int FrameRate[] = { frame_rate, frame_rate_base };
        err = NVSetParamValue(encoder_, NVVE_FRAME_RATE, &FrameRate);
        CV_Assert( err == 0 );

        // Select device for encoding

        int gpuID = getDevice();
        err = NVSetParamValue(encoder_, NVVE_FORCE_GPU_SELECTION, &gpuID);
        CV_Assert( err == 0 );
    }

    void VideoWriterImpl::setEncodeParams(const EncoderParams& params)
    {
        int err;

        int P_Interval = params.P_Interval;
        err = NVSetParamValue(encoder_, NVVE_P_INTERVAL, &P_Interval);
        CV_Assert( err == 0 );

        int IDR_Period = params.IDR_Period;
        err = NVSetParamValue(encoder_, NVVE_IDR_PERIOD, &IDR_Period);
        CV_Assert( err == 0 );

        int DynamicGOP = params.DynamicGOP;
        err = NVSetParamValue(encoder_, NVVE_DYNAMIC_GOP, &DynamicGOP);
        CV_Assert( err == 0 );

        NVVE_RateCtrlType RCType = static_cast<NVVE_RateCtrlType>(params.RCType);
        err = NVSetParamValue(encoder_, NVVE_RC_TYPE, &RCType);
        CV_Assert( err == 0 );

        int AvgBitrate = params.AvgBitrate;
        err = NVSetParamValue(encoder_, NVVE_AVG_BITRATE, &AvgBitrate);
        CV_Assert( err == 0 );

        int PeakBitrate = params.PeakBitrate;
        err = NVSetParamValue(encoder_, NVVE_PEAK_BITRATE, &PeakBitrate);
        CV_Assert( err == 0 );

        int QP_Level_Intra = params.QP_Level_Intra;
        err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTRA, &QP_Level_Intra);
        CV_Assert( err == 0 );

        int QP_Level_InterP = params.QP_Level_InterP;
        err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_P, &QP_Level_InterP);
        CV_Assert( err == 0 );

        int QP_Level_InterB = params.QP_Level_InterB;
        err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_B, &QP_Level_InterB);
        CV_Assert( err == 0 );

        int DeblockMode = params.DeblockMode;
        err = NVSetParamValue(encoder_, NVVE_DEBLOCK_MODE, &DeblockMode);
        CV_Assert( err == 0 );

        int ProfileLevel = params.ProfileLevel;
        err = NVSetParamValue(encoder_, NVVE_PROFILE_LEVEL, &ProfileLevel);
        CV_Assert( err == 0 );

        int ForceIntra = params.ForceIntra;
        err = NVSetParamValue(encoder_, NVVE_FORCE_INTRA, &ForceIntra);
        CV_Assert( err == 0 );

        int ForceIDR = params.ForceIDR;
        err = NVSetParamValue(encoder_, NVVE_FORCE_IDR, &ForceIDR);
        CV_Assert( err == 0 );

        int ClearStat = params.ClearStat;
        err = NVSetParamValue(encoder_, NVVE_CLEAR_STAT, &ClearStat);
        CV_Assert( err == 0 );

        NVVE_DI_MODE DIMode = static_cast<NVVE_DI_MODE>(params.DIMode);
        err = NVSetParamValue(encoder_, NVVE_SET_DEINTERLACE, &DIMode);
        CV_Assert( err == 0 );

        if (params.Presets != -1)
        {
            NVVE_PRESETS_TARGET Presets = static_cast<NVVE_PRESETS_TARGET>(params.Presets);
            err = NVSetParamValue(encoder_, NVVE_PRESETS, &Presets);
            CV_Assert( err == 0 );
        }

        int DisableCabac = params.DisableCabac;
        err = NVSetParamValue(encoder_, NVVE_DISABLE_CABAC, &DisableCabac);
        CV_Assert( err == 0 );

        int NaluFramingType = params.NaluFramingType;
        err = NVSetParamValue(encoder_, NVVE_CONFIGURE_NALU_FRAMING_TYPE, &NaluFramingType);
        CV_Assert( err == 0 );

        int DisableSPSPPS = params.DisableSPSPPS;
        err = NVSetParamValue(encoder_, NVVE_DISABLE_SPS_PPS, &DisableSPSPPS);
        CV_Assert( err == 0 );
    }

    EncoderParams VideoWriterImpl::getEncoderParams() const
    {
        int err;

        EncoderParams params;

        int P_Interval;
        err = NVGetParamValue(encoder_, NVVE_P_INTERVAL, &P_Interval);
        CV_Assert( err == 0 );
        params.P_Interval = P_Interval;

        int IDR_Period;
        err = NVGetParamValue(encoder_, NVVE_IDR_PERIOD, &IDR_Period);
        CV_Assert( err == 0 );
        params.IDR_Period = IDR_Period;

        int DynamicGOP;
        err = NVGetParamValue(encoder_, NVVE_DYNAMIC_GOP, &DynamicGOP);
        CV_Assert( err == 0 );
        params.DynamicGOP = DynamicGOP;

        NVVE_RateCtrlType RCType;
        err = NVGetParamValue(encoder_, NVVE_RC_TYPE, &RCType);
        CV_Assert( err == 0 );
        params.RCType = RCType;

        int AvgBitrate;
        err = NVGetParamValue(encoder_, NVVE_AVG_BITRATE, &AvgBitrate);
        CV_Assert( err == 0 );
        params.AvgBitrate = AvgBitrate;

        int PeakBitrate;
        err = NVGetParamValue(encoder_, NVVE_PEAK_BITRATE, &PeakBitrate);
        CV_Assert( err == 0 );
        params.PeakBitrate = PeakBitrate;

        int QP_Level_Intra;
        err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTRA, &QP_Level_Intra);
        CV_Assert( err == 0 );
        params.QP_Level_Intra = QP_Level_Intra;

        int QP_Level_InterP;
        err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTER_P, &QP_Level_InterP);
        CV_Assert( err == 0 );
        params.QP_Level_InterP = QP_Level_InterP;

        int QP_Level_InterB;
        err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTER_B, &QP_Level_InterB);
        CV_Assert( err == 0 );
        params.QP_Level_InterB = QP_Level_InterB;

        int DeblockMode;
        err = NVGetParamValue(encoder_, NVVE_DEBLOCK_MODE, &DeblockMode);
        CV_Assert( err == 0 );
        params.DeblockMode = DeblockMode;

        int ProfileLevel;
        err = NVGetParamValue(encoder_, NVVE_PROFILE_LEVEL, &ProfileLevel);
        CV_Assert( err == 0 );
        params.ProfileLevel = ProfileLevel;

        int ForceIntra;
        err = NVGetParamValue(encoder_, NVVE_FORCE_INTRA, &ForceIntra);
        CV_Assert( err == 0 );
        params.ForceIntra = ForceIntra;

        int ForceIDR;
        err = NVGetParamValue(encoder_, NVVE_FORCE_IDR, &ForceIDR);
        CV_Assert( err == 0 );
        params.ForceIDR = ForceIDR;

        int ClearStat;
        err = NVGetParamValue(encoder_, NVVE_CLEAR_STAT, &ClearStat);
        CV_Assert( err == 0 );
        params.ClearStat = ClearStat;

        NVVE_DI_MODE DIMode;
        err = NVGetParamValue(encoder_, NVVE_SET_DEINTERLACE, &DIMode);
        CV_Assert( err == 0 );
        params.DIMode = DIMode;

        params.Presets = -1;

        int DisableCabac;
        err = NVGetParamValue(encoder_, NVVE_DISABLE_CABAC, &DisableCabac);
        CV_Assert( err == 0 );
        params.DisableCabac = DisableCabac;

        int NaluFramingType;
        err = NVGetParamValue(encoder_, NVVE_CONFIGURE_NALU_FRAMING_TYPE, &NaluFramingType);
        CV_Assert( err == 0 );
        params.NaluFramingType = NaluFramingType;

        int DisableSPSPPS;
        err = NVGetParamValue(encoder_, NVVE_DISABLE_SPS_PPS, &DisableSPSPPS);
        CV_Assert( err == 0 );
        params.DisableSPSPPS = DisableSPSPPS;

        return params;
    }

    void VideoWriterImpl::initGpuMemory()
    {
        int err;

        // initialize context
        GpuMat temp(1, 1, CV_8U);
        temp.release();

        static const int bpp[] =
        {
            16, // UYVY, 4:2:2
            16, // YUY2, 4:2:2
            12, // YV12, 4:2:0
            12, // NV12, 4:2:0
            12, // IYUV, 4:2:0
        };

        CUcontext cuContext;
        cuSafeCall( cuCtxGetCurrent(&cuContext) );

        // Allocate the CUDA memory Pitched Surface
        if (surfaceFormat_ == UYVY || surfaceFormat_ == YUY2)
            videoFrame_.create(frameSize_.height, (frameSize_.width * bpp[surfaceFormat_]) / 8, CV_8UC1);
        else
            videoFrame_.create((frameSize_.height * bpp[surfaceFormat_]) / 8, frameSize_.width, CV_8UC1);

        // Create the Video Context Lock (used for synchronization)
        cuSafeCall( cuvidCtxLockCreate(&cuCtxLock_, cuContext) );

        // If we are using GPU Device Memory with NVCUVENC, it is necessary to create a
        // CUDA Context with a Context Lock cuvidCtxLock.  The Context Lock needs to be passed to NVCUVENC

        int iUseDeviceMem = 1;
        err = NVSetParamValue(encoder_, NVVE_DEVICE_MEMORY_INPUT, &iUseDeviceMem);
        CV_Assert( err == 0 );

        err = NVSetParamValue(encoder_, NVVE_DEVICE_CTX_LOCK, &cuCtxLock_);
        CV_Assert( err == 0 );
    }

    void VideoWriterImpl::initCallBacks()
    {
        NVVE_CallbackParams cb;
        memset(&cb, 0, sizeof(NVVE_CallbackParams));

        cb.pfnacquirebitstream = HandleAcquireBitStream;
        cb.pfnonbeginframe     = HandleOnBeginFrame;
        cb.pfnonendframe       = HandleOnEndFrame;
        cb.pfnreleasebitstream = HandleReleaseBitStream;

        NVRegisterCB(encoder_, cb, this);
    }

    void VideoWriterImpl::createHWEncoder()
    {
        int err;

        // Create the NVIDIA HW resources for Encoding on NVIDIA hardware
        err = NVCreateHWEncoder(encoder_);
        CV_Assert( err == 0 );
    }

    // UYVY/YUY2 are both 4:2:2 formats (16bpc)
    // Luma, U, V are interleaved, chroma is subsampled (w/2,h)
    void copyUYVYorYUY2Frame(Size frameSize, const GpuMat& src, GpuMat& dst)
    {
        // Source is YUVY/YUY2 4:2:2, the YUV data in a packed and interleaved

        // YUV Copy setup
        CUDA_MEMCPY2D stCopyYUV422;
        memset(&stCopyYUV422, 0, sizeof(CUDA_MEMCPY2D));

        stCopyYUV422.srcXInBytes          = 0;
        stCopyYUV422.srcY                 = 0;
        stCopyYUV422.srcMemoryType        = CU_MEMORYTYPE_DEVICE;
        stCopyYUV422.srcHost              = 0;
        stCopyYUV422.srcDevice            = (CUdeviceptr) src.data;
        stCopyYUV422.srcArray             = 0;
        stCopyYUV422.srcPitch             = src.step;

        stCopyYUV422.dstXInBytes          = 0;
        stCopyYUV422.dstY                 = 0;
        stCopyYUV422.dstMemoryType        = CU_MEMORYTYPE_DEVICE;
        stCopyYUV422.dstHost              = 0;
        stCopyYUV422.dstDevice            = (CUdeviceptr) dst.data;
        stCopyYUV422.dstArray             = 0;
        stCopyYUV422.dstPitch             = dst.step;

        stCopyYUV422.WidthInBytes         = frameSize.width * 2;
        stCopyYUV422.Height               = frameSize.height;

        // DMA Luma/Chroma
        cuSafeCall( cuMemcpy2D(&stCopyYUV422) );
    }

    // YV12/IYUV are both 4:2:0 planar formats (12bpc)
    // Luma, U, V chroma planar (12bpc), chroma is subsampled (w/2,h/2)
    void copyYV12orIYUVFrame(Size frameSize, const GpuMat& src, GpuMat& dst)
    {
        // Source is YV12/IYUV, this native format is converted to NV12 format by the video encoder

        // (1) luma copy setup
        CUDA_MEMCPY2D stCopyLuma;
        memset(&stCopyLuma, 0, sizeof(CUDA_MEMCPY2D));

        stCopyLuma.srcXInBytes          = 0;
        stCopyLuma.srcY                 = 0;
        stCopyLuma.srcMemoryType        = CU_MEMORYTYPE_DEVICE;
        stCopyLuma.srcHost              = 0;
        stCopyLuma.srcDevice            = (CUdeviceptr) src.data;
        stCopyLuma.srcArray             = 0;
        stCopyLuma.srcPitch             = src.step;

        stCopyLuma.dstXInBytes          = 0;
        stCopyLuma.dstY                 = 0;
        stCopyLuma.dstMemoryType        = CU_MEMORYTYPE_DEVICE;
        stCopyLuma.dstHost              = 0;
        stCopyLuma.dstDevice            = (CUdeviceptr) dst.data;
        stCopyLuma.dstArray             = 0;
        stCopyLuma.dstPitch             = dst.step;

        stCopyLuma.WidthInBytes         = frameSize.width;
        stCopyLuma.Height               = frameSize.height;

        // (2) chroma copy setup, U/V can be done together
        CUDA_MEMCPY2D stCopyChroma;
        memset(&stCopyChroma, 0, sizeof(CUDA_MEMCPY2D));

        stCopyChroma.srcXInBytes        = 0;
        stCopyChroma.srcY               = frameSize.height << 1; // U/V chroma offset
        stCopyChroma.srcMemoryType      = CU_MEMORYTYPE_DEVICE;
        stCopyChroma.srcHost            = 0;
        stCopyChroma.srcDevice          = (CUdeviceptr) src.data;
        stCopyChroma.srcArray           = 0;
        stCopyChroma.srcPitch           = src.step >> 1; // chroma is subsampled by 2 (but it has U/V are next to each other)

        stCopyChroma.dstXInBytes        = 0;
        stCopyChroma.dstY               = frameSize.height << 1; // chroma offset (srcY*srcPitch now points to the chroma planes)
        stCopyChroma.dstMemoryType      = CU_MEMORYTYPE_DEVICE;
        stCopyChroma.dstHost            = 0;
        stCopyChroma.dstDevice          = (CUdeviceptr) dst.data;
        stCopyChroma.dstArray           = 0;
        stCopyChroma.dstPitch           = dst.step >> 1;

        stCopyChroma.WidthInBytes       = frameSize.width >> 1;
        stCopyChroma.Height             = frameSize.height; // U/V are sent together

        // DMA Luma
        cuSafeCall( cuMemcpy2D(&stCopyLuma) );

        // DMA Chroma channels (UV side by side)
        cuSafeCall( cuMemcpy2D(&stCopyChroma) );
    }

    // NV12 is 4:2:0 format (12bpc)
    // Luma followed by U/V chroma interleaved (12bpc), chroma is subsampled (w/2,h/2)
    void copyNV12Frame(Size frameSize, const GpuMat& src, GpuMat& dst)
    {
        // Source is NV12 in pitch linear memory
        // Because we are assume input is NV12 (if we take input in the native format), the encoder handles NV12 as a native format in pitch linear memory

        // Luma/Chroma can be done in a single transfer
        CUDA_MEMCPY2D stCopyNV12;
        memset(&stCopyNV12, 0, sizeof(CUDA_MEMCPY2D));

        stCopyNV12.srcXInBytes          = 0;
        stCopyNV12.srcY                 = 0;
        stCopyNV12.srcMemoryType        = CU_MEMORYTYPE_DEVICE;
        stCopyNV12.srcHost              = 0;
        stCopyNV12.srcDevice            = (CUdeviceptr) src.data;
        stCopyNV12.srcArray             = 0;
        stCopyNV12.srcPitch             = src.step;

        stCopyNV12.dstXInBytes          = 0;
        stCopyNV12.dstY                 = 0;
        stCopyNV12.dstMemoryType        = CU_MEMORYTYPE_DEVICE;
        stCopyNV12.dstHost              = 0;
        stCopyNV12.dstDevice            = (CUdeviceptr) dst.data;
        stCopyNV12.dstArray             = 0;
        stCopyNV12.dstPitch             = dst.step;

        stCopyNV12.WidthInBytes         = frameSize.width;
        stCopyNV12.Height               = (frameSize.height * 3) >> 1;

        // DMA Luma/Chroma
        cuSafeCall( cuMemcpy2D(&stCopyNV12) );
    }

    void VideoWriterImpl::write(InputArray _frame, bool lastFrame)
    {
        GpuMat frame = _frame.getGpuMat();

        if (inputFormat_ == SF_BGR)
        {
            CV_Assert( frame.size() == frameSize_ );
            CV_Assert( frame.type() == CV_8UC1 || frame.type() == CV_8UC3 || frame.type() == CV_8UC4 );
        }
        else
        {
            CV_Assert( frame.size() == videoFrame_.size() );
            CV_Assert( frame.type() == videoFrame_.type() );
        }

        NVVE_EncodeFrameParams efparams;
        efparams.Width = frameSize_.width;
        efparams.Height = frameSize_.height;
        efparams.Pitch = static_cast<int>(videoFrame_.step);
        efparams.SurfFmt = surfaceFormat_;
        efparams.PictureStruc = FRAME_PICTURE;
        efparams.topfieldfirst =  0;
        efparams.repeatFirstField = 0;
        efparams.progressiveFrame = (surfaceFormat_ == NV12) ? 1 : 0;
        efparams.bLast = lastFrame;
        efparams.picBuf = 0; // Must be set to NULL in order to support device memory input

        // Don't forget we need to lock/unlock between memcopies
        cuSafeCall( cuvidCtxLock(cuCtxLock_, 0) );

        if (inputFormat_ == SF_BGR)
        {
            RGB_to_YV12(frame, videoFrame_);
        }
        else
        {
            switch (surfaceFormat_)
            {
            case UYVY: // UYVY (4:2:2)
            case YUY2: // YUY2 (4:2:2)
                copyUYVYorYUY2Frame(frameSize_, frame, videoFrame_);
                break;

            case YV12: // YV12 (4:2:0), Y V U
            case IYUV: // IYUV (4:2:0), Y U V
                copyYV12orIYUVFrame(frameSize_, frame, videoFrame_);
                break;

            case NV12: // NV12 (4:2:0)
                copyNV12Frame(frameSize_, frame, videoFrame_);
                break;
            }
        }

        cuSafeCall( cuvidCtxUnlock(cuCtxLock_, 0) );

        int err = NVEncodeFrame(encoder_, &efparams, 0, videoFrame_.data);
        CV_Assert( err == 0 );
    }

    unsigned char* NVENCAPI VideoWriterImpl::HandleAcquireBitStream(int* pBufferSize, void* pUserdata)
    {
        VideoWriterImpl* thiz = static_cast<VideoWriterImpl*>(pUserdata);

        return thiz->callback_->acquireBitStream(pBufferSize);
    }

    void NVENCAPI VideoWriterImpl::HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata)
    {
        VideoWriterImpl* thiz = static_cast<VideoWriterImpl*>(pUserdata);

        thiz->callback_->releaseBitStream(cb, nBytesInBuffer);
    }

    void NVENCAPI VideoWriterImpl::HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata)
    {
        VideoWriterImpl* thiz = static_cast<VideoWriterImpl*>(pUserdata);

        thiz->callback_->onBeginFrame(pbfi->nFrameNumber, static_cast<EncoderCallBack::PicType>(pbfi->nPicType));
    }

    void NVENCAPI VideoWriterImpl::HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata)
    {
        VideoWriterImpl* thiz = static_cast<VideoWriterImpl*>(pUserdata);

        thiz->callback_->onEndFrame(pefi->nFrameNumber, static_cast<EncoderCallBack::PicType>(pefi->nPicType));
    }

    ///////////////////////////////////////////////////////////////////////////
    // FFMPEG

    class EncoderCallBackFFMPEG : public EncoderCallBack
    {
    public:
        EncoderCallBackFFMPEG(const String& fileName, Size frameSize, double fps);
        ~EncoderCallBackFFMPEG();

        unsigned char* acquireBitStream(int* bufferSize);
        void releaseBitStream(unsigned char* data, int size);
        void onBeginFrame(int frameNumber, PicType picType);
        void onEndFrame(int frameNumber, PicType picType);

    private:
        static bool init_MediaStream_FFMPEG();

        struct OutputMediaStream_FFMPEG* stream_;
        std::vector<uchar> buf_;
        bool isKeyFrame_;

        static Create_OutputMediaStream_FFMPEG_Plugin create_OutputMediaStream_FFMPEG_p;
        static Release_OutputMediaStream_FFMPEG_Plugin release_OutputMediaStream_FFMPEG_p;
        static Write_OutputMediaStream_FFMPEG_Plugin write_OutputMediaStream_FFMPEG_p;
    };

    Create_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::create_OutputMediaStream_FFMPEG_p = 0;
    Release_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::release_OutputMediaStream_FFMPEG_p = 0;
    Write_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::write_OutputMediaStream_FFMPEG_p = 0;

    bool EncoderCallBackFFMPEG::init_MediaStream_FFMPEG()
    {
        static bool initialized = false;

        if (!initialized)
        {
            #if defined(WIN32) || defined(_WIN32)
                const char* module_name = "opencv_ffmpeg"
                    CVAUX_STR(CV_VERSION_MAJOR) CVAUX_STR(CV_VERSION_MINOR) CVAUX_STR(CV_VERSION_REVISION)
                #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__)
                    "_64"
                #endif
                    ".dll";

                static HMODULE cvFFOpenCV = LoadLibrary(module_name);

                if (cvFFOpenCV)
                {
                    create_OutputMediaStream_FFMPEG_p =
                        (Create_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "create_OutputMediaStream_FFMPEG");
                    release_OutputMediaStream_FFMPEG_p =
                        (Release_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "release_OutputMediaStream_FFMPEG");
                    write_OutputMediaStream_FFMPEG_p =
                        (Write_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "write_OutputMediaStream_FFMPEG");

                    initialized = create_OutputMediaStream_FFMPEG_p != 0 && release_OutputMediaStream_FFMPEG_p != 0 && write_OutputMediaStream_FFMPEG_p != 0;
                }
            #elif defined(HAVE_FFMPEG)
                create_OutputMediaStream_FFMPEG_p = create_OutputMediaStream_FFMPEG;
                release_OutputMediaStream_FFMPEG_p = release_OutputMediaStream_FFMPEG;
                write_OutputMediaStream_FFMPEG_p = write_OutputMediaStream_FFMPEG;

                initialized = true;
            #endif
        }

        return initialized;
    }

    EncoderCallBackFFMPEG::EncoderCallBackFFMPEG(const String& fileName, Size frameSize, double fps) :
        stream_(0), isKeyFrame_(false)
    {
        int buf_size = std::max(frameSize.area() * 4, 1024 * 1024);
        buf_.resize(buf_size);

        CV_Assert( init_MediaStream_FFMPEG() );

        stream_ = create_OutputMediaStream_FFMPEG_p(fileName.c_str(), frameSize.width, frameSize.height, fps);
        CV_Assert( stream_ != 0 );
    }

    EncoderCallBackFFMPEG::~EncoderCallBackFFMPEG()
    {
        release_OutputMediaStream_FFMPEG_p(stream_);
    }

    unsigned char* EncoderCallBackFFMPEG::acquireBitStream(int* bufferSize)
    {
        *bufferSize = static_cast<int>(buf_.size());
        return &buf_[0];
    }

    void EncoderCallBackFFMPEG::releaseBitStream(unsigned char* data, int size)
    {
        write_OutputMediaStream_FFMPEG_p(stream_, data, size, isKeyFrame_);
    }

    void EncoderCallBackFFMPEG::onBeginFrame(int frameNumber, PicType picType)
    {
        (void) frameNumber;
        isKeyFrame_ = (picType == IFRAME);
    }

    void EncoderCallBackFFMPEG::onEndFrame(int frameNumber, PicType picType)
    {
        (void) frameNumber;
        (void) picType;
    }
}

///////////////////////////////////////////////////////////////////////////
// EncoderParams

cv::cudacodec::EncoderParams::EncoderParams()
{
    P_Interval = 3;
    IDR_Period = 15;
    DynamicGOP = 0;
    RCType = 1;
    AvgBitrate = 4000000;
    PeakBitrate = 10000000;
    QP_Level_Intra = 25;
    QP_Level_InterP = 28;
    QP_Level_InterB = 31;
    DeblockMode = 1;
    ProfileLevel = 65357;
    ForceIntra = 0;
    ForceIDR = 0;
    ClearStat = 0;
    DIMode = 1;
    Presets = 2;
    DisableCabac = 0;
    NaluFramingType = 0;
    DisableSPSPPS = 0;
}

cv::cudacodec::EncoderParams::EncoderParams(const String& configFile)
{
    load(configFile);
}

void cv::cudacodec::EncoderParams::load(const String& configFile)
{
    FileStorage fs(configFile, FileStorage::READ);
    CV_Assert( fs.isOpened() );

    read(fs["P_Interval"     ], P_Interval, 3);
    read(fs["IDR_Period"     ], IDR_Period, 15);
    read(fs["DynamicGOP"     ], DynamicGOP, 0);
    read(fs["RCType"         ], RCType, 1);
    read(fs["AvgBitrate"     ], AvgBitrate, 4000000);
    read(fs["PeakBitrate"    ], PeakBitrate, 10000000);
    read(fs["QP_Level_Intra" ], QP_Level_Intra, 25);
    read(fs["QP_Level_InterP"], QP_Level_InterP, 28);
    read(fs["QP_Level_InterB"], QP_Level_InterB, 31);
    read(fs["DeblockMode"    ], DeblockMode, 1);
    read(fs["ProfileLevel"   ], ProfileLevel, 65357);
    read(fs["ForceIntra"     ], ForceIntra, 0);
    read(fs["ForceIDR"       ], ForceIDR, 0);
    read(fs["ClearStat"      ], ClearStat, 0);
    read(fs["DIMode"         ], DIMode, 1);
    read(fs["Presets"        ], Presets, 2);
    read(fs["DisableCabac"   ], DisableCabac, 0);
    read(fs["NaluFramingType"], NaluFramingType, 0);
    read(fs["DisableSPSPPS"  ], DisableSPSPPS, 0);
}

void cv::cudacodec::EncoderParams::save(const String& configFile) const
{
    FileStorage fs(configFile, FileStorage::WRITE);
    CV_Assert( fs.isOpened() );

    write(fs, "P_Interval"     , P_Interval);
    write(fs, "IDR_Period"     , IDR_Period);
    write(fs, "DynamicGOP"     , DynamicGOP);
    write(fs, "RCType"         , RCType);
    write(fs, "AvgBitrate"     , AvgBitrate);
    write(fs, "PeakBitrate"    , PeakBitrate);
    write(fs, "QP_Level_Intra" , QP_Level_Intra);
    write(fs, "QP_Level_InterP", QP_Level_InterP);
    write(fs, "QP_Level_InterB", QP_Level_InterB);
    write(fs, "DeblockMode"    , DeblockMode);
    write(fs, "ProfileLevel"   , ProfileLevel);
    write(fs, "ForceIntra"     , ForceIntra);
    write(fs, "ForceIDR"       , ForceIDR);
    write(fs, "ClearStat"      , ClearStat);
    write(fs, "DIMode"         , DIMode);
    write(fs, "Presets"        , Presets);
    write(fs, "DisableCabac"   , DisableCabac);
    write(fs, "NaluFramingType", NaluFramingType);
    write(fs, "DisableSPSPPS"  , DisableSPSPPS);
}

///////////////////////////////////////////////////////////////////////////
// createVideoWriter

Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const String& fileName, Size frameSize, double fps, SurfaceFormat format)
{
    Ptr<EncoderCallBack> encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps));
    return createVideoWriter(encoderCallback, frameSize, fps, format);
}

Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const String& fileName, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format)
{
    Ptr<EncoderCallBack> encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps));
    return createVideoWriter(encoderCallback, frameSize, fps, params, format);
}

Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const Ptr<EncoderCallBack>& encoderCallback, Size frameSize, double fps, SurfaceFormat format)
{
    return makePtr<VideoWriterImpl>(encoderCallback, frameSize, fps, format);
}

Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const Ptr<EncoderCallBack>& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format)
{
    return makePtr<VideoWriterImpl>(encoderCallback, frameSize, fps, params, format);
}

#endif // !defined HAVE_CUDA || !defined WIN32

/* [<][>][^][v][top][bottom][index][help] */