avcodec/nvenc: use dynamically loaded CUDA
This commit is contained in:
1
configure
vendored
1
configure
vendored
@@ -2693,6 +2693,7 @@ vaapi_encode_deps="vaapi"
|
|||||||
hwupload_cuda_filter_deps="cuda"
|
hwupload_cuda_filter_deps="cuda"
|
||||||
scale_npp_filter_deps="cuda libnpp"
|
scale_npp_filter_deps="cuda libnpp"
|
||||||
|
|
||||||
|
nvenc_deps="cuda"
|
||||||
nvenc_deps_any="dlopen LoadLibrary"
|
nvenc_deps_any="dlopen LoadLibrary"
|
||||||
nvenc_encoder_deps="nvenc"
|
nvenc_encoder_deps="nvenc"
|
||||||
h264_cuvid_decoder_deps="cuda cuvid"
|
h264_cuvid_decoder_deps="cuda cuvid"
|
||||||
|
@@ -21,56 +21,20 @@
|
|||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
#include "nvenc.h"
|
||||||
# define CUDA_LIBNAME "nvcuda.dll"
|
|
||||||
# if ARCH_X86_64
|
|
||||||
# define NVENC_LIBNAME "nvEncodeAPI64.dll"
|
|
||||||
# else
|
|
||||||
# define NVENC_LIBNAME "nvEncodeAPI.dll"
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
# define CUDA_LIBNAME "libcuda.so.1"
|
|
||||||
# define NVENC_LIBNAME "libnvidia-encode.so.1"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(_WIN32)
|
|
||||||
#include "compat/w32dlfcn.h"
|
|
||||||
#else
|
|
||||||
#include <dlfcn.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#include "libavutil/hwcontext_cuda.h"
|
||||||
#include "libavutil/hwcontext.h"
|
#include "libavutil/hwcontext.h"
|
||||||
#include "libavutil/imgutils.h"
|
#include "libavutil/imgutils.h"
|
||||||
#include "libavutil/avassert.h"
|
#include "libavutil/avassert.h"
|
||||||
#include "libavutil/mem.h"
|
#include "libavutil/mem.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "nvenc.h"
|
|
||||||
|
|
||||||
#define NVENC_CAP 0x30
|
#define NVENC_CAP 0x30
|
||||||
#define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \
|
#define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \
|
||||||
rc == NV_ENC_PARAMS_RC_2_PASS_QUALITY || \
|
rc == NV_ENC_PARAMS_RC_2_PASS_QUALITY || \
|
||||||
rc == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP)
|
rc == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP)
|
||||||
|
|
||||||
#define LOAD_LIBRARY(l, path) \
|
|
||||||
do { \
|
|
||||||
if (!((l) = dlopen(path, RTLD_LAZY))) { \
|
|
||||||
av_log(avctx, AV_LOG_ERROR, \
|
|
||||||
"Cannot load %s\n", \
|
|
||||||
path); \
|
|
||||||
return AVERROR_UNKNOWN; \
|
|
||||||
} \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#define LOAD_SYMBOL(fun, lib, symbol) \
|
|
||||||
do { \
|
|
||||||
if (!((fun) = dlsym(lib, symbol))) { \
|
|
||||||
av_log(avctx, AV_LOG_ERROR, \
|
|
||||||
"Cannot load %s\n", \
|
|
||||||
symbol); \
|
|
||||||
return AVERROR_UNKNOWN; \
|
|
||||||
} \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
|
const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
|
||||||
AV_PIX_FMT_YUV420P,
|
AV_PIX_FMT_YUV420P,
|
||||||
AV_PIX_FMT_NV12,
|
AV_PIX_FMT_NV12,
|
||||||
@@ -79,9 +43,7 @@ const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
|
|||||||
AV_PIX_FMT_YUV444P16,
|
AV_PIX_FMT_YUV444P16,
|
||||||
AV_PIX_FMT_0RGB32,
|
AV_PIX_FMT_0RGB32,
|
||||||
AV_PIX_FMT_0BGR32,
|
AV_PIX_FMT_0BGR32,
|
||||||
#if CONFIG_CUDA
|
|
||||||
AV_PIX_FMT_CUDA,
|
AV_PIX_FMT_CUDA,
|
||||||
#endif
|
|
||||||
AV_PIX_FMT_NONE
|
AV_PIX_FMT_NONE
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -153,42 +115,19 @@ static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
|
|||||||
{
|
{
|
||||||
NvencContext *ctx = avctx->priv_data;
|
NvencContext *ctx = avctx->priv_data;
|
||||||
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
|
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
|
||||||
PNVENCODEAPIGETMAXSUPPORTEDVERSION nvenc_get_max_ver;
|
|
||||||
PNVENCODEAPICREATEINSTANCE nvenc_create_instance;
|
|
||||||
NVENCSTATUS err;
|
NVENCSTATUS err;
|
||||||
uint32_t nvenc_max_ver;
|
uint32_t nvenc_max_ver;
|
||||||
|
int ret;
|
||||||
|
|
||||||
#if CONFIG_CUDA
|
ret = cuda_load_functions(&dl_fn->cuda_dl);
|
||||||
dl_fn->cu_init = cuInit;
|
if (ret < 0)
|
||||||
dl_fn->cu_device_get_count = cuDeviceGetCount;
|
return ret;
|
||||||
dl_fn->cu_device_get = cuDeviceGet;
|
|
||||||
dl_fn->cu_device_get_name = cuDeviceGetName;
|
|
||||||
dl_fn->cu_device_compute_capability = cuDeviceComputeCapability;
|
|
||||||
dl_fn->cu_ctx_create = cuCtxCreate_v2;
|
|
||||||
dl_fn->cu_ctx_pop_current = cuCtxPopCurrent_v2;
|
|
||||||
dl_fn->cu_ctx_destroy = cuCtxDestroy_v2;
|
|
||||||
#else
|
|
||||||
LOAD_LIBRARY(dl_fn->cuda, CUDA_LIBNAME);
|
|
||||||
|
|
||||||
LOAD_SYMBOL(dl_fn->cu_init, dl_fn->cuda, "cuInit");
|
ret = nvenc_load_functions(&dl_fn->nvenc_dl);
|
||||||
LOAD_SYMBOL(dl_fn->cu_device_get_count, dl_fn->cuda, "cuDeviceGetCount");
|
if (ret < 0)
|
||||||
LOAD_SYMBOL(dl_fn->cu_device_get, dl_fn->cuda, "cuDeviceGet");
|
return ret;
|
||||||
LOAD_SYMBOL(dl_fn->cu_device_get_name, dl_fn->cuda, "cuDeviceGetName");
|
|
||||||
LOAD_SYMBOL(dl_fn->cu_device_compute_capability, dl_fn->cuda,
|
|
||||||
"cuDeviceComputeCapability");
|
|
||||||
LOAD_SYMBOL(dl_fn->cu_ctx_create, dl_fn->cuda, "cuCtxCreate_v2");
|
|
||||||
LOAD_SYMBOL(dl_fn->cu_ctx_pop_current, dl_fn->cuda, "cuCtxPopCurrent_v2");
|
|
||||||
LOAD_SYMBOL(dl_fn->cu_ctx_destroy, dl_fn->cuda, "cuCtxDestroy_v2");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
LOAD_LIBRARY(dl_fn->nvenc, NVENC_LIBNAME);
|
err = dl_fn->nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(&nvenc_max_ver);
|
||||||
|
|
||||||
LOAD_SYMBOL(nvenc_get_max_ver, dl_fn->nvenc,
|
|
||||||
"NvEncodeAPIGetMaxSupportedVersion");
|
|
||||||
LOAD_SYMBOL(nvenc_create_instance, dl_fn->nvenc,
|
|
||||||
"NvEncodeAPICreateInstance");
|
|
||||||
|
|
||||||
err = nvenc_get_max_ver(&nvenc_max_ver);
|
|
||||||
if (err != NV_ENC_SUCCESS)
|
if (err != NV_ENC_SUCCESS)
|
||||||
return nvenc_print_error(avctx, err, "Failed to query nvenc max version");
|
return nvenc_print_error(avctx, err, "Failed to query nvenc max version");
|
||||||
|
|
||||||
@@ -204,7 +143,7 @@ static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
|
|||||||
|
|
||||||
dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
|
dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
|
||||||
|
|
||||||
err = nvenc_create_instance(&dl_fn->nvenc_funcs);
|
err = dl_fn->nvenc_dl->NvEncodeAPICreateInstance(&dl_fn->nvenc_funcs);
|
||||||
if (err != NV_ENC_SUCCESS)
|
if (err != NV_ENC_SUCCESS)
|
||||||
return nvenc_print_error(avctx, err, "Failed to create nvenc instance");
|
return nvenc_print_error(avctx, err, "Failed to create nvenc instance");
|
||||||
|
|
||||||
@@ -376,7 +315,7 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
|
|||||||
if (ctx->device == LIST_DEVICES)
|
if (ctx->device == LIST_DEVICES)
|
||||||
loglevel = AV_LOG_INFO;
|
loglevel = AV_LOG_INFO;
|
||||||
|
|
||||||
cu_res = dl_fn->cu_device_get(&cu_device, idx);
|
cu_res = dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx);
|
||||||
if (cu_res != CUDA_SUCCESS) {
|
if (cu_res != CUDA_SUCCESS) {
|
||||||
av_log(avctx, AV_LOG_ERROR,
|
av_log(avctx, AV_LOG_ERROR,
|
||||||
"Cannot access the CUDA device %d\n",
|
"Cannot access the CUDA device %d\n",
|
||||||
@@ -384,11 +323,11 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
cu_res = dl_fn->cu_device_get_name(name, sizeof(name), cu_device);
|
cu_res = dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device);
|
||||||
if (cu_res != CUDA_SUCCESS)
|
if (cu_res != CUDA_SUCCESS)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
cu_res = dl_fn->cu_device_compute_capability(&major, &minor, cu_device);
|
cu_res = dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device);
|
||||||
if (cu_res != CUDA_SUCCESS)
|
if (cu_res != CUDA_SUCCESS)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@@ -398,7 +337,7 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
|
|||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
cu_res = dl_fn->cu_ctx_create(&ctx->cu_context_internal, 0, cu_device);
|
cu_res = dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device);
|
||||||
if (cu_res != CUDA_SUCCESS) {
|
if (cu_res != CUDA_SUCCESS) {
|
||||||
av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
|
av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
|
||||||
goto fail;
|
goto fail;
|
||||||
@@ -406,7 +345,7 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
|
|||||||
|
|
||||||
ctx->cu_context = ctx->cu_context_internal;
|
ctx->cu_context = ctx->cu_context_internal;
|
||||||
|
|
||||||
cu_res = dl_fn->cu_ctx_pop_current(&dummy);
|
cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
|
||||||
if (cu_res != CUDA_SUCCESS) {
|
if (cu_res != CUDA_SUCCESS) {
|
||||||
av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
|
av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
|
||||||
goto fail2;
|
goto fail2;
|
||||||
@@ -430,7 +369,7 @@ fail3:
|
|||||||
ctx->nvencoder = NULL;
|
ctx->nvencoder = NULL;
|
||||||
|
|
||||||
fail2:
|
fail2:
|
||||||
dl_fn->cu_ctx_destroy(ctx->cu_context_internal);
|
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
|
||||||
ctx->cu_context_internal = NULL;
|
ctx->cu_context_internal = NULL;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
@@ -454,7 +393,6 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
|
if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
|
||||||
#if CONFIG_CUDA
|
|
||||||
AVHWFramesContext *frames_ctx;
|
AVHWFramesContext *frames_ctx;
|
||||||
AVCUDADeviceContext *device_hwctx;
|
AVCUDADeviceContext *device_hwctx;
|
||||||
int ret;
|
int ret;
|
||||||
@@ -476,19 +414,16 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx)
|
|||||||
av_log(avctx, AV_LOG_FATAL, "Provided device doesn't support required NVENC features\n");
|
av_log(avctx, AV_LOG_FATAL, "Provided device doesn't support required NVENC features\n");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
return AVERROR_BUG;
|
|
||||||
#endif
|
|
||||||
} else {
|
} else {
|
||||||
int i, nb_devices = 0;
|
int i, nb_devices = 0;
|
||||||
|
|
||||||
if ((dl_fn->cu_init(0)) != CUDA_SUCCESS) {
|
if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) {
|
||||||
av_log(avctx, AV_LOG_ERROR,
|
av_log(avctx, AV_LOG_ERROR,
|
||||||
"Cannot init CUDA\n");
|
"Cannot init CUDA\n");
|
||||||
return AVERROR_UNKNOWN;
|
return AVERROR_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((dl_fn->cu_device_get_count(&nb_devices)) != CUDA_SUCCESS) {
|
if ((dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) != CUDA_SUCCESS) {
|
||||||
av_log(avctx, AV_LOG_ERROR,
|
av_log(avctx, AV_LOG_ERROR,
|
||||||
"Cannot enumerate the CUDA devices\n");
|
"Cannot enumerate the CUDA devices\n");
|
||||||
return AVERROR_UNKNOWN;
|
return AVERROR_UNKNOWN;
|
||||||
@@ -1265,30 +1200,14 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
|
|||||||
ctx->nvencoder = NULL;
|
ctx->nvencoder = NULL;
|
||||||
|
|
||||||
if (ctx->cu_context_internal)
|
if (ctx->cu_context_internal)
|
||||||
dl_fn->cu_ctx_destroy(ctx->cu_context_internal);
|
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
|
||||||
ctx->cu_context = ctx->cu_context_internal = NULL;
|
ctx->cu_context = ctx->cu_context_internal = NULL;
|
||||||
|
|
||||||
if (dl_fn->nvenc)
|
nvenc_free_functions(&dl_fn->nvenc_dl);
|
||||||
dlclose(dl_fn->nvenc);
|
cuda_free_functions(&dl_fn->cuda_dl);
|
||||||
dl_fn->nvenc = NULL;
|
|
||||||
|
|
||||||
dl_fn->nvenc_device_count = 0;
|
dl_fn->nvenc_device_count = 0;
|
||||||
|
|
||||||
#if !CONFIG_CUDA
|
|
||||||
if (dl_fn->cuda)
|
|
||||||
dlclose(dl_fn->cuda);
|
|
||||||
dl_fn->cuda = NULL;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
dl_fn->cu_init = NULL;
|
|
||||||
dl_fn->cu_device_get_count = NULL;
|
|
||||||
dl_fn->cu_device_get = NULL;
|
|
||||||
dl_fn->cu_device_get_name = NULL;
|
|
||||||
dl_fn->cu_device_compute_capability = NULL;
|
|
||||||
dl_fn->cu_ctx_create = NULL;
|
|
||||||
dl_fn->cu_ctx_pop_current = NULL;
|
|
||||||
dl_fn->cu_ctx_destroy = NULL;
|
|
||||||
|
|
||||||
av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
|
av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@@ -23,29 +23,12 @@
|
|||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "compat/cuda/dynlink_loader.h"
|
||||||
#include "libavutil/fifo.h"
|
#include "libavutil/fifo.h"
|
||||||
#include "libavutil/opt.h"
|
#include "libavutil/opt.h"
|
||||||
|
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
|
|
||||||
#if CONFIG_CUDA
|
|
||||||
#include "libavutil/hwcontext_cuda.h"
|
|
||||||
#else
|
|
||||||
|
|
||||||
#if defined(_WIN32)
|
|
||||||
#define CUDAAPI __stdcall
|
|
||||||
#else
|
|
||||||
#define CUDAAPI
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef enum cudaError_enum {
|
|
||||||
CUDA_SUCCESS = 0
|
|
||||||
} CUresult;
|
|
||||||
typedef int CUdevice;
|
|
||||||
typedef void* CUcontext;
|
|
||||||
typedef void* CUdeviceptr;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MAX_REGISTERED_FRAMES 64
|
#define MAX_REGISTERED_FRAMES 64
|
||||||
|
|
||||||
typedef struct NvencSurface
|
typedef struct NvencSurface
|
||||||
@@ -64,33 +47,10 @@ typedef struct NvencSurface
|
|||||||
int lockCount;
|
int lockCount;
|
||||||
} NvencSurface;
|
} NvencSurface;
|
||||||
|
|
||||||
typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags);
|
|
||||||
typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count);
|
|
||||||
typedef CUresult(CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal);
|
|
||||||
typedef CUresult(CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice dev);
|
|
||||||
typedef CUresult(CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev);
|
|
||||||
typedef CUresult(CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev);
|
|
||||||
typedef CUresult(CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx);
|
|
||||||
typedef CUresult(CUDAAPI *PCUCTXDESTROY)(CUcontext ctx);
|
|
||||||
|
|
||||||
typedef NVENCSTATUS (NVENCAPI *PNVENCODEAPIGETMAXSUPPORTEDVERSION)(uint32_t* version);
|
|
||||||
typedef NVENCSTATUS (NVENCAPI *PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList);
|
|
||||||
|
|
||||||
typedef struct NvencDynLoadFunctions
|
typedef struct NvencDynLoadFunctions
|
||||||
{
|
{
|
||||||
#if !CONFIG_CUDA
|
CudaFunctions *cuda_dl;
|
||||||
void *cuda;
|
NvencFunctions *nvenc_dl;
|
||||||
#endif
|
|
||||||
void *nvenc;
|
|
||||||
|
|
||||||
PCUINIT cu_init;
|
|
||||||
PCUDEVICEGETCOUNT cu_device_get_count;
|
|
||||||
PCUDEVICEGET cu_device_get;
|
|
||||||
PCUDEVICEGETNAME cu_device_get_name;
|
|
||||||
PCUDEVICECOMPUTECAPABILITY cu_device_compute_capability;
|
|
||||||
PCUCTXCREATE cu_ctx_create;
|
|
||||||
PCUCTXPOPCURRENT cu_ctx_pop_current;
|
|
||||||
PCUCTXDESTROY cu_ctx_destroy;
|
|
||||||
|
|
||||||
NV_ENCODE_API_FUNCTION_LIST nvenc_funcs;
|
NV_ENCODE_API_FUNCTION_LIST nvenc_funcs;
|
||||||
int nvenc_device_count;
|
int nvenc_device_count;
|
||||||
|
Reference in New Issue
Block a user