From bb65eb62991e5165b9fad2702a8954a1fe3c6f1f Mon Sep 17 00:00:00 2001 From: Ben Chang Date: Sat, 24 Jun 2017 12:10:10 +0000 Subject: [PATCH 1/3] nvenc: Add an explicit auto alias --- libavcodec/nvenc_h264.c | 1 + libavcodec/nvenc_hevc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c index f7e5cd05dc..1c9e07a776 100644 --- a/libavcodec/nvenc_h264.c +++ b/libavcodec/nvenc_h264.c @@ -47,6 +47,7 @@ static const AVOption options[] = { { "high_444", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_H264_PROFILE_HIGH_444 }, 0, 0, VE, "profile" }, { "constrained_high", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_H264_PROFILE_CONSTRAINED_HIGH }, 0, 0, VE, "profile" }, { "level", "Set the encoding level restriction", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = NV_ENC_LEVEL_AUTOSELECT }, NV_ENC_LEVEL_AUTOSELECT, NV_ENC_LEVEL_H264_51, VE, "level" }, + { "auto", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_AUTOSELECT }, 0, 0, VE, "level" }, { "1.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_H264_1 }, 0, 0, VE, "level" }, { "1.b", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_H264_1b }, 0, 0, VE, "level" }, { "1.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_H264_11 }, 0, 0, VE, "level" }, diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c index 9102d8a3b3..a13db98356 100644 --- a/libavcodec/nvenc_hevc.c +++ b/libavcodec/nvenc_hevc.c @@ -47,6 +47,7 @@ static const AVOption options[] = { { "rext", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_HEVC_PROFILE_REXT }, 0, 0, VE, "profile" }, #endif /* NVENCAPI_MAJOR_VERSION >= 7 */ { "level", "Set the encoding level restriction", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = NV_ENC_LEVEL_AUTOSELECT }, NV_ENC_LEVEL_AUTOSELECT, NV_ENC_LEVEL_HEVC_62, VE, "level" }, + { "auto", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_AUTOSELECT }, 0, 0, VE, "level" }, { "1.0", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_HEVC_1 }, 0, 0, VE, "level" }, { "2.0", "", 0, 
AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_HEVC_2 }, 0, 0, VE, "level" }, { "2.1", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_HEVC_21 }, 0, 0, VE, "level" }, From 2e8d88ad5281ab923e1d9772eb99fdfb483435c2 Mon Sep 17 00:00:00 2001 From: Ben Chang Date: Sat, 24 Jun 2017 12:14:22 +0000 Subject: [PATCH 2/3] nvenc: Use a fifo to manage the free surface pool Previously, if a session allocates x surfaces, only x-1 surfaces are used (due to combination of output delay and lock toggle logic). --- libavcodec/nvenc.c | 23 ++++++++++++++--------- libavcodec/nvenc.h | 3 +-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index f16e509df3..7b30ad768e 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -986,6 +986,7 @@ static int nvenc_alloc_surface(AVCodecContext *avctx, int idx) { NVENCContext *ctx = avctx->priv_data; NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs; + NVENCFrame *tmp_surface = &ctx->frames[idx]; int ret; NV_ENC_CREATE_BITSTREAM_BUFFER out_buffer = { 0 }; @@ -1046,6 +1047,8 @@ static int nvenc_alloc_surface(AVCodecContext *avctx, int idx) ctx->frames[idx].out = out_buffer.bitstreamBuffer; + av_fifo_generic_write(ctx->unused_surface_queue, &tmp_surface, sizeof(tmp_surface), NULL); + return 0; } @@ -1066,6 +1069,9 @@ static int nvenc_setup_surfaces(AVCodecContext *avctx) ctx->timestamps = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t)); if (!ctx->timestamps) return AVERROR(ENOMEM); + ctx->unused_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NVENCFrame*)); + if (!ctx->unused_surface_queue) + return AVERROR(ENOMEM); ctx->pending = av_fifo_alloc(ctx->nb_surfaces * sizeof(*ctx->frames)); if (!ctx->pending) return AVERROR(ENOMEM); @@ -1123,6 +1129,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) av_fifo_free(ctx->timestamps); av_fifo_free(ctx->pending); av_fifo_free(ctx->ready); + av_fifo_free(ctx->unused_surface_queue); if (ctx->frames) { for (i = 0; i < 
ctx->nb_surfaces; ++i) { @@ -1201,16 +1208,14 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx) static NVENCFrame *get_free_frame(NVENCContext *ctx) { - int i; + NVENCFrame *tmp_surf; - for (i = 0; i < ctx->nb_surfaces; i++) { - if (!ctx->frames[i].locked) { - ctx->frames[i].locked = 1; - return &ctx->frames[i]; - } - } + if (!(av_fifo_size(ctx->unused_surface_queue) > 0)) + // queue empty + return NULL; - return NULL; + av_fifo_generic_read(ctx->unused_surface_queue, &tmp_surf, sizeof(tmp_surf), NULL); + return tmp_surf; } static int nvenc_copy_frame(NV_ENC_LOCK_INPUT_BUFFER *in, const AVFrame *frame) @@ -1510,7 +1515,7 @@ static int nvenc_get_output(AVCodecContext *avctx, AVPacket *pkt) frame->in = NULL; } - frame->locked = 0; + av_fifo_generic_write(ctx->unused_surface_queue, &frame, sizeof(frame), NULL); ret = nvenc_set_timestamp(avctx, &params, pkt); if (ret < 0) diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h index 3602f16e83..b42b930920 100644 --- a/libavcodec/nvenc.h +++ b/libavcodec/nvenc.h @@ -56,7 +56,6 @@ typedef struct NVENCFrame { NV_ENC_OUTPUT_PTR out; NV_ENC_BUFFER_FORMAT format; - int locked; } NVENCFrame; typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags); @@ -145,7 +144,7 @@ typedef struct NVENCContext { int nb_surfaces; NVENCFrame *frames; AVFifoBuffer *timestamps; - AVFifoBuffer *pending, *ready; + AVFifoBuffer *pending, *ready, *unused_surface_queue; struct { CUdeviceptr ptr; From 7cb053e4ddf258e2dbf52ccc586548680742d758 Mon Sep 17 00:00:00 2001 From: Ben Chang Date: Sat, 24 Jun 2017 12:17:14 +0000 Subject: [PATCH 3/3] nvenc: Minimize the surface allocation The previous default sets the allocated surfaces to 32 unless it is user-overridden or the lookahead parameter is set. Change the surfaces calculation for default, B-frames and lookahead scenario.
--- libavcodec/nvenc.c | 45 ++++++++++++++++++++++++++++++++++++----- libavcodec/nvenc_h264.c | 4 ++-- libavcodec/nvenc_hevc.c | 4 ++-- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index 7b30ad768e..884b344b96 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -874,6 +874,44 @@ static int nvenc_setup_codec_config(AVCodecContext *avctx) return 0; } +static int nvenc_recalc_surfaces(AVCodecContext *avctx) +{ + NVENCContext *ctx = avctx->priv_data; + // default minimum of 4 surfaces + // multiply by 2 for number of NVENCs on gpu (hardcode to 2) + // another multiply by 2 to avoid blocking next PBB group + int nb_surfaces = FFMAX(4, ctx->config.frameIntervalP * 2 * 2); + + // lookahead enabled + if (ctx->rc_lookahead > 0) { + // +1 is to account for lkd_bound calculation later + // +4 is to allow sufficient pipelining with lookahead + nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->config.frameIntervalP + 1 + 4)); + if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0) { + av_log(avctx, AV_LOG_WARNING, + "Defined rc_lookahead requires more surfaces, " + "increasing used surfaces %d -> %d\n", + ctx->nb_surfaces, nb_surfaces); + } + ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces); + } else { + if (ctx->config.frameIntervalP > 1 && + ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0) { + av_log(avctx, AV_LOG_WARNING, + "Defined b-frame requires more surfaces, " + "increasing used surfaces %d -> %d\n", + ctx->nb_surfaces, nb_surfaces); + ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces); + } else if (ctx->nb_surfaces <= 0) + ctx->nb_surfaces = nb_surfaces; + // otherwise use user specified value + } + + ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces)); + ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1); + return 0; +} + static int nvenc_setup_encoder(AVCodecContext *avctx) { NVENCContext *ctx = avctx->priv_data; @@ -956,6 
+994,8 @@ static int nvenc_setup_encoder(AVCodecContext *avctx) ctx->initial_pts[0] = AV_NOPTS_VALUE; ctx->initial_pts[1] = AV_NOPTS_VALUE; + nvenc_recalc_surfaces(avctx); + nvenc_setup_rate_control(avctx); if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) { @@ -1057,11 +1097,6 @@ static int nvenc_setup_surfaces(AVCodecContext *avctx) NVENCContext *ctx = avctx->priv_data; int i, ret; - ctx->nb_surfaces = FFMAX(4 + avctx->max_b_frames, - ctx->nb_surfaces); - ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1); - - ctx->frames = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->frames)); if (!ctx->frames) return AVERROR(ENOMEM); diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c index 1c9e07a776..bf983265df 100644 --- a/libavcodec/nvenc_h264.c +++ b/libavcodec/nvenc_h264.c @@ -72,14 +72,14 @@ static const AVOption options[] = { { "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY }, 0, 0, VE, "rc" }, { "ll_2pass_size", "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" }, { "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" }, - { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, INT_MAX, VE }, + { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE }, { "device", "Select a specific NVENC device", OFFSET(device), AV_OPT_TYPE_INT, { .i64 = -1 }, -2, INT_MAX, VE, "device" }, { "any", "Pick the first device available", 0, AV_OPT_TYPE_CONST, { .i64 = ANY_DEVICE }, 0, 0, VE, "device" }, { "list", "List the available devices", 0, AV_OPT_TYPE_CONST, { .i64 = LIST_DEVICES }, 0, 0, VE, "device" }, { "async_depth", "Delay frame output by the 
given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, { "delay", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, #if NVENCAPI_MAJOR_VERSION >= 7 - { "rc-lookahead", "Number of frames to look ahead for rate-control", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE }, + { "rc-lookahead", "Number of frames to look ahead for rate-control", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, INT_MAX, VE }, { "no-scenecut", "When lookahead is enabled, set this to 1 to disable adaptive I-frame insertion at scene cuts", OFFSET(no_scenecut), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, { "b_adapt", "When lookahead is enabled, set this to 0 to disable adaptive B-frame decision", OFFSET(b_adapt), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE }, { "spatial-aq", "set to 1 to enable Spatial AQ", OFFSET(aq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c index a13db98356..caf7c4add9 100644 --- a/libavcodec/nvenc_hevc.c +++ b/libavcodec/nvenc_hevc.c @@ -72,14 +72,14 @@ static const AVOption options[] = { { "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY }, 0, 0, VE, "rc" }, { "ll_2pass_size", "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" }, { "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" }, - { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, INT_MAX, VE }, + { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE }, { "device", "Select a specific NVENC 
device", OFFSET(device), AV_OPT_TYPE_INT, { .i64 = -1 }, -2, INT_MAX, VE, "device" }, { "any", "Pick the first device available", 0, AV_OPT_TYPE_CONST, { .i64 = ANY_DEVICE }, 0, 0, VE, "device" }, { "list", "List the available devices", 0, AV_OPT_TYPE_CONST, { .i64 = LIST_DEVICES }, 0, 0, VE, "device" }, { "async_depth", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, { "delay", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, #if NVENCAPI_MAJOR_VERSION >= 7 - { "rc-lookahead", "Number of frames to look ahead for rate-control", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE }, + { "rc-lookahead", "Number of frames to look ahead for rate-control", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, INT_MAX, VE }, { "no-scenecut", "When lookahead is enabled, set this to 1 to disable adaptive I-frame insertion at scene cuts", OFFSET(no_scenecut), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, { "spatial_aq", "set to 1 to enable Spatial AQ", OFFSET(aq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, { "zerolatency", "Set 1 to indicate zero latency operation (no reordering delay)", OFFSET(zerolatency), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },