Fixes multiplane support on Nvidia. Also, remove the ENCODE usage, even if the driver signals it as supported. Currently, it's not used, and when it is used, it'll be gated behind two extension checks.
460 lines
18 KiB
C
460 lines
18 KiB
C
/*
|
|
* Copyright (c) Lynne
|
|
*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include "vulkan_filter.h"
|
|
|
|
int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s,
|
|
AVBufferRef *frames_ref,
|
|
int width, int height, enum AVPixelFormat sw_format)
|
|
{
|
|
int err;
|
|
AVHWFramesContext *frames_ctx;
|
|
AVHWDeviceContext *device_ctx;
|
|
AVVulkanFramesContext *vk_frames;
|
|
AVVulkanDeviceContext *vk_dev;
|
|
AVBufferRef *device_ref = avctx->hw_device_ctx;
|
|
|
|
/* Check if context is reusable as-is */
|
|
if (frames_ref) {
|
|
int no_storage = 0;
|
|
FFVulkanFunctions *vk;
|
|
const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format);
|
|
|
|
frames_ctx = (AVHWFramesContext *)frames_ref->data;
|
|
device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
|
|
vk_frames = frames_ctx->hwctx;
|
|
vk_dev = device_ctx->hwctx;
|
|
|
|
/* Width and height mismatch */
|
|
if (width != frames_ctx->width ||
|
|
height != frames_ctx->height)
|
|
goto skip;
|
|
|
|
/* Format mismatch */
|
|
if (sw_format != frames_ctx->sw_format)
|
|
goto skip;
|
|
|
|
/* Unusual tiling mismatch. Don't let linear through either. */
|
|
if (vk_frames->tiling != VK_IMAGE_TILING_OPTIMAL)
|
|
goto skip;
|
|
|
|
/* Usage mismatch */
|
|
if ((vk_frames->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)) !=
|
|
(VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT))
|
|
goto skip;
|
|
|
|
s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
|
|
vk_dev->nb_enabled_dev_extensions);
|
|
err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
|
|
if (err < 0)
|
|
return err;
|
|
vk = &s->vkfn;
|
|
|
|
/* Check if the subformats can do storage */
|
|
for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) {
|
|
VkFormatProperties2 prop = {
|
|
.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
|
|
};
|
|
vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i],
|
|
&prop);
|
|
|
|
if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) {
|
|
no_storage |= !(prop.formatProperties.linearTilingFeatures &
|
|
VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
|
|
} else {
|
|
no_storage |= !(prop.formatProperties.optimalTilingFeatures &
|
|
VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
|
|
}
|
|
}
|
|
|
|
/* Check if it's usable */
|
|
if (no_storage) {
|
|
skip:
|
|
device_ref = frames_ctx->device_ref;
|
|
frames_ref = NULL;
|
|
} else {
|
|
accept:
|
|
frames_ref = av_buffer_ref(frames_ref);
|
|
if (!frames_ref)
|
|
return AVERROR(ENOMEM);
|
|
}
|
|
}
|
|
|
|
if (!frames_ref) {
|
|
if (!device_ref) {
|
|
av_log(avctx, AV_LOG_ERROR,
|
|
"Vulkan filtering requires a device context!\n");
|
|
return AVERROR(EINVAL);
|
|
}
|
|
|
|
frames_ref = av_hwframe_ctx_alloc(device_ref);
|
|
|
|
frames_ctx = (AVHWFramesContext *)frames_ref->data;
|
|
frames_ctx->format = AV_PIX_FMT_VULKAN;
|
|
frames_ctx->sw_format = sw_format;
|
|
frames_ctx->width = width;
|
|
frames_ctx->height = height;
|
|
|
|
vk_frames = frames_ctx->hwctx;
|
|
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
|
|
vk_frames->usage = VK_IMAGE_USAGE_SAMPLED_BIT |
|
|
VK_IMAGE_USAGE_STORAGE_BIT |
|
|
VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
|
VK_IMAGE_USAGE_TRANSFER_DST_BIT;
|
|
|
|
err = av_hwframe_ctx_init(frames_ref);
|
|
if (err < 0) {
|
|
av_buffer_unref(&frames_ref);
|
|
return err;
|
|
}
|
|
|
|
device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
|
|
vk_dev = device_ctx->hwctx;
|
|
}
|
|
|
|
s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
|
|
vk_dev->nb_enabled_dev_extensions);
|
|
|
|
/**
|
|
* libplacebo does not use descriptor buffers.
|
|
*/
|
|
if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) &&
|
|
strcmp(avctx->filter->name, "libplacebo")) {
|
|
av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires that "
|
|
"the %s extension is supported!\n",
|
|
VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME);
|
|
av_buffer_unref(&frames_ref);
|
|
return AVERROR(EINVAL);
|
|
}
|
|
|
|
err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
|
|
if (err < 0) {
|
|
av_buffer_unref(&frames_ref);
|
|
return err;
|
|
}
|
|
|
|
s->frames_ref = frames_ref;
|
|
s->frames = frames_ctx;
|
|
s->hwfc = vk_frames;
|
|
s->device = device_ctx;
|
|
s->hwctx = device_ctx->hwctx;
|
|
|
|
err = ff_vk_load_props(s);
|
|
if (err < 0)
|
|
av_buffer_unref(&s->frames_ref);
|
|
|
|
return err;
|
|
}
|
|
|
|
/**
 * Input link configuration shared by Vulkan filters.
 *
 * Verifies that the link carries a hardware frames context whose format is
 * AV_PIX_FMT_VULKAN. For the filter's first input only, the frames context
 * pointer is remembered (no new reference is taken) and the output format
 * and dimensions default to those of the input.
 *
 * @param inlink input link being configured
 * @return 0 on success, AVERROR(EINVAL) if the link has no hardware frames
 *         context or the context is not a Vulkan one
 */
int ff_vk_filter_config_input(AVFilterLink *inlink)
{
    AVFilterContext *filter = inlink->dst;
    FFVulkanContext *vkctx  = filter->priv;
    AVBufferRef *hw_ref     = inlink->hw_frames_ctx;
    AVHWFramesContext *in_frames;

    if (!hw_ref) {
        av_log(filter, AV_LOG_ERROR, "Vulkan filtering requires a "
               "hardware frames context on the input.\n");
        return AVERROR(EINVAL);
    }

    in_frames = (AVHWFramesContext *)hw_ref->data;
    if (in_frames->format != AV_PIX_FMT_VULKAN)
        return AVERROR(EINVAL);

    /* Only the first input provides the device and the output defaults */
    if (filter->inputs[0] == inlink) {
        /* Borrowed pointer: the reference count is left untouched */
        vkctx->input_frames_ref = hw_ref;

        vkctx->output_format = in_frames->sw_format;
        vkctx->output_width  = inlink->w;
        vkctx->output_height = inlink->h;
    }

    return 0;
}
|
|
|
|
/**
 * Output link configuration shared by Vulkan filters.
 *
 * Drops any frames context already attached to the link, initializes the
 * filter's Vulkan context from the stored input frames reference and output
 * parameters, then gives the link a new reference to the resulting frames
 * context and the output dimensions.
 *
 * @param outlink output link being configured
 * @return the non-negative result of ff_vk_filter_init_context() on success,
 *         its negative error code on failure, or AVERROR(ENOMEM) if the
 *         frames context could not be referenced
 */
int ff_vk_filter_config_output(AVFilterLink *outlink)
{
    AVFilterContext *filter = outlink->src;
    FFVulkanContext *vkctx  = filter->priv;
    int ret;

    av_buffer_unref(&outlink->hw_frames_ctx);

    ret = ff_vk_filter_init_context(filter, vkctx, vkctx->input_frames_ref,
                                    vkctx->output_width, vkctx->output_height,
                                    vkctx->output_format);
    if (ret < 0)
        return ret;

    /* The link gets its own reference; vkctx keeps the one it holds */
    outlink->hw_frames_ctx = av_buffer_ref(vkctx->frames_ref);
    if (!outlink->hw_frames_ctx)
        return AVERROR(ENOMEM);

    outlink->w = vkctx->output_width;
    outlink->h = vkctx->output_height;

    return ret;
}
|
|
|
|
/**
 * Generic init for Vulkan filters: marks the output format as not yet
 * chosen by setting it to AV_PIX_FMT_NONE.
 *
 * @param avctx filter context whose private data is an FFVulkanContext
 * @return always 0
 */
int ff_vk_filter_init(AVFilterContext *avctx)
{
    FFVulkanContext *vkctx = avctx->priv;

    vkctx->output_format = AV_PIX_FMT_NONE;

    return 0;
}
|
|
|
|
int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
|
|
FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
|
|
VkSampler sampler, void *push_src, size_t push_size)
|
|
{
|
|
int err = 0;
|
|
FFVulkanFunctions *vk = &vkctx->vkfn;
|
|
VkImageView in_views[AV_NUM_DATA_POINTERS];
|
|
VkImageView out_views[AV_NUM_DATA_POINTERS];
|
|
VkImageMemoryBarrier2 img_bar[37];
|
|
int nb_img_bar = 0;
|
|
|
|
/* Update descriptors and init the exec context */
|
|
FFVkExecContext *exec = ff_vk_exec_get(e);
|
|
ff_vk_exec_start(vkctx, exec);
|
|
|
|
ff_vk_exec_bind_pipeline(vkctx, exec, pl);
|
|
|
|
if (push_src)
|
|
ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
|
|
0, push_size, push_src);
|
|
|
|
if (in_f) {
|
|
RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
|
RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f));
|
|
ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0,
|
|
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
|
sampler);
|
|
ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_SHADER_READ_BIT,
|
|
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
|
VK_QUEUE_FAMILY_IGNORED);
|
|
}
|
|
|
|
RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
|
RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f));
|
|
ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, !!in_f,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_NULL_HANDLE);
|
|
ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_SHADER_WRITE_BIT,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_QUEUE_FAMILY_IGNORED);
|
|
|
|
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
.pImageMemoryBarriers = img_bar,
|
|
.imageMemoryBarrierCount = nb_img_bar,
|
|
});
|
|
|
|
vk->CmdDispatch(exec->buf,
|
|
FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0],
|
|
FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
|
|
pl->wg_size[2]);
|
|
|
|
return ff_vk_exec_submit(vkctx, exec);
|
|
fail:
|
|
ff_vk_exec_discard_deps(vkctx, exec);
|
|
return err;
|
|
}
|
|
|
|
int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
|
|
FFVulkanPipeline *pls[2],
|
|
AVFrame *out, AVFrame *tmp, AVFrame *in,
|
|
VkSampler sampler, void *push_src, size_t push_size)
|
|
{
|
|
int err = 0;
|
|
FFVulkanFunctions *vk = &vkctx->vkfn;
|
|
VkImageView in_views[AV_NUM_DATA_POINTERS];
|
|
VkImageView tmp_views[AV_NUM_DATA_POINTERS];
|
|
VkImageView out_views[AV_NUM_DATA_POINTERS];
|
|
VkImageMemoryBarrier2 img_bar[37];
|
|
int nb_img_bar = 0;
|
|
|
|
/* Update descriptors and init the exec context */
|
|
FFVkExecContext *exec = ff_vk_exec_get(e);
|
|
ff_vk_exec_start(vkctx, exec);
|
|
|
|
RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
|
RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
|
RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
|
|
|
RET(ff_vk_create_imageviews(vkctx, exec, in_views, in));
|
|
RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp));
|
|
RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
|
|
|
|
ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_SHADER_READ_BIT,
|
|
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
|
VK_QUEUE_FAMILY_IGNORED);
|
|
ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_QUEUE_FAMILY_IGNORED);
|
|
ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_SHADER_WRITE_BIT,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_QUEUE_FAMILY_IGNORED);
|
|
|
|
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
.pImageMemoryBarriers = img_bar,
|
|
.imageMemoryBarrierCount = nb_img_bar,
|
|
});
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
FFVulkanPipeline *pl = pls[i];
|
|
AVFrame *src_f = !i ? in : tmp;
|
|
AVFrame *dst_f = !i ? tmp : out;
|
|
VkImageView *src_views = !i ? in_views : tmp_views;
|
|
VkImageView *dst_views = !i ? tmp_views : out_views;
|
|
|
|
ff_vk_exec_bind_pipeline(vkctx, exec, pl);
|
|
|
|
if (push_src)
|
|
ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
|
|
0, push_size, push_src);
|
|
|
|
ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0,
|
|
!i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
sampler);
|
|
ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_NULL_HANDLE);
|
|
|
|
vk->CmdDispatch(exec->buf,
|
|
FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0],
|
|
FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
|
|
pl->wg_size[2]);
|
|
}
|
|
|
|
return ff_vk_exec_submit(vkctx, exec);
|
|
fail:
|
|
ff_vk_exec_discard_deps(vkctx, exec);
|
|
return err;
|
|
}
|
|
|
|
int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e,
|
|
FFVulkanPipeline *pl,
|
|
AVFrame *out, AVFrame *in[], int nb_in,
|
|
VkSampler sampler, void *push_src, size_t push_size)
|
|
{
|
|
int err = 0;
|
|
FFVulkanFunctions *vk = &vkctx->vkfn;
|
|
VkImageView in_views[16][AV_NUM_DATA_POINTERS];
|
|
VkImageView out_views[AV_NUM_DATA_POINTERS];
|
|
VkImageMemoryBarrier2 img_bar[128];
|
|
int nb_img_bar = 0;
|
|
|
|
/* Update descriptors and init the exec context */
|
|
FFVkExecContext *exec = ff_vk_exec_get(e);
|
|
ff_vk_exec_start(vkctx, exec);
|
|
|
|
/* Inputs */
|
|
for (int i = 0; i < nb_in; i++) {
|
|
RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i],
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
|
RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i]));
|
|
|
|
ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_SHADER_READ_BIT,
|
|
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
|
VK_QUEUE_FAMILY_IGNORED);
|
|
}
|
|
|
|
/* Output */
|
|
RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
|
RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
|
|
ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
|
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
VK_ACCESS_SHADER_WRITE_BIT,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_QUEUE_FAMILY_IGNORED);
|
|
|
|
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
.pImageMemoryBarriers = img_bar,
|
|
.imageMemoryBarrierCount = nb_img_bar,
|
|
});
|
|
|
|
ff_vk_exec_bind_pipeline(vkctx, exec, pl);
|
|
|
|
if (push_src)
|
|
ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
|
|
0, push_size, push_src);
|
|
|
|
for (int i = 0; i < nb_in; i++)
|
|
ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i,
|
|
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
|
sampler);
|
|
|
|
ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in,
|
|
VK_IMAGE_LAYOUT_GENERAL,
|
|
VK_NULL_HANDLE);
|
|
|
|
vk->CmdDispatch(exec->buf,
|
|
FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0],
|
|
FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
|
|
pl->wg_size[2]);
|
|
|
|
return ff_vk_exec_submit(vkctx, exec);
|
|
fail:
|
|
ff_vk_exec_discard_deps(vkctx, exec);
|
|
return err;
|
|
}
|