From 02f3b9312a99c4ea58211b1ee3d1cb41d04255b8 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Sun, 7 May 2023 23:18:43 +0200
Subject: [PATCH] lavfi/vf_libplacebo: allow fps conversion

This exposes libplacebo's frame mixing functionality to vf_libplacebo,
by allowing users to specify a desired target fps to output at. Incoming
frames will be smoothly resampled (in a manner determined by the
`frame_mixer` option, to be added in the next commit).

To generate a consistently timed output stream, we directly use the
inverse of the desired framerate as the output timebase, and simply
output frames in sequential order (each frame's PTS is the number of
frames output so far).
---
 doc/filters.texi            |  7 +++++++
 libavfilter/vf_libplacebo.c | 38 ++++++++++++++++++++++++++++++-------
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index ef48a4cf31..70bb0996d3 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -16044,6 +16044,13 @@ and @code{(oh-ph)/2}.
 Set the output placement width/height expressions, default values are @code{ow}
 and @code{oh}.
 
+@item fps
+Set the output frame rate. This can be rational, e.g. @code{60000/1001}. If
+set to the special string @code{none} (the default), input timestamps will
+instead be passed through to the output unmodified. Otherwise, the input video
+frames will be interpolated as necessary to rescale the video to the specified
+target framerate, in a manner determined by the @option{frame_mixer} option.
+
 @item format
 Set the output format override. If unset (the default), frames will be output
 in the same format as the respective input frames. Otherwise, format conversion
diff --git a/libavfilter/vf_libplacebo.c b/libavfilter/vf_libplacebo.c
index 472bbb6b2c..f8b93f975f 100644
--- a/libavfilter/vf_libplacebo.c
+++ b/libavfilter/vf_libplacebo.c
@@ -139,6 +139,8 @@ typedef struct LibplaceboContext {
     double var_values[VAR_VARS_NB];
     char *w_expr;
     char *h_expr;
+    char *fps_string;
+    AVRational fps; ///< parsed FPS, or 0/0 for "none"
     char *crop_x_expr, *crop_y_expr;
     char *crop_w_expr, *crop_h_expr;
     char *pos_x_expr, *pos_y_expr;
@@ -166,6 +168,7 @@ typedef struct LibplaceboContext {
     float antiringing;
     int sigmoid;
     int skip_aa;
+    int skip_cache;
     float polar_cutoff;
     int disable_linear;
     int disable_builtin;
@@ -400,7 +403,7 @@ static int update_settings(AVFilterContext *ctx)
         .num_hooks = s->num_hooks,
 
         .skip_anti_aliasing = s->skip_aa,
-        .skip_caching_single_frame = true,
+        .skip_caching_single_frame = s->skip_cache,
         .polar_cutoff = s->polar_cutoff,
         .disable_linear_scaling = s->disable_linear,
         .disable_builtin_scalers = s->disable_builtin,
@@ -465,6 +468,8 @@ static int libplacebo_init(AVFilterContext *avctx)
 
     /* Initialize dynamic filter state */
     s->out_pts = av_fifo_alloc2(1, sizeof(int64_t), AV_FIFO_FLAG_AUTO_GROW);
+    if (strcmp(s->fps_string, "none") != 0)
+        RET(av_parse_video_rate(&s->fps, s->fps_string));
 
     /* Note: s->vulkan etc. are initialized later, when hwctx is available */
     return 0;
@@ -663,6 +668,8 @@ static int output_frame_mix(AVFilterContext *ctx,
     out->pts = pts;
     out->width = outlink->w;
     out->height = outlink->h;
+    if (s->fps.num)
+        out->duration = 1;
 
     if (s->apply_dovi && av_frame_get_side_data(ref, AV_FRAME_DATA_DOVI_METADATA)) {
         /* Output of dovi reshaping is always BT.2020+PQ, so infer the correct
@@ -784,9 +791,11 @@ static int libplacebo_activate(AVFilterContext *ctx)
             .discard     = discard_frame,
         });
 
-        /* Internally queue an output frame for the same PTS */
-        av_assert1(!av_cmp_q(link->time_base, outlink->time_base));
-        av_fifo_write(s->out_pts, &in->pts, 1);
+        if (!s->fps.num) {
+            /* Internally queue an output frame for the same PTS */
+            av_assert1(!av_cmp_q(link->time_base, outlink->time_base));
+            av_fifo_write(s->out_pts, &in->pts, 1);
+        }
     }
 
     if (ret < 0)
@@ -799,7 +808,8 @@ static int libplacebo_activate(AVFilterContext *ctx)
             /* Signal EOF to pl_queue, and enqueue this output frame to
              * make sure we see PL_QUEUE_EOF returned eventually */
             pl_queue_push(s->queue, NULL);
-            av_fifo_write(s->out_pts, &pts, 1);
+            if (!s->fps.num)
+                av_fifo_write(s->out_pts, &pts, 1);
         } else {
             ff_outlink_set_status(outlink, status, pts);
             return 0;
@@ -810,7 +820,9 @@ static int libplacebo_activate(AVFilterContext *ctx)
         struct pl_frame_mix mix;
         enum pl_queue_status ret;
 
-        if (av_fifo_peek(s->out_pts, &pts, 1, 0) < 0) {
+        if (s->fps.num) {
+            pts = outlink->frame_count_out;
+        } else if (av_fifo_peek(s->out_pts, &pts, 1, 0) < 0) {
             ff_inlink_request_frame(inlink);
             return 0;
         }
@@ -826,7 +838,8 @@ static int libplacebo_activate(AVFilterContext *ctx)
             ff_inlink_request_frame(inlink);
             return 0;
         case PL_QUEUE_OK:
-            av_fifo_drain2(s->out_pts, 1);
+            if (!s->fps.num)
+                av_fifo_drain2(s->out_pts, 1);
             return output_frame_mix(ctx, &mix, pts);
         case PL_QUEUE_EOF:
             ff_outlink_set_status(outlink, AVERROR_EOF, pts);
@@ -942,6 +955,7 @@ static int libplacebo_config_output(AVFilterLink *outlink)
     AVVulkanFramesContext *vkfc;
     AVRational scale_sar;
 
+    /* Frame dimensions */
     RET(ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink,
                                  &outlink->w, &outlink->h));
 
@@ -965,6 +979,15 @@ static int libplacebo_config_output(AVFilterLink *outlink)
             outlink->sample_aspect_ratio = scale_sar;
     }
 
+    /* Frame rate */
+    if (s->fps.num) {
+        outlink->frame_rate = s->fps;
+        outlink->time_base = av_inv_q(s->fps);
+        s->skip_cache = av_cmp_q(inlink->frame_rate, s->fps) > 0;
+    } else {
+        s->skip_cache = true;
+    }
+
     /* Static variables */
     s->var_values[VAR_IN_W]     = s->var_values[VAR_IW] = inlink->w;
     s->var_values[VAR_IN_H]     = s->var_values[VAR_IH] = inlink->h;
@@ -1009,6 +1032,7 @@ fail:
 static const AVOption libplacebo_options[] = {
     { "w", "Output video frame width",  OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = STATIC },
     { "h", "Output video frame height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = STATIC },
+    { "fps", "Output video frame rate", OFFSET(fps_string), AV_OPT_TYPE_STRING, {.str = "none"}, .flags = STATIC },
     { "crop_x", "Input video crop x", OFFSET(crop_x_expr), AV_OPT_TYPE_STRING, {.str = "(iw-cw)/2"}, .flags = DYNAMIC },
     { "crop_y", "Input video crop y", OFFSET(crop_y_expr), AV_OPT_TYPE_STRING, {.str = "(ih-ch)/2"}, .flags = DYNAMIC },
     { "crop_w", "Input video crop w", OFFSET(crop_w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = DYNAMIC },