diff --git a/doc/APIchanges b/doc/APIchanges index c353d2d281..33be750af2 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,9 @@ libavutil: 2017-10-21 API changes, most recent first: +2021-02-21 - xxxxxxxxxx - lavu 56.66.100 - tx.h + Add enum AVTXFlags and AVTXFlags.AV_TX_INPLACE + 2021-02-14 - xxxxxxxxxx - lavd 58.12.100 - avdevice.h Deprecated avdevice_capabilities_create() and avdevice_capabilities_free(). diff --git a/libavutil/tx.c b/libavutil/tx.c index 3b0568a5e1..49d5e125ae 100644 --- a/libavutil/tx.c +++ b/libavutil/tx.c @@ -107,6 +107,42 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s) return 0; } +int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s) +{ + int nb_inplace_idx = 0; + + if (!(s->inplace_idx = av_malloc(s->m*sizeof(*s->inplace_idx)))) + return AVERROR(ENOMEM); + + for (int d = 1; d < s->m; d++) { + int src = d; + int dst = s->revtab[src]; + + if (dst <= src) + continue; + + int found = 0; + int start_src = src; + do { + src = dst; + for (int j = 0; j < nb_inplace_idx; j++) { + if (dst == s->inplace_idx[j]) { + found = 1; + break; + } + } + dst = s->revtab[dst]; + } while (dst != start_src && !found); + + if (!found) + s->inplace_idx[nb_inplace_idx++] = start_src; + } + + s->inplace_idx[nb_inplace_idx++] = 0; + + return 0; +} + av_cold void av_tx_uninit(AVTXContext **ctx) { if (!(*ctx)) @@ -115,6 +151,7 @@ av_cold void av_tx_uninit(AVTXContext **ctx) av_free((*ctx)->pfatab); av_free((*ctx)->exptab); av_free((*ctx)->revtab); + av_free((*ctx)->inplace_idx); av_free((*ctx)->tmp); av_freep(ctx); diff --git a/libavutil/tx.h b/libavutil/tx.h index 14d6311e02..983b5e9307 100644 --- a/libavutil/tx.h +++ b/libavutil/tx.h @@ -98,6 +98,18 @@ enum AVTXType { */ typedef void (*av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride); +/** + * Flags for av_tx_init() + */ +enum AVTXFlags { + /** + * Performs an in-place transformation on the input. The output parameter + * to av_tn_fn() will be ignored. May be unsupported or slower for some + * transform types. + */ + AV_TX_INPLACE = 1ULL << 0, +}; + /** * Initialize a transform context with the given configuration * (i)MDCTs with an odd length are currently not supported. @@ -108,7 +120,7 @@ typedef void (*av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride); * @param inv whether to do an inverse or a forward transform * @param len the size of the transform in samples * @param scale pointer to the value to scale the output if supported by type - * @param flags currently unused + * @param flags a bitmask of AVTXFlags or 0 * * @return 0 on success, negative error code on failure */ diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h index 18a07c312c..e9fba02a35 100644 --- a/libavutil/tx_priv.h +++ b/libavutil/tx_priv.h @@ -106,22 +106,25 @@ typedef void FFTComplex; /* Used by asm, reorder with care */ struct AVTXContext { - int n; /* Nptwo part */ - int m; /* Ptwo part */ - int inv; /* Is inverted */ + int n; /* Non-power-of-two part */ + int m; /* Power-of-two part */ + int inv; /* Is inverse */ int type; /* Type */ + uint64_t flags; /* Flags */ double scale; /* Scale */ FFTComplex *exptab; /* MDCT exptab */ FFTComplex *tmp; /* Temporary buffer needed for all compound transforms */ int *pfatab; /* Input/Output mapping for compound transforms */ int *revtab; /* Input mapping for power of two transforms */ + int *inplace_idx; /* Required indices to revtab for in-place transforms */ }; /* Shared functions */ int ff_tx_type_is_mdct(enum AVTXType type); int ff_tx_gen_compound_mapping(AVTXContext *s); int ff_tx_gen_ptwo_revtab(AVTXContext *s); +int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s); /* Also used by SIMD init */ static inline int split_radix_permutation(int i, int n, int inverse) diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c index 155e879f8e..f43173e920 100644 --- a/libavutil/tx_template.c +++ b/libavutil/tx_template.c @@ -392,8 +392,28 @@ static void monolithic_fft(AVTXContext *s, void *_out, void *_in, FFTComplex *in = _in; FFTComplex *out = _out; int m = s->m, mb = av_log2(m); - for (int i = 0; i < m; i++) - out[s->revtab[i]] = in[i]; + + if (s->flags & AV_TX_INPLACE) { + FFTComplex tmp; + int src, dst, *inplace_idx = s->inplace_idx; + + out = in; + src = *inplace_idx++; + + do { + tmp = out[src]; + dst = s->revtab[src]; + do { + FFSWAP(FFTComplex, tmp, out[dst]); + dst = s->revtab[dst]; + } while (dst != src); /* Can be > as well, but is less predictable */ + out[dst] = tmp; + } while ((src = *inplace_idx++)); + } else { + for (int i = 0; i < m; i++) + out[s->revtab[i]] = in[i]; + } + fft_dispatch[mb](out); } @@ -678,6 +698,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx, s->m = m; s->inv = inv; s->type = type; + s->flags = flags; /* If we weren't able to split the length into factors we can handle, * resort to using the naive and slow FT. This also filters out @@ -685,6 +706,8 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx, if (len > 1 || m == 1) { if (is_mdct && (l & 1)) /* Odd (i)MDCTs are not supported yet */ return AVERROR(ENOSYS); + if (flags & AV_TX_INPLACE) /* Neither are in-place naive transforms */ + return AVERROR(ENOSYS); s->n = l; s->m = 1; *tx = naive_fft; @@ -716,7 +739,14 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx, if (n != 1) init_cos_tabs(0); if (m != 1) { - ff_tx_gen_ptwo_revtab(s); + if ((err = ff_tx_gen_ptwo_revtab(s))) + return err; + if (flags & AV_TX_INPLACE) { + if (is_mdct) /* In-place MDCTs are not supported yet */ + return AVERROR(ENOSYS); + if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s))) + return err; + } for (int i = 4; i <= av_log2(m); i++) init_cos_tabs(i); } diff --git a/libavutil/version.h b/libavutil/version.h index b2165754f9..b7c5892a37 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,7 +79,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 56 -#define LIBAVUTIL_VERSION_MINOR 65 +#define LIBAVUTIL_VERSION_MINOR 66 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \