From 07a8fbaa553205b496b68a62cb7bf7a968d0281b Mon Sep 17 00:00:00 2001
From: Ganesh Ajjanagadde
Date: Wed, 9 Dec 2015 18:50:28 -0500
Subject: [PATCH] lavc/nellymoserenc: avoid wasteful pow

exp2 suffices here. Some trivial speedup is done in addition here by
reusing results.

This retains accuracy, and in particular results in identical values
with GNU libm + gcc/clang.

sample benchmark (Haswell, GNU/Linux):
proposed : 424160 decicycles in pow_table, 512 runs, 0 skips
exp2 only: 1262093 decicycles in pow_table, 512 runs, 0 skips
old      : 2849085 decicycles in pow_table, 512 runs, 0 skips

Reviewed-by: Michael Niedermayer
Signed-off-by: Ganesh Ajjanagadde
---
 libavcodec/nellymoserenc.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/libavcodec/nellymoserenc.c b/libavcodec/nellymoserenc.c
index d998dbafee..9d22ac8cca 100644
--- a/libavcodec/nellymoserenc.c
+++ b/libavcodec/nellymoserenc.c
@@ -179,8 +179,18 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
     /* Generate overlap window */
     ff_init_ff_sine_windows(7);
+    /* faster way of doing
     for (i = 0; i < POW_TABLE_SIZE; i++)
-        pow_table[i] = pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
+        pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
+    pow_table[0] = 1;
+    pow_table[1024] = M_SQRT1_2;
+    for (i = 1; i < 513; i++) {
+        double tmp = exp2(-i / 2048.0);
+        pow_table[i] = tmp;
+        pow_table[1024-i] = M_SQRT1_2 / tmp;
+        pow_table[1024+i] = tmp * M_SQRT1_2;
+        pow_table[2048-i] = 0.5 / tmp;
+    }
 
     if (s->avctx->trellis) {
         s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));