Merge commit '12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5'

* commit '12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5':
  audiodsp/x86: yasmify vector_clipf_sse
  audiodsp: reorder arguments for vector_clipf

Merged the version from Libav after a discussion with James Almer on
IRC:

19:22 <ubitux> jamrial: opinion on 12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5?
19:23 <ubitux> it was apparently yasmified differently
19:23 <ubitux> (it depends on the previous commit arg shuffle)
19:24 <ubitux> i don't see the magic movsxdifnidn in your port btw
19:24 <ubitux> it's a port from 1d36defe94c7d7ebf995d4dbb4f878d06272f9c6
19:25 <jamrial> seems better thanks to said arg shuffle
19:25 <jamrial> the loop is the same, but init is simpler
19:25 <jamrial> probably worth merging
19:25 <ubitux> OK
19:25 <ubitux> thanks
19:26 <jamrial> curious they didn't make len ptrdiff_t after the previous bunch of commits, heh
19:26 <ubitux> yeah indeed

Both commits are merged at the same time to prevent a conflict with our
existing yasmified ff_vector_clipf_sse.

Merged-by: Clément Bœsch <u@pkh.me>
This commit is contained in:
Clément Bœsch 2017-03-20 22:28:38 +01:00
commit 83cd80d10a
9 changed files with 54 additions and 56 deletions

View File

@@ -121,7 +121,7 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4],
static void clip_coefficients(AudioDSPContext *adsp, float *coef, static void clip_coefficients(AudioDSPContext *adsp, float *coef,
unsigned int len) unsigned int len)
{ {
adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); adsp->vector_clipf(coef, coef, len, COEF_MIN, COEF_MAX);
} }

View File

@@ -25,8 +25,7 @@
#include "libavcodec/audiodsp.h" #include "libavcodec/audiodsp.h"
#include "audiodsp_arm.h" #include "audiodsp_arm.h"
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, void ff_vector_clipf_neon(float *dst, const float *src, int len, float min, float max);
int len);
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len); int32_t max, unsigned int len);

View File

@@ -24,9 +24,8 @@
function ff_vector_clipf_neon, export=1 function ff_vector_clipf_neon, export=1
VFP vdup.32 q1, d0[1] VFP vdup.32 q1, d0[1]
VFP vdup.32 q0, d0[0] VFP vdup.32 q0, d0[0]
NOVFP vdup.32 q0, r2 NOVFP vdup.32 q0, r3
NOVFP vdup.32 q1, r3 NOVFP vld1.32 {d2[],d3[]}, [sp]
NOVFP ldr r2, [sp]
vld1.f32 {q2},[r1,:128]! vld1.f32 {q2},[r1,:128]!
vmin.f32 q10, q2, q1 vmin.f32 q10, q2, q1
vld1.f32 {q3},[r1,:128]! vld1.f32 {q3},[r1,:128]!

View File

@@ -55,8 +55,8 @@ static void vector_clipf_c_opposite_sign(float *dst, const float *src,
} }
} }
static void vector_clipf_c(float *dst, const float *src, static void vector_clipf_c(float *dst, const float *src, int len,
float min, float max, int len) float min, float max)
{ {
int i; int i;

View File

@@ -48,7 +48,8 @@ typedef struct AudioDSPContext {
/* assume len is a multiple of 16, and arrays are 16-byte aligned */ /* assume len is a multiple of 16, and arrays are 16-byte aligned */
void (*vector_clipf)(float *dst /* align 16 */, void (*vector_clipf)(float *dst /* align 16 */,
const float *src /* align 16 */, const float *src /* align 16 */,
float min, float max, int len /* align 16 */); int len /* align 16 */,
float min, float max);
} AudioDSPContext; } AudioDSPContext;
void ff_audiodsp_init(AudioDSPContext *c); void ff_audiodsp_init(AudioDSPContext *c);

View File

@@ -882,7 +882,7 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p,
static void saturate_output_float(COOKContext *q, float *out) static void saturate_output_float(COOKContext *q, float *out)
{ {
q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel, q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel,
-1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8)); FFALIGN(q->samples_per_channel, 8), -1.0f, 1.0f);
} }

View File

@@ -132,46 +132,45 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
VECTOR_CLIP_INT32 6, 1, 0, 0 VECTOR_CLIP_INT32 6, 1, 0, 0
%endif %endif
;----------------------------------------------------- ; void ff_vector_clipf_sse(float *dst, const float *src,
;void ff_vector_clipf(float *dst, const float *src, ; int len, float min, float max)
; float min, float max, int len)
;-----------------------------------------------------
INIT_XMM sse INIT_XMM sse
%if UNIX64 cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
cglobal vector_clipf, 3,3,6, dst, src, len %if ARCH_X86_32
%else VBROADCASTSS m0, minm
cglobal vector_clipf, 5,5,6, dst, src, min, max, len VBROADCASTSS m1, maxm
%elif WIN64
VBROADCASTSS m0, m3
VBROADCASTSS m1, maxm
%else ; 64bit sysv
VBROADCASTSS m0, m0
VBROADCASTSS m1, m1
%endif %endif
%if WIN64
SWAP 0, 2 movsxdifnidn lenq, lend
SWAP 1, 3
%elif ARCH_X86_32 .loop
movss m0, minm mova m2, [srcq + 4 * lenq - 4 * mmsize]
movss m1, maxm mova m3, [srcq + 4 * lenq - 3 * mmsize]
%endif mova m4, [srcq + 4 * lenq - 2 * mmsize]
SPLATD m0 mova m5, [srcq + 4 * lenq - 1 * mmsize]
SPLATD m1
shl lend, 2
add srcq, lenq
add dstq, lenq
neg lenq
.loop:
mova m2, [srcq+lenq+mmsize*0]
mova m3, [srcq+lenq+mmsize*1]
mova m4, [srcq+lenq+mmsize*2]
mova m5, [srcq+lenq+mmsize*3]
maxps m2, m0 maxps m2, m0
maxps m3, m0 maxps m3, m0
maxps m4, m0 maxps m4, m0
maxps m5, m0 maxps m5, m0
minps m2, m1 minps m2, m1
minps m3, m1 minps m3, m1
minps m4, m1 minps m4, m1
minps m5, m1 minps m5, m1
mova [dstq+lenq+mmsize*0], m2
mova [dstq+lenq+mmsize*1], m3 mova [dstq + 4 * lenq - 4 * mmsize], m2
mova [dstq+lenq+mmsize*2], m4 mova [dstq + 4 * lenq - 3 * mmsize], m3
mova [dstq+lenq+mmsize*3], m5 mova [dstq + 4 * lenq - 2 * mmsize], m4
add lenq, mmsize*4 mova [dstq + 4 * lenq - 1 * mmsize], m5
jl .loop
REP_RET sub lenq, mmsize
jg .loop
RET

View File

@@ -38,7 +38,7 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len); int32_t min, int32_t max, unsigned int len);
void ff_vector_clipf_sse(float *dst, const float *src, void ff_vector_clipf_sse(float *dst, const float *src,
float min, float max, int len); int len, float min, float max);
av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
{ {

View File

@@ -120,7 +120,7 @@ void checkasm_check_audiodsp(void)
int i, len; int i, len;
declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src, declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src,
float min, float max, unsigned int len); int len, float min, float max);
val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f; val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f; val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
@@ -133,13 +133,13 @@ void checkasm_check_audiodsp(void)
len = rnd() % 128; len = rnd() % 128;
len = 16 * FFMAX(len, 1); len = 16 * FFMAX(len, 1);
call_ref(dst0, src, min, max, len); call_ref(dst0, src, len, min, max);
call_new(dst1, src, min, max, len); call_new(dst1, src, len, min, max);
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
if (!float_near_ulp_array(dst0, dst1, 3, len)) if (!float_near_ulp_array(dst0, dst1, 3, len))
fail(); fail();
} }
bench_new(dst1, src, min, max, MAX_SIZE); bench_new(dst1, src, MAX_SIZE, min, max);
} }
report("audiodsp"); report("audiodsp");