x86: add colons after labels
nasm prints a warning if the colon is missing.

Signed-off-by: Mans Rullgard <mans@mansr.com>
parent 36ef5369ee
commit a3df4781f4
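For illustration, a minimal standalone case of the diagnostic the commit message refers to (a sketch, not part of the patch; the file name is hypothetical and the exact message wording varies across nasm versions, which report it under -w+orphan-labels):

    ; orphan.asm -- assemble with: nasm -f elf64 orphan.asm
    section .text
    global func
    func:
            xor     eax, eax
    .loop                   ; nasm warns: "label alone on a line
                            ; without a colon might be in error"
            inc     eax
            cmp     eax, 4
            jne     .loop
    .done:                  ; with the trailing colon, no warning
            ret

The patch below applies exactly this change — appending a colon to every bare label — across the x86 assembly files.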
@@ -39,7 +39,7 @@ cglobal deinterlace_line_mmx, 7,7,7, dst, lum_m4, lum_m3, lum_m2, lum_m1
 %endif
 pxor mm7, mm7
 movq mm6, [pw_4]
-.nextrow
+.nextrow:
 movd mm0, [lum_m4q]
 movd mm1, [lum_m3q]
 movd mm2, [lum_m2q]
@@ -1143,7 +1143,7 @@ VECTOR_CLIP_INT32 6, 1, 0, 0
 cglobal vector_fmul_reverse, 4,4,2, dst, src0, src1, len
 lea lenq, [lend*4 - 2*mmsize]
 ALIGN 16
-.loop
+.loop:
 %if cpuflag(avx)
 vmovaps xmm0, [src1q + 16]
 vinsertf128 m0, m0, [src1q], 1
@@ -1182,7 +1182,7 @@ VECTOR_FMUL_REVERSE
 cglobal vector_fmul_add, 5,5,2, dst, src0, src1, src2, len
 lea lenq, [lend*4 - 2*mmsize]
 ALIGN 16
-.loop
+.loop:
 mova m0, [src0q + lenq]
 mova m1, [src0q + lenq + mmsize]
 mulps m0, m0, [src1q + lenq]
@@ -1313,7 +1313,7 @@ cglobal bswap32_buf, 3,4,5
 add r0, 4
 dec r2
 jnz .loop2
-.end
+.end:
 RET
 
 ; %1 = aligned/unaligned
@@ -184,7 +184,7 @@ cglobal hadamard8_diff16_%1, 5, 6, %2
 call hadamard8x8_diff_%1
 add r5d, eax
 
-.done
+.done:
 mov eax, r5d
 %ifndef m8
 ADD rsp, pad
@@ -288,7 +288,7 @@ cglobal sse16_sse2, 5, 5, 8
 pxor m0, m0 ; mm0 = 0
 pxor m7, m7 ; mm7 holds the sum
 
-.next2lines ; FIXME why are these unaligned movs? pix1[] is aligned
+.next2lines: ; FIXME why are these unaligned movs? pix1[] is aligned
 movu m1, [r1 ] ; mm1 = pix1[0][0-15]
 movu m2, [r2 ] ; mm2 = pix2[0][0-15]
 movu m3, [r1+r3] ; mm3 = pix1[1][0-15]
@@ -607,7 +607,7 @@ cglobal fft_calc, 2,5,8
 add rcx, 3
 shl r2, cl
 sub r4, r2
-.loop
+.loop:
 %if mmsize == 8
 PSWAPD m0, [r4 + r2 + 4]
 mova [r4 + r2 + 4], m0
@@ -404,7 +404,7 @@ cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1
 mov src1q, [srcq+gprsize]
 mov srcq, [srcq ]
 sub src1q, srcq
-.loop
+.loop:
 MOVPS m0, [srcq ]
 MOVPS m1, [srcq+src1q ]
 MOVPS m3, [srcq +mmsize]
@@ -69,7 +69,7 @@ SECTION .text
 
 %macro mv0_pixels_mc8 0
 lea r4, [r2*2 ]
-.next4rows
+.next4rows:
 movq mm0, [r1 ]
 movq mm1, [r1+r2]
 add r1, r4
@@ -117,7 +117,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
 mv0_pixels_mc8
 REP_RET
 
-.at_least_one_non_zero
+.at_least_one_non_zero:
 %ifidn %2, rv40
 %if ARCH_X86_64
 mov r7, r5
@@ -145,7 +145,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
 test r4d, r4d
 mov r6, r2 ; dxy = x ? 1 : stride
 jne .both_non_zero
-.my_is_zero
+.my_is_zero:
 ; mx == 0 XOR my == 0 - 1 dimensional filter only
 or r4d, r5d ; x + y
 
@@ -166,7 +166,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
 pxor m7, m7
 psubw m4, m5 ; mm4 = A = 8-x
 
-.next1drow
+.next1drow:
 movq m0, [r1 ] ; mm0 = src[0..7]
 movq m2, [r1+r6] ; mm1 = src[1..8]
 
@@ -197,7 +197,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
 jne .next1drow
 REP_RET
 
-.both_non_zero ; general case, bilinear
+.both_non_zero: ; general case, bilinear
 movd m4, r4d ; x
 movd m6, r5d ; y
 %ifidn %2, rv40
@@ -232,7 +232,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7 + extra_regs, 0
 
 movq m0, [r1 ] ; mm0 = src[0..7]
 movq m1, [r1+1] ; mm1 = src[1..8]
-.next2drow
+.next2drow:
 add r1, r2
 
 movq m2, m0
@@ -330,7 +330,7 @@ cglobal %1_%2_chroma_mc4_%3, 6, 6 + extra_regs, 0
 pmullw m6, m2
 paddw m6, m0
 
-.next2rows
+.next2rows:
 movd m0, [r1 ]
 movd m1, [r1+1]
 add r1, r2
@@ -397,7 +397,7 @@ cglobal %1_%2_chroma_mc2_%3, 6, 7, 0
 punpcklbw m2, m7
 pshufw m2, m2, 0x94 ; mm0 = src[0,1,1,2]
 
-.nextrow
+.nextrow:
 add r1, r2
 movq m1, m2
 pmaddwd m1, m5 ; mm1 = A * src[0,1] + B * src[1,2]
@@ -474,7 +474,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
 mv0_pixels_mc8
 REP_RET
 
-.at_least_one_non_zero
+.at_least_one_non_zero:
 test r5d, r5d
 je .my_is_zero
 test r4d, r4d
@@ -501,7 +501,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
 movlhps m7, m7
 movlhps m6, m6
 
-.next2rows
+.next2rows:
 movq m1, [r1+r2*1 ]
 movq m2, [r1+r2*1+1]
 movq m3, [r1+r2*2 ]
@@ -535,7 +535,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
 jg .next2rows
 REP_RET
 
-.my_is_zero
+.my_is_zero:
 mov r5d, r4d
 shl r4d, 8
 add r4, 8
@@ -545,7 +545,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
 pshuflw m7, m7, 0
 movlhps m7, m7
 
-.next2xrows
+.next2xrows:
 movq m0, [r1 ]
 movq m1, [r1 +1]
 movq m2, [r1+r2 ]
@@ -572,7 +572,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
 jg .next2xrows
 REP_RET
 
-.mx_is_zero
+.mx_is_zero:
 mov r4d, r5d
 shl r5d, 8
 add r5, 8
@@ -582,7 +582,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
 pshuflw m7, m7, 0
 movlhps m7, m7
 
-.next2yrows
+.next2yrows:
 movq m0, [r1 ]
 movq m1, [r1+r2 ]
 movdqa m2, m1
@@ -632,7 +632,7 @@ cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
 punpcklbw m0, [r1+1]
 pshufw m6, m6, 0
 
-.next2rows
+.next2rows:
 movd m1, [r1+r2*1 ]
 movd m3, [r1+r2*2 ]
 punpcklbw m1, [r1+r2*1+1]
@@ -38,7 +38,7 @@ SECTION .text
 %macro MV0_PIXELS_MC8 0
 lea r4, [r2*3 ]
 lea r5, [r2*4 ]
-.next4rows
+.next4rows:
 movu m0, [r1 ]
 movu m1, [r1+r2 ]
 CHROMAMC_AVG m0, [r0 ]
@@ -72,14 +72,14 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
 MV0_PIXELS_MC8
 REP_RET
 
-.at_least_one_non_zero
+.at_least_one_non_zero:
 mov r6d, 2
 test r5d, r5d
 je .x_interpolation
 mov r6, r2 ; dxy = x ? 1 : stride
 test r4d, r4d
 jne .xy_interpolation
-.x_interpolation
+.x_interpolation:
 ; mx == 0 XOR my == 0 - 1 dimensional filter only
 or r4d, r5d ; x + y
 movd m5, r4d
@@ -88,7 +88,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
 SPLATW m5, m5 ; mm5 = B = x
 psubw m4, m5 ; mm4 = A = 8-x
 
-.next1drow
+.next1drow:
 movu m0, [r1 ] ; mm0 = src[0..7]
 movu m2, [r1+r6] ; mm2 = src[1..8]
 
@@ -107,7 +107,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
 jne .next1drow
 REP_RET
 
-.xy_interpolation ; general case, bilinear
+.xy_interpolation: ; general case, bilinear
 movd m4, r4m ; x
 movd m6, r5m ; y
 
@@ -125,7 +125,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
 
 movu m0, [r1 ] ; mm0 = src[0..7]
 movu m1, [r1+2] ; mm1 = src[1..8]
-.next2drow
+.next2drow:
 add r1, r2
 
 pmullw m2, m0, m4
@@ -192,7 +192,7 @@ cglobal %1_h264_chroma_mc4_10, 6,6,7
 pmullw m6, m2
 paddw m6, m0
 
-.next2rows
+.next2rows:
 MC4_OP m0, m6
 MC4_OP m6, m0
 sub r3d, 2
@@ -221,7 +221,7 @@ cglobal %1_h264_chroma_mc2_10, 6,7
 pxor m7, m7
 pshufw m2, [r1], 0x94 ; mm0 = src[0,1,1,2]
 
-.nextrow
+.nextrow:
 add r1, r2
 movq m1, m2
 pmaddwd m1, m5 ; mm1 = A * src[0,1] + B * src[1,2]
@@ -623,7 +623,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16
 shl r2d, 2
 shl r3d, 2
 LOAD_AB aa, bb, r2d, r3d
-.loop
+.loop:
 mova p2, [r4+r1]
 mova p1, [r4+2*r1]
 mova p0, [r4+r5]
@@ -674,7 +674,7 @@ cglobal deblock_h_luma_intra_10, 4,7,16
 mova m0, [pw_2]
 shl r2d, 2
 shl r3d, 2
-.loop
+.loop:
 movu q3, [r0-8]
 movu q2, [r0+r1-8]
 movu q1, [r0+r1*2-8]
@@ -308,7 +308,7 @@ cglobal h264_idct_add16_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block, str
 %ifdef PIC
 lea picregq, [scan8_mem]
 %endif
-.nextblock
+.nextblock:
 movzx r6, byte [scan8+r5]
 movzx r6, byte [r4+r6]
 test r6, r6
@@ -316,7 +316,7 @@ cglobal h264_idct_add16_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block, str
 mov r6d, dword [r1+r5*4]
 lea r6, [r0+r6]
 IDCT4_ADD r6, r2, r3
-.skipblock
+.skipblock:
 inc r5
 add r2, 32
 cmp r5, 16
@@ -333,7 +333,7 @@ cglobal h264_idct8_add4_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block, str
 %ifdef PIC
 lea picregq, [scan8_mem]
 %endif
-.nextblock
+.nextblock:
 movzx r6, byte [scan8+r5]
 movzx r6, byte [r4+r6]
 test r6, r6
@@ -347,7 +347,7 @@ cglobal h264_idct8_add4_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block, str
 mov r6d, dword [r1+r5*4]
 lea r6, [r0+r6+4]
 IDCT8_ADD_MMX_END r6 , rsp+8, r3
-.skipblock
+.skipblock:
 add r5, 4
 add r2, 128
 cmp r5, 16
@@ -362,7 +362,7 @@ cglobal h264_idct_add16_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s
 %ifdef PIC
 lea picregq, [scan8_mem]
 %endif
-.nextblock
+.nextblock:
 movzx r6, byte [scan8+r5]
 movzx r6, byte [r4+r6]
 test r6, r6
@@ -388,11 +388,11 @@ cglobal h264_idct_add16_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s
 cmp r5, 16
 jl .nextblock
 REP_RET
-.no_dc
+.no_dc:
 mov r6d, dword [r1+r5*4]
 add r6, r0
 IDCT4_ADD r6, r2, r3
-.skipblock
+.skipblock:
 inc r5
 add r2, 32
 cmp r5, 16
@@ -406,7 +406,7 @@ cglobal h264_idct_add16intra_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block
 %ifdef PIC
 lea picregq, [scan8_mem]
 %endif
-.nextblock
+.nextblock:
 movzx r6, byte [scan8+r5]
 movzx r6, byte [r4+r6]
 or r6w, word [r2]
@@ -415,7 +415,7 @@ cglobal h264_idct_add16intra_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block
 mov r6d, dword [r1+r5*4]
 add r6, r0
 IDCT4_ADD r6, r2, r3
-.skipblock
+.skipblock:
 inc r5
 add r2, 32
 cmp r5, 16
@@ -429,7 +429,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, blo
 %ifdef PIC
 lea picregq, [scan8_mem]
 %endif
-.nextblock
+.nextblock:
 movzx r6, byte [scan8+r5]
 movzx r6, byte [r4+r6]
 test r6, r6
@@ -442,7 +442,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, blo
 cmp r5, 16
 jl .nextblock
 REP_RET
-.try_dc
+.try_dc:
 movsx r6, word [r2]
 test r6, r6
 jz .skipblock
@@ -457,7 +457,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, blo
 %if ARCH_X86_64 == 0
 mov r1, r1m
 %endif
-.skipblock
+.skipblock:
 inc r5
 add r2, 32
 cmp r5, 16
@@ -474,7 +474,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s
 %ifdef PIC
 lea picregq, [scan8_mem]
 %endif
-.nextblock
+.nextblock:
 movzx r6, byte [scan8+r5]
 movzx r6, byte [r4+r6]
 test r6, r6
@@ -504,7 +504,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s
 
 ADD rsp, pad
 RET
-.no_dc
+.no_dc:
 mov r6d, dword [r1+r5*4]
 add r6, r0
 add word [r2], 32
@@ -514,7 +514,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s
 mov r6d, dword [r1+r5*4]
 lea r6, [r0+r6+4]
 IDCT8_ADD_MMX_END r6 , rsp+8, r3
-.skipblock
+.skipblock:
 add r5, 4
 add r2, 128
 cmp r5, 16
@@ -531,7 +531,7 @@ cglobal h264_idct8_add4_8_sse2, 5, 8 + npicregs, 10, dst1, block_offset, block,
 %ifdef PIC
 lea picregq, [scan8_mem]
 %endif
-.nextblock
+.nextblock:
 movzx r6, byte [scan8+r5]
 movzx r6, byte [r4+r6]
 test r6, r6
@@ -560,7 +560,7 @@ INIT_MMX
 cmp r5, 16
 jl .nextblock
 REP_RET
-.no_dc
+.no_dc:
 INIT_XMM
 mov dst2d, dword [r1+r5*4]
 add dst2q, r0
@@ -568,7 +568,7 @@ INIT_XMM
 %if ARCH_X86_64 == 0
 mov r1, r1m
 %endif
-.skipblock
+.skipblock:
 add r5, 4
 add r2, 128
 cmp r5, 16
@@ -577,7 +577,7 @@ INIT_XMM
 
 INIT_MMX
 h264_idct_add8_mmx_plane:
-.nextblock
+.nextblock:
 movzx r6, byte [scan8+r5]
 movzx r6, byte [r4+r6]
 or r6w, word [r2]
@@ -592,7 +592,7 @@ h264_idct_add8_mmx_plane:
 add r0, dword [r1+r5*4]
 %endif
 IDCT4_ADD r0, r2, r3
-.skipblock
+.skipblock:
 inc r5
 add r2, 32
 test r5, 3
@@ -621,8 +621,8 @@ cglobal h264_idct_add8_8_mmx, 5, 8 + npicregs, 0, dst1, block_offset, block, str
 call h264_idct_add8_mmx_plane
 RET
 
-h264_idct_add8_mmx2_plane
-.nextblock
+h264_idct_add8_mmx2_plane:
+.nextblock:
 movzx r6, byte [scan8+r5]
 movzx r6, byte [r4+r6]
 test r6, r6
@@ -641,7 +641,7 @@ h264_idct_add8_mmx2_plane
 test r5, 3
 jnz .nextblock
 rep ret
-.try_dc
+.try_dc:
 movsx r6, word [r2]
 test r6, r6
 jz .skipblock
@@ -655,7 +655,7 @@ h264_idct_add8_mmx2_plane
 add r0, dword [r1+r5*4]
 %endif
 DC_ADD_MMX2_OP movh, r0, r3, r6
-.skipblock
+.skipblock:
 inc r5
 add r2, 32
 test r5, 3
@@ -734,7 +734,7 @@ h264_add8x4_idct_sse2:
 add r0, r0m
 %endif
 call h264_add8x4_idct_sse2
-.cycle%1end
+.cycle%1end:
 %if %1 < 7
 add r2, 64
 %endif
@@ -770,7 +770,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5 + ARCH_X86_64, 8
 %endif
 call h264_add8x4_idct_sse2
 jmp .cycle%1end
-.try%1dc
+.try%1dc:
 movsx r0, word [r2 ]
 or r0w, word [r2+32]
 jz .cycle%1end
@@ -781,7 +781,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5 + ARCH_X86_64, 8
 add r0, r0m
 %endif
 call h264_idct_dc_add8_mmx2
-.cycle%1end
+.cycle%1end:
 %if %1 < 7
 add r2, 64
 %endif
@@ -817,7 +817,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7 + ARCH_X86_64, 8
 %endif
 call h264_add8x4_idct_sse2
 jmp .cycle%1end
-.try%1dc
+.try%1dc:
 movsx r0, word [r2 ]
 or r0w, word [r2+32]
 jz .cycle%1end
@@ -830,7 +830,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7 + ARCH_X86_64, 8
 add r0, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
 %endif
 call h264_idct_dc_add8_mmx2
-.cycle%1end
+.cycle%1end:
 %if %1 == 1
 add r2, 384+64
 %elif %1 < 3
@@ -225,7 +225,7 @@ IDCT8_DC_ADD
 ; h264_idct_add16intra(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
 %macro AC 1
-.ac%1
+.ac%1:
 mov r5d, [r1+(%1+0)*4]
 call add4x4_idct %+ SUFFIX
 mov r5d, [r1+(%1+1)*4]
@@ -484,7 +484,7 @@ cglobal pred16x16_plane_%1, 2,9,7
 %endif
 
 mov r4, 8
-.loop
+.loop:
 mova m3, m0 ; b[0..7]
 mova m4, m2 ; b[8..15]
 psraw m3, 5
@@ -680,7 +680,7 @@ cglobal pred8x8_plane, 2,9,7
 
 mov r4, 4
 ALIGN 16
-.loop
+.loop:
 %if mmsize == 16
 mova m3, m0 ; b[0..7]
 paddw m0, m1
@@ -1045,7 +1045,7 @@ cglobal pred8x8l_top_dc_%1, 4,4
 psrlq mm5, 56
 psllq mm5, 56
 pxor mm1, mm5
-.body
+.body:
 PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5
 psadbw mm7, mm0
 paddw mm7, [pw_4]
@@ -1141,7 +1141,7 @@ cglobal pred8x8l_dc_%1, 4,5
 jz .fix_lt_2
 test r2, r2
 jz .fix_tr_1
-.body
+.body:
 lea r1, [r0+r3*2]
 PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
 pxor mm0, mm0
@@ -1276,7 +1276,7 @@ cglobal pred8x8l_vertical_%1, 4,4
 psrlq mm5, 56
 psllq mm5, 56
 pxor mm1, mm5
-.body
+.body:
 PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5
 %rep 3
 movq [r0+r3*1], mm0
@@ -1576,7 +1576,7 @@ cglobal pred8x8l_down_right_mmxext, 4,5
 psllq mm5, 56
 pxor mm1, mm5
 jmp .do_top
-.body
+.body:
 lea r1, [r0+r3*2]
 movq mm1, mm7
 movq mm7, mm5
@@ -1822,7 +1822,7 @@ cglobal pred8x8l_vertical_right_mmxext, 4,5
 jz .fix_lt_2
 test r2, r2
 jz .fix_tr_1
-.do_top
+.do_top:
 PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
 lea r1, [r0+r3*2]
 movq mm2, mm6
@@ -1931,7 +1931,7 @@ cglobal pred8x8l_vertical_right_%1, 4,5,7
 jz .fix_lt_2
 test r2, r2
 jz .fix_tr_1
-.do_top
+.do_top:
 PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
 lea r1, [r0+r3*2]
 movq2dq xmm4, mm6
@@ -264,7 +264,7 @@ cglobal_mc %1, %2, mc20, %3, 3,4,9
 %else
 %define p16 [pw_16]
 %endif
-.nextrow
+.nextrow:
 %if %0 == 4
 movu m2, [r1-4]
 movu m3, [r1-2]
@@ -330,7 +330,7 @@ MC_CACHE MC30
 %macro MC10 3-4
 cglobal_mc %1, %2, mc10, %3, 3,5,9
 mov r4, r1
-.body
+.body:
 mov r3d, %3
 mova m1, [pw_pixel_max]
 %if num_mmregs > 8
@@ -339,7 +339,7 @@ cglobal_mc %1, %2, mc10, %3, 3,5,9
 %else
 %define p16 [pw_16]
 %endif
-.nextrow
+.nextrow:
 %if %0 == 4
 movu m2, [r1-4]
 movu m3, [r1-2]
@@ -446,7 +446,7 @@ MC MC02
 %macro MC01 3
 cglobal_mc %1, %2, mc01, %3, 3,5,8
 mov r4, r1
-.body
+.body:
 PRELOAD_V
 
 sub r4, r2
@@ -535,7 +535,7 @@ SWAP 0,1,2,3,4,5
 ; this REALLY needs x86_64
 cglobal_mc %1, %2, mc11, %3, 3,6,8
 mov r4, r1
-.body
+.body:
 PRELOAD_V
 
 sub r0, r2
@@ -778,7 +778,7 @@ cglobal_mc %1, %2, mc12, %3, 3,7,12
 call put_hv%3_10_%1
 
 xor r4d, r4d
-.body
+.body:
 mov r3d, %3
 pxor m0, m0
 mova m7, [pw_pixel_max]
@@ -837,7 +837,7 @@ put_h%2_10_%1:
 mov r3d, %2
 xor r4d, r4d
 mova m6, [pad20]
-.nextrow
+.nextrow:
 movu m2, [r5-4]
 movu m3, [r5-2]
 movu m4, [r5+0]
@@ -864,7 +864,7 @@ H_NRD sse2 , 8
 %macro MC21 3
 cglobal_mc %1, %2, mc21, %3, 3,7,12
 mov r5, r1
-.body
+.body:
 %define PAD mmsize*8*3*2 ; SIZE*16*4*sizeof(pixel)
 mov r6, rsp ; backup stack pointer
 and rsp, ~(mmsize-1) ; align stack
@@ -73,7 +73,7 @@ SECTION .text
 INIT_MMX
 cglobal h264_weight_16_mmx2, 6, 6, 0
 WEIGHT_SETUP
-.nextrow
+.nextrow:
 WEIGHT_OP 0, 4
 mova [r0 ], m0
 WEIGHT_OP 8, 12
@@ -86,7 +86,7 @@ cglobal h264_weight_16_mmx2, 6, 6, 0
 %macro WEIGHT_FUNC_MM 3
 cglobal h264_weight_%1_%3, 6, 6, %2
 WEIGHT_SETUP
-.nextrow
+.nextrow:
 WEIGHT_OP 0, mmsize/2
 mova [r0], m0
 add r0, r1
@@ -105,7 +105,7 @@ cglobal h264_weight_%1_%3, 6, 6, %2
 WEIGHT_SETUP
 sar r2d, 1
 lea r3, [r1*2]
-.nextrow
+.nextrow:
 WEIGHT_OP 0, r1
 movh [r0], m0
 %if mmsize == 16
@@ -178,7 +178,7 @@ INIT_MMX
 cglobal h264_biweight_16_mmx2, 7, 8, 0
 BIWEIGHT_SETUP
 movifnidn r3d, r3m
-.nextrow
+.nextrow:
 BIWEIGHT_STEPA 0, 1, 0
 BIWEIGHT_STEPA 1, 2, 4
 BIWEIGHT_STEPB
@@ -197,7 +197,7 @@ cglobal h264_biweight_16_mmx2, 7, 8, 0
 cglobal h264_biweight_%1_%3, 7, 8, %2
 BIWEIGHT_SETUP
 movifnidn r3d, r3m
-.nextrow
+.nextrow:
 BIWEIGHT_STEPA 0, 1, 0
 BIWEIGHT_STEPA 1, 2, mmsize/2
 BIWEIGHT_STEPB
@@ -220,7 +220,7 @@ cglobal h264_biweight_%1_%3, 7, 8, %2
 movifnidn r3d, r3m
 sar r3, 1
 lea r4, [r2*2]
-.nextrow
+.nextrow:
 BIWEIGHT_STEPA 0, 1, 0
 BIWEIGHT_STEPA 1, 2, r2
 BIWEIGHT_STEPB
@@ -281,7 +281,7 @@ cglobal h264_biweight_16_ssse3, 7, 8, 8
 BIWEIGHT_SSSE3_SETUP
 movifnidn r3d, r3m
 
-.nextrow
+.nextrow:
 movh m0, [r0]
 movh m2, [r0+8]
 movh m3, [r1+8]
@@ -302,7 +302,7 @@ cglobal h264_biweight_8_ssse3, 7, 8, 8
 sar r3, 1
 lea r4, [r2*2]
 
-.nextrow
+.nextrow:
 movh m0, [r0]
 movh m1, [r1]
 movh m2, [r0+r2]
@@ -40,7 +40,7 @@ SECTION .text
 ; int weight, int offset);
 ;-----------------------------------------------------------------------------
 %macro WEIGHT_PROLOGUE 0
-.prologue
+.prologue:
 PROLOGUE 0,6,8
 movifnidn r0, r0mp
 movifnidn r1d, r1m
@@ -93,7 +93,7 @@ SECTION .text
 cglobal h264_weight_16_10_%1
 WEIGHT_PROLOGUE
 WEIGHT_SETUP %1
-.nextrow
+.nextrow:
 WEIGHT_OP %1, 0
 mova [r0 ], m5
 WEIGHT_OP %1, 16
@@ -113,7 +113,7 @@ WEIGHT_FUNC_DBL sse4
 cglobal h264_weight_8_10_%1
 WEIGHT_PROLOGUE
 WEIGHT_SETUP %1
-.nextrow
+.nextrow:
 WEIGHT_OP %1, 0
 mova [r0], m5
 add r0, r1
@@ -133,7 +133,7 @@ cglobal h264_weight_4_10_%1
 sar r2d, 1
 WEIGHT_SETUP %1
 lea r3, [r1*2]
-.nextrow
+.nextrow:
 WEIGHT_OP %1, 0, r1
 movh [r0], m5
 movhps [r0+r1], m5
@@ -159,7 +159,7 @@ DECLARE_REG_TMP 7
 %endif
 
 %macro BIWEIGHT_PROLOGUE 0
-.prologue
+.prologue:
 PROLOGUE 0,8,8
 movifnidn r0, r0mp
 movifnidn r1, r1mp
@@ -221,7 +221,7 @@ DECLARE_REG_TMP 7
 cglobal h264_biweight_16_10_%1
 BIWEIGHT_PROLOGUE
 BIWEIGHT_SETUP %1
-.nextrow
+.nextrow:
 BIWEIGHT %1, 0
 mova [r0 ], m0
 BIWEIGHT %1, 16
@@ -241,7 +241,7 @@ BIWEIGHT_FUNC_DBL sse4
 cglobal h264_biweight_8_10_%1
 BIWEIGHT_PROLOGUE
 BIWEIGHT_SETUP %1
-.nextrow
+.nextrow:
 BIWEIGHT %1, 0
 mova [r0], m0
 add r0, r2
@@ -261,7 +261,7 @@ cglobal h264_biweight_4_10_%1
 BIWEIGHT_SETUP %1
 sar r3d, 1
 lea r4, [r2*2]
-.nextrow
+.nextrow:
 BIWEIGHT %1, 0, r2
 movh [r0 ], m0
 movhps [r0+r2], m0
@@ -139,7 +139,7 @@ cglobal vp6_filter_diag4, 5, 7, 8
 
 mov r3, rsp
 mov r6, 11
-.nextrow
+.nextrow:
 DIAG4 r1, -1, 0, 1, 2, r3
 add r3, 8
 add r1, r2
@@ -151,7 +151,7 @@ cglobal vp6_filter_diag4, 5, 7, 8
 
 lea r3, [rsp+8]
 mov r6, 8
-.nextcol
+.nextcol:
 DIAG4 r3, -8, 0, 8, 16, r0
 add r3, 8
 add r0, r2
@@ -189,7 +189,7 @@ cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, h
 mova m6, [sixtap_filter_hb+mxq*8-32]
 mova m7, [sixtap_filter_hb+mxq*8-16]
 
-.nextrow
+.nextrow:
 movu m0, [srcq-2]
 mova m1, m0
 mova m2, m0
@@ -229,7 +229,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h
 mova m5, [fourtap_filter_hb+mxq-16] ; set up 4tap filter in bytes
 mova m6, [fourtap_filter_hb+mxq]
 
-.nextrow
+.nextrow:
 movu m0, [srcq-1]
 mova m1, m0
 pshufb m0, m3
@@ -264,7 +264,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
 movh m2, [srcq+2*srcstrideq]
 add srcq, srcstrideq
 
-.nextrow
+.nextrow:
 movh m3, [srcq+2*srcstrideq] ; read new row
 mova m4, m0
 mova m0, m1
@@ -304,7 +304,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
 movh m3, [srcq]
 movh m4, [srcq+srcstrideq]
 
-.nextrow
+.nextrow:
 movh m5, [srcq+2*srcstrideq] ; read new row
 mova m6, m0
 punpcklbw m6, m5
@@ -350,7 +350,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
 movq mm7, [pw_64]
 pxor mm6, mm6
 
-.nextrow
+.nextrow:
 movq mm1, [srcq-1] ; (ABCDEFGH) load 8 horizontal pixels
 
 ; first set of 2 pixels
@@ -399,7 +399,7 @@ cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
 movq mm7, [pw_64]
 pxor mm3, mm3
 
-.nextrow
+.nextrow:
 movq mm1, [srcq-2] ; (ABCDEFGH) load 8 horizontal pixels
 
 ; first set of 2 pixels
@@ -459,7 +459,7 @@ cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, h
 mova m8, [mxq+32]
 mova m9, [mxq+48]
 %endif
-.nextrow
+.nextrow:
 movq m0, [srcq-1]
 movq m1, [srcq-0]
 movq m2, [srcq+1]
@@ -510,7 +510,7 @@ cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, h
 mova m12, [mxq+64]
 mova m13, [mxq+80]
 %endif
-.nextrow
+.nextrow:
 movq m0, [srcq-2]
 movq m1, [srcq-1]
 movq m2, [srcq-0]
@@ -577,7 +577,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
 punpcklbw m1, m7
 punpcklbw m2, m7
 
-.nextrow
+.nextrow:
 ; first calculate negative taps (to prevent losing positive overflows)
 movh m4, [srcq+2*srcstrideq] ; read new row
 punpcklbw m4, m7
@@ -635,7 +635,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
 punpcklbw m3, m7
 punpcklbw m4, m7
 
-.nextrow
+.nextrow:
 ; first calculate negative taps (to prevent losing positive overflows)
 mova m5, m1
 pmullw m5, [myq+16]
@@ -689,7 +689,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
 mova m5, [bilinear_filter_vw+myq-1*16]
 neg myq
 mova m4, [bilinear_filter_vw+myq+7*16]
-.nextrow
+.nextrow:
 movh m0, [srcq+srcstrideq*0]
 movh m1, [srcq+srcstrideq*1]
 movh m3, [srcq+srcstrideq*2]
@@ -733,7 +733,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
 mova m5, [bilinear_filter_vw+mxq-1*16]
 neg mxq
 mova m4, [bilinear_filter_vw+mxq+7*16]
-.nextrow
+.nextrow:
 movh m0, [srcq+srcstrideq*0+0]
 movh m1, [srcq+srcstrideq*0+1]
 movh m2, [srcq+srcstrideq*1+0]
@@ -783,7 +783,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, p
 %endif
 pxor m4, m4
 mova m3, [bilinear_filter_vb+myq-16]
-.nextrow
+.nextrow:
 movh m0, [srcq+srcstrideq*0]
 movh m1, [srcq+srcstrideq*1]
 movh m2, [srcq+srcstrideq*2]
@@ -820,7 +820,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride
 pxor m4, m4
 mova m2, [filter_h2_shuf]
 mova m3, [bilinear_filter_vb+mxq-16]
-.nextrow
+.nextrow:
 movu m0, [srcq+srcstrideq*0]
 movu m1, [srcq+srcstrideq*1]
 pshufb m0, m2
@@ -1488,7 +1488,7 @@ cglobal vp8_%1_loop_filter_simple, 3, %2, 8, dst, stride, flim, cntr
 %endif
 
 %if mmsize == 8 ; mmx / mmxext
-.next8px
+.next8px:
 %endif
 %ifidn %1, v
 ; read 4 half/full rows of pixels
@@ -361,7 +361,7 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, s
 mov src0q, [src0q]
 add src0q, lenq
 neg lenq
-.loop
+.loop:
 ; for x86-32 with 7-8 channels we do not have enough gp registers for all src
 ; pointers, so we have to load some of them from the stack each time
 %define copy_src_from_stack ARCH_X86_32 && in_channels >= 7 && %%i >= 5
@@ -30,7 +30,7 @@ SECTION .text
 cglobal vector_fmul, 4,4,2, dst, src0, src1, len
 lea lenq, [lend*4 - 2*mmsize]
 ALIGN 16
-.loop
+.loop:
 mova m0, [src0q + lenq]
 mova m1, [src0q + lenq + mmsize]
 mulps m0, m0, [src1q + lenq]
@@ -72,7 +72,7 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len
 %endif
 %endif
 lea lenq, [lend*4-2*mmsize]
-.loop
+.loop:
 mulps m1, m0, [srcq+lenq ]
 mulps m2, m0, [srcq+lenq+mmsize]
 addps m1, m1, [dstq+lenq ]