avcodec/x86/h26x/h2656_inter: add dststride to put
Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
This commit is contained in:
@@ -22,8 +22,6 @@
|
|||||||
; */
|
; */
|
||||||
%include "libavutil/x86/x86util.asm"
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
%define MAX_PB_SIZE 64
|
|
||||||
|
|
||||||
SECTION_RODATA 32
|
SECTION_RODATA 32
|
||||||
cextern pw_255
|
cextern pw_255
|
||||||
cextern pw_512
|
cextern pw_512
|
||||||
@@ -342,7 +340,7 @@ SECTION .text
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro LOOP_END 3
|
%macro LOOP_END 3
|
||||||
add %1q, 2*MAX_PB_SIZE ; dst += dststride
|
add %1q, dststrideq ; dst += dststride
|
||||||
add %2q, %3q ; src += srcstride
|
add %2q, %3q ; src += srcstride
|
||||||
dec heightd ; cmp height
|
dec heightd ; cmp height
|
||||||
jnz .loop ; height loop
|
jnz .loop ; height loop
|
||||||
@@ -539,7 +537,7 @@ SECTION .text
|
|||||||
|
|
||||||
|
|
||||||
; ******************************
|
; ******************************
|
||||||
; void %1_put_pixels(int16_t *dst, const uint8_t *_src, ptrdiff_t srcstride,
|
; void %1_put_pixels(int16_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t srcstride,
|
||||||
; int height, const int8_t *hf, const int8_t *vf, int width)
|
; int height, const int8_t *hf, const int8_t *vf, int width)
|
||||||
; ******************************
|
; ******************************
|
||||||
|
|
||||||
@@ -549,7 +547,7 @@ SECTION .text
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro MC_PIXELS 3
|
%macro MC_PIXELS 3
|
||||||
cglobal %1_put_pixels%2_%3, 4, 4, 3, dst, src, srcstride, height
|
cglobal %1_put_pixels%2_%3, 5, 5, 3, dst, dststride, src, srcstride, height
|
||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
.loop:
|
.loop:
|
||||||
SIMPLE_LOAD %2, %3, srcq, m0
|
SIMPLE_LOAD %2, %3, srcq, m0
|
||||||
@@ -579,10 +577,10 @@ cglobal %1_put_uni_pixels%2_%3, 5, 5, 2, dst, dststride, src, srcstride, height
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
; ******************************
|
; ******************************
|
||||||
; void %1_put_4tap_hX(int16_t *dst,
|
; void %1_put_4tap_hX(int16_t *dst, ptrdiff_t dststride,
|
||||||
; const uint8_t *_src, ptrdiff_t _srcstride, int height, int8_t *hf, int8_t *vf, int width);
|
; const uint8_t *_src, ptrdiff_t _srcstride, int height, int8_t *hf, int8_t *vf, int width);
|
||||||
; ******************************
|
; ******************************
|
||||||
cglobal %1_put_4tap_h%2_%3, 5, 5, XMM_REGS, dst, src, srcstride, height, hf
|
cglobal %1_put_4tap_h%2_%3, 6, 6, XMM_REGS, dst, dststride, src, srcstride, height, hf
|
||||||
%assign %%stride ((%3 + 7)/8)
|
%assign %%stride ((%3 + 7)/8)
|
||||||
MC_4TAP_FILTER %3, hf, m4, m5
|
MC_4TAP_FILTER %3, hf, m4, m5
|
||||||
.loop:
|
.loop:
|
||||||
@@ -612,10 +610,10 @@ cglobal %1_put_uni_4tap_h%2_%3, 6, 7, XMM_REGS, dst, dststride, src, srcstride,
|
|||||||
RET
|
RET
|
||||||
|
|
||||||
; ******************************
|
; ******************************
|
||||||
; void %1_put_4tap_v(int16_t *dst,
|
; void %1_put_4tap_v(int16_t *dst, ptrdiff_t dststride,
|
||||||
; const uint8_t *_src, ptrdiff_t _srcstride, int height, int8_t *hf, int8_t *vf, int width)
|
; const uint8_t *_src, ptrdiff_t _srcstride, int height, int8_t *hf, int8_t *vf, int width)
|
||||||
; ******************************
|
; ******************************
|
||||||
cglobal %1_put_4tap_v%2_%3, 6, 6, XMM_REGS, dst, src, srcstride, height, r3src, vf
|
cglobal %1_put_4tap_v%2_%3, 7, 7, XMM_REGS, dst, dststride, src, srcstride, height, r3src, vf
|
||||||
sub srcq, srcstrideq
|
sub srcq, srcstrideq
|
||||||
MC_4TAP_FILTER %3, vf, m4, m5
|
MC_4TAP_FILTER %3, vf, m4, m5
|
||||||
lea r3srcq, [srcstrideq*3]
|
lea r3srcq, [srcstrideq*3]
|
||||||
@@ -649,10 +647,10 @@ cglobal %1_put_uni_4tap_v%2_%3, 7, 7, XMM_REGS, dst, dststride, src, srcstride,
|
|||||||
|
|
||||||
%macro PUT_4TAP_HV 3
|
%macro PUT_4TAP_HV 3
|
||||||
; ******************************
|
; ******************************
|
||||||
; void put_4tap_hv(int16_t *dst,
|
; void put_4tap_hv(int16_t *dst, ptrdiff_t dststride,
|
||||||
; const uint8_t *_src, ptrdiff_t _srcstride, int height, int8_t *hf, int8_t *vf, int width)
|
; const uint8_t *_src, ptrdiff_t _srcstride, int height, int8_t *hf, int8_t *vf, int width)
|
||||||
; ******************************
|
; ******************************
|
||||||
cglobal %1_put_4tap_hv%2_%3, 6, 7, 16 , dst, src, srcstride, height, hf, vf, r3src
|
cglobal %1_put_4tap_hv%2_%3, 7, 8, 16 , dst, dststride, src, srcstride, height, hf, vf, r3src
|
||||||
%assign %%stride ((%3 + 7)/8)
|
%assign %%stride ((%3 + 7)/8)
|
||||||
sub srcq, srcstrideq
|
sub srcq, srcstrideq
|
||||||
MC_4TAP_HV_FILTER %3
|
MC_4TAP_HV_FILTER %3
|
||||||
@@ -784,12 +782,12 @@ cglobal %1_put_uni_4tap_hv%2_%3, 7, 8, 16 , dst, dststride, src, srcstride, heig
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
; ******************************
|
; ******************************
|
||||||
; void put_8tap_hX_X_X(int16_t *dst, const uint8_t *_src, ptrdiff_t srcstride,
|
; void put_8tap_hX_X_X(int16_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t srcstride,
|
||||||
; int height, const int8_t *hf, const int8_t *vf, int width)
|
; int height, const int8_t *hf, const int8_t *vf, int width)
|
||||||
; ******************************
|
; ******************************
|
||||||
|
|
||||||
%macro PUT_8TAP 3
|
%macro PUT_8TAP 3
|
||||||
cglobal %1_put_8tap_h%2_%3, 5, 5, 16, dst, src, srcstride, height, hf
|
cglobal %1_put_8tap_h%2_%3, 6, 6, 16, dst, dststride, src, srcstride, height, hf
|
||||||
MC_8TAP_FILTER %3, hf
|
MC_8TAP_FILTER %3, hf
|
||||||
.loop:
|
.loop:
|
||||||
MC_8TAP_H_LOAD %3, srcq, %2, 10
|
MC_8TAP_H_LOAD %3, srcq, %2, 10
|
||||||
@@ -824,10 +822,10 @@ cglobal %1_put_uni_8tap_h%2_%3, 6, 7, 16 , dst, dststride, src, srcstride, heigh
|
|||||||
|
|
||||||
|
|
||||||
; ******************************
|
; ******************************
|
||||||
; void put_8tap_vX_X_X(int16_t *dst, const uint8_t *_src, ptrdiff_t srcstride,
|
; void put_8tap_vX_X_X(int16_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t srcstride,
|
||||||
; int height, const int8_t *hf, const int8_t *vf, int width)
|
; int height, const int8_t *hf, const int8_t *vf, int width)
|
||||||
; ******************************
|
; ******************************
|
||||||
cglobal %1_put_8tap_v%2_%3, 6, 8, 16, dst, src, srcstride, height, r3src, vf
|
cglobal %1_put_8tap_v%2_%3, 7, 8, 16, dst, dststride, src, srcstride, height, r3src, vf
|
||||||
MC_8TAP_FILTER %3, vf
|
MC_8TAP_FILTER %3, vf
|
||||||
lea r3srcq, [srcstrideq*3]
|
lea r3srcq, [srcstrideq*3]
|
||||||
.loop:
|
.loop:
|
||||||
@@ -866,11 +864,11 @@ cglobal %1_put_uni_8tap_v%2_%3, 7, 9, 16, dst, dststride, src, srcstride, height
|
|||||||
|
|
||||||
|
|
||||||
; ******************************
|
; ******************************
|
||||||
; void put_8tap_hvX_X(int16_t *dst, const uint8_t *_src, ptrdiff_t srcstride,
|
; void put_8tap_hvX_X(int16_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t srcstride,
|
||||||
; int height, const int8_t *hf, const int8_t *vf, int width)
|
; int height, const int8_t *hf, const int8_t *vf, int width)
|
||||||
; ******************************
|
; ******************************
|
||||||
%macro PUT_8TAP_HV 3
|
%macro PUT_8TAP_HV 3
|
||||||
cglobal %1_put_8tap_hv%2_%3, 6, 7, 16, 0 - mmsize*16, dst, src, srcstride, height, hf, vf, r3src
|
cglobal %1_put_8tap_hv%2_%3, 7, 8, 16, 0 - mmsize*16, dst, dststride, src, srcstride, height, hf, vf, r3src
|
||||||
MC_8TAP_FILTER %3, hf, 0
|
MC_8TAP_FILTER %3, hf, 0
|
||||||
lea hfq, [rsp]
|
lea hfq, [rsp]
|
||||||
MC_8TAP_FILTER %3, vf, 8*mmsize
|
MC_8TAP_FILTER %3, vf, 8*mmsize
|
||||||
|
@@ -24,7 +24,7 @@
|
|||||||
#include "h2656dsp.h"
|
#include "h2656dsp.h"
|
||||||
|
|
||||||
#define mc_rep_func(name, bitd, step, W, opt) \
|
#define mc_rep_func(name, bitd, step, W, opt) \
|
||||||
void ff_h2656_put_##name##W##_##bitd##_##opt(int16_t *_dst, \
|
void ff_h2656_put_##name##W##_##bitd##_##opt(int16_t *_dst, ptrdiff_t dststride, \
|
||||||
const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width) \
|
const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width) \
|
||||||
{ \
|
{ \
|
||||||
int i; \
|
int i; \
|
||||||
@@ -32,7 +32,7 @@ void ff_h2656_put_##name##W##_##bitd##_##opt(int16_t *_dst,
|
|||||||
for (i = 0; i < W; i += step) { \
|
for (i = 0; i < W; i += step) { \
|
||||||
const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
|
const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \
|
||||||
dst = _dst + i; \
|
dst = _dst + i; \
|
||||||
ff_h2656_put_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, hf, vf, width); \
|
ff_h2656_put_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, height, hf, vf, width); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -30,7 +30,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#define H2656_PEL_PROTOTYPE(name, D, opt) \
|
#define H2656_PEL_PROTOTYPE(name, D, opt) \
|
||||||
void ff_h2656_put_ ## name ## _ ## D ## _##opt(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width); \
|
void ff_h2656_put_ ## name ## _ ## D ## _##opt(int16_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width); \
|
||||||
void ff_h2656_put_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width); \
|
void ff_h2656_put_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width); \
|
||||||
|
|
||||||
#define H2656_MC_8TAP_PROTOTYPES(fname, bitd, opt) \
|
#define H2656_MC_8TAP_PROTOTYPES(fname, bitd, opt) \
|
||||||
|
@@ -96,7 +96,7 @@ void ff_hevc_put_hevc_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t
|
|||||||
int height, intptr_t mx, intptr_t my,int width) \
|
int height, intptr_t mx, intptr_t my,int width) \
|
||||||
{ \
|
{ \
|
||||||
DECL_HV_FILTER(p) \
|
DECL_HV_FILTER(p) \
|
||||||
ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, src, srcstride, height, hf, vf, width); \
|
ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define FW_PUT_UNI(p, a, b, depth, opt) \
|
#define FW_PUT_UNI(p, a, b, depth, opt) \
|
||||||
|
Reference in New Issue
Block a user