From a83f5b8a9670e87fa41ab1db6922773a2978d42f Mon Sep 17 00:00:00 2001 From: Mike Melanson Date: Sat, 7 Jan 2012 19:59:06 -0800 Subject: [PATCH 01/19] FATE test: cvid-grayscale; ensures that the grayscale Cinepak variant is exercised. Signed-off-by: Ronald S. Bultje --- tests/fate/video.mak | 3 + tests/ref/fate/cvid-grayscale | 152 ++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 tests/ref/fate/cvid-grayscale diff --git a/tests/fate/video.mak b/tests/fate/video.mak index 01dd306d1a..6d23134f8a 100644 --- a/tests/fate/video.mak +++ b/tests/fate/video.mak @@ -49,6 +49,9 @@ fate-cvid: CMD = framecrc -i $(SAMPLES)/cvid/laracroft-cinepak-partial.avi -an FATE_TESTS += fate-cvid-palette fate-cvid-palette: CMD = framecrc -i $(SAMPLES)/cvid/catfight-cvid-pal8-partial.mov -pix_fmt rgb24 -an +FATE_TESTS += fate-cvid-grayscale +fate-cvid-grayscale: CMD = framecrc -i $(SAMPLES)/cvid/pcitva15.avi -an + FATE_TESTS += fate-cyberia-c93 fate-cyberia-c93: CMD = framecrc -i $(SAMPLES)/cyberia-c93/intro1.c93 -t 3 -pix_fmt rgb24 diff --git a/tests/ref/fate/cvid-grayscale b/tests/ref/fate/cvid-grayscale new file mode 100644 index 0000000000..4e3957156f --- /dev/null +++ b/tests/ref/fate/cvid-grayscale @@ -0,0 +1,152 @@ +0, 0, 11300, 0x46c78923 +0, 17921, 11300, 0x3f2a1175 +0, 35842, 11300, 0x722de221 +0, 53763, 11300, 0x01746b88 +0, 71684, 11300, 0x549587a7 +0, 89605, 11300, 0x843ab943 +0, 107526, 11300, 0x62fdee48 +0, 125447, 11300, 0x74a62867 +0, 143368, 11300, 0x35a20e2f +0, 161289, 11300, 0x4e9ef54d +0, 179210, 11300, 0xec7201f5 +0, 197131, 11300, 0x363bfe27 +0, 215052, 11300, 0x2aaab418 +0, 232973, 11300, 0x6a48ab3f +0, 250894, 11300, 0x3fecea34 +0, 268815, 11300, 0xa371f55e +0, 286736, 11300, 0xa86b147c +0, 304657, 11300, 0x49e9206e +0, 322578, 11300, 0x6c9a2155 +0, 340499, 11300, 0x2c8a4798 +0, 358420, 11300, 0x3485676c +0, 376341, 11300, 0xb0b293f2 +0, 394262, 11300, 0xe4a9b068 +0, 412183, 11300, 0xd68d0556 +0, 430104, 11300, 0xc28e5193 +0, 
448025, 11300, 0xf6948483 +0, 465945, 11300, 0xf21fbf57 +0, 483866, 11300, 0x8345eb44 +0, 501787, 11300, 0x8124f045 +0, 519708, 11300, 0x18e31f10 +0, 537629, 11300, 0xdb1943fc +0, 555550, 11300, 0x8701699f +0, 573471, 11300, 0xd7b18550 +0, 591392, 11300, 0xa56faccc +0, 609313, 11300, 0xf8bcc17c +0, 627234, 11300, 0x446acab9 +0, 645155, 11300, 0x755fd295 +0, 663076, 11300, 0x92e3d100 +0, 680997, 11300, 0x54895bb3 +0, 698918, 11300, 0xd18bffda +0, 716839, 11300, 0x480dbe4f +0, 734760, 11300, 0x49ea9dbe +0, 752681, 11300, 0x00d3a003 +0, 770602, 11300, 0xda7bbfb2 +0, 788523, 11300, 0x9700d9c2 +0, 806444, 11300, 0xa0a9e490 +0, 824365, 11300, 0x00eb0979 +0, 842286, 11300, 0x32b04630 +0, 860207, 11300, 0xdfb73e51 +0, 878128, 11300, 0x3d8e4f96 +0, 896049, 11300, 0x2ca83271 +0, 913970, 11300, 0xb5b123c0 +0, 931891, 11300, 0x8a570e58 +0, 949812, 11300, 0xc6c805bc +0, 967733, 11300, 0x27caf7a5 +0, 985654, 11300, 0x5319ecb0 +0, 1003575, 11300, 0x5471e3fd +0, 1021496, 11300, 0x6d68a6f4 +0, 1039417, 11300, 0x872b7194 +0, 1057338, 11300, 0x007c36bd +0, 1075259, 11300, 0x2714f1b5 +0, 1093180, 11300, 0x6c8eb50f +0, 1111101, 11300, 0xf5d57be8 +0, 1129022, 11300, 0x981f412b +0, 1146943, 11300, 0x1a9804a1 +0, 1164864, 11300, 0xf0c1d24a +0, 1182785, 11300, 0xa70a9d9b +0, 1200706, 11300, 0x8c466876 +0, 1218627, 11300, 0xcf2e32df +0, 1236548, 11300, 0xcb8cfebf +0, 1254469, 11300, 0xb961ca99 +0, 1272390, 11300, 0x666d9619 +0, 1290311, 11300, 0x84bf5b55 +0, 1308232, 11300, 0xbfa22ccc +0, 1326153, 11300, 0xcde41849 +0, 1344074, 11300, 0x71372dcd +0, 1361994, 11300, 0x13402cfd +0, 1379915, 11300, 0xdebdd321 +0, 1397836, 11300, 0xdda66de1 +0, 1415757, 11300, 0x7f4bb682 +0, 1433678, 11300, 0xf67fd528 +0, 1451599, 11300, 0xe739ff8c +0, 1469520, 11300, 0x2e131774 +0, 1487441, 11300, 0xfa942811 +0, 1505362, 11300, 0x0cd93ac2 +0, 1523283, 11300, 0xd0445e0e +0, 1541204, 11300, 0x3f3497c7 +0, 1559125, 11300, 0x11b5bd2c +0, 1577046, 11300, 0xccd5e62a +0, 1594967, 11300, 0xa9d4fcb5 +0, 1612888, 11300, 
0x34aa1a03 +0, 1630809, 11300, 0x1ce6299e +0, 1648730, 11300, 0x661c2745 +0, 1666651, 11300, 0x27d8a8b3 +0, 1684572, 11300, 0x9eb07467 +0, 1702493, 11300, 0x128374d2 +0, 1720414, 11300, 0x05c36ff5 +0, 1738335, 11300, 0x8a136bde +0, 1756256, 11300, 0x15c47c99 +0, 1774177, 11300, 0xcc4a93f4 +0, 1792098, 11300, 0x19529b2b +0, 1810019, 11300, 0x9943c076 +0, 1827940, 11300, 0xf898e583 +0, 1845861, 11300, 0x40f71f94 +0, 1863782, 11300, 0x5b604afb +0, 1881703, 11300, 0x8c176af4 +0, 1899624, 11300, 0x0f1a6216 +0, 1917545, 11300, 0x38bbd13d +0, 1935466, 11300, 0x90c8d1fc +0, 1953387, 11300, 0x253000d7 +0, 1971308, 11300, 0xb94b03b1 +0, 1989229, 11300, 0xbc872268 +0, 2007150, 11300, 0xe77adb8c +0, 2025071, 11300, 0xa38936b7 +0, 2042992, 11300, 0xd6153632 +0, 2060913, 11300, 0x1ae633cc +0, 2078834, 11300, 0xb90c286e +0, 2096755, 11300, 0xbc7e333d +0, 2114676, 11300, 0x1b5421f8 +0, 2132597, 11300, 0xdde6506d +0, 2150518, 11300, 0xd3eb757e +0, 2168439, 11300, 0x5ad1929c +0, 2186360, 11300, 0x4f6aa47d +0, 2204281, 11300, 0xab3caf55 +0, 2222202, 11300, 0x5ff9b39a +0, 2240123, 11300, 0x1454e12e +0, 2258043, 11300, 0xf18216e8 +0, 2275964, 11300, 0x62144880 +0, 2293885, 11300, 0x54284241 +0, 2311806, 11300, 0x8e8c7228 +0, 2329727, 11300, 0xb498d06e +0, 2347648, 11300, 0x7b1e6be1 +0, 2365569, 11300, 0x5e5ea1f4 +0, 2383490, 11300, 0x41eda28e +0, 2401411, 11300, 0x7ba6aa92 +0, 2419332, 11300, 0xa8a8b1c7 +0, 2437253, 11300, 0x0d30bd08 +0, 2455174, 11300, 0xc610bf16 +0, 2473095, 11300, 0xed57c075 +0, 2491016, 11300, 0xb86dbfea +0, 2508937, 11300, 0x0970c03d +0, 2526858, 11300, 0x743ac2ac +0, 2544779, 11300, 0x0a44c816 +0, 2562700, 11300, 0xe32acd6b +0, 2580621, 11300, 0x209bcdab +0, 2598542, 11300, 0x3cd0d105 +0, 2616463, 11300, 0xc0bcd330 +0, 2634384, 11300, 0x4785d6dc +0, 2652305, 11300, 0xe85f9c90 +0, 2670226, 11300, 0xd4a72850 +0, 2688147, 11300, 0x04766e41 +0, 2706068, 11300, 0x04766e41 From 83890c66c4db13153f771332f04afeffb1fb6a54 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: 
Fri, 30 Dec 2011 18:37:16 +0100 Subject: [PATCH 02/19] swscale: K&R reformatting cosmetics for header files --- libswscale/ppc/yuv2rgb_altivec.h | 19 ++- libswscale/rgb2rgb.h | 30 ++-- libswscale/swscale.h | 5 +- libswscale/swscale_internal.h | 247 ++++++++++++++++--------------- 4 files changed, 157 insertions(+), 144 deletions(-) diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h index ab5fcde513..626d2b0886 100644 --- a/libswscale/ppc/yuv2rgb_altivec.h +++ b/libswscale/ppc/yuv2rgb_altivec.h @@ -24,13 +24,18 @@ #ifndef SWSCALE_PPC_YUV2RGB_ALTIVEC_H #define SWSCALE_PPC_YUV2RGB_ALTIVEC_H -#define YUV2PACKEDX_HEADER(suffix) \ -void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \ - const int16_t **lumSrc, int lumFilterSize, \ - const int16_t *chrFilter, const int16_t **chrUSrc, \ - const int16_t **chrVSrc, int chrFilterSize, \ - const int16_t **alpSrc, uint8_t *dest, \ - int dstW, int dstY); +#define YUV2PACKEDX_HEADER(suffix) \ + void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \ + const int16_t *lumFilter, \ + const int16_t **lumSrc, \ + int lumFilterSize, \ + const int16_t *chrFilter, \ + const int16_t **chrUSrc, \ + const int16_t **chrVSrc, \ + int chrFilterSize, \ + const int16_t **alpSrc, \ + uint8_t *dest, \ + int dstW, int dstY); YUV2PACKEDX_HEADER(abgr); YUV2PACKEDX_HEADER(bgra); diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h index 9d051de4f6..833a984cec 100644 --- a/libswscale/rgb2rgb.h +++ b/libswscale/rgb2rgb.h @@ -36,32 +36,33 @@ extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb32to16) (const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb32to15) (const uint8_t *src, uint8_t *dst, int src_size); 
-extern void (*rgb15to16) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb15to32) (const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb16to15) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb16to32) (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, int src_size); -extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, int src_size); -extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size); +extern void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); -void rgb24to32 (const uint8_t *src, uint8_t *dst, int src_size); -void rgb32to24 (const uint8_t *src, uint8_t *dst, int src_size); +extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size); + +void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size); +void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size); void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size); -void rgb16to24 (const uint8_t *src, uint8_t *dst, int 
src_size); +void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size); void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size); void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size); void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size); -void rgb15to24 (const uint8_t *src, uint8_t *dst, int src_size); +void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size); void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size); void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size); -void bgr8torgb8 (const uint8_t *src, uint8_t *dst, int src_size); +void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size); @@ -138,7 +139,6 @@ extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint int srcStride1, int srcStride2, int srcStride3, int dstStride); - extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride); diff --git a/libswscale/swscale.h b/libswscale/swscale.h index 5cd55a70ba..b5a6a57681 100644 --- a/libswscale/swscale.h +++ b/libswscale/swscale.h @@ -132,7 +132,6 @@ const char *swscale_license(void); */ const int *sws_getCoefficients(int colorspace); - // when used for filters they must have an odd number of elements // coeffs cannot be shared between vectors typedef struct { @@ -232,9 +231,9 @@ struct SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat * the destination image * @return the height of the output slice */ -int sws_scale(struct SwsContext *c, const uint8_t* const srcSlice[], +int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, - uint8_t* const dst[], const int dstStride[]); + uint8_t *const dst[], const int dstStride[]); /** * 
@param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x] diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index bb3b52d8c9..3436b92788 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -32,9 +32,9 @@ #include "libavutil/pixfmt.h" #include "libavutil/pixdesc.h" -#define STR(s) AV_TOSTRING(s) //AV_STRINGIFY is too long +#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long -#define FAST_BGR2YV12 //use 7-bit instead of 15-bit coefficients +#define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients #define MAX_FILTER_SIZE 256 @@ -45,21 +45,20 @@ #endif #if ARCH_X86_64 -# define APCK_PTR2 8 +# define APCK_PTR2 8 # define APCK_COEF 16 # define APCK_SIZE 24 #else -# define APCK_PTR2 4 -# define APCK_COEF 8 +# define APCK_PTR2 4 +# define APCK_COEF 8 # define APCK_SIZE 16 #endif struct SwsContext; -typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[], +typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, - uint8_t* dst[], int dstStride[]); - + uint8_t *dst[], int dstStride[]); /** * Write one line of horizontally scaled data to planar output @@ -73,8 +72,8 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[], * @param dither ordered dither array of type int16_t and size 8 * @param offset Dither offset */ -typedef void (*yuv2planar1_fn) (const int16_t *src, uint8_t *dest, int dstW, - const uint8_t *dither, int offset); +typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW, + const uint8_t *dither, int offset); /** * Write one line of horizontally scaled data to planar output @@ -89,9 +88,9 @@ typedef void (*yuv2planar1_fn) (const int16_t *src, uint8_t *dest, int dstW, * @param dstW width of destination pixels * @param offset Dither offset */ -typedef void (*yuv2planarX_fn) (const int16_t *filter, int filterSize, - const int16_t **src, uint8_t *dest, int dstW, - 
const uint8_t *dither, int offset); +typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize, + const int16_t **src, uint8_t *dest, int dstW, + const uint8_t *dither, int offset); /** * Write one line of horizontally scaled chroma to interleaved output @@ -108,9 +107,12 @@ typedef void (*yuv2planarX_fn) (const int16_t *filter, int filterSize, * output, this is in uint16_t * @param dstW width of chroma planes */ -typedef void (*yuv2interleavedX_fn) (struct SwsContext *c, const int16_t *chrFilter, int chrFilterSize, - const int16_t **chrUSrc, const int16_t **chrVSrc, - uint8_t *dest, int dstW); +typedef void (*yuv2interleavedX_fn)(struct SwsContext *c, + const int16_t *chrFilter, + int chrFilterSize, + const int16_t **chrUSrc, + const int16_t **chrVSrc, + uint8_t *dest, int dstW); /** * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB @@ -141,10 +143,11 @@ typedef void (*yuv2interleavedX_fn) (struct SwsContext *c, const int16_t *chrFil * but can be used to generate comfort noise using dithering * for some output formats. */ -typedef void (*yuv2packed1_fn) (struct SwsContext *c, const int16_t *lumSrc, - const int16_t *chrUSrc[2], const int16_t *chrVSrc[2], - const int16_t *alpSrc, uint8_t *dest, - int dstW, int uvalpha, int y); +typedef void (*yuv2packed1_fn)(struct SwsContext *c, const int16_t *lumSrc, + const int16_t *chrUSrc[2], + const int16_t *chrVSrc[2], + const int16_t *alpSrc, uint8_t *dest, + int dstW, int uvalpha, int y); /** * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB * output by doing bilinear scaling between two input lines. @@ -173,10 +176,12 @@ typedef void (*yuv2packed1_fn) (struct SwsContext *c, const int16_t *lumSrc, * but can be used to generate comfort noise using dithering * for some output formats. 
*/ -typedef void (*yuv2packed2_fn) (struct SwsContext *c, const int16_t *lumSrc[2], - const int16_t *chrUSrc[2], const int16_t *chrVSrc[2], - const int16_t *alpSrc[2], uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y); +typedef void (*yuv2packed2_fn)(struct SwsContext *c, const int16_t *lumSrc[2], + const int16_t *chrUSrc[2], + const int16_t *chrVSrc[2], + const int16_t *alpSrc[2], + uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y); /** * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB * output by doing multi-point vertical scaling between input pixels. @@ -203,12 +208,13 @@ typedef void (*yuv2packed2_fn) (struct SwsContext *c, const int16_t *lumSrc[2], * but can be used to generate comfort noise using dithering * or some output formats. */ -typedef void (*yuv2packedX_fn) (struct SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, - int dstW, int y); +typedef void (*yuv2packedX_fn)(struct SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, + const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, + int dstW, int y); /* This struct should be aligned on at least a 32-byte boundary. */ typedef struct SwsContext { @@ -261,12 +267,12 @@ typedef struct SwsContext { int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler. - int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer. 
- int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer. - int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer. - int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer. - int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source. - int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source. + int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer. + int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer. + int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer. + int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer. + int lumBufIndex; ///< Index in ring buffer of the last scaled horizontal luma/alpha line from source. + int chrBufIndex; ///< Index in ring buffer of the last scaled horizontal chroma line from source. //@} uint8_t *formatConvBuffer; @@ -293,10 +299,10 @@ typedef struct SwsContext { int16_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. int16_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. int16_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. - int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. - int hChrFilterSize; ///< Horizontal filter size for chroma pixels. - int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. - int vChrFilterSize; ///< Vertical filter size for chroma pixels. + int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. + int hChrFilterSize; ///< Horizontal filter size for chroma pixels. + int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. 
+ int vChrFilterSize; ///< Vertical filter size for chroma pixels. //@} int lumMmx2FilterCodeSize; ///< Runtime-generated MMX2 horizontal fast bilinear scaler code size for luma/alpha planes. @@ -308,11 +314,11 @@ typedef struct SwsContext { int dstY; ///< Last destination vertical line output from last slice. int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc... - void * yuvTable; // pointer to the yuv->rgb table start so it can be freed() - uint8_t * table_rV[256]; - uint8_t * table_gU[256]; - int table_gV[256]; - uint8_t * table_bU[256]; + void *yuvTable; // pointer to the yuv->rgb table start so it can be freed() + uint8_t *table_rV[256]; + uint8_t *table_gU[256]; + int table_gV[256]; + uint8_t *table_bU[256]; //Colorspace stuff int contrast, brightness, saturation; // for sws_getColorspaceDetails @@ -364,15 +370,15 @@ typedef struct SwsContext { DECLARE_ALIGNED(8, uint64_t, yOffset); DECLARE_ALIGNED(8, uint64_t, uOffset); DECLARE_ALIGNED(8, uint64_t, vOffset); - int32_t lumMmxFilter[4*MAX_FILTER_SIZE]; - int32_t chrMmxFilter[4*MAX_FILTER_SIZE]; + int32_t lumMmxFilter[4 * MAX_FILTER_SIZE]; + int32_t chrMmxFilter[4 * MAX_FILTER_SIZE]; int dstW; ///< Width of destination luma/alpha planes. DECLARE_ALIGNED(8, uint64_t, esp); DECLARE_ALIGNED(8, uint64_t, vRounder); DECLARE_ALIGNED(8, uint64_t, u_temp); DECLARE_ALIGNED(8, uint64_t, v_temp); DECLARE_ALIGNED(8, uint64_t, y_temp); - int32_t alpMmxFilter[4*MAX_FILTER_SIZE]; + int32_t alpMmxFilter[4 * MAX_FILTER_SIZE]; // alignment of these values is not necessary, but merely here // to maintain the same offset across x8632 and x86-64. Once we // use proper offset macros in the asm, they can be removed. 
@@ -391,7 +397,7 @@ typedef struct SwsContext { vector signed short CGV; vector signed short OY; vector unsigned short CSHIFT; - vector signed short *vYCoeffsBank, *vCCoeffsBank; + vector signed short *vYCoeffsBank, *vCCoeffsBank; #endif #if ARCH_BFIN @@ -420,21 +426,25 @@ typedef struct SwsContext { yuv2packed2_fn yuv2packed2; yuv2packedX_fn yuv2packedX; + /// Unscaled conversion of luma plane to YV12 for horizontal scaler. void (*lumToYV12)(uint8_t *dst, const uint8_t *src, - int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler. + int width, uint32_t *pal); + /// Unscaled conversion of alpha plane to YV12 for horizontal scaler. void (*alpToYV12)(uint8_t *dst, const uint8_t *src, - int width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler. + int width, uint32_t *pal); + /// Unscaled conversion of chroma planes to YV12 for horizontal scaler. void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler. + int width, uint32_t *pal); /** - * Functions to read planar input, such as planar RGB, and convert - * internally to Y/UV. - */ + * Functions to read planar input, such as planar RGB, and convert + * internally to Y/UV. + */ /** @{ */ void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width); - void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width); + void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], + int width); /** @} */ /** @@ -496,19 +506,20 @@ typedef struct SwsContext { * to simplify creating SIMD code. 
*/ /** @{ */ - void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, - const int16_t *filter, const int16_t *filterPos, - int filterSize); - void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, - const int16_t *filter, const int16_t *filterPos, - int filterSize); + void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, + const uint8_t *src, const int16_t *filter, + const int16_t *filterPos, int filterSize); + void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, + const uint8_t *src, const int16_t *filter, + const int16_t *filterPos, int filterSize); /** @} */ - void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed. - void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed. + /// Color range conversion function for luma plane if needed. + void (*lumConvertRange)(int16_t *dst, int width); + /// Color range conversion function for chroma planes if needed. + void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); int needs_hcscale; ///< Set if there are chroma planes to be converted. 
- } SwsContext; //FIXME check init (where 0) @@ -557,66 +568,64 @@ const char *sws_format_name(enum PixelFormat format); (!(av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) && \ av_pix_fmt_descriptors[x].nb_components <= 2) #else -#define isGray(x) ( \ - (x)==PIX_FMT_GRAY8 \ - || (x)==PIX_FMT_Y400A \ - || (x)==PIX_FMT_GRAY16BE \ - || (x)==PIX_FMT_GRAY16LE \ - ) +#define isGray(x) \ + ((x) == PIX_FMT_GRAY8 || \ + (x) == PIX_FMT_Y400A || \ + (x) == PIX_FMT_GRAY16BE || \ + (x) == PIX_FMT_GRAY16LE) #endif -#define isRGBinInt(x) ( \ - (x)==PIX_FMT_RGB48BE \ - || (x)==PIX_FMT_RGB48LE \ - || (x)==PIX_FMT_RGB32 \ - || (x)==PIX_FMT_RGB32_1 \ - || (x)==PIX_FMT_RGB24 \ - || (x)==PIX_FMT_RGB565BE \ - || (x)==PIX_FMT_RGB565LE \ - || (x)==PIX_FMT_RGB555BE \ - || (x)==PIX_FMT_RGB555LE \ - || (x)==PIX_FMT_RGB444BE \ - || (x)==PIX_FMT_RGB444LE \ - || (x)==PIX_FMT_RGB8 \ - || (x)==PIX_FMT_RGB4 \ - || (x)==PIX_FMT_RGB4_BYTE \ - || (x)==PIX_FMT_MONOBLACK \ - || (x)==PIX_FMT_MONOWHITE \ - ) -#define isBGRinInt(x) ( \ - (x)==PIX_FMT_BGR48BE \ - || (x)==PIX_FMT_BGR48LE \ - || (x)==PIX_FMT_BGR32 \ - || (x)==PIX_FMT_BGR32_1 \ - || (x)==PIX_FMT_BGR24 \ - || (x)==PIX_FMT_BGR565BE \ - || (x)==PIX_FMT_BGR565LE \ - || (x)==PIX_FMT_BGR555BE \ - || (x)==PIX_FMT_BGR555LE \ - || (x)==PIX_FMT_BGR444BE \ - || (x)==PIX_FMT_BGR444LE \ - || (x)==PIX_FMT_BGR8 \ - || (x)==PIX_FMT_BGR4 \ - || (x)==PIX_FMT_BGR4_BYTE \ - || (x)==PIX_FMT_MONOBLACK \ - || (x)==PIX_FMT_MONOWHITE \ - ) -#define isAnyRGB(x) ( \ - isRGBinInt(x) \ - || isBGRinInt(x) \ - ) -#define isALPHA(x) \ - (av_pix_fmt_descriptors[x].nb_components == 2 || \ +#define isRGBinInt(x) \ + ((x) == PIX_FMT_RGB48BE || \ + (x) == PIX_FMT_RGB48LE || \ + (x) == PIX_FMT_RGB32 || \ + (x) == PIX_FMT_RGB32_1 || \ + (x) == PIX_FMT_RGB24 || \ + (x) == PIX_FMT_RGB565BE || \ + (x) == PIX_FMT_RGB565LE || \ + (x) == PIX_FMT_RGB555BE || \ + (x) == PIX_FMT_RGB555LE || \ + (x) == PIX_FMT_RGB444BE || \ + (x) == PIX_FMT_RGB444LE || \ + (x) == PIX_FMT_RGB8 || \ + (x) == 
PIX_FMT_RGB4 || \ + (x) == PIX_FMT_RGB4_BYTE || \ + (x) == PIX_FMT_MONOBLACK || \ + (x) == PIX_FMT_MONOWHITE) + +#define isBGRinInt(x) \ + ((x) == PIX_FMT_BGR48BE || \ + (x) == PIX_FMT_BGR48LE || \ + (x) == PIX_FMT_BGR32 || \ + (x) == PIX_FMT_BGR32_1 || \ + (x) == PIX_FMT_BGR24 || \ + (x) == PIX_FMT_BGR565BE || \ + (x) == PIX_FMT_BGR565LE || \ + (x) == PIX_FMT_BGR555BE || \ + (x) == PIX_FMT_BGR555LE || \ + (x) == PIX_FMT_BGR444BE || \ + (x) == PIX_FMT_BGR444LE || \ + (x) == PIX_FMT_BGR8 || \ + (x) == PIX_FMT_BGR4 || \ + (x) == PIX_FMT_BGR4_BYTE || \ + (x) == PIX_FMT_MONOBLACK || \ + (x) == PIX_FMT_MONOWHITE) + +#define isAnyRGB(x) \ + (isRGBinInt(x) || \ + isBGRinInt(x)) + +#define isALPHA(x) \ + (av_pix_fmt_descriptors[x].nb_components == 2 || \ av_pix_fmt_descriptors[x].nb_components == 4) -#define isPacked(x) (\ - (av_pix_fmt_descriptors[x].nb_components >= 2 && \ - !(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) || \ - (x) == PIX_FMT_PAL8\ - ) +#define isPacked(x) \ + ((av_pix_fmt_descriptors[x].nb_components >= 2 && \ + !(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) || \ + (x) == PIX_FMT_PAL8) #define isPlanar(x) \ - (av_pix_fmt_descriptors[x].nb_components >= 2 && \ + (av_pix_fmt_descriptors[x].nb_components >= 2 && \ (av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) #define usePal(x) ((av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) || (x) == PIX_FMT_Y400A) From 999484c9fbc38f81df300ed356cf0f6ce26c949d Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Tue, 3 Jan 2012 12:20:59 +0100 Subject: [PATCH 03/19] swscale: K&R formatting cosmetics for code examples --- libswscale/colorspace-test.c | 79 ++++++++------- libswscale/swscale-test.c | 184 +++++++++++++++++------------------ 2 files changed, 135 insertions(+), 128 deletions(-) diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c index ac3bb71f31..10cee8a955 100644 --- a/libswscale/colorspace-test.c +++ b/libswscale/colorspace-test.c @@ -27,19 +27,19 @@ #include "swscale.h" 
#include "rgb2rgb.h" -#define SIZE 1000 +#define SIZE 1000 #define srcByte 0x55 #define dstByte 0xBB -#define FUNC(s,d,n) {s,d,#n,n} +#define FUNC(s, d, n) { s, d, #n, n } int main(int argc, char **argv) { int i, funcNum; uint8_t *srcBuffer = av_malloc(SIZE); uint8_t *dstBuffer = av_malloc(SIZE); - int failedNum=0; - int passedNum=0; + int failedNum = 0; + int passedNum = 0; if (!srcBuffer || !dstBuffer) return -1; @@ -47,7 +47,7 @@ int main(int argc, char **argv) av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n"); sws_rgb2rgb_init(); - for(funcNum=0; ; funcNum++) { + for (funcNum = 0; ; funcNum++) { struct func_info_s { int src_bpp; int dst_bpp; @@ -85,67 +85,78 @@ int main(int argc, char **argv) FUNC(0, 0, NULL) }; int width; - int failed=0; - int srcBpp=0; - int dstBpp=0; + int failed = 0; + int srcBpp = 0; + int dstBpp = 0; - if (!func_info[funcNum].func) break; + if (!func_info[funcNum].func) + break; - av_log(NULL, AV_LOG_INFO,"."); + av_log(NULL, AV_LOG_INFO, "."); memset(srcBuffer, srcByte, SIZE); - for(width=63; width>0; width--) { + for (width = 63; width > 0; width--) { int dstOffset; - for(dstOffset=128; dstOffset<196; dstOffset+=4) { + for (dstOffset = 128; dstOffset < 196; dstOffset += 4) { int srcOffset; memset(dstBuffer, dstByte, SIZE); - for(srcOffset=128; srcOffset<196; srcOffset+=4) { - uint8_t *src= srcBuffer+srcOffset; - uint8_t *dst= dstBuffer+dstOffset; - const char *name=NULL; + for (srcOffset = 128; srcOffset < 196; srcOffset += 4) { + uint8_t *src = srcBuffer + srcOffset; + uint8_t *dst = dstBuffer + dstOffset; + const char *name = NULL; - if(failed) break; //don't fill the screen with shit ... + // don't fill the screen with shit ... 
+ if (failed) + break; srcBpp = func_info[funcNum].src_bpp; dstBpp = func_info[funcNum].dst_bpp; name = func_info[funcNum].name; - func_info[funcNum].func(src, dst, width*srcBpp); + func_info[funcNum].func(src, dst, width * srcBpp); - if(!srcBpp) break; + if (!srcBpp) + break; - for(i=0; i %s\n", av_pix_fmt_descriptors[srcFormat].name, av_pix_fmt_descriptors[dstFormat].name); res = -1; - goto end; } @@ -167,9 +164,9 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, sws_scale(dstContext, src, srcStride, 0, srcH, dst, dstStride); - for (i = 0; i < 4 && dstStride[i]; i++) { - crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], dstStride[i] * dstH); - } + for (i = 0; i < 4 && dstStride[i]; i++) + crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], + dstStride[i] * dstH); if (r && crc == r->crc) { ssdY = r->ssdY; @@ -177,60 +174,59 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, ssdV = r->ssdV; ssdA = r->ssdA; } else { - for (i=0; i<4; i++) { + for (i = 0; i < 4; i++) { if (refStride[i]) - out[i]= av_mallocz(refStride[i]*h); + out[i] = av_mallocz(refStride[i] * h); if (refStride[i] && !out[i]) { perror("Malloc"); res = -1; - goto end; } } - outContext= sws_getContext(dstW, dstH, dstFormat, w, h, PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); + outContext = sws_getContext(dstW, dstH, dstFormat, w, h, + PIX_FMT_YUVA420P, SWS_BILINEAR, + NULL, NULL, NULL); if (!outContext) { fprintf(stderr, "Failed to get %s ---> %s\n", av_pix_fmt_descriptors[dstFormat].name, av_pix_fmt_descriptors[PIX_FMT_YUVA420P].name); res = -1; - goto end; } sws_scale(outContext, dst, dstStride, 0, dstH, out, refStride); - ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h); + ssdY = getSSD(ref[0], out[0], refStride[0], refStride[0], w, h); if (hasChroma(srcFormat) && hasChroma(dstFormat)) { //FIXME check that output is really gray - ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1); - ssdV= 
getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1); + ssdU = getSSD(ref[1], out[1], refStride[1], refStride[1], + (w + 1) >> 1, (h + 1) >> 1); + ssdV = getSSD(ref[2], out[2], refStride[2], refStride[2], + (w + 1) >> 1, (h + 1) >> 1); } if (isALPHA(srcFormat) && isALPHA(dstFormat)) - ssdA= getSSD(ref[3], out[3], refStride[3], refStride[3], w, h); + ssdA = getSSD(ref[3], out[3], refStride[3], refStride[3], w, h); - ssdY/= w*h; - ssdU/= w*h/4; - ssdV/= w*h/4; - ssdA/= w*h; + ssdY /= w * h; + ssdU /= w * h / 4; + ssdV /= w * h / 4; + ssdA /= w * h; sws_freeContext(outContext); - for (i=0; i<4; i++) { + for (i = 0; i < 4; i++) if (refStride[i]) av_free(out[i]); - } } - printf(" CRC=%08x SSD=%5"PRId64",%5"PRId64",%5"PRId64",%5"PRId64"\n", + printf(" CRC=%08x SSD=%5"PRId64 ",%5"PRId64 ",%5"PRId64 ",%5"PRId64 "\n", crc, ssdY, ssdU, ssdV, ssdA); end: - sws_freeContext(dstContext); - for (i=0; i<4; i++) { + for (i = 0; i < 4; i++) if (dstStride[i]) av_free(dst[i]); - } return res; } @@ -239,18 +235,18 @@ static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h, enum PixelFormat srcFormat_in, enum PixelFormat dstFormat_in) { - const int flags[] = { SWS_FAST_BILINEAR, - SWS_BILINEAR, SWS_BICUBIC, - SWS_X , SWS_POINT , SWS_AREA, 0 }; - const int srcW = w; - const int srcH = h; - const int dstW[] = { srcW - srcW/3, srcW, srcW + srcW/3, 0 }; - const int dstH[] = { srcH - srcH/3, srcH, srcH + srcH/3, 0 }; + const int flags[] = { SWS_FAST_BILINEAR, SWS_BILINEAR, SWS_BICUBIC, + SWS_X, SWS_POINT, SWS_AREA, 0 }; + const int srcW = w; + const int srcH = h; + const int dstW[] = { srcW - srcW / 3, srcW, srcW + srcW / 3, 0 }; + const int dstH[] = { srcH - srcH / 3, srcH, srcH + srcH / 3, 0 }; enum PixelFormat srcFormat, dstFormat; for (srcFormat = srcFormat_in != PIX_FMT_NONE ? 
srcFormat_in : 0; srcFormat < PIX_FMT_NB; srcFormat++) { - if (!sws_isSupportedInput(srcFormat) || !sws_isSupportedOutput(srcFormat)) + if (!sws_isSupportedInput(srcFormat) || + !sws_isSupportedOutput(srcFormat)) continue; for (dstFormat = dstFormat_in != PIX_FMT_NONE ? dstFormat_in : 0; @@ -258,7 +254,8 @@ static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h, int i, j, k; int res = 0; - if (!sws_isSupportedInput(dstFormat) || !sws_isSupportedOutput(dstFormat)) + if (!sws_isSupportedInput(dstFormat) || + !sws_isSupportedOutput(dstFormat)) continue; printf("%s -> %s\n", @@ -266,14 +263,13 @@ static void selfTest(uint8_t *ref[4], int refStride[4], int w, int h, av_pix_fmt_descriptors[dstFormat].name); fflush(stdout); - for (k = 0; flags[k] && !res; k++) { + for (k = 0; flags[k] && !res; k++) for (i = 0; dstW[i] && !res; i++) for (j = 0; dstH[j] && !res; j++) res = doTest(ref, refStride, w, h, srcFormat, dstFormat, srcW, srcH, dstW[i], dstH[j], flags[k], NULL); - } if (dstFormat_in != PIX_FMT_NONE) break; } @@ -299,13 +295,14 @@ static int fileTest(uint8_t *ref[4], int refStride[4], int w, int h, FILE *fp, int flags; int ret; - ret = sscanf(buf, " %12s %dx%d -> %12s %dx%d flags=%d CRC=%x" - " SSD=%"PRId64", %"PRId64", %"PRId64", %"PRId64"\n", - srcStr, &srcW, &srcH, dstStr, &dstW, &dstH, - &flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA); + ret = sscanf(buf, + " %12s %dx%d -> %12s %dx%d flags=%d CRC=%x" + " SSD=%"PRId64 ", %"PRId64 ", %"PRId64 ", %"PRId64 "\n", + srcStr, &srcW, &srcH, dstStr, &dstW, &dstH, + &flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA); if (ret != 12) { srcStr[0] = dstStr[0] = 0; - ret = sscanf(buf, "%12s -> %12s\n", srcStr, dstStr); + ret = sscanf(buf, "%12s -> %12s\n", srcStr, dstStr); } srcFormat = av_get_pix_fmt(srcStr); @@ -339,12 +336,12 @@ int main(int argc, char **argv) { enum PixelFormat srcFormat = PIX_FMT_NONE; enum PixelFormat dstFormat = PIX_FMT_NONE; - uint8_t *rgb_data = av_malloc (W*H*4); - uint8_t 
*rgb_src[3]= {rgb_data, NULL, NULL}; - int rgb_stride[3]={4*W, 0, 0}; - uint8_t *data = av_malloc (4*W*H); - uint8_t *src[4]= {data, data+W*H, data+W*H*2, data+W*H*3}; - int stride[4]={W, W, W, W}; + uint8_t *rgb_data = av_malloc(W * H * 4); + uint8_t *rgb_src[3] = { rgb_data, NULL, NULL }; + int rgb_stride[3] = { 4 * W, 0, 0 }; + uint8_t *data = av_malloc(4 * W * H); + uint8_t *src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 }; + int stride[4] = { W, W, W, W }; int x, y; struct SwsContext *sws; AVLFG rand; @@ -354,41 +351,40 @@ int main(int argc, char **argv) if (!rgb_data || !data) return -1; - sws= sws_getContext(W/12, H/12, PIX_FMT_RGB32, W, H, PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); + sws = sws_getContext(W / 12, H / 12, PIX_FMT_RGB32, W, H, + PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); av_lfg_init(&rand, 1); - for (y=0; y Date: Sun, 8 Jan 2012 13:14:37 -0600 Subject: [PATCH 04/19] mjpegdec: K&R formatting cosmetics Signed-off-by: Diego Biurrun --- libavcodec/mjpegdec.c | 1439 +++++++++++++++++++++-------------------- 1 file changed, 741 insertions(+), 698 deletions(-) diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c index 7a85c16d95..2ae502ddeb 100644 --- a/libavcodec/mjpegdec.c +++ b/libavcodec/mjpegdec.c @@ -30,7 +30,7 @@ * MJPEG decoder. 
*/ -//#define DEBUG +// #define DEBUG #include #include "libavutil/imgutils.h" @@ -42,8 +42,9 @@ #include "jpeglsdec.h" -static int build_vlc(VLC *vlc, const uint8_t *bits_table, const uint8_t *val_table, - int nb_codes, int use_static, int is_ac) +static int build_vlc(VLC *vlc, const uint8_t *bits_table, + const uint8_t *val_table, int nb_codes, + int use_static, int is_ac) { uint8_t huff_size[256]; uint16_t huff_code[256]; @@ -55,15 +56,18 @@ static int build_vlc(VLC *vlc, const uint8_t *bits_table, const uint8_t *val_tab memset(huff_size, 0, sizeof(huff_size)); ff_mjpeg_build_huffman_codes(huff_size, huff_code, bits_table, val_table); - for(i=0; i<256; i++) - huff_sym[i]= i + 16*is_ac; + for (i = 0; i < 256; i++) + huff_sym[i] = i + 16 * is_ac; - if(is_ac) huff_sym[0]= 16*256; + if (is_ac) + huff_sym[0] = 16 * 256; - return init_vlc_sparse(vlc, 9, nb_codes, huff_size, 1, 1, huff_code, 2, 2, huff_sym, 2, 2, use_static); + return init_vlc_sparse(vlc, 9, nb_codes, huff_size, 1, 1, + huff_code, 2, 2, huff_sym, 2, 2, use_static); } -static void build_basic_mjpeg_vlc(MJpegDecodeContext * s) { +static void build_basic_mjpeg_vlc(MJpegDecodeContext *s) +{ build_vlc(&s->vlcs[0][0], ff_mjpeg_bits_dc_luminance, ff_mjpeg_val_dc, 12, 0, 0); build_vlc(&s->vlcs[0][1], ff_mjpeg_bits_dc_chrominance, @@ -88,11 +92,11 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx) s->avctx = avctx; dsputil_init(&s->dsp, avctx); ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); - s->buffer_size = 0; - s->buffer = NULL; - s->start_code = -1; + s->buffer_size = 0; + s->buffer = NULL; + s->start_code = -1; s->first_picture = 1; - s->org_height = avctx->coded_height; + s->org_height = avctx->coded_height; avctx->chroma_sample_location = AVCHROMA_LOC_CENTER; build_basic_mjpeg_vlc(s); @@ -101,17 +105,17 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx) if (avctx->flags & CODEC_FLAG_EXTERN_HUFF) s->extern_huff = 1; #endif - if (s->extern_huff) - { + if 
(s->extern_huff) { av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n"); - init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8); + init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size * 8); if (ff_mjpeg_decode_dht(s)) { - av_log(avctx, AV_LOG_ERROR, "mjpeg: error using external huffman table\n"); + av_log(avctx, AV_LOG_ERROR, + "mjpeg: error using external huffman table\n"); return AVERROR_INVALIDDATA; } } if (avctx->field_order == AV_FIELD_BB) { /* quicktime icefloe 019 */ - s->interlace_polarity = 1; /* bottom field first */ + s->interlace_polarity = 1; /* bottom field first */ av_log(avctx, AV_LOG_DEBUG, "mjpeg bottom field first\n"); } if (avctx->codec->id == CODEC_ID_AMV) @@ -130,8 +134,7 @@ int ff_mjpeg_decode_dqt(MJpegDecodeContext *s) while (len >= 65) { /* only 8 bit precision handled */ - if (get_bits(&s->gb, 4) != 0) - { + if (get_bits(&s->gb, 4) != 0) { av_log(s->avctx, AV_LOG_ERROR, "dqt: 16bit precision\n"); return -1; } @@ -140,19 +143,18 @@ int ff_mjpeg_decode_dqt(MJpegDecodeContext *s) return -1; av_log(s->avctx, AV_LOG_DEBUG, "index=%d\n", index); /* read quant table */ - for(i=0;i<64;i++) { + for (i = 0; i < 64; i++) { j = s->scantable.permutated[i]; s->quant_matrixes[index][j] = get_bits(&s->gb, 8); } - //XXX FIXME finetune, and perhaps add dc too - s->qscale[index]= FFMAX( - s->quant_matrixes[index][s->scantable.permutated[1]], - s->quant_matrixes[index][s->scantable.permutated[8]]) >> 1; - av_log(s->avctx, AV_LOG_DEBUG, "qscale[%d]: %d\n", index, s->qscale[index]); + // XXX FIXME finetune, and perhaps add dc too + s->qscale[index] = FFMAX(s->quant_matrixes[index][s->scantable.permutated[1]], + s->quant_matrixes[index][s->scantable.permutated[8]]) >> 1; + av_log(s->avctx, AV_LOG_DEBUG, "qscale[%d]: %d\n", + index, s->qscale[index]); len -= 65; } - return 0; } @@ -175,7 +177,7 @@ int ff_mjpeg_decode_dht(MJpegDecodeContext *s) if (index >= 4) return -1; n = 0; - for(i=1;i<=16;i++) { + for (i = 1; i <= 16; i++) 
{ bits_table[i] = get_bits(&s->gb, 8); n += bits_table[i]; } @@ -184,7 +186,7 @@ int ff_mjpeg_decode_dht(MJpegDecodeContext *s) return -1; code_max = 0; - for(i=0;igb, 8); if (v > code_max) code_max = v; @@ -196,15 +198,15 @@ int ff_mjpeg_decode_dht(MJpegDecodeContext *s) free_vlc(&s->vlcs[class][index]); av_log(s->avctx, AV_LOG_DEBUG, "class=%d index=%d nb_codes=%d\n", class, index, code_max + 1); - if(build_vlc(&s->vlcs[class][index], bits_table, val_table, code_max + 1, 0, class > 0) < 0){ + if (build_vlc(&s->vlcs[class][index], bits_table, val_table, + code_max + 1, 0, class > 0) < 0) return -1; - } - if(class>0){ + if (class > 0) { free_vlc(&s->vlcs[2][index]); - if(build_vlc(&s->vlcs[2][index], bits_table, val_table, code_max + 1, 0, 0) < 0){ - return -1; - } + if (build_vlc(&s->vlcs[2][index], bits_table, val_table, + code_max + 1, 0, 0) < 0) + return -1; } } return 0; @@ -215,44 +217,47 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s) int len, nb_components, i, width, height, pix_fmt_id; /* XXX: verify len field validity */ - len = get_bits(&s->gb, 16); - s->bits= get_bits(&s->gb, 8); + len = get_bits(&s->gb, 16); + s->bits = get_bits(&s->gb, 8); - if(s->pegasus_rct) s->bits=9; - if(s->bits==9 && !s->pegasus_rct) s->rct=1; //FIXME ugly + if (s->pegasus_rct) + s->bits = 9; + if (s->bits == 9 && !s->pegasus_rct) + s->rct = 1; // FIXME ugly - if (s->bits != 8 && !s->lossless){ + if (s->bits != 8 && !s->lossless) { av_log(s->avctx, AV_LOG_ERROR, "only 8 bits/component accepted\n"); return -1; } height = get_bits(&s->gb, 16); - width = get_bits(&s->gb, 16); + width = get_bits(&s->gb, 16); - //HACK for odd_height.mov - if(s->interlaced && s->width == width && s->height == height + 1) + // HACK for odd_height.mov + if (s->interlaced && s->width == width && s->height == height + 1) height= s->height; av_log(s->avctx, AV_LOG_DEBUG, "sof0: picture: %dx%d\n", width, height); - if(av_image_check_size(width, height, 0, s->avctx)) + if (av_image_check_size(width, 
height, 0, s->avctx)) return -1; nb_components = get_bits(&s->gb, 8); if (nb_components <= 0 || nb_components > MAX_COMPONENTS) return -1; - if (s->ls && !(s->bits <= 8 || nb_components == 1)){ - av_log(s->avctx, AV_LOG_ERROR, "only <= 8 bits/component or 16-bit gray accepted for JPEG-LS\n"); + if (s->ls && !(s->bits <= 8 || nb_components == 1)) { + av_log(s->avctx, AV_LOG_ERROR, + "only <= 8 bits/component or 16-bit gray accepted for JPEG-LS\n"); return -1; } s->nb_components = nb_components; - s->h_max = 1; - s->v_max = 1; - for(i=0;ih_max = 1; + s->v_max = 1; + for (i = 0; i < nb_components; i++) { /* component id */ s->component_id[i] = get_bits(&s->gb, 8) - 1; - s->h_count[i] = get_bits(&s->gb, 4); - s->v_count[i] = get_bits(&s->gb, 4); + s->h_count[i] = get_bits(&s->gb, 4); + s->v_count[i] = get_bits(&s->gb, 4); /* compute hmax and vmax (only used in interleaved case) */ if (s->h_count[i] > s->h_max) s->h_max = s->h_count[i]; @@ -261,45 +266,47 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s) s->quant_index[i] = get_bits(&s->gb, 8); if (s->quant_index[i] >= 4) return -1; - av_log(s->avctx, AV_LOG_DEBUG, "component %d %d:%d id: %d quant:%d\n", i, s->h_count[i], - s->v_count[i], s->component_id[i], s->quant_index[i]); + av_log(s->avctx, AV_LOG_DEBUG, "component %d %d:%d id: %d quant:%d\n", + i, s->h_count[i], s->v_count[i], + s->component_id[i], s->quant_index[i]); } - if(s->ls && (s->h_max > 1 || s->v_max > 1)) { - av_log(s->avctx, AV_LOG_ERROR, "Subsampling in JPEG-LS is not supported.\n"); + if (s->ls && (s->h_max > 1 || s->v_max > 1)) { + av_log(s->avctx, AV_LOG_ERROR, + "Subsampling in JPEG-LS is not supported.\n"); return -1; } - if(s->v_max==1 && s->h_max==1 && s->lossless==1) s->rgb=1; + if (s->v_max == 1 && s->h_max == 1 && s->lossless == 1) + s->rgb = 1; /* if different size, realloc/alloc picture */ /* XXX: also check h_count and v_count */ if (width != s->width || height != s->height) { av_freep(&s->qscale_table); - s->width = width; - s->height = 
height; + s->width = width; + s->height = height; s->interlaced = 0; /* test interlaced mode */ - if (s->first_picture && + if (s->first_picture && s->org_height != 0 && s->height < ((s->org_height * 3) / 4)) { - s->interlaced = 1; - s->bottom_field = s->interlace_polarity; + s->interlaced = 1; + s->bottom_field = s->interlace_polarity; s->picture_ptr->interlaced_frame = 1; - s->picture_ptr->top_field_first = !s->interlace_polarity; + s->picture_ptr->top_field_first = !s->interlace_polarity; height *= 2; } avcodec_set_dimensions(s->avctx, width, height); - s->qscale_table= av_mallocz((s->width+15)/16); - + s->qscale_table = av_mallocz((s->width + 15) / 16); s->first_picture = 0; } - if(s->interlaced && (s->bottom_field == !s->interlace_polarity)) + if (s->interlaced && (s->bottom_field == !s->interlace_polarity)) return 0; /* XXX: not complete test ! */ @@ -308,19 +315,20 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s) (s->h_count[2] << 12) | (s->v_count[2] << 8) | (s->h_count[3] << 4) | s->v_count[3]; av_log(s->avctx, AV_LOG_DEBUG, "pix fmt id %x\n", pix_fmt_id); - //NOTE we do not allocate pictures large enough for the possible padding of h/v_count being 4 - if(!(pix_fmt_id & 0xD0D0D0D0)) - pix_fmt_id-= (pix_fmt_id & 0xF0F0F0F0)>>1; - if(!(pix_fmt_id & 0x0D0D0D0D)) - pix_fmt_id-= (pix_fmt_id & 0x0F0F0F0F)>>1; + /* NOTE we do not allocate pictures large enough for the possible + * padding of h/v_count being 4 */ + if (!(pix_fmt_id & 0xD0D0D0D0)) + pix_fmt_id -= (pix_fmt_id & 0xF0F0F0F0) >> 1; + if (!(pix_fmt_id & 0x0D0D0D0D)) + pix_fmt_id -= (pix_fmt_id & 0x0F0F0F0F) >> 1; - switch(pix_fmt_id){ + switch (pix_fmt_id) { case 0x11111100: - if(s->rgb){ + if (s->rgb) s->avctx->pix_fmt = PIX_FMT_BGRA; - }else + else s->avctx->pix_fmt = s->cs_itu601 ? 
PIX_FMT_YUV444P : PIX_FMT_YUVJ444P; - assert(s->nb_components==3); + assert(s->nb_components == 3); break; case 0x11000000: s->avctx->pix_fmt = PIX_FMT_GRAY8; @@ -338,47 +346,46 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s) av_log(s->avctx, AV_LOG_ERROR, "Unhandled pixel format 0x%x\n", pix_fmt_id); return -1; } - if(s->ls){ - if(s->nb_components > 1) + if (s->ls) { + if (s->nb_components > 1) s->avctx->pix_fmt = PIX_FMT_RGB24; - else if(s->bits <= 8) + else if (s->bits <= 8) s->avctx->pix_fmt = PIX_FMT_GRAY8; else s->avctx->pix_fmt = PIX_FMT_GRAY16; } - if(s->picture_ptr->data[0]) + if (s->picture_ptr->data[0]) s->avctx->release_buffer(s->avctx, s->picture_ptr); - if(s->avctx->get_buffer(s->avctx, s->picture_ptr) < 0){ + if (s->avctx->get_buffer(s->avctx, s->picture_ptr) < 0) { av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return -1; } - s->picture_ptr->pict_type= AV_PICTURE_TYPE_I; - s->picture_ptr->key_frame= 1; - s->got_picture = 1; + s->picture_ptr->pict_type = AV_PICTURE_TYPE_I; + s->picture_ptr->key_frame = 1; + s->got_picture = 1; - for(i=0; i<3; i++){ - s->linesize[i]= s->picture_ptr->linesize[i] << s->interlaced; - } + for (i = 0; i < 3; i++) + s->linesize[i] = s->picture_ptr->linesize[i] << s->interlaced; -// printf("%d %d %d %d %d %d\n", s->width, s->height, s->linesize[0], s->linesize[1], s->interlaced, s->avctx->height); +// printf("%d %d %d %d %d %d\n", +// s->width, s->height, s->linesize[0], s->linesize[1], +// s->interlaced, s->avctx->height); - if (len != (8+(3*nb_components))) - { + if (len != (8 + (3 * nb_components))) av_log(s->avctx, AV_LOG_DEBUG, "decode_sof0: error, len(%d) mismatch\n", len); - } /* totally blank picture as progressive JPEG will only add details to it */ - if(s->progressive){ - int bw = (width + s->h_max*8-1) / (s->h_max*8); - int bh = (height + s->v_max*8-1) / (s->v_max*8); - for(i=0; inb_components; i++) { + if (s->progressive) { + int bw = (width + s->h_max * 8 - 1) / (s->h_max * 8); + int bh = (height + 
s->v_max * 8 - 1) / (s->v_max * 8); + for (i = 0; i < s->nb_components; i++) { int size = bw * bh * s->h_count[i] * s->v_count[i]; av_freep(&s->blocks[i]); av_freep(&s->last_nnz[i]); - s->blocks[i] = av_malloc(size * sizeof(**s->blocks)); - s->last_nnz[i] = av_mallocz(size * sizeof(**s->last_nnz)); + s->blocks[i] = av_malloc(size * sizeof(**s->blocks)); + s->last_nnz[i] = av_mallocz(size * sizeof(**s->last_nnz)); s->block_stride[i] = bw * s->h_count[i]; } memset(s->coefs_finished, 0, sizeof(s->coefs_finished)); @@ -390,22 +397,22 @@ static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index) { int code; code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2); - if (code < 0) - { - av_log(s->avctx, AV_LOG_WARNING, "mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index, - &s->vlcs[0][dc_index]); + if (code < 0) { + av_log(s->avctx, AV_LOG_WARNING, + "mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", + 0, dc_index, &s->vlcs[0][dc_index]); return 0xffff; } - if(code) + if (code) return get_xbits(&s->gb, code); else return 0; } /* decode block and dequantize */ -static int decode_block(MJpegDecodeContext *s, DCTELEM *block, - int component, int dc_index, int ac_index, int16_t *quant_matrix) +static int decode_block(MJpegDecodeContext *s, DCTELEM *block, int component, + int dc_index, int ac_index, int16_t *quant_matrix) { int code, i, j, level, val; @@ -427,14 +434,14 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, i += ((unsigned)code) >> 4; code &= 0xf; - if(code){ - if(code > MIN_CACHE_BITS - 16){ + if (code) { + if (code > MIN_CACHE_BITS - 16) UPDATE_CACHE(re, &s->gb); - } + { - int cache=GET_CACHE(re,&s->gb); - int sign=(~cache)>>31; - level = (NEG_USR32(sign ^ cache,code) ^ sign) - sign; + int cache = GET_CACHE(re, &s->gb); + int sign = (~cache) >> 31; + level = (NEG_USR32(sign ^ cache,code) ^ sign) - sign; } LAST_SKIP_BITS(re, &s->gb, code); @@ -443,17 +450,18 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, av_log(s->avctx, 
AV_LOG_ERROR, "error count: %d\n", i); return -1; } - j = s->scantable.permutated[i]; + j = s->scantable.permutated[i]; block[j] = level * quant_matrix[j]; } - }while(i<63); + } while (i < 63); CLOSE_READER(re, &s->gb);} return 0; } -static int decode_dc_progressive(MJpegDecodeContext *s, DCTELEM *block, int component, - int dc_index, int16_t *quant_matrix, int Al) +static int decode_dc_progressive(MJpegDecodeContext *s, DCTELEM *block, + int component, int dc_index, + int16_t *quant_matrix, int Al) { int val; s->dsp.clear_block(block); @@ -469,113 +477,121 @@ static int decode_dc_progressive(MJpegDecodeContext *s, DCTELEM *block, int comp } /* decode block and dequantize - progressive JPEG version */ -static int decode_block_progressive(MJpegDecodeContext *s, DCTELEM *block, uint8_t *last_nnz, - int ac_index, int16_t *quant_matrix, +static int decode_block_progressive(MJpegDecodeContext *s, DCTELEM *block, + uint8_t *last_nnz, int ac_index, + int16_t *quant_matrix, int ss, int se, int Al, int *EOBRUN) { int code, i, j, level, val, run; - if(*EOBRUN){ + if (*EOBRUN) { (*EOBRUN)--; return 0; } - {OPEN_READER(re, &s->gb); - for(i=ss;;i++) { - UPDATE_CACHE(re, &s->gb); - GET_VLC(code, re, &s->gb, s->vlcs[2][ac_index].table, 9, 2); - run = ((unsigned) code) >> 4; - code &= 0xF; - if(code) { - i += run; - if(code > MIN_CACHE_BITS - 16){ - UPDATE_CACHE(re, &s->gb); - } - { - int cache=GET_CACHE(re,&s->gb); - int sign=(~cache)>>31; - level = (NEG_USR32(sign ^ cache,code) ^ sign) - sign; - } - - LAST_SKIP_BITS(re, &s->gb, code); - - if (i >= se) { - if(i == se){ - j = s->scantable.permutated[se]; - block[j] = level * quant_matrix[j] << Al; - break; - } - av_log(s->avctx, AV_LOG_ERROR, "error count: %d\n", i); - return -1; - } - j = s->scantable.permutated[i]; - block[j] = level * quant_matrix[j] << Al; - }else{ - if(run == 0xF){// ZRL - skip 15 coefficients - i += 15; - if (i >= se) { - av_log(s->avctx, AV_LOG_ERROR, "ZRL overflow: %d\n", i); - return -1; - } - }else{ - 
val = (1 << run); - if(run){ - UPDATE_CACHE(re, &s->gb); - val += NEG_USR32(GET_CACHE(re, &s->gb), run); - LAST_SKIP_BITS(re, &s->gb, run); - } - *EOBRUN = val - 1; - break; - } - } - } - CLOSE_READER(re, &s->gb);} - if(i > *last_nnz) - *last_nnz = i; - return 0; -} - -#define REFINE_BIT(j) {\ - UPDATE_CACHE(re, &s->gb);\ - sign = block[j]>>15;\ - block[j] += SHOW_UBITS(re, &s->gb, 1) * ((quant_matrix[j]^sign)-sign) << Al;\ - LAST_SKIP_BITS(re, &s->gb, 1);\ -} - -#define ZERO_RUN \ -for(;;i++) {\ - if(i > last) {\ - i += run;\ - if(i > se) {\ - av_log(s->avctx, AV_LOG_ERROR, "error count: %d\n", i);\ - return -1;\ - }\ - break;\ - }\ - j = s->scantable.permutated[i];\ - if(block[j])\ - REFINE_BIT(j)\ - else if(run-- == 0)\ - break;\ -} - -/* decode block and dequantize - progressive JPEG refinement pass */ -static int decode_block_refinement(MJpegDecodeContext *s, DCTELEM *block, uint8_t *last_nnz, - int ac_index, int16_t *quant_matrix, - int ss, int se, int Al, int *EOBRUN) -{ - int code, i=ss, j, sign, val, run; - int last = FFMIN(se, *last_nnz); - - OPEN_READER(re, &s->gb); - if(*EOBRUN) - (*EOBRUN)--; - else { - for(;;i++) { + { + OPEN_READER(re, &s->gb); + for (i = ss; ; i++) { UPDATE_CACHE(re, &s->gb); GET_VLC(code, re, &s->gb, s->vlcs[2][ac_index].table, 9, 2); - if(code & 0xF) { + run = ((unsigned) code) >> 4; + code &= 0xF; + if (code) { + i += run; + if (code > MIN_CACHE_BITS - 16) + UPDATE_CACHE(re, &s->gb); + + { + int cache = GET_CACHE(re, &s->gb); + int sign = (~cache) >> 31; + level = (NEG_USR32(sign ^ cache,code) ^ sign) - sign; + } + + LAST_SKIP_BITS(re, &s->gb, code); + + if (i >= se) { + if (i == se) { + j = s->scantable.permutated[se]; + block[j] = level * quant_matrix[j] << Al; + break; + } + av_log(s->avctx, AV_LOG_ERROR, "error count: %d\n", i); + return -1; + } + j = s->scantable.permutated[i]; + block[j] = level * quant_matrix[j] << Al; + } else { + if (run == 0xF) {// ZRL - skip 15 coefficients + i += 15; + if (i >= se) { + 
av_log(s->avctx, AV_LOG_ERROR, "ZRL overflow: %d\n", i); + return -1; + } + } else { + val = (1 << run); + if (run) { + UPDATE_CACHE(re, &s->gb); + val += NEG_USR32(GET_CACHE(re, &s->gb), run); + LAST_SKIP_BITS(re, &s->gb, run); + } + *EOBRUN = val - 1; + break; + } + } + } + CLOSE_READER(re, &s->gb); + } + + if (i > *last_nnz) + *last_nnz = i; + + return 0; +} + +#define REFINE_BIT(j) { \ + UPDATE_CACHE(re, &s->gb); \ + sign = block[j] >> 15; \ + block[j] += SHOW_UBITS(re, &s->gb, 1) * \ + ((quant_matrix[j] ^ sign) - sign) << Al; \ + LAST_SKIP_BITS(re, &s->gb, 1); \ +} + +#define ZERO_RUN \ +for (; ; i++) { \ + if (i > last) { \ + i += run; \ + if (i > se) { \ + av_log(s->avctx, AV_LOG_ERROR, "error count: %d\n", i); \ + return -1; \ + } \ + break; \ + } \ + j = s->scantable.permutated[i]; \ + if (block[j]) \ + REFINE_BIT(j) \ + else if (run-- == 0) \ + break; \ +} + +/* decode block and dequantize - progressive JPEG refinement pass */ +static int decode_block_refinement(MJpegDecodeContext *s, DCTELEM *block, + uint8_t *last_nnz, + int ac_index, int16_t *quant_matrix, + int ss, int se, int Al, int *EOBRUN) +{ + int code, i = ss, j, sign, val, run; + int last = FFMIN(se, *last_nnz); + + OPEN_READER(re, &s->gb); + if (*EOBRUN) { + (*EOBRUN)--; + } else { + for (; ; i++) { + UPDATE_CACHE(re, &s->gb); + GET_VLC(code, re, &s->gb, s->vlcs[2][ac_index].table, 9, 2); + + if (code & 0xF) { run = ((unsigned) code) >> 4; UPDATE_CACHE(re, &s->gb); val = SHOW_UBITS(re, &s->gb, 1); @@ -583,21 +599,21 @@ static int decode_block_refinement(MJpegDecodeContext *s, DCTELEM *block, uint8_ ZERO_RUN; j = s->scantable.permutated[i]; val--; - block[j] = ((quant_matrix[j]^val)-val) << Al; - if(i == se) { - if(i > *last_nnz) + block[j] = ((quant_matrix[j]^val) - val) << Al; + if (i == se) { + if (i > *last_nnz) *last_nnz = i; CLOSE_READER(re, &s->gb); return 0; } - }else{ + } else { run = ((unsigned) code) >> 4; - if(run == 0xF){ + if (run == 0xF) { ZERO_RUN; - }else{ + } else { val = run; 
run = (1 << run); - if(val) { + if (val) { UPDATE_CACHE(re, &s->gb); run += SHOW_UBITS(re, &s->gb, val); LAST_SKIP_BITS(re, &s->gb, val); @@ -608,13 +624,13 @@ static int decode_block_refinement(MJpegDecodeContext *s, DCTELEM *block, uint8_ } } - if(i > *last_nnz) + if (i > *last_nnz) *last_nnz = i; } - for(;i<=last;i++) { + for (; i <= last; i++) { j = s->scantable.permutated[i]; - if(block[j]) + if (block[j]) REFINE_BIT(j) } CLOSE_READER(re, &s->gb); @@ -624,43 +640,46 @@ static int decode_block_refinement(MJpegDecodeContext *s, DCTELEM *block, uint8_ #undef REFINE_BIT #undef ZERO_RUN -static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor, int point_transform){ +static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor, + int point_transform) +{ int i, mb_x, mb_y; uint16_t (*buffer)[4]; int left[3], top[3], topleft[3]; - const int linesize= s->linesize[0]; - const int mask= (1<bits)-1; + const int linesize = s->linesize[0]; + const int mask = (1 << s->bits) - 1; - av_fast_malloc(&s->ljpeg_buffer, &s->ljpeg_buffer_size, (unsigned)s->mb_width * 4 * sizeof(s->ljpeg_buffer[0][0])); - buffer= s->ljpeg_buffer; + av_fast_malloc(&s->ljpeg_buffer, &s->ljpeg_buffer_size, + (unsigned)s->mb_width * 4 * sizeof(s->ljpeg_buffer[0][0])); + buffer = s->ljpeg_buffer; - for(i=0; i<3; i++){ - buffer[0][i]= 1 << (s->bits + point_transform - 1); - } - for(mb_y = 0; mb_y < s->mb_height; mb_y++) { - const int modified_predictor= mb_y ? predictor : 1; + for (i = 0; i < 3; i++) + buffer[0][i] = 1 << (s->bits + point_transform - 1); + + for (mb_y = 0; mb_y < s->mb_height; mb_y++) { + const int modified_predictor = mb_y ? 
predictor : 1; uint8_t *ptr = s->picture_ptr->data[0] + (linesize * mb_y); if (s->interlaced && s->bottom_field) ptr += linesize >> 1; - for(i=0; i<3; i++){ - top[i]= left[i]= topleft[i]= buffer[0][i]; - } - for(mb_x = 0; mb_x < s->mb_width; mb_x++) { + for (i = 0; i < 3; i++) + top[i] = left[i] = topleft[i] = buffer[0][i]; + + for (mb_x = 0; mb_x < s->mb_width; mb_x++) { if (s->restart_interval && !s->restart_count) s->restart_count = s->restart_interval; - for(i=0;i<3;i++) { + for (i = 0; i < 3; i++) { int pred; - topleft[i]= top[i]; - top[i]= buffer[mb_x][i]; + topleft[i] = top[i]; + top[i] = buffer[mb_x][i]; PREDICT(pred, topleft[i], top[i], left[i], modified_predictor); - left[i]= - buffer[mb_x][i]= mask & (pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform)); + left[i] = buffer[mb_x][i] = + mask & (pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform)); } if (s->restart_interval && !--s->restart_count) { @@ -669,71 +688,74 @@ static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor, int point } } - if(s->rct){ - for(mb_x = 0; mb_x < s->mb_width; mb_x++) { - ptr[4*mb_x+1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2] - 0x200)>>2); - ptr[4*mb_x+0] = buffer[mb_x][1] + ptr[4*mb_x+1]; - ptr[4*mb_x+2] = buffer[mb_x][2] + ptr[4*mb_x+1]; + if (s->rct) { + for (mb_x = 0; mb_x < s->mb_width; mb_x++) { + ptr[4 * mb_x + 1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2] - 0x200) >> 2); + ptr[4 * mb_x + 0] = buffer[mb_x][1] + ptr[4 * mb_x + 1]; + ptr[4 * mb_x + 2] = buffer[mb_x][2] + ptr[4 * mb_x + 1]; } - }else if(s->pegasus_rct){ - for(mb_x = 0; mb_x < s->mb_width; mb_x++) { - ptr[4*mb_x+1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2])>>2); - ptr[4*mb_x+0] = buffer[mb_x][1] + ptr[4*mb_x+1]; - ptr[4*mb_x+2] = buffer[mb_x][2] + ptr[4*mb_x+1]; + } else if (s->pegasus_rct) { + for (mb_x = 0; mb_x < s->mb_width; mb_x++) { + ptr[4 * mb_x + 1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2]) >> 2); + ptr[4 * 
mb_x + 0] = buffer[mb_x][1] + ptr[4 * mb_x + 1]; + ptr[4 * mb_x + 2] = buffer[mb_x][2] + ptr[4 * mb_x + 1]; } - }else{ - for(mb_x = 0; mb_x < s->mb_width; mb_x++) { - ptr[4*mb_x+0] = buffer[mb_x][2]; - ptr[4*mb_x+1] = buffer[mb_x][1]; - ptr[4*mb_x+2] = buffer[mb_x][0]; + } else { + for (mb_x = 0; mb_x < s->mb_width; mb_x++) { + ptr[4 * mb_x + 0] = buffer[mb_x][2]; + ptr[4 * mb_x + 1] = buffer[mb_x][1]; + ptr[4 * mb_x + 2] = buffer[mb_x][0]; } } } return 0; } -static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor, int point_transform){ +static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor, + int point_transform) +{ int i, mb_x, mb_y; - const int nb_components=3; + const int nb_components = 3; - for(mb_y = 0; mb_y < s->mb_height; mb_y++) { - for(mb_x = 0; mb_x < s->mb_width; mb_x++) { + for (mb_y = 0; mb_y < s->mb_height; mb_y++) { + for (mb_x = 0; mb_x < s->mb_width; mb_x++) { if (s->restart_interval && !s->restart_count) s->restart_count = s->restart_interval; - if(mb_x==0 || mb_y==0 || s->interlaced){ - for(i=0;iinterlaced) { + for (i = 0; i < nb_components; i++) { uint8_t *ptr; int n, h, v, x, y, c, j, linesize; - n = s->nb_blocks[i]; - c = s->comp_index[i]; - h = s->h_scount[i]; - v = s->v_scount[i]; - x = 0; - y = 0; - linesize= s->linesize[c]; + n = s->nb_blocks[i]; + c = s->comp_index[i]; + h = s->h_scount[i]; + v = s->v_scount[i]; + x = 0; + y = 0; + linesize = s->linesize[c]; - for(j=0; jpicture_ptr->data[c] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap - if(y==0 && mb_y==0){ - if(x==0 && mb_x==0){ - pred= 128 << point_transform; - }else{ - pred= ptr[-1]; - } - }else{ - if(x==0 && mb_x==0){ - pred= ptr[-linesize]; - }else{ - PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor); - } - } + // FIXME optimize this crap + ptr = s->picture_ptr->data[c] + + (linesize * (v * mb_y + y)) + + (h * mb_x + x); + if (y == 0 && mb_y == 0) { + if (x == 0 && mb_x == 0) + pred = 128 << 
point_transform; + else + pred = ptr[-1]; + } else { + if (x == 0 && mb_x == 0) + pred = ptr[-linesize]; + else + PREDICT(pred, ptr[-linesize - 1], + ptr[-linesize], ptr[-1], predictor); + } if (s->interlaced && s->bottom_field) ptr += linesize >> 1; - *ptr= pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform); + *ptr = pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform); if (++x == h) { x = 0; @@ -741,24 +763,28 @@ static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor, int point } } } - }else{ - for(i=0;inb_blocks[i]; - c = s->comp_index[i]; - h = s->h_scount[i]; - v = s->v_scount[i]; - x = 0; - y = 0; - linesize= s->linesize[c]; + n = s->nb_blocks[i]; + c = s->comp_index[i]; + h = s->h_scount[i]; + v = s->v_scount[i]; + x = 0; + y = 0; + linesize = s->linesize[c]; - for(j=0; jpicture_ptr->data[c] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap - PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor); - *ptr= pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform); + // FIXME optimize this crap + ptr = s->picture_ptr->data[c] + + (linesize * (v * mb_y + y)) + + (h * mb_x + x); + PREDICT(pred, ptr[-linesize - 1], + ptr[-linesize], ptr[-1], predictor); + *ptr = pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform); if (++x == h) { x = 0; y++; @@ -790,49 +816,54 @@ static av_always_inline void mjpeg_copy_block(uint8_t *dst, const uint8_t *src, } } -static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, int Al, - const uint8_t *mb_bitmask, const AVFrame *reference){ +static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, + int Al, const uint8_t *mb_bitmask, + const AVFrame *reference) +{ int i, mb_x, mb_y; - uint8_t* data[MAX_COMPONENTS]; + uint8_t *data[MAX_COMPONENTS]; const uint8_t *reference_data[MAX_COMPONENTS]; int linesize[MAX_COMPONENTS]; GetBitContext mb_bitmask_gb; - if (mb_bitmask) { - init_get_bits(&mb_bitmask_gb, 
mb_bitmask, s->mb_width*s->mb_height); - } + if (mb_bitmask) + init_get_bits(&mb_bitmask_gb, mb_bitmask, s->mb_width * s->mb_height); - if(s->flipped && s->avctx->flags & CODEC_FLAG_EMU_EDGE) { - av_log(s->avctx, AV_LOG_ERROR, "Can not flip image with CODEC_FLAG_EMU_EDGE set!\n"); + if (s->flipped && s->avctx->flags & CODEC_FLAG_EMU_EDGE) { + av_log(s->avctx, AV_LOG_ERROR, + "Can not flip image with CODEC_FLAG_EMU_EDGE set!\n"); s->flipped = 0; } - for(i=0; i < nb_components; i++) { - int c = s->comp_index[i]; + + for (i = 0; i < nb_components; i++) { + int c = s->comp_index[i]; data[c] = s->picture_ptr->data[c]; reference_data[c] = reference ? reference->data[c] : NULL; - linesize[c]=s->linesize[c]; + linesize[c] = s->linesize[c]; s->coefs_finished[c] |= 1; - if(s->flipped) { - //picture should be flipped upside-down for this codec - int offset = (linesize[c] * (s->v_scount[i] * (8 * s->mb_height -((s->height/s->v_max)&7)) - 1 )); - data[c] += offset; + if (s->flipped) { + // picture should be flipped upside-down for this codec + int offset = (linesize[c] * (s->v_scount[i] * + (8 * s->mb_height - ((s->height / s->v_max) & 7)) - 1)); + data[c] += offset; reference_data[c] += offset; - linesize[c] *= -1; + linesize[c] *= -1; } } - for(mb_y = 0; mb_y < s->mb_height; mb_y++) { - for(mb_x = 0; mb_x < s->mb_width; mb_x++) { + for (mb_y = 0; mb_y < s->mb_height; mb_y++) { + for (mb_x = 0; mb_x < s->mb_width; mb_x++) { const int copy_mb = mb_bitmask && !get_bits1(&mb_bitmask_gb); if (s->restart_interval && !s->restart_count) s->restart_count = s->restart_interval; - if(get_bits_count(&s->gb)>s->gb.size_in_bits){ - av_log(s->avctx, AV_LOG_ERROR, "overread %d\n", get_bits_count(&s->gb) - s->gb.size_in_bits); + if (get_bits_count(&s->gb)>s->gb.size_in_bits) { + av_log(s->avctx, AV_LOG_ERROR, "overread %d\n", + get_bits_count(&s->gb) - s->gb.size_in_bits); return -1; } - for(i=0;iv_scount[i]; x = 0; y = 0; - for(j=0;j> s->avctx->lowres); - if(s->interlaced && s->bottom_field) 
+ if (s->interlaced && s->bottom_field) block_offset += linesize[c] >> 1; ptr = data[c] + block_offset; - if(!s->progressive) { - if (copy_mb) { - mjpeg_copy_block(ptr, reference_data[c] + block_offset, linesize[c], s->avctx->lowres); - } else { - s->dsp.clear_block(s->block); - if(decode_block(s, s->block, i, - s->dc_index[i], s->ac_index[i], - s->quant_matrixes[ s->quant_index[c] ]) < 0) { - av_log(s->avctx, AV_LOG_ERROR, "error y=%d x=%d\n", mb_y, mb_x); - return -1; - } - s->dsp.idct_put(ptr, linesize[c], s->block); + if (!s->progressive) { + if (copy_mb) + mjpeg_copy_block(ptr, reference_data[c] + block_offset, + linesize[c], s->avctx->lowres); + else { + s->dsp.clear_block(s->block); + if (decode_block(s, s->block, i, + s->dc_index[i], s->ac_index[i], + s->quant_matrixes[s->quant_index[c]]) < 0) { + av_log(s->avctx, AV_LOG_ERROR, + "error y=%d x=%d\n", mb_y, mb_x); + return -1; + } + s->dsp.idct_put(ptr, linesize[c], s->block); } } else { - int block_idx = s->block_stride[c] * (v * mb_y + y) + (h * mb_x + x); + int block_idx = s->block_stride[c] * (v * mb_y + y) + + (h * mb_x + x); DCTELEM *block = s->blocks[c][block_idx]; - if(Ah) - block[0] += get_bits1(&s->gb) * s->quant_matrixes[ s->quant_index[c] ][0] << Al; - else if(decode_dc_progressive(s, block, i, s->dc_index[i], s->quant_matrixes[ s->quant_index[c] ], Al) < 0) { - av_log(s->avctx, AV_LOG_ERROR, "error y=%d x=%d\n", mb_y, mb_x); + if (Ah) + block[0] += get_bits1(&s->gb) * + s->quant_matrixes[s->quant_index[c]][0] << Al; + else if (decode_dc_progressive(s, block, i, s->dc_index[i], + s->quant_matrixes[s->quant_index[c]], + Al) < 0) { + av_log(s->avctx, AV_LOG_ERROR, + "error y=%d x=%d\n", mb_y, mb_x); return -1; } } -// av_log(s->avctx, AV_LOG_DEBUG, "mb: %d %d processed\n", mb_y, mb_x); -//av_log(NULL, AV_LOG_DEBUG, "%d %d %d %d %d %d %d %d \n", mb_x, mb_y, x, y, c, s->bottom_field, (v * mb_y + y) * 8, (h * mb_x + x) * 8); + // av_log(s->avctx, AV_LOG_DEBUG, "mb: %d %d processed\n", + // mb_y, 
mb_x); + // av_log(NULL, AV_LOG_DEBUG, "%d %d %d %d %d %d %d %d \n", + // mb_x, mb_y, x, y, c, s->bottom_field, + // (v * mb_y + y) * 8, (h * mb_x + x) * 8); if (++x == h) { x = 0; y++; @@ -893,9 +934,8 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, i if ((get_bits(&s->gb, 8) & 0xF8) == 0xD0) { for (i = 0; i < nb_components; i++) /* reset dc */ s->last_dc[i] = 1024; - } else { + } else skip_bits_long(&s->gb, pos - get_bits_count(&s->gb)); - } } } } @@ -903,62 +943,66 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, i return 0; } -static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss, int se, int Ah, int Al, - const uint8_t *mb_bitmask, const AVFrame *reference){ +static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss, + int se, int Ah, int Al, + const uint8_t *mb_bitmask, + const AVFrame *reference) +{ int mb_x, mb_y; int EOBRUN = 0; int c = s->comp_index[0]; - uint8_t* data = s->picture_ptr->data[c]; + uint8_t *data = s->picture_ptr->data[c]; const uint8_t *reference_data = reference ? 
reference->data[c] : NULL; - int linesize = s->linesize[c]; + int linesize = s->linesize[c]; int last_scan = 0; - int16_t *quant_matrix = s->quant_matrixes[ s->quant_index[c] ]; + int16_t *quant_matrix = s->quant_matrixes[s->quant_index[c]]; GetBitContext mb_bitmask_gb; - if (mb_bitmask) { - init_get_bits(&mb_bitmask_gb, mb_bitmask, s->mb_width*s->mb_height); - } + if (mb_bitmask) + init_get_bits(&mb_bitmask_gb, mb_bitmask, s->mb_width * s->mb_height); - if(!Al) { - s->coefs_finished[c] |= (1LL<<(se+1))-(1LL<coefs_finished[c] |= (1LL << (se + 1)) - (1LL << ss); last_scan = !~s->coefs_finished[c]; } - if(s->interlaced && s->bottom_field) { - int offset = linesize >> 1; - data += offset; + if (s->interlaced && s->bottom_field) { + int offset = linesize >> 1; + data += offset; reference_data += offset; } - for(mb_y = 0; mb_y < s->mb_height; mb_y++) { - int block_offset = (mb_y*linesize*8 >> s->avctx->lowres); - uint8_t *ptr = data + block_offset; - int block_idx = mb_y * s->block_stride[c]; + for (mb_y = 0; mb_y < s->mb_height; mb_y++) { + int block_offset = (mb_y * linesize * 8 >> s->avctx->lowres); + uint8_t *ptr = data + block_offset; + int block_idx = mb_y * s->block_stride[c]; DCTELEM (*block)[64] = &s->blocks[c][block_idx]; - uint8_t *last_nnz = &s->last_nnz[c][block_idx]; - for(mb_x = 0; mb_x < s->mb_width; mb_x++, block++, last_nnz++) { + uint8_t *last_nnz = &s->last_nnz[c][block_idx]; + for (mb_x = 0; mb_x < s->mb_width; mb_x++, block++, last_nnz++) { const int copy_mb = mb_bitmask && !get_bits1(&mb_bitmask_gb); if (!copy_mb) { - int ret; - if(Ah) - ret = decode_block_refinement(s, *block, last_nnz, s->ac_index[0], - quant_matrix, ss, se, Al, &EOBRUN); - else - ret = decode_block_progressive(s, *block, last_nnz, s->ac_index[0], - quant_matrix, ss, se, Al, &EOBRUN); - if(ret < 0) { - av_log(s->avctx, AV_LOG_ERROR, "error y=%d x=%d\n", mb_y, mb_x); - return -1; - } + int ret; + if (Ah) + ret = decode_block_refinement(s, *block, last_nnz, s->ac_index[0], + 
quant_matrix, ss, se, Al, &EOBRUN); + else + ret = decode_block_progressive(s, *block, last_nnz, s->ac_index[0], + quant_matrix, ss, se, Al, &EOBRUN); + if (ret < 0) { + av_log(s->avctx, AV_LOG_ERROR, + "error y=%d x=%d\n", mb_y, mb_x); + return -1; + } } - if(last_scan) { + if (last_scan) { if (copy_mb) { - mjpeg_copy_block(ptr, reference_data + block_offset, linesize, s->avctx->lowres); + mjpeg_copy_block(ptr, reference_data + block_offset, + linesize, s->avctx->lowres); } else { - s->dsp.idct_put(ptr, linesize, *block); - ptr += 8 >> s->avctx->lowres; + s->dsp.idct_put(ptr, linesize, *block); + ptr += 8 >> s->avctx->lowres; } } } @@ -966,36 +1010,36 @@ static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss, int s return 0; } -int ff_mjpeg_decode_sos(MJpegDecodeContext *s, - const uint8_t *mb_bitmask, const AVFrame *reference) +int ff_mjpeg_decode_sos(MJpegDecodeContext *s, const uint8_t *mb_bitmask, + const AVFrame *reference) { int len, nb_components, i, h, v, predictor, point_transform; int index, id; - const int block_size= s->lossless ? 1 : 8; + const int block_size = s->lossless ? 
1 : 8; int ilv, prev_shift; /* XXX: verify len field validity */ len = get_bits(&s->gb, 16); nb_components = get_bits(&s->gb, 8); - if (nb_components == 0 || nb_components > MAX_COMPONENTS){ - av_log(s->avctx, AV_LOG_ERROR, "decode_sos: nb_components (%d) unsupported\n", nb_components); + if (nb_components == 0 || nb_components > MAX_COMPONENTS) { + av_log(s->avctx, AV_LOG_ERROR, + "decode_sos: nb_components (%d) unsupported\n", nb_components); return -1; } - if (len != 6+2*nb_components) - { + if (len != 6 + 2 * nb_components) { av_log(s->avctx, AV_LOG_ERROR, "decode_sos: invalid len (%d)\n", len); return -1; } - for(i=0;igb, 8) - 1; av_log(s->avctx, AV_LOG_DEBUG, "component: %d\n", id); /* find component index */ - for(index=0;indexnb_components;index++) + for (index = 0; index < s->nb_components; index++) if (id == s->component_id[index]) break; - if (index == s->nb_components) - { - av_log(s->avctx, AV_LOG_ERROR, "decode_sos: index(%d) out of components\n", index); + if (index == s->nb_components) { + av_log(s->avctx, AV_LOG_ERROR, + "decode_sos: index(%d) out of components\n", index); return -1; } /* Metasoft MJPEG codec has Cb and Cr swapped */ @@ -1006,8 +1050,8 @@ int ff_mjpeg_decode_sos(MJpegDecodeContext *s, s->comp_index[i] = index; s->nb_blocks[i] = s->h_count[index] * s->v_count[index]; - s->h_scount[i] = s->h_count[index]; - s->v_scount[i] = s->v_count[index]; + s->h_scount[i] = s->h_count[index]; + s->v_scount[i] = s->v_count[index]; s->dc_index[i] = get_bits(&s->gb, 4); s->ac_index[i] = get_bits(&s->gb, 4); @@ -1015,34 +1059,36 @@ int ff_mjpeg_decode_sos(MJpegDecodeContext *s, if (s->dc_index[i] < 0 || s->ac_index[i] < 0 || s->dc_index[i] >= 4 || s->ac_index[i] >= 4) goto out_of_range; - if (!s->vlcs[0][s->dc_index[i]].table || !s->vlcs[1][s->ac_index[i]].table) + if (!s->vlcs[0][s->dc_index[i]].table || + !s->vlcs[1][s->ac_index[i]].table) goto out_of_range; } - predictor= get_bits(&s->gb, 8); /* JPEG Ss / lossless JPEG predictor /JPEG-LS NEAR */ - 
ilv= get_bits(&s->gb, 8); /* JPEG Se / JPEG-LS ILV */ - prev_shift = get_bits(&s->gb, 4); /* Ah */ - point_transform= get_bits(&s->gb, 4); /* Al */ + predictor = get_bits(&s->gb, 8); /* JPEG Ss / lossless JPEG predictor /JPEG-LS NEAR */ + ilv = get_bits(&s->gb, 8); /* JPEG Se / JPEG-LS ILV */ + prev_shift = get_bits(&s->gb, 4); /* Ah */ + point_transform = get_bits(&s->gb, 4); /* Al */ - for(i=0;ilast_dc[i] = 1024; if (nb_components > 1) { /* interleaved stream */ s->mb_width = (s->width + s->h_max * block_size - 1) / (s->h_max * block_size); s->mb_height = (s->height + s->v_max * block_size - 1) / (s->v_max * block_size); - } else if(!s->ls) { /* skip this for JPEG-LS */ + } else if (!s->ls) { /* skip this for JPEG-LS */ h = s->h_max / s->h_scount[0]; v = s->v_max / s->v_scount[0]; - s->mb_width = (s->width + h * block_size - 1) / (h * block_size); - s->mb_height = (s->height + v * block_size - 1) / (v * block_size); + s->mb_width = (s->width + h * block_size - 1) / (h * block_size); + s->mb_height = (s->height + v * block_size - 1) / (v * block_size); s->nb_blocks[0] = 1; - s->h_scount[0] = 1; - s->v_scount[0] = 1; + s->h_scount[0] = 1; + s->v_scount[0] = 1; } - if(s->avctx->debug & FF_DEBUG_PICT_INFO) - av_log(s->avctx, AV_LOG_DEBUG, "%s %s p:%d >>:%d ilv:%d bits:%d %s\n", s->lossless ? "lossless" : "sequential DCT", s->rgb ? "RGB" : "", + if (s->avctx->debug & FF_DEBUG_PICT_INFO) + av_log(s->avctx, AV_LOG_DEBUG, "%s %s p:%d >>:%d ilv:%d bits:%d %s\n", + s->lossless ? "lossless" : "sequential DCT", s->rgb ? "RGB" : "", predictor, point_transform, ilv, s->bits, s->pegasus_rct ? "PRCT" : (s->rct ? 
"RCT" : "")); @@ -1051,30 +1097,31 @@ int ff_mjpeg_decode_sos(MJpegDecodeContext *s, for (i = s->mjpb_skiptosod; i > 0; i--) skip_bits(&s->gb, 8); - if(s->lossless){ - if(CONFIG_JPEGLS_DECODER && s->ls){ -// for(){ + if (s->lossless) { + if (CONFIG_JPEGLS_DECODER && s->ls) { +// for () { // reset_ls_coding_parameters(s, 0); - if(ff_jpegls_decode_picture(s, predictor, point_transform, ilv) < 0) + if (ff_jpegls_decode_picture(s, predictor, point_transform, ilv) < 0) return -1; - }else{ - if(s->rgb){ - if(ljpeg_decode_rgb_scan(s, predictor, point_transform) < 0) + } else { + if (s->rgb) { + if (ljpeg_decode_rgb_scan(s, predictor, point_transform) < 0) return -1; - }else{ - if(ljpeg_decode_yuv_scan(s, predictor, point_transform) < 0) + } else { + if (ljpeg_decode_yuv_scan(s, predictor, point_transform) < 0) return -1; } } - }else{ - if(s->progressive && predictor) { - if(mjpeg_decode_scan_progressive_ac(s, predictor, ilv, prev_shift, point_transform, - mb_bitmask, reference) < 0) + } else { + if (s->progressive && predictor) { + if (mjpeg_decode_scan_progressive_ac(s, predictor, ilv, prev_shift, + point_transform, + mb_bitmask, reference) < 0) return -1; } else { - if(mjpeg_decode_scan(s, nb_components, prev_shift, point_transform, - mb_bitmask, reference) < 0) + if (mjpeg_decode_scan(s, nb_components, prev_shift, point_transform, + mb_bitmask, reference) < 0) return -1; } } @@ -1090,8 +1137,9 @@ static int mjpeg_decode_dri(MJpegDecodeContext *s) if (get_bits(&s->gb, 16) != 4) return -1; s->restart_interval = get_bits(&s->gb, 16); - s->restart_count = 0; - av_log(s->avctx, AV_LOG_DEBUG, "restart interval: %d\n", s->restart_interval); + s->restart_count = 0; + av_log(s->avctx, AV_LOG_DEBUG, "restart interval: %d\n", + s->restart_interval); return 0; } @@ -1103,22 +1151,20 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) len = get_bits(&s->gb, 16); if (len < 5) return -1; - if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits) + if (8 * len + 
get_bits_count(&s->gb) > s->gb.size_in_bits) return -1; - id = get_bits_long(&s->gb, 32); - id = av_be2ne32(id); + id = get_bits_long(&s->gb, 32); + id = av_be2ne32(id); len -= 6; - if(s->avctx->debug & FF_DEBUG_STARTCODE){ + if (s->avctx->debug & FF_DEBUG_STARTCODE) av_log(s->avctx, AV_LOG_DEBUG, "APPx %8X\n", id); - } /* Buggy AVID, it puts EOI only at every 10th frame. */ /* Also, this fourcc is used by non-avid files too, it holds some information, but it's always present in AVID-created files. */ - if (id == AV_RL32("AVI1")) - { + if (id == AV_RL32("AVI1")) { /* structure: 4bytes AVI1 1bytes polarity @@ -1126,12 +1172,14 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) 4bytes field_size 4bytes field_size_less_padding */ - s->buggy_avid = 1; -// if (s->first_picture) -// printf("mjpeg: workarounding buggy AVID\n"); + s->buggy_avid = 1; +// if (s->first_picture) +// printf("mjpeg: workarounding buggy AVID\n"); i = get_bits(&s->gb, 8); - if (i==2) s->bottom_field= 1; - else if(i==1) s->bottom_field= 0; + if (i == 2) + s->bottom_field = 1; + else if (i == 1) + s->bottom_field = 0; #if 0 skip_bits(&s->gb, 8); skip_bits(&s->gb, 32); @@ -1145,63 +1193,61 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) // len -= 2; - if (id == AV_RL32("JFIF")) - { + if (id == AV_RL32("JFIF")) { int t_w, t_h, v1, v2; skip_bits(&s->gb, 8); /* the trailing zero-byte */ - v1= get_bits(&s->gb, 8); - v2= get_bits(&s->gb, 8); + v1 = get_bits(&s->gb, 8); + v2 = get_bits(&s->gb, 8); skip_bits(&s->gb, 8); - s->avctx->sample_aspect_ratio.num= get_bits(&s->gb, 16); - s->avctx->sample_aspect_ratio.den= get_bits(&s->gb, 16); + s->avctx->sample_aspect_ratio.num = get_bits(&s->gb, 16); + s->avctx->sample_aspect_ratio.den = get_bits(&s->gb, 16); if (s->avctx->debug & FF_DEBUG_PICT_INFO) - av_log(s->avctx, AV_LOG_INFO, "mjpeg: JFIF header found (version: %x.%x) SAR=%d/%d\n", - v1, v2, - s->avctx->sample_aspect_ratio.num, - s->avctx->sample_aspect_ratio.den - ); + av_log(s->avctx, 
AV_LOG_INFO, + "mjpeg: JFIF header found (version: %x.%x) SAR=%d/%d\n", + v1, v2, + s->avctx->sample_aspect_ratio.num, + s->avctx->sample_aspect_ratio.den); t_w = get_bits(&s->gb, 8); t_h = get_bits(&s->gb, 8); - if (t_w && t_h) - { + if (t_w && t_h) { /* skip thumbnail */ - if (len-10-(t_w*t_h*3) > 0) - len -= t_w*t_h*3; + if (len -10 - (t_w * t_h * 3) > 0) + len -= t_w * t_h * 3; } len -= 10; goto out; } - if (id == AV_RL32("Adob") && (get_bits(&s->gb, 8) == 'e')) - { + if (id == AV_RL32("Adob") && (get_bits(&s->gb, 8) == 'e')) { if (s->avctx->debug & FF_DEBUG_PICT_INFO) av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n"); skip_bits(&s->gb, 16); /* version */ skip_bits(&s->gb, 16); /* flags0 */ skip_bits(&s->gb, 16); /* flags1 */ - skip_bits(&s->gb, 8); /* transform */ + skip_bits(&s->gb, 8); /* transform */ len -= 7; goto out; } - if (id == AV_RL32("LJIF")){ + if (id == AV_RL32("LJIF")) { if (s->avctx->debug & FF_DEBUG_PICT_INFO) - av_log(s->avctx, AV_LOG_INFO, "Pegasus lossless jpeg header found\n"); + av_log(s->avctx, AV_LOG_INFO, + "Pegasus lossless jpeg header found\n"); skip_bits(&s->gb, 16); /* version ? */ skip_bits(&s->gb, 16); /* unknwon always 0? */ skip_bits(&s->gb, 16); /* unknwon always 0? */ skip_bits(&s->gb, 16); /* unknwon always 0? 
*/ - switch( get_bits(&s->gb, 8)){ + switch (get_bits(&s->gb, 8)) { case 1: - s->rgb= 1; - s->pegasus_rct=0; + s->rgb = 1; + s->pegasus_rct = 0; break; case 2: - s->rgb= 1; - s->pegasus_rct=1; + s->rgb = 1; + s->pegasus_rct = 1; break; default: av_log(s->avctx, AV_LOG_ERROR, "unknown colorspace\n"); @@ -1211,13 +1257,12 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) } /* Apple MJPEG-A */ - if ((s->start_code == APP1) && (len > (0x28 - 8))) - { - id = get_bits_long(&s->gb, 32); - id = av_be2ne32(id); + if ((s->start_code == APP1) && (len > (0x28 - 8))) { + id = get_bits_long(&s->gb, 32); + id = av_be2ne32(id); len -= 4; - if (id == AV_RL32("mjpg")) /* Apple MJPEG-A */ - { + /* Apple MJPEG-A */ + if (id == AV_RL32("mjpg")) { #if 0 skip_bits(&s->gb, 32); /* field size */ skip_bits(&s->gb, 32); /* pad field size */ @@ -1236,8 +1281,9 @@ static int mjpeg_decode_app(MJpegDecodeContext *s) out: /* slow but needed for extreme adobe jpegs */ if (len < 0) - av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n"); - while(--len > 0) + av_log(s->avctx, AV_LOG_ERROR, + "mjpeg: error, decode_app parser read over the end\n"); + while (--len > 0) skip_bits(&s->gb, 8); return 0; @@ -1246,34 +1292,31 @@ out: static int mjpeg_decode_com(MJpegDecodeContext *s) { int len = get_bits(&s->gb, 16); - if (len >= 2 && 8*len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) { + if (len >= 2 && + 8 * len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) { char *cbuf = av_malloc(len - 1); if (cbuf) { int i; for (i = 0; i < len - 2; i++) cbuf[i] = get_bits(&s->gb, 8); - if (i > 0 && cbuf[i-1] == '\n') - cbuf[i-1] = 0; + if (i > 0 && cbuf[i - 1] == '\n') + cbuf[i - 1] = 0; else cbuf[i] = 0; - if(s->avctx->debug & FF_DEBUG_PICT_INFO) + if (s->avctx->debug & FF_DEBUG_PICT_INFO) av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf); /* buggy avid, it puts EOI only at every 10th frame */ - if (!strcmp(cbuf, "AVID")) - { + if (!strcmp(cbuf, "AVID")) { 
s->buggy_avid = 1; - // if (s->first_picture) - // printf("mjpeg: workarounding buggy AVID\n"); - } - else if(!strcmp(cbuf, "CS=ITU601")){ - s->cs_itu601= 1; - } - else if((len > 20 && !strncmp(cbuf, "Intel(R) JPEG Library", 21)) || - (len > 19 && !strncmp(cbuf, "Metasoft MJPEG Codec", 20))){ + // if (s->first_picture) + // printf("mjpeg: workarounding buggy AVID\n"); + } else if (!strcmp(cbuf, "CS=ITU601")) + s->cs_itu601 = 1; + else if ((len > 20 && !strncmp(cbuf, "Intel(R) JPEG Library", 21)) || + (len > 19 && !strncmp(cbuf, "Metasoft MJPEG Codec", 20))) s->flipped = 1; - } av_free(cbuf); } @@ -1290,12 +1333,12 @@ static int find_marker(const uint8_t **pbuf_ptr, const uint8_t *buf_end) unsigned int v, v2; int val; #ifdef DEBUG - int skipped=0; + int skipped = 0; #endif buf_ptr = *pbuf_ptr; while (buf_ptr < buf_end) { - v = *buf_ptr++; + v = *buf_ptr++; v2 = *buf_ptr; if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe) && buf_ptr < buf_end) { val = *buf_ptr++; @@ -1314,105 +1357,97 @@ found: int ff_mjpeg_find_marker(MJpegDecodeContext *s, const uint8_t **buf_ptr, const uint8_t *buf_end, - const uint8_t **unescaped_buf_ptr, int *unescaped_buf_size) + const uint8_t **unescaped_buf_ptr, + int *unescaped_buf_size) { int start_code; start_code = find_marker(buf_ptr, buf_end); - if ((buf_end - *buf_ptr) > s->buffer_size) - { - av_free(s->buffer); - s->buffer_size = buf_end - *buf_ptr; - s->buffer = av_malloc(s->buffer_size + FF_INPUT_BUFFER_PADDING_SIZE); - av_log(s->avctx, AV_LOG_DEBUG, "buffer too small, expanding to %d bytes\n", - s->buffer_size); - } + if ((buf_end - *buf_ptr) > s->buffer_size) { + av_free(s->buffer); + s->buffer_size = buf_end - *buf_ptr; + s->buffer = av_malloc(s->buffer_size + FF_INPUT_BUFFER_PADDING_SIZE); + av_log(s->avctx, AV_LOG_DEBUG, + "buffer too small, expanding to %d bytes\n", s->buffer_size); + } - /* unescape buffer of SOS, use special treatment for JPEG-LS */ - if (start_code == SOS && !s->ls) - { - const uint8_t *src = *buf_ptr; - 
uint8_t *dst = s->buffer; + /* unescape buffer of SOS, use special treatment for JPEG-LS */ + if (start_code == SOS && !s->ls) { + const uint8_t *src = *buf_ptr; + uint8_t *dst = s->buffer; - while (srcavctx->codec_id != CODEC_ID_THP) { + if (x == 0xff) { + while (src < buf_end && x == 0xff) + x = *(src++); + + if (x >= 0xd0 && x <= 0xd7) *(dst++) = x; - if (s->avctx->codec_id != CODEC_ID_THP) - { - if (x == 0xff) { - while (src < buf_end && x == 0xff) - x = *(src++); - - if (x >= 0xd0 && x <= 0xd7) - *(dst++) = x; - else if (x) - break; - } - } - } - *unescaped_buf_ptr = s->buffer; - *unescaped_buf_size = dst - s->buffer; - - av_log(s->avctx, AV_LOG_DEBUG, "escaping removed %td bytes\n", - (buf_end - *buf_ptr) - (dst - s->buffer)); + else if (x) + break; } - else if(start_code == SOS && s->ls){ - const uint8_t *src = *buf_ptr; - uint8_t *dst = s->buffer; - int bit_count = 0; - int t = 0, b = 0; - PutBitContext pb; + } + } + *unescaped_buf_ptr = s->buffer; + *unescaped_buf_size = dst - s->buffer; - s->cur_scan++; + av_log(s->avctx, AV_LOG_DEBUG, "escaping removed %td bytes\n", + (buf_end - *buf_ptr) - (dst - s->buffer)); + } else if (start_code == SOS && s->ls) { + const uint8_t *src = *buf_ptr; + uint8_t *dst = s->buffer; + int bit_count = 0; + int t = 0, b = 0; + PutBitContext pb; - /* find marker */ - while (src + t < buf_end){ - uint8_t x = src[t++]; - if (x == 0xff){ - while((src + t < buf_end) && x == 0xff) - x = src[t++]; - if (x & 0x80) { - t -= 2; - break; - } - } - } - bit_count = t * 8; + s->cur_scan++; - init_put_bits(&pb, dst, t); - - /* unescape bitstream */ - while(b < t){ - uint8_t x = src[b++]; - put_bits(&pb, 8, x); - if(x == 0xFF){ - x = src[b++]; - put_bits(&pb, 7, x); - bit_count--; - } - } - flush_put_bits(&pb); - - *unescaped_buf_ptr = dst; - *unescaped_buf_size = (bit_count + 7) >> 3; - } - else - { - *unescaped_buf_ptr = *buf_ptr; - *unescaped_buf_size = buf_end - *buf_ptr; + /* find marker */ + while (src + t < buf_end) { + uint8_t x = 
src[t++]; + if (x == 0xff) { + while ((src + t < buf_end) && x == 0xff) + x = src[t++]; + if (x & 0x80) { + t -= 2; + break; } + } + } + bit_count = t * 8; + init_put_bits(&pb, dst, t); + + /* unescape bitstream */ + while (b < t) { + uint8_t x = src[b++]; + put_bits(&pb, 8, x); + if (x == 0xFF) { + x = src[b++]; + put_bits(&pb, 7, x); + bit_count--; + } + } + flush_put_bits(&pb); + + *unescaped_buf_ptr = dst; + *unescaped_buf_size = (bit_count + 7) >> 3; + } else { + *unescaped_buf_ptr = *buf_ptr; + *unescaped_buf_size = buf_end - *buf_ptr; + } return start_code; } -int ff_mjpeg_decode_frame(AVCodecContext *avctx, - void *data, int *data_size, - AVPacket *avpkt) +int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *data_size, + AVPacket *avpkt) { const uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; + int buf_size = avpkt->size; MJpegDecodeContext *s = avctx->priv_data; const uint8_t *buf_end, *buf_ptr; const uint8_t *unescaped_buf_ptr; @@ -1426,149 +1461,155 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, while (buf_ptr < buf_end) { /* find start next marker */ start_code = ff_mjpeg_find_marker(s, &buf_ptr, buf_end, - &unescaped_buf_ptr, &unescaped_buf_size); - { - /* EOF */ - if (start_code < 0) { - goto the_end; - } else { - av_log(avctx, AV_LOG_DEBUG, "marker=%x avail_size_in_buf=%td\n", start_code, buf_end - buf_ptr); + &unescaped_buf_ptr, + &unescaped_buf_size); + /* EOF */ + if (start_code < 0) { + goto the_end; + } else { + av_log(avctx, AV_LOG_DEBUG, "marker=%x avail_size_in_buf=%td\n", + start_code, buf_end - buf_ptr); - init_get_bits(&s->gb, unescaped_buf_ptr, unescaped_buf_size*8); + init_get_bits(&s->gb, unescaped_buf_ptr, unescaped_buf_size * 8); - s->start_code = start_code; - if(s->avctx->debug & FF_DEBUG_STARTCODE){ - av_log(avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code); + s->start_code = start_code; + if (s->avctx->debug & FF_DEBUG_STARTCODE) + av_log(avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code); + + /* 
process markers */ + if (start_code >= 0xd0 && start_code <= 0xd7) + av_log(avctx, AV_LOG_DEBUG, + "restart marker: %d\n", start_code & 0x0f); + /* APP fields */ + else if (start_code >= APP0 && start_code <= APP15) + mjpeg_decode_app(s); + /* Comment */ + else if (start_code == COM) + mjpeg_decode_com(s); + + switch (start_code) { + case SOI: + s->restart_interval = 0; + s->restart_count = 0; + /* nothing to do on SOI */ + break; + case DQT: + ff_mjpeg_decode_dqt(s); + break; + case DHT: + if (ff_mjpeg_decode_dht(s) < 0) { + av_log(avctx, AV_LOG_ERROR, "huffman table decode error\n"); + return -1; } - - /* process markers */ - if (start_code >= 0xd0 && start_code <= 0xd7) { - av_log(avctx, AV_LOG_DEBUG, "restart marker: %d\n", start_code&0x0f); - /* APP fields */ - } else if (start_code >= APP0 && start_code <= APP15) { - mjpeg_decode_app(s); - /* Comment */ - } else if (start_code == COM){ - mjpeg_decode_com(s); - } - - switch(start_code) { - case SOI: - s->restart_interval = 0; - - s->restart_count = 0; - /* nothing to do on SOI */ + break; + case SOF0: + case SOF1: + s->lossless = 0; + s->ls = 0; + s->progressive = 0; + if (ff_mjpeg_decode_sof(s) < 0) + return -1; + break; + case SOF2: + s->lossless = 0; + s->ls = 0; + s->progressive = 1; + if (ff_mjpeg_decode_sof(s) < 0) + return -1; + break; + case SOF3: + s->lossless = 1; + s->ls = 0; + s->progressive = 0; + if (ff_mjpeg_decode_sof(s) < 0) + return -1; + break; + case SOF48: + s->lossless = 1; + s->ls = 1; + s->progressive = 0; + if (ff_mjpeg_decode_sof(s) < 0) + return -1; + break; + case LSE: + if (!CONFIG_JPEGLS_DECODER || ff_jpegls_decode_lse(s) < 0) + return -1; + break; + case EOI: + s->cur_scan = 0; + if ((s->buggy_avid && !s->interlaced) || s->restart_interval) break; - case DQT: - ff_mjpeg_decode_dqt(s); - break; - case DHT: - if(ff_mjpeg_decode_dht(s) < 0){ - av_log(avctx, AV_LOG_ERROR, "huffman table decode error\n"); - return -1; - } - break; - case SOF0: - case SOF1: - s->lossless=0; - s->ls=0; 
- s->progressive=0; - if (ff_mjpeg_decode_sof(s) < 0) - return -1; - break; - case SOF2: - s->lossless=0; - s->ls=0; - s->progressive=1; - if (ff_mjpeg_decode_sof(s) < 0) - return -1; - break; - case SOF3: - s->lossless=1; - s->ls=0; - s->progressive=0; - if (ff_mjpeg_decode_sof(s) < 0) - return -1; - break; - case SOF48: - s->lossless=1; - s->ls=1; - s->progressive=0; - if (ff_mjpeg_decode_sof(s) < 0) - return -1; - break; - case LSE: - if (!CONFIG_JPEGLS_DECODER || ff_jpegls_decode_lse(s) < 0) - return -1; - break; - case EOI: - s->cur_scan = 0; - if ((s->buggy_avid && !s->interlaced) || s->restart_interval) - break; eoi_parser: - if (!s->got_picture) { - av_log(avctx, AV_LOG_WARNING, "Found EOI before any SOF, ignoring\n"); - break; + if (!s->got_picture) { + av_log(avctx, AV_LOG_WARNING, + "Found EOI before any SOF, ignoring\n"); + break; } - if (s->interlaced) { - s->bottom_field ^= 1; - /* if not bottom field, do not output image yet */ - if (s->bottom_field == !s->interlace_polarity) - goto not_the_end; + if (s->interlaced) { + s->bottom_field ^= 1; + /* if not bottom field, do not output image yet */ + if (s->bottom_field == !s->interlace_polarity) + goto not_the_end; } - *picture = *s->picture_ptr; + *picture = *s->picture_ptr; *data_size = sizeof(AVFrame); - if(!s->lossless){ - picture->quality= FFMAX3(s->qscale[0], s->qscale[1], s->qscale[2]); - picture->qstride= 0; - picture->qscale_table= s->qscale_table; - memset(picture->qscale_table, picture->quality, (s->width+15)/16); - if(avctx->debug & FF_DEBUG_QP) - av_log(avctx, AV_LOG_DEBUG, "QP: %d\n", picture->quality); - picture->quality*= FF_QP2LAMBDA; + if (!s->lossless) { + picture->quality = FFMAX3(s->qscale[0], + s->qscale[1], + s->qscale[2]); + picture->qstride = 0; + picture->qscale_table = s->qscale_table; + memset(picture->qscale_table, picture->quality, + (s->width + 15) / 16); + if (avctx->debug & FF_DEBUG_QP) + av_log(avctx, AV_LOG_DEBUG, + "QP: %d\n", picture->quality); + picture->quality *= 
FF_QP2LAMBDA; } - goto the_end; - case SOS: - if (!s->got_picture) { - av_log(avctx, AV_LOG_WARNING, "Can not process SOS before SOF, skipping\n"); - break; + goto the_end; + case SOS: + if (!s->got_picture) { + av_log(avctx, AV_LOG_WARNING, + "Can not process SOS before SOF, skipping\n"); + break; } - if (ff_mjpeg_decode_sos(s, NULL, NULL) < 0 && - (avctx->err_recognition & AV_EF_EXPLODE)) - return AVERROR_INVALIDDATA; - /* buggy avid puts EOI every 10-20th frame */ - /* if restart period is over process EOI */ - if ((s->buggy_avid && !s->interlaced) || s->restart_interval) - goto eoi_parser; - break; - case DRI: - mjpeg_decode_dri(s); - break; - case SOF5: - case SOF6: - case SOF7: - case SOF9: - case SOF10: - case SOF11: - case SOF13: - case SOF14: - case SOF15: - case JPG: - av_log(avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code); - break; -// default: -// printf("mjpeg: unsupported marker (%x)\n", start_code); -// break; - } + if (ff_mjpeg_decode_sos(s, NULL, NULL) < 0 && + (avctx->err_recognition & AV_EF_EXPLODE)) + return AVERROR_INVALIDDATA; + /* buggy avid puts EOI every 10-20th frame */ + /* if restart period is over process EOI */ + if ((s->buggy_avid && !s->interlaced) || s->restart_interval) + goto eoi_parser; + break; + case DRI: + mjpeg_decode_dri(s); + break; + case SOF5: + case SOF6: + case SOF7: + case SOF9: + case SOF10: + case SOF11: + case SOF13: + case SOF14: + case SOF15: + case JPG: + av_log(avctx, AV_LOG_ERROR, + "mjpeg: unsupported coding type (%x)\n", start_code); + break; +// default: +// printf("mjpeg: unsupported marker (%x)\n", start_code); +// break; + } not_the_end: - /* eof process start code */ - buf_ptr += (get_bits_count(&s->gb)+7)/8; - av_log(avctx, AV_LOG_DEBUG, "marker parser used %d bytes (%d bits)\n", - (get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb)); - } + /* eof process start code */ + buf_ptr += (get_bits_count(&s->gb) + 7) / 8; + av_log(avctx, AV_LOG_DEBUG, + "marker parser used %d bytes 
(%d bits)\n", + (get_bits_count(&s->gb) + 7) / 8, get_bits_count(&s->gb)); } } if (s->got_picture) { @@ -1578,8 +1619,9 @@ not_the_end: av_log(avctx, AV_LOG_FATAL, "No JPEG data found in image\n"); return -1; the_end: - av_log(avctx, AV_LOG_DEBUG, "mjpeg decode frame unused %td bytes\n", buf_end - buf_ptr); -// return buf_end - buf_ptr; + av_log(avctx, AV_LOG_DEBUG, "mjpeg decode frame unused %td bytes\n", + buf_end - buf_ptr); +// return buf_end - buf_ptr; return buf_ptr - buf; } @@ -1594,13 +1636,13 @@ av_cold int ff_mjpeg_decode_end(AVCodecContext *avctx) av_free(s->buffer); av_free(s->qscale_table); av_freep(&s->ljpeg_buffer); - s->ljpeg_buffer_size=0; + s->ljpeg_buffer_size = 0; - for(i=0;i<3;i++) { - for(j=0;j<4;j++) + for (i = 0; i < 3; i++) { + for (j = 0; j < 4; j++) free_vlc(&s->vlcs[i][j]); } - for(i=0; iblocks[i]); av_freep(&s->last_nnz[i]); } @@ -1610,7 +1652,8 @@ av_cold int ff_mjpeg_decode_end(AVCodecContext *avctx) #define OFFSET(x) offsetof(MJpegDecodeContext, x) #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM static const AVOption options[] = { - { "extern_huff", "Use external huffman table.", OFFSET(extern_huff), AV_OPT_TYPE_INT, { 0 }, 0, 1, VD }, + { "extern_huff", "Use external huffman table.", + OFFSET(extern_huff), AV_OPT_TYPE_INT, { 0 }, 0, 1, VD }, { NULL }, }; @@ -1630,8 +1673,8 @@ AVCodec ff_mjpeg_decoder = { .close = ff_mjpeg_decode_end, .decode = ff_mjpeg_decode_frame, .capabilities = CODEC_CAP_DR1, - .max_lowres = 3, - .long_name = NULL_IF_CONFIG_SMALL("MJPEG (Motion JPEG)"), + .max_lowres = 3, + .long_name = NULL_IF_CONFIG_SMALL("MJPEG (Motion JPEG)"), .priv_class = &mjpegdec_class, }; @@ -1644,6 +1687,6 @@ AVCodec ff_thp_decoder = { .close = ff_mjpeg_decode_end, .decode = ff_mjpeg_decode_frame, .capabilities = CODEC_CAP_DR1, - .max_lowres = 3, - .long_name = NULL_IF_CONFIG_SMALL("Nintendo Gamecube THP video"), + .max_lowres = 3, + .long_name = NULL_IF_CONFIG_SMALL("Nintendo Gamecube THP video"), }; From 
599855748a93dc8bdaf5a5b3ac681e9a7026bc04 Mon Sep 17 00:00:00 2001 From: Mike Melanson Date: Sat, 7 Jan 2012 19:59:09 -0800 Subject: [PATCH 05/19] FATE: test to exercise WTV demuxer. Does not attempt to decode perceptual codecs inside. Code coverage: libavformat/wtv.c: 0% -> 71% Signed-off-by: Ronald S. Bultje --- tests/fate/demux.mak | 3 + tests/ref/fate/wtv-demux | 139 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 tests/ref/fate/wtv-demux diff --git a/tests/fate/demux.mak b/tests/fate/demux.mak index 1e7d781e3c..b40e4b7a01 100644 --- a/tests/fate/demux.mak +++ b/tests/fate/demux.mak @@ -82,5 +82,8 @@ fate-siff: CMD = framecrc -i $(SAMPLES)/SIFF/INTRO_B.VB -t 3 -pix_fmt rgb24 FATE_TESTS += fate-westwood-aud fate-westwood-aud: CMD = md5 -i $(SAMPLES)/westwood-aud/excellent.aud -f s16le +FATE_TESTS += fate-wtv-demux +fate-wtv-demux: CMD = framecrc -i $(SAMPLES)/wtv/law-and-order-partial.wtv -vcodec copy -acodec copy + FATE_TESTS += fate-xmv-demux fate-xmv-demux: CMD = framecrc -i $(SAMPLES)/xmv/logos1p.fmv -vcodec copy -acodec copy diff --git a/tests/ref/fate/wtv-demux b/tests/ref/fate/wtv-demux new file mode 100644 index 0000000000..b3744ee2d1 --- /dev/null +++ b/tests/ref/fate/wtv-demux @@ -0,0 +1,139 @@ +1, 0, 576, 0x9b6e1638 +1, 1620, 576, 0x0ca91183 +1, 3780, 576, 0xec6a180f +1, 5940, 576, 0x478a2b9b +1, 8100, 576, 0x00fa15b3 +1, 10260, 576, 0xfb551816 +1, 12960, 576, 0x422e12bd +1, 15120, 576, 0xa7581b29 +1, 17280, 576, 0xd4b31a74 +1, 19440, 576, 0x11521b10 +1, 21600, 576, 0x3dcc1474 +1, 23760, 576, 0x66c31aab +1, 25920, 576, 0x97f318a8 +1, 28080, 576, 0xd3fb1a30 +1, 30240, 576, 0xd2bd16af +1, 32400, 576, 0x6c10146a +1, 34560, 576, 0x10d81468 +1, 36720, 576, 0x3813162d +1, 38880, 576, 0x89e71d95 +1, 41040, 576, 0xd1c717f9 +1, 43200, 576, 0x1a311e5f +1, 45360, 576, 0x0ea80e05 +1, 47520, 576, 0x2f1718f2 +1, 49680, 576, 0xffe01e13 +1, 51840, 576, 0xa7b02296 +1, 54000, 576, 0x199f1597 +1, 56160, 576, 0xdea217ba +1, 
58320, 576, 0x8a790f01 +1, 60480, 576, 0x23e80038 +1, 62640, 576, 0x75dc048a +1, 64800, 576, 0xeb4b0d93 +1, 66960, 576, 0xde1322f5 +1, 69120, 576, 0xc3131f35 +1, 71280, 576, 0x708f1381 +1, 73440, 576, 0x1f00137e +0, 74578, 41980, 0xd4920915 +1, 75600, 576, 0x05131eb0 +1, 77760, 576, 0x78151c22 +0, 78178, 7228, 0x1b141fa3 +1, 79920, 576, 0x31771239 +0, 81777, 7492, 0x1a47f3e4 +1, 82080, 576, 0x3ce4097c +1, 84240, 576, 0x180e15f4 +0, 85378, 25068, 0xcb70a744 +1, 86400, 576, 0x30db0604 +1, 88560, 576, 0x9b290284 +0, 88978, 7212, 0x0ab9f558 +1, 90720, 576, 0xcf340753 +0, 92578, 7612, 0xa93054f0 +1, 92880, 576, 0xdaa41457 +1, 95040, 576, 0x34d310a2 +0, 96177, 22868, 0xa77db64a +1, 97200, 576, 0x58b31010 +1, 99360, 576, 0x19610f54 +0, 99778, 6260, 0x6cf76411 +1, 101520, 576, 0x17762352 +0, 103377, 6156, 0xe168394b +1, 103680, 576, 0x1fea1448 +1, 105840, 576, 0x55840a01 +0, 106977, 23364, 0x53164f1e +1, 108000, 576, 0x6c9c24ce +1, 110160, 576, 0x955f1e97 +0, 110578, 6708, 0x89877269 +1, 112320, 576, 0x2827134f +0, 114178, 6908, 0x8d62a249 +1, 114480, 576, 0x34a01c29 +1, 116640, 576, 0x7d351e52 +0, 117778, 38156, 0xec41f682 +1, 118800, 576, 0x00c91d9e +1, 120960, 576, 0x57ea1a97 +0, 121377, 5764, 0xcc04534b +1, 123120, 576, 0xef3a1c74 +0, 124977, 5388, 0xb8a1c3c5 +1, 125280, 576, 0x11fc217d +1, 127440, 576, 0x59ce20e5 +0, 128578, 16764, 0x59460d96 +1, 129600, 576, 0xaafc1dbf +1, 131760, 576, 0xdd941609 +0, 132177, 5548, 0x5c91e93d +1, 133920, 576, 0x900420b0 +0, 135777, 5652, 0x5e321aed +1, 136080, 576, 0x5f4f1aa1 +1, 138240, 576, 0x7d7e18de +0, 139377, 15564, 0xefdf5080 +1, 140400, 576, 0x986c0d9d +1, 142560, 576, 0xcb4c21c0 +0, 142977, 6492, 0xd1d5c5f8 +1, 144720, 576, 0xbcfb1e8b +0, 146577, 5604, 0xf9472b44 +1, 146880, 576, 0xcb541b4c +1, 149040, 576, 0x980426e9 +0, 150177, 17924, 0x45815b7b +1, 151200, 576, 0x09d00aa0 +1, 153360, 576, 0xad591374 +0, 153778, 5020, 0x3cc5e554 +1, 155520, 576, 0x97bf1461 +0, 157378, 5276, 0xa0554c12 +1, 157680, 576, 0xdc871cc4 +1, 159840, 
576, 0x56781896 +0, 160977, 31460, 0x5765eb5f +1, 162000, 576, 0xc77714e3 +1, 164160, 576, 0x280e18d4 +0, 164577, 4972, 0x91adbab7 +1, 166320, 576, 0xbc0d2302 +0, 168178, 5580, 0xfea707cb +1, 168480, 576, 0x79191384 +1, 170640, 576, 0x65481c97 +0, 171778, 17412, 0x0afe4d27 +1, 172800, 576, 0xc94d227d +1, 174960, 576, 0xa68a1f14 +0, 175378, 5236, 0x03f55309 +1, 177120, 576, 0x6af11a5c +0, 178977, 4924, 0x558e753c +1, 179280, 576, 0x4d1019ef +1, 181440, 576, 0x3b1b17b5 +0, 182577, 15396, 0xf145d121 +1, 183600, 576, 0xcdd8159f +1, 185760, 576, 0x97cd1d06 +0, 186177, 4708, 0x43066a92 +1, 187920, 576, 0x5d1b1123 +0, 189778, 4332, 0x9e22bcba +1, 190080, 576, 0x888d0cb0 +1, 192240, 576, 0x556e1dad +0, 193377, 12876, 0x46ff9ef4 +1, 194400, 576, 0xf7af0bce +1, 196560, 576, 0xb5da160a +0, 196978, 5940, 0x27cba62e +1, 198720, 576, 0x4a8d0e98 +0, 200578, 6124, 0x6bab0a6d +1, 200880, 576, 0x183b1c7e +1, 203040, 576, 0xc47120e6 +0, 204178, 36428, 0x942f9648 +1, 205200, 576, 0xb1f31346 +0, 207777, 6660, 0x545a0db7 +0, 211377, 6780, 0x2d1d4189 +0, 214978, 16460, 0x7c3b3ca4 +0, 218578, 6724, 0x8538cc6f +0, 222178, 7068, 0x69574fd0 +0, 225777, 19552, 0xf230e854 From 2170a0e6add6bb0a6fbdf689b82361c21d9b72be Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sun, 8 Jan 2012 16:37:43 -0800 Subject: [PATCH 06/19] swscale: convert yuy2/uyvy/nv12/nv21ToY/UV from inline asm to yasm. Also implement SSE2/AVX variants. 
--- libswscale/Makefile | 3 +- libswscale/x86/input.asm | 242 ++++++++++++++++++++++++++++++ libswscale/x86/swscale_mmx.c | 83 ++++++++++ libswscale/x86/swscale_template.c | 163 -------------------- 4 files changed, 327 insertions(+), 164 deletions(-) create mode 100644 libswscale/x86/input.asm diff --git a/libswscale/Makefile b/libswscale/Makefile index 89182573f0..bef4200c59 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -17,7 +17,8 @@ OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \ x86/swscale_mmx.o \ x86/yuv2rgb_mmx.o OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o -MMX-OBJS-$(HAVE_YASM) += x86/output.o \ +MMX-OBJS-$(HAVE_YASM) += x86/input.o \ + x86/output.o \ x86/scale.o TESTPROGS = colorspace swscale diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm new file mode 100644 index 0000000000..4bdb575765 --- /dev/null +++ b/libswscale/x86/input.asm @@ -0,0 +1,242 @@ +;****************************************************************************** +;* x86-optimized input routines; does shuffling of packed +;* YUV formats into individual planes, and converts RGB +;* into YUV planes also. +;* Copyright (c) 2012 Ronald S. Bultje +;* +;* This file is part of Libav. +;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +SECTION_RODATA + +SECTION .text + +;----------------------------------------------------------------------------- +; YUYV/UYVY/NV12/NV21 packed pixel shuffling. +; +; void ToY_(uint8_t *dst, const uint8_t *src, int w); +; and +; void toUV_(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, +; const uint8_t *unused, int w); +;----------------------------------------------------------------------------- + +; %1 = a (aligned) or u (unaligned) +; %2 = yuyv or uyvy +%macro LOOP_YUYV_TO_Y 2 +.loop_%1: + mov%1 m0, [srcq+wq*2] ; (byte) { Y0, U0, Y1, V0, ... } + mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } +%ifidn %2, yuyv + pand m0, m2 ; (word) { Y0, Y1, ..., Y7 } + pand m1, m2 ; (word) { Y8, Y9, ..., Y15 } +%else ; uyvy + psrlw m0, 8 ; (word) { Y0, Y1, ..., Y7 } + psrlw m1, 8 ; (word) { Y8, Y9, ..., Y15 } +%endif ; yuyv/uyvy + packuswb m0, m1 ; (byte) { Y0, ..., Y15 } + mova [dstq+wq], m0 + add wq, mmsize + jl .loop_%1 + REP_RET +%endmacro + +; %1 = nr. of XMM registers +; %2 = yuyv or uyvy +; %3 = if specified, it means that unaligned and aligned code in loop +; will be the same (i.e. 
YUYV+AVX), and thus we don't need to +; split the loop in an aligned and unaligned case +%macro YUYV_TO_Y_FN 2-3 +cglobal %2ToY, 3, 3, %1, dst, src, w +%ifdef ARCH_X86_64 + movsxd wq, wd +%endif + add dstq, wq +%if mmsize == 16 + test srcq, 15 +%endif + lea srcq, [srcq+wq*2] +%ifidn %2, yuyv + pcmpeqb m2, m2 ; (byte) { 0xff } x 16 + psrlw m2, 8 ; (word) { 0x00ff } x 8 +%endif ; yuyv +%if mmsize == 16 + jnz .loop_u_start + neg wq + LOOP_YUYV_TO_Y a, %2 +.loop_u_start: + neg wq + LOOP_YUYV_TO_Y u, %2 +%else ; mmsize == 8 + neg wq + LOOP_YUYV_TO_Y a, %2 +%endif ; mmsize == 8/16 +%endmacro + +; %1 = a (aligned) or u (unaligned) +; %2 = yuyv or uyvy +%macro LOOP_YUYV_TO_UV 2 +.loop_%1: +%ifidn %2, yuyv + mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... } + mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } + psrlw m0, 8 ; (word) { U0, V0, ..., U3, V3 } + psrlw m1, 8 ; (word) { U4, V4, ..., U7, V7 } +%else ; uyvy +%if cpuflag(avx) + vpand m0, m2, [srcq+wq*4] ; (word) { U0, V0, ..., U3, V3 } + vpand m1, m2, [srcq+wq*4+mmsize] ; (word) { U4, V4, ..., U7, V7 } +%else + mov%1 m0, [srcq+wq*4] ; (byte) { Y0, U0, Y1, V0, ... } + mov%1 m1, [srcq+wq*4+mmsize] ; (byte) { Y8, U4, Y9, V4, ... } + pand m0, m2 ; (word) { U0, V0, ..., U3, V3 } + pand m1, m2 ; (word) { U4, V4, ..., U7, V7 } +%endif +%endif ; yuyv/uyvy + packuswb m0, m1 ; (byte) { U0, V0, ..., U7, V7 } + pand m1, m0, m2 ; (word) { U0, U1, ..., U7 } + psrlw m0, 8 ; (word) { V0, V1, ..., V7 } +%if mmsize == 16 + packuswb m1, m0 ; (byte) { U0, ... U7, V1, ... V7 } + movh [dstUq+wq], m1 + movhps [dstVq+wq], m1 +%else ; mmsize == 8 + packuswb m1, m1 ; (byte) { U0, ... U3 } + packuswb m0, m0 ; (byte) { V0, ... V3 } + movh [dstUq+wq], m1 + movh [dstVq+wq], m0 +%endif ; mmsize == 8/16 + add wq, mmsize / 2 + jl .loop_%1 + REP_RET +%endmacro + +; %1 = nr. of XMM registers +; %2 = yuyv or uyvy +; %3 = if specified, it means that unaligned and aligned code in loop +; will be the same (i.e. 
UYVY+AVX), and thus we don't need to +; split the loop in an aligned and unaligned case +%macro YUYV_TO_UV_FN 2-3 +cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w +%ifdef ARCH_X86_64 + movsxd wq, r4m +%else ; x86-32 + mov wq, r4m +%endif + add dstUq, wq + add dstVq, wq +%if mmsize == 16 && %0 == 2 + test srcq, 15 +%endif + lea srcq, [srcq+wq*4] + pcmpeqb m2, m2 ; (byte) { 0xff } x 16 + psrlw m2, 8 ; (word) { 0x00ff } x 8 + ; NOTE: if uyvy+avx, u/a are identical +%if mmsize == 16 && %0 == 2 + jnz .loop_u_start + neg wq + LOOP_YUYV_TO_UV a, %2 +.loop_u_start: + neg wq + LOOP_YUYV_TO_UV u, %2 +%else ; mmsize == 8 + neg wq + LOOP_YUYV_TO_UV a, %2 +%endif ; mmsize == 8/16 +%endmacro + +; %1 = a (aligned) or u (unaligned) +; %2 = nv12 or nv21 +%macro LOOP_NVXX_TO_UV 2 +.loop_%1: + mov%1 m0, [srcq+wq*2] ; (byte) { U0, V0, U1, V1, ... } + mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... } + pand m2, m0, m4 ; (word) { U0, U1, ..., U7 } + pand m3, m1, m4 ; (word) { U8, U9, ..., U15 } + psrlw m0, 8 ; (word) { V0, V1, ..., V7 } + psrlw m1, 8 ; (word) { V8, V9, ..., V15 } + packuswb m2, m3 ; (byte) { U0, ..., U15 } + packuswb m0, m1 ; (byte) { V0, ..., V15 } +%ifidn %2, nv12 + mova [dstUq+wq], m2 + mova [dstVq+wq], m0 +%else ; nv21 + mova [dstVq+wq], m2 + mova [dstUq+wq], m0 +%endif ; nv12/21 + add wq, mmsize + jl .loop_%1 + REP_RET +%endmacro + +; %1 = nr. 
of XMM registers +; %2 = nv12 or nv21 +%macro NVXX_TO_UV_FN 2 +cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w +%ifdef ARCH_X86_64 + movsxd wq, r4m +%else ; x86-32 + mov wq, r4m +%endif + add dstUq, wq + add dstVq, wq +%if mmsize == 16 + test srcq, 15 +%endif + lea srcq, [srcq+wq*2] + pcmpeqb m4, m4 ; (byte) { 0xff } x 16 + psrlw m4, 8 ; (word) { 0x00ff } x 8 +%if mmsize == 16 + jnz .loop_u_start + neg wq + LOOP_NVXX_TO_UV a, %2 +.loop_u_start: + neg wq + LOOP_NVXX_TO_UV u, %2 +%else ; mmsize == 8 + neg wq + LOOP_NVXX_TO_UV a, %2 +%endif ; mmsize == 8/16 +%endmacro + +%ifdef ARCH_X86_32 +INIT_MMX mmx +YUYV_TO_Y_FN 0, yuyv +YUYV_TO_Y_FN 0, uyvy +YUYV_TO_UV_FN 0, yuyv +YUYV_TO_UV_FN 0, uyvy +NVXX_TO_UV_FN 0, nv12 +NVXX_TO_UV_FN 0, nv21 +%endif + +INIT_XMM sse2 +YUYV_TO_Y_FN 3, yuyv +YUYV_TO_Y_FN 2, uyvy +YUYV_TO_UV_FN 3, yuyv +YUYV_TO_UV_FN 3, uyvy +NVXX_TO_UV_FN 5, nv12 +NVXX_TO_UV_FN 5, nv21 + +INIT_XMM avx +; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but +; that's not faster in practice +YUYV_TO_UV_FN 3, yuyv +YUYV_TO_UV_FN 3, uyvy, 1 +NVXX_TO_UV_FN 5, nv12 +NVXX_TO_UV_FN 5, nv21 diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c index 4305cef41d..867a9f1244 100644 --- a/libswscale/x86/swscale_mmx.c +++ b/libswscale/x86/swscale_mmx.c @@ -244,6 +244,26 @@ VSCALE_FUNCS(sse2, sse2); VSCALE_FUNC(16, sse4); VSCALE_FUNCS(avx, avx); +#define INPUT_UV_FUNC(fmt, opt) \ +extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src, const uint8_t *unused1, \ + int w, uint32_t *unused2) +#define INPUT_FUNC(fmt, opt) \ +extern void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \ + int w, uint32_t *unused); \ + INPUT_UV_FUNC(fmt, opt) +#define INPUT_FUNCS(opt) \ + INPUT_FUNC(uyvy, opt); \ + INPUT_FUNC(yuyv, opt); \ + INPUT_UV_FUNC(nv12, opt); \ + INPUT_UV_FUNC(nv21, opt) + +#if ARCH_X86_32 +INPUT_FUNCS(mmx); +#endif +INPUT_FUNCS(sse2); +INPUT_FUNCS(avx); + void ff_sws_init_swScale_mmx(SwsContext 
*c) { int cpu_flags = av_get_cpu_flags(); @@ -296,6 +316,30 @@ switch(c->dstBpc){ \ ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2); + + switch (c->srcFormat) { + case PIX_FMT_Y400A: + c->lumToYV12 = ff_yuyvToY_mmx; + if (c->alpPixBuf) + c->alpToYV12 = ff_uyvyToY_mmx; + break; + case PIX_FMT_YUYV422: + c->lumToYV12 = ff_yuyvToY_mmx; + c->chrToYV12 = ff_yuyvToUV_mmx; + break; + case PIX_FMT_UYVY422: + c->lumToYV12 = ff_uyvyToY_mmx; + c->chrToYV12 = ff_uyvyToUV_mmx; + break; + case PIX_FMT_NV12: + c->chrToYV12 = ff_nv12ToUV_mmx; + break; + case PIX_FMT_NV21: + c->chrToYV12 = ff_nv21ToUV_mmx; + break; + default: + break; + } } if (cpu_flags & AV_CPU_FLAG_MMX2) { ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2,); @@ -314,6 +358,28 @@ switch(c->dstBpc){ \ ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2,); ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1); + + switch (c->srcFormat) { + case PIX_FMT_Y400A: + c->lumToYV12 = ff_yuyvToY_sse2; + if (c->alpPixBuf) + c->alpToYV12 = ff_uyvyToY_sse2; + break; + case PIX_FMT_YUYV422: + c->lumToYV12 = ff_yuyvToY_sse2; + c->chrToYV12 = ff_yuyvToUV_sse2; + break; + case PIX_FMT_UYVY422: + c->lumToYV12 = ff_uyvyToY_sse2; + c->chrToYV12 = ff_uyvyToUV_sse2; + break; + case PIX_FMT_NV12: + c->chrToYV12 = ff_nv12ToUV_sse2; + break; + case PIX_FMT_NV21: + c->chrToYV12 = ff_nv21ToUV_sse2; + break; + } } if (cpu_flags & AV_CPU_FLAG_SSSE3) { ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3); @@ -332,6 +398,23 @@ switch(c->dstBpc){ \ if (cpu_flags & AV_CPU_FLAG_AVX) { ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx,); ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); + + switch (c->srcFormat) { + case PIX_FMT_YUYV422: + c->chrToYV12 = ff_yuyvToUV_avx; + break; + case PIX_FMT_UYVY422: + c->chrToYV12 = ff_uyvyToUV_avx; + break; + case 
PIX_FMT_NV12: + c->chrToYV12 = ff_nv12ToUV_avx; + break; + case PIX_FMT_NV21: + c->chrToYV12 = ff_nv21ToUV_avx; + break; + default: + break; + } } #endif } diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 5e7df5c4a0..b3d7336588 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -1361,147 +1361,6 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, } } -#if !COMPILE_TEMPLATE_MMX2 -//FIXME yuy2* can read up to 7 samples too much - -static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, - int width, uint32_t *unused) -{ - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm2 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",2), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" - "pand %%mm2, %%mm0 \n\t" - "pand %%mm2, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width) - : "%"REG_a - ); -} - -static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) -{ - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",4), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",4), %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "packuswb %%mm1, %%mm1 \n\t" - "movd %%mm0, (%3, %%"REG_a") \n\t" - "movd %%mm1, (%2, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) - : "%"REG_a - ); - assert(src1 == src2); -} - -/* This is almost identical to the previous, end exists only because - * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. 
*/ -static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, - int width, uint32_t *unused) -{ - __asm__ volatile( - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",2), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "psrlw $8, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width) - : "%"REG_a - ); -} - -static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) -{ - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",4), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",4), %%mm1 \n\t" - "pand %%mm4, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "packuswb %%mm1, %%mm1 \n\t" - "movd %%mm0, (%3, %%"REG_a") \n\t" - "movd %%mm1, (%2, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width) - : "%"REG_a - ); - assert(src1 == src2); -} - -static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2, - const uint8_t *src, int width) -{ - __asm__ volatile( - "movq "MANGLE(bm01010101)", %%mm4 \n\t" - "mov %0, %%"REG_a" \n\t" - "1: \n\t" - "movq (%1, %%"REG_a",2), %%mm0 \n\t" - "movq 8(%1, %%"REG_a",2), %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - "pand %%mm4, %%mm0 \n\t" - "pand %%mm4, %%mm1 \n\t" - "psrlw $8, %%mm2 \n\t" - "psrlw $8, %%mm3 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "packuswb %%mm3, %%mm2 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "movq %%mm2, (%3, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width) - : 
"%"REG_a - ); -} - -static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) -{ - RENAME(nvXXtoUV)(dstU, dstV, src1, width); -} - -static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) -{ - RENAME(nvXXtoUV)(dstV, dstU, src1, width); -} -#endif /* !COMPILE_TEMPLATE_MMX2 */ - static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, int width, enum PixelFormat srcFormat) { @@ -1856,15 +1715,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) #endif /* COMPILE_TEMPLATE_MMX2 */ } -#if !COMPILE_TEMPLATE_MMX2 - switch(srcFormat) { - case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break; - case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break; - case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break; - case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break; - default: break; - } -#endif /* !COMPILE_TEMPLATE_MMX2 */ if (!c->chrSrcHSubSample) { switch(srcFormat) { case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break; @@ -1874,21 +1724,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } switch (srcFormat) { -#if !COMPILE_TEMPLATE_MMX2 - case PIX_FMT_YUYV422 : - case PIX_FMT_Y400A : c->lumToYV12 = RENAME(yuy2ToY); break; - case PIX_FMT_UYVY422 : c->lumToYV12 = RENAME(uyvyToY); break; -#endif /* !COMPILE_TEMPLATE_MMX2 */ case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break; case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break; default: break; } -#if !COMPILE_TEMPLATE_MMX2 - if (c->alpPixBuf) { - switch (srcFormat) { - case PIX_FMT_Y400A : c->alpToYV12 = RENAME(yuy2ToY); break; - default: break; - } - } -#endif /* !COMPILE_TEMPLATE_MMX2 */ } From 421c99a4a7c116fc2d4e7a6c866c2209852ef581 Mon Sep 17 00:00:00 2001 From: Vitor Sessak Date: Tue, 3 Jan 2012 21:40:57 +0100 Subject: [PATCH 07/19] mpegaudiodec: interleave iMDCT buffer 
to simplify future SIMD implementations Signed-off-by: Ronald S. Bultje --- libavcodec/mpegaudiodec.c | 50 +++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c index cd7b7f5053..702476b7ec 100644 --- a/libavcodec/mpegaudiodec.c +++ b/libavcodec/mpegaudiodec.c @@ -621,17 +621,17 @@ static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win) t0 = s0 + s1; t1 = s0 - s1; - out[(9 + j) * SBLIMIT] = MULH3(t1, win[ 9 + j], 1) + buf[9 + j]; - out[(8 - j) * SBLIMIT] = MULH3(t1, win[ 8 - j], 1) + buf[8 - j]; - buf[ 9 + j ] = MULH3(t0, win[18 + 9 + j], 1); - buf[ 8 - j ] = MULH3(t0, win[18 + 8 - j], 1); + out[(9 + j) * SBLIMIT] = MULH3(t1, win[ 9 + j], 1) + buf[4*(9 + j)]; + out[(8 - j) * SBLIMIT] = MULH3(t1, win[ 8 - j], 1) + buf[4*(8 - j)]; + buf[4 * ( 9 + j )] = MULH3(t0, win[18 + 9 + j], 1); + buf[4 * ( 8 - j )] = MULH3(t0, win[18 + 8 - j], 1); t0 = s2 + s3; t1 = s2 - s3; - out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[ 9 + 8 - j], 1) + buf[9 + 8 - j]; - out[ j * SBLIMIT] = MULH3(t1, win[ j], 1) + buf[ j]; - buf[ 9 + 8 - j ] = MULH3(t0, win[18 + 9 + 8 - j], 1); - buf[ j ] = MULH3(t0, win[18 + j], 1); + out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[ 9 + 8 - j], 1) + buf[4*(9 + 8 - j)]; + out[ j * SBLIMIT] = MULH3(t1, win[ j], 1) + buf[4*( j)]; + buf[4 * ( 9 + 8 - j )] = MULH3(t0, win[18 + 9 + 8 - j], 1); + buf[4 * ( j )] = MULH3(t0, win[18 + j], 1); i += 4; } @@ -639,10 +639,10 @@ static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win) s1 = MULH3(tmp[17], icos36h[4], 2); t0 = s0 + s1; t1 = s0 - s1; - out[(9 + 4) * SBLIMIT] = MULH3(t1, win[ 9 + 4], 1) + buf[9 + 4]; - out[(8 - 4) * SBLIMIT] = MULH3(t1, win[ 8 - 4], 1) + buf[8 - 4]; - buf[ 9 + 4 ] = MULH3(t0, win[18 + 9 + 4], 1); - buf[ 8 - 4 ] = MULH3(t0, win[18 + 8 - 4], 1); + out[(9 + 4) * SBLIMIT] = MULH3(t1, win[ 9 + 4], 1) + buf[4*(9 + 4)]; + out[(8 - 4) * SBLIMIT] = MULH3(t1, win[ 
8 - 4], 1) + buf[4*(8 - 4)]; + buf[4 * ( 9 + 4 )] = MULH3(t0, win[18 + 9 + 4], 1); + buf[4 * ( 8 - 4 )] = MULH3(t0, win[18 + 8 - 4], 1); } /* return the number of decoded frames */ @@ -1407,7 +1407,7 @@ static void compute_imdct(MPADecodeContext *s, GranuleDef *g, imdct36(out_ptr, buf, ptr, win); out_ptr += 18 * SBLIMIT; ptr += 18; - buf += 18; + buf += (j&3) != 3 ? 1 : (4*18-3); } for (j = mdct_long_end; j < sblimit; j++) { /* select frequency inversion */ @@ -1415,40 +1415,40 @@ static void compute_imdct(MPADecodeContext *s, GranuleDef *g, out_ptr = sb_samples + j; for (i = 0; i < 6; i++) { - *out_ptr = buf[i]; + *out_ptr = buf[4*i]; out_ptr += SBLIMIT; } imdct12(out2, ptr + 0); for (i = 0; i < 6; i++) { - *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[i + 6*1]; - buf[i + 6*2] = MULH3(out2[i + 6], win[i + 6], 1); + *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[4*(i + 6*1)]; + buf[4*(i + 6*2)] = MULH3(out2[i + 6], win[i + 6], 1); out_ptr += SBLIMIT; } imdct12(out2, ptr + 1); for (i = 0; i < 6; i++) { - *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[i + 6*2]; - buf[i + 6*0] = MULH3(out2[i + 6], win[i + 6], 1); + *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[4*(i + 6*2)]; + buf[4*(i + 6*0)] = MULH3(out2[i + 6], win[i + 6], 1); out_ptr += SBLIMIT; } imdct12(out2, ptr + 2); for (i = 0; i < 6; i++) { - buf[i + 6*0] = MULH3(out2[i ], win[i ], 1) + buf[i + 6*0]; - buf[i + 6*1] = MULH3(out2[i + 6], win[i + 6], 1); - buf[i + 6*2] = 0; + buf[4*(i + 6*0)] = MULH3(out2[i ], win[i ], 1) + buf[4*(i + 6*0)]; + buf[4*(i + 6*1)] = MULH3(out2[i + 6], win[i + 6], 1); + buf[4*(i + 6*2)] = 0; } ptr += 18; - buf += 18; + buf += (j&3) != 3 ? 1 : (4*18-3); } /* zero bands */ for (j = sblimit; j < SBLIMIT; j++) { /* overlap */ out_ptr = sb_samples + j; for (i = 0; i < 18; i++) { - *out_ptr = buf[i]; - buf[i] = 0; + *out_ptr = buf[4*i]; + buf[4*i] = 0; out_ptr += SBLIMIT; } - buf += 18; + buf += (j&3) != 3 ? 
1 : (4*18-3); } } From 6dfcf53092aba9f1ef31629e11515df5752327db Mon Sep 17 00:00:00 2001 From: Vitor Sessak Date: Wed, 4 Jan 2012 21:32:47 +0100 Subject: [PATCH 08/19] mpegaudiodec: move imdct and windowing function to mpegaudiodsp Signed-off-by: Ronald S. Bultje --- libavcodec/mpegaudiodec.c | 193 +++-------------------------- libavcodec/mpegaudiodsp.c | 5 + libavcodec/mpegaudiodsp.h | 16 +++ libavcodec/mpegaudiodsp_template.c | 190 ++++++++++++++++++++++++++++ 4 files changed, 226 insertions(+), 178 deletions(-) diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c index 702476b7ec..70c5f76381 100644 --- a/libavcodec/mpegaudiodec.c +++ b/libavcodec/mpegaudiodec.c @@ -130,7 +130,6 @@ static uint16_t band_index_long[9][23]; static INTFLOAT is_table[2][16]; static INTFLOAT is_table_lsf[2][2][16]; static INTFLOAT csa_table[8][4]; -static INTFLOAT mdct_win[8][36]; static int16_t division_tab3[1<<6 ]; static int16_t division_tab5[1<<8 ]; @@ -417,43 +416,6 @@ static av_cold void decode_init_static(void) csa_table[i][3] = ca - cs; #endif } - - /* compute mdct windows */ - for (i = 0; i < 36; i++) { - for (j = 0; j < 4; j++) { - double d; - - if (j == 2 && i % 3 != 1) - continue; - - d = sin(M_PI * (i + 0.5) / 36.0); - if (j == 1) { - if (i >= 30) d = 0; - else if (i >= 24) d = sin(M_PI * (i - 18 + 0.5) / 12.0); - else if (i >= 18) d = 1; - } else if (j == 3) { - if (i < 6) d = 0; - else if (i < 12) d = sin(M_PI * (i - 6 + 0.5) / 12.0); - else if (i < 18) d = 1; - } - //merge last stage of imdct into the window coefficients - d *= 0.5 / cos(M_PI * (2 * i + 19) / 72); - - if (j == 2) - mdct_win[j][i/3] = FIXHR((d / (1<<5))); - else - mdct_win[j][i ] = FIXHR((d / (1<<5))); - } - } - - /* NOTE: we do frequency inversion adter the MDCT by changing - the sign of the right window coefs */ - for (j = 0; j < 4; j++) { - for (i = 0; i < 36; i += 2) { - mdct_win[j + 4][i ] = mdct_win[j][i ]; - mdct_win[j + 4][i + 1] = -mdct_win[j][i + 1]; - } - } } static av_cold int 
decode_init(AVCodecContext * avctx) @@ -483,32 +445,9 @@ static av_cold int decode_init(AVCodecContext * avctx) } #define C3 FIXHR(0.86602540378443864676/2) - -/* 0.5 / cos(pi*(2*i+1)/36) */ -static const INTFLOAT icos36[9] = { - FIXR(0.50190991877167369479), - FIXR(0.51763809020504152469), //0 - FIXR(0.55168895948124587824), - FIXR(0.61038729438072803416), - FIXR(0.70710678118654752439), //1 - FIXR(0.87172339781054900991), - FIXR(1.18310079157624925896), - FIXR(1.93185165257813657349), //2 - FIXR(5.73685662283492756461), -}; - -/* 0.5 / cos(pi*(2*i+1)/36) */ -static const INTFLOAT icos36h[9] = { - FIXHR(0.50190991877167369479/2), - FIXHR(0.51763809020504152469/2), //0 - FIXHR(0.55168895948124587824/2), - FIXHR(0.61038729438072803416/2), - FIXHR(0.70710678118654752439/2), //1 - FIXHR(0.87172339781054900991/2), - FIXHR(1.18310079157624925896/4), - FIXHR(1.93185165257813657349/4), //2 -// FIXHR(5.73685662283492756461), -}; +#define C4 FIXHR(0.70710678118654752439/2) //0.5 / cos(pi*(9)/36) +#define C5 FIXHR(0.51763809020504152469/2) //0.5 / cos(pi*(5)/36) +#define C6 FIXHR(1.93185165257813657349/4) //0.5 / cos(pi*(15)/36) /* 12 points IMDCT. We compute it "by hand" by factorizing obvious cases. 
*/ @@ -529,7 +468,7 @@ static void imdct12(INTFLOAT *out, INTFLOAT *in) in3 = MULH3(in3, C3, 4); t1 = in0 - in4; - t2 = MULH3(in1 - in5, icos36h[4], 2); + t2 = MULH3(in1 - in5, C4, 2); out[ 7] = out[10] = t1 + t2; @@ -539,112 +478,20 @@ static void imdct12(INTFLOAT *out, INTFLOAT *in) in0 += SHR(in4, 1); in4 = in0 + in2; in5 += 2*in1; - in1 = MULH3(in5 + in3, icos36h[1], 1); + in1 = MULH3(in5 + in3, C5, 1); out[ 8] = out[ 9] = in4 + in1; out[ 2] = out[ 3] = in4 - in1; in0 -= in2; - in5 = MULH3(in5 - in3, icos36h[7], 2); + in5 = MULH3(in5 - in3, C6, 2); out[ 0] = out[ 5] = in0 - in5; out[ 6] = out[11] = in0 + in5; } -/* cos(pi*i/18) */ -#define C1 FIXHR(0.98480775301220805936/2) -#define C2 FIXHR(0.93969262078590838405/2) -#define C3 FIXHR(0.86602540378443864676/2) -#define C4 FIXHR(0.76604444311897803520/2) -#define C5 FIXHR(0.64278760968653932632/2) -#define C6 FIXHR(0.5/2) -#define C7 FIXHR(0.34202014332566873304/2) -#define C8 FIXHR(0.17364817766693034885/2) - - -/* using Lee like decomposition followed by hand coded 9 points DCT */ -static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win) -{ - int i, j; - INTFLOAT t0, t1, t2, t3, s0, s1, s2, s3; - INTFLOAT tmp[18], *tmp1, *in1; - - for (i = 17; i >= 1; i--) - in[i] += in[i-1]; - for (i = 17; i >= 3; i -= 2) - in[i] += in[i-2]; - - for (j = 0; j < 2; j++) { - tmp1 = tmp + j; - in1 = in + j; - - t2 = in1[2*4] + in1[2*8] - in1[2*2]; - - t3 = in1[2*0] + SHR(in1[2*6],1); - t1 = in1[2*0] - in1[2*6]; - tmp1[ 6] = t1 - SHR(t2,1); - tmp1[16] = t1 + t2; - - t0 = MULH3(in1[2*2] + in1[2*4] , C2, 2); - t1 = MULH3(in1[2*4] - in1[2*8] , -2*C8, 1); - t2 = MULH3(in1[2*2] + in1[2*8] , -C4, 2); - - tmp1[10] = t3 - t0 - t2; - tmp1[ 2] = t3 + t0 + t1; - tmp1[14] = t3 + t2 - t1; - - tmp1[ 4] = MULH3(in1[2*5] + in1[2*7] - in1[2*1], -C3, 2); - t2 = MULH3(in1[2*1] + in1[2*5], C1, 2); - t3 = MULH3(in1[2*5] - in1[2*7], -2*C7, 1); - t0 = MULH3(in1[2*3], C3, 2); - - t1 = MULH3(in1[2*1] + in1[2*7], -C5, 2); - - tmp1[ 
0] = t2 + t3 + t0; - tmp1[12] = t2 + t1 - t0; - tmp1[ 8] = t3 - t1 - t0; - } - - i = 0; - for (j = 0; j < 4; j++) { - t0 = tmp[i]; - t1 = tmp[i + 2]; - s0 = t1 + t0; - s2 = t1 - t0; - - t2 = tmp[i + 1]; - t3 = tmp[i + 3]; - s1 = MULH3(t3 + t2, icos36h[ j], 2); - s3 = MULLx(t3 - t2, icos36 [8 - j], FRAC_BITS); - - t0 = s0 + s1; - t1 = s0 - s1; - out[(9 + j) * SBLIMIT] = MULH3(t1, win[ 9 + j], 1) + buf[4*(9 + j)]; - out[(8 - j) * SBLIMIT] = MULH3(t1, win[ 8 - j], 1) + buf[4*(8 - j)]; - buf[4 * ( 9 + j )] = MULH3(t0, win[18 + 9 + j], 1); - buf[4 * ( 8 - j )] = MULH3(t0, win[18 + 8 - j], 1); - - t0 = s2 + s3; - t1 = s2 - s3; - out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[ 9 + 8 - j], 1) + buf[4*(9 + 8 - j)]; - out[ j * SBLIMIT] = MULH3(t1, win[ j], 1) + buf[4*( j)]; - buf[4 * ( 9 + 8 - j )] = MULH3(t0, win[18 + 9 + 8 - j], 1); - buf[4 * ( j )] = MULH3(t0, win[18 + j], 1); - i += 4; - } - - s0 = tmp[16]; - s1 = MULH3(tmp[17], icos36h[4], 2); - t0 = s0 + s1; - t1 = s0 - s1; - out[(9 + 4) * SBLIMIT] = MULH3(t1, win[ 9 + 4], 1) + buf[4*(9 + 4)]; - out[(8 - 4) * SBLIMIT] = MULH3(t1, win[ 8 - 4], 1) + buf[4*(8 - 4)]; - buf[4 * ( 9 + 4 )] = MULH3(t0, win[18 + 9 + 4], 1); - buf[4 * ( 8 - 4 )] = MULH3(t0, win[18 + 8 - 4], 1); -} - /* return the number of decoded frames */ static int mp_decode_layer1(MPADecodeContext *s) { @@ -1366,7 +1213,7 @@ static void compute_antialias(MPADecodeContext *s, GranuleDef *g) static void compute_imdct(MPADecodeContext *s, GranuleDef *g, INTFLOAT *sb_samples, INTFLOAT *mdct_buf) { - INTFLOAT *win, *win1, *out_ptr, *ptr, *buf, *ptr1; + INTFLOAT *win, *out_ptr, *ptr, *buf, *ptr1; INTFLOAT out2[12]; int i, j, mdct_long_end, sblimit; @@ -1392,26 +1239,16 @@ static void compute_imdct(MPADecodeContext *s, GranuleDef *g, mdct_long_end = sblimit; } - buf = mdct_buf; - ptr = g->sb_hybrid; - for (j = 0; j < mdct_long_end; j++) { - /* apply window & overlap with previous buffer */ - out_ptr = sb_samples + j; - /* select window */ - if (g->switch_point && j < 
2) - win1 = mdct_win[0]; - else - win1 = mdct_win[g->block_type]; - /* select frequency inversion */ - win = win1 + ((4 * 36) & -(j & 1)); - imdct36(out_ptr, buf, ptr, win); - out_ptr += 18 * SBLIMIT; - ptr += 18; - buf += (j&3) != 3 ? 1 : (4*18-3); - } + s->mpadsp.RENAME(imdct36_blocks)(sb_samples, mdct_buf, g->sb_hybrid, + mdct_long_end, g->switch_point, + g->block_type); + + buf = mdct_buf + 4*18*(mdct_long_end >> 2) + (mdct_long_end & 3); + ptr = g->sb_hybrid + 18 * mdct_long_end; + for (j = mdct_long_end; j < sblimit; j++) { /* select frequency inversion */ - win = mdct_win[2 + (4 & -(j & 1))]; + win = RENAME(ff_mdct_win)[2 + (4 & -(j & 1))]; out_ptr = sb_samples + j; for (i = 0; i < 6; i++) { diff --git a/libavcodec/mpegaudiodsp.c b/libavcodec/mpegaudiodsp.c index 438b097d06..431724a71c 100644 --- a/libavcodec/mpegaudiodsp.c +++ b/libavcodec/mpegaudiodsp.c @@ -28,6 +28,8 @@ void ff_mpadsp_init(MPADSPContext *s) DCTContext dct; ff_dct_init(&dct, 5, DCT_II); + ff_init_mpadsp_tabs_float(); + ff_init_mpadsp_tabs_fixed(); s->apply_window_float = ff_mpadsp_apply_window_float; s->apply_window_fixed = ff_mpadsp_apply_window_fixed; @@ -35,6 +37,9 @@ void ff_mpadsp_init(MPADSPContext *s) s->dct32_float = dct.dct32; s->dct32_fixed = ff_dct32_fixed; + s->imdct36_blocks_float = ff_imdct36_blocks_float; + s->imdct36_blocks_fixed = ff_imdct36_blocks_fixed; + if (ARCH_ARM) ff_mpadsp_init_arm(s); if (HAVE_MMX) ff_mpadsp_init_mmx(s); if (HAVE_ALTIVEC) ff_mpadsp_init_altivec(s); diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h index 8a18db8325..01fd698f3e 100644 --- a/libavcodec/mpegaudiodsp.h +++ b/libavcodec/mpegaudiodsp.h @@ -28,6 +28,10 @@ typedef struct MPADSPContext { int *dither_state, int16_t *samples, int incr); void (*dct32_float)(float *dst, const float *src); void (*dct32_fixed)(int *dst, const int *src); + void (*imdct36_blocks_float)(float *out, float *buf, float *in, + int count, int switch_point, int block_type); + void 
(*imdct36_blocks_fixed)(int *out, int *buf, int *in, + int count, int switch_point, int block_type); } MPADSPContext; void ff_mpadsp_init(MPADSPContext *s); @@ -61,4 +65,16 @@ void ff_mpadsp_apply_window_fixed(int32_t *synth_buf, int32_t *window, int *dither_state, int16_t *samples, int incr); +void ff_imdct36_blocks_float(float *out, float *buf, float *in, + int count, int switch_point, int block_type); + +void ff_imdct36_blocks_fixed(int *out, int *buf, int *in, + int count, int switch_point, int block_type); + +void ff_init_mpadsp_tabs_float(void); +void ff_init_mpadsp_tabs_fixed(void); + +extern int ff_mdct_win_fixed[8][36]; +extern float ff_mdct_win_float[8][36]; + #endif /* AVCODEC_MPEGAUDIODSP_H */ diff --git a/libavcodec/mpegaudiodsp_template.c b/libavcodec/mpegaudiodsp_template.c index 5561c46135..5a6adb8cca 100644 --- a/libavcodec/mpegaudiodsp_template.c +++ b/libavcodec/mpegaudiodsp_template.c @@ -39,7 +39,12 @@ static inline float round_sample(float *sum) #define MACS(rt, ra, rb) rt+=(ra)*(rb) #define MULS(ra, rb) ((ra)*(rb)) +#define MULH3(x, y, s) ((s)*(y)*(x)) #define MLSS(rt, ra, rb) rt-=(ra)*(rb) +#define MULLx(x, y, s) ((y)*(x)) +#define FIXHR(x) ((float)(x)) +#define FIXR(x) ((float)(x)) +#define SHR(a,b) ((a)*(1.0f/(1<<(b)))) #else @@ -57,8 +62,16 @@ static inline int round_sample(int64_t *sum) # define MULS(ra, rb) MUL64(ra, rb) # define MACS(rt, ra, rb) MAC64(rt, ra, rb) # define MLSS(rt, ra, rb) MLS64(rt, ra, rb) +# define MULH3(x, y, s) MULH((s)*(x), y) +# define MULLx(x, y, s) MULL(x,y,s) +# define SHR(a,b) ((a)>>(b)) +# define FIXR(a) ((int)((a) * FRAC_ONE + 0.5)) +# define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) #endif +/** Window for MDCT. 
*/ +DECLARE_ALIGNED(16, INTFLOAT, RENAME(ff_mdct_win))[8][36]; + DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256]; #define SUM8(op, sum, w, p) \ @@ -194,6 +207,7 @@ void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window) window[512 - i] = v; } + // Needed for avoiding shuffles in ASM implementations for(i=0; i < 8; i++) for(j=0; j < 16; j++) @@ -203,3 +217,179 @@ void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window) for(j=0; j < 16; j++) window[512+128+16*i+j] = window[64*i+48-j]; } + +void RENAME(ff_init_mpadsp_tabs)(void) +{ + int i, j; + /* compute mdct windows */ + for (i = 0; i < 36; i++) { + for (j = 0; j < 4; j++) { + double d; + + if (j == 2 && i % 3 != 1) + continue; + + d = sin(M_PI * (i + 0.5) / 36.0); + if (j == 1) { + if (i >= 30) d = 0; + else if (i >= 24) d = sin(M_PI * (i - 18 + 0.5) / 12.0); + else if (i >= 18) d = 1; + } else if (j == 3) { + if (i < 6) d = 0; + else if (i < 12) d = sin(M_PI * (i - 6 + 0.5) / 12.0); + else if (i < 18) d = 1; + } + //merge last stage of imdct into the window coefficients + d *= 0.5 / cos(M_PI * (2 * i + 19) / 72); + + if (j == 2) + RENAME(ff_mdct_win)[j][i/3] = FIXHR((d / (1<<5))); + else + RENAME(ff_mdct_win)[j][i ] = FIXHR((d / (1<<5))); + } + } + + /* NOTE: we do frequency inversion adter the MDCT by changing + the sign of the right window coefs */ + for (j = 0; j < 4; j++) { + for (i = 0; i < 36; i += 2) { + RENAME(ff_mdct_win)[j + 4][i ] = RENAME(ff_mdct_win)[j][i ]; + RENAME(ff_mdct_win)[j + 4][i + 1] = -RENAME(ff_mdct_win)[j][i + 1]; + } + } +} +/* cos(pi*i/18) */ +#define C1 FIXHR(0.98480775301220805936/2) +#define C2 FIXHR(0.93969262078590838405/2) +#define C3 FIXHR(0.86602540378443864676/2) +#define C4 FIXHR(0.76604444311897803520/2) +#define C5 FIXHR(0.64278760968653932632/2) +#define C6 FIXHR(0.5/2) +#define C7 FIXHR(0.34202014332566873304/2) +#define C8 FIXHR(0.17364817766693034885/2) + +/* 0.5 / cos(pi*(2*i+1)/36) */ +static const INTFLOAT icos36[9] = { + 
FIXR(0.50190991877167369479), + FIXR(0.51763809020504152469), //0 + FIXR(0.55168895948124587824), + FIXR(0.61038729438072803416), + FIXR(0.70710678118654752439), //1 + FIXR(0.87172339781054900991), + FIXR(1.18310079157624925896), + FIXR(1.93185165257813657349), //2 + FIXR(5.73685662283492756461), +}; + +/* 0.5 / cos(pi*(2*i+1)/36) */ +static const INTFLOAT icos36h[9] = { + FIXHR(0.50190991877167369479/2), + FIXHR(0.51763809020504152469/2), //0 + FIXHR(0.55168895948124587824/2), + FIXHR(0.61038729438072803416/2), + FIXHR(0.70710678118654752439/2), //1 + FIXHR(0.87172339781054900991/2), + FIXHR(1.18310079157624925896/4), + FIXHR(1.93185165257813657349/4), //2 +// FIXHR(5.73685662283492756461), +}; + +/* using Lee like decomposition followed by hand coded 9 points DCT */ +static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win) +{ + int i, j; + INTFLOAT t0, t1, t2, t3, s0, s1, s2, s3; + INTFLOAT tmp[18], *tmp1, *in1; + + for (i = 17; i >= 1; i--) + in[i] += in[i-1]; + for (i = 17; i >= 3; i -= 2) + in[i] += in[i-2]; + + for (j = 0; j < 2; j++) { + tmp1 = tmp + j; + in1 = in + j; + + t2 = in1[2*4] + in1[2*8] - in1[2*2]; + + t3 = in1[2*0] + SHR(in1[2*6],1); + t1 = in1[2*0] - in1[2*6]; + tmp1[ 6] = t1 - SHR(t2,1); + tmp1[16] = t1 + t2; + + t0 = MULH3(in1[2*2] + in1[2*4] , C2, 2); + t1 = MULH3(in1[2*4] - in1[2*8] , -2*C8, 1); + t2 = MULH3(in1[2*2] + in1[2*8] , -C4, 2); + + tmp1[10] = t3 - t0 - t2; + tmp1[ 2] = t3 + t0 + t1; + tmp1[14] = t3 + t2 - t1; + + tmp1[ 4] = MULH3(in1[2*5] + in1[2*7] - in1[2*1], -C3, 2); + t2 = MULH3(in1[2*1] + in1[2*5], C1, 2); + t3 = MULH3(in1[2*5] - in1[2*7], -2*C7, 1); + t0 = MULH3(in1[2*3], C3, 2); + + t1 = MULH3(in1[2*1] + in1[2*7], -C5, 2); + + tmp1[ 0] = t2 + t3 + t0; + tmp1[12] = t2 + t1 - t0; + tmp1[ 8] = t3 - t1 - t0; + } + + i = 0; + for (j = 0; j < 4; j++) { + t0 = tmp[i]; + t1 = tmp[i + 2]; + s0 = t1 + t0; + s2 = t1 - t0; + + t2 = tmp[i + 1]; + t3 = tmp[i + 3]; + s1 = MULH3(t3 + t2, icos36h[ j], 2); + s3 = 
MULLx(t3 - t2, icos36 [8 - j], FRAC_BITS); + + t0 = s0 + s1; + t1 = s0 - s1; + out[(9 + j) * SBLIMIT] = MULH3(t1, win[ 9 + j], 1) + buf[4*(9 + j)]; + out[(8 - j) * SBLIMIT] = MULH3(t1, win[ 8 - j], 1) + buf[4*(8 - j)]; + buf[4 * ( 9 + j )] = MULH3(t0, win[18 + 9 + j], 1); + buf[4 * ( 8 - j )] = MULH3(t0, win[18 + 8 - j], 1); + + t0 = s2 + s3; + t1 = s2 - s3; + out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[ 9 + 8 - j], 1) + buf[4*(9 + 8 - j)]; + out[ j * SBLIMIT] = MULH3(t1, win[ j], 1) + buf[4*( j)]; + buf[4 * ( 9 + 8 - j )] = MULH3(t0, win[18 + 9 + 8 - j], 1); + buf[4 * ( j )] = MULH3(t0, win[18 + j], 1); + i += 4; + } + + s0 = tmp[16]; + s1 = MULH3(tmp[17], icos36h[4], 2); + t0 = s0 + s1; + t1 = s0 - s1; + out[(9 + 4) * SBLIMIT] = MULH3(t1, win[ 9 + 4], 1) + buf[4*(9 + 4)]; + out[(8 - 4) * SBLIMIT] = MULH3(t1, win[ 8 - 4], 1) + buf[4*(8 - 4)]; + buf[4 * ( 9 + 4 )] = MULH3(t0, win[18 + 9 + 4], 1); + buf[4 * ( 8 - 4 )] = MULH3(t0, win[18 + 8 - 4], 1); +} + +void RENAME(ff_imdct36_blocks)(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, + int count, int switch_point, int block_type) +{ + int j; + for (j=0 ; j < count; j++) { + /* apply window & overlap with previous buffer */ + + /* select window */ + int win_idx = (switch_point && j < 2) ? 0 : block_type; + INTFLOAT *win = RENAME(ff_mdct_win)[win_idx + (4 & -(j & 1))]; + + imdct36(out, buf, in, win); + + in += 18; + buf += ((j&3) != 3 ? 1 : (72-3)); + out++; + } +} From 06677d0dd9d3c656665566e585e9ca6de815f247 Mon Sep 17 00:00:00 2001 From: Vitor Sessak Date: Wed, 4 Jan 2012 21:43:47 +0100 Subject: [PATCH 09/19] mpegaudiodec: change imdct window arrangment for better pointer alignment Signed-off-by: Ronald S. 
Bultje --- libavcodec/mpegaudiodsp.h | 8 ++++++-- libavcodec/mpegaudiodsp_template.c | 27 ++++++++++++++++----------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h index 01fd698f3e..c24ea4117e 100644 --- a/libavcodec/mpegaudiodsp.h +++ b/libavcodec/mpegaudiodsp.h @@ -20,6 +20,7 @@ #define AVCODEC_MPEGAUDIODSP_H #include <stdint.h> +#include "libavutil/common.h" typedef struct MPADSPContext { void (*apply_window_float)(float *synth_buf, float *window, @@ -74,7 +75,10 @@ void ff_imdct36_blocks_fixed(int *out, int *buf, int *in, void ff_init_mpadsp_tabs_float(void); void ff_init_mpadsp_tabs_fixed(void); -extern int ff_mdct_win_fixed[8][36]; -extern float ff_mdct_win_float[8][36]; +/** For SSE implementation, MDCT_BUF_SIZE/2 should be 128-bit aligned */ +#define MDCT_BUF_SIZE FFALIGN(36, 2*4) + +extern int ff_mdct_win_fixed[8][MDCT_BUF_SIZE]; +extern float ff_mdct_win_float[8][MDCT_BUF_SIZE]; #endif /* AVCODEC_MPEGAUDIODSP_H */ diff --git a/libavcodec/mpegaudiodsp_template.c b/libavcodec/mpegaudiodsp_template.c index 5a6adb8cca..d616f8aec9 100644 --- a/libavcodec/mpegaudiodsp_template.c +++ b/libavcodec/mpegaudiodsp_template.c @@ -69,8 +69,11 @@ static inline int round_sample(int64_t *sum) # define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) #endif -/** Window for MDCT. */ -DECLARE_ALIGNED(16, INTFLOAT, RENAME(ff_mdct_win))[8][36]; +/** Window for MDCT. Only the elements in [0,17] and + [MDCT_BUF_SIZE/2, MDCT_BUF_SIZE/2 + 17] are actually used. The rest + is just to preserve alignment for SIMD implementations. +*/ +DECLARE_ALIGNED(16, INTFLOAT, RENAME(ff_mdct_win))[8][MDCT_BUF_SIZE]; DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256]; @@ -244,15 +247,17 @@ void RENAME(ff_init_mpadsp_tabs)(void) if (j == 2) RENAME(ff_mdct_win)[j][i/3] = FIXHR((d / (1<<5))); - else - RENAME(ff_mdct_win)[j][i ] = FIXHR((d / (1<<5))); + else { + int idx = i < 18 ? 
i : i + (MDCT_BUF_SIZE/2 - 18); + RENAME(ff_mdct_win)[j][idx] = FIXHR((d / (1<<5))); + } } } /* NOTE: we do frequency inversion adter the MDCT by changing the sign of the right window coefs */ for (j = 0; j < 4; j++) { - for (i = 0; i < 36; i += 2) { + for (i = 0; i < MDCT_BUF_SIZE; i += 2) { RENAME(ff_mdct_win)[j + 4][i ] = RENAME(ff_mdct_win)[j][i ]; RENAME(ff_mdct_win)[j + 4][i + 1] = -RENAME(ff_mdct_win)[j][i + 1]; } @@ -353,15 +358,15 @@ static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win) t1 = s0 - s1; out[(9 + j) * SBLIMIT] = MULH3(t1, win[ 9 + j], 1) + buf[4*(9 + j)]; out[(8 - j) * SBLIMIT] = MULH3(t1, win[ 8 - j], 1) + buf[4*(8 - j)]; - buf[4 * ( 9 + j )] = MULH3(t0, win[18 + 9 + j], 1); - buf[4 * ( 8 - j )] = MULH3(t0, win[18 + 8 - j], 1); + buf[4 * ( 9 + j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + j], 1); + buf[4 * ( 8 - j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 8 - j], 1); t0 = s2 + s3; t1 = s2 - s3; out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[ 9 + 8 - j], 1) + buf[4*(9 + 8 - j)]; out[ j * SBLIMIT] = MULH3(t1, win[ j], 1) + buf[4*( j)]; - buf[4 * ( 9 + 8 - j )] = MULH3(t0, win[18 + 9 + 8 - j], 1); - buf[4 * ( j )] = MULH3(t0, win[18 + j], 1); + buf[4 * ( 9 + 8 - j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + 8 - j], 1); + buf[4 * ( j )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + j], 1); i += 4; } @@ -371,8 +376,8 @@ static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win) t1 = s0 - s1; out[(9 + 4) * SBLIMIT] = MULH3(t1, win[ 9 + 4], 1) + buf[4*(9 + 4)]; out[(8 - 4) * SBLIMIT] = MULH3(t1, win[ 8 - 4], 1) + buf[4*(8 - 4)]; - buf[4 * ( 9 + 4 )] = MULH3(t0, win[18 + 9 + 4], 1); - buf[4 * ( 8 - 4 )] = MULH3(t0, win[18 + 8 - 4], 1); + buf[4 * ( 9 + 4 )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + 4], 1); + buf[4 * ( 8 - 4 )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 8 - 4], 1); } void RENAME(ff_imdct36_blocks)(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, From 39df0c434c76aa6a6decccb969dfa51468440823 Mon Sep 17 00:00:00 2001 From: Vitor Sessak 
Date: Thu, 5 Jan 2012 20:26:33 +0100 Subject: [PATCH 10/19] mpegaudiodec: optimized iMDCT transform Signed-off-by: Ronald S. Bultje --- libavcodec/mpegaudiodec.c | 2 +- libavcodec/x86/Makefile | 1 + libavcodec/x86/imdct36_sse.asm | 721 ++++++++++++++++++++++++++++++ libavcodec/x86/mpegaudiodec_mmx.c | 80 ++++ libavutil/x86/x86inc.asm | 2 + 5 files changed, 805 insertions(+), 1 deletion(-) create mode 100644 libavcodec/x86/imdct36_sse.asm diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c index 70c5f76381..6a06afa680 100644 --- a/libavcodec/mpegaudiodec.c +++ b/libavcodec/mpegaudiodec.c @@ -58,7 +58,7 @@ typedef struct GranuleDef { int preflag; int short_start, long_end; /* long/short band indexes */ uint8_t scale_factors[40]; - INTFLOAT sb_hybrid[SBLIMIT * 18]; /* 576 samples */ + DECLARE_ALIGNED(16, INTFLOAT, sb_hybrid)[SBLIMIT * 18]; /* 576 samples */ } GranuleDef; typedef struct MPADecodeContext { diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index aa97942dba..2abe4fbe72 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -33,6 +33,7 @@ YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o MMX-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp_mmx.o MMX-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhd_mmx.o MMX-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodec_mmx.o +YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36_sse.o MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_yasm.o MMX-OBJS-$(CONFIG_GPL) += x86/idct_mmx.o diff --git a/libavcodec/x86/imdct36_sse.asm b/libavcodec/x86/imdct36_sse.asm new file mode 100644 index 0000000000..2908459db7 --- /dev/null +++ b/libavcodec/x86/imdct36_sse.asm @@ -0,0 +1,721 @@ +;****************************************************************************** +;* 36 point SSE-optimized IMDCT transform +;* Copyright (c) 2011 Vitor Sessak +;* +;* This file is part of Libav. 
+;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86inc.asm" +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +align 16 +ps_mask: dd 0, ~0, ~0, ~0 +ps_mask2: dd 0, ~0, 0, ~0 +ps_mask3: dd 0, 0, 0, ~0 +ps_mask4: dd 0, ~0, 0, 0 + +ps_val1: dd -0.5, -0.5, -0.8660254038, -0.8660254038 +ps_val2: dd 1.0, 1.0, 0.8660254038, 0.8660254038 +ps_val3: dd 0.1736481777, 0.1736481777, 0.3420201433, 0.3420201433 +ps_val4: dd -0.7660444431, -0.7660444431, 0.8660254038, 0.8660254038 +ps_val5: dd -0.9396926208, -0.9396926208, -0.9848077530, -0.9848077530 +ps_val6: dd 0.5, 0.5, -0.6427876097, -0.6427876097 +ps_val7: dd 1.0, 1.0, -0.6427876097, -0.6427876097 + +ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000 +ps_p1m1p1m1: dd 0, 0x80000000, 0, 0x80000000 + +ps_cosh: dd 1.0, 0.50190991877167369479, 1.0, 5.73685662283492756461 + dd 1.0, 0.51763809020504152469, 1.0, 1.93185165257813657349 + dd 1.0, 0.55168895948124587824, -1.0, -1.18310079157624925896 + dd 1.0, 0.61038729438072803416, -1.0, -0.87172339781054900991 + dd 1.0, 0.70710678118654752439, 0.0, 0.0 + +ps_cosh_sse3: dd 1.0, -0.50190991877167369479, 1.0, -5.73685662283492756461 + dd 1.0, -0.51763809020504152469, 1.0, -1.93185165257813657349 + dd 1.0, 
-0.55168895948124587824, -1.0, 1.18310079157624925896 + dd 1.0, -0.61038729438072803416, -1.0, 0.87172339781054900991 + dd 1.0, 0.70710678118654752439, 0.0, 0.0 + +costabs: times 4 dd 0.98480773 + times 4 dd 0.93969262 + times 4 dd 0.86602539 + times 4 dd -0.76604444 + times 4 dd -0.64278764 + times 4 dd 0.50000000 + times 4 dd -0.50000000 + times 4 dd -0.34202015 + times 4 dd -0.17364818 + times 4 dd 0.50190992 + times 4 dd 0.51763808 + times 4 dd 0.55168896 + times 4 dd 0.61038726 + times 4 dd 0.70710677 + times 4 dd 0.87172341 + times 4 dd 1.18310082 + times 4 dd 1.93185163 + times 4 dd 5.73685646 + +%define SBLIMIT 32 +SECTION_TEXT + +%macro PSHUFD 3 +%if cpuflag(sse2) && notcpuflag(avx) + pshufd %1, %2, %3 +%else + shufps %1, %2, %2, %3 +%endif +%endmacro + +; input %2={x1,x2,x3,x4}, %3={y1,y2,y3,y4} +; output %1={x3,x4,y1,y2} +%macro BUILDINVHIGHLOW 3 +%if cpuflag(avx) + shufps %1, %2, %3, 0x4e +%else + movlhps %1, %3 + movhlps %1, %2 +%endif +%endmacro + +; input %2={x1,x2,x3,x4}, %3={y1,y2,y3,y4} +; output %1={x4,y1,y2,y3} +%macro ROTLEFT 3 +%if cpuflag(ssse3) + palignr %1, %3, %2, 12 +%else + BUILDINVHIGHLOW %1, %2, %3 + shufps %1, %1, %3, 0x99 +%endif +%endmacro + +%macro INVERTHL 2 +%if cpuflag(sse2) + PSHUFD %1, %2, 0x4e +%else + movhlps %1, %2 + movlhps %1, %2 +%endif +%endmacro + +%macro BUTTERF 3 + INVERTHL %2, %1 + xorps %1, [ps_p1p1m1m1] + addps %1, %2 +%if cpuflag(sse3) + mulps %1, %1, [ps_cosh_sse3 + %3] + PSHUFD %2, %1, 0xb1 + addsubps %1, %1, %2 +%else + mulps %1, [ps_cosh + %3] + PSHUFD %2, %1, 0xb1 + xorps %1, [ps_p1m1p1m1] + addps %1, %2 +%endif +%endmacro + +%macro STORE 4 + movhlps %2, %1 + movss [%3 ], %1 + movss [%3 + 2*%4], %2 + shufps %1, %1, 0xb1 + movss [%3 + %4], %1 + movhlps %2, %1 + movss [%3 + 3*%4], %2 +%endmacro + +%macro LOAD 4 + movlps %1, [%3 ] + movhps %1, [%3 + %4] + movlps %2, [%3 + 2*%4] + movhps %2, [%3 + 3*%4] + shufps %1, %2, 0x88 +%endmacro + +%macro LOADA64 2 +%if cpuflag(avx) + movu %1, [%2] +%else + movlps %1, 
[%2] + movhps %1, [%2 + 8] +%endif +%endmacro + +%macro DEFINE_IMDCT 0 +cglobal imdct36_float, 4,4,9, out, buf, in, win + + ; for(i=17;i>=1;i--) in[i] += in[i-1]; + LOADA64 m0, inq + LOADA64 m1, inq + 16 + + ROTLEFT m5, m0, m1 + + PSHUFD m6, m0, 0x93 + andps m6, m6, [ps_mask] + addps m0, m0, m6 + + LOADA64 m2, inq + 32 + + ROTLEFT m7, m1, m2 + + addps m1, m1, m5 + LOADA64 m3, inq + 48 + + ROTLEFT m5, m2, m3 + + xorps m4, m4, m4 + movlps m4, [inq+64] + BUILDINVHIGHLOW m6, m3, m4 + shufps m6, m6, m4, 0xa9 + + addps m4, m4, m6 + addps m2, m2, m7 + addps m3, m3, m5 + + ; for(i=17;i>=3;i-=2) in[i] += in[i-2]; + movlhps m5, m5, m0 + andps m5, m5, [ps_mask3] + + BUILDINVHIGHLOW m7, m0, m1 + andps m7, m7, [ps_mask2] + + addps m0, m0, m5 + + BUILDINVHIGHLOW m6, m1, m2 + andps m6, m6, [ps_mask2] + + addps m1, m1, m7 + + BUILDINVHIGHLOW m7, m2, m3 + andps m7, m7, [ps_mask2] + + addps m2, m2, m6 + + movhlps m6, m6, m3 + andps m6, m6, [ps_mask4] + + addps m3, m3, m7 + addps m4, m4, m6 + + ; Populate tmp[] + movlhps m6, m1, m5 ; zero out high values + subps m6, m6, m4 + + subps m5, m0, m3 + +%ifdef ARCH_X86_64 + SWAP m5, m8 +%endif + + mulps m7, m2, [ps_val1] + +%ifdef ARCH_X86_64 + mulps m5, m8, [ps_val2] +%else + mulps m5, m5, [ps_val2] +%endif + addps m7, m7, m5 + + mulps m5, m6, [ps_val1] + subps m7, m7, m5 + +%ifdef ARCH_X86_64 + SWAP m5, m8 +%else + subps m5, m0, m3 +%endif + + subps m5, m5, m6 + addps m5, m5, m2 + + shufps m6, m4, m3, 0xe4 + subps m6, m6, m2 + mulps m6, m6, [ps_val3] + + addps m4, m4, m1 + mulps m4, m4, [ps_val4] + + shufps m1, m1, m0, 0xe4 + addps m1, m1, m2 + mulps m1, m1, [ps_val5] + + mulps m3, m3, [ps_val6] + mulps m0, m0, [ps_val7] + addps m0, m0, m3 + + xorps m2, m1, [ps_p1p1m1m1] + subps m2, m2, m4 + addps m2, m2, m0 + + addps m3, m4, m0 + subps m3, m3, m6 + xorps m3, m3, [ps_p1p1m1m1] + + shufps m0, m0, m4, 0xe4 + subps m0, m0, m1 + addps m0, m0, m6 + + BUILDINVHIGHLOW m4, m2, m3 + shufps m3, m3, m2, 0x4e + + ; we have tmp = {SwAPLH(m0), 
SwAPLH(m7), m3, m4, m5} + + BUTTERF m0, m1, 0 + BUTTERF m7, m2, 16 + BUTTERF m3, m6, 32 + BUTTERF m4, m1, 48 + + mulps m5, m5, [ps_cosh + 64] + PSHUFD m1, m5, 0xe1 + xorps m5, m5, [ps_p1m1p1m1] + addps m5, m5, m1 + + ; permutates: + ; m0 0 1 2 3 => 2 6 10 14 m1 + ; m7 4 5 6 7 => 3 7 11 15 m2 + ; m3 8 9 10 11 => 17 13 9 5 m3 + ; m4 12 13 14 15 => 16 12 8 4 m5 + ; m5 16 17 xx xx => 0 1 xx xx m0 + + unpckhps m1, m0, m7 + unpckhps m6, m3, m4 + movhlps m2, m6, m1 + movlhps m1, m1, m6 + + unpcklps m5, m5, m4 + unpcklps m3, m3, m7 + movhlps m4, m3, m5 + movlhps m5, m5, m3 + SWAP m4, m3 + ; permutation done + + PSHUFD m6, m2, 0xb1 + movss m4, [bufq + 4*68] + movss m7, [bufq + 4*64] + unpcklps m7, m7, m4 + mulps m6, m6, [winq + 16*4] + addps m6, m6, m7 + movss [outq + 64*SBLIMIT], m6 + shufps m6, m6, m6, 0xb1 + movss [outq + 68*SBLIMIT], m6 + + mulps m6, m3, [winq + 4*4] + LOAD m4, m7, bufq + 4*16, 16 + addps m6, m6, m4 + STORE m6, m7, outq + 16*SBLIMIT, 4*SBLIMIT + + shufps m4, m0, m3, 0xb5 + mulps m4, m4, [winq + 8*4] + LOAD m7, m6, bufq + 4*32, 16 + addps m4, m4, m7 + STORE m4, m6, outq + 32*SBLIMIT, 4*SBLIMIT + + shufps m3, m3, m2, 0xb1 + mulps m3, m3, [winq + 12*4] + LOAD m7, m6, bufq + 4*48, 16 + addps m3, m3, m7 + STORE m3, m7, outq + 48*SBLIMIT, 4*SBLIMIT + + mulps m2, m2, [winq] + LOAD m6, m7, bufq, 16 + addps m2, m2, m6 + STORE m2, m7, outq, 4*SBLIMIT + + mulps m4, m1, [winq + 20*4] + STORE m4, m7, bufq, 16 + + mulps m3, m5, [winq + 24*4] + STORE m3, m7, bufq + 4*16, 16 + + shufps m0, m0, m5, 0xb0 + mulps m0, m0, [winq + 28*4] + STORE m0, m7, bufq + 4*32, 16 + + shufps m5, m5, m1, 0xb1 + mulps m5, m5, [winq + 32*4] + STORE m5, m7, bufq + 4*48, 16 + + shufps m1, m1, m1, 0xb1 + mulps m1, m1, [winq + 36*4] + movss [bufq + 4*64], m1 + shufps m1, m1, 0xb1 + movss [bufq + 4*68], m1 + RET +%endmacro + +INIT_XMM sse +DEFINE_IMDCT + +INIT_XMM sse2 +DEFINE_IMDCT + +INIT_XMM sse3 +DEFINE_IMDCT + +INIT_XMM ssse3 +DEFINE_IMDCT + +INIT_XMM avx +DEFINE_IMDCT + +INIT_XMM sse + 
+%ifdef ARCH_X86_64 +%define SPILL SWAP +%define UNSPILL SWAP +%define SPILLED(x) m %+ x +%else +%define SPILLED(x) [tmpq+(x-8)*16 + 32*4] +%macro SPILL 2 ; xmm#, mempos + movaps SPILLED(%2), m%1 +%endmacro +%macro UNSPILL 2 + movaps m%1, SPILLED(%2) +%endmacro +%endif + +%macro DEFINE_FOUR_IMDCT 0 +cglobal four_imdct36_float, 5,5,8, out, buf, in, win, tmp + movlps m0, [inq+64] + movhps m0, [inq+64 + 72] + movlps m3, [inq+64 + 2*72] + movhps m3, [inq+64 + 3*72] + + shufps m5, m0, m3, 0xdd + shufps m0, m0, m3, 0x88 + + mova m1, [inq+48] + movu m6, [inq+48 + 72] + mova m7, [inq+48 + 2*72] + movu m3, [inq+48 + 3*72] + + TRANSPOSE4x4PS 1, 6, 7, 3, 4 + + addps m4, m6, m7 + mova [tmpq+4*28], m4 + + addps m7, m3 + addps m6, m1 + addps m3, m0 + addps m0, m5 + addps m0, m7 + addps m7, m6 + mova [tmpq+4*12], m7 + SPILL 3, 12 + + mova m4, [inq+32] + movu m5, [inq+32 + 72] + mova m2, [inq+32 + 2*72] + movu m7, [inq+32 + 3*72] + + TRANSPOSE4x4PS 4, 5, 2, 7, 3 + + addps m1, m7 + SPILL 1, 11 + + addps m3, m5, m2 + SPILL 3, 13 + + addps m7, m2 + addps m5, m4 + addps m6, m7 + mova [tmpq], m6 + addps m7, m5 + mova [tmpq+4*16], m7 + + mova m2, [inq+16] + movu m7, [inq+16 + 72] + mova m1, [inq+16 + 2*72] + movu m6, [inq+16 + 3*72] + + TRANSPOSE4x4PS 2, 7, 1, 6, 3 + + addps m4, m6 + addps m6, m1 + addps m1, m7 + addps m7, m2 + addps m5, m6 + SPILL 5, 15 + addps m6, m7 + mulps m6, [costabs + 16*2] + mova [tmpq+4*8], m6 + SPILL 1, 10 + SPILL 0, 14 + + mova m1, [inq] + movu m6, [inq + 72] + mova m3, [inq + 2*72] + movu m5, [inq + 3*72] + + TRANSPOSE4x4PS 1, 6, 3, 5, 0 + + addps m2, m5 + addps m5, m3 + addps m7, m5 + addps m3, m6 + addps m6, m1 + SPILL 7, 8 + addps m5, m6 + SPILL 6, 9 + addps m6, m4, SPILLED(12) + subps m6, m2 + UNSPILL 7, 11 + SPILL 5, 11 + subps m5, m1, m7 + mulps m7, [costabs + 16*5] + addps m7, m1 + mulps m0, m6, [costabs + 16*6] + addps m0, m5 + mova [tmpq+4*24], m0 + addps m6, m5 + mova [tmpq+4*4], m6 + addps m6, m4, m2 + mulps m6, [costabs + 16*1] + subps m4, 
SPILLED(12) + mulps m4, [costabs + 16*8] + addps m2, SPILLED(12) + mulps m2, [costabs + 16*3] + subps m5, m7, m6 + subps m5, m2 + addps m6, m7 + addps m6, m4 + addps m7, m2 + subps m7, m4 + mova [tmpq+4*20], m7 + mova m2, [tmpq+4*28] + mova [tmpq+4*28], m5 + UNSPILL 7, 13 + subps m5, m7, m2 + mulps m5, [costabs + 16*7] + UNSPILL 1, 10 + mulps m1, [costabs + 16*2] + addps m4, m3, m2 + mulps m4, [costabs + 16*4] + addps m2, m7 + addps m7, m3 + mulps m7, [costabs] + subps m3, m2 + mulps m3, [costabs + 16*2] + addps m2, m7, m5 + addps m2, m1 + SPILL 2, 10 + addps m7, m4 + subps m7, m1 + SPILL 7, 12 + subps m5, m4 + subps m5, m1 + UNSPILL 0, 14 + SPILL 5, 13 + addps m1, m0, SPILLED(15) + subps m1, SPILLED(8) + mova m4, [costabs + 16*5] + mulps m4, [tmpq] + UNSPILL 2, 9 + addps m4, m2 + subps m2, [tmpq] + mulps m5, m1, [costabs + 16*6] + addps m5, m2 + SPILL 5, 9 + addps m2, m1 + SPILL 2, 14 + UNSPILL 5, 15 + subps m7, m5, m0 + addps m5, SPILLED(8) + mulps m5, [costabs + 16*1] + mulps m7, [costabs + 16*8] + addps m0, SPILLED(8) + mulps m0, [costabs + 16*3] + subps m2, m4, m5 + subps m2, m0 + SPILL 2, 15 + addps m5, m4 + addps m5, m7 + addps m4, m0 + subps m4, m7 + SPILL 4, 8 + mova m7, [tmpq+4*16] + mova m2, [tmpq+4*12] + addps m0, m7, m2 + subps m0, SPILLED(11) + mulps m0, [costabs + 16*2] + addps m4, m7, SPILLED(11) + mulps m4, [costabs] + subps m7, m2 + mulps m7, [costabs + 16*7] + addps m2, SPILLED(11) + mulps m2, [costabs + 16*4] + addps m1, m7, [tmpq+4*8] + addps m1, m4 + addps m4, m2 + subps m4, [tmpq+4*8] + SPILL 4, 11 + subps m7, m2 + subps m7, [tmpq+4*8] + addps m4, m6, SPILLED(10) + subps m6, SPILLED(10) + addps m2, m5, m1 + mulps m2, [costabs + 16*9] + subps m5, m1 + mulps m5, [costabs + 16*17] + subps m1, m4, m2 + addps m4, m2 + mulps m2, m1, [winq+4*36] + addps m2, [bufq+4*36] + mova [outq+1152], m2 + mulps m1, [winq+4*32] + addps m1, [bufq+4*32] + mova [outq+1024], m1 + mulps m1, m4, [winq+4*116] + mova [bufq+4*36], m1 + mulps m4, [winq+4*112] + mova 
[bufq+4*32], m4 + addps m2, m6, m5 + subps m6, m5 + mulps m1, m6, [winq+4*68] + addps m1, [bufq+4*68] + mova [outq+2176], m1 + mulps m6, [winq] + addps m6, [bufq] + mova [outq], m6 + mulps m1, m2, [winq+4*148] + mova [bufq+4*68], m1 + mulps m2, [winq+4*80] + mova [bufq], m2 + addps m5, m3, [tmpq+4*24] + mova m2, [tmpq+4*24] + subps m2, m3 + mova m1, SPILLED(9) + subps m1, m0 + mulps m1, [costabs + 16*10] + addps m0, SPILLED(9) + mulps m0, [costabs + 16*16] + addps m6, m5, m1 + subps m5, m1 + mulps m3, m5, [winq+4*40] + addps m3, [bufq+4*40] + mova [outq+1280], m3 + mulps m5, [winq+4*28] + addps m5, [bufq+4*28] + mova [outq+896], m5 + mulps m1, m6, [winq+4*120] + mova [bufq+4*40], m1 + mulps m6, [winq+4*108] + mova [bufq+4*28], m6 + addps m1, m2, m0 + subps m2, m0 + mulps m5, m2, [winq+4*64] + addps m5, [bufq+4*64] + mova [outq+2048], m5 + mulps m2, [winq+4*4] + addps m2, [bufq+4*4] + mova [outq+128], m2 + mulps m0, m1, [winq+4*144] + mova [bufq+4*64], m0 + mulps m1, [winq+4*84] + mova [bufq+4*4], m1 + mova m1, [tmpq+4*28] + mova m5, m1 + addps m1, SPILLED(13) + subps m5, SPILLED(13) + UNSPILL 3, 15 + addps m2, m7, m3 + mulps m2, [costabs + 16*11] + subps m3, m7 + mulps m3, [costabs + 16*15] + addps m0, m2, m1 + subps m1, m2 + SWAP m0, m2 + mulps m6, m1, [winq+4*44] + addps m6, [bufq+4*44] + mova [outq+1408], m6 + mulps m1, [winq+4*24] + addps m1, [bufq+4*24] + mova [outq+768], m1 + mulps m0, m2, [winq+4*124] + mova [bufq+4*44], m0 + mulps m2, [winq+4*104] + mova [bufq+4*24], m2 + addps m0, m5, m3 + subps m5, m3 + mulps m1, m5, [winq+4*60] + addps m1, [bufq+4*60] + mova [outq+1920], m1 + mulps m5, [winq+4*8] + addps m5, [bufq+4*8] + mova [outq+256], m5 + mulps m1, m0, [winq+4*140] + mova [bufq+4*60], m1 + mulps m0, [winq+4*88] + mova [bufq+4*8], m0 + mova m1, [tmpq+4*20] + addps m1, SPILLED(12) + mova m2, [tmpq+4*20] + subps m2, SPILLED(12) + UNSPILL 7, 8 + subps m0, m7, SPILLED(11) + addps m7, SPILLED(11) + mulps m4, m7, [costabs + 16*12] + mulps m0, [costabs + 
16*14] + addps m5, m1, m4 + subps m1, m4 + mulps m7, m1, [winq+4*48] + addps m7, [bufq+4*48] + mova [outq+1536], m7 + mulps m1, [winq+4*20] + addps m1, [bufq+4*20] + mova [outq+640], m1 + mulps m1, m5, [winq+4*128] + mova [bufq+4*48], m1 + mulps m5, [winq+4*100] + mova [bufq+4*20], m5 + addps m6, m2, m0 + subps m2, m0 + mulps m1, m2, [winq+4*56] + addps m1, [bufq+4*56] + mova [outq+1792], m1 + mulps m2, [winq+4*12] + addps m2, [bufq+4*12] + mova [outq+384], m2 + mulps m0, m6, [winq+4*136] + mova [bufq+4*56], m0 + mulps m6, [winq+4*92] + mova [bufq+4*12], m6 + UNSPILL 0, 14 + mulps m0, [costabs + 16*13] + mova m3, [tmpq+4*4] + addps m2, m0, m3 + subps m3, m0 + mulps m0, m3, [winq+4*52] + addps m0, [bufq+4*52] + mova [outq+1664], m0 + mulps m3, [winq+4*16] + addps m3, [bufq+4*16] + mova [outq+512], m3 + mulps m0, m2, [winq+4*132] + mova [bufq+4*52], m0 + mulps m2, [winq+4*96] + mova [bufq+4*16], m2 + RET +%endmacro + +INIT_XMM sse +DEFINE_FOUR_IMDCT + +INIT_XMM avx +DEFINE_FOUR_IMDCT diff --git a/libavcodec/x86/mpegaudiodec_mmx.c b/libavcodec/x86/mpegaudiodec_mmx.c index b64461513e..06ffbca90a 100644 --- a/libavcodec/x86/mpegaudiodec_mmx.c +++ b/libavcodec/x86/mpegaudiodec_mmx.c @@ -24,6 +24,18 @@ #include "libavcodec/dsputil.h" #include "libavcodec/mpegaudiodsp.h" +void ff_imdct36_float_sse(float *out, float *buf, float *in, float *win); +void ff_imdct36_float_sse2(float *out, float *buf, float *in, float *win); +void ff_imdct36_float_sse3(float *out, float *buf, float *in, float *win); +void ff_imdct36_float_ssse3(float *out, float *buf, float *in, float *win); +void ff_imdct36_float_avx(float *out, float *buf, float *in, float *win); +void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win, + float *tmpbuf); +void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win, + float *tmpbuf); + +DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40]; + #define MACS(rt, ra, rb) rt+=(ra)*(rb) #define MLSS(rt, ra, rb) 
rt-=(ra)*(rb) @@ -147,11 +159,79 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out, *out = sum; } + +#define DECL_IMDCT_BLOCKS(CPU1, CPU2) \ +static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in, \ + int count, int switch_point, int block_type) \ +{ \ + int align_end = count - (count & 3); \ + int j; \ + for (j = 0; j < align_end; j+= 4) { \ + LOCAL_ALIGNED_16(float, tmpbuf, [1024]); \ + float *win = mdct_win_sse[switch_point && j < 4][block_type]; \ + /* apply window & overlap with previous buffer */ \ + \ + /* select window */ \ + ff_four_imdct36_float_ ## CPU2(out, buf, in, win, tmpbuf); \ + in += 4*18; \ + buf += 4*18; \ + out += 4; \ + } \ + for (; j < count; j++) { \ + /* apply window & overlap with previous buffer */ \ + \ + /* select window */ \ + int win_idx = (switch_point && j < 2) ? 0 : block_type; \ + float *win = ff_mdct_win_float[win_idx + (4 & -(j & 1))]; \ + \ + ff_imdct36_float_ ## CPU1(out, buf, in, win); \ + \ + in += 18; \ + buf++; \ + out++; \ + } \ +} + +DECL_IMDCT_BLOCKS(sse,sse) +DECL_IMDCT_BLOCKS(sse2,sse) +DECL_IMDCT_BLOCKS(sse3,sse) +DECL_IMDCT_BLOCKS(ssse3,sse) +DECL_IMDCT_BLOCKS(avx,avx) + void ff_mpadsp_init_mmx(MPADSPContext *s) { int mm_flags = av_get_cpu_flags(); + int i, j; + for (j = 0; j < 4; j++) { + for (i = 0; i < 40; i ++) { + mdct_win_sse[0][j][4*i ] = ff_mdct_win_float[j ][i]; + mdct_win_sse[0][j][4*i + 1] = ff_mdct_win_float[j + 4][i]; + mdct_win_sse[0][j][4*i + 2] = ff_mdct_win_float[j ][i]; + mdct_win_sse[0][j][4*i + 3] = ff_mdct_win_float[j + 4][i]; + mdct_win_sse[1][j][4*i ] = ff_mdct_win_float[0 ][i]; + mdct_win_sse[1][j][4*i + 1] = ff_mdct_win_float[4 ][i]; + mdct_win_sse[1][j][4*i + 2] = ff_mdct_win_float[j ][i]; + mdct_win_sse[1][j][4*i + 3] = ff_mdct_win_float[j + 4][i]; + } + } + if (mm_flags & AV_CPU_FLAG_SSE2) { s->apply_window_float = apply_window_mp3; } +#if HAVE_YASM + if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { + s->imdct36_blocks_float = imdct36_blocks_avx; 
+#if HAVE_SSE + } else if (mm_flags & AV_CPU_FLAG_SSSE3) { + s->imdct36_blocks_float = imdct36_blocks_ssse3; + } else if (mm_flags & AV_CPU_FLAG_SSE3) { + s->imdct36_blocks_float = imdct36_blocks_sse3; + } else if (mm_flags & AV_CPU_FLAG_SSE2) { + s->imdct36_blocks_float = imdct36_blocks_sse2; + } else if (mm_flags & AV_CPU_FLAG_SSE) { + s->imdct36_blocks_float = imdct36_blocks_sse; +#endif /* HAVE_SSE */ + } +#endif /* HAVE_YASM */ } diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 475e70e3fc..6941c1ac2f 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -916,6 +916,8 @@ AVX_INSTR minpd, 1, 0, 1 AVX_INSTR minps, 1, 0, 1 AVX_INSTR minsd, 1, 0, 1 AVX_INSTR minss, 1, 0, 1 +AVX_INSTR movhlps, 1, 0, 0 +AVX_INSTR movlhps, 1, 0, 0 AVX_INSTR movsd, 1, 0, 0 AVX_INSTR movss, 1, 0, 0 AVX_INSTR mpsadbw, 0, 1, 0 From 2dee0cdb8e692725fa261490a856299bb26c7be5 Mon Sep 17 00:00:00 2001 From: Mike Melanson Date: Sat, 7 Jan 2012 19:59:08 -0800 Subject: [PATCH 11/19] FATE: add test for SMJPEG demuxer and associated IMA ADPCM audio decoder. (Don't attempt to decode JPEG data.) Code coverage: libavformat/smjpeg.c: 0% -> 69% libavcodec/adpcm.c: 0% -> 10% (fresh run); 92.4% -> 93% following a FATE run Signed-off-by: Ronald S. 
Bultje --- tests/fate/demux.mak | 3 + tests/ref/fate/smjpeg | 423 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 426 insertions(+) create mode 100644 tests/ref/fate/smjpeg diff --git a/tests/fate/demux.mak b/tests/fate/demux.mak index b40e4b7a01..a01d771546 100644 --- a/tests/fate/demux.mak +++ b/tests/fate/demux.mak @@ -79,6 +79,9 @@ fate-sierra-vmd: CMD = framecrc -i $(SAMPLES)/vmd/12.vmd -vsync 0 -pix_fmt rgb24 FATE_TESTS += fate-siff fate-siff: CMD = framecrc -i $(SAMPLES)/SIFF/INTRO_B.VB -t 3 -pix_fmt rgb24 +FATE_TESTS += fate-smjpeg +fate-smjpeg: CMD = framecrc -i $(SAMPLES)/smjpeg/scenwin.mjpg -vcodec copy + FATE_TESTS += fate-westwood-aud fate-westwood-aud: CMD = md5 -i $(SAMPLES)/westwood-aud/excellent.aud -f s16le diff --git a/tests/ref/fate/smjpeg b/tests/ref/fate/smjpeg new file mode 100644 index 0000000000..ec91959e7c --- /dev/null +++ b/tests/ref/fate/smjpeg @@ -0,0 +1,423 @@ +0, 0, 734, 0x5a042c2c +1, 0, 1024, 0x00000000 +1, 2090, 1024, 0x00000000 +1, 4180, 1024, 0xd89a448e +1, 6269, 1024, 0x695b369c +1, 8359, 1024, 0xc8ba5707 +0, 9990, 763, 0xb5893f2f +1, 10449, 1024, 0xdf241fc6 +1, 12539, 1024, 0x61cf4166 +1, 14629, 1024, 0x97cbc386 +1, 16718, 1024, 0x44899d04 +1, 18808, 1024, 0xa7cbaa62 +0, 19980, 3023, 0x0f3907d3 +1, 20898, 1024, 0xa7aea60c +1, 22988, 1024, 0xd7b18a89 +1, 25078, 1024, 0x268e81f6 +1, 27167, 1024, 0x9cf83a2f +1, 29257, 1024, 0x5559b508 +0, 29970, 4800, 0x22e6e18a +1, 31347, 1024, 0xe1b9e71c +1, 33437, 1024, 0xdcee733e +1, 35527, 1024, 0xe5918f60 +1, 37616, 1024, 0x29dbd209 +1, 39706, 1024, 0x9bcbcf16 +0, 39960, 6417, 0x427adde5 +1, 41796, 1024, 0x86f5f458 +1, 43886, 1024, 0xabcbda86 +1, 45976, 1024, 0xc51f77b9 +1, 48065, 1024, 0xf6b3a504 +0, 49950, 6776, 0x7a74c6ad +1, 50155, 1024, 0x1af3e40e +1, 52245, 1024, 0x3866b03b +1, 54335, 1024, 0xbc005403 +1, 56424, 1024, 0xe9dfcc51 +1, 58514, 1024, 0x83c837cb +0, 59940, 6808, 0x1f6eb7c3 +1, 60604, 1024, 0xfa649580 +1, 62694, 1024, 0x519452ea +1, 64784, 1024, 0xd4978774 +1, 
66873, 1024, 0xe2a3b1cd +1, 68963, 1024, 0x9a9472ad +0, 69930, 6726, 0x452087e6 +1, 71053, 1024, 0xa12d4060 +1, 73143, 1024, 0x31fb0646 +1, 75233, 1024, 0xfc44343f +1, 77322, 1024, 0x0847751a +1, 79412, 1024, 0x227968a2 +0, 79920, 6829, 0xee82b109 +1, 81502, 1024, 0x7cce9f1c +1, 83592, 1024, 0xb8356713 +1, 85682, 1024, 0xb29f6e6f +1, 87771, 1024, 0x9e1430ab +1, 89861, 1024, 0x26d85423 +0, 89910, 7055, 0xf41f1108 +1, 91951, 1024, 0x6496547d +1, 94041, 1024, 0x316b1a86 +1, 96131, 1024, 0x3cd83afc +1, 98220, 1024, 0x993ff633 +0, 99990, 6977, 0xf8fe1ede +1, 100310, 1024, 0x0708d1a2 +1, 102400, 1024, 0xd7230db9 +1, 104490, 1024, 0xbb0779ca +1, 106580, 1024, 0xc6094e1b +1, 108669, 1024, 0x15a8b039 +0, 109980, 6942, 0x9ad105c6 +1, 110759, 1024, 0xd6dbe88c +1, 112849, 1024, 0x7e8d1140 +1, 114939, 1024, 0xef88e525 +1, 117029, 1024, 0x44e21149 +1, 119118, 1024, 0x65b0f5f4 +0, 119970, 6926, 0xe239dad6 +1, 121208, 1024, 0xb955f687 +1, 123298, 1024, 0xc85fba9c +1, 125388, 1024, 0xf59655ad +1, 127478, 1024, 0x6de80bf1 +1, 129567, 1024, 0x2dcf6e41 +0, 129960, 6966, 0x81dcfab1 +1, 131657, 1024, 0xd0ddcf8a +1, 133747, 1024, 0x00135c2d +1, 135837, 1024, 0x697f8efd +1, 137927, 1024, 0x7a9bada5 +0, 139950, 6896, 0x31e6cc02 +1, 140016, 1024, 0x0d22783c +1, 142106, 1024, 0x7726d07d +1, 144196, 1024, 0xa2f14f67 +1, 146286, 1024, 0x7f51060d +1, 148376, 1024, 0xc4ec6aea +0, 149940, 6889, 0x1cc1006e +1, 150465, 1024, 0x9bb37ca4 +1, 152555, 1024, 0x9b085577 +1, 154645, 1024, 0x8812f8af +1, 156735, 1024, 0x788f5221 +1, 158824, 1024, 0x3a2ce642 +0, 159930, 6933, 0xc303f87f +1, 160914, 1024, 0x72415692 +1, 163004, 1024, 0xe3dcc105 +1, 165094, 1024, 0xb26c0599 +1, 167184, 1024, 0x5c9e55eb +1, 169273, 1024, 0x8fe88707 +0, 169920, 7034, 0xb4970a20 +1, 171363, 1024, 0xc5d7beb6 +1, 173453, 1024, 0xe1d3a3b4 +1, 175543, 1024, 0x012da0c6 +1, 177633, 1024, 0x8d010922 +1, 179722, 1024, 0x3366eb0d +0, 179910, 6961, 0xf064095d +1, 181812, 1024, 0xc9381a27 +1, 183902, 1024, 0x0774f685 +1, 185992, 1024, 
0xc5cae0a5 +1, 188082, 1024, 0xa6f4737c +0, 189990, 7089, 0x5ba350f9 +1, 190171, 1024, 0x8fb6d0d1 +1, 192261, 1024, 0x05f579c2 +1, 194351, 1024, 0x56905d99 +1, 196441, 1024, 0x002ee18d +1, 198531, 1024, 0xeb37ef51 +0, 199980, 7078, 0xa83f3e88 +1, 200620, 1024, 0x38025635 +1, 202710, 1024, 0x4fe643c8 +1, 204800, 1024, 0x11d66ab1 +1, 206890, 1024, 0xcc3051e9 +1, 208980, 1024, 0xcd93e854 +0, 209970, 7147, 0xcda66cfc +1, 211069, 1024, 0x38f1196d +1, 213159, 1024, 0x657a15fc +1, 215249, 1024, 0x669ce2a9 +1, 217339, 1024, 0x95862dda +1, 219429, 1024, 0x1726a7b2 +0, 219960, 7173, 0xb7455859 +1, 221518, 1024, 0xd6ece2a1 +1, 223608, 1024, 0x33ab9553 +1, 225698, 1024, 0xd50c73a6 +1, 227788, 1024, 0xfe25b63a +1, 229878, 1024, 0x7e2959e3 +0, 229950, 7213, 0x97b89994 +1, 231967, 1024, 0xa4c07b34 +1, 234057, 1024, 0xd6d8f15c +1, 236147, 1024, 0x1eccddd7 +1, 238237, 1024, 0x2b69f9cb +0, 239940, 7170, 0xca8b2948 +1, 240327, 1024, 0x667b775f +1, 242416, 1024, 0xad3b84e9 +1, 244506, 1024, 0x4f29fc67 +1, 246596, 1024, 0x8d611ab7 +1, 248686, 1024, 0x278966ea +0, 249930, 7174, 0xc7cc6bbb +1, 250776, 1024, 0xaf33812b +1, 252865, 1024, 0xa55f4265 +1, 254955, 1024, 0x023cb51c +1, 257045, 1024, 0x1d1f1005 +1, 259135, 1024, 0x874cccf7 +0, 259920, 7235, 0xc2e68d2b +1, 261224, 1024, 0xda705428 +1, 263314, 1024, 0x48d9b440 +1, 265404, 1024, 0xa14e0712 +1, 267494, 1024, 0x7efbad1f +1, 269584, 1024, 0xdb82c17f +0, 270000, 7261, 0x8204a423 +1, 271673, 1024, 0xcbe87613 +1, 273763, 1024, 0x3a63df1d +1, 275853, 1024, 0xd5636bba +1, 277943, 1024, 0x9397af23 +0, 279990, 7353, 0xacc7e7c0 +1, 280033, 1024, 0x32a07c98 +1, 282122, 1024, 0x202ca667 +1, 284212, 1024, 0xdf969011 +1, 286302, 1024, 0xc434d238 +1, 288392, 1024, 0xe9ad7562 +0, 289980, 7065, 0x45035c5c +1, 290482, 1024, 0xb51b6b50 +1, 292571, 1024, 0xe70aecd3 +1, 294661, 1024, 0x03c816b2 +1, 296751, 1024, 0x869fdf25 +1, 298841, 1024, 0xd40a0a62 +0, 299970, 7269, 0x72edbb76 +1, 300931, 1024, 0x5af7dd35 +1, 303020, 1024, 0x891ffc72 +1, 305110, 
1024, 0x1ff68a08 +1, 307200, 1024, 0x5a7517a9 +1, 309290, 1024, 0x0f959f74 +0, 309960, 7220, 0xb926772f +1, 311380, 1024, 0xe92a12a2 +1, 313469, 1024, 0x38000e55 +1, 315559, 1024, 0x39fbdd70 +1, 317649, 1024, 0xca3d9184 +1, 319739, 1024, 0x66c8995b +0, 319950, 7326, 0x0a66c632 +1, 321829, 1024, 0xac25acea +1, 323918, 1024, 0x3cd1046c +1, 326008, 1024, 0x6a1df31c +1, 328098, 1024, 0x21ca10a1 +0, 329940, 7225, 0xe39076ab +1, 330188, 1024, 0x1aeccedc +1, 332278, 1024, 0xddea1335 +1, 334367, 1024, 0x19f5ca9f +1, 336457, 1024, 0x88e95e43 +1, 338547, 1024, 0x726284fe +0, 339930, 7265, 0xe0209036 +1, 340637, 1024, 0x6b85b40e +1, 342727, 1024, 0x111fee2a +1, 344816, 1024, 0x3656b588 +1, 346906, 1024, 0xa5a2b552 +1, 348996, 1024, 0x38fb2467 +0, 349920, 7337, 0x7a5dc093 +1, 351086, 1024, 0xaa919ccc +1, 353176, 1024, 0x15993dbc +1, 355265, 1024, 0xbe01a7b9 +1, 357355, 1024, 0xefe93c09 +1, 359445, 1024, 0x1bb566e5 +0, 360000, 7246, 0x519a7a3c +1, 361535, 1024, 0x15ce6237 +1, 363624, 1024, 0xa8552e66 +1, 365714, 1024, 0x9d80187e +1, 367804, 1024, 0x5df3fc30 +1, 369894, 1024, 0x1a312aa5 +0, 369990, 7266, 0x352c8078 +1, 371984, 1024, 0x6bb8e302 +1, 374073, 1024, 0xbd9684bb +1, 376163, 1024, 0x78b0b166 +1, 378253, 1024, 0xd9af5eae +0, 379980, 7323, 0xcaf69d7c +1, 380343, 1024, 0xdb90fe82 +1, 382433, 1024, 0x327614e9 +1, 384522, 1024, 0x1f19b7fe +1, 386612, 1024, 0x46c53f96 +1, 388702, 1024, 0x921b2189 +0, 389970, 7309, 0x98c1e6f7 +1, 390792, 1024, 0xa8fbc85a +1, 392882, 1024, 0xabfdaaae +1, 394971, 1024, 0x6acc7387 +1, 397061, 1024, 0x0d9c27b5 +1, 399151, 1024, 0xba4dd809 +0, 399960, 7121, 0x913d5bd6 +1, 401241, 1024, 0x2a2ad521 +1, 403331, 1024, 0x892de38a +1, 405420, 1024, 0xdc97a2eb +1, 407510, 1024, 0x4f614ca4 +1, 409600, 1024, 0x9c8a77ea +0, 409950, 7088, 0x56302362 +1, 411690, 1024, 0x2d30e646 +1, 413780, 1024, 0x74e800a7 +1, 415869, 1024, 0x1e01fb02 +1, 417959, 1024, 0x4ed2c1d8 +0, 419940, 7104, 0xc0d14f78 +1, 420049, 1024, 0xf2fdbe63 +1, 422139, 1024, 0x8d6f63a1 +1, 
424229, 1024, 0xded468d9 +1, 426318, 1024, 0xccad839e +1, 428408, 1024, 0xdde7c082 +0, 429930, 7169, 0xd03c825b +1, 430498, 1024, 0x548613c5 +1, 432588, 1024, 0x383909bd +1, 434678, 1024, 0xfd37627b +1, 436767, 1024, 0x6d95a481 +1, 438857, 1024, 0x56aa87fa +0, 439920, 7038, 0x1ecc201d +1, 440947, 1024, 0x7b67258c +1, 443037, 1024, 0x7dd99a92 +1, 445127, 1024, 0x4a66d102 +1, 447216, 1024, 0x7b3fce51 +1, 449306, 1024, 0xbbd968aa +0, 450000, 7015, 0x83c94454 +1, 451396, 1024, 0x8283ec36 +1, 453486, 1024, 0x3c96493d +1, 455576, 1024, 0xfa4f8cf8 +1, 457665, 1024, 0xe2cf872d +1, 459755, 1024, 0x0a9e7aa6 +0, 459990, 6983, 0x9e51f54d +1, 461845, 1024, 0x6e7a0550 +1, 463935, 1024, 0x3acfea2f +1, 466024, 1024, 0x7111d0fa +1, 468114, 1024, 0xe9a1eca9 +0, 469980, 7088, 0x70d33de1 +1, 470204, 1024, 0x24da6c46 +1, 472294, 1024, 0x117cff37 +1, 474384, 1024, 0x0f27cab6 +1, 476473, 1024, 0x69b6b4e6 +1, 478563, 1024, 0x1e6cc841 +0, 479970, 7096, 0x4d0f81b5 +1, 480653, 1024, 0xb01e2365 +1, 482743, 1024, 0x14e200d3 +1, 484833, 1024, 0xd1184c98 +1, 486922, 1024, 0xef9140e9 +1, 489012, 1024, 0x4cbb645e +0, 489960, 7106, 0xd1a83ddc +1, 491102, 1024, 0xe7fe2f06 +1, 493192, 1024, 0xf8c45028 +1, 495282, 1024, 0x561358f4 +1, 497371, 1024, 0xd0129b77 +1, 499461, 1024, 0xcc636e88 +0, 499950, 7219, 0x20f47fe4 +1, 501551, 1024, 0xe9406321 +1, 503641, 1024, 0x9f16a041 +1, 505731, 1024, 0x468bf409 +1, 507820, 1024, 0x3df70f7b +1, 509910, 1024, 0xa880b11b +0, 509940, 7184, 0x45dc6a0e +1, 512000, 1024, 0x3286c489 +1, 514090, 1024, 0x39fe9ebc +1, 516180, 1024, 0xc533d83b +1, 518269, 1024, 0x153b195d +0, 519930, 7222, 0x488c6499 +1, 520359, 1024, 0xd84786a1 +1, 522449, 1024, 0xdc295aaa +1, 524539, 1024, 0xfb764d8c +1, 526629, 1024, 0xeebc9db9 +1, 528718, 1024, 0x7ba9403e +0, 529920, 7254, 0xbd097ba7 +1, 530808, 1024, 0x4e5571ec +1, 532898, 1024, 0xd965fad4 +1, 534988, 1024, 0x87e259f2 +1, 537078, 1024, 0xae7e533b +1, 539167, 1024, 0x313cf4d6 +0, 540000, 7189, 0x46e06d43 +1, 541257, 1024, 0xe1844c90 
+1, 543347, 1024, 0xbb057b44 +1, 545437, 1024, 0xa5099687 +1, 547527, 1024, 0xbff10707 +1, 549616, 1024, 0x37c4ffc0 +0, 549990, 7283, 0x19dd7319 +1, 551706, 1024, 0xf9fb6caa +1, 553796, 1024, 0x3b6a3a1f +1, 555886, 1024, 0x83431edb +1, 557976, 1024, 0x1eb713cf +0, 559980, 7161, 0x23171d02 +1, 560065, 1024, 0xd7b07a6d +1, 562155, 1024, 0x81ae3391 +1, 564245, 1024, 0xf150130a +1, 566335, 1024, 0x09678eaa +1, 568424, 1024, 0xb94e06f1 +0, 569970, 6976, 0xcc610c26 +1, 570514, 1024, 0x67b1dbc9 +1, 572604, 1024, 0xd6edc235 +1, 574694, 1024, 0x34e4c499 +1, 576784, 1024, 0xeefd89c0 +1, 578873, 1024, 0x38afdaf1 +0, 579960, 7056, 0x6cd917b0 +1, 580963, 1024, 0x29a60d76 +1, 583053, 1024, 0xe28a4372 +1, 585143, 1024, 0x7089454d +1, 587233, 1024, 0x0c01bb7b +1, 589322, 1024, 0xbd776a72 +0, 589950, 6736, 0x02b78951 +1, 591412, 1024, 0x86776fd0 +1, 593502, 1024, 0xb37c88f7 +1, 595592, 1024, 0x5f90aaf8 +1, 597682, 1024, 0x203d4222 +1, 599771, 1024, 0x382692a6 +0, 599940, 6540, 0x767e0854 +1, 601861, 1024, 0xf37c95fd +1, 603951, 1024, 0x6c0b8877 +1, 606041, 1024, 0x2e54a8b6 +1, 608131, 1024, 0x7f266488 +0, 609930, 6170, 0xc84962fb +1, 610220, 1024, 0xfbf20f9a +1, 612310, 1024, 0xf2985cc0 +1, 614400, 1024, 0xc7075340 +1, 616490, 1024, 0xe4585695 +1, 618580, 1024, 0xbdffa380 +0, 619920, 6169, 0x27e06c03 +1, 620669, 1024, 0x2422a8a9 +1, 622759, 1024, 0x59cbd75f +1, 624849, 1024, 0x04ad1a8c +1, 626939, 1024, 0x33c09191 +1, 629029, 1024, 0x55efa6fd +0, 630000, 5864, 0xd14db83f +1, 631118, 1024, 0xf73d0e5d +1, 633208, 1024, 0x6141ebae +1, 635298, 1024, 0x7db17a68 +1, 637388, 1024, 0xa6c690b6 +1, 639478, 1024, 0xa6fd6725 +0, 639990, 5375, 0x4a21055d +1, 641567, 1024, 0x50a90b9b +1, 643657, 1024, 0xef990dc8 +1, 645747, 1024, 0x75adf6b5 +1, 647837, 1024, 0x61eac43e +1, 649927, 1024, 0x67797a19 +0, 649980, 5206, 0x95ead3cb +1, 652016, 1024, 0xf325277a +1, 654106, 1024, 0x18bf254a +1, 656196, 1024, 0x2ce6bee3 +1, 658286, 1024, 0x8d320860 +0, 659970, 5220, 0xcfdcc37e +1, 660376, 1024, 
0xc979b6e8 +1, 662465, 1024, 0xdb644b41 +1, 664555, 1024, 0xe1b368ba +1, 666645, 1024, 0xacc53d15 +1, 668735, 1024, 0x42ea8c18 +0, 669960, 4946, 0x2d864a77 +1, 670824, 1024, 0xe52c99a4 +1, 672914, 1024, 0xd7db54a6 +1, 675004, 1024, 0x7f27a7e3 +1, 677094, 1024, 0xf7ffeaa9 +1, 679184, 1024, 0x792b6088 +0, 679950, 4390, 0x2ab9f462 +1, 681273, 1024, 0x61d99724 +1, 683363, 1024, 0x5213720e +1, 685453, 1024, 0xac09dd30 +1, 687543, 1024, 0x960bf6bb +1, 689633, 1024, 0xc90168e1 +0, 689940, 4051, 0x1d09592e +1, 691722, 1024, 0x43b45768 +1, 693812, 1024, 0x935d60a1 +1, 695902, 1024, 0x9a342ef2 +1, 697992, 1024, 0xc894709f +0, 699930, 3680, 0x39bd6a12 +1, 700082, 1024, 0x59b43b07 +1, 702171, 1024, 0x36a1a98d +1, 704261, 1024, 0x9e1a121c +1, 706351, 1024, 0x02208b78 +1, 708441, 1024, 0xd1d7b274 +0, 709920, 2910, 0x6337ece9 +1, 710531, 1024, 0xdacd5096 +1, 712620, 1024, 0x51b71ead +1, 714710, 1024, 0xd009a7ca +1, 716800, 1024, 0xb6d5a938 +1, 718890, 1024, 0xf3d45e47 +0, 720000, 2153, 0xf4e3bc17 +1, 720980, 1024, 0xea8e04fc +1, 723069, 1024, 0x0b928bd8 +1, 725159, 1024, 0x0f02caec +1, 727249, 1024, 0xe2b137a8 +1, 729339, 1024, 0xd5f94892 From d41d7773bc0c10edef78f72370792f00170ff66b Mon Sep 17 00:00:00 2001 From: Mike Melanson Date: Sat, 7 Jan 2012 19:59:07 -0800 Subject: [PATCH 12/19] FATE: add test for xWMA demuxer. (Does not attempt to decode percetual audio data inside.) Code coverage: libavformat/xwma.c: 3% -> 75% Signed-off-by: Ronald S. 
Bultje --- tests/fate/demux.mak | 3 +++ tests/ref/fate/xwma-demux | 1 + 2 files changed, 4 insertions(+) create mode 100644 tests/ref/fate/xwma-demux diff --git a/tests/fate/demux.mak b/tests/fate/demux.mak index a01d771546..4dd953e76f 100644 --- a/tests/fate/demux.mak +++ b/tests/fate/demux.mak @@ -90,3 +90,6 @@ fate-wtv-demux: CMD = framecrc -i $(SAMPLES)/wtv/law-and-order-partial.wtv -vcod FATE_TESTS += fate-xmv-demux fate-xmv-demux: CMD = framecrc -i $(SAMPLES)/xmv/logos1p.fmv -vcodec copy -acodec copy + +FATE_TESTS += fate-xwma-demux +fate-xwma-demux: CMD = crc -i $(SAMPLES)/xwma/ergon.xwma -acodec copy diff --git a/tests/ref/fate/xwma-demux b/tests/ref/fate/xwma-demux new file mode 100644 index 0000000000..83a3b086cf --- /dev/null +++ b/tests/ref/fate/xwma-demux @@ -0,0 +1 @@ +CRC=0x2ac2159e From 5cad97097112016ea3c414546984ec614d38d031 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Mon, 9 Jan 2012 01:24:33 +0000 Subject: [PATCH 13/19] swscale: RGB444 and BGR444 input Signed-off-by: Ronald S. 
Bultje --- libswscale/swscale.c | 16 ++++++++++++++++ libswscale/utils.c | 8 ++++---- tests/ref/lavfi/pixdesc | 4 ++++ tests/ref/lavfi/pixfmts_copy | 4 ++++ tests/ref/lavfi/pixfmts_null | 4 ++++ tests/ref/lavfi/pixfmts_scale | 4 ++++ tests/ref/lavfi/pixfmts_vflip | 4 ++++ 7 files changed, 40 insertions(+), 4 deletions(-) diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 980fc4a401..05ee8a4d91 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1537,12 +1537,16 @@ rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0 rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8) rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8) rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7) +rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4) rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8) rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7) +rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4) rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8) rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7) +rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0, 0, 0, 0x000F, 0x00F0, 0x0F00, 8, 4, 0, RGB2YUV_SHIFT+4) rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8) rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7) +rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4) static void abgrToA_c(uint8_t *dst, 
const uint8_t *src, int width, uint32_t *unused) { @@ -2748,6 +2752,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break; case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break; case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break; + case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break; + case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break; case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break; case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break; case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break; @@ -2755,6 +2761,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break; case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break; case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break; + case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break; + case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break; } } else { switch(srcFormat) { @@ -2769,6 +2777,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break; case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break; case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break; + case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break; + case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break; case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break; case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break; case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break; @@ -2776,6 +2786,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break; case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break; case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break; + case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break; + case PIX_FMT_RGB444BE: c->chrToYV12 = 
rgb12beToUV_c; break; } } @@ -2820,11 +2832,15 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break; case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break; case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break; + case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break; + case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break; case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break; case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break; case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break; case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break; case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break; + case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break; + case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break; case PIX_FMT_RGB8 : case PIX_FMT_BGR8 : case PIX_FMT_PAL8 : diff --git a/libswscale/utils.c b/libswscale/utils.c index 073285b102..d252f2e32d 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -120,10 +120,10 @@ const static FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_YUV422P16BE] = { 1 , 1 }, [PIX_FMT_YUV444P16LE] = { 1 , 1 }, [PIX_FMT_YUV444P16BE] = { 1 , 1 }, - [PIX_FMT_RGB444LE] = { 0 , 1 }, - [PIX_FMT_RGB444BE] = { 0 , 1 }, - [PIX_FMT_BGR444LE] = { 0 , 1 }, - [PIX_FMT_BGR444BE] = { 0 , 1 }, + [PIX_FMT_RGB444LE] = { 1 , 1 }, + [PIX_FMT_RGB444BE] = { 1 , 1 }, + [PIX_FMT_BGR444LE] = { 1 , 1 }, + [PIX_FMT_BGR444BE] = { 1 , 1 }, [PIX_FMT_Y400A] = { 1 , 0 }, [PIX_FMT_BGR48BE] = { 1 , 1 }, [PIX_FMT_BGR48LE] = { 1 , 1 }, diff --git a/tests/ref/lavfi/pixdesc b/tests/ref/lavfi/pixdesc index a5016eeb22..5dfa270021 100644 --- a/tests/ref/lavfi/pixdesc +++ b/tests/ref/lavfi/pixdesc @@ -1,6 +1,8 @@ abgr 037bf9df6a765520ad6d490066bf4b89 argb c442a8261c2265a07212ef0f72e35f5a bgr24 0d0cb38ab3fa0b2ec0865c14f78b217b +bgr444be d9ea9307d21b162225b8b2c524cf9477 +bgr444le 88035350e9da3a8f67387890b956f0bc bgr48be 00624e6c7ec7ab19897ba2f0a3257fe8 bgr48le 
d02c235ebba7167881ca2d576497ff84 bgr4_byte 50d23cc82d9dcef2fd12adb81fb9b806 @@ -18,6 +20,8 @@ monow 9251497f3b0634f1165d12d5a289d943 nv12 e0af357888584d36eec5aa0f673793ef nv21 9a3297f3b34baa038b1f37cb202b512f rgb24 b41eba9651e1b5fe386289b506188105 +rgb444be 9e89db334568c6b2e3d5d0540f4ba960 +rgb444le 0a68cb6de8bf530aa30c5c1205c25155 rgb48be cc139ec1dd9451f0e049c0cb3a0c8aa2 rgb48le 86c5608904f75360d492dbc5c9589969 rgb4_byte c93ba89b74c504e7f5ae9d9ab1546c73 diff --git a/tests/ref/lavfi/pixfmts_copy b/tests/ref/lavfi/pixfmts_copy index a5016eeb22..5dfa270021 100644 --- a/tests/ref/lavfi/pixfmts_copy +++ b/tests/ref/lavfi/pixfmts_copy @@ -1,6 +1,8 @@ abgr 037bf9df6a765520ad6d490066bf4b89 argb c442a8261c2265a07212ef0f72e35f5a bgr24 0d0cb38ab3fa0b2ec0865c14f78b217b +bgr444be d9ea9307d21b162225b8b2c524cf9477 +bgr444le 88035350e9da3a8f67387890b956f0bc bgr48be 00624e6c7ec7ab19897ba2f0a3257fe8 bgr48le d02c235ebba7167881ca2d576497ff84 bgr4_byte 50d23cc82d9dcef2fd12adb81fb9b806 @@ -18,6 +20,8 @@ monow 9251497f3b0634f1165d12d5a289d943 nv12 e0af357888584d36eec5aa0f673793ef nv21 9a3297f3b34baa038b1f37cb202b512f rgb24 b41eba9651e1b5fe386289b506188105 +rgb444be 9e89db334568c6b2e3d5d0540f4ba960 +rgb444le 0a68cb6de8bf530aa30c5c1205c25155 rgb48be cc139ec1dd9451f0e049c0cb3a0c8aa2 rgb48le 86c5608904f75360d492dbc5c9589969 rgb4_byte c93ba89b74c504e7f5ae9d9ab1546c73 diff --git a/tests/ref/lavfi/pixfmts_null b/tests/ref/lavfi/pixfmts_null index a5016eeb22..5dfa270021 100644 --- a/tests/ref/lavfi/pixfmts_null +++ b/tests/ref/lavfi/pixfmts_null @@ -1,6 +1,8 @@ abgr 037bf9df6a765520ad6d490066bf4b89 argb c442a8261c2265a07212ef0f72e35f5a bgr24 0d0cb38ab3fa0b2ec0865c14f78b217b +bgr444be d9ea9307d21b162225b8b2c524cf9477 +bgr444le 88035350e9da3a8f67387890b956f0bc bgr48be 00624e6c7ec7ab19897ba2f0a3257fe8 bgr48le d02c235ebba7167881ca2d576497ff84 bgr4_byte 50d23cc82d9dcef2fd12adb81fb9b806 @@ -18,6 +20,8 @@ monow 9251497f3b0634f1165d12d5a289d943 nv12 e0af357888584d36eec5aa0f673793ef nv21 
9a3297f3b34baa038b1f37cb202b512f rgb24 b41eba9651e1b5fe386289b506188105 +rgb444be 9e89db334568c6b2e3d5d0540f4ba960 +rgb444le 0a68cb6de8bf530aa30c5c1205c25155 rgb48be cc139ec1dd9451f0e049c0cb3a0c8aa2 rgb48le 86c5608904f75360d492dbc5c9589969 rgb4_byte c93ba89b74c504e7f5ae9d9ab1546c73 diff --git a/tests/ref/lavfi/pixfmts_scale b/tests/ref/lavfi/pixfmts_scale index 7abe0d723d..4a5bf676d9 100644 --- a/tests/ref/lavfi/pixfmts_scale +++ b/tests/ref/lavfi/pixfmts_scale @@ -1,6 +1,8 @@ abgr d894cb97f6c80eb21bdbe8a4eea62d86 argb 54346f2b2eef10919e0f247241df3b24 bgr24 570f8d6b51a838aed022ef67535f6bdc +bgr444be 25fe04f73a3bad4140d1c4f96ca5b670 +bgr444le 2fde227e6cea6dca5decdd0b7c0866f7 bgr48be 390d3058a12a99c2b153ed7922508bea bgr48le 39fe06feb4ec1d9730dccc04a0cfac4c bgr4_byte ee1d35a7baf8e9016891929a2f565c0b @@ -18,6 +20,8 @@ monow d31772ebaa877fc2a78565937f7f9673 nv12 4676d59db43d657dc12841f6bc3ab452 nv21 69c699510ff1fb777b118ebee1002f14 rgb24 514692e28e8ff6860e415ce4fcf6eb8c +rgb444be 12254053ae93373869fca18b2afcba31 +rgb444le badbd68b59c87df6ae73248309637634 rgb48be 8fac63787a711886030f8e056872b488 rgb48le ab92f2763a2eb264c3870cc758f97149 rgb4_byte d81ffd3add95842a618eec81024f0b5c diff --git a/tests/ref/lavfi/pixfmts_vflip b/tests/ref/lavfi/pixfmts_vflip index e834394a71..f21927bdad 100644 --- a/tests/ref/lavfi/pixfmts_vflip +++ b/tests/ref/lavfi/pixfmts_vflip @@ -1,6 +1,8 @@ abgr 25e72e9dbd01ab00727c976d577f7be5 argb 19869bf1a5ac0b6af4d8bbe2c104533c bgr24 89108a4ba00201f79b75b9305c42352d +bgr444be 9ef12c42fb791948ca4423c452dc6b9a +bgr444le 3650ecfc163abd1596c0cd29d130c4b0 bgr48be 2f23931844f57641f3737348182d118c bgr48le 4242a026012b6c135a6aa138a6d67031 bgr4_byte 407fcf564ed764c38e1d748f700ab921 @@ -18,6 +20,8 @@ monow ff9869d067ecb94eb9d90c9750c31fea nv12 046f00f598ce14d9854a3534a5c99114 nv21 01ea369dd2d0d3ed7451dc5c8d61497f rgb24 eaefabc168d0b14576bab45bc1e56e1e +rgb444be 06722e03f8404e7d2226665ed2444a32 +rgb444le 185c9a5d9c2877484310d4196ef4cd6f rgb48be 
62dd185862ed142283bd300eb6dbd216 rgb48le dcb76353268bc5862194d131762220da rgb4_byte 8c6ff02df0b06dd2d574836c3741b2a2 From 1ef8ff4534706de0b1da3442f380be58a650acf2 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sat, 7 Jan 2012 20:27:31 +0100 Subject: [PATCH 14/19] cabac: remove put_cabac_u/ueg from cabac-test. The functions are not used in any part of Libav, therefore testing them in the cabac-test is unnecessary. Since this makes them unused, remove the functions. --- libavcodec/cabac.c | 73 ---------------------------------------------- 1 file changed, 73 deletions(-) diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c index 466d6239dc..54414fa1e5 100644 --- a/libavcodec/cabac.c +++ b/libavcodec/cabac.c @@ -248,67 +248,6 @@ static int put_cabac_terminate(CABACContext *c, int bit){ return (put_bits_count(&c->pb)+7)>>3; } -/** - * put (truncated) unary binarization. - */ -static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){ - int i; - - assert(v <= max); - - for(i=0; i= m){ //FIXME optimize - put_cabac_bypass(c, 1); - v-= m; - m+= m; - } - put_cabac_bypass(c, 0); - while(m>>=1){ - put_cabac_bypass(c, v&m); - } - } - - if(is_signed) - put_cabac_bypass(c, sign); - } -} - int main(void){ CABACContext c; uint8_t b[9*SIZE]; @@ -337,18 +276,6 @@ START_TIMER STOP_TIMER("put_cabac") } - for(i=0; i Date: Fri, 6 Jan 2012 17:22:45 -0800 Subject: [PATCH 15/19] aacdec: Turn off PS for multichannel files that use PCE based configs. Fixes al_sbr_cm_48_5.1.mp4. 
--- libavcodec/aac.h | 1 + libavcodec/aacdec.c | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/libavcodec/aac.h b/libavcodec/aac.h index 30491fe85a..a36080cb6f 100644 --- a/libavcodec/aac.h +++ b/libavcodec/aac.h @@ -84,6 +84,7 @@ enum BandType { #define IS_CODEBOOK_UNSIGNED(x) ((x - 1) & 10) enum ChannelPosition { + AAC_CHANNEL_OFF = 0, AAC_CHANNEL_FRONT = 1, AAC_CHANNEL_SIDE = 2, AAC_CHANNEL_BACK = 3, diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index 4d3f1ff0d0..b2fc740d4d 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -163,6 +163,19 @@ static ChannelElement *get_che(AACContext *ac, int type, int elem_id) } } +static int count_channels(enum ChannelPosition che_pos[4][MAX_ELEM_ID]) +{ + int i, type, sum = 0; + for (i = 0; i < MAX_ELEM_ID; i++) { + for (type = 0; type < 4; type++) { + sum += (1 + (type == TYPE_CPE)) * + (che_pos[type][i] != AAC_CHANNEL_OFF && + che_pos[type][i] != AAC_CHANNEL_CC); + } + } + return sum; +} + /** * Check for the channel element in the current channel position configuration. 
* If it exists, make sure the appropriate element is allocated and map the @@ -418,6 +431,12 @@ static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx, if ((ret = set_default_channel_config(avctx, new_che_pos, channel_config))) return ret; } + + if (count_channels(new_che_pos) > 1) { + m4ac->ps = 0; + } else if (m4ac->sbr == 1 && m4ac->ps == -1) + m4ac->ps = 1; + if (ac && (ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR))) return ret; @@ -476,8 +495,6 @@ static int decode_audio_specific_config(AACContext *ac, av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index); return -1; } - if (m4ac->sbr == 1 && m4ac->ps == -1) - m4ac->ps = 1; skip_bits_long(&gb, i); From 46ef355c652fed8984a5c92e961b78282d9ebb2c Mon Sep 17 00:00:00 2001 From: Alex Converse Date: Fri, 6 Jan 2012 17:23:49 -0800 Subject: [PATCH 16/19] aacdec: Add a fate test for 5.1 channel SBR. --- tests/fate/aac.mak | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/fate/aac.mak b/tests/fate/aac.mak index f17c914d1a..a3e145fb4a 100644 --- a/tests/fate/aac.mak +++ b/tests/fate/aac.mak @@ -26,6 +26,10 @@ FATE_AAC += fate-aac-al_sbr_hq_cm_48_2 fate-aac-al_sbr_hq_cm_48_2: CMD = pcm -i $(SAMPLES)/aac/al_sbr_cm_48_2.mp4 fate-aac-al_sbr_hq_cm_48_2: REF = $(SAMPLES)/aac/al_sbr_hq_cm_48_2.s16 +FATE_AAC += fate-aac-al_sbr_hq_cm_48_5.1 +fate-aac-al_sbr_hq_cm_48_5.1: CMD = pcm -i $(SAMPLES)/aac/al_sbr_cm_48_5.1.mp4 +fate-aac-al_sbr_hq_cm_48_5.1: REF = $(SAMPLES)/aac/al_sbr_hq_cm_48_5.1.s16 + FATE_AAC += fate-aac-al_sbr_ps_06_ur fate-aac-al_sbr_ps_06_ur: CMD = pcm -i $(SAMPLES)/aac/al_sbr_ps_06_new.mp4 fate-aac-al_sbr_ps_06_ur: REF = $(SAMPLES)/aac/al_sbr_ps_06_ur.s16 From cd6e34d3693436c1732ff1e977ab659d53314dc1 Mon Sep 17 00:00:00 2001 From: Alex Converse Date: Thu, 5 Jan 2012 18:39:20 -0800 Subject: [PATCH 17/19] fate: Add tests for vc1/wmapro in ism. 
--- tests/fate/microsoft.mak | 3 + tests/fate/wma.mak | 5 ++ tests/ref/fate/vc1-ism | 120 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+) create mode 100644 tests/ref/fate/vc1-ism diff --git a/tests/fate/microsoft.mak b/tests/fate/microsoft.mak index bb7e7018d7..b36f854628 100644 --- a/tests/fate/microsoft.mak +++ b/tests/fate/microsoft.mak @@ -25,3 +25,6 @@ fate-vc1_sa10091: CMD = framecrc -i $(SAMPLES)/vc1/SA10091.vc1 FATE_TESTS += fate-vc1_sa20021 fate-vc1_sa20021: CMD = framecrc -i $(SAMPLES)/vc1/SA20021.vc1 + +FATE_TESTS += fate-vc1-ism +fate-vc1-ism: CMD = framecrc -i $(SAMPLES)/isom/vc1-wmapro.ism -an diff --git a/tests/fate/wma.mak b/tests/fate/wma.mak index 25626f3db6..e448d76472 100644 --- a/tests/fate/wma.mak +++ b/tests/fate/wma.mak @@ -8,6 +8,11 @@ fate-wmapro-5.1: CMD = pcm -i $(SAMPLES)/wmapro/latin_192_mulitchannel_cut.wma fate-wmapro-5.1: CMP = oneoff fate-wmapro-5.1: REF = $(SAMPLES)/wmapro/latin_192_mulitchannel_cut.pcm +FATE_TESTS += fate-wmapro-ism +fate-wmapro-ism: CMD = pcm -i $(SAMPLES)/isom/vc1-wmapro.ism -vn +fate-wmapro-ism: CMP = oneoff +fate-wmapro-ism: REF = $(SAMPLES)/isom/vc1-wmapro.pcm + FATE_TESTS += fate-wmavoice-7k fate-wmavoice-7k: CMD = pcm -i $(SAMPLES)/wmavoice/streaming_CBR-7K.wma fate-wmavoice-7k: CMP = stddev diff --git a/tests/ref/fate/vc1-ism b/tests/ref/fate/vc1-ism new file mode 100644 index 0000000000..886e583c5d --- /dev/null +++ b/tests/ref/fate/vc1-ism @@ -0,0 +1,120 @@ +0, 0, 37440, 0xd1bc5235 +0, 3750, 37440, 0x158e6167 +0, 7500, 37440, 0x0faa4481 +0, 11250, 37440, 0x427158c5 +0, 15000, 37440, 0x4eb53ac6 +0, 18750, 37440, 0x99304eea +0, 22500, 37440, 0xcc554a6f +0, 26250, 37440, 0xabeb6c35 +0, 30000, 37440, 0xddfc7e18 +0, 33750, 37440, 0xaa79b504 +0, 37500, 37440, 0x5cb1c839 +0, 41250, 37440, 0x7e36ecca +0, 45000, 37440, 0xf486f425 +0, 48750, 37440, 0xf1b4138f +0, 52500, 37440, 0x966f1a49 +0, 56250, 37440, 0x5eff21da +0, 60000, 37440, 0x333f39b1 +0, 63750, 37440, 0x62e5963e +0, 67500, 
37440, 0x26930671 +0, 71250, 37440, 0x27b4bb6c +0, 75000, 37440, 0xdbd07766 +0, 78750, 37440, 0x04260104 +0, 82500, 37440, 0x9b1e078b +0, 86250, 37440, 0xdf4e2474 +0, 90000, 37440, 0x57d44986 +0, 93750, 37440, 0x8780e34c +0, 97500, 37440, 0xf80c8bc0 +0, 101250, 37440, 0x630a7583 +0, 105000, 37440, 0x235ae089 +0, 108750, 37440, 0x984b8f0e +0, 112500, 37440, 0x865cf592 +0, 116250, 37440, 0x70f376f2 +0, 120000, 37440, 0x8b30c035 +0, 123750, 37440, 0xde772d79 +0, 127500, 37440, 0x8e076be5 +0, 131250, 37440, 0x3dc2bd9f +0, 135000, 37440, 0xb782eb67 +0, 138750, 37440, 0x02025d73 +0, 142500, 37440, 0x86bbbce8 +0, 146250, 37440, 0xd6554f62 +0, 150000, 37440, 0xb831b917 +0, 153750, 37440, 0x80643560 +0, 157500, 37440, 0x4ecf9afd +0, 161250, 37440, 0x9ce51e0b +0, 165000, 37440, 0x179466cd +0, 168750, 37440, 0x145fc900 +0, 172500, 37440, 0xb1b50402 +0, 176250, 37440, 0x0a87552a +0, 180000, 37440, 0x8f53821d +0, 183750, 37440, 0x1c07c825 +0, 187500, 37440, 0x49dde82f +0, 191250, 37440, 0xb1a32605 +0, 195000, 37440, 0x410f3cd5 +0, 198750, 37440, 0xff5e6696 +0, 202500, 37440, 0x96f678c9 +0, 206250, 37440, 0x6c9e9e68 +0, 210000, 37440, 0x79a2a655 +0, 213750, 37440, 0xf237bd6c +0, 217500, 37440, 0x4051b611 +0, 221250, 37440, 0xc7ccc918 +0, 225000, 37440, 0xbd02c122 +0, 228750, 37440, 0xacb3c881 +0, 232500, 37440, 0x2abdb940 +0, 236250, 37440, 0x19d5be85 +0, 240000, 37440, 0xfa5fb1ba +0, 243750, 37440, 0xdae7a7aa +0, 247500, 37440, 0x6b0f9f69 +0, 251250, 37440, 0x353e8201 +0, 255000, 37440, 0xa21443aa +0, 258750, 37440, 0x66c8d7e0 +0, 262500, 37440, 0xc332068e +0, 266250, 37440, 0x71431b9b +0, 270000, 37440, 0x392f15cb +0, 273750, 37440, 0x95a146bb +0, 277500, 37440, 0x7c51740a +0, 281250, 37440, 0xa3bdd43c +0, 285000, 37440, 0xa079f965 +0, 288750, 37440, 0xa95423ea +0, 292500, 37440, 0xd1bd2c67 +0, 296250, 37440, 0x6cf82844 +0, 300000, 37440, 0xd401e128 +0, 303750, 37440, 0x1f7db118 +0, 307500, 37440, 0x2e0a65a9 +0, 311250, 37440, 0x321c1c40 +0, 315000, 37440, 0x95b2a127 +0, 
318750, 37440, 0xa1471f4b +0, 322500, 37440, 0x29d148c0 +0, 326250, 37440, 0x24c07107 +0, 330000, 37440, 0x0ead678d +0, 333750, 37440, 0xd0ca6495 +0, 337500, 37440, 0x08f935ef +0, 341250, 37440, 0xb5ec3c38 +0, 345000, 37440, 0xce371628 +0, 348750, 37440, 0x68170812 +0, 352500, 37440, 0xe222699e +0, 356250, 37440, 0xd688706c +0, 360000, 37440, 0x81a033f9 +0, 363750, 37440, 0x28bd0fbf +0, 367500, 37440, 0xe36db7b2 +0, 371250, 37440, 0x30559121 +0, 375000, 37440, 0xbf2b5fc8 +0, 378750, 37440, 0x4b427672 +0, 382500, 37440, 0x0544b0b4 +0, 386250, 37440, 0x38a70b06 +0, 390000, 37440, 0x4ed62607 +0, 393750, 37440, 0x6efe8ea6 +0, 397500, 37440, 0x81197e11 +0, 401250, 37440, 0xf4060050 +0, 405000, 37440, 0xaf205f13 +0, 408750, 37440, 0x5fa21382 +0, 412500, 37440, 0x8627ad05 +0, 416250, 37440, 0xf7130133 +0, 420000, 37440, 0x76dea7ba +0, 423750, 37440, 0x1dbae1be +0, 427500, 37440, 0x74a933f7 +0, 431250, 37440, 0xbdcd41a3 +0, 435000, 37440, 0xf0fe8c1c +0, 438750, 37440, 0xc0036222 +0, 442500, 37440, 0x3058385c +0, 446250, 37440, 0x68141016 From 9de3cbc182577ad4813804ece967ccf401d1b559 Mon Sep 17 00:00:00 2001 From: Alex Converse Date: Fri, 6 Jan 2012 14:49:11 -0800 Subject: [PATCH 18/19] aacps: Add missing newline in error message. --- libavcodec/aacps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/aacps.c b/libavcodec/aacps.c index d7d6ef88d8..3da912c6c7 100644 --- a/libavcodec/aacps.c +++ b/libavcodec/aacps.c @@ -223,7 +223,7 @@ int ff_ps_read_data(AVCodecContext *avctx, GetBitContext *gb_host, PSContext *ps cnt -= 2 + ps_read_extension_data(gb, ps, ps_extension_id); } if (cnt < 0) { - av_log(avctx, AV_LOG_ERROR, "ps extension overflow %d", cnt); + av_log(avctx, AV_LOG_ERROR, "ps extension overflow %d\n", cnt); goto err; } skip_bits(gb, cnt); From a67b8c86d06eb5b78a0fe4cb9be4e93b29726db1 Mon Sep 17 00:00:00 2001 From: Gaurav Narula Date: Fri, 30 Dec 2011 02:50:22 +0530 Subject: [PATCH 19/19] fate: Add tests for more AAC features. 
al15_44 uses independent coupling. al18_44 uses PNS (perceptual noise substitution). am05_44 uses main prediction and independent coupling. Signed-off-by: Alex Converse --- tests/fate/aac.mak | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/fate/aac.mak b/tests/fate/aac.mak index a3e145fb4a..ea87436698 100644 --- a/tests/fate/aac.mak +++ b/tests/fate/aac.mak @@ -14,14 +14,26 @@ FATE_AAC += fate-aac-al07_96 fate-aac-al07_96: CMD = pcm -i $(SAMPLES)/aac/al07_96.mp4 fate-aac-al07_96: REF = $(SAMPLES)/aac/al07_96.s16 +FATE_AAC += fate-aac-al15_44 +fate-aac-al15_44: CMD = pcm -i $(SAMPLES)/aac/al15_44.mp4 +fate-aac-al15_44: REF = $(SAMPLES)/aac/al15_44.s16 + FATE_AAC += fate-aac-al17_44 fate-aac-al17_44: CMD = pcm -i $(SAMPLES)/aac/al17_44.mp4 fate-aac-al17_44: REF = $(SAMPLES)/aac/al17_44.s16 +FATE_AAC += fate-aac-al18_44 +fate-aac-al18_44: CMD = pcm -i $(SAMPLES)/aac/al18_44.mp4 +fate-aac-al18_44: REF = $(SAMPLES)/aac/al18_44.s16 + FATE_AAC += fate-aac-am00_88 fate-aac-am00_88: CMD = pcm -i $(SAMPLES)/aac/am00_88.mp4 fate-aac-am00_88: REF = $(SAMPLES)/aac/am00_88.s16 +FATE_AAC += fate-aac-am05_44 +fate-aac-am05_44: CMD = pcm -i $(SAMPLES)/aac/am05_44.mp4 +fate-aac-am05_44: REF = $(SAMPLES)/aac/am05_44.s16 + FATE_AAC += fate-aac-al_sbr_hq_cm_48_2 fate-aac-al_sbr_hq_cm_48_2: CMD = pcm -i $(SAMPLES)/aac/al_sbr_cm_48_2.mp4 fate-aac-al_sbr_hq_cm_48_2: REF = $(SAMPLES)/aac/al_sbr_hq_cm_48_2.s16