Michael Niedermayer 56629aa012 Merge branch 'master' into oldabi
* master:
  mmsh: fixed printf injection bug in mmsh request
  ac3enc: use correct alignment and length in channel coupling dsp functions.
  ffmpeg: don't abuse a global for passing framerate from input to output
  ffmpeg: don't abuse a global for passing channels from input to output
  ffmpeg: don't abuse a global for passing samplerate from input to output
  Make buffer size check consistent and avoid a possible overflow.
  Fix spelling.
  Full support for sending H.264 in RTP
  ARM: update ff_h264_idct8_add4_neon for 4:4:4 changes
  swscale: use SwsContext for av_log when available
  Support reading chan atoms with empty channel descriptions.
  Reindent after last commit.
  Fix multi-channel AAC encoding.
  Fix "redundant redeclaration" warning.
  Fix compilation with --disable-everything --enable-encoder=ac3/ac3_fixed.
  vf_mp: Fix large memleak.
  swscale: Remove HAVE_MMX from files that are only compiled with MMX enabled.
  swscale: Fix compilation with --disable-mmx2.
  mjpegenc: Fix JFIF version
  swscale: remove misplaced comment.
  ffmpeg: fix streaming to ffserver.
  swscale: split out RGB48 output functions from yuv2packed[12X]_c().
  build: move vpath directives to main Makefile
  swscale: fix JPEG-range YUV scaling artifacts.
  build: move ALLFFLIBS to a more logical place
  ARM: factor some repetitive code into macros
  CrystalHD: Use mp4toannexb bitstream filter.
  CrystalHD: Keep mp4toannexb filter around for entire decoder lifetime.
  Fix SVQ3 after adding 4:4:4 H.264 support
  H.264: fix CODEC_FLAG_GRAY
  4:4:4 H.264 decoding support
  matroskadec: properly decode color space in an endian neutral way
  matroskadec: use a temporary fourcc variable
  matroskaenc: ensure the written colorspace doesn't depend on host endianness
  ac3enc: fix allocation of floating point samples.
  utils: Drop pointless '#if 1' preprocessor directive.
  ac3enc: remove empty ac3_float function that is never called
  ac3enc: split templated float vs. fixed functions into a separate file.
  ac3enc: dynamically allocate AC3EncodeContext fields windowed_samples and mdct
  ac3enc: use function pointer to choose between AC-3 and E-AC-3 header output functions.
  Roll back 4:4:4 H.264 for now. Needs some ARM/PPC asm modifications.
  Fix SVQ3 after adding 4:4:4 H.264 support
  H.264: fix CODEC_FLAG_GRAY
  4:4:4 H.264 decoding support
  h264_parser: Fix whitespace after previous change.
  h264_parser: Fix behaviour when PARSER_FLAG_COMPLETE_FRAMES is set.
  wav: remove an invalid free().
  lavf: initialise reference_dts in av_estimate_timings_from_pts.
  h264: don't be so picky on decoding pps in extradata.
  avcodec.h: add or elaborate on some documentation comments.
  h264: change a few comments into error messages
  ac3dec: fix doxy-style for comment ("///>" should be "///<" instead).
  img2: add .dpx to the list of supported file extensions.
  ffv1: fix undefined behavior with insane widths.
  replace remaining usage of deprecated av_metadata_set2() by av_dict_set()
  matroskaenc: write colourspace element for rawvideo tracks
  nsv: simplify probe function
  nsv: return error code instead of discarding it in read_header()
  ARM: jrevdct_arm: simplify stack usage
  ARM: jrevdct_arm: use push/pop mnemonics
  ARM: jrevdct_arm: misc cleanup
  ARM: optimised mpadsp_apply_window_fixed
  Add some (important) changelog entries
  H264: Reduce pointless diffs to qatar
  Revert "H264: Split out hl_motion and template it, this seems a bit faster"
  libavfilter: implement avfilter_fill_frame_from_video_buffer_ref()
  avfiltergraph: make the AVFilterInOut alloc/free API public
  avfiltergraph: change the syntax of avfilter_graph_parse()
  graphparser: prefer void * over AVClass * for log contexts
  h264: Complexify frame num gap shortening code
  Update todo
  mpeg12: replace 2 asserts by av_assert0
  cmdutils: add missing NULL check in parse_options()
  x11grab: remove a memory allocation and the associated memcpy.
  Fix --disable-everything
  build: fix "make install" with documentation disabled
  build: simplify some conditional targets
  resample: clarify supported resampling.
  lavfi: fix signature for avfilter_graph_parse() and avfilter_graph_config()
  avfiltergraph: use meaningful error codes
  Revert "ac3: there was no libav in 2010 thus this code cannot be from  libav."
  Fix -t option for formats which hold dts and no pts
  dnxhd: Rename tables
  Extract rotation in MOV metadata
  bitstream: Properly promote av_reverse values before shifting.
  pixfmt: Replace 9/10bit deprecation by a technical explanation.
  libavutil/swscale: YUV444P10/YUV444P9 support.
  H.264: Fix high bit depth explicit biweight
  h264: Fix 10-bit H.264 x86 chroma v loopfilter asm.
  Replace DEBUG_SEEK/DEBUG_SI + av_log combinations by av_dlog.
  Update copyright year for ac3enc_opts_template.c.
  adts: Adjust frame size mask to follow the specification.
  APIchanges: fill hash for the avfilter_get_audio_buffer_ref_from_arrays addition
  lavfi: avfilter_merge_formats: handle case where inputs are same
  lavfi: use avfilter_get_audio_buffer_ref_from_arrays() in defaults.c
  lavfi: implement avfilter_get_audio_buffer_ref_from_arrays()
  APIchanges: remove duplicated entry
  APIchanges: fill in dates and numbers
  APIchanges: remove duplicated entry
  APIchanges: correctly interleave entries
  APIchanges: add entry for av_force_cpu_flags() addition
  lavf: bump minor after the addition of fps_probe_size to AVFormatContext
  lavc: bump minor after the addition of AVCodecContext.request_sample_fmt
  movenc: Add RTP muxer/hinter options
  movenc: Pass the RTP AVFormatContext to the SDP generation
  rtspenc: Add RTP muxer options
  rtspenc: Add an AVClass for setting muxer specific options
  rtpenc_chain: Pass the rtpflags options through to the chained muxer
  rtpenc: Declare the rtp flags private AVOptions in rtpenc.h
  sdp: Reindent after the previous commit
  rtpenc: MP4A-LATM payload support
  avoptions: Add an av_opt_flag_is_set function for inspecting flag fields
  sdp: Allow passing an AVFormatContext to the SDP generation
  mov: Fix wrong timestamp generation for fragmented movies that have time offset caused by the first edit list entry.
  mpeg12: more advanced ffmpeg mpeg2 aspect guessing code.
  ac3: there was no libav in 2010 thus this code cannot be from  libav.
  swscale: split YUYV output out of yuv2packed[12X]_c().
  dict: This code was developed in ffmpeg and not libav, nor by libav developers. Correct copyright notices.
  lavf: make compute_pkt_fields2() return meaningful error values
  matroskadec: set timestamps for RealAudio packets.
  intelh263dec: aspect ratio processing fix.
  intelh263dec: fix "Strict H.263 compliance"  file playback
  oss,sndio: simplify by using FFMIN.
  swscale: extract monowhite/black output from yuv2packed[12X]_c().
  swscale: de-macro'ify RGB15/16/32 input functions.
  swscale: rearrange code.
  movdec: Add support for the 'wfex' atom.
  ffmpeg.c: Add a necessary const qualifier
  riff: Fix potential memleak.
  swscale: change 48bit RGB input macros to inline functions.
  swscale: change 9/10bit YUV input macros to inline functions.
  swscale: extract gray16 output functions from yuv2packed[12X]().
  swscale: use standard clipping functions.
  swscale: merge macros that are used only once.
  swscale: fix function declarations in swscale.c.
  swscale: fix function declaration keywords in x86/swscale_template.c.
  jpegdec: actually search for and parse RSTn
  crypto: Use av_freep instead of av_free
  Revert "crypto: fix potential double free"
  Revert "build: remove empty $(OBJS) target"
  crypto: Use av_freep instead of av_free
  aac: fix adts frame size mask, fix demuxer probing for some files.
  lavf: don't try to free private options if priv_data is NULL.
  lavfi: handle NULL lists in avfilter_make_format_list
  swscale: fix types of assembly arguments.
  swscale: move two macros that are only used once into caller.
  swscale: remove unused function.
  Fix "mixed declarations and code" warnings.
  options: Add missing braces around struct initializer.
  mov: Remove leftover crufty debug statement with references to a local file.
  dvbsubdec: Fix compilation of debug code.
  Remove all uses of now deprecated metadata functions.
  Move metadata API from lavf to lavu.
  crypto: fix potential double free
  libx264: fix double free
  ffplay: remove -debug option
  ffplay: remove -vismv option
  mpegvideo: use av_get_picture_type_char() in ff_print_debug_info()
  Remove some non-compiling debug messages.
  ffplay: Fix non-compiling debug printf and replace it by av_dlog.
  H264: x86 predict init cosmetics.
  ac3enc: Fix linking of AC-3 encoder without the E-AC-3 encoder.
  Move E-AC-3 encoder functions to a separate eac3enc.c file.
  ac3enc: remove convenience macro, #define DEBUG
  ac3enc: remove unused #define
  vc1: re-initialize tables after width/height change.
  APIchanges: fill-in git commit hash for av_get_bytes_per_sample() addition
  samplefmt: add av_get_bytes_per_sample()
  libvpxenc: add forgotten AVClass.
  iirfilter: fix biquad filter coefficients.
  swscale: remove duplicate conversion routine in swScale().
  swscale: add yuv2planar/packed function typedefs.
  swscale: integrate yuv2nv12X_C into yuv2yuvX() function pointers.
  swscale: reindent x86 init code.
  swscale: extract SWS_FULL_CHR_H_INT conditional into init code.
  swscale: cosmetics.
  swscale: remove alp/chr/lumSrcOffset.
  swscale: un-special-case yuv2yuvX16_c().
  shorten: Remove stray DEBUG #define and corresponding av_dlog statement.
  vorbisdec: Restore mistakenly removed debug output.
  v4l2: set default standard to NULL
  sws: make dither_scale const
  configure: Document --enable-vdpau.
  Replace some av_log/printf + #ifdef combinations by av_dlog.
  Replace some nonstandard DEBUG_* preprocessor directives by plain DEBUG.
  svq1dec: Fix debug statements that referenced non-existing context.
  Replace some printf instances in debug code by av_log.
  showfiltfmts: use av_get_pix_fmt_name()
  inverse.c: Replace unnecessary intmath.h header by necessary stdint.h.
  Drop unnecessary directory prefixes from #include directives.
  Makefile: critical build fix after the merge. make fate passed locally due to ffmpeg/ffmpeg_g being there from before
  ffplay: Fix -vismv
  mem: Trying to workaround posix_memalign() bug on OSX
  build: remove empty $(OBJS) target
  build: make rule for linking ff* apply only to these targets
  eval: add support for pow() function
  build: rearrange some lines in a more logical way
  s302m: fix resampling for 16 and 24bits.
  ARM: remove MUL64 and MAC64 inline asm
  build: clean up .PHONY lists
  build: move all (un)install* target aliases to toplevel Makefile
  flvenc: propagate error properly
  build: remove stale dependency
  build: do not add CFLAGS-yes to CFLAGS
  utils.c: fix crash with threading enabled.
  configure: simplify source_path setup
  configure: remove --source-path option
  pixdesc: remove duplicated header inclusion
  lavfi: use av_samples_alloc() in avfilter_default_get_audio_buffer()
  lavfi: prefer nb_samples over size in AVFilterBufferRefAudioProps
  samplefmt: switch nb_channels/nb_samples params order in av_samples_alloc()
  samplefmt: change layout for arrays created by av_samples_alloc() and _fill_arrays()
  lavf: deprecate AVFormatParameters.time_base.
  img2: add framerate private option.
  img2: add video_size private option.
  img2: add pixel_format private option.
  tty: add framerate private option.
  Move code for "ffmpeg: fix massive leak occurring when seeking" / e4841a404bdabfeafb917454d510b60d888cb761 elsewhere
  lavf: remove reference to output-example in Makefile
  vsrc_buffer: add flags param to av_vsrc_buffer_add_video_buffer_ref
  Remove some unused scripts from tools/.
  Add x86 assembly for some 10-bit H.264 intra predict functions.
  v4l2: do not force NTSC as standard
  Add const to avfilter_get_video_buffer_ref_from_arrays arguments.
  Skip tableprint.h during 'make checkheaders'.
  Remove unnecessary LIBAVFORMAT_BUILD #ifdef.
  Drop explicit filenames from @file Doxygen tags.
  Skip generated table headers during 'make checkheaders'.
  lavf,lavc: free avoptions in a generic way.
  AVOptions: add av_opt_free convenience function.
  sdl: align option fields after last commit
  ffmpeg: fix massive leak occurring when seeking
  ffprobe: implement -i FILE option
  tableprint: Restore mistakenly deleted common.h #include for FF_ARRAY_ELEMS.
  ffplay.texi: document -i FILE option
  cmdutils: remove unnecessary OPT_DUMMY implementation
  cmdutils: change the signature of the function argument in parse_options()
  sdl: use the filename for defining the window title, if not specified
  tiff: print log in case of unknown / unsupported tag.
  tiff: fix linesize for mono-white/black formats.
  Fix build of eval-test program
  configure: Document --enable-vaapi
  swscale: override the lack of the accurate rounding flag when needed for dither.
  swscale: factor should_dither out
  ac3enc: extract all exponents for the frame at once
  ARM: remove MULL inline asm
  mathops: use MUL64 macro where it forms part of other ops
  tty: factorise returning error codes.
  rawdec: add framerate private option.
  x11grab: add framerate private option.
  fbdev,v4l2: remove some forgotten uses of AVFormatParameters.time_base.
  bktr: don't error when AVFormatParameters.time_base isn't set.
  cmdutils: add missing const qualifier
  Skip headers not designed to work standalone during 'make checkheaders'.
  Add missing #includes to make headers self-contained.
  musepack: remove unnecessary #include from mpcdata.h
  musepack: remove extraneous mpcdata.h inclusions
  Fix error check in av_file_map()
  udp: support old, crappy non pthread mode
  ffmpeg: use opt_acodec when setting audio codec in opt_target.
  ffmpeg: fix segfault with too many output files
  ffplay: error out with invalid sample rate or channels.
  oggdec: fix Ticket185
  build: simplify commands for clean target
  j2kdec: don't fail on non-zero cblock style.
  makefile: fix j2k encoder dependencies
  udp: fix indentation
  swscale: split swscale.c in unscaled and generic conversion routines.
  swscale: cosmetics.
  swscale: integrate (literally) swscale_template.c in swscale.c.
  swscale: split out x86/swscale_template.c from swscale.c.
  swscale: enable hScale_altivec_real.
  swscale: split out ppc _template.c files from main swscale.c.
  swscale: remove indirections in ppc/swscale_template.c.
  swscale: split out unscaled altivec YUV converters in their own file.
  mpegvideoenc: fix multislice fate tests with threading disabled.
  cmdutils: move "#undef main" from ffplay.c to cmdutils.h
  wav: update size check for ds64
  wav: fix skip size at end of ds64 chunk
  mpegts: Wrap #ifdef DEBUG and av_hex_dump_log() combination in a macro.
  build: Simplify texi2html invocation through the --output option.
  Mark some variables with av_unused
  Replace avcodec_get_pix_fmt_name() by av_get_pix_fmt_name().
  svq3: Check negative mb_type to fix potential crash.
  svq3: Move svq3-specific fields to their own context.
  rawdec: initialize return value to 0.
  Remove unused get_psnr() prototype
  rawdec: don't leak option strings.
  bktr: get default framerate from video standard.
  swscale: remove unused COMPILE_TEMPLATE_ALTIVEC.
  h264 fill_filter_caches: Don't init chroma nnz_cache.
  In print_report, print progression time in hours:mins:secs:us
  ffmpeg: In print_report, use int64_t for pts to check for 0 and avoid inf value for bitrate.
  In libswscale, use all lines when converting from 422p to rgb with mmx, improve quality.
  Replace custom DEBUG preprocessor trickery by the standard one.
  vorbis: Remove non-compiling debug statement.
  vorbis: Remove pointless DEBUG #ifdef around debug output macros.
  cook: Remove non-compiling debug output.
  Remove pointless #ifdefs around function declarations in a header.
  Replace #ifdef + av_log() combinations by av_dlog().
  Replace custom debug output functions by av_dlog().
  cook: Remove unused debug functions.
  lavfi: add avfilter_link_free() function
  swscale: reintroduce sws_format_name() symbol
  Remove stray extra arguments from av_dlog() invocations.
  targa: fix big-endian build
  v4l2: remove one forgotten use of AVFormatParameters.pix_fmt.
  vfwcap: add a framerate private option.
  v4l2: add a framerate private option.
  libdc1394: add a framerate private option.
  fbdev: add a framerate private option.
  bktr: add a framerate private option.
  oma: check avio_read() return value
  nutdec: remove unused variable
  Remove unused variables
  swscale: dither for planar yuv outputs
  swscale: Fix use of uninitialized values (bug probably introduced from a merge of libav)
  cpudetect: add av_force_cpu_flags()
  swscale: allocate larger buffer to handle altivec overreads.
  H264/MPEG frame-level multi-threading.
  vsrc_buffer: propagate error code in av_vsrc_buffer_add_frame()
  lavfi: reindent after the previous commit
  lavfi: add braces around the block of an if() expression in avfilter_default_get_video_buffer
  lavfi: clarify the context of a comment in avfilter_default_get_video_buffer()
  lavfi: apply misc style fixes
  Cosmetic changes to h264_idct_10bit.asm.
  2x faster h264_idct_add8_10.
  aacenc: Add stereo_mode option.
  h264: remove CONFIG_GPL from x86 intra prediction code.
  doc: cosmetics: libx264 typos
  postprocess: Remove test for impossible condition (was: Re: postprocess.c: replace check for p==NULL with *p==0)
  Fix various uninitialized variable warnings
  Port removal of get_sws_cpuflags from MPlayer's libmpcodecs.
  Replace "vector const" by "const vector" otherwise gcc 4.6.0 fails.
  Port recent changes to MPlayer libmpcodecs.
  Replace non-existent HAVE_SSE2 with HAVE_SSE.
  Simplify code and avoid compiler warning about incompatible types.
  Fix type of out[] variable, it should not be const.
  ARM: ac3dsp: optimised update_bap_counts()
  mpegaudiodec: Fix av_dlog() invocation.
  swscale: fix compilation of bfin due to missing pixdesc.h header
  lavf: tag dump_format() as @deprecated
  yuv4mpeg: complain and exit if a non-rawvideo stream is selected
  ffmpeg: handle copy of packets for AVFMT_RAWPICTURE output formats
  doc/examples: give meaningful names to the example files
  h264/10bit: add HAVE_ALIGNED_STACK checks.
  swscale: More accurate rounding in YSCALE_YUV_2_PACKEDX_FULL_C()
  Update 8-bit H.264 IDCT function names to reflect bit-depth.
  Add IDCT functions for 10-bit H.264.
  mpegaudioenc: Fix broken av_dlog statement.
  Employ correct printf format specifiers, mostly in debug output.
  ARM: fix MUL64 inline asm for pre-armv6
  doc: add libvpx encoder section
  vf_drawtext: Replace FFmpeg by Libav in license boilerplate.
  mpegaudiodec: remove unused code and variables
  postprocess.c: filter name needs to be double 0 terminated
  improved 'edts' atom writing support
  mpegaudio: clean up compute_antialias() definition
  vp8: fix segmentation race during frame-threading.
  Port libmpcodec fixes from MPlayer.
  Merge remote-tracking branch 'ffmpeg-mt/master'
  swscale: Remove unused variable.
  ARM: simplify inline asm with 64-bit operands
  Add "const" to avoid "initialization discards qualifiers" warning.
  Add const to fix "cast discards qualifiers" warnings.
  Include pixdesc.h for av_get_pix_fmt_name.
  wav: Don't avio_seek() if we know we'll run into EOF
  api-example: uppercase first letter in "copyright"
  output-example: create @file doxy from text in the copyright header
  examples: move API examples to a dedicated dir in doc
  ffmpeg: simplify opt_*_codec() options
  v4l2: rewrite code iterating the supported standards
  v4l2: perform minor style fixes
  v4l2: replace memset() with explicit struct initialization
  rawdec: fail in case of unknown pixel format
  swscale: remove sws_format_name()
  error.c: fix compile flags
  TCP: change default timeout to 5sec
  Revert "Timeout TCP open() after 5 seconds."
  Fix various unused variable warnings
  Fix various bad printf format warnings
  ARM: enable UAL syntax in asm.S
  Remove now unused nb_istreams variable.
  Add const to vector types for input in altivec code.
  Remove unused variable, avoiding compiler warning.
  Cast pointers to uintptr_t rather than unsigned int.
  v4l2: don't leak video standard string on error.
  swscale: Remove disabled code.
  avfilter: Surround function only used in debug mode by appropriate #ifdef.
  vf_crop: Replace #ifdef DEBUG + av_log() by av_dlog().
  build: remove BUILD_ROOT variable
  vp8: use av_clip_uintp2() where possible
  swscale: Commits that could not be pulled earlier due to bugs 
  Commits that could not be pulled earlier due to bugs.
  Revert 1a5e4fd8c5b99478b4e08a69261930bb12aa948b for postproc. This broke the code
  doc: correct AC-3 option subsection placement
  ac3enc: fix LOCAL_ALIGNED usage in count_mantissa_bits()
  ac3dsp: do not use the ff_* prefix when referencing ff_ac3_bap_bits.
  swscale: use av_clip_uint8() in yuv2yuv1_c().
  swscale: replace formatConvBuffer[VOF] by allocated array.
  v4l2: create file @doxy from text in the copyright header
  v4l2: remove pointless empty lines
  v4l2: set default standard to NULL
  v4l2: use OFFSET macro when setting options
  ac3dsp: fix loop condition in ac3_update_bap_counts_c()
  ARM: unbreak build
  lavdev: add SDL output device
  ac3enc: modify mantissa bit counting to keep bap counts for all values of bap instead of just 0 to 4.
  ac3enc: split mantissa bit counting into a separate function.
  ac3enc: store per-block/channel bap pointers by reference block in a 2D array rather than in the AC3Block struct.
  lavu: add av_get_pix_fmt_name() convenience function
  iff: remove duplicated file description
  cmdutils: remove OPT_FUNC2
  get_bits: add av_unused tag to cache variable
  sws: replace all long with int.
  ARM: aacdec: fix constraints on inline asm
  ARM: remove unnecessary volatile from inline asm
  ARM: add "cc" clobbers to inline asm where needed
  ARM: improve FASTDIV asm
  ac3enc: use LOCAL_ALIGNED macro
  APIchanges: fill in git hash for av_get_pix_fmt_name (0420bd7).
  lavu: add av_get_pix_fmt_name() convenience function
  cmdutils: remove OPT_FUNC2
  swscale: fix crash in bilinear scaling.
  vpxenc: add VP8E_SET_STATIC_THRESHOLD mapping
  webm: support stereo videos in matroska/webm muxer
  rgb2rgb: remove duplicate mmx/mmx2/3dnow/sse2 functions.
  swscale: reindent h[cy]scale_fast() and updateDitherTables().
  swscale: reformat x86/swscale_template.c.
  swscale: remove duplicate mmx/mmx2 functions if they are identical.
  swscale: remove if (c->dstFormat) branch from yuv2packed[12X]().
  swscale: remove if(full_chr_int) from yuv2packed1().
  swscale: remove if(accurate_rnd) branch from functions.
  swscale: revive SWS_CPU_CAPS until next major bump.
  swscale: Remove commented-out printf cruft.
  Export PCR pid
  Export more transport stream information.
  Output MPEG-TS stream identifiers.
  lavf: deprecate AVFormatParameters.pix_fmt.
  rawdec: add a pixel_format private option.
  v4l2: add a pixel_format private option.
  libdc1394: add a pixel_format private option.
  cosmetics: indentation and alignment after previous commit
  ac3enc: add support for E-AC-3 encoding.
  ac3enc: Move AC-3 AVOptions array to a separate file to make it easier to use only selected options for the different AC-3 encoder types.
  ARM: disable ff_vector_fmul_vfp on VFPv3 systems
  ARM: check for VFPv3
  swscale: Remove unused variables in x86 code.
  doc: Drop DJGPP section, Libav now compiles out-of-the-box on FreeDOS.
  x86: Add appropriate ifdefs around certain AVX functions.
  cmdutils: use sws_freeContext() instead of av_freep().
  swscale: delay allocation of formatConvBuffer().
  swscale: fix build with --disable-swscale-alpha.
  movenc: Deprecate the global RTP hinting flag, use a private AVOption instead
  movenc: Add an AVClass for setting muxer specific options
  libdc1394: choose best video mode and rate based on camera capabilities.
  Remove support for libdc1394 < 2.0.
  avopt: fix segfault
  swscale: fix non-bitexact yuv2yuv[X2]() MMX/MMX2 functions.
  swscale: don't lose precision on RGB/BGR48 input, that is, don't drop half the bits.
  patch checklist: suggest --disable-yasm test.
  lavdev: prefer the inclusion of avdevice.h over that of libavformat/avformat.h
  lavdev: include libavformat/avformat.h in avdevice.h
  fate.txt: replace FATE rsync command with a make command
  configure: report yasm/nasm presence properly
  tcp: make connect() timeout properly
  rawdec: factor video demuxer definitions into a macro.
  rtspdec: add initial_pause private option.
  lavf: deprecate AVFormatParameters.width/height.
  tty: add video_size private option.
  rawdec: add video_size private option.
  x11grab: add video_size private option.
  x11grab: factorize returning error codes.
  vfwcap: add video_size private option.
  v4l2: add video_size private option.
  v4l2: factorize returning error codes.
  libdc1394: add video_size private option.
  libdc1394: return meaningful error codes.
  bktr: add video_size private option.
  bktr: factorize returning error codes.
  Fix memleak
  Fix typo
  Remove specific note when not specific
  Minor cleanup in libx264.c
  Add metadata conversion table to the wav demuxer
  Fix 32bit rawvideo in avi on big-endian.
  id3v2: Check malloc result. ID3v2 tags can be very large.
  id3v2: Initialize tflags for version 2.2.
  webm: Additional options/presets for VP8 encodes under FFmpeg
  muxers: Add a flag to mark muxers that allow (non strict) monotone timestamps.
  swscale: Do not lose precision on yuv values after rgb->yuv.
  libx264: support aspect Ratio Switch
  ARM: add ARMv6 optimised av_clip_uintp2
  ARM: remove volatile from asm statements in libavutil/intmath
  ARM: fix av_clipl_int32_arm()
  v4l: include avdevice.h
  ffserver: move close_connection() call to avoid a temporary string and copy.
  lavf: initialize demuxer private options.
  AVOptions: set string default values.
  Fix compilation with YASM/NASM versions not supporting AVX.
  lavdevice: mark v4l for removal on next major bump.
  swscale: fix compile on ppc.
  swscale: fix compile on x86-32.
  build: Remove generated .version file on distclean.
  configure: Add -D_GNU_SOURCE to CPPFLAGS on OS/2.
  doc: Drop hint at --enable-memalign-hack for MinGW, it is now autodetected.
  ffplay: Remove disabled code.
  Mark parameterless function declarations as 'void'.
  swscale: use av_clip_uint8() in yuv2yuv1_c().
  swscale: remove VOF/VOFW.
  swscale: split chroma buffers into separate U/V planes.
  swscale: replace formatConvBuffer[VOF] by allocated array.
  rgb2rgb: remove duplicate mmx/mmx2/3dnow/sse2 functions.
  swscale: reindent h[cy]scale_fast() and updateDitherTables().
  swscale: reformat x86/swscale_template.c.
  swscale: remove duplicate mmx/mmx2 functions if they are identical.
  swscale: remove if (c->dstFormat) branch from yuv2packed[12X]().
  swscale: remove if(full_chr_int) from yuv2packed1().
  swscale: remove if(accurate_rnd) branch from functions.
  ffserver: Fix a null pointer dereference as a result of the FF_API_MAX_STREAMS cleanup.
  libdc1394: fix compilation.
  swscale: revive SWS_CPU_CAPS until next major bump.
  swscale: Remove commented-out printf cruft.
  ac3enc: initialize all coefficients to zero.
  ffv1: fix 16bits multithreading
  doc: create separate section for audio encoders
  swscale: Remove orphaned, commented-out function declaration.
  swscale: Eliminate rgb24toyv12_c() duplication.
  mpegvideo_enc: use AV_LOG_ERROR instead of AV_LOG_INFO for two error messages
  Fail when lowres value is lower than 0
  Remove h263_msmpeg4 from MpegEncContext.
  APIchanges: Fill in git hash for fps_probe_size (30315a8)
  avformat: Add fpsprobesize as an AVOption.
  swscale: document SWS_CPU_CAPS*
  Revert removal of SWS flags from e66149e714006d099d1ebfcca3f22ca74fc7dcf4
  avoptions: Return explicitly NAN or {0,0} if the option isn't found
  rtmp: Reindent
  rtmp: Don't try to do av_malloc(0)
  swscale: remove duplication of rgb24toyv12_c()
  Return -1 on invalid input instead of crashing.
  vf_mp: fix name of the remove-logo filter referenced in filters.texi
  tty: replace AVFormatParameters.sample_rate abuse with a private option.
  Fix end time of last chapter in compute_chapters_end
  ffmpeg: get rid of useless AVInputStream.nb_streams.
  ffmpeg: simplify managing input files and streams
  ffmpeg: purge redundant AVInputStream.index.
  lavf: deprecate AVFormatParameters.channel.
  libdc1394: add a private option for channel.
  dv1394: add a private option for channel.
  v4l2: reindent.
  v4l2: add a private option for channel.
  lavf: deprecate AVFormatParameters.standard.
  v4l2: add a private option for video standard.
  v4l: add a private option for video standard.
  dv1394: add a private option for video standard.
  bktr: add a private option for video standard.
  lavf: deprecate AVFormatParameters.{channels,sample_rate}.
  rawdec: add sample_rate/channels private options.
  ALSA: add channels and sample_rate private options.
  oss: add channels and sample_rate private options.
  sndio: add channels and sample_rate private options.
  lavf: deprecate AVFormatParameters.mpeg2ts_raw.
  mpegts: add compute_pcr option.
  lavf: add priv_class field to AVInputFormat.
  lavfi: add select filter
  eval: implement not() expression
  vsrc_buffer: return an error code if no frames are available
  ffmpeg: handle the case when get_filtered_frame() fails
  indeo3: add out-of-buffer write check
  Add reading of disc number to mov.c
  Fix end time of last chapter in compute_chapters_end().
  Do not reset channel_layout to 0.
  vsrc_buffer: remove duplicated file description
  Merge swscale bloatup This will be cleaned up in the next merge
  swscale: MMX optim of hscale16()
  swscale: don't lose bits on planar >8bit yuv and gray input.
  swscale: Switch to Ronald's yuv2yuvX16inC_template(); it's very similar to Baptiste's and supports alpha
  configure: enable memalign_hack automatically when needed
  rawdec: fix decoding of QT WRAW files
  matroska: improve declaration of video_stereo_* constant tables
  matroskadec: fix reverted condition to accept combine_plane operation
  Fix register types for LOAD_AB arguments, fixes compilation with NASM.
  swscale: unbreak the build on non-x86 systems.
  swscale: remove if(bitexact) branch from functions.
  swscale: remove if(canMMX2BeUsed) conditional.
  swscale: remove swScale_{c,MMX,MMX2} duplication.
  swscale: use emms_c().
  Move emms_c() from libavcodec to libavutil.
  tiff: set palette in the context when specified in TIFF_PAL tag
  rtsp: use strtoul to parse rtptime and seq values.
  pgssubdec: fix incorrect colors.
  dvdsubdec: fix incorrect colors.
  ape: Allow demuxing of files with metadata tags.
  swscale: remove dead macro WRITEBGR24OLD.
  swscale: remove AMD3DNOW "optimizations".
  swscale: remove duplicate code in ppc/ subdirectory.
  swscale: remove duplicated x86/ functions.
  swscale: force --enable-runtime-cpudetect and remove SWS_CPU_CAPS_*.
  vsrc_buffer.h: add file doxy
  vsrc_buffer: tweak error message in init()
  wav: fix various printf warnings related to wrong argument type
  wav: propagate ff_get_wav_header() error code in w64_read_header()
  msmpeg4: reindent.
  lavc: remove msmpeg4v1 encoder.
  Remove avconfig.h and INCINSTDIRs on uninstall.
  ac3enc: add channel coupling support
  partial revert of 01d3ebaf219d83c0a70cdf9696ecb6b868e8a165
  fate: reenable frext-pph10i4_panasonic_a after the bitstream has been fixed
  avcodec_find_decoder: prefer non experimental decoders.
  j2kdec: mark as CODEC_CAP_EXPERIMENTAL
  j2k[c/h] j2kdec.c: Implement 2 code block styles
  j2k: Add void as the parameter of function ff_j2k_init_tier1_luts
  Add Kamil Nowosads j2k code.
  matroska: cleanup handling of video stereo mode
  oggdec: use av_dlog()
  mem: define the MAX_MALLOC_SIZE constant and use it in place of INT_MAX
  configure: Add -U__STRICT_ANSI__ to CPPFLAGS on Cygwin and DOS.
  muxers.texi changes for mkv/webm options
  aacdec: fix typo in scalefactor clipping check
  mpegaudio: Correct license header
  add 5.1 to stereo downmix to resample.c this is based on previous 6to2channel-resample.patch from ffmpeg2theora but updated to work with trunk and using av_clip_int16.
  fate: fix fate-h264-conformance-frext-pph10i4-panasonic-a crcs.
  fate: update 9/10bit refs.
  h264: Properly set coded_{width, height} when parsing H.264.
  x86 asm: Add SECTION_TEXT to dct32_sse.asm.
  Fix 9/10 bit in swscale.
  Do not ask for samples if a specific channel layout was requested.
  libx264: specify field for default union values in options
  movdec: dont divide by zero when stts_data[0].duration = 0.
  Fix ticket127
  dct32: Replacing libav by ffmpeg in the license header with the authors permission. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
  ffmpeg: Don't trigger url_interrupt_cb on the first signal
  avoptions: Check the return value from av_get_number
  lavf: fix style for avformat_alloc_output_context2()
  lavf: deprecate avformat_alloc_output_context() in favor of avformat_alloc_output_context2()
  lavfi: make vsrc_buffer.h header public
  dct32_sse: eliminate some spills
  Fix compilation with --disable-yasm.
  Fix dct32() compilation with --disable-yasm
  mpeg2dec: Fix lowres 3
  lavfi: bump minor and add changelog entry after the split filter addition
  vf_split: add documentation to filters.texi
  vf_split: give more meaningful names to the output pads
  vf_split: define draw_slice() before end_frame()
  vf_split: add description
  vf_split: fix various nits
  wmadec: avoid infinit loop.
  DirectShow capture: Fix build
  ffmpeg: get rid of the -vglobal option.
  dct32: Add AVX implementation of 32-point DCT
  dct32: Change pass 6 permutation to allow for AVX implementation
  dct32: port SSE 32-point DCT to YASM
  matroska: switch stereo mode from int to string and add support in the demuxer too
  matroska: cosmetics
  Create a stereo_mode metadata tag to specify the stereo 3d video layout using the StereoMode tag in a matroska/webm video track.
  libavfilter: vf_split from soc.
  DirectShow capture support Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
  multiple inclusion guard cleanup
  avio: document buffer must created with av_malloc() and friends
  avio: check AVIOContext malloc failure
  swscale: point out an alternative to sws_getContext
  svq3: Do initialization after parsing the extradata
  Fix channel_layout documentation.
  add changelog entries for 0.7_beta2
  ffserver: dont just crash
  fix ffserver's SIGSEGV
  avoptions: Support getting flag values using av_get_int
  preset dir for win32
  Merge remote-tracking branch 'ffmpeg-mt/master'
  Add a flag to disable side data merging.
  Merge/split side data.
  Encoding alac with more than two channels is not supported.
  mp3lame: add #include required for AV_RB32 macro.
  configure: make executable again
  LATM/AAC: Free previously initialized context on reinit.
  configure: Do not unconditionally add -Wall to host CFLAGS.
  configure: Set OS/2 objformat to a.out.
  Add support for a.out object format to assembler macros.
  fate: disable threading for encoding
  fate: add comment field
  fate: allow overriding default build and install dirs
  mpegtsenc: Add an AVClass pointer to the private data
  mpegaudio: clean up #includes
  mpegaudio: move all header parsing to mpegaudiodecheader.[ch]
  vf_libopencv: prefer opencv/cxcore.h over cxtypes.h
  decoders.texi: fix typos in rawvideo section
  cmdutils: use const AVClass * when senseful
  encoders.texi: add documentation for the libx264 encoder
  decoders.texi: add documentation for rawvideo decoder and options
  doc: add decoders.texi file
  encoders.texi: decrease level for audio encoders section
  ffprobe.texi: remove inclusion of muxers section
  indeo3: release buffer in indeo3_decode_end()
  indeo3: remove unnecessary includes
  indeo3: add @file doxy and a link to multimedia wiki documentation
  cmdutils: reset *picref_ptr to NULL in get_filtered_frame()
  ffmpeg: remove useless NULL-check on avfilter_unref_buffer
  libmp3lame: include "libavutil/intreadwrite.h" header
  qdm2: Use floating point synthesis filter.
  h264: correct border check.
  h264: fix loopfilter with threading at slice boundaries.
  Fix ff_mpa_synth_filter_fixed() prototype
  Reindent
  rtpenc_chain: Pass the MP4A_LATM flag to chained muxers
  rtpenc: MP4A-LATM payload support
  movenc: Pass AVFormatContext flags to the SDP generation
  sdp: Allow passing AVFormatContext flags to the SDP generation
  vsrc_buffer: document av_vsrc_buffer_add_video_buffer_ref()
  vsrc_buffer: add av_vsrc_buffer_add_frame()
  vsrc_buffer: fix example in docs, add mandatory parameters
  vsrc_buffer: make the source accept sws_param in init
  vsrc_buffer: propagate avfilter_open() error code
  vsrc_buffer: fix style
  lavfi: add avfilter_get_video_buffer_ref_from_frame to avcodec.h
  vsrc_buffer: remove dependency on AVFrame
  Rename costablegen.c ---> cos_tablegen.c.
  Collapse tableprint.c into tableprint.h.
  Simplify trig table rules
  Remove potentially unstable filenames from comments in generated files.
  Ignore generated tables and generated table generator programs.
  Simplify CLEANFILES make variable by using wildcards.
  Remove silly insults from avformat_version() Doxygen documentation.
  mpegaudiodsp: fix x86 and ppc makefiles
  configure: Adjust AVX assembler check.
  mpegaudio: remove unused version of SAME_HEADER_MASK
  mpegaudio: remove useless #undef at end of file
  asfdec: add missing #include for av_bswap32()
  mpegaudio: merge two #if CONFIG_FLOAT blocks
  mpegaudio: move some struct definitions from mpegaudio.h
  Move some mpegaudio functions to new mpegaudiodsp subsystem
  Clean up #includes in cmdutils.h.
  g729: Merge g729.h into g729dec.c.
  av_find_stream_info: Print more details about max anaylize duration failures.
  10l: wrap float_interleave functions in HAVE_YASM.
  Add little description for -rc_override
  APIchanges: fill in date and commit for request_sample_fmt
  Add floating-point sample format support to the ac3, eac3, dca, aac, and vorbis decoders.
  Add support for request_sample_format in ffmpeg and ffplay.
  Add APIchanges entry for request_sample_fmt.
  Add request_sample_fmt field to AVCodecContext.
  Add float_interleave() to FmtConvertContext with x86-optimized versions.
  Remove unused make variable SEEK_REFFILE
  fate: remove redundant aref and vref references
  Parse 'bext' metadata in the wav demuxer
  Cosmetics: indent
  Keep parsing wav until EOF if the input is seekable and we know the size of the data tag
  Refactor the tag checking into a switch statement
  Use avio_tell() instead of url_ftell()
  add x264opts entry to docs
  cleaned up the udp.c, removed some variables and an av_log
  configure: favor pkg_config over sdl_config
  libx264: support passing arbitrary parameters.
  ffmpeg: dont show_banner() on verbose<0
  fate: remove do_ffmpeg_nocheck function
  fate: do not collect -benchmark output
  mpegaudiodec: remove decode_end() function
  fate: run aref and vref as regular tests
  mpegaudio: sanitise compute_antialias_* names
  mpeg12: add slice-threading checks to slice-threading initializers.
  h264: copy pixel_shift between slice threading contexts.
  mdec: enable frame-level multithreading.
  mdec.c: fix overread.
  id3v2: prevent unsigned integer overflow in ff_id3v2_parse()
  id3v2: add @file doxy and link to format documentation
  configure: opensolaris install is not compatible with ffmpeg, allow overriding it.
  Fix compilation of iirfilter-test.
  eval: opensolaris strtod() cannot handle 0x1234
  libx264: handle closed GOP codec flag
  lavf: remove duplicate assignment in avformat_alloc_context.
  lavf: use designated initializers for AVClasses.
  Make sure neither data_size nor sample_count is negative
  Refactor the 'fmt ' tag search and parsing
  flvdec: clenup debug code
  asfdec: fix possible overread on broken files.
  asfdec: do not fall back to binary/generic search
  asfdec: reindent after previous commit c7bd5ed
  asfdec: fallback to binary search internally
  mpegaudio: add _fixed suffix to some names
  Modify x86util.asm to ease transitioning to 10-bit H.264 assembly.
  ffmpeg: reset top_field_first in opt_input_file().
  dct: build dct32 as separate object files
  qdm2: include correct header for rdft
  Ogg demuxer: give meaningful error codes and warnings.
  update changelog with 9/10 bit H264 and FFV1 changes
  Add some forgotten const to function arguments in libavfilter & libavformat.
  Write channel_layout for multichannel aif files.
  Fix ff_mov_write_chan() so it can be used by other muxers.
  Fix some mov files with little endian audio (tickets 201 - 203).
  iff/8svx: redesign 8SVX demuxing and decoding for handling stereo samples correctly
  iff: compact code setting metadata tags
  iff: fix bitrate computation for compressed audio stream
  iff: distinguish fields for audio and video compression
  imgutils: introduce internal image_get_linesize() and use it
  imgutils: make av_image_get_linesize() return AVERROR(EINVAL) for invalid pixel formats
  drawtext: specify union type for setting default options
  drawtext: reindent after the previous commit
  drawtext: fix strftime() text expansion
  ffmpeg: fix -aspect cli option
  Restructure video filter implementation in ffmpeg.c.
  ffplay: remove audio_write_get_buf_size() forward declaration
  lavfi: print key-frame and picture type information in ff_dlog_ref()
  mathops: remove ancient confusing comment
  rawdec: Allow overriding top field first.
  ffmpeg: initialize input_codec array earlier.
  cmdutils: Allocate private decoder context if its not allocated yet.
  cws2fws: Improve error message wording.
  tools: Check the return value of write().
  mpegaudio: move OUT_FMT macro to mpegaudiodec.c
  mpegaudio: remove OUT_MIN/MAX macros
  Add missing #includes to mp3_header_(de)compress bsf
  dct: fix indentation
  dct: bypass table allocation for DCT_II of size 32
  pngdec: relax condition for setting monoblack pixel format
  h264dsp_mmx: Add #ifdefs around some mmxext functions on x86_64.
  Remove unused header mpegaudio3.h.
  Support decoding of 1bpp rawvideo in avi (ticket 205).
  Support decoding of 2bpp rawvideo in avi (ticket 206).
  Bump minor after adding a caf muxer.
  configure: another try on fixing osx/mingw SDL
  aacdec: Use float instead of int16_t for ltp_state to avoid needless rounding.
  av_picture_crop(): Support simple cases with packed pixels too.
  acelp: Remove unused gray_decode table.
  dfa: Remove unused variable.
  configure: Include AVX availability in summary output.
  rawdec: propagate pict_type information to the output frame
  showinfo: replace "CRC" by "checksum"
  showinfo: fix vertical align nit
  showinfo: fix computation of Adler checksum
  imgutils: generalize linesize computation for bitstream formats
  configure: use same CPPFLAGS in kFreeBSD as Linux

Conflicts:
	ffserver.c
	libavcodec/avcodec.h
	libavcodec/opt.h
	libavcodec/version.h
	libavdevice/avdevice.h
	libavfilter/avfilter.h
	libavformat/avformat.h
	libavformat/metadata.c
	libavformat/metadata.h
	libavformat/utils.c
	libavformat/version.h
	libavutil/avutil.h
	libavutil/mem.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
2011-06-16 06:29:01 +02:00

796 lines
31 KiB
C

/*
* DSP utils
* Copyright (c) 2000, 2001, 2002 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* DSP utils.
* note, many functions in here may use MMX which trashes the FPU state, it is
* absolutely necessary to call emms_c() between dsp & float/double code
*/
#ifndef AVCODEC_DSPUTIL_H
#define AVCODEC_DSPUTIL_H
#include "libavutil/intreadwrite.h"
#include "avcodec.h"
//#define DEBUG
/* dct code */
/**
 * Coefficient type used by every (I)DCT routine declared in this header:
 * a plain signed short.
 */
typedef short DCTELEM;
/*
 * DCT routines implemented outside this header. Each takes one non-const
 * pointer to DCTELEM coefficients and returns nothing.
 * NOTE(review): presumably each transforms the buffer in place, with the
 * "fdct" names being forward transforms and "rev_dct"/"idct" names inverse
 * ones -- confirm against the definitions.
 */
void fdct_ifast (DCTELEM *data);
void fdct_ifast248 (DCTELEM *data);
void ff_jpeg_fdct_islow (DCTELEM *data);
void ff_fdct248_islow (DCTELEM *data);
void j_rev_dct (DCTELEM *data);
void j_rev_dct4 (DCTELEM *data);
void j_rev_dct2 (DCTELEM *data);
void j_rev_dct1 (DCTELEM *data);
void ff_wmv2_idct_c(DCTELEM *data);
/* NOTE(review): the mmx/mmx2/sse2 suffixes suggest x86 SIMD variants of the
 * forward DCT -- confirm where they are defined and when they may be called. */
void ff_fdct_mmx(DCTELEM *block);
void ff_fdct_mmx2(DCTELEM *block);
void ff_fdct_sse2(DCTELEM *block);
/**
 * Declare the C H.264 IDCT prototypes for one bit depth.
 * The depth argument is pasted into every function name
 * (ff_h264_idct_add_<depth>_c and so on); the macro is expanded right below
 * for depths 8, 9 and 10.
 *
 * Argument order differs between variants: the two lowres functions take
 * (dst, stride, block) while the other single-block functions take
 * (dst, block, stride).
 * The add16, add16intra, idct8_add4 and add8 variants additionally take a
 * block offset table and an array declared as nnzc[6*8]; add8 receives an
 * array of destination pointers (uint8_t **) instead of a single one.
 */
#define H264_IDCT(depth) \
void ff_h264_idct8_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
void ff_h264_idct_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
void ff_h264_idct8_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
void ff_h264_lowres_idct_add_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\
void ff_h264_lowres_idct_put_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\
void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\
void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);
H264_IDCT( 8)
H264_IDCT( 9)
H264_IDCT(10)
/*
 * SVQ3 IDCT prototypes. Unlike the H.264 functions above, which take a
 * multiplier (qmul), these take a qp value; ff_svq3_add_idct_c also takes a
 * separate dc argument.
 * NOTE(review): the exact meaning of qp and dc is not visible here -- confirm
 * against the SVQ3 decoder.
 */
void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
/* encoding scans: four 64-entry byte tables, declared here and defined
 * elsewhere.
 * NOTE(review): presumably each maps a scan position to a coefficient index
 * within an 8x8 block -- confirm against the definitions. */
extern const uint8_t ff_alternate_horizontal_scan[64];
extern const uint8_t ff_alternate_vertical_scan[64];
extern const uint8_t ff_zigzag_direct[64];
extern const uint8_t ff_zigzag248_direct[64];
/* pixel operations */
/* Used below to size ff_cropTbl, which holds 2 * MAX_NEG_CROP entries in
 * addition to 256. */
#define MAX_NEG_CROP 1024
/* temporary */
extern uint32_t ff_squareTbl[512];
/* 256 + 2 * MAX_NEG_CROP = 2304 entries.
 * NOTE(review): presumably a clamp-to-8-bit lookup that is indexed with an
 * offset of MAX_NEG_CROP so that out-of-range values land in the extra
 * entries -- confirm against the definition. */
extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
/**
 * Declare the C put/avg prototypes for 8x8 and 16x16 pixel blocks at one bit
 * depth. The depth argument is pasted into each function name; the macro is
 * expanded right below for depths 8, 9 and 10.
 * All four functions share the signature (dst, src, stride).
 */
#define PUTAVG_PIXELS(depth)\
void ff_put_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
void ff_avg_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
void ff_put_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
void ff_avg_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);
PUTAVG_PIXELS( 8)
PUTAVG_PIXELS( 9)
PUTAVG_PIXELS(10)
/* The names without a depth component resolve to the 8-bit variants. */
#define ff_put_pixels8x8_c ff_put_pixels8x8_8_c
#define ff_avg_pixels8x8_c ff_avg_pixels8x8_8_c
#define ff_put_pixels16x16_c ff_put_pixels16x16_8_c
#define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
/* VP3 DSP functions */
/*
 * VP3 IDCT and loop-filter prototypes. The inline "align N" notes record the
 * alignment assumed for each pointer (see the alignment remarks further down
 * in this header). ff_vp3_idct_dc_add_c is the only IDCT variant whose block
 * argument is const.
 */
void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
/* Loop filters: both take the same (src, stride, bounding_values) arguments.
 * NOTE(review): v/h presumably select the filtered edge direction -- confirm. */
void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
/* Bink functions */
/* Bink IDCT prototypes: one operating on the coefficient block alone, and
 * add/put variants that also take a destination pointer and its linesize. */
void ff_bink_idct_c (DCTELEM *block);
void ff_bink_idct_add_c(uint8_t *dest, int linesize, DCTELEM *block);
void ff_bink_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
/* EA functions */
/* Same (dest, linesize, block) signature as the Bink put variant above. */
void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
/* 1/2^n downscaling functions from imgconvert.c */
/* Compatibility declaration, only compiled while LIBAVCODEC_VERSION_MAJOR is
 * below 53. */
#if LIBAVCODEC_VERSION_MAJOR < 53
/**
* @deprecated Use av_image_copy_plane() instead.
*/
attribute_deprecated
void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
#endif
/*
 * Downscalers sharing one signature: destination pointer and wrap, const
 * source pointer and wrap, then width and height.
 * NOTE(review): the digit pairs (22/44/88) presumably give the horizontal and
 * vertical reduction factor, and width/height presumably refer to the
 * destination -- confirm against imgconvert.c.
 */
void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
/* Same parameter list as the DSPContext.gmc member declared later in this
 * header, which is documented there as global motion compensation. */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
/* minimum alignment rules ;)
If you notice errors in the align stuff, need more alignment for some ASM code
for some CPU or need to use a function with less aligned data then send a mail
to the ffmpeg-devel mailing list, ...
!warning These alignments might not match reality, (missing attribute((align))
stuff somewhere possible).
I (Michael) did not check them, these are just the alignments which I think
could be reached easily ...
!future video codecs might need functions with less strict alignment
*/
/*
void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
void clear_blocks_c(DCTELEM *blocks);
*/
/* add and put pixel (decoding) */
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
// h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4
/* Pixel block operation: block is the (width-aligned) output, pixels the
 * unaligned read-only input, line_size the line stride and h the height. */
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
/* Same as op_pixels_func but with an explicit width w in addition to h. */
typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
/* Takes no size argument at all; note that src is not const-qualified here. */
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
/* NOTE(review): x and y are presumably the fractional chroma offsets --
 * confirm against the H.264 chroma MC implementations. */
typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
/* Like op_pixels_func, but takes a single byte value instead of a source
 * pointer. */
typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h);
/**
 * Declare the ff_put_, ff_put_no_rnd_ and ff_avg_ prototypes for one
 * function name. All three share the qpel_mc_func parameter list
 * (dst, src, stride). Expanded below for six 16-wide and six 8-wide
 * "*_old_c" names.
 */
#define DEF_OLD_QPEL(name)\
void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
DEF_OLD_QPEL(qpel16_mc11_old_c)
DEF_OLD_QPEL(qpel16_mc31_old_c)
DEF_OLD_QPEL(qpel16_mc12_old_c)
DEF_OLD_QPEL(qpel16_mc32_old_c)
DEF_OLD_QPEL(qpel16_mc13_old_c)
DEF_OLD_QPEL(qpel16_mc33_old_c)
DEF_OLD_QPEL(qpel8_mc11_old_c)
DEF_OLD_QPEL(qpel8_mc31_old_c)
DEF_OLD_QPEL(qpel8_mc12_old_c)
DEF_OLD_QPEL(qpel8_mc32_old_c)
DEF_OLD_QPEL(qpel8_mc13_old_c)
DEF_OLD_QPEL(qpel8_mc33_old_c)
/**
 * Define a static function a() with the op_pixels_func parameter list that
 * calls b() twice: once on block/pixels as given, and once with both
 * pointers advanced by n bytes. line_size and h are passed through unchanged.
 *
 * @param a name of the function to define
 * @param b name of the function (or function-like macro) to call
 * @param n byte offset of the second call; parenthesized in the expansion so
 *          that any integer expression (e.g. 1 << 3) may be passed safely
 */
#define CALL_2X_PIXELS(a, b, n)\
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    b(block      , pixels      , line_size, h);\
    b(block + (n), pixels + (n), line_size, h);\
}
/* motion estimation */
// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2
// although currently h<4 is not used as functions with width <8 are neither used nor implemented
/* Comparison function returning an int for two pixel blocks: blk1 is aligned
 * to the block width, blk2 may be unaligned. The first argument is an opaque
 * context pointer (an MpegEncContext according to the inline note). */
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
/**
* Scantable.
* Bundles a pointer to a source scan table with two 64-entry byte tables
* derived from it; filled in by ff_init_scantable().
* NOTE(review): how permutated[] and raster_end[] are derived is not visible
* in this header -- confirm against ff_init_scantable()'s definition.
*/
typedef struct ScanTable{
/* Source scan table; not owned by this struct as far as this header shows. */
const uint8_t *scantable;
/* NOTE(review): presumably scantable[] after applying the IDCT permutation. */
uint8_t permutated[64];
/* NOTE(review): presumably the highest permutated index reached up to each
 * scan position -- confirm. */
uint8_t raster_end[64];
#if ARCH_PPC
/** Used by dct_quantize_altivec to find last-non-zero */
/* Only present on PPC builds; 16-byte aligned. */
DECLARE_ALIGNED(16, uint8_t, inverse)[64];
#endif
} ScanTable;
/**
 * Initialize a ScanTable from a source scan table.
 * @param st            table to fill in
 * @param src_scantable source scan order
 * NOTE(review): the unnamed first argument is presumably the IDCT permutation
 * (cf. DSPContext.idct_permutation) -- confirm against the definition.
 */
void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
/**
 * Declare ff_emulated_edge_mc_<depth>() for one bit depth; depth is pasted
 * into the function name. The parameter list is the same as that of the
 * DSPContext.emulated_edge_mc member declared later in this header, whose
 * documentation describes each argument.
 * Expanded right below for depths 8, 9 and 10.
 */
#define EMULATED_EDGE(depth) \
void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
int block_w, int block_h,\
int src_x, int src_y, int w, int h);
EMULATED_EDGE(8)
EMULATED_EDGE(9)
EMULATED_EDGE(10)
/* The name without a depth component resolves to the 8-bit variant. */
#define ff_emulated_edge_mc ff_emulated_edge_mc_8
/*
 * C prototypes matching the add_pixels_clamped, put_pixels_clamped and
 * put_signed_pixels_clamped members of DSPContext: a read-only coefficient
 * block, a destination pixel pointer and its linesize.
 * NOTE(review): presumably "add" accumulates into dest while "put" overwrites
 * it, with results clamped to the 8-bit range -- confirm against dsputil.c.
 */
void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
/**
* DSPContext.
*/
typedef struct DSPContext {
/* pixel ops : interface with DCT */
void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*put_pixels_nonclamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
/**
* Motion estimation with emulated edge values.
* @param buf pointer to destination buffer (unaligned)
* @param src pointer to pixel source (unaligned)
* @param linesize width (in pixels) for src/buf
* @param block_w number of pixels (per row) to copy to buf
* @param block_h number of pixel rows to copy to buf
* @param src_x offset of src to start of row - this may be negative
* @param src_y offset of src to top of image - this may be negative
* @param w width of src in pixels
* @param h height of src in pixels
*/
void (*emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize,
int block_w, int block_h,
int src_x, int src_y, int w, int h);
/**
* translational global motion compensation.
*/
void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
/**
* global motion compensation.
*/
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
void (*clear_block)(DCTELEM *block/*align 16*/);
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
int (*pix_sum)(uint8_t * pix, int line_size);
int (*pix_norm1)(uint8_t * pix, int line_size);
// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
me_cmp_func sse[6];
me_cmp_func hadamard8_diff[6];
me_cmp_func dct_sad[6];
me_cmp_func quant_psnr[6];
me_cmp_func bit[6];
me_cmp_func rd[6];
me_cmp_func vsad[6];
me_cmp_func vsse[6];
me_cmp_func nsse[6];
me_cmp_func w53[6];
me_cmp_func w97[6];
me_cmp_func dct_max[6];
me_cmp_func dct264_sad[6];
me_cmp_func me_pre_cmp[6];
me_cmp_func me_cmp[6];
me_cmp_func me_sub_cmp[6];
me_cmp_func mb_cmp[6];
me_cmp_func ildct_cmp[6]; //only width 16 used
me_cmp_func frame_skip_cmp[6]; //only width 8 used
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
int size);
/**
* Halfpel motion compensation with rounding (a+b+1)>>1.
* this is an array[4][4] of motion compensation functions for 4
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
* @param block destination where the result is stored
* @param pixels source
* @param line_size number of bytes in a horizontal line of block
* @param h height
*/
op_pixels_func put_pixels_tab[4][4];
/**
* Halfpel motion compensation with rounding (a+b+1)>>1.
* This is an array[4][4] of motion compensation functions for 4
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
* @param block destination into which the result is averaged (a+b+1)>>1
* @param pixels source
* @param line_size number of bytes in a horizontal line of block
* @param h height
*/
op_pixels_func avg_pixels_tab[4][4];
/**
* Halfpel motion compensation with no rounding (a+b)>>1.
* this is an array[4][4] of motion compensation functions for 4
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
* @param block destination where the result is stored
* @param pixels source
* @param line_size number of bytes in a horizontal line of block
* @param h height
*/
op_pixels_func put_no_rnd_pixels_tab[4][4];
/**
* Halfpel motion compensation with no rounding (a+b)>>1.
* this is an array[4][4] of motion compensation functions for 4
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
* @param block destination into which the result is averaged (a+b)>>1
* @param pixels source
* @param line_size number of bytes in a horizontal line of block
* @param h height
*/
op_pixels_func avg_no_rnd_pixels_tab[4][4];
void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
/**
* Thirdpel motion compensation with rounding (a+b+1)>>1.
* this is an array[11] of motion compensation functions for the 9 thirdpel
* positions (sized for the largest index produced by the formula below)<br>
* *pixels_tab[ xthirdpel + 4*ythirdpel ]
* @param block destination where the result is stored
* @param pixels source
* @param line_size number of bytes in a horizontal line of block
* @param h height
*/
tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
qpel_mc_func put_qpel_pixels_tab[2][16];
qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func put_mspel_pixels_tab[8];
/**
* h264 Chroma MC
*/
h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
qpel_mc_func put_h264_qpel_pixels_tab[4][16];
qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];
me_cmp_func pix_abs[2][4];
/* huffyuv specific */
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
/**
* subtract huffyuv's variant of median prediction
* note, this might read from src1[-1], src2[-1]
*/
void (*sub_hfyu_median_prediction)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top);
void (*add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top);
int (*add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left);
void (*add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha);
/* this might write to dst[w] */
void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len);
void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
void (*h261_loop_filter)(uint8_t *src, int stride);
void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
void (*vp3_idct_dc_add)(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void (*vector_fmul)(float *dst, const float *src0, const float *src1, int len);
void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
/**
* Multiply a vector of floats by a scalar float. Source and
* destination vectors must overlap exactly or not at all.
* @param dst result vector, 16-byte aligned
* @param src input vector, 16-byte aligned
* @param mul scalar value
* @param len length of vector, multiple of 4
*/
void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
int len);
/**
* Multiply a vector of floats by concatenated short vectors of
* floats and by a scalar float. Source and destination vectors
* must overlap exactly or not at all.
* [0]: short vectors of length 2, 8-byte aligned
* [1]: short vectors of length 4, 16-byte aligned
* @param dst output vector, 16-byte aligned
* @param src input vector, 16-byte aligned
* @param sv array of pointers to short vectors
* @param mul scalar value
* @param len number of elements in src and dst, multiple of 4
*/
void (*vector_fmul_sv_scalar[2])(float *dst, const float *src,
const float **sv, float mul, int len);
/**
* Multiply short vectors of floats by a scalar float, store
* concatenated result.
* [0]: short vectors of length 2, 8-byte aligned
* [1]: short vectors of length 4, 16-byte aligned
* @param dst output vector, 16-byte aligned
* @param sv array of pointers to short vectors
* @param mul scalar value
* @param len number of output elements, multiple of 4
*/
void (*sv_fmul_scalar[2])(float *dst, const float **sv,
float mul, int len);
/**
* Calculate the scalar product of two vectors of floats.
* @param v1 first vector, 16-byte aligned
* @param v2 second vector, 16-byte aligned
* @param len length of vectors, multiple of 4
*/
float (*scalarproduct_float)(const float *v1, const float *v2, int len);
/**
* Calculate the sum and difference of two vectors of floats.
* @param v1 first input vector, sum output, 16-byte aligned
* @param v2 second input vector, difference output, 16-byte aligned
* @param len length of vectors, multiple of 4
*/
void (*butterflies_float)(float *restrict v1, float *restrict v2, int len);
/* (I)DCT */
void (*fdct)(DCTELEM *block/* align 16*/);
void (*fdct248)(DCTELEM *block/* align 16*/);
/* IDCT really*/
void (*idct)(DCTELEM *block/* align 16*/);
/**
* block -> idct -> clip to unsigned 8 bit -> dest.
* (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
* @param line_size size in bytes of a horizontal line of dest
*/
void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
/**
* block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
* @param line_size size in bytes of a horizontal line of dest
*/
void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
/**
* idct input permutation.
* Several optimized IDCTs need a permuted input (relative to the normal order of the reference
* IDCT).
* This permutation must be performed before the idct_put/add. Note: normally this can be merged
* with the zigzag/alternate scan.<br>
* an example to avoid confusion:
* - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
* - (x -> reference dct -> reference idct -> x)
* - (x -> reference dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
* - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
*/
uint8_t idct_permutation[64];
int idct_permutation_type;
#define FF_NO_IDCT_PERM 1
#define FF_LIBMPEG2_IDCT_PERM 2
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4
#define FF_PARTTRANS_IDCT_PERM 5
#define FF_SSE2_IDCT_PERM 6
int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
#define BASIS_SHIFT 16
#define RECON_SHIFT 6
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides);
#define EDGE_WIDTH 16
#define EDGE_TOP 1
#define EDGE_BOTTOM 2
void (*prefetch)(void *mem, int stride, int h);
void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
/* mlp/truehd functions */
void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
int firorder, int iirorder,
unsigned int filter_shift, int32_t mask, int blocksize,
int32_t *sample_buffer);
/* intrax8 functions */
void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
int * range, int * sum, int edges);
/**
* Calculate scalar product of two vectors.
* @param len length of vectors, should be multiple of 16
* @param shift number of bits to discard from product
*/
int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len, int shift);
/* ape functions */
/**
* Calculate scalar product of v1 and v2,
* and v1[i] += v3[i] * mul
* @param len length of vectors, should be multiple of 16
*/
int32_t (*scalarproduct_and_madd_int16)(int16_t *v1/*align 16*/, const int16_t *v2, const int16_t *v3, int len, int mul);
/**
* Apply symmetric window in 16-bit fixed-point.
* @param output destination array
* constraints: 16-byte aligned
* @param input source array
* constraints: 16-byte aligned
* @param window window array
* constraints: 16-byte aligned, at least len/2 elements
* @param len full window length
* constraints: multiple of ? greater than zero
*/
void (*apply_window_int16)(int16_t *output, const int16_t *input,
const int16_t *window, unsigned int len);
/* rv30 functions */
qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
/* rv40 functions */
qpel_mc_func put_rv40_qpel_pixels_tab[4][16];
qpel_mc_func avg_rv40_qpel_pixels_tab[4][16];
h264_chroma_mc_func put_rv40_chroma_pixels_tab[3];
h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3];
/* bink functions */
op_fill_func fill_block_tab[2];
void (*scale_block)(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize);
} DSPContext;
/* Entry points declared here; their definitions live outside this header. */
void dsputil_static_init(void);
void dsputil_init(DSPContext *p, AVCodecContext *avctx);

int ff_check_alignment(void);

/**
 * Permute a block according to the given permutation.
 * @param last last non-zero element in scantable order
 */
void ff_block_permute(DCTELEM *block, uint8_t *permutation,
                      const uint8_t *scantable, int last);

void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type);
/** Replicate c into each 8-bit lane of a 32-bit word (multiplier 0x01010101). */
#define BYTE_VEC32(c) ((c)*0x01010101UL)
/** Replicate c into each 16-bit lane of a 64-bit word (multiplier 0x0001000100010001). */
#define BYTE_VEC64(c) ((c)*0x0001000100010001UL)

/**
 * Lane-wise average of four packed 8-bit values, rounding halves up.
 * Bit 0 of every lane is cleared from the difference before the shift so
 * that nothing leaks into the neighbouring lane.
 */
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
{
    const uint32_t half_diff = ((a ^ b) & ~BYTE_VEC32(0x01)) >> 1;

    return (a | b) - half_diff;
}

/**
 * Lane-wise average of four packed 8-bit values, rounding halves down.
 */
static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
{
    const uint32_t half_diff = ((a ^ b) & ~BYTE_VEC32(0x01)) >> 1;

    return (a & b) + half_diff;
}

/**
 * Lane-wise average of four packed 16-bit values, rounding halves up.
 */
static inline uint64_t rnd_avg64(uint64_t a, uint64_t b)
{
    const uint64_t half_diff = ((a ^ b) & ~BYTE_VEC64(0x01)) >> 1;

    return (a | b) - half_diff;
}

/**
 * Lane-wise average of four packed 16-bit values, rounding halves down.
 */
static inline uint64_t no_rnd_avg64(uint64_t a, uint64_t b)
{
    const uint64_t half_diff = ((a ^ b) & ~BYTE_VEC64(0x01)) >> 1;

    return (a & b) + half_diff;
}
/**
 * Pick the penalty factor that goes with a comparison function.
 *
 * Only the low 8 bits of type select the comparison function; any value
 * not listed explicitly is treated like FF_CMP_SAD.
 *
 * @param lambda  value scaled for the SAD/DCT/wavelet/SATD style metrics
 * @param lambda2 value scaled for the RD/PSNR/SSE/NSSE metrics
 * @param type    FF_CMP_* identifier, possibly with extra bits above 0xFF
 * @return the scaled factor, or 1 for FF_CMP_BIT
 */
static inline int get_penalty_factor(int lambda, int lambda2, int type)
{
    const int metric = type & 0xFF;

    switch (metric) {
    case FF_CMP_DCT:
        return (3 * lambda) >> (FF_LAMBDA_SHIFT + 1);

    case FF_CMP_W53:
        return (4 * lambda) >> (FF_LAMBDA_SHIFT);

    case FF_CMP_W97:
    case FF_CMP_SATD:
    case FF_CMP_DCT264:
        return (2 * lambda) >> FF_LAMBDA_SHIFT;

    case FF_CMP_RD:
    case FF_CMP_PSNR:
    case FF_CMP_SSE:
    case FF_CMP_NSSE:
        return lambda2 >> FF_LAMBDA_SHIFT;

    case FF_CMP_BIT:
        return 1;

    case FF_CMP_SAD:
    default:
        return lambda >> FF_LAMBDA_SHIFT;
    }
}
/*
 * Initializers declared for other translation units.
 * NOTE(review): judging by the names, the first group is per-architecture
 * and the second per-codec/feature -- confirm against the implementations.
 */
void dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx);
void dsputil_init_arm(DSPContext *c, AVCodecContext *avctx);
void dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx);
void dsputil_init_mlib(DSPContext *c, AVCodecContext *avctx);
void dsputil_init_mmi(DSPContext *c, AVCodecContext *avctx);
void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx);
void dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx);
void dsputil_init_sh4(DSPContext *c, AVCodecContext *avctx);
void dsputil_init_vis(DSPContext *c, AVCodecContext *avctx);

void ff_dsputil_init_dwt(DSPContext *c);
void ff_rv30dsp_init(DSPContext *c, AVCodecContext *avctx);
void ff_rv40dsp_init(DSPContext *c, AVCodecContext *avctx);
void ff_intrax8dsp_init(DSPContext *c, AVCodecContext *avctx);
void ff_mlp_init(DSPContext *c, AVCodecContext *avctx);
void ff_mlp_init_x86(DSPContext *c, AVCodecContext *avctx);
/*
 * STRIDE_ALIGN is 16 on ARM with NEON, on PPC, and when MMI is available
 * (the latter two only being considered when not building for ARM);
 * everywhere else it falls back to 8 unless already defined.
 */
#if (ARCH_ARM && HAVE_NEON) || (!ARCH_ARM && (ARCH_PPC || HAVE_MMI))
#   define STRIDE_ALIGN 16
#endif

#ifndef STRIDE_ALIGN
#   define STRIDE_ALIGN 8
#endif
/*
 * Helpers for declaring aligned local variables.
 *
 * Arguments shared by the macros below:
 *   a  requested alignment in bytes
 *   t  element type
 *   v  name of the variable to declare
 *   s  first array-dimension suffix (e.g. [64]); may be empty
 *   o  remaining array-dimension suffix; may be empty
 *
 * LOCAL_ALIGNED_A is the manual variant: it declares a byte buffer la_<v>
 * that is `a` bytes larger than an object of type `t s o`, and declares `v`
 * as a pointer of type `t (*) o` into that buffer at the address produced by
 * FFALIGN (defined elsewhere; presumably rounds the address up to a multiple
 * of `a` -- confirm). Note that `v` is a pointer here, so sizeof(v) is not
 * the size of the array.
 */
#define LOCAL_ALIGNED_A(a, t, v, s, o, ...) \
uint8_t la_##v[sizeof(t s o) + (a)]; \
t (*v) o = (void *)FFALIGN((uintptr_t)la_##v, a)
/*
 * LOCAL_ALIGNED_D is the direct variant: it hands the alignment to
 * DECLARE_ALIGNED (defined elsewhere) and declares `v` as a real array
 * with dimensions `s o`.
 */
#define LOCAL_ALIGNED_D(a, t, v, s, o, ...) DECLARE_ALIGNED(a, t, v) s o
/*
 * The two trailing empty arguments appended after __VA_ARGS__ make `s` and
 * `o` expand to nothing when the caller supplies fewer dimension suffixes.
 */
#define LOCAL_ALIGNED(a, t, v, ...) LOCAL_ALIGNED_A(a, t, v, __VA_ARGS__,,)
/* 8-byte variant: direct declaration when HAVE_LOCAL_ALIGNED_8 is set, manual otherwise. */
#if HAVE_LOCAL_ALIGNED_8
# define LOCAL_ALIGNED_8(t, v, ...) LOCAL_ALIGNED_D(8, t, v, __VA_ARGS__,,)
#else
# define LOCAL_ALIGNED_8(t, v, ...) LOCAL_ALIGNED(8, t, v, __VA_ARGS__)
#endif
/* 16-byte variant: direct declaration when HAVE_LOCAL_ALIGNED_16 is set, manual otherwise. */
#if HAVE_LOCAL_ALIGNED_16
# define LOCAL_ALIGNED_16(t, v, ...) LOCAL_ALIGNED_D(16, t, v, __VA_ARGS__,,)
#else
# define LOCAL_ALIGNED_16(t, v, ...) LOCAL_ALIGNED(16, t, v, __VA_ARGS__)
#endif
/*
 * Define a static function name16 that calls name8 on the block at
 * dst/src and on the block 8 bytes to its right, passing the same stride
 * and h to both, and returns the sum of the two results.
 */
#define WRAPPER8_16(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    const int score_left  = name8(s, dst    , src    , stride, h);\
    const int score_right = name8(s, dst + 8, src + 8, stride, h);\
    return score_left + score_right;\
}
/*
 * Define a static function name16 that sums name8 over 8-row bands, each
 * band being a left/right pair of calls 8 bytes apart with height 8.
 * One band is always evaluated; a second band, 8 rows further down, is
 * added only when h is exactly 16.
 */
#define WRAPPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    const int bands = (h == 16) ? 2 : 1;\
    int score = 0;\
    int band, col;\
    for (band = 0; band < bands; band++) {\
        uint8_t *band_dst = dst + band * (8 * stride);\
        uint8_t *band_src = src + band * (8 * stride);\
        for (col = 0; col < 16; col += 8)\
            score += name8(s, band_dst + col, band_src + col, stride, 8);\
    }\
    return score;\
}
/**
 * Copy h rows from src to dst, one 16-bit AV_RN16/AV_WN16 transfer per row.
 * @param dstStride distance added to dst after each row
 * @param srcStride distance added to src after each row
 */
static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    for (; h > 0; h--, dst += dstStride, src += srcStride)
        AV_WN16(dst, AV_RN16(src));
}
/**
 * Copy h rows from src to dst, one 32-bit AV_RN32/AV_WN32 transfer per row.
 * @param dstStride distance added to dst after each row
 * @param srcStride distance added to src after each row
 */
static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    for (; h > 0; h--, dst += dstStride, src += srcStride)
        AV_WN32(dst, AV_RN32(src));
}
/**
 * Copy h rows from src to dst, 8 bytes per row, as two consecutive
 * 32-bit AV_RN32/AV_WN32 transfers.
 * @param dstStride distance added to dst after each row
 * @param srcStride distance added to src after each row
 */
static inline void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x += 4)
            AV_WN32(dst + x, AV_RN32(src + x));
        dst += dstStride;
        src += srcStride;
    }
}
/**
 * Copy h rows from src to dst, 9 bytes per row: two 32-bit
 * AV_RN32/AV_WN32 transfers followed by a single byte at offset 8.
 * @param dstStride distance added to dst after each row
 * @param srcStride distance added to src after each row
 */
static inline void copy_block9(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x += 4)
            AV_WN32(dst + x, AV_RN32(src + x));
        dst[8] = src[8];
        dst += dstStride;
        src += srcStride;
    }
}
/**
 * Copy h rows from src to dst, 16 bytes per row, as four consecutive
 * 32-bit AV_RN32/AV_WN32 transfers.
 * @param dstStride distance added to dst after each row
 * @param srcStride distance added to src after each row
 */
static inline void copy_block16(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x += 4)
            AV_WN32(dst + x, AV_RN32(src + x));
        dst += dstStride;
        src += srcStride;
    }
}
/**
 * Copy h rows from src to dst, 17 bytes per row: four 32-bit
 * AV_RN32/AV_WN32 transfers followed by a single byte at offset 16.
 * @param dstStride distance added to dst after each row
 * @param srcStride distance added to src after each row
 */
static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x += 4)
            AV_WN32(dst + x, AV_RN32(src + x));
        dst[16] = src[16];
        dst += dstStride;
        src += srcStride;
    }
}
#endif /* AVCODEC_DSPUTIL_H */