diff options
author | Måns Rullgård <mans@mansr.com> | 2009-04-18 00:00:31 +0000 |
---|---|---|
committer | Måns Rullgård <mans@mansr.com> | 2009-04-18 00:00:31 +0000 |
commit | 9f5ff83f2a836a34734250ec924b9bb45fa67573 (patch) | |
tree | 7a9ffa02c9049d6606aa3a94af56434173f695b9 | |
parent | 3c55ce039d5b90afad35e19fc8ca6d147dd3f976 (diff) | |
download | ffmpeg-9f5ff83f2a836a34734250ec924b9bb45fa67573.tar.gz |
PPC asm for AV_RL*()
PPC is normally big-endian but has special little-endian load/store
instructions. Using these avoids a separate byteswap. This makes the
Vorbis decoder about 5% faster. Not much else uses little-endian
read/write extensively.
GCC generates horrible PPC code for the default AV_[RW]B64 (which uses
a packed struct), so we override it with a plain pointer cast.
Originally committed as revision 18602 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rwxr-xr-x | configure | 3 | ||||
-rw-r--r-- | libavutil/intreadwrite.h | 2 | ||||
-rw-r--r-- | libavutil/ppc/intreadwrite.h | 104 |
3 files changed, 108 insertions, 1 deletions
@@ -890,6 +890,7 @@ HAVE_LIST="
     getrusage
     inet_aton
     inline_asm
+    ldbrx
     libdc1394_1
     libdc1394_2
     llrint
@@ -1807,7 +1808,7 @@ if test $cpu != "generic"; then
         Cell|CELL|cell)
             add_cflags -mcpu=cell
             warn_altivec disabled Cell
-            enable ppc64
+            enable ppc64 ldbrx
         ;;
         # targets that do NOT support conditional mov (cmov)
         i[345]86|pentium|pentium-mmx|k6|k6-[23]|winchip-c6|winchip2|c3)
diff --git a/libavutil/intreadwrite.h b/libavutil/intreadwrite.h
index 42fb890a0e..5e55327ff9 100644
--- a/libavutil/intreadwrite.h
+++ b/libavutil/intreadwrite.h
@@ -31,6 +31,8 @@
 
 #if ARCH_ARM
 # include "arm/intreadwrite.h"
+#elif ARCH_PPC
+# include "ppc/intreadwrite.h"
 #endif
 
 /*
diff --git a/libavutil/ppc/intreadwrite.h b/libavutil/ppc/intreadwrite.h
new file mode 100644
index 0000000000..f9898172c5
--- /dev/null
+++ b/libavutil/ppc/intreadwrite.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_PPC_INTREADWRITE_H
+#define AVUTIL_PPC_INTREADWRITE_H
+
+#include <stdint.h>
+#include "config.h"
+
+#define AV_RL16 AV_RL16
+static inline uint16_t AV_RL16(const void *p)
+{
+    uint16_t v;
+    __asm__ ("lhbrx %0, %y1" : "=r"(v) : "Z"(*(const uint16_t*)p));
+    return v;
+}
+
+#define AV_WL16 AV_WL16
+static inline void AV_WL16(void *p, uint16_t v)
+{
+    __asm__ ("sthbrx %1, %y0" : "=Z"(*(uint16_t*)p) : "r"(v));
+}
+
+#define AV_RL32 AV_RL32
+static inline uint32_t AV_RL32(const void *p)
+{
+    uint32_t v;
+    __asm__ ("lwbrx %0, %y1" : "=r"(v) : "Z"(*(const uint32_t*)p));
+    return v;
+}
+
+#define AV_WL32 AV_WL32
+static inline void AV_WL32(void *p, uint32_t v)
+{
+    __asm__ ("stwbrx %1, %y0" : "=Z"(*(uint32_t*)p) : "r"(v));
+}
+
+#if HAVE_LDBRX
+
+#define AV_RL64 AV_RL64
+static inline uint64_t AV_RL64(const void *p)
+{
+    uint64_t v;
+    __asm__ ("ldbrx %0, %y1" : "=r"(v) : "Z"(*(const uint64_t*)p));
+    return v;
+}
+
+#define AV_WL64 AV_WL64
+static inline void AV_WL64(void *p, uint64_t v)
+{
+    __asm__ ("stdbrx %1, %y0" : "=Z"(*(uint64_t*)p) : "r"(v));
+}
+
+#else
+
+#define AV_RL64 AV_RL64
+static inline uint64_t AV_RL64(const void *p)
+{
+    union { uint64_t v; uint32_t hl[2]; } v;
+    __asm__ ("lwbrx %0, %y2 \n\t"
+             "lwbrx %1, %y3 \n\t"
+             : "=r"(v.hl[1]), "=r"(v.hl[0])
+             : "Z"(*(const uint32_t*)p), "Z"(*((const uint32_t*)p+1)));
+    return v.v;
+}
+
+#define AV_WL64 AV_WL64
+static inline void AV_WL64(void *p, uint64_t v)
+{
+    union { uint64_t v; uint32_t hl[2]; } vv = { v };
+    __asm__ ("stwbrx %2, %y0 \n\t"
+             "stwbrx %3, %y1 \n\t"
+             : "=Z"(*(uint32_t*)p), "=Z"(*((uint32_t*)p+1))
+             : "r"(vv.hl[1]), "r"(vv.hl[0]));
+}
+
+#endif /* HAVE_LDBRX */
+
+/*
+ * GCC fails miserably on the packed struct version which is used by
+ * default, so we override it here.
+ */
+
+#define AV_RB64(p) (*(const uint64_t *)(p))
+#define AV_WB64(p, v) (*(uint64_t *)(p) = (v))
+
+#endif /* AVUTIL_PPC_INTREADWRITE_H */