x86: replace explicit REP_RETs with RETs

From x86inc:
> On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
> a branch or a branch target. So switch to a 2-byte form of ret in that case.
> We can automatically detect "follows a branch", but not a branch target.
> (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.)

x86inc can automatically determine whether to use REP_RET rather than RET in most of these cases, so impact is minimal. Additionally, a few REP_RETs were used unnecessarily, despite the return being nowhere near a branch.

The only CPUs affected were AMD K10s, made between 2007 and 2011, 16 years ago and 12 years ago, respectively.

In the future, everyone involved with x86inc should consider dropping REP_RETs altogether.
author: Lynne <dev@lynne.ee> 2023-02-01 02:26:20 +0100
committer: Lynne <dev@lynne.ee> 2023-02-01 04:23:55 +0100
commit: bbe95f7353a972f28a48be8da883549f02c59e4b (patch)
tree: 08841c9da55e7f076f6046d1dbd70f49d74c0ec0 /libavcodec/x86
parent: fc9a3b584da3cf3fc1f00036be2eaf5dff903ccf (diff)
download: ffmpeg-bbe95f7353a972f28a48be8da883549f02c59e4b.tar.gz
39 files changed, 163 insertions, 163 deletions
diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
index 105e1af5c5..cc496d4df8 100644
--- a/libavcodec/x86/aacpsdsp.asm
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -49,7 +49,7 @@ align 16
     add  dstq, mmsize
     add    nq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse
@@ -83,7 +83,7 @@ align 16
     add   src2q, mmsize
     add      nq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 
 ;***********************************************************************
 ;void ff_ps_stereo_interpolate_sse3(float (*l)[2], float (*r)[2],
@@ -116,7 +116,7 @@ align 16
     movhps [rq+nq], m2
     add      nq, 8
     jl .loop
-    REP_RET
+    RET
 
 ;***************************************************************************
 ;void ps_stereo_interpolate_ipdopd_sse3(float (*l)[2], float (*r)[2],
@@ -164,7 +164,7 @@ align 16
     movhps [rq+nq], m2
     add      nq, 8
     jl .loop
-    REP_RET
+    RET
 
 ;**********************************************************
 ;void ps_hybrid_analysis_ileave_sse(float out[2][38][64],
@@ -484,7 +484,7 @@ align 16
     add    outq, strideq
     add      nq, 64
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index c11a94ca93..a95d359d95 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -60,7 +60,7 @@ cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset
     sub        expnq, mmsize
     jg .nextexp
 .end:
-    REP_RET
+    RET
 %endmacro
 
 %define LOOP_ALIGN ALIGN 16
@@ -126,7 +126,7 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
     sub      lenq, 16
 %endif
     ja .loop
-    REP_RET
+    RET
 
 ;------------------------------------------------------------------------------
 ; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
@@ -220,7 +220,7 @@ cglobal ac3_extract_exponents, 3, 3, 4, exp, coef, len
 
     add     lenq, 4
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 %if HAVE_SSE2_EXTERNAL
diff --git a/libavcodec/x86/alacdsp.asm b/libavcodec/x86/alacdsp.asm
index bb2069f785..1cfd302de2 100644
--- a/libavcodec/x86/alacdsp.asm
+++ b/libavcodec/x86/alacdsp.asm
@@ -100,7 +100,7 @@ align 16
 
     add     lenq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 
 %if ARCH_X86_64
 cglobal alac_append_extra_bits_mono, 2, 5, 3, buf, exbuf, exbits, ch, len
@@ -130,4 +130,4 @@ align 16
 
     add     lenq, mmsize*2
     jl .loop
-    REP_RET
+    RET
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index f64077cb13..cf5baa9415 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -123,7 +123,7 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
     add     dstq, mmsize*4*(%2+%3)
     sub     lend, mmsize*(%2+%3)
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
diff --git a/libavcodec/x86/dirac_dwt.asm b/libavcodec/x86/dirac_dwt.asm
index 6c8b3c0d88..1f3b238aee 100644
--- a/libavcodec/x86/dirac_dwt.asm
+++ b/libavcodec/x86/dirac_dwt.asm
@@ -75,7 +75,7 @@ cglobal vertical_compose53iL0_%1, 4,4,1, b0, b1, b2, width
     COMPOSE_53iL0 m0, m1, [b2q+2*widthq], m2
     mova    [b1q+2*widthq], m0
     jg      .loop
-    REP_RET
+    RET
 
 ; void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
 ;                                  int width)
@@ -93,7 +93,7 @@ cglobal vertical_compose_dirac53iH0_%1, 4,4,1, b0, b1, b2, width
     paddw   m0, [b1q+2*widthq]
     mova    [b1q+2*widthq], m0
     jg      .loop
-    REP_RET
+    RET
 
 ; void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
 ;                               IDWTELEM *b3, IDWTELEM *b4, int width)
@@ -110,7 +110,7 @@ cglobal vertical_compose_dd97iH0_%1, 6,6,5, b0, b1, b2, b3, b4, width
     COMPOSE_DD97iH0 [b2q+2*widthq], [b3q+2*widthq], [b4q+2*widthq]
     mova    [b2q+2*widthq], m1
     jg      .loop
-    REP_RET
+    RET
 
 ; void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
 ;                                IDWTELEM *b3, IDWTELEM *b4, int width)
@@ -139,7 +139,7 @@ cglobal vertical_compose_dd137iL0_%1, 6,6,6, b0, b1, b2, b3, b4, width
     psubw   m5, m1
     mova    [b2q+2*widthq], m5
     jg      .loop
-    REP_RET
+    RET
 
 ; void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width)
 cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
@@ -159,7 +159,7 @@ cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
     paddw   m2, m0
     mova    [b1q+2*widthq], m2
     jg      .loop
-    REP_RET
+    RET
 %endmacro
 
 ; extend the left and right edges of the tmp array by %1 and %2 respectively
@@ -225,7 +225,7 @@ cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
     cmp     xq, w2q
     jl      .highpass_loop
 .end:
-    REP_RET
+    RET
 %endmacro
 
 
@@ -290,7 +290,7 @@ cglobal horizontal_compose_dd97i_ssse3, 3,6,8, b, tmp, w, x, w2, b_w2
     cmp     xd, w2d
     jl      .highpass_loop
 .end:
-    REP_RET
+    RET
 
 
 INIT_XMM
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index a44596e565..34c3fc9a0f 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -475,7 +475,7 @@ cglobal fft_calc, 2,5,8
     mov     r0, r1
     mov     r1, r3
     FFT_DISPATCH _interleave %+ SUFFIX, r1
-    REP_RET
+    RET
 
 %endif
 
@@ -510,7 +510,7 @@ cglobal fft_calc, 2,5,8
     add      r2, mmsize*2
     jl       .loop
 .end:
-    REP_RET
+    RET
 
 cglobal fft_permute, 2,7,1
     mov     r4,  [r0 + FFTContext.revtab]
@@ -543,7 +543,7 @@ cglobal fft_permute, 2,7,1
     movaps  [r1 + r2 + 16], xmm1
     add     r2, 32
     jl      .loopcopy
-    REP_RET
+    RET
 
 INIT_XMM sse
 cglobal imdct_calc, 3,5,3
@@ -583,7 +583,7 @@ cglobal imdct_calc, 3,5,3
     sub     r3, mmsize
     add     r2, mmsize
     jl      .loop
-    REP_RET
+    RET
 
 %ifdef PIC
 %define SECTION_REL - $$
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 6d755f4972..44416e4dfd 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -79,7 +79,7 @@ ALIGN 16
     movd   [decodedq+4], m1
     jg .loop_sample
 .ret:
-    REP_RET
+    RET
 %endmacro
 
 %if HAVE_XOP_EXTERNAL
@@ -133,7 +133,7 @@ align 16
     mova [outq + lenq], m%2
     add      lenq, 16
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -177,7 +177,7 @@ align 16
     add      outq, mmsize*2
     sub      lend, mmsize/4
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -302,7 +302,7 @@ align 16
     add      outq, mmsize*REPCOUNT
     sub      lend, mmsize/4
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index a5c53034a2..e70bc492b2 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -112,7 +112,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
     jne .at_least_one_non_zero
     ; mx == 0 AND my == 0 - no filter needed
     mv0_pixels_mc8
-    REP_RET
+    RET
 
 .at_least_one_non_zero:
 %ifidn %2, rv40
@@ -192,7 +192,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
     add           r1, r2
     dec           r3d
     jne .next1drow
-    REP_RET
+    RET
 
 .both_non_zero: ; general case, bilinear
     movd          m4, r4d         ; x
@@ -365,7 +365,7 @@ cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0
     add           r0, r2
     sub          r3d, 2
     jnz .next2rows
-    REP_RET
+    RET
 %endmacro
 
 %macro chroma_mc2_mmx_func 2
@@ -407,7 +407,7 @@ cglobal %1_%2_chroma_mc2, 6, 7, 0
     add           r0, r2
     sub          r3d, 1
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 %define rnd_1d_h264 pw_4
@@ -453,7 +453,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
     jne .at_least_one_non_zero
     ; mx == 0 AND my == 0 - no filter needed
     mv0_pixels_mc8
-    REP_RET
+    RET
 
 .at_least_one_non_zero:
     test         r5d, r5d
@@ -514,7 +514,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
     sub          r3d, 2
     lea           r0, [r0+r2*2]
     jg .next2rows
-    REP_RET
+    RET
 
 .my_is_zero:
     mov          r5d, r4d
@@ -551,7 +551,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
     lea           r0, [r0+r2*2]
     lea           r1, [r1+r2*2]
     jg .next2xrows
-    REP_RET
+    RET
 
 .mx_is_zero:
     mov          r4d, r5d
@@ -588,7 +588,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
     sub          r3d, 2
     lea           r0, [r0+r2*2]
     jg .next2yrows
-    REP_RET
+    RET
 %endmacro
 
 %macro chroma_mc4_ssse3_func 2
@@ -638,7 +638,7 @@ cglobal %1_%2_chroma_mc4, 6, 7, 0
     sub          r3d, 2
     lea           r0, [r0+r2*2]
     jg .next2rows
-    REP_RET
+    RET
 %endmacro
 
 %define CHROMAMC_AVG NOTHING
diff --git a/libavcodec/x86/h264_chromamc_10bit.asm b/libavcodec/x86/h264_chromamc_10bit.asm
index fdc4f407c7..d4f92c90c7 100644
--- a/libavcodec/x86/h264_chromamc_10bit.asm
+++ b/libavcodec/x86/h264_chromamc_10bit.asm
@@ -67,7 +67,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
     jne .at_least_one_non_zero
     ; mx == 0 AND my == 0 - no filter needed
     MV0_PIXELS_MC8
-    REP_RET
+    RET
 
 .at_least_one_non_zero:
     mov          r6d, 2
@@ -102,7 +102,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
     add           r1, r2
     dec           r3d
     jne .next1drow
-    REP_RET
+    RET
 
 .xy_interpolation: ; general case, bilinear
     movd          m4, r4m         ; x
@@ -144,7 +144,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
     add           r0, r2
     dec          r3d
     jne .next2drow
-    REP_RET
+    RET
 %endmacro
 
 ;-----------------------------------------------------------------------------
@@ -194,7 +194,7 @@ cglobal %1_h264_chroma_mc4_10, 6,6,7
     MC4_OP m6, m0
     sub   r3d, 2
     jnz .next2rows
-    REP_RET
+    RET
 %endmacro
 
 ;-----------------------------------------------------------------------------
@@ -234,7 +234,7 @@ cglobal %1_h264_chroma_mc2_10, 6,7
     add           r0, r2
     dec          r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 %macro NOTHING 2-3
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index 23971b5cb5..033f2f4d55 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -372,7 +372,7 @@ cglobal deblock_v_luma_10, 5,5,15
     add         r4, 2
     dec         r3
     jg .loop
-    REP_RET
+    RET
 
 cglobal deblock_h_luma_10, 5,7,15
     shl        r2d, 2
@@ -411,7 +411,7 @@ cglobal deblock_h_luma_10, 5,7,15
     lea         r5, [r5+r1*8]
     dec         r6
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -648,7 +648,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16
     add     r4, mmsize
     dec     r6
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index 9b5920d3b0..1f86e51d82 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -354,7 +354,7 @@ INIT_MMX cpuname
     add          r2, 128
     cmp          r5, 16
     jl .nextblock
-    REP_RET
+    RET
 .no_dc:
 INIT_XMM cpuname
     mov       dst2d, dword [r1+r5*4]
@@ -368,7 +368,7 @@ INIT_XMM cpuname
     add          r2, 128
     cmp          r5, 16
     jl .nextblock
-    REP_RET
+    RET
 
 INIT_MMX mmx
 h264_idct_add8_mmx_plane:
@@ -508,7 +508,7 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
     add16_sse2_cycle 5, 0x24
     add16_sse2_cycle 6, 0x1e
     add16_sse2_cycle 7, 0x26
-REP_RET
+RET
 
 %macro add16intra_sse2_cycle 2
     movzx       r0, word [r4+%2]
@@ -555,7 +555,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
     add16intra_sse2_cycle 5, 0x24
     add16intra_sse2_cycle 6, 0x1e
     add16intra_sse2_cycle 7, 0x26
-REP_RET
+RET
 
 %macro add8_sse2_cycle 2
     movzx       r0, word [r4+%2]
@@ -610,7 +610,7 @@ cglobal h264_idct_add8_8, 5, 7 + ARCH_X86_64, 8
 %endif
     add8_sse2_cycle 2, 0x5c
     add8_sse2_cycle 3, 0x64
-REP_RET
+RET
 
 ;void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul)
 
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 9fd05abb2b..b990db7121 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -155,7 +155,7 @@ cglobal h264_idct_add16_10, 5,6
     ADD16_OP 13, 7+3*8
     ADD16_OP 14, 6+4*8
     ADD16_OP 15, 7+4*8
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -292,7 +292,7 @@ cglobal h264_idct_add16intra_10,5,7,8
     ADD16_OP_INTRA 10, 4+4*8
     ADD16_OP_INTRA 12, 6+3*8
     ADD16_OP_INTRA 14, 6+4*8
-    REP_RET
+    RET
     AC 8
     AC 10
     AC 12
@@ -335,7 +335,7 @@ cglobal h264_idct_add8_10,5,8,7
 %endif
     ADD16_OP_INTRA 32, 4+11*8
     ADD16_OP_INTRA 34, 4+12*8
-    REP_RET
+    RET
     AC 16
     AC 18
     AC 32
@@ -384,7 +384,7 @@ cglobal h264_idct_add8_422_10, 5, 8, 7
     ADD16_OP_INTRA 34, 4+12*8
     ADD16_OP_INTRA 40, 4+13*8 ; i+4
     ADD16_OP_INTRA 42, 4+14*8 ; i+4
-REP_RET
+RET
     AC 16
     AC 18
     AC 24 ; i+4
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 31840a1472..8a38ba2bb5 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -62,7 +62,7 @@ cglobal pred16x16_vertical_8, 2,3
     lea   r0, [r0+r1*2]
     dec   r2
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_horizontal_8(uint8_t *src, ptrdiff_t stride)
@@ -95,7 +95,7 @@ cglobal pred16x16_horizontal_8, 2,3
     lea       r0, [r0+r1*2]
     dec       r2
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -146,7 +146,7 @@ cglobal pred16x16_dc_8, 2,7
     lea   r4, [r4+r1*2]
     dec   r3d
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -192,7 +192,7 @@ cglobal pred16x16_tm_vp8_8, 2,6,6
     lea          r0, [r0+r1*2]
     dec         r5d
     jg .loop
-    REP_RET
+    RET
 
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
@@ -228,7 +228,7 @@ cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration
     lea                       dstq, [dstq+strideq*4]
     dec                 iterationd
     jg .loop
-    REP_RET
+    RET
 %endif
 
 ;-----------------------------------------------------------------------------
@@ -427,7 +427,7 @@ cglobal pred16x16_plane_%1_8, 2,9,7
     lea          r0, [r0+r2*2]
     dec          r4
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -556,7 +556,7 @@ ALIGN 16
     lea          r0, [r0+r2*2]
     dec          r4
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -599,7 +599,7 @@ cglobal pred8x8_horizontal_8, 2,3
     lea       r0, [r0+r1*2]
     dec       r2
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -737,7 +737,7 @@ cglobal pred8x8_dc_rv40_8, 2,7
     lea   r4, [r4+r1*2]
     dec   r3d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
@@ -770,7 +770,7 @@ cglobal pred8x8_tm_vp8_8, 2,6,4
     lea          r0, [r0+r1*2]
     dec         r5d
     jg .loop
-    REP_RET
+    RET
 
 INIT_XMM ssse3
 cglobal pred8x8_tm_vp8_8, 2,3,6
@@ -797,7 +797,7 @@ cglobal pred8x8_tm_vp8_8, 2,3,6
     lea          r0, [r0+r1*2]
     dec         r2d
     jg .loop
-    REP_RET
+    RET
 
 ; dest, left, right, src, tmp
 ; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
@@ -1802,7 +1802,7 @@ cglobal pred4x4_tm_vp8_8, 3,6
     lea        r0, [r0+r2*2]
     dec       r5d
     jg .loop
-    REP_RET
+    RET
 
 INIT_XMM ssse3
 cglobal pred4x4_tm_vp8_8, 3,3
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index c4645d434e..2f30807332 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -327,7 +327,7 @@ cglobal pred8x8_horizontal_10, 2, 3
     lea          r0, [r0+r1*2]
     dec          r2d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_predict_8x8_dc_10(pixel *src, ptrdiff_t stride)
@@ -481,7 +481,7 @@ cglobal pred8x8_plane_10, 2, 7, 7
     add       r0, r1
     dec r2d
     jg .loop
-    REP_RET
+    RET
 
 
 ;-----------------------------------------------------------------------------
@@ -994,7 +994,7 @@ cglobal pred16x16_vertical_10, 2, 3
     lea   r0, [r0+r1*2]
     dec   r2d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_horizontal_10(pixel *src, ptrdiff_t stride)
@@ -1012,7 +1012,7 @@ cglobal pred16x16_horizontal_10, 2, 3
     lea    r0, [r0+r1*2]
     dec    r2d
     jg .vloop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_dc_10(pixel *src, ptrdiff_t stride)
@@ -1048,7 +1048,7 @@ cglobal pred16x16_dc_10, 2, 6
     lea        r5, [r5+r1*2]
     dec       r3d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_top_dc_10(pixel *src, ptrdiff_t stride)
@@ -1070,7 +1070,7 @@ cglobal pred16x16_top_dc_10, 2, 3
     lea        r0, [r0+r1*2]
     dec       r2d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_left_dc_10(pixel *src, ptrdiff_t stride)
@@ -1101,7 +1101,7 @@ cglobal pred16x16_left_dc_10, 2, 6
     lea        r5, [r5+r1*2]
     dec       r3d
     jg .loop
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred16x16_128_dc_10(pixel *src, ptrdiff_t stride)
@@ -1116,4 +1116,4 @@ cglobal pred16x16_128_dc_10, 2,3
     lea        r0, [r0+r1*2]
     dec       r2d
     jg .loop
-    REP_RET
+    RET
diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm
index c862cb2226..80483b15ba 100644
--- a/libavcodec/x86/h264_qpel_10bit.asm
+++ b/libavcodec/x86/h264_qpel_10bit.asm
@@ -211,7 +211,7 @@ cglobal %1_h264_qpel16_mc00_10, 3,4
     lea            r1, [r1+r2*2]
     dec r3d
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 %define OP_MOV mova
diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm
index 6269b3cf4f..4e64329991 100644
--- a/libavcodec/x86/h264_qpel_8bit.asm
+++ b/libavcodec/x86/h264_qpel_8bit.asm
@@ -89,7 +89,7 @@ cglobal %1_h264_qpel4_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
     add           r1, r3
     dec          r4d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -149,7 +149,7 @@ cglobal %1_h264_qpel8_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
     add           r1, r3
     dec          r4d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -192,7 +192,7 @@ cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride
     add           r0, r2
     dec          r4d
     jne        .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3
@@ -239,7 +239,7 @@ cglobal %1_h264_qpel4_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
     add           r2, r4
     dec          r5d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -303,7 +303,7 @@ cglobal %1_h264_qpel8_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
     add           r2, r4
     dec          r5d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -350,7 +350,7 @@ cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Strid
     add           r2, r4
     dec          r5d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3
@@ -458,7 +458,7 @@ cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,8 ; dst, src, dstStride, srcStride,
     FILT_V        %1
     FILT_V        %1
 .end:
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -531,7 +531,7 @@ cglobal %1_h264_qpel4_hv_lowpass_h, 3,4 ; tmp, dst, dstStride
     add           r1, r2
     dec          r3d
     jnz        .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -574,7 +574,7 @@ cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size
     FILT_HV    14*48
     FILT_HV    15*48
 .end:
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -619,7 +619,7 @@ cglobal %1_h264_qpel8or16_hv2_lowpass_op, 5,5 ; dst, tmp, dstStride, unused, h
     add           r0, r2
     dec          r4d
     jne        .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -710,7 +710,7 @@ cglobal %1_h264_qpel8or16_hv2_lowpass, 5,5,8 ; dst, tmp, dstStride, tmpStride, s
     dec          r4d
     jne        .op16
 .done:
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3
@@ -776,7 +776,7 @@ cglobal %1_pixels8_l2_shift5, 6, 6 ; dst, src16, src8, dstStride, src8Stride, h
     lea           r0, [r0+2*r3]
     sub          r5d, 2
     jne        .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -845,7 +845,7 @@ cglobal %1_h264_qpel16_h_lowpass_l2, 5, 6, 16 ; dst, src, src2, dstStride, src2S
     add           r2, r4
     dec          r5d
     jg         .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3
diff --git a/libavcodec/x86/h264_weight.asm b/libavcodec/x86/h264_weight.asm
index 6076e64ae0..66353d1a9c 100644
--- a/libavcodec/x86/h264_weight.asm
+++ b/libavcodec/x86/h264_weight.asm
@@ -79,7 +79,7 @@ cglobal h264_weight_%1, 6, 6, %2
     add        r0, r1
     dec        r2d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -102,7 +102,7 @@ cglobal h264_weight_%1, 6, 6, %2
     add        r0, r3
     dec        r2d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -196,7 +196,7 @@ cglobal h264_biweight_%1, 7, 8, %2
     add        r1, r2
     dec        r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -223,7 +223,7 @@ cglobal h264_biweight_%1, 7, 8, %2
     add        r1, r4
     dec        r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -258,7 +258,7 @@ cglobal h264_biweight_16, 7, 8, 8
     add        r1, r2
     dec        r3d
     jnz .nextrow
-    REP_RET
+    RET
 
 INIT_XMM ssse3
 cglobal h264_biweight_8, 7, 8, 8
@@ -281,4 +281,4 @@ cglobal h264_biweight_8, 7, 8, 8
     add        r1, r4
     dec        r3d
     jnz .nextrow
-    REP_RET
+    RET
diff --git a/libavcodec/x86/h264_weight_10bit.asm b/libavcodec/x86/h264_weight_10bit.asm
index f924e55854..356871bc62 100644
--- a/libavcodec/x86/h264_weight_10bit.asm
+++ b/libavcodec/x86/h264_weight_10bit.asm
@@ -101,7 +101,7 @@ cglobal h264_weight_16_10
     add       r0, r1
     dec       r2d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -120,7 +120,7 @@ cglobal h264_weight_8_10
     add        r0, r1
     dec        r2d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -142,7 +142,7 @@ cglobal h264_weight_4_10
     add         r0, r3
     dec         r2d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -234,7 +234,7 @@ cglobal h264_biweight_16_10
     add       r1, r2
     dec       r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -253,7 +253,7 @@ cglobal h264_biweight_8_10
     add      r1, r2
     dec      r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -275,7 +275,7 @@ cglobal h264_biweight_4_10
     add         r1, r4
     dec         r3d
     jnz .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index 2eb8924da8..8abb16150d 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -166,7 +166,7 @@ INIT_YMM cpuname
     add             srcq, srcstrideq             ; src += srcstride
     dec          heightd                         ; cmp height
     jnz               .loop                      ; height loop
-    REP_RET
+    RET
 %endmacro
 
 
diff --git a/libavcodec/x86/hevc_sao_10bit.asm b/libavcodec/x86/hevc_sao_10bit.asm
index 38005740e5..0daa9c645c 100644
--- a/libavcodec/x86/hevc_sao_10bit.asm
+++ b/libavcodec/x86/hevc_sao_10bit.asm
@@ -145,7 +145,7 @@ align 16
     add             srcq, srcstrideq
     dec          heightd
     jg .loop
-    REP_RET
+    RET
 %endmacro
 
 %macro HEVC_SAO_BAND_FILTER_FUNCS 0
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index b3a270a173..7a2b7135d8 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -78,7 +78,7 @@ cglobal put_pixels8_x2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -120,7 +120,7 @@ cglobal put_pixels16_x2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -162,7 +162,7 @@ cglobal put_no_rnd_pixels8_x2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 
 
 ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -194,7 +194,7 @@ cglobal put_pixels8_y2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -232,7 +232,7 @@ cglobal put_no_rnd_pixels8_y2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 
 
 ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -280,7 +280,7 @@ cglobal avg_pixels8_x2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -323,7 +323,7 @@ cglobal avg_pixels8_y2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -370,7 +370,7 @@ cglobal avg_approx_pixels8_xy2, 4,5
     add          r0, r4
     sub         r3d, 4
     jne .loop
-    REP_RET
+    RET
 
 
 ; void ff_avg_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -448,7 +448,7 @@ cglobal %1_pixels8_xy2, 4,5
     add         r4, r2
     sub        r3d, 2
     jnz .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -514,7 +514,7 @@ cglobal %1_pixels8_xy2, 4,5
     add         r4, r2
     sub        r3d, 2
     jnz .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX ssse3
diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm
index 88ca8e8e0a..e580133e45 100644
--- a/libavcodec/x86/hpeldsp_vp3.asm
+++ b/libavcodec/x86/hpeldsp_vp3.asm
@@ -60,7 +60,7 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5
     lea          r0, [r0+r2*4]
     sub         r3d, 4
     jg .loop
-    REP_RET
+    RET
 
 
 ; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -96,4 +96,4 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5
     lea          r0, [r0+r2*4]
     sub         r3d, 4
     jg .loop
-    REP_RET
+    RET
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index c5c40e991b..c1b375f479 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -74,7 +74,7 @@ cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
     jl         .loop
     movd          m0, [dstq-4]
     movd     [leftq], m0
-    REP_RET
+    RET
 
 
 ; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top)
diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm
index 61dfdd4f71..c61cc70784 100644
--- a/libavcodec/x86/jpeg2000dsp.asm
+++ b/libavcodec/x86/jpeg2000dsp.asm
@@ -113,7 +113,7 @@ align 16
     movaps   [src1q+csizeq], m5
     add  csizeq, mmsize
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse
@@ -153,7 +153,7 @@ align 16
     mova   [src0q+csizeq], m2
     add  csizeq, mmsize
     jl .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index eb1b80506e..7159aafe67 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -229,7 +229,7 @@ cglobal add_bytes, 3,4,2, dst, src, w, size
     inc     wq
     jl .3
 .end:
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
diff --git a/libavcodec/x86/lossless_videoencdsp.asm b/libavcodec/x86/lossless_videoencdsp.asm
index c579891d6a..8ccaea9139 100644
--- a/libavcodec/x86/lossless_videoencdsp.asm
+++ b/libavcodec/x86/lossless_videoencdsp.asm
@@ -110,7 +110,7 @@ cglobal diff_bytes, 4,5,2, dst, src1, src2, w
     inc               wq
         jl .loop_gpr_%1%2
 .end_%1%2:
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index eb036ee4bc..923eb8078b 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -458,7 +458,7 @@ cglobal hf_noise%1, 3,3,0, pix1, lsize, h
     psrlq      m6, 32
     paddd      m0, m6
     movd      eax, m0   ; eax = result of hf_noise8;
-    REP_RET                 ; return eax;
+    RET                 ; return eax;
 %endmacro
 
 INIT_MMX mmx
diff --git a/libavcodec/x86/pngdsp.asm b/libavcodec/x86/pngdsp.asm
index 7bc43c79a0..efaf652cd4 100644
--- a/libavcodec/x86/pngdsp.asm
+++ b/libavcodec/x86/pngdsp.asm
@@ -75,7 +75,7 @@ cglobal add_bytes_l2, 4, 6, 2, dst, src1, src2, wa, w, i
 .end_s:
     cmp                 iq, wq
     jl .loop_s
-    REP_RET
+    RET
 
 %macro ADD_PAETH_PRED_FN 1
 cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr
diff --git a/libavcodec/x86/qpel.asm b/libavcodec/x86/qpel.asm
index 4e72d5084f..481251314a 100644
--- a/libavcodec/x86/qpel.asm
+++ b/libavcodec/x86/qpel.asm
@@ -81,7 +81,7 @@ cglobal %1_pixels4_l2, 6,6
     add          r2, 16
     sub         r5d, 4
     jne       .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -125,7 +125,7 @@ cglobal %1_pixels8_l2, 6,6
     add          r2, 32
     sub         r5d, 4
     jne       .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -171,7 +171,7 @@ cglobal %1_pixels16_l2, 6,6
     add          r2, 32
     sub         r5d, 2
     jne       .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
diff --git a/libavcodec/x86/qpeldsp.asm b/libavcodec/x86/qpeldsp.asm
index 3a6a650654..30d26a5acc 100644
--- a/libavcodec/x86/qpeldsp.asm
+++ b/libavcodec/x86/qpeldsp.asm
@@ -92,7 +92,7 @@ cglobal put_no_rnd_pixels8_l2, 6,6
     add          r2, 32
     sub         r5d, 4
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -161,7 +161,7 @@ cglobal put_no_rnd_pixels16_l2, 6,6
     add          r2, 32
     sub         r5d, 2
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -274,7 +274,7 @@ cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16
     add          r0, r2
     dec r4d
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 %macro PUT_OP 2-3
@@ -357,7 +357,7 @@ cglobal %1_mpeg4_qpel8_h_lowpass, 5, 5, 0, 8
     add          r0, r2
     dec r4d
     jne .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -466,7 +466,7 @@ cglobal %1_mpeg4_qpel16_v_lowpass, 4, 6, 0, 544
     add    r0, r1
     dec r4d
     jne .loopv
-    REP_RET
+    RET
 %endmacro
 
 %macro PUT_OPH 2-3
@@ -543,7 +543,7 @@ cglobal %1_mpeg4_qpel8_v_lowpass, 4, 6, 0, 288
     add    r0, r1
     dec r4d
     jne .loopv
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm
index 0a3d99c53f..f29bfd715c 100644
--- a/libavcodec/x86/rv34dsp.asm
+++ b/libavcodec/x86/rv34dsp.asm
@@ -54,7 +54,7 @@ cglobal rv34_idct_dc_noround, 1, 2, 0
     movq    [r0+ 8], m0
     movq    [r0+16], m0
     movq    [r0+24], m0
-    REP_RET
+    RET
 
 ; Load coeffs and perform row transform
 ; Output: coeffs in mm[0467], rounder in mm5
diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
index f2ce236d44..e02ad2c63f 100644
--- a/libavcodec/x86/rv40dsp.asm
+++ b/libavcodec/x86/rv40dsp.asm
@@ -170,7 +170,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height,
     add     srcq, srcstrideq
     dec  heightd                           ; next row
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 %macro FILTER_H  1
@@ -227,7 +227,7 @@ cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, heigh
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM  sse2
@@ -280,7 +280,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height,
     add     srcq, srcstrideq
     dec       heightd                          ; next row
     jg       .nextrow
-    REP_RET
+    RET
 
 cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg
 %ifdef PIC
@@ -313,7 +313,7 @@ cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height,
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3
@@ -464,7 +464,7 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8
 .loop:
     MAIN_LOOP  %2, RND
     jnz        .loop
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index 87dcdc43ce..d02f70d704 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -208,7 +208,7 @@ cglobal sbr_sum64x5, 1,2,4,z
     add     zq, 32
     cmp     zq, r1q
     jne  .loop
-    REP_RET
+    RET
 
 INIT_XMM sse
 cglobal sbr_qmf_post_shuffle, 2,3,4,W,z
@@ -227,7 +227,7 @@ cglobal sbr_qmf_post_shuffle, 2,3,4,W,z
     add               zq, 16
     cmp               zq, r2q
     jl             .loop
-    REP_RET
+    RET
 
 INIT_XMM sse
 cglobal sbr_neg_odd_64, 1,2,4,z
@@ -248,7 +248,7 @@ cglobal sbr_neg_odd_64, 1,2,4,z
     add         zq, 64
     cmp         zq, r1q
     jne      .loop
-    REP_RET
+    RET
 
 ; void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1)
 INIT_XMM sse2
@@ -276,7 +276,7 @@ cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c
     add            vrevq, 2*mmsize
     sub               cq, 2*mmsize
     jge            .loop
-    REP_RET
+    RET
 
 INIT_XMM sse2
 cglobal sbr_qmf_pre_shuffle, 1,4,6,z
@@ -306,7 +306,7 @@ cglobal sbr_qmf_pre_shuffle, 1,4,6,z
     jge      .loop
     movq       m2, [zq]
     movq    [r2q], m2
-    REP_RET
+    RET
 
 %ifdef PIC
 %define NREGS 1
@@ -432,7 +432,7 @@ cglobal sbr_qmf_deint_neg, 2,4,4,v,src,vrev,c
     sub        vq, mmsize
     add        cq, mmsize
     jl      .loop
-    REP_RET
+    RET
 
 %macro SBR_AUTOCORRELATE 0
 cglobal sbr_autocorrelate, 2,3,8,32, x, phi, cnt
diff --git a/libavcodec/x86/takdsp.asm b/libavcodec/x86/takdsp.asm
index 5f3ded3ea2..be8e1ab553 100644
--- a/libavcodec/x86/takdsp.asm
+++ b/libavcodec/x86/takdsp.asm
@@ -43,7 +43,7 @@ cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length
     mova     [p2q+lengthq+mmsize*1], m1
     add                     lengthq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 
 cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
     shl                     lengthd, 2
@@ -60,7 +60,7 @@ cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
     mova     [p1q+lengthq+mmsize*1], m1
     add                     lengthq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 
 cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
     shl                     lengthd, 2
@@ -87,7 +87,7 @@ cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
     mova       [p2q+lengthq+mmsize], m4
     add                     lengthq, mmsize*2
     jl .loop
-    REP_RET
+    RET
 
 INIT_XMM sse4
 cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
@@ -113,4 +113,4 @@ cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
     mova      [p1q+lengthq], m1
     add             lengthq, mmsize
     jl .loop
-    REP_RET
+    RET
diff --git a/libavcodec/x86/utvideodsp.asm b/libavcodec/x86/utvideodsp.asm
index b799c44b64..9d54deeb32 100644
--- a/libavcodec/x86/utvideodsp.asm
+++ b/libavcodec/x86/utvideodsp.asm
@@ -69,7 +69,7 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
     add        src_bq, linesize_bq
     sub        hd, 1
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
@@ -125,7 +125,7 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
     add        src_bq, linesize_bq
     sub        hd, 1
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM sse2
diff --git a/libavcodec/x86/v210.asm b/libavcodec/x86/v210.asm
index f247737ed0..8ae592205f 100644
--- a/libavcodec/x86/v210.asm
+++ b/libavcodec/x86/v210.asm
@@ -116,7 +116,7 @@ cglobal v210_planar_unpack_%1, 5, 5, 6 + 2 * cpuflag(avx2), src, y, u, v, w
     add wq, (mmsize*3)/8
     jl  .loop
 
-    REP_RET
+    RET
 %endmacro
 
 INIT_XMM ssse3
diff --git a/libavcodec/x86/vc1dsp_mc.asm b/libavcodec/x86/vc1dsp_mc.asm
index 0e6d87dd8b..c1b3ed1bc3 100644
--- a/libavcodec/x86/vc1dsp_mc.asm
+++ b/libavcodec/x86/vc1dsp_mc.asm
@@ -139,7 +139,7 @@ cglobal vc1_put_ver_16b_shift2, 4,7,0, dst, src, stride
     add              dstq, 8
     dec                 i
         jnz         .loop
-    REP_RET
+    RET
 %undef rnd
 %undef shift
 %undef stride_neg2
diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index b19a8300c5..3cc07878d3 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -433,4 +433,4 @@ cglobal prefetch, 3, 3, 0, buf, stride, h
     add      bufq, strideq
     dec        hd
     jg .loop
-    REP_RET
+    RET
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 33d488bf6f..6ac5a7721b 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -200,7 +200,7 @@ cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, h
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
     shl      mxd, 4
@@ -230,7 +230,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
     shl      myd, 4
@@ -268,7 +268,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
     add      srcq, srcstrideq
     dec   heightd                          ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
     lea      myd, [myq*3]
@@ -314,7 +314,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
     add      srcq, srcstrideq
     dec   heightd                          ; next row
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX ssse3
@@ -368,7 +368,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
     add      srcq, srcstrideq
     dec   heightd                          ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 ; 4x4 block, H-only 6-tap filter
 INIT_MMX mmxext
@@ -426,7 +426,7 @@ cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
     add      srcq, srcstrideq
     dec   heightd                          ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 INIT_XMM sse2
 cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg
@@ -474,7 +474,7 @@ cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, h
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 INIT_XMM sse2
 cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg
@@ -537,7 +537,7 @@ cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, h
     add     srcq, srcstrideq
     dec  heightd            ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 %macro FILTER_V 1
 ; 4x4 block, V-only 4-tap filter
@@ -590,7 +590,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
     add     srcq, srcstrideq
     dec  heightd                           ; next row
     jg .nextrow
-    REP_RET
+    RET
 
 
 ; 4x4 block, V-only 6-tap filter
@@ -655,7 +655,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
     add     srcq, srcstrideq
     dec  heightd                           ; next row
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -738,7 +738,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
     lea     srcq, [srcq+srcstrideq*2]
     sub  heightd, 2
     jg .nextrow
-    REP_RET
+    RET
 
 %if cpuflag(ssse3)
 cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
@@ -815,7 +815,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
     lea     srcq, [srcq+srcstrideq*2]
     sub  heightd, 2
     jg .nextrow
-    REP_RET
+    RET
 %endmacro
 
 INIT_MMX mmxext
@@ -838,7 +838,7 @@ cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
     lea    dstq, [dstq+dststrideq*2]
     sub heightd, 2
     jg .nextrow
-    REP_RET
+    RET
 
 INIT_XMM sse
 cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
@@ -851,7 +851,7 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
     lea    dstq, [dstq+dststrideq*2]
     sub heightd, 2
     jg .nextrow
-    REP_RET
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
author	Lynne <dev@lynne.ee>	2023-02-01 02:26:20 +0100
committer	Lynne <dev@lynne.ee>	2023-02-01 04:23:55 +0100
commit	bbe95f7353a972f28a48be8da883549f02c59e4b (patch)
tree	08841c9da55e7f076f6046d1dbd70f49d74c0ec0 /libavcodec/x86
parent	fc9a3b584da3cf3fc1f00036be2eaf5dff903ccf (diff)
download	ffmpeg-bbe95f7353a972f28a48be8da883549f02c59e4b.tar.gz