summaryrefslogtreecommitdiffstats
path: root/contrib/libs/libjpeg-turbo/simd
diff options
context:
space:
mode:
authorrobot-piglet <[email protected]>2025-06-15 15:44:41 +0300
committerrobot-piglet <[email protected]>2025-06-15 15:55:30 +0300
commitea626d7b15346c0da649291483f80f1ae6e1d7e7 (patch)
tree24ae3c2aa7f259f3ba95af8450b5bce9a4bdb10d /contrib/libs/libjpeg-turbo/simd
parent726087f32fb38c191ff0c3ef8c6646aa940d987e (diff)
Intermediate changes
commit_hash:79edafb911368bba0a4d2f7f151a6c8a37c349f3
Diffstat (limited to 'contrib/libs/libjpeg-turbo/simd')
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/aarch32/jchuff-neon.c11
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/aarch32/jsimd.c24
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/aarch64/jchuff-neon.c12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/aarch64/jsimd.c30
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jccolor-neon.c12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jcgray-neon.c12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jcphuff-neon.c197
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jcsample-neon.c12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jdcolor-neon.c13
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jdmerge-neon.c13
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jdsample-neon.c13
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jfdctfst-neon.c12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jfdctint-neon.c12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jidctfst-neon.c12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jidctint-neon.c13
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jidctred-neon.c12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/jquanti-neon.c12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/arm/neon-compat.h10
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jccolext-avx2.asm28
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jccolext-mmx.asm28
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jccolext-sse2.asm28
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jccolor-avx2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jccolor-mmx.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jccolor-sse2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jcgray-avx2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jcgray-mmx.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jcgray-sse2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jcgryext-avx2.asm28
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jcgryext-mmx.asm28
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jcgryext-sse2.asm28
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jchuff-sse2.asm23
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jcphuff-sse2.asm6
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jcsample-avx2.asm24
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jcsample-mmx.asm20
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jcsample-sse2.asm24
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdcolext-avx2.asm26
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdcolext-mmx.asm26
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdcolext-sse2.asm26
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdcolor-avx2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdcolor-mmx.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdcolor-sse2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdmerge-avx2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdmerge-mmx.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdmerge-sse2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-avx2.asm26
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-mmx.asm26
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-sse2.asm26
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdsample-avx2.asm64
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdsample-mmx.asm64
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jdsample-sse2.asm64
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-3dn.asm22
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-sse.asm22
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-mmx.asm22
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-sse2.asm18
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jfdctint-avx2.asm30
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jfdctint-mmx.asm22
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jfdctint-sse2.asm18
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jidctflt-3dn.asm28
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse.asm44
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse2.asm44
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jidctfst-mmx.asm24
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jidctfst-sse2.asm20
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jidctint-avx2.asm32
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jidctint-mmx.asm24
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jidctint-sse2.asm20
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jidctred-mmx.asm24
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jidctred-sse2.asm22
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jquant-3dn.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jquant-mmx.asm18
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jquant-sse.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jquantf-sse2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jquanti-avx2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jquanti-sse2.asm16
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jsimd.c94
-rw-r--r--contrib/libs/libjpeg-turbo/simd/i386/jsimdcpu.asm6
-rw-r--r--contrib/libs/libjpeg-turbo/simd/jsimd.h12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/nasm/jsimdcfg.inc.h4
-rw-r--r--contrib/libs/libjpeg-turbo/simd/nasm/jsimdext.inc66
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-avx2.asm30
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-sse2.asm30
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-avx2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-sse2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-avx2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-sse2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-avx2.asm30
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-sse2.asm30
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jchuff-sse2.asm78
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jcphuff-sse2.asm48
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-avx2.asm20
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-sse2.asm20
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-avx2.asm30
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-sse2.asm30
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-avx2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-sse2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-avx2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-sse2.asm12
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-avx2.asm36
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-sse2.asm36
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-avx2.asm62
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-sse2.asm52
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jfdctflt-sse.asm34
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jfdctfst-sse2.asm34
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-avx2.asm30
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-sse2.asm34
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jidctflt-sse2.asm54
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jidctfst-sse2.asm36
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-avx2.asm34
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-sse2.asm36
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jidctred-sse2.asm42
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jquantf-sse2.asm20
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-avx2.asm24
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-sse2.asm24
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jsimd.c90
-rw-r--r--contrib/libs/libjpeg-turbo/simd/x86_64/jsimdcpu.asm10
114 files changed, 1417 insertions, 1602 deletions
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jchuff-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jchuff-neon.c
index 19d94f720da..153da1f1c11 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jchuff-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jchuff-neon.c
@@ -2,6 +2,7 @@
* jchuff-neon.c - Huffman entropy encoding (32-bit Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -24,11 +25,11 @@
*/
#define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
#include "../../jsimd.h"
#include "../jchuff.h"
#include "neon-compat.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jsimd.c b/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jsimd.c
index 920f7656ebf..7c8ea306bd1 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jsimd.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jsimd.c
@@ -3,8 +3,8 @@
*
* Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
* Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, 2024, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2019, Google LLC.
* Copyright (C) 2020, Arm Limited.
*
@@ -18,17 +18,17 @@
*/
#define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
#include "../../jsimd.h"
#include <ctype.h>
-static unsigned int simd_support = ~0;
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = ~0;
+static THREAD_LOCAL unsigned int simd_huffman = 1;
#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
@@ -96,8 +96,6 @@ parse_proc_cpuinfo(int bufsize)
/*
* Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
*/
LOCAL(void)
init_simd(void)
@@ -945,7 +943,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
- int Al, JCOEF *values, size_t *zerobits)
+ int Al, UJCOEF *values, size_t *zerobits)
{
jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
Sl, Al, values, zerobits);
@@ -970,7 +968,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
- int Al, JCOEF *absvalues, size_t *bits)
+ int Al, UJCOEF *absvalues, size_t *bits)
{
return jsimd_encode_mcu_AC_refine_prepare_neon(block,
jpeg_natural_order_start, Sl,
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jchuff-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jchuff-neon.c
index 607a116070c..11bf6dab130 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jchuff-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jchuff-neon.c
@@ -2,7 +2,7 @@
* jchuff-neon.c - Huffman entropy encoding (64-bit Arm Neon)
*
* Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
- * Copyright (C) 2020, 2022, D. R. Commander. All Rights Reserved.
+ * Copyright (C) 2020, 2022, 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -25,11 +25,11 @@
*/
#define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
#include "../../jsimd.h"
#include "../align.h"
#include "../jchuff.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jsimd.c b/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jsimd.c
index 41c06d31801..8a6f30a1a89 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jsimd.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jsimd.c
@@ -3,8 +3,9 @@
*
* Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
* Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, 2024,
+ * D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2020, Arm Limited.
*
* Based on the x86 SIMD extension for IJG JPEG library,
@@ -17,13 +18,12 @@
*/
#define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
#include "../../jsimd.h"
-#include "jconfigint.h"
#include <ctype.h>
@@ -31,10 +31,10 @@
#define JSIMD_FASTST3 2
#define JSIMD_FASTTBL 4
-static unsigned int simd_support = ~0;
-static unsigned int simd_huffman = 1;
-static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 |
- JSIMD_FASTTBL;
+static THREAD_LOCAL unsigned int simd_support = ~0;
+static THREAD_LOCAL unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_features = JSIMD_FASTLD3 |
+ JSIMD_FASTST3 | JSIMD_FASTTBL;
#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
@@ -109,8 +109,6 @@ parse_proc_cpuinfo(int bufsize)
/*
* Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
*/
/*
@@ -1021,7 +1019,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
- int Al, JCOEF *values, size_t *zerobits)
+ int Al, UJCOEF *values, size_t *zerobits)
{
jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
Sl, Al, values, zerobits);
@@ -1048,7 +1046,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
- int Al, JCOEF *absvalues, size_t *bits)
+ int Al, UJCOEF *absvalues, size_t *bits)
{
return jsimd_encode_mcu_AC_refine_prepare_neon(block,
jpeg_natural_order_start,
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jccolor-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jccolor-neon.c
index 9fcc62dd25c..d14a7bf5018 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jccolor-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jccolor-neon.c
@@ -2,7 +2,7 @@
* jccolor-neon.c - colorspace conversion (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander. All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -22,11 +22,11 @@
*/
#define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
#include "align.h"
#include "neon-compat.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jcgray-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jcgray-neon.c
index 71c7b2de218..fbcf8214057 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jcgray-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jcgray-neon.c
@@ -2,6 +2,7 @@
* jcgray-neon.c - grayscale colorspace conversion (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -21,13 +22,14 @@
*/
#define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
#include "align.h"
+#include "neon-compat.h"
#include <arm_neon.h>
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jcphuff-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jcphuff-neon.c
index b91c5db478a..435f96ee968 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jcphuff-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jcphuff-neon.c
@@ -2,6 +2,8 @@
* jcphuff-neon.c - prepare data for progressive Huffman encoding (Arm Neon)
*
* Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2022, Matthieu Darbois. All Rights Reserved.
+ * Copyright (C) 2022, 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -21,12 +23,11 @@
*/
#define JPEG_INTERNALS
-#include "jconfigint.h"
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
#include "neon-compat.h"
@@ -41,10 +42,10 @@
void jsimd_encode_mcu_AC_first_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
- JCOEF *values, size_t *zerobits)
+ UJCOEF *values, size_t *zerobits)
{
- JCOEF *values_ptr = values;
- JCOEF *diff_values_ptr = values + DCTSIZE2;
+ UJCOEF *values_ptr = values;
+ UJCOEF *diff_values_ptr = values + DCTSIZE2;
/* Rows of coefficients to zero (since they haven't been processed) */
int i, rows_to_zero = 8;
@@ -68,23 +69,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[15], coefs2, 7);
/* Isolate sign of coefficients. */
- int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15);
- int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15);
+ uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
+ uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
/* Compute absolute value of coefficients and apply point transform Al. */
- int16x8_t abs_coefs1 = vabsq_s16(coefs1);
- int16x8_t abs_coefs2 = vabsq_s16(coefs2);
- coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
- coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
+ uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+ uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+ abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+ abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
/* Compute diff values. */
- int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1);
- int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2);
+ uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
+ uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
/* Store transformed coefficients and diff values. */
- vst1q_s16(values_ptr, coefs1);
- vst1q_s16(values_ptr + DCTSIZE, coefs2);
- vst1q_s16(diff_values_ptr, diff1);
- vst1q_s16(diff_values_ptr + DCTSIZE, diff2);
+ vst1q_u16(values_ptr, abs_coefs1);
+ vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
+ vst1q_u16(diff_values_ptr, diff1);
+ vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
values_ptr += 16;
diff_values_ptr += 16;
jpeg_natural_order_start += 16;
@@ -130,23 +131,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
}
/* Isolate sign of coefficients. */
- int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15);
- int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15);
+ uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
+ uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
/* Compute absolute value of coefficients and apply point transform Al. */
- int16x8_t abs_coefs1 = vabsq_s16(coefs1);
- int16x8_t abs_coefs2 = vabsq_s16(coefs2);
- coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
- coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
+ uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+ uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+ abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+ abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
/* Compute diff values. */
- int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1);
- int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2);
+ uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
+ uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
/* Store transformed coefficients and diff values. */
- vst1q_s16(values_ptr, coefs1);
- vst1q_s16(values_ptr + DCTSIZE, coefs2);
- vst1q_s16(diff_values_ptr, diff1);
- vst1q_s16(diff_values_ptr + DCTSIZE, diff2);
+ vst1q_u16(values_ptr, abs_coefs1);
+ vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
+ vst1q_u16(diff_values_ptr, diff1);
+ vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
values_ptr += 16;
diff_values_ptr += 16;
rows_to_zero -= 2;
@@ -184,17 +185,17 @@ void jsimd_encode_mcu_AC_first_prepare_neon
}
/* Isolate sign of coefficients. */
- int16x8_t sign_coefs = vshrq_n_s16(coefs, 15);
+ uint16x8_t sign_coefs = vreinterpretq_u16_s16(vshrq_n_s16(coefs, 15));
/* Compute absolute value of coefficients and apply point transform Al. */
- int16x8_t abs_coefs = vabsq_s16(coefs);
- coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al));
+ uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
+ abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
/* Compute diff values. */
- int16x8_t diff = veorq_s16(coefs, sign_coefs);
+ uint16x8_t diff = veorq_u16(abs_coefs, sign_coefs);
/* Store transformed coefficients and diff values. */
- vst1q_s16(values_ptr, coefs);
- vst1q_s16(diff_values_ptr, diff);
+ vst1q_u16(values_ptr, abs_coefs);
+ vst1q_u16(diff_values_ptr, diff);
values_ptr += 8;
diff_values_ptr += 8;
rows_to_zero--;
@@ -202,8 +203,8 @@ void jsimd_encode_mcu_AC_first_prepare_neon
/* Zero remaining memory in the values and diff_values blocks. */
for (i = 0; i < rows_to_zero; i++) {
- vst1q_s16(values_ptr, vdupq_n_s16(0));
- vst1q_s16(diff_values_ptr, vdupq_n_s16(0));
+ vst1q_u16(values_ptr, vdupq_n_u16(0));
+ vst1q_u16(diff_values_ptr, vdupq_n_u16(0));
values_ptr += 8;
diff_values_ptr += 8;
}
@@ -211,23 +212,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
/* Construct zerobits bitmap. A set bit means that the corresponding
* coefficient != 0.
*/
- int16x8_t row0 = vld1q_s16(values + 0 * DCTSIZE);
- int16x8_t row1 = vld1q_s16(values + 1 * DCTSIZE);
- int16x8_t row2 = vld1q_s16(values + 2 * DCTSIZE);
- int16x8_t row3 = vld1q_s16(values + 3 * DCTSIZE);
- int16x8_t row4 = vld1q_s16(values + 4 * DCTSIZE);
- int16x8_t row5 = vld1q_s16(values + 5 * DCTSIZE);
- int16x8_t row6 = vld1q_s16(values + 6 * DCTSIZE);
- int16x8_t row7 = vld1q_s16(values + 7 * DCTSIZE);
-
- uint8x8_t row0_eq0 = vmovn_u16(vceqq_s16(row0, vdupq_n_s16(0)));
- uint8x8_t row1_eq0 = vmovn_u16(vceqq_s16(row1, vdupq_n_s16(0)));
- uint8x8_t row2_eq0 = vmovn_u16(vceqq_s16(row2, vdupq_n_s16(0)));
- uint8x8_t row3_eq0 = vmovn_u16(vceqq_s16(row3, vdupq_n_s16(0)));
- uint8x8_t row4_eq0 = vmovn_u16(vceqq_s16(row4, vdupq_n_s16(0)));
- uint8x8_t row5_eq0 = vmovn_u16(vceqq_s16(row5, vdupq_n_s16(0)));
- uint8x8_t row6_eq0 = vmovn_u16(vceqq_s16(row6, vdupq_n_s16(0)));
- uint8x8_t row7_eq0 = vmovn_u16(vceqq_s16(row7, vdupq_n_s16(0)));
+ uint16x8_t row0 = vld1q_u16(values + 0 * DCTSIZE);
+ uint16x8_t row1 = vld1q_u16(values + 1 * DCTSIZE);
+ uint16x8_t row2 = vld1q_u16(values + 2 * DCTSIZE);
+ uint16x8_t row3 = vld1q_u16(values + 3 * DCTSIZE);
+ uint16x8_t row4 = vld1q_u16(values + 4 * DCTSIZE);
+ uint16x8_t row5 = vld1q_u16(values + 5 * DCTSIZE);
+ uint16x8_t row6 = vld1q_u16(values + 6 * DCTSIZE);
+ uint16x8_t row7 = vld1q_u16(values + 7 * DCTSIZE);
+
+ uint8x8_t row0_eq0 = vmovn_u16(vceqq_u16(row0, vdupq_n_u16(0)));
+ uint8x8_t row1_eq0 = vmovn_u16(vceqq_u16(row1, vdupq_n_u16(0)));
+ uint8x8_t row2_eq0 = vmovn_u16(vceqq_u16(row2, vdupq_n_u16(0)));
+ uint8x8_t row3_eq0 = vmovn_u16(vceqq_u16(row3, vdupq_n_u16(0)));
+ uint8x8_t row4_eq0 = vmovn_u16(vceqq_u16(row4, vdupq_n_u16(0)));
+ uint8x8_t row5_eq0 = vmovn_u16(vceqq_u16(row5, vdupq_n_u16(0)));
+ uint8x8_t row6_eq0 = vmovn_u16(vceqq_u16(row6, vdupq_n_u16(0)));
+ uint8x8_t row7_eq0 = vmovn_u16(vceqq_u16(row7, vdupq_n_u16(0)));
/* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
const uint8x8_t bitmap_mask =
@@ -274,7 +275,7 @@ void jsimd_encode_mcu_AC_first_prepare_neon
int jsimd_encode_mcu_AC_refine_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
- JCOEF *absvalues, size_t *bits)
+ UJCOEF *absvalues, size_t *bits)
{
/* Temporary storage buffers for data used to compute the signbits bitmap and
* the end-of-block (EOB) position
@@ -282,7 +283,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
uint8_t coef_sign_bits[64];
uint8_t coef_eq1_bits[64];
- JCOEF *absvalues_ptr = absvalues;
+ UJCOEF *absvalues_ptr = absvalues;
uint8_t *coef_sign_bits_ptr = coef_sign_bits;
uint8_t *eq1_bits_ptr = coef_eq1_bits;
@@ -316,18 +317,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
/* Compute absolute value of coefficients and apply point transform Al. */
- int16x8_t abs_coefs1 = vabsq_s16(coefs1);
- int16x8_t abs_coefs2 = vabsq_s16(coefs2);
- coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
- coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
- vst1q_s16(absvalues_ptr, coefs1);
- vst1q_s16(absvalues_ptr + DCTSIZE, coefs2);
+ uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+ uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+ abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+ abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
+ vst1q_u16(absvalues_ptr, abs_coefs1);
+ vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
/* Test whether transformed coefficient values == 1 (used to find EOB
* position.)
*/
- uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1)));
- uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1)));
+ uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
+ uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
vst1_u8(eq1_bits_ptr, coefs_eq11);
vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
@@ -385,18 +386,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
/* Compute absolute value of coefficients and apply point transform Al. */
- int16x8_t abs_coefs1 = vabsq_s16(coefs1);
- int16x8_t abs_coefs2 = vabsq_s16(coefs2);
- coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
- coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
- vst1q_s16(absvalues_ptr, coefs1);
- vst1q_s16(absvalues_ptr + DCTSIZE, coefs2);
+ uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+ uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+ abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+ abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
+ vst1q_u16(absvalues_ptr, abs_coefs1);
+ vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
/* Test whether transformed coefficient values == 1 (used to find EOB
* position.)
*/
- uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1)));
- uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1)));
+ uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
+ uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
vst1_u8(eq1_bits_ptr, coefs_eq11);
vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
@@ -444,14 +445,14 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
vst1_u8(coef_sign_bits_ptr, sign_coefs);
/* Compute absolute value of coefficients and apply point transform Al. */
- int16x8_t abs_coefs = vabsq_s16(coefs);
- coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al));
- vst1q_s16(absvalues_ptr, coefs);
+ uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
+ abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
+ vst1q_u16(absvalues_ptr, abs_coefs);
/* Test whether transformed coefficient values == 1 (used to find EOB
* position.)
*/
- uint8x8_t coefs_eq1 = vmovn_u16(vceqq_s16(coefs, vdupq_n_s16(1)));
+ uint8x8_t coefs_eq1 = vmovn_u16(vceqq_u16(abs_coefs, vdupq_n_u16(1)));
vst1_u8(eq1_bits_ptr, coefs_eq1);
absvalues_ptr += 8;
@@ -462,7 +463,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
/* Zero remaining memory in blocks. */
for (i = 0; i < rows_to_zero; i++) {
- vst1q_s16(absvalues_ptr, vdupq_n_s16(0));
+ vst1q_u16(absvalues_ptr, vdupq_n_u16(0));
vst1_u8(coef_sign_bits_ptr, vdup_n_u8(0));
vst1_u8(eq1_bits_ptr, vdup_n_u8(0));
absvalues_ptr += 8;
@@ -471,23 +472,23 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
}
/* Construct zerobits bitmap. */
- int16x8_t abs_row0 = vld1q_s16(absvalues + 0 * DCTSIZE);
- int16x8_t abs_row1 = vld1q_s16(absvalues + 1 * DCTSIZE);
- int16x8_t abs_row2 = vld1q_s16(absvalues + 2 * DCTSIZE);
- int16x8_t abs_row3 = vld1q_s16(absvalues + 3 * DCTSIZE);
- int16x8_t abs_row4 = vld1q_s16(absvalues + 4 * DCTSIZE);
- int16x8_t abs_row5 = vld1q_s16(absvalues + 5 * DCTSIZE);
- int16x8_t abs_row6 = vld1q_s16(absvalues + 6 * DCTSIZE);
- int16x8_t abs_row7 = vld1q_s16(absvalues + 7 * DCTSIZE);
-
- uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_s16(abs_row0, vdupq_n_s16(0)));
- uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_s16(abs_row1, vdupq_n_s16(0)));
- uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_s16(abs_row2, vdupq_n_s16(0)));
- uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_s16(abs_row3, vdupq_n_s16(0)));
- uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_s16(abs_row4, vdupq_n_s16(0)));
- uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_s16(abs_row5, vdupq_n_s16(0)));
- uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_s16(abs_row6, vdupq_n_s16(0)));
- uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_s16(abs_row7, vdupq_n_s16(0)));
+ uint16x8_t abs_row0 = vld1q_u16(absvalues + 0 * DCTSIZE);
+ uint16x8_t abs_row1 = vld1q_u16(absvalues + 1 * DCTSIZE);
+ uint16x8_t abs_row2 = vld1q_u16(absvalues + 2 * DCTSIZE);
+ uint16x8_t abs_row3 = vld1q_u16(absvalues + 3 * DCTSIZE);
+ uint16x8_t abs_row4 = vld1q_u16(absvalues + 4 * DCTSIZE);
+ uint16x8_t abs_row5 = vld1q_u16(absvalues + 5 * DCTSIZE);
+ uint16x8_t abs_row6 = vld1q_u16(absvalues + 6 * DCTSIZE);
+ uint16x8_t abs_row7 = vld1q_u16(absvalues + 7 * DCTSIZE);
+
+ uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_u16(abs_row0, vdupq_n_u16(0)));
+ uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_u16(abs_row1, vdupq_n_u16(0)));
+ uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_u16(abs_row2, vdupq_n_u16(0)));
+ uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_u16(abs_row3, vdupq_n_u16(0)));
+ uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_u16(abs_row4, vdupq_n_u16(0)));
+ uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_u16(abs_row5, vdupq_n_u16(0)));
+ uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_u16(abs_row6, vdupq_n_u16(0)));
+ uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_u16(abs_row7, vdupq_n_u16(0)));
/* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
const uint8x8_t bitmap_mask =
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jcsample-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jcsample-neon.c
index 8a3e237838e..fd8a93e520b 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jcsample-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jcsample-neon.c
@@ -2,6 +2,7 @@
* jcsample-neon.c - downsampling (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -21,13 +22,14 @@
*/
#define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
#include "align.h"
+#include "neon-compat.h"
#include <arm_neon.h>
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jdcolor-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jdcolor-neon.c
index ea4668f1d30..97bb02a1ed7 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jdcolor-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jdcolor-neon.c
@@ -2,6 +2,7 @@
* jdcolor-neon.c - colorspace conversion (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -21,14 +22,14 @@
*/
#define JPEG_INTERNALS
-#include "jconfigint.h"
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
#include "align.h"
+#include "neon-compat.h"
#include <arm_neon.h>
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jdmerge-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jdmerge-neon.c
index e4f91fdc0ef..95e6d32830c 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jdmerge-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jdmerge-neon.c
@@ -2,6 +2,7 @@
* jdmerge-neon.c - merged upsampling/color conversion (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -21,14 +22,14 @@
*/
#define JPEG_INTERNALS
-#include "jconfigint.h"
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
#include "align.h"
+#include "neon-compat.h"
#include <arm_neon.h>
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jdsample-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jdsample-neon.c
index 90ec6782c47..a130b1a9581 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jdsample-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jdsample-neon.c
@@ -2,7 +2,7 @@
* jdsample-neon.c - upsampling (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander. All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -22,12 +22,13 @@
*/
#define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
+#include "neon-compat.h"
#include <arm_neon.h>
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jfdctfst-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jfdctfst-neon.c
index bb371be3999..d6109f11d34 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jfdctfst-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jfdctfst-neon.c
@@ -2,6 +2,7 @@
* jfdctfst-neon.c - fast integer FDCT (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -21,13 +22,14 @@
*/
#define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
#include "align.h"
+#include "neon-compat.h"
#include <arm_neon.h>
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jfdctint-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jfdctint-neon.c
index ccfc07b15d9..bb290ea45d2 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jfdctint-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jfdctint-neon.c
@@ -2,7 +2,7 @@
* jfdctint-neon.c - accurate integer FDCT (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander. All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -22,11 +22,11 @@
*/
#define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
#include "align.h"
#include "neon-compat.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jidctfst-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jidctfst-neon.c
index a91be5362eb..e789125344a 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jidctfst-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jidctfst-neon.c
@@ -2,6 +2,7 @@
* jidctfst-neon.c - fast integer IDCT (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -21,13 +22,14 @@
*/
#define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
#include "align.h"
+#include "neon-compat.h"
#include <arm_neon.h>
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jidctint-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jidctint-neon.c
index 043b652e6c5..709e0eaf4e9 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jidctint-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jidctint-neon.c
@@ -2,7 +2,7 @@
* jidctint-neon.c - accurate integer IDCT (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander. All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -22,12 +22,11 @@
*/
#define JPEG_INTERNALS
-#include "jconfigint.h"
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
#include "align.h"
#include "neon-compat.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jidctred-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jidctred-neon.c
index be9627e61d4..25b1addc6a9 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jidctred-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jidctred-neon.c
@@ -2,7 +2,7 @@
* jidctred-neon.c - reduced-size IDCT (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander. All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -22,11 +22,11 @@
*/
#define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
#include "align.h"
#include "neon-compat.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jquanti-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jquanti-neon.c
index d5d95d89f67..e44fb3d4131 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jquanti-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jquanti-neon.c
@@ -2,6 +2,7 @@
* jquanti-neon.c - sample data conversion and quantization (Arm Neon)
*
* Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -21,12 +22,13 @@
*/
#define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
+#include "neon-compat.h"
#include <arm_neon.h>
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/neon-compat.h b/contrib/libs/libjpeg-turbo/simd/arm/neon-compat.h
index 069c62d8290..992aa5a4f97 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/neon-compat.h
+++ b/contrib/libs/libjpeg-turbo/simd/arm/neon-compat.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2020, D. R. Commander. All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander. All Rights Reserved.
* Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
@@ -35,3 +35,11 @@
#else
#error "Unknown compiler"
#endif
+
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wdeclaration-after-statement"
+#pragma clang diagnostic ignored "-Wc99-extensions"
+#elif defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-avx2.asm
index c46d684436d..28ac9528079 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-avx2.asm
@@ -2,17 +2,13 @@
; jccolext.asm - colorspace conversion (AVX2)
;
; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -49,15 +45,15 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@@ -80,9 +76,9 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
- pushpic eax
+ PUSHPIC eax
push edx
push ebx
push edi
@@ -93,11 +89,11 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
mov edi, JSAMPROW [edi] ; outptr0
mov ebx, JSAMPROW [ebx] ; outptr1
mov edx, JSAMPROW [edx] ; outptr2
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_YMMWORD
jae near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -154,7 +150,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_ycc_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -278,7 +274,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_ycc_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -552,7 +548,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
pop edi
pop ebx
pop edx
- poppic eax
+ POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-mmx.asm
index 6357a42b2cf..44b62512e91 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-mmx.asm
@@ -2,17 +2,13 @@
; jccolext.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -49,15 +45,15 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)] ; num_cols
test ecx, ecx
@@ -80,9 +76,9 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
- pushpic eax
+ PUSHPIC eax
push edx
push ebx
push edi
@@ -93,11 +89,11 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
mov edi, JSAMPROW [edi] ; outptr0
mov ebx, JSAMPROW [ebx] ; outptr1
mov edx, JSAMPROW [edx] ; outptr2
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_MMWORD
jae short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -143,7 +139,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_ycc_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -211,7 +207,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_ycc_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -449,7 +445,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
pop edi
pop ebx
pop edx
- poppic eax
+ POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-sse2.asm
index c6c80852ac5..1d8d5f5a205 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-sse2.asm
@@ -1,17 +1,13 @@
;
; jccolext.asm - colorspace conversion (SSE2)
;
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -48,15 +44,15 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@@ -79,9 +75,9 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
- pushpic eax
+ PUSHPIC eax
push edx
push ebx
push edi
@@ -92,11 +88,11 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
mov edi, JSAMPROW [edi] ; outptr0
mov ebx, JSAMPROW [ebx] ; outptr1
mov edx, JSAMPROW [edx] ; outptr2
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_XMMWORD
jae near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -147,7 +143,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_ycc_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -232,7 +228,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_ycc_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -478,7 +474,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
pop edi
pop ebx
pop edx
- poppic eax
+ POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-avx2.asm
index 14944e952f1..9ad5ea95f80 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-avx2.asm
@@ -1,18 +1,14 @@
;
; jccolor.asm - colorspace conversion (AVX2)
;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -33,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
EXTN(jconst_rgb_ycc_convert_avx2):
@@ -46,7 +42,7 @@ PD_ONEHALFM1_CJ times 8 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-mmx.asm
index 8cb399bdc43..0dbec54817e 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-mmx.asm
@@ -2,17 +2,13 @@
; jccolor.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -33,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_mmx)
EXTN(jconst_rgb_ycc_convert_mmx):
@@ -46,7 +42,7 @@ PD_ONEHALFM1_CJ times 2 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 2 dd (1 << (SCALEBITS - 1))
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-sse2.asm
index 686d222ff70..678306a10c3 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-sse2.asm
@@ -1,17 +1,13 @@
;
; jccolor.asm - colorspace conversion (SSE2)
;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -32,7 +28,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
EXTN(jconst_rgb_ycc_convert_sse2):
@@ -45,7 +41,7 @@ PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-avx2.asm
index 560ee0c71e2..ded39567df2 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-avx2.asm
@@ -1,18 +1,14 @@
;
; jcgray.asm - grayscale colorspace conversion (AVX2)
;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -29,7 +25,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
EXTN(jconst_rgb_gray_convert_avx2):
@@ -38,7 +34,7 @@ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
PW_F0114_F0250 times 8 dw F_0_114, F_0_250
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-mmx.asm
index 79fdf082a84..d6f031869a0 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-mmx.asm
@@ -2,17 +2,13 @@
; jcgray.asm - grayscale colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -29,7 +25,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_mmx)
EXTN(jconst_rgb_gray_convert_mmx):
@@ -38,7 +34,7 @@ PW_F0299_F0337 times 2 dw F_0_299, F_0_337
PW_F0114_F0250 times 2 dw F_0_114, F_0_250
PD_ONEHALF times 2 dd (1 << (SCALEBITS - 1))
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-sse2.asm
index cb4b28e8f49..ecc7fa08abb 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-sse2.asm
@@ -1,17 +1,13 @@
;
; jcgray.asm - grayscale colorspace conversion (SSE2)
;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -28,7 +24,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
EXTN(jconst_rgb_gray_convert_sse2):
@@ -37,7 +33,7 @@ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
PW_F0114_F0250 times 4 dw F_0_114, F_0_250
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-avx2.asm
index 3fa7973d72b..70df8f80ba4 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-avx2.asm
@@ -1,18 +1,14 @@
;
; jcgryext.asm - grayscale colorspace conversion (AVX2)
;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -49,15 +45,15 @@ EXTN(jsimd_rgb_gray_convert_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@@ -76,20 +72,20 @@ EXTN(jsimd_rgb_gray_convert_avx2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
- pushpic eax
+ PUSHPIC eax
push edi
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr0
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_YMMWORD
jae near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -146,7 +142,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_gray_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -270,7 +266,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_gray_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -433,7 +429,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
pop ecx ; col
pop esi
pop edi
- poppic eax
+ POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-mmx.asm
index 8af42e5a332..dd90c3dfb08 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-mmx.asm
@@ -2,17 +2,13 @@
; jcgryext.asm - grayscale colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -49,15 +45,15 @@ EXTN(jsimd_rgb_gray_convert_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)] ; num_cols
test ecx, ecx
@@ -76,20 +72,20 @@ EXTN(jsimd_rgb_gray_convert_mmx):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
- pushpic eax
+ PUSHPIC eax
push edi
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr0
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_MMWORD
jae short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -135,7 +131,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_gray_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -203,7 +199,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_gray_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -330,7 +326,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
pop ecx ; col
pop esi
pop edi
- poppic eax
+ POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-sse2.asm
index c9d6ff1e351..227295f3072 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-sse2.asm
@@ -1,17 +1,13 @@
;
; jcgryext.asm - grayscale colorspace conversion (SSE2)
;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -48,15 +44,15 @@ EXTN(jsimd_rgb_gray_convert_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@@ -75,20 +71,20 @@ EXTN(jsimd_rgb_gray_convert_sse2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
- pushpic eax
+ PUSHPIC eax
push edi
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr0
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_XMMWORD
jae near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -139,7 +135,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_gray_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -224,7 +220,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_gray_cnv
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -359,7 +355,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
pop ecx ; col
pop esi
pop edi
- poppic eax
+ POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jchuff-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jchuff-sse2.asm
index 278cf5e83af..ed194dd383d 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jchuff-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jchuff-sse2.asm
@@ -1,7 +1,7 @@
;
; jchuff-sse2.asm - Huffman entropy encoding (SSE2)
;
-; Copyright (C) 2009-2011, 2014-2017, 2019, D. R. Commander.
+; Copyright (C) 2009-2011, 2014-2017, 2019, 2024, D. R. Commander.
; Copyright (C) 2015, Matthieu Darbois.
; Copyright (C) 2018, Matthias Räncker.
;
@@ -9,11 +9,7 @@
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains an SSE2 implementation for Huffman coding of one block.
; The following code is based on jchuff.c; see jchuff.c for more details.
@@ -42,7 +38,7 @@ endstruc
EXTN(jconst_huff_encode_one_block):
- alignz 32
+ ALIGNZ 32
jpeg_mask_bits dq 0x0000, 0x0001, 0x0003, 0x0007
dq 0x000f, 0x001f, 0x003f, 0x007f
@@ -65,7 +61,8 @@ times 1 << 2 db 3
times 1 << 1 db 2
times 1 << 0 db 1
times 1 db 0
-jpeg_nbits_table:
+GLOBAL_DATA(jpeg_nbits_table)
+EXTN(jpeg_nbits_table):
times 1 db 0
times 1 << 0 db 1
times 1 << 1 db 2
@@ -83,14 +80,14 @@ times 1 << 12 db 13
times 1 << 13 db 14
times 1 << 14 db 15
- alignz 32
+ ALIGNZ 32
%ifdef PIC
%define NBITS(x) nbits_base + x
%else
-%define NBITS(x) jpeg_nbits_table + x
+%define NBITS(x) EXTN(jpeg_nbits_table) + x
%endif
-%define MASK_BITS(x) NBITS((x) * 8) + (jpeg_mask_bits - jpeg_nbits_table)
+%define MASK_BITS(x) NBITS((x) * 8) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -235,7 +232,7 @@ times 1 << 14 db 15
; If PIC is defined, load the address of a symbol defined in this file into a
; register. Equivalent to
-; get_GOT %1
+; GET_GOT %1
; lea %1, [GOTOFF(%1, %2)]
; without using the GOT.
;
@@ -469,7 +466,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
pcmpeqw mm_all_0xff, mm_all_0xff ;Z: all_0xff[i] = 0xFF;
%endmacro
- GET_SYM nbits_base, jpeg_nbits_table, GET_SYM_BEFORE, GET_SYM_AFTER
+ GET_SYM nbits_base, EXTN(jpeg_nbits_table), GET_SYM_BEFORE, GET_SYM_AFTER
psrldq xmm4, 1 * SIZEOF_WORD ;G: w4 = 37 44 45 38 39 46 47 --
shufpd xmm1, xmm5, 10b ;F: w1 = 36 37 44 45 50 51 58 59
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcphuff-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcphuff-sse2.asm
index c26b48a47d8..19a183fcd83 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcphuff-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcphuff-sse2.asm
@@ -7,11 +7,7 @@
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains an SSE2 implementation of data preparation for progressive
; Huffman encoding. See jcphuff.c for more details.
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-avx2.asm
index 0a20802dd89..5019829c9ae 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-avx2.asm
@@ -3,17 +3,13 @@
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -70,7 +66,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
- alignx 16, 7
+ ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -106,7 +102,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -117,7 +113,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
cmp ecx, byte SIZEOF_YMMWORD
jae short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop_r24:
; ecx can possibly be 8, 16, 24
@@ -141,7 +137,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
vpxor ymm1, ymm1, ymm1
mov ecx, SIZEOF_YMMWORD
jmp short .downsample
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -243,7 +239,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
- alignx 16, 7
+ ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -279,7 +275,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -291,7 +287,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
cmp ecx, byte SIZEOF_YMMWORD
jae short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop_r24:
cmp ecx, 24
@@ -320,7 +316,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
vpxor ymm3, ymm3, ymm3
mov ecx, SIZEOF_YMMWORD
jmp short .downsample
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
vmovdqu ymm0, YMMWORD [edx+0*SIZEOF_YMMWORD]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-mmx.asm
index 2c223eebe81..94dd88870a3 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-mmx.asm
@@ -2,17 +2,13 @@
; jcsample.asm - downsampling (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -69,7 +65,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
- alignx 16, 7
+ ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -104,7 +100,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -112,7 +108,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -212,7 +208,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
- alignx 16, 7
+ ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -247,7 +243,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -256,7 +252,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1
mov edi, JSAMPROW [edi] ; outptr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [edx+0*SIZEOF_MMWORD]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-sse2.asm
index 4fea60d2e21..eb8808bea84 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-sse2.asm
@@ -2,17 +2,13 @@
; jcsample.asm - downsampling (SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -69,7 +65,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
- alignx 16, 7
+ ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -104,7 +100,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -115,14 +111,14 @@ EXTN(jsimd_h2v1_downsample_sse2):
cmp ecx, byte SIZEOF_XMMWORD
jae short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop_r8:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
pxor xmm1, xmm1
mov ecx, SIZEOF_XMMWORD
jmp short .downsample
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -225,7 +221,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
- alignx 16, 7
+ ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -260,7 +256,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -272,7 +268,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
cmp ecx, byte SIZEOF_XMMWORD
jae short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop_r8:
movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
@@ -281,7 +277,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
pxor xmm3, xmm3
mov ecx, SIZEOF_XMMWORD
jmp short .downsample
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-avx2.asm
index 015be0416c5..fd79b79568e 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-avx2.asm
@@ -2,18 +2,14 @@
; jdcolext.asm - colorspace conversion (AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -50,15 +46,15 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
test ecx, ecx
@@ -81,7 +77,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push eax
push edi
@@ -94,8 +90,8 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
mov ebx, JSAMPROW [ebx] ; inptr1
mov edx, JSAMPROW [edx] ; inptr2
mov edi, JSAMPROW [edi] ; outptr
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
- alignx 16, 7
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
+ ALIGNX 16, 7
.columnloop:
vmovdqu ymm5, YMMWORD [ebx] ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -295,7 +291,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st64:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -436,7 +432,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st64:
cmp ecx, byte SIZEOF_YMMWORD/2
@@ -479,7 +475,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
%endif ; RGB_PIXELSIZE ; ---------------
- alignx 16, 7
+ ALIGNX 16, 7
.nextrow:
pop ecx
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-mmx.asm
index 5813cfcb66f..636bd6d3fdc 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-mmx.asm
@@ -2,17 +2,13 @@
; jdcolext.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -49,15 +45,15 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
test ecx, ecx
@@ -80,7 +76,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push eax
push edi
@@ -93,8 +89,8 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
mov ebx, JSAMPROW [ebx] ; inptr1
mov edx, JSAMPROW [edx] ; inptr2
mov edi, JSAMPROW [edi] ; outptr
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
- alignx 16, 7
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
+ ALIGNX 16, 7
.columnloop:
movq mm5, MMWORD [ebx] ; mm5=Cb(01234567)
@@ -255,7 +251,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
add edx, byte SIZEOF_MMWORD ; inptr2
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st16:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -344,7 +340,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
add edx, byte SIZEOF_MMWORD ; inptr2
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st16:
cmp ecx, byte SIZEOF_MMWORD/2
@@ -369,7 +365,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
%endif ; RGB_PIXELSIZE ; ---------------
- alignx 16, 7
+ ALIGNX 16, 7
.nextrow:
pop ecx
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-sse2.asm
index d5572b32946..0150f2cb69c 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-sse2.asm
@@ -2,17 +2,13 @@
; jdcolext.asm - colorspace conversion (SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -49,15 +45,15 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
test ecx, ecx
@@ -80,7 +76,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push eax
push edi
@@ -93,8 +89,8 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
mov ebx, JSAMPROW [ebx] ; inptr1
mov edx, JSAMPROW [edx] ; inptr2
mov edi, JSAMPROW [edi] ; outptr
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
- alignx 16, 7
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
+ ALIGNX 16, 7
.columnloop:
movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF)
@@ -275,7 +271,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st32:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -387,7 +383,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st32:
cmp ecx, byte SIZEOF_XMMWORD/2
@@ -423,7 +419,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
%endif ; RGB_PIXELSIZE ; ---------------
- alignx 16, 7
+ ALIGNX 16, 7
.nextrow:
pop ecx
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-avx2.asm
index e05b60d0017..d3a30d63a71 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-avx2.asm
@@ -3,17 +3,13 @@
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -32,7 +28,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
EXTN(jconst_ycc_rgb_convert_avx2):
@@ -43,7 +39,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-mmx.asm
index fb7e7bcce4b..6e67e4b72ea 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-mmx.asm
@@ -2,17 +2,13 @@
; jdcolor.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_mmx)
EXTN(jconst_ycc_rgb_convert_mmx):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
PW_ONE times 4 dw 1
PD_ONEHALF times 2 dd 1 << (SCALEBITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-sse2.asm
index b736255317e..79c9c6821a4 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-sse2.asm
@@ -2,17 +2,13 @@
; jdcolor.asm - colorspace conversion (SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
EXTN(jconst_ycc_rgb_convert_sse2):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-avx2.asm
index 711e6792d0f..90493fd023b 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-avx2.asm
@@ -2,18 +2,14 @@
; jdmerge.asm - merged upsampling/color conversion (AVX2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -32,7 +28,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_avx2)
EXTN(jconst_merged_upsample_avx2):
@@ -43,7 +39,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-mmx.asm
index 6e8311d4081..0dc204aa8b4 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-mmx.asm
@@ -2,17 +2,13 @@
; jdmerge.asm - merged upsampling/color conversion (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_mmx)
EXTN(jconst_merged_upsample_mmx):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
PW_ONE times 4 dw 1
PD_ONEHALF times 2 dd 1 << (SCALEBITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-sse2.asm
index e32f90aa177..06f07627421 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-sse2.asm
@@ -2,17 +2,13 @@
; jdmerge.asm - merged upsampling/color conversion (SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_sse2)
EXTN(jconst_merged_upsample_sse2):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-avx2.asm
index e35f7282bc4..a7aa930e346 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-avx2.asm
@@ -2,18 +2,14 @@
; jdmrgext.asm - merged upsampling/color conversion (AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -50,15 +46,15 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [output_width(eax)] ; col
test ecx, ecx
@@ -79,9 +75,9 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
pop ecx ; col
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
vmovdqu ymm6, YMMWORD [ebx] ; ymm6=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
vmovdqu ymm7, YMMWORD [edx] ; ymm7=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -168,13 +164,13 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
mov al, 2 ; Yctr
jmp short .Yloop_1st
- alignx 16, 7
+ ALIGNX 16, 7
.Yloop_2nd:
vmovdqa ymm0, YMMWORD [wk(1)] ; ymm0=(R-Y)H
vmovdqa ymm2, YMMWORD [wk(2)] ; ymm2=(G-Y)H
vmovdqa ymm4, YMMWORD [wk(0)] ; ymm4=(B-Y)H
- alignx 16, 7
+ ALIGNX 16, 7
.Yloop_1st:
vmovdqu ymm7, YMMWORD [esi] ; ymm7=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -301,7 +297,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st64:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -445,7 +441,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st64:
cmp ecx, byte SIZEOF_YMMWORD/2
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-mmx.asm
index eb3e36b4759..562758146c6 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-mmx.asm
@@ -2,17 +2,13 @@
; jdmrgext.asm - merged upsampling/color conversion (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -47,15 +43,15 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [output_width(eax)] ; col
test ecx, ecx
@@ -76,9 +72,9 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
pop ecx ; col
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
movq mm6, MMWORD [ebx] ; mm6=Cb(01234567)
movq mm7, MMWORD [edx] ; mm7=Cr(01234567)
@@ -171,13 +167,13 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
mov al, 2 ; Yctr
jmp short .Yloop_1st
- alignx 16, 7
+ ALIGNX 16, 7
.Yloop_2nd:
movq mm0, MMWORD [wk(1)] ; mm0=(R-Y)H
movq mm2, MMWORD [wk(2)] ; mm2=(G-Y)H
movq mm4, MMWORD [wk(0)] ; mm4=(B-Y)H
- alignx 16, 7
+ ALIGNX 16, 7
.Yloop_1st:
movq mm7, MMWORD [esi] ; mm7=Y(01234567)
@@ -258,7 +254,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
add ebx, byte SIZEOF_MMWORD ; inptr1
add edx, byte SIZEOF_MMWORD ; inptr2
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st16:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -350,7 +346,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
add ebx, byte SIZEOF_MMWORD ; inptr1
add edx, byte SIZEOF_MMWORD ; inptr2
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st16:
cmp ecx, byte SIZEOF_MMWORD/2
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-sse2.asm
index c113dc4d27e..13e7d980fa6 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-sse2.asm
@@ -2,17 +2,13 @@
; jdmrgext.asm - merged upsampling/color conversion (SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -49,15 +45,15 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [output_width(eax)] ; col
test ecx, ecx
@@ -78,9 +74,9 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
pop ecx ; col
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
- movpic eax, POINTER [gotptr] ; load GOT address (eax)
+ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
movdqa xmm6, XMMWORD [ebx] ; xmm6=Cb(0123456789ABCDEF)
movdqa xmm7, XMMWORD [edx] ; xmm7=Cr(0123456789ABCDEF)
@@ -173,13 +169,13 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
mov al, 2 ; Yctr
jmp short .Yloop_1st
- alignx 16, 7
+ ALIGNX 16, 7
.Yloop_2nd:
movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H
movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H
movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H
- alignx 16, 7
+ ALIGNX 16, 7
.Yloop_1st:
movdqa xmm7, XMMWORD [esi] ; xmm7=Y(0123456789ABCDEF)
@@ -280,7 +276,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st32:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -395,7 +391,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.column_st32:
cmp ecx, byte SIZEOF_XMMWORD/2
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-avx2.asm
index a800c35e083..eba53ef7574 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-avx2.asm
@@ -3,24 +3,20 @@
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_avx2)
EXTN(jconst_fancy_upsample_avx2):
@@ -31,7 +27,7 @@ PW_THREE times 16 dw 3
PW_SEVEN times 16 dw 7
PW_EIGHT times 16 dw 8
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -62,13 +58,13 @@ PW_EIGHT times 16 dw 8
EXTN(jsimd_h2v1_fancy_upsample_avx2):
push ebp
mov ebp, esp
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
test eax, eax
@@ -81,7 +77,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push eax ; colctr
push edi
@@ -104,7 +100,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
and eax, byte -SIZEOF_YMMWORD
cmp eax, byte SIZEOF_YMMWORD
ja short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop_last:
vpcmpeqb xmm6, xmm6, xmm6
@@ -112,7 +108,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
vperm2i128 ymm6, ymm6, ymm6, 1 ; (---- ---- ... ---- ---- ff) MSB is ff
vpand ymm6, ymm6, YMMWORD [esi+0*SIZEOF_YMMWORD]
jmp short .upsample
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
vmovdqu ymm6, YMMWORD [esi+1*SIZEOF_YMMWORD]
@@ -196,7 +192,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
- poppic ebx
+ POPPIC ebx
pop ebp
ret
@@ -234,15 +230,15 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov edx, eax ; edx = original ebp
mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
@@ -256,7 +252,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
mov esi, JSAMPARRAY [input_data(edx)] ; input_data
mov edi, POINTER [output_data_ptr(edx)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push eax ; colctr
push ecx
@@ -286,8 +282,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqu ymm1, YMMWORD [ecx+0*SIZEOF_YMMWORD] ; ymm1=row[-1][0]
vmovdqu ymm2, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm2=row[+1][0]
- pushpic ebx
- movpic ebx, POINTER [gotptr] ; load GOT address
+ PUSHPIC ebx
+ MOVPIC ebx, POINTER [gotptr] ; load GOT address
vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
@@ -328,19 +324,19 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqa YMMWORD [wk(0)], ymm1
vmovdqa YMMWORD [wk(1)], ymm2
- poppic ebx
+ POPPIC ebx
add eax, byte SIZEOF_YMMWORD-1
and eax, byte -SIZEOF_YMMWORD
cmp eax, byte SIZEOF_YMMWORD
ja short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop_last:
; -- process the last column block
- pushpic ebx
- movpic ebx, POINTER [gotptr] ; load GOT address
+ PUSHPIC ebx
+ MOVPIC ebx, POINTER [gotptr] ; load GOT address
vpcmpeqb xmm1, xmm1, xmm1
vpslldq xmm1, xmm1, (SIZEOF_XMMWORD-2)
@@ -353,7 +349,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqa YMMWORD [wk(3)], ymm2 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31)
jmp near .upsample
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
; -- process the next column block
@@ -362,8 +358,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqu ymm1, YMMWORD [ecx+1*SIZEOF_YMMWORD] ; ymm1=row[-1][1]
vmovdqu ymm2, YMMWORD [esi+1*SIZEOF_YMMWORD] ; ymm2=row[+1][1]
- pushpic ebx
- movpic ebx, POINTER [gotptr] ; load GOT address
+ PUSHPIC ebx
+ MOVPIC ebx, POINTER [gotptr] ; load GOT address
vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
@@ -516,7 +512,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm1
vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm0
- poppic ebx
+ POPPIC ebx
sub eax, byte SIZEOF_YMMWORD
add ecx, byte 1*SIZEOF_YMMWORD ; inptr1(above)
@@ -590,7 +586,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -598,7 +594,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
mov eax, edx ; colctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
cmp eax, byte SIZEOF_YMMWORD
@@ -629,7 +625,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
add esi, byte SIZEOF_YMMWORD ; inptr
add edi, byte 2*SIZEOF_YMMWORD ; outptr
jmp short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.nextrow:
pop esi
@@ -689,7 +685,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -698,7 +694,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
mov eax, edx ; colctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
cmp eax, byte SIZEOF_YMMWORD
@@ -734,7 +730,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
add ebx, 2*SIZEOF_YMMWORD ; outptr0
add edi, 2*SIZEOF_YMMWORD ; outptr1
jmp short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.nextrow:
pop esi
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-mmx.asm
index 12c49f0eab5..01d09e62d10 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-mmx.asm
@@ -2,24 +2,20 @@
; jdsample.asm - upsampling (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_mmx)
EXTN(jconst_fancy_upsample_mmx):
@@ -30,7 +26,7 @@ PW_THREE times 4 dw 3
PW_SEVEN times 4 dw 7
PW_EIGHT times 4 dw 8
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -61,13 +57,13 @@ PW_EIGHT times 4 dw 8
EXTN(jsimd_h2v1_fancy_upsample_mmx):
push ebp
mov ebp, esp
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
test eax, eax
@@ -80,7 +76,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push eax ; colctr
push edi
@@ -103,14 +99,14 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
and eax, byte -SIZEOF_MMWORD
cmp eax, byte SIZEOF_MMWORD
ja short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop_last:
pcmpeqb mm6, mm6
psllq mm6, (SIZEOF_MMWORD-1)*BYTE_BIT
pand mm6, MMWORD [esi+0*SIZEOF_MMWORD]
jmp short .upsample
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mm6, MMWORD [esi+1*SIZEOF_MMWORD]
@@ -187,7 +183,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
- poppic ebx
+ POPPIC ebx
pop ebp
ret
@@ -224,15 +220,15 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov edx, eax ; edx = original ebp
mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
@@ -246,7 +242,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
mov esi, JSAMPARRAY [input_data(edx)] ; input_data
mov edi, POINTER [output_data_ptr(edx)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push eax ; colctr
push ecx
@@ -276,8 +272,8 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq mm1, MMWORD [ecx+0*SIZEOF_MMWORD] ; mm1=row[-1][0]
movq mm2, MMWORD [esi+0*SIZEOF_MMWORD] ; mm2=row[+1][0]
- pushpic ebx
- movpic ebx, POINTER [gotptr] ; load GOT address
+ PUSHPIC ebx
+ MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor mm3, mm3 ; mm3=(all 0's)
movq mm4, mm0
@@ -312,19 +308,19 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq MMWORD [wk(0)], mm1
movq MMWORD [wk(1)], mm2
- poppic ebx
+ POPPIC ebx
add eax, byte SIZEOF_MMWORD-1
and eax, byte -SIZEOF_MMWORD
cmp eax, byte SIZEOF_MMWORD
ja short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop_last:
; -- process the last column block
- pushpic ebx
- movpic ebx, POINTER [gotptr] ; load GOT address
+ PUSHPIC ebx
+ MOVPIC ebx, POINTER [gotptr] ; load GOT address
pcmpeqb mm1, mm1
psllq mm1, (SIZEOF_MMWORD-2)*BYTE_BIT
@@ -337,7 +333,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq MMWORD [wk(3)], mm2
jmp short .upsample
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
; -- process the next column block
@@ -346,8 +342,8 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq mm1, MMWORD [ecx+1*SIZEOF_MMWORD] ; mm1=row[-1][1]
movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] ; mm2=row[+1][1]
- pushpic ebx
- movpic ebx, POINTER [gotptr] ; load GOT address
+ PUSHPIC ebx
+ MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor mm3, mm3 ; mm3=(all 0's)
movq mm4, mm0
@@ -486,7 +482,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq MMWORD [edi+0*SIZEOF_MMWORD], mm1
movq MMWORD [edi+1*SIZEOF_MMWORD], mm0
- poppic ebx
+ POPPIC ebx
sub eax, byte SIZEOF_MMWORD
add ecx, byte 1*SIZEOF_MMWORD ; inptr1(above)
@@ -561,7 +557,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -569,7 +565,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
mov eax, edx ; colctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -599,7 +595,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
add esi, byte 2*SIZEOF_MMWORD ; inptr
add edi, byte 4*SIZEOF_MMWORD ; outptr
jmp short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.nextrow:
pop esi
@@ -660,7 +656,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -669,7 +665,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
mov eax, edx ; colctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -704,7 +700,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
add ebx, byte 4*SIZEOF_MMWORD ; outptr0
add edi, byte 4*SIZEOF_MMWORD ; outptr1
jmp short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.nextrow:
pop esi
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-sse2.asm
index 4e28d2f4b80..b10d9227987 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-sse2.asm
@@ -2,24 +2,20 @@
; jdsample.asm - upsampling (SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_sse2)
EXTN(jconst_fancy_upsample_sse2):
@@ -30,7 +26,7 @@ PW_THREE times 8 dw 3
PW_SEVEN times 8 dw 7
PW_EIGHT times 8 dw 8
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -61,13 +57,13 @@ PW_EIGHT times 8 dw 8
EXTN(jsimd_h2v1_fancy_upsample_sse2):
push ebp
mov ebp, esp
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
test eax, eax
@@ -80,7 +76,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push eax ; colctr
push edi
@@ -103,14 +99,14 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
and eax, byte -SIZEOF_XMMWORD
cmp eax, byte SIZEOF_XMMWORD
ja short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop_last:
pcmpeqb xmm6, xmm6
pslldq xmm6, (SIZEOF_XMMWORD-1)
pand xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD]
jmp short .upsample
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movdqa xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD]
@@ -185,7 +181,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
- poppic ebx
+ POPPIC ebx
pop ebp
ret
@@ -223,15 +219,15 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic eax ; make a room for GOT address
+ PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
- movpic POINTER [gotptr], ebx ; save GOT address
+ GET_GOT ebx ; get GOT address
+ MOVPIC POINTER [gotptr], ebx ; save GOT address
mov edx, eax ; edx = original ebp
mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
@@ -245,7 +241,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
mov esi, JSAMPARRAY [input_data(edx)] ; input_data
mov edi, POINTER [output_data_ptr(edx)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push eax ; colctr
push ecx
@@ -275,8 +271,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0]
movdqa xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0]
- pushpic ebx
- movpic ebx, POINTER [gotptr] ; load GOT address
+ PUSHPIC ebx
+ MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor xmm3, xmm3 ; xmm3=(all 0's)
movdqa xmm4, xmm0
@@ -311,19 +307,19 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa XMMWORD [wk(0)], xmm1
movdqa XMMWORD [wk(1)], xmm2
- poppic ebx
+ POPPIC ebx
add eax, byte SIZEOF_XMMWORD-1
and eax, byte -SIZEOF_XMMWORD
cmp eax, byte SIZEOF_XMMWORD
ja short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop_last:
; -- process the last column block
- pushpic ebx
- movpic ebx, POINTER [gotptr] ; load GOT address
+ PUSHPIC ebx
+ MOVPIC ebx, POINTER [gotptr] ; load GOT address
pcmpeqb xmm1, xmm1
pslldq xmm1, (SIZEOF_XMMWORD-2)
@@ -336,7 +332,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15)
jmp near .upsample
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
; -- process the next column block
@@ -345,8 +341,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1]
movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1]
- pushpic ebx
- movpic ebx, POINTER [gotptr] ; load GOT address
+ PUSHPIC ebx
+ MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor xmm3, xmm3 ; xmm3=(all 0's)
movdqa xmm4, xmm0
@@ -485,7 +481,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1
movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0
- poppic ebx
+ POPPIC ebx
sub eax, byte SIZEOF_XMMWORD
add ecx, byte 1*SIZEOF_XMMWORD ; inptr1(above)
@@ -558,7 +554,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -566,7 +562,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
mov eax, edx ; colctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -596,7 +592,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
add esi, byte 2*SIZEOF_XMMWORD ; inptr
add edi, byte 4*SIZEOF_XMMWORD ; outptr
jmp short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.nextrow:
pop esi
@@ -655,7 +651,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -664,7 +660,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
mov eax, edx ; colctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -699,7 +695,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
add ebx, byte 4*SIZEOF_XMMWORD ; outptr0
add edi, byte 4*SIZEOF_XMMWORD ; outptr1
jmp short .columnloop
- alignx 16, 7
+ ALIGNX 16, 7
.nextrow:
pop esi
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-3dn.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-3dn.asm
index 322ab163252..0cedc6caf40 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-3dn.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-3dn.asm
@@ -2,17 +2,13 @@
; jfdctflt.asm - floating-point FDCT (3DNow!)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a floating-point implementation of the forward DCT
; (Discrete Cosine Transform). The following code is based directly on
@@ -24,7 +20,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fdct_float_3dnow)
EXTN(jconst_fdct_float_3dnow):
@@ -34,7 +30,7 @@ PD_0_707 times 2 dd 0.707106781186547524400844
PD_0_541 times 2 dd 0.541196100146196984399723
PD_1_306 times 2 dd 1.306562964876376527856643
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -63,19 +59,19 @@ EXTN(jsimd_fdct_float_3dnow):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/2
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
@@ -190,7 +186,7 @@ EXTN(jsimd_fdct_float_3dnow):
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/2
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
@@ -307,7 +303,7 @@ EXTN(jsimd_fdct_float_3dnow):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
- poppic ebx
+ POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-sse.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-sse.asm
index 86952c6499c..2cb95335869 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-sse.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-sse.asm
@@ -2,17 +2,13 @@
; jfdctflt.asm - floating-point FDCT (SSE)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a floating-point implementation of the forward DCT
; (Discrete Cosine Transform). The following code is based directly on
@@ -34,7 +30,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fdct_float_sse)
EXTN(jconst_fdct_float_sse):
@@ -44,7 +40,7 @@ PD_0_707 times 4 dd 0.707106781186547524400844
PD_0_541 times 4 dd 0.541196100146196984399723
PD_1_306 times 4 dd 1.306562964876376527856643
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -74,19 +70,19 @@ EXTN(jsimd_fdct_float_sse):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/4
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
@@ -222,7 +218,7 @@ EXTN(jsimd_fdct_float_sse):
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/4
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
@@ -358,7 +354,7 @@ EXTN(jsimd_fdct_float_sse):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
- poppic ebx
+ POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-mmx.asm
index 80645a50d7e..fe16e83ee24 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-mmx.asm
@@ -2,17 +2,13 @@
; jfdctfst.asm - fast integer FDCT (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a fast, not so accurate integer implementation of
; the forward DCT (Discrete Cosine Transform). The following code is
@@ -49,7 +45,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fdct_ifast_mmx)
EXTN(jconst_fdct_ifast_mmx):
@@ -59,7 +55,7 @@ PW_F0382 times 4 dw F_0_382 << CONST_SHIFT
PW_F0541 times 4 dw F_0_541 << CONST_SHIFT
PW_F1306 times 4 dw F_1_306 << CONST_SHIFT
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -88,19 +84,19 @@ EXTN(jsimd_fdct_ifast_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -241,7 +237,7 @@ EXTN(jsimd_fdct_ifast_mmx):
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -384,7 +380,7 @@ EXTN(jsimd_fdct_ifast_mmx):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
- poppic ebx
+ POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-sse2.asm
index 446fa7a68f7..890482e0067 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-sse2.asm
@@ -2,17 +2,13 @@
; jfdctfst.asm - fast integer FDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a fast, not so accurate integer implementation of
; the forward DCT (Discrete Cosine Transform). The following code is
@@ -49,7 +45,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fdct_ifast_sse2)
EXTN(jconst_fdct_ifast_sse2):
@@ -59,7 +55,7 @@ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -89,13 +85,13 @@ EXTN(jsimd_fdct_ifast_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
@@ -392,7 +388,7 @@ EXTN(jsimd_fdct_ifast_sse2):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; unused
- poppic ebx
+ POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-avx2.asm
index 23cf733135b..05ea8654850 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-avx2.asm
@@ -2,17 +2,13 @@
; jfdctint.asm - accurate integer FDCT (AVX2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a slower but more accurate integer implementation of the
; forward DCT (Discrete Cosine Transform). The following code is based
@@ -65,7 +61,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47)
; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57)
; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
@@ -108,7 +104,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%8: Temp registers
; %9: Pass (1 or 2)
-%macro dodct 9
+%macro DODCT 9
vpsubw %5, %1, %4 ; %5=data1_0-data6_7=tmp6_7
vpaddw %6, %1, %4 ; %6=data1_0+data6_7=tmp1_0
vpaddw %7, %2, %3 ; %7=data3_2+data4_5=tmp3_2
@@ -223,7 +219,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_avx2)
EXTN(jconst_fdct_islow_avx2):
@@ -242,7 +238,7 @@ PW_DESCALE_P2X times 16 dw 1 << (PASS1_BITS - 1)
PW_1_NEG1 times 8 dw 1
times 8 dw -1
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -262,13 +258,13 @@ PW_1_NEG1 times 8 dw 1
EXTN(jsimd_fdct_islow_avx2):
push ebp
mov ebp, esp
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
@@ -292,9 +288,9 @@ EXTN(jsimd_fdct_islow_avx2):
; ymm2=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
; ymm3=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77)
- dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+ DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
- dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
+ DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
; ---- Pass 2: process columns.
@@ -302,9 +298,9 @@ EXTN(jsimd_fdct_islow_avx2):
vperm2i128 ymm4, ymm1, ymm3, 0x20 ; ymm4=data3_7
vperm2i128 ymm1, ymm1, ymm3, 0x31 ; ymm1=data1_5
- dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+ DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
- dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
+ DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
vperm2i128 ymm3, ymm0, ymm1, 0x30 ; ymm3=data0_1
@@ -322,7 +318,7 @@ EXTN(jsimd_fdct_islow_avx2):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; unused
- poppic ebx
+ POPPIC ebx
pop ebp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-mmx.asm
index 34a43b9e5ef..7d4c61cd7d3 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-mmx.asm
@@ -2,17 +2,13 @@
; jfdctint.asm - accurate integer FDCT (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a slower but more accurate integer implementation of the
; forward DCT (Discrete Cosine Transform). The following code is based
@@ -63,7 +59,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_mmx)
EXTN(jconst_fdct_islow_mmx):
@@ -80,7 +76,7 @@ PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2 - 1)
PW_DESCALE_P2X times 4 dw 1 << (PASS1_BITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -109,19 +105,19 @@ EXTN(jsimd_fdct_islow_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -363,7 +359,7 @@ EXTN(jsimd_fdct_islow_mmx):
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -609,7 +605,7 @@ EXTN(jsimd_fdct_islow_mmx):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
- poppic ebx
+ POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-sse2.asm
index 6f8e18cb9d0..7ed5c9501ac 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-sse2.asm
@@ -2,17 +2,13 @@
; jfdctint.asm - accurate integer FDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a slower but more accurate integer implementation of the
; forward DCT (Discrete Cosine Transform). The following code is based
@@ -63,7 +59,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_sse2)
EXTN(jconst_fdct_islow_sse2):
@@ -80,7 +76,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -110,13 +106,13 @@ EXTN(jsimd_fdct_islow_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
@@ -622,7 +618,7 @@ EXTN(jsimd_fdct_islow_sse2):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; unused
- poppic ebx
+ POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-3dn.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-3dn.asm
index 87951910d8e..8612eee3a5f 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-3dn.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-3dn.asm
@@ -2,17 +2,13 @@
; jidctflt.asm - floating-point IDCT (3DNow! & MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a floating-point implementation of the inverse DCT
; (Discrete Cosine Transform). The following code is based directly on
@@ -24,7 +20,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_3dnow)
EXTN(jconst_idct_float_3dnow):
@@ -36,7 +32,7 @@ PD_2_613 times 2 dd 2.613125929752753055713286
PD_RNDINT_MAGIC times 2 dd 100663296.0 ; (float)(0x00C00000 << 3)
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -78,7 +74,7 @@ EXTN(jsimd_idct_float_3dnow):
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -87,21 +83,21 @@ EXTN(jsimd_idct_float_3dnow):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; FAST_FLOAT *wsptr
mov ecx, DCTSIZE/2 ; ctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
- pushpic ebx ; save GOT address
+ PUSHPIC ebx ; save GOT address
mov ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
or ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
or ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
or eax, ebx
- poppic ebx ; restore GOT address
+ POPPIC ebx ; restore GOT address
jnz short .columnDCT
; -- AC terms all zero
@@ -127,7 +123,7 @@ EXTN(jsimd_idct_float_3dnow):
movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1
movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
jmp near .nextcolumn
- alignx 16, 7
+ ALIGNX 16, 7
%endif
.columnDCT:
@@ -293,7 +289,7 @@ EXTN(jsimd_idct_float_3dnow):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/2 ; ctr
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
; -- Even part
@@ -420,14 +416,14 @@ EXTN(jsimd_idct_float_3dnow):
punpckldq mm6, mm4 ; mm6=(00 01 02 03 04 05 06 07)
punpckhdq mm7, mm4 ; mm7=(10 11 12 13 14 15 16 17)
- pushpic ebx ; save GOT address
+ PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
- poppic ebx ; restore GOT address
+ POPPIC ebx ; restore GOT address
add esi, byte 2*SIZEOF_FAST_FLOAT ; wsptr
add edi, byte 2*SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse.asm
index b27ecfdf46a..caf636b5106 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse.asm
@@ -2,17 +2,13 @@
; jidctflt.asm - floating-point IDCT (SSE & MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a floating-point implementation of the inverse DCT
; (Discrete Cosine Transform). The following code is based directly on
@@ -23,18 +19,18 @@
; --------------------------------------------------------------------------
-%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
shufps %1, %2, 0x44
%endmacro
-%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
shufps %1, %2, 0xEE
%endmacro
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_sse)
EXTN(jconst_idct_float_sse):
@@ -46,7 +42,7 @@ PD_M2_613 times 4 dd -2.613125929752753055713286
PD_0_125 times 4 dd 0.125 ; 1/8
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -88,7 +84,7 @@ EXTN(jsimd_idct_float_sse):
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -97,7 +93,7 @@ EXTN(jsimd_idct_float_sse):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; FAST_FLOAT *wsptr
mov ecx, DCTSIZE/4 ; ctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -149,7 +145,7 @@ EXTN(jsimd_idct_float_sse):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
jmp near .nextcolumn
- alignx 16, 7
+ ALIGNX 16, 7
%endif
.columnDCT:
@@ -325,11 +321,11 @@ EXTN(jsimd_idct_float_sse):
unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
movaps xmm3, xmm6 ; transpose coefficients(phase 2)
- unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
- unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
+ UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
+ UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
movaps xmm0, xmm1 ; transpose coefficients(phase 2)
- unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
- unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
+ UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
+ UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
@@ -340,11 +336,11 @@ EXTN(jsimd_idct_float_sse):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
movaps xmm6, xmm5 ; transpose coefficients(phase 2)
- unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
- unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
+ UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
+ UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
movaps xmm3, xmm4 ; transpose coefficients(phase 2)
- unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
- unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
+ UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
+ UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
@@ -372,7 +368,7 @@ EXTN(jsimd_idct_float_sse):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
; -- Even part
@@ -536,7 +532,7 @@ EXTN(jsimd_idct_float_sse):
punpckldq mm5, mm6 ; mm5=(20 21 22 23 24 25 26 27)
punpckhdq mm4, mm6 ; mm4=(30 31 32 33 34 35 36 37)
- pushpic ebx ; save GOT address
+ PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -547,7 +543,7 @@ EXTN(jsimd_idct_float_sse):
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
- poppic ebx ; restore GOT address
+ POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse2.asm
index c646eaef76e..42703a8efd7 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse2.asm
@@ -2,17 +2,13 @@
; jidctflt.asm - floating-point IDCT (SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a floating-point implementation of the inverse DCT
; (Discrete Cosine Transform). The following code is based directly on
@@ -23,18 +19,18 @@
; --------------------------------------------------------------------------
-%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
shufps %1, %2, 0x44
%endmacro
-%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
shufps %1, %2, 0xEE
%endmacro
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_sse2)
EXTN(jconst_idct_float_sse2):
@@ -46,7 +42,7 @@ PD_M2_613 times 4 dd -2.613125929752753055713286
PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -88,7 +84,7 @@ EXTN(jsimd_idct_float_sse2):
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -97,7 +93,7 @@ EXTN(jsimd_idct_float_sse2):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; FAST_FLOAT *wsptr
mov ecx, DCTSIZE/4 ; ctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -150,7 +146,7 @@ EXTN(jsimd_idct_float_sse2):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
jmp near .nextcolumn
- alignx 16, 7
+ ALIGNX 16, 7
%endif
.columnDCT:
@@ -287,11 +283,11 @@ EXTN(jsimd_idct_float_sse2):
unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
movaps xmm3, xmm6 ; transpose coefficients(phase 2)
- unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
- unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
+ UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
+ UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
movaps xmm0, xmm1 ; transpose coefficients(phase 2)
- unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
- unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
+ UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
+ UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
@@ -302,11 +298,11 @@ EXTN(jsimd_idct_float_sse2):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
movaps xmm6, xmm5 ; transpose coefficients(phase 2)
- unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
- unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
+ UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
+ UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
movaps xmm3, xmm4 ; transpose coefficients(phase 2)
- unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
- unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
+ UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
+ UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
@@ -334,7 +330,7 @@ EXTN(jsimd_idct_float_sse2):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
; -- Even part
@@ -464,7 +460,7 @@ EXTN(jsimd_idct_float_sse2):
pshufd xmm5, xmm6, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
pshufd xmm3, xmm7, 0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
- pushpic ebx ; save GOT address
+ PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
@@ -475,7 +471,7 @@ EXTN(jsimd_idct_float_sse2):
movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3
- poppic ebx ; restore GOT address
+ POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-mmx.asm
index 24622d43693..77d4613d23b 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-mmx.asm
@@ -2,17 +2,13 @@
; jidctfst.asm - fast integer IDCT (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a fast, not so accurate integer implementation of
; the inverse DCT (Discrete Cosine Transform). The following code is
@@ -56,7 +52,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_ifast_mmx)
EXTN(jconst_idct_ifast_mmx):
@@ -67,7 +63,7 @@ PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT
PW_F1082 times 4 dw F_1_082 << CONST_SHIFT
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -109,7 +105,7 @@ EXTN(jsimd_idct_ifast_mmx):
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -118,7 +114,7 @@ EXTN(jsimd_idct_ifast_mmx):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; JCOEF *wsptr
mov ecx, DCTSIZE/4 ; ctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -163,7 +159,7 @@ EXTN(jsimd_idct_ifast_mmx):
movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
jmp near .nextcolumn
- alignx 16, 7
+ ALIGNX 16, 7
%endif
.columnDCT:
@@ -326,7 +322,7 @@ EXTN(jsimd_idct_ifast_mmx):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
; -- Even part
@@ -464,7 +460,7 @@ EXTN(jsimd_idct_ifast_mmx):
punpckldq mm5, mm4 ; mm5=(20 21 22 23 24 25 26 27)
punpckhdq mm1, mm4 ; mm1=(30 31 32 33 34 35 36 37)
- pushpic ebx ; save GOT address
+ PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -475,7 +471,7 @@ EXTN(jsimd_idct_ifast_mmx):
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
- poppic ebx ; restore GOT address
+ POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_JCOEF ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-sse2.asm
index 19704ffa48f..c2fe34ba8c6 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-sse2.asm
@@ -2,17 +2,13 @@
; jidctfst.asm - fast integer IDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a fast, not so accurate integer implementation of
; the inverse DCT (Discrete Cosine Transform). The following code is
@@ -56,7 +52,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_ifast_sse2)
EXTN(jconst_idct_ifast_sse2):
@@ -67,7 +63,7 @@ PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT
PW_F1082 times 8 dw F_1_082 << CONST_SHIFT
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -101,13 +97,13 @@ EXTN(jsimd_idct_ifast_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.
@@ -155,7 +151,7 @@ EXTN(jsimd_idct_ifast_sse2):
movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1
movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3
jmp near .column_end
- alignx 16, 7
+ ALIGNX 16, 7
%endif
.columnDCT:
@@ -490,7 +486,7 @@ EXTN(jsimd_idct_ifast_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
- poppic ebx
+ POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-avx2.asm
index 199c7df3b69..cb119d3f06d 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-avx2.asm
@@ -2,17 +2,13 @@
; jidctint.asm - accurate integer IDCT (AVX2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a slower but more accurate integer implementation of the
; inverse DCT (Discrete Cosine Transform). The following code is based
@@ -65,7 +61,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
; %5=(00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71)
; %6=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72)
; %7=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75)
@@ -118,7 +114,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%12: Temp registers
; %9: Pass (1 or 2)
-%macro dodct 13
+%macro DODCT 13
; -- Even part
; (Original)
@@ -250,7 +246,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_avx2)
EXTN(jconst_idct_islow_avx2):
@@ -269,7 +265,7 @@ PB_CENTERJSAMP times 32 db CENTERJSAMPLE
PW_1_NEG1 times 8 dw 1
times 8 dw -1
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -303,13 +299,13 @@ EXTN(jsimd_idct_islow_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns.
@@ -353,7 +349,7 @@ EXTN(jsimd_idct_islow_avx2):
vpshufd ymm3, ymm4, 0xFF ; ymm3=col3_7=(03 03 03 03 03 03 03 03 07 07 07 07 07 07 07 07)
jmp near .column_end
- alignx 16, 7
+ ALIGNX 16, 7
%endif
.columnDCT:
@@ -371,10 +367,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm2, ymm5, ymm7, 0x20 ; ymm2=in2_6
vperm2i128 ymm3, ymm7, ymm6, 0x31 ; ymm3=in7_5
- dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
+ DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
- dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+ DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
.column_end:
@@ -395,10 +391,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm4, ymm3, ymm1, 0x31 ; ymm3=in7_5
vperm2i128 ymm1, ymm3, ymm1, 0x20 ; ymm1=in3_1
- dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
+ DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
- dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+ DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
vpacksswb ymm0, ymm0, ymm1 ; ymm0=data01_45
@@ -442,7 +438,7 @@ EXTN(jsimd_idct_islow_avx2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
- poppic ebx
+ POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-mmx.asm
index f15c8d34bcb..c2c17f441b8 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-mmx.asm
@@ -2,17 +2,13 @@
; jidctint.asm - accurate integer IDCT (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a slower but more accurate integer implementation of the
; inverse DCT (Discrete Cosine Transform). The following code is based
@@ -63,7 +59,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_mmx)
EXTN(jconst_idct_islow_mmx):
@@ -80,7 +76,7 @@ PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2 - 1)
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -122,7 +118,7 @@ EXTN(jsimd_idct_islow_mmx):
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -131,7 +127,7 @@ EXTN(jsimd_idct_islow_mmx):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; JCOEF *wsptr
mov ecx, DCTSIZE/4 ; ctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -178,7 +174,7 @@ EXTN(jsimd_idct_islow_mmx):
movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
jmp near .nextcolumn
- alignx 16, 7
+ ALIGNX 16, 7
%endif
.columnDCT:
@@ -513,7 +509,7 @@ EXTN(jsimd_idct_islow_mmx):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
- alignx 16, 7
+ ALIGNX 16, 7
.rowloop:
; -- Even part
@@ -816,7 +812,7 @@ EXTN(jsimd_idct_islow_mmx):
punpckldq mm7, mm5 ; mm7=(20 21 22 23 24 25 26 27)
punpckhdq mm4, mm5 ; mm4=(30 31 32 33 34 35 36 37)
- pushpic ebx ; save GOT address
+ PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -827,7 +823,7 @@ EXTN(jsimd_idct_islow_mmx):
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
- poppic ebx ; restore GOT address
+ POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_JCOEF ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-sse2.asm
index 43e320189b4..70516cadcef 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-sse2.asm
@@ -2,17 +2,13 @@
; jidctint.asm - accurate integer IDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a slower but more accurate integer implementation of the
; inverse DCT (Discrete Cosine Transform). The following code is based
@@ -63,7 +59,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_sse2)
EXTN(jconst_idct_islow_sse2):
@@ -80,7 +76,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -114,13 +110,13 @@ EXTN(jsimd_idct_islow_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.
@@ -172,7 +168,7 @@ EXTN(jsimd_idct_islow_sse2):
movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5
movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7
jmp near .column_end
- alignx 16, 7
+ ALIGNX 16, 7
%endif
.columnDCT:
@@ -847,7 +843,7 @@ EXTN(jsimd_idct_islow_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
- poppic ebx
+ POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctred-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctred-mmx.asm
index e2307e1cb6c..96cda657133 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctred-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctred-mmx.asm
@@ -2,17 +2,13 @@
; jidctred.asm - reduced-size IDCT (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains inverse-DCT routines that produce reduced-size
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
@@ -69,7 +65,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_red_mmx)
EXTN(jconst_idct_red_mmx):
@@ -87,7 +83,7 @@ PD_DESCALE_P1_2 times 2 dd 1 << (DESCALE_P1_2 - 1)
PD_DESCALE_P2_2 times 2 dd 1 << (DESCALE_P2_2 - 1)
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -124,13 +120,13 @@ EXTN(jsimd_idct_4x4_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [workspace]
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -139,7 +135,7 @@ EXTN(jsimd_idct_4x4_mmx):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; JCOEF *wsptr
mov ecx, DCTSIZE/4 ; ctr
- alignx 16, 7
+ ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -181,7 +177,7 @@ EXTN(jsimd_idct_4x4_mmx):
movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
jmp near .nextcolumn
- alignx 16, 7
+ ALIGNX 16, 7
%endif
.columnDCT:
@@ -479,7 +475,7 @@ EXTN(jsimd_idct_4x4_mmx):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
- poppic ebx
+ POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
@@ -512,7 +508,7 @@ EXTN(jsimd_idct_2x2_mmx):
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctred-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctred-sse2.asm
index 6e56494e975..1fe967db199 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctred-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctred-sse2.asm
@@ -2,17 +2,13 @@
; jidctred.asm - reduced-size IDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains inverse-DCT routines that produce reduced-size
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
@@ -69,7 +65,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_red_sse2)
EXTN(jconst_idct_red_sse2):
@@ -87,7 +83,7 @@ PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2 - 1)
PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -122,13 +118,13 @@ EXTN(jsimd_idct_4x4_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
- pushpic ebx
+ PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.
@@ -171,7 +167,7 @@ EXTN(jsimd_idct_4x4_sse2):
pshufd xmm3, xmm3, 0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
jmp near .column_end
- alignx 16, 7
+ ALIGNX 16, 7
%endif
.columnDCT:
@@ -400,7 +396,7 @@ EXTN(jsimd_idct_4x4_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
- poppic ebx
+ POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
@@ -433,7 +429,7 @@ EXTN(jsimd_idct_2x2_sse2):
push esi
push edi
- get_GOT ebx ; get GOT address
+ GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquant-3dn.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquant-3dn.asm
index 5cb60caa947..58e0011f70e 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquant-3dn.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquant-3dn.asm
@@ -2,17 +2,13 @@
; jquant.asm - sample data conversion and quantization (3DNow! & MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
%include "jdct.inc"
@@ -52,7 +48,7 @@ EXTN(jsimd_convsamp_float_3dnow):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
- alignx 16, 7
+ ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@@ -154,7 +150,7 @@ EXTN(jsimd_quantize_float_3dnow):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
- alignx 16, 7
+ ALIGNX 16, 7
.quantloop:
movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquant-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquant-mmx.asm
index 61305c625de..4eda95ce12f 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquant-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquant-mmx.asm
@@ -2,17 +2,13 @@
; jquant.asm - sample data conversion and quantization (MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
%include "jdct.inc"
@@ -52,7 +48,7 @@ EXTN(jsimd_convsamp_mmx):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/4
- alignx 16, 7
+ ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@@ -120,8 +116,8 @@ EXTN(jsimd_convsamp_mmx):
; Quantize/descale the coefficients, and store into coef_block
;
; This implementation is based on an algorithm described in
-; "How to optimize for the Pentium family of microprocessors"
-; (http://www.agner.org/assem/).
+; "Optimizing subroutines in assembly language:
+; An optimization guide for x86 platforms" (https://agner.org/optimize).
;
; GLOBAL(void)
; jsimd_quantize_mmx(JCOEFPTR coef_block, DCTELEM *divisors,
@@ -157,10 +153,10 @@ EXTN(jsimd_quantize_mmx):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov ah, 2
- alignx 16, 7
+ ALIGNX 16, 7
.quantloop1:
mov al, DCTSIZE2/8/2
- alignx 16, 7
+ ALIGNX 16, 7
.quantloop2:
movq mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
movq mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquant-sse.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquant-sse.asm
index 218adc976f3..6cb5f79c215 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquant-sse.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquant-sse.asm
@@ -2,17 +2,13 @@
; jquant.asm - sample data conversion and quantization (SSE & MMX)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
%include "jdct.inc"
@@ -52,7 +48,7 @@ EXTN(jsimd_convsamp_float_sse):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
- alignx 16, 7
+ ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@@ -150,7 +146,7 @@ EXTN(jsimd_quantize_float_sse):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
- alignx 16, 7
+ ALIGNX 16, 7
.quantloop:
movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquantf-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquantf-sse2.asm
index a881ab50f92..5668f8cb396 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquantf-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquantf-sse2.asm
@@ -2,17 +2,13 @@
; jquantf.asm - sample data conversion and quantization (SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
%include "jdct.inc"
@@ -52,7 +48,7 @@ EXTN(jsimd_convsamp_float_sse2):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
- alignx 16, 7
+ ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@@ -127,7 +123,7 @@ EXTN(jsimd_quantize_float_sse2):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
- alignx 16, 7
+ ALIGNX 16, 7
.quantloop:
movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquanti-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquanti-avx2.asm
index 5ed6bec246c..60ae098e9c4 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquanti-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquanti-avx2.asm
@@ -2,18 +2,14 @@
; jquanti.asm - sample data conversion and quantization (AVX2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, 2018, D. R. Commander.
+; Copyright (C) 2016, 2018, 2024, D. R. Commander.
; Copyright (C) 2016, Matthieu Darbois.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
%include "jdct.inc"
@@ -107,8 +103,8 @@ EXTN(jsimd_convsamp_avx2):
; Quantize/descale the coefficients, and store into coef_block
;
; This implementation is based on an algorithm described in
-; "How to optimize for the Pentium family of microprocessors"
-; (http://www.agner.org/assem/).
+; "Optimizing subroutines in assembly language:
+; An optimization guide for x86 platforms" (https://agner.org/optimize).
;
; GLOBAL(void)
; jsimd_quantize_avx2(JCOEFPTR coef_block, DCTELEM *divisors,
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquanti-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquanti-sse2.asm
index 0a509408aa1..c1edde996e9 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquanti-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquanti-sse2.asm
@@ -2,17 +2,13 @@
; jquanti.asm - sample data conversion and quantization (SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
%include "jdct.inc"
@@ -52,7 +48,7 @@ EXTN(jsimd_convsamp_sse2):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/4
- alignx 16, 7
+ ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@@ -98,8 +94,8 @@ EXTN(jsimd_convsamp_sse2):
; Quantize/descale the coefficients, and store into coef_block
;
; This implementation is based on an algorithm described in
-; "How to optimize for the Pentium family of microprocessors"
-; (http://www.agner.org/assem/).
+; "Optimizing subroutines in assembly language:
+; An optimization guide for x86 platforms" (https://agner.org/optimize).
;
; GLOBAL(void)
; jsimd_quantize_sse2(JCOEFPTR coef_block, DCTELEM *divisors,
@@ -133,7 +129,7 @@ EXTN(jsimd_quantize_sse2):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/32
- alignx 16, 7
+ ALIGNX 16, 7
.quantloop:
movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
movdqa xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jsimd.c b/contrib/libs/libjpeg-turbo/simd/i386/jsimd.c
index 80bc821ff4e..d4786b155b7 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jsimd.c
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jsimd.c
@@ -2,8 +2,8 @@
* jsimd_i386.c
*
* Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2024, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -15,13 +15,12 @@
*/
#define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
-#include "jconfigint.h"
/*
* In the PIC cases, we have no guarantee that constants will keep
@@ -32,13 +31,11 @@
#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
-static unsigned int simd_support = (unsigned int)(~0);
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
+static THREAD_LOCAL unsigned int simd_huffman = 1;
/*
* Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
*/
LOCAL(void)
init_simd(void)
@@ -161,6 +158,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->in_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_extrgb_ycc_convert_avx2;
@@ -220,6 +220,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->in_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_extrgb_gray_convert_avx2;
@@ -279,6 +282,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->out_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_ycc_extrgb_convert_avx2;
@@ -382,6 +388,9 @@ GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
compptr->v_samp_factor,
@@ -402,6 +411,9 @@ GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
compptr->v_samp_factor,
@@ -464,6 +476,9 @@ GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
input_data, output_data_ptr);
@@ -479,6 +494,9 @@ GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
input_data, output_data_ptr);
@@ -540,6 +558,9 @@ GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
compptr->downsampled_width, input_data,
@@ -558,6 +579,9 @@ GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
compptr->downsampled_width, input_data,
@@ -626,6 +650,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->out_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
@@ -684,6 +711,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->out_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
@@ -788,6 +818,9 @@ GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
DCTELEM *workspace)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_convsamp_avx2(sample_data, start_col, workspace);
else if (simd_support & JSIMD_SSE2)
@@ -800,6 +833,9 @@ GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
FAST_FLOAT *workspace)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_SSE2)
jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
else if (simd_support & JSIMD_SSE)
@@ -870,6 +906,9 @@ jsimd_can_fdct_float(void)
GLOBAL(void)
jsimd_fdct_islow(DCTELEM *data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_fdct_islow_avx2(data);
else if (simd_support & JSIMD_SSE2)
@@ -881,6 +920,9 @@ jsimd_fdct_islow(DCTELEM *data)
GLOBAL(void)
jsimd_fdct_ifast(DCTELEM *data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
jsimd_fdct_ifast_sse2(data);
else
@@ -890,6 +932,9 @@ jsimd_fdct_ifast(DCTELEM *data)
GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT *data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
jsimd_fdct_float_sse(data);
else if (simd_support & JSIMD_3DNOW)
@@ -945,6 +990,9 @@ jsimd_can_quantize_float(void)
GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_quantize_avx2(coef_block, divisors, workspace);
else if (simd_support & JSIMD_SSE2)
@@ -957,6 +1005,9 @@ GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
FAST_FLOAT *workspace)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_SSE2)
jsimd_quantize_float_sse2(coef_block, divisors, workspace);
else if (simd_support & JSIMD_SSE)
@@ -1020,6 +1071,9 @@ jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
output_col);
@@ -1032,6 +1086,9 @@ jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
output_col);
@@ -1126,6 +1183,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
output_col);
@@ -1142,6 +1202,9 @@ jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
output_col);
@@ -1155,6 +1218,9 @@ jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
output_col);
@@ -1212,7 +1278,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
- int Al, JCOEF *values, size_t *zerobits)
+ int Al, UJCOEF *values, size_t *zerobits)
{
jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
Sl, Al, values, zerobits);
@@ -1238,7 +1304,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
- int Al, JCOEF *absvalues, size_t *bits)
+ int Al, UJCOEF *absvalues, size_t *bits)
{
return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
jpeg_natural_order_start,
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jsimdcpu.asm b/contrib/libs/libjpeg-turbo/simd/i386/jsimdcpu.asm
index ddcafa9e213..df80f17f5fa 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jsimdcpu.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jsimdcpu.asm
@@ -8,11 +8,7 @@
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
diff --git a/contrib/libs/libjpeg-turbo/simd/jsimd.h b/contrib/libs/libjpeg-turbo/simd/jsimd.h
index 64747c6360c..a28754adb9d 100644
--- a/contrib/libs/libjpeg-turbo/simd/jsimd.h
+++ b/contrib/libs/libjpeg-turbo/simd/jsimd.h
@@ -2,10 +2,10 @@
* simd/jsimd.h
*
* Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
- * Copyright (C) 2011, 2014-2016, 2018, 2020, D. R. Commander.
+ * Copyright (C) 2011, 2014-2016, 2018, 2020, 2022, D. R. Commander.
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
* Copyright (C) 2014, Linaro Limited.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* Copyright (C) 2020, Arm Limited.
*
@@ -1243,16 +1243,16 @@ EXTERN(JOCTET *) jsimd_huff_encode_one_block_neon_slowtbl
/* Progressive Huffman encoding */
EXTERN(void) jsimd_encode_mcu_AC_first_prepare_sse2
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
- JCOEF *values, size_t *zerobits);
+ UJCOEF *values, size_t *zerobits);
EXTERN(void) jsimd_encode_mcu_AC_first_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
- JCOEF *values, size_t *zerobits);
+ UJCOEF *values, size_t *zerobits);
EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_sse2
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
- JCOEF *absvalues, size_t *bits);
+ UJCOEF *absvalues, size_t *bits);
EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_neon
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
- JCOEF *absvalues, size_t *bits);
+ UJCOEF *absvalues, size_t *bits);
diff --git a/contrib/libs/libjpeg-turbo/simd/nasm/jsimdcfg.inc.h b/contrib/libs/libjpeg-turbo/simd/nasm/jsimdcfg.inc.h
index bf2a45ad50c..ed3f9c2a693 100644
--- a/contrib/libs/libjpeg-turbo/simd/nasm/jsimdcfg.inc.h
+++ b/contrib/libs/libjpeg-turbo/simd/nasm/jsimdcfg.inc.h
@@ -12,9 +12,9 @@
#define JPEG_INTERNALS
-#include "../jpeglib.h"
+#include "../src/jpeglib.h"
#include "../jconfig.h"
-#include "../jmorecfg.h"
+#include "../src/jmorecfg.h"
#include "jsimd.h"
;
diff --git a/contrib/libs/libjpeg-turbo/simd/nasm/jsimdext.inc b/contrib/libs/libjpeg-turbo/simd/nasm/jsimdext.inc
index d8a50ed8e23..674dfb6464c 100644
--- a/contrib/libs/libjpeg-turbo/simd/nasm/jsimdext.inc
+++ b/contrib/libs/libjpeg-turbo/simd/nasm/jsimdext.inc
@@ -2,9 +2,10 @@
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2010, 2016, 2018-2019, D. R. Commander.
+; Copyright (C) 2010, 2016, 2018-2019, 2024, D. R. Commander.
; Copyright (C) 2018, Matthieu Darbois.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
;
@@ -75,6 +76,14 @@
; mark stack as non-executable
section .note.GNU-stack noalloc noexec nowrite progbits
+%ifdef __CET__
+%ifdef __x86_64__
+section .note.gnu.property note alloc noexec align=8
+ dd 0x00000004, 0x00000010, 0x00000005, 0x00554e47
+ dd 0xc0000002, 0x00000004, 0x00000003, 0x00000000
+%endif
+%endif
+
; -- segment definition --
;
%ifdef _x86_64_
@@ -271,7 +280,7 @@ const_base:
%define GOTOFF(got, sym) (got) + (sym) - const_base
-%imacro get_GOT 1
+%imacro GET_GOT 1
; NOTE: this macro destroys ecx register.
call %%geteip
add ecx, byte (%%ref - $)
@@ -303,7 +312,7 @@ const_base:
%define GOTOFF(got, sym) (got) + (sym) wrt ..gotoff
-%imacro get_GOT 1
+%imacro GET_GOT 1
extern GOT_SYMBOL
call %%geteip
add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
@@ -316,13 +325,13 @@ const_base:
%endif ; GOT_SYMBOL == _MACHO_PIC_ ----------------
-%imacro pushpic 1.nolist
+%imacro PUSHPIC 1.nolist
push %1
%endmacro
-%imacro poppic 1.nolist
+%imacro POPPIC 1.nolist
pop %1
%endmacro
-%imacro movpic 2.nolist
+%imacro MOVPIC 2.nolist
mov %1, %2
%endmacro
@@ -330,13 +339,13 @@ const_base:
%define GOTOFF(got, sym) (sym)
-%imacro get_GOT 1.nolist
+%imacro GET_GOT 1.nolist
%endmacro
-%imacro pushpic 1.nolist
+%imacro PUSHPIC 1.nolist
%endmacro
-%imacro poppic 1.nolist
+%imacro POPPIC 1.nolist
%endmacro
-%imacro movpic 2.nolist
+%imacro MOVPIC 2.nolist
%endmacro
%endif ; PIC -----------------------------------------
@@ -348,7 +357,7 @@ const_base:
%define MSKLE(x, y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
%define FILLB(b, n) (($$-(b)) & ((n)-1))
-%imacro alignx 1-2.nolist 0xFFFF
+%imacro ALIGNX 1-2.nolist 0xFFFF
%%bs: \
times MSKLE(FILLB(%%bs, %1), %2) & MSKLE(16, FILLB($, %1)) & FILLB($, %1) \
db 0x90 ; nop
@@ -370,7 +379,7 @@ const_base:
; Align the next data on {2,4,8,16,..}-byte boundary.
;
-%imacro alignz 1.nolist
+%imacro ALIGNZ 1.nolist
align %1, db 0 ; filling zeros
%endmacro
@@ -378,7 +387,7 @@ const_base:
%ifdef WIN64
-%imacro collect_args 1
+%imacro COLLECT_ARGS 1
sub rsp, SIZEOF_XMMWORD
movaps XMMWORD [rsp], xmm6
sub rsp, SIZEOF_XMMWORD
@@ -397,17 +406,17 @@ const_base:
%endif
%if %1 > 4
push r14
- mov r14, [rax+48]
+ mov r14, [rbp+48]
%endif
%if %1 > 5
push r15
- mov r15, [rax+56]
+ mov r15, [rbp+56]
%endif
push rsi
push rdi
%endmacro
-%imacro uncollect_args 1
+%imacro UNCOLLECT_ARGS 1
pop rdi
pop rsi
%if %1 > 5
@@ -428,7 +437,7 @@ const_base:
add rsp, SIZEOF_XMMWORD
%endmacro
-%imacro push_xmm 1
+%imacro PUSH_XMM 1
sub rsp, %1 * SIZEOF_XMMWORD
movaps XMMWORD [rsp+0*SIZEOF_XMMWORD], xmm8
%if %1 > 1
@@ -442,7 +451,7 @@ const_base:
%endif
%endmacro
-%imacro pop_xmm 1
+%imacro POP_XMM 1
movaps xmm8, XMMWORD [rsp+0*SIZEOF_XMMWORD]
%if %1 > 1
movaps xmm9, XMMWORD [rsp+1*SIZEOF_XMMWORD]
@@ -458,7 +467,7 @@ const_base:
%else
-%imacro collect_args 1
+%imacro COLLECT_ARGS 1
push r10
mov r10, rdi
%if %1 > 1
@@ -483,7 +492,7 @@ const_base:
%endif
%endmacro
-%imacro uncollect_args 1
+%imacro UNCOLLECT_ARGS 1
%if %1 > 5
pop r15
%endif
@@ -502,16 +511,29 @@ const_base:
pop r10
%endmacro
-%imacro push_xmm 1
+%imacro PUSH_XMM 1
%endmacro
-%imacro pop_xmm 1
+%imacro POP_XMM 1
%endmacro
%endif
%endif
+%ifdef __CET__
+
+%imacro ENDBR64 0
+ dd 0xfa1e0ff3
+%endmacro
+
+%else
+
+%imacro ENDBR64 0
+%endmacro
+
+%endif
+
; --------------------------------------------------------------------------
; Defines picked up from the C headers
;
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-avx2.asm
index ffb527db00e..aeeda0a682f 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-avx2.asm
@@ -1,19 +1,16 @@
;
; jccolext.asm - colorspace conversion (64-bit AVX2)
;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -33,21 +30,22 @@
; r13d = JDIMENSION output_row
; r14d = int num_rows
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
%define WK_NUM 8
align 32
GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)
EXTN(jsimd_rgb_ycc_convert_avx2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 5
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, (SIZEOF_YMMWORD * WK_NUM)
+ COLLECT_ARGS 5
push rbx
mov ecx, r10d
@@ -548,9 +546,9 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
.return:
pop rbx
vzeroupper
- uncollect_args 5
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 5
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-sse2.asm
index af70ed6010f..f3a1244903b 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-sse2.asm
@@ -1,18 +1,15 @@
;
; jccolext.asm - colorspace conversion (64-bit SSE2)
;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -32,21 +29,22 @@
; r13d = JDIMENSION output_row
; r14d = int num_rows
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 8
align 32
GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)
EXTN(jsimd_rgb_ycc_convert_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 5
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, (SIZEOF_XMMWORD * WK_NUM)
+ COLLECT_ARGS 5
push rbx
mov ecx, r10d
@@ -473,9 +471,9 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
.return:
pop rbx
- uncollect_args 5
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 5
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-avx2.asm
index 16b78298dc4..e2628917336 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-avx2.asm
@@ -1,18 +1,14 @@
;
; jccolor.asm - colorspace conversion (64-bit AVX2)
;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -33,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
EXTN(jconst_rgb_ycc_convert_avx2):
@@ -46,7 +42,7 @@ PD_ONEHALFM1_CJ times 8 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-sse2.asm
index e2955c21340..cc9edb4cebc 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-sse2.asm
@@ -1,17 +1,13 @@
;
; jccolor.asm - colorspace conversion (64-bit SSE2)
;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -32,7 +28,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
EXTN(jconst_rgb_ycc_convert_sse2):
@@ -45,7 +41,7 @@ PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-avx2.asm
index 591255bb112..267ec5142a4 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-avx2.asm
@@ -1,18 +1,14 @@
;
; jcgray.asm - grayscale colorspace conversion (64-bit AVX2)
;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -29,7 +25,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
EXTN(jconst_rgb_gray_convert_avx2):
@@ -38,7 +34,7 @@ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
PW_F0114_F0250 times 8 dw F_0_114, F_0_250
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-sse2.asm
index e389904f2f8..4b94d7b8a28 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-sse2.asm
@@ -1,17 +1,13 @@
;
; jcgray.asm - grayscale colorspace conversion (64-bit SSE2)
;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -28,7 +24,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
EXTN(jconst_rgb_gray_convert_sse2):
@@ -37,7 +33,7 @@ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
PW_F0114_F0250 times 4 dw F_0_114, F_0_250
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-avx2.asm
index ddcc2c0a2fe..77e85f768f9 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-avx2.asm
@@ -1,19 +1,16 @@
;
; jcgryext.asm - grayscale colorspace conversion (64-bit AVX2)
;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -33,21 +30,22 @@
; r13d = JDIMENSION output_row
; r14d = int num_rows
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
%define WK_NUM 2
align 32
GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)
EXTN(jsimd_rgb_gray_convert_avx2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 5
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, byte (SIZEOF_YMMWORD * WK_NUM)
+ COLLECT_ARGS 5
push rbx
mov ecx, r10d
@@ -427,9 +425,9 @@ EXTN(jsimd_rgb_gray_convert_avx2):
.return:
pop rbx
vzeroupper
- uncollect_args 5
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 5
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-sse2.asm
index f1d399a63b8..3e8087c39bc 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-sse2.asm
@@ -1,18 +1,15 @@
;
; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2)
;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -32,21 +29,22 @@
; r13d = JDIMENSION output_row
; r14d = int num_rows
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 2
align 32
GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)
EXTN(jsimd_rgb_gray_convert_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 5
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+ COLLECT_ARGS 5
push rbx
mov ecx, r10d
@@ -352,9 +350,9 @@ EXTN(jsimd_rgb_gray_convert_sse2):
.return:
pop rbx
- uncollect_args 5
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 5
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jchuff-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jchuff-sse2.asm
index 9ea6df946ef..b18b7f5d651 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jchuff-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jchuff-sse2.asm
@@ -1,19 +1,16 @@
;
; jchuff-sse2.asm - Huffman entropy encoding (64-bit SSE2)
;
-; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, D. R. Commander.
+; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, 2023-2024, D. R. Commander.
; Copyright (C) 2015, Matthieu Darbois.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains an SSE2 implementation for Huffman coding of one block.
; The following code is based on jchuff.c; see jchuff.c for more details.
@@ -38,7 +35,7 @@ endstruc
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_huff_encode_one_block)
EXTN(jconst_huff_encode_one_block):
@@ -48,7 +45,7 @@ jpeg_mask_bits dd 0x0000, 0x0001, 0x0003, 0x0007
dd 0x00ff, 0x01ff, 0x03ff, 0x07ff
dd 0x0fff, 0x1fff, 0x3fff, 0x7fff
- alignz 32
+ ALIGNZ 32
times 1 << 14 db 15
times 1 << 13 db 14
@@ -66,7 +63,8 @@ times 1 << 2 db 3
times 1 << 1 db 2
times 1 << 0 db 1
times 1 db 0
-jpeg_nbits_table:
+GLOBAL_DATA(jpeg_nbits_table)
+EXTN(jpeg_nbits_table):
times 1 db 0
times 1 << 0 db 1
times 1 << 1 db 2
@@ -85,10 +83,10 @@ times 1 << 13 db 14
times 1 << 14 db 15
times 1 << 15 db 16
- alignz 32
+ ALIGNZ 32
%define NBITS(x) nbits_base + x
-%define MASK_BITS(x) NBITS((x) * 4) + (jpeg_mask_bits - jpeg_nbits_table)
+%define MASK_BITS(x) NBITS((x) * 4) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -208,15 +206,15 @@ times 1 << 15 db 16
; rax - buffer
; rbx - temp
; rcx - nbits
-; rdx - block --> free_bits
+; rdx - code
; rsi - nbits_base
; rdi - t
-; rbp - code
; r8 - dctbl --> code_temp
; r9 - actbl
; r10 - state
; r11 - index
; r12 - put_buffer
+; r15 - block --> free_bits
%define buffer rax
%ifdef WIN64
@@ -231,12 +229,11 @@ times 1 << 15 db 16
%define nbitsq rcx
%define nbits ecx
%define nbitsb cl
-%define block rdx
+%define codeq rdx
+%define code edx
%define nbits_base rsi
%define t rdi
%define td edi
-%define codeq rbp
-%define code ebp
%define dctbl r8
%define actbl r9
%define state r10
@@ -244,6 +241,7 @@ times 1 << 15 db 16
%define indexd r11d
%define put_buffer r12
%define put_bufferd r12d
+%define block r15
; Step 1: Re-arrange input data according to jpeg_natural_order
; xx 01 02 03 04 05 06 07 xx 01 08 16 09 02 03 10
@@ -259,6 +257,9 @@ times 1 << 15 db 16
GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)
EXTN(jsimd_huff_encode_one_block_sse2):
+ ENDBR64
+ push rbp
+ mov rbp, rsp
%ifdef WIN64
@@ -266,15 +267,15 @@ EXTN(jsimd_huff_encode_one_block_sse2):
; rdx = JOCTET *buffer
; r8 = JCOEFPTR block
; r9 = int last_dc_val
-; [rax+48] = c_derived_tbl *dctbl
-; [rax+56] = c_derived_tbl *actbl
+; [rbp+48] = c_derived_tbl *dctbl
+; [rbp+56] = c_derived_tbl *actbl
;X: X = code stream
mov buffer, rdx
+ push r15
mov block, r8
movups xmm3, XMMWORD [block + 0 * SIZEOF_WORD] ;D: w3 = xx 01 02 03 04 05 06 07
push rbx
- push rbp
movdqa xmm0, xmm3 ;A: w0 = xx 01 02 03 04 05 06 07
push rsi
push rdi
@@ -284,12 +285,10 @@ EXTN(jsimd_huff_encode_one_block_sse2):
movsx code, word [block] ;Z: code = block[0];
pxor xmm4, xmm4 ;A: w4[i] = 0;
sub code, r9d ;Z: code -= last_dc_val;
- mov dctbl, POINTER [rsp+6*8+4*8]
- mov actbl, POINTER [rsp+6*8+5*8]
+ mov dctbl, POINTER [rbp+48]
+ mov actbl, POINTER [rbp+56]
punpckldq xmm0, xmm1 ;A: w0 = xx 01 08 09 02 03 10 11
- lea nbits_base, [rel jpeg_nbits_table]
- add rsp, -DCTSIZE2 * SIZEOF_WORD
- mov t, rsp
+ lea nbits_base, [rel EXTN(jpeg_nbits_table)]
%else
@@ -301,23 +300,27 @@ EXTN(jsimd_huff_encode_one_block_sse2):
; r9 = c_derived_tbl *actbl
;X: X = code stream
+ push r15
+ mov block, rdx
movups xmm3, XMMWORD [block + 0 * SIZEOF_WORD] ;D: w3 = xx 01 02 03 04 05 06 07
push rbx
- push rbp
movdqa xmm0, xmm3 ;A: w0 = xx 01 02 03 04 05 06 07
push r12
mov state, rdi
mov buffer, rsi
movups xmm1, XMMWORD [block + 8 * SIZEOF_WORD] ;B: w1 = 08 09 10 11 12 13 14 15
movsx codeq, word [block] ;Z: code = block[0];
- lea nbits_base, [rel jpeg_nbits_table]
+ lea nbits_base, [rel EXTN(jpeg_nbits_table)]
pxor xmm4, xmm4 ;A: w4[i] = 0;
sub codeq, rcx ;Z: code -= last_dc_val;
punpckldq xmm0, xmm1 ;A: w0 = xx 01 08 09 02 03 10 11
- lea t, [rsp - DCTSIZE2 * SIZEOF_WORD] ; use red zone for t_
%endif
+ ; Allocate stack space for t array, and realign stack.
+ add rsp, -DCTSIZE2 * SIZEOF_WORD - 8
+ mov t, rsp
+
pshuflw xmm0, xmm0, 11001001b ;A: w0 = 01 08 xx 09 02 03 10 11
pinsrw xmm0, word [block + 16 * SIZEOF_WORD], 2 ;A: w0 = 01 08 16 09 02 03 10 11
punpckhdq xmm3, xmm1 ;D: w3 = 04 05 12 13 06 07 14 15
@@ -443,9 +446,9 @@ EXTN(jsimd_huff_encode_one_block_sse2):
pinsrw xmm5, word [block + 29 * SIZEOF_WORD], 7 ;E: w5 = 42 49 56 57 50 43 36 29
; (Row 4, offset 1)
%undef block
-%define free_bitsq rdx
-%define free_bitsd edx
-%define free_bitsb dl
+%define free_bitsq r15
+%define free_bitsd r15d
+%define free_bitsb r15b
pcmpeqw xmm1, xmm0 ;F: w1[i] = (w1[i] == 0 ? -1 : 0);
shl tempq, 48 ;Z: temp <<= 48;
pxor xmm2, xmm2 ;E: w2[i] = 0;
@@ -534,12 +537,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
test index, index
jnz .BLOOP ; } while (index != 0);
.ELOOP: ; } /* index != 0 */
- sub td, esp ; t -= (WIN64: &t_[0], UNIX: &t_[64]);
-%ifdef WIN64
+ sub td, esp ; t -= &t_[0];
cmp td, (DCTSIZE2 - 2) * SIZEOF_WORD ; if (t != 62)
-%else
- cmp td, -2 * SIZEOF_WORD ; if (t != -2)
-%endif
je .EFN ; {
movzx nbits, byte [actbl + c_derived_tbl.ehufsi + 0]
; nbits = actbl->ehufsi[0];
@@ -556,18 +555,17 @@ EXTN(jsimd_huff_encode_one_block_sse2):
; state->cur.put_buffer.simd = put_buffer;
mov byte [state + working_state.cur.free_bits], free_bitsb
; state->cur.free_bits = free_bits;
-%ifdef WIN64
- sub rsp, -DCTSIZE2 * SIZEOF_WORD
+ sub rsp, -DCTSIZE2 * SIZEOF_WORD - 8
pop r12
+%ifdef WIN64
pop rdi
pop rsi
- pop rbp
pop rbx
%else
- pop r12
- pop rbp
pop rbx
%endif
+ pop r15
+ pop rbp
ret
; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcphuff-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcphuff-sse2.asm
index 01b5c0235fa..c9ac59f2f1c 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcphuff-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcphuff-sse2.asm
@@ -3,16 +3,14 @@
; (64-bit SSE2)
;
; Copyright (C) 2016, 2018, Matthieu Darbois
+; Copyright (C) 2023, Aliaksiej Kandracienka.
+; Copyright (C) 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains an SSE2 implementation of data preparation for progressive
; Huffman encoding. See jcphuff.c for more details.
@@ -281,16 +279,13 @@
GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)
EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [rbp - 16]
- collect_args 6
-
- movdqa XMMWORD [rbp - 16], ZERO
+ sub rsp, SIZEOF_XMMWORD
+ movdqa XMMWORD [rsp], ZERO
+ COLLECT_ARGS 6
movd AL, r13d
pxor ZERO, ZERO
@@ -384,10 +379,9 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
REDUCE0
- movdqa ZERO, XMMWORD [rbp - 16]
- uncollect_args 6
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 6
+ movdqa ZERO, XMMWORD [rsp]
+ mov rsp, rbp
pop rbp
ret
@@ -449,16 +443,13 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)
EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [rbp - 16]
- collect_args 6
-
- movdqa XMMWORD [rbp - 16], ZERO
+ sub rsp, SIZEOF_XMMWORD
+ movdqa XMMWORD [rsp], ZERO
+ COLLECT_ARGS 6
xor SIGN, SIGN
xor EOB, EOB
@@ -606,10 +597,9 @@ EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
REDUCE0
mov eax, EOB
- movdqa ZERO, XMMWORD [rbp - 16]
- uncollect_args 6
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 6
+ movdqa ZERO, XMMWORD [rsp]
+ mov rsp, rbp
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-avx2.asm
index b32527aebea..53afc7d77fa 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-avx2.asm
@@ -2,7 +2,7 @@
; jcsample.asm - downsampling (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
;
@@ -10,11 +10,7 @@
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -44,10 +40,10 @@
GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)
EXTN(jsimd_h2v1_downsample_avx2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 6
+ COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -178,7 +174,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
.return:
vzeroupper
- uncollect_args 6
+ UNCOLLECT_ARGS 6
pop rbp
ret
@@ -206,10 +202,10 @@ EXTN(jsimd_h2v1_downsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)
EXTN(jsimd_h2v2_downsample_avx2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 6
+ COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -358,7 +354,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
.return:
vzeroupper
- uncollect_args 6
+ UNCOLLECT_ARGS 6
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-sse2.asm
index 2fcfe4567ab..d7ffa930e82 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-sse2.asm
@@ -2,18 +2,14 @@
; jcsample.asm - downsampling (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -43,10 +39,10 @@
GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
EXTN(jsimd_h2v1_downsample_sse2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 6
+ COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -160,7 +156,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
jg near .rowloop
.return:
- uncollect_args 6
+ UNCOLLECT_ARGS 6
pop rbp
ret
@@ -188,10 +184,10 @@ EXTN(jsimd_h2v1_downsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)
EXTN(jsimd_h2v2_downsample_sse2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 6
+ COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -321,7 +317,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
jg near .rowloop
.return:
- uncollect_args 6
+ UNCOLLECT_ARGS 6
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-avx2.asm
index 2370fda6424..7b8a084398d 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-avx2.asm
@@ -2,19 +2,16 @@
; jdcolext.asm - colorspace conversion (64-bit AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -34,21 +31,22 @@
; r13 = JSAMPARRAY output_buf
; r14d = int num_rows
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
%define WK_NUM 2
align 32
GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)
EXTN(jsimd_ycc_rgb_convert_avx2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 5
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, byte (WK_NUM * SIZEOF_YMMWORD)
+ COLLECT_ARGS 5
push rbx
mov ecx, r10d ; num_cols
@@ -485,9 +483,9 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
.return:
pop rbx
vzeroupper
- uncollect_args 5
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 5
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-sse2.asm
index e07c8d75188..261f74da5d2 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-sse2.asm
@@ -2,18 +2,15 @@
; jdcolext.asm - colorspace conversion (64-bit SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -33,21 +30,22 @@
; r13 = JSAMPARRAY output_buf
; r14d = int num_rows
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 2
align 32
GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)
EXTN(jsimd_ycc_rgb_convert_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 5
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+ COLLECT_ARGS 5
push rbx
mov ecx, r10d ; num_cols
@@ -428,9 +426,9 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
.return:
pop rbx
- uncollect_args 5
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 5
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-avx2.asm
index 43de9db04dc..bd5aa00b95c 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-avx2.asm
@@ -2,18 +2,14 @@
; jdcolor.asm - colorspace conversion (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -32,7 +28,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
EXTN(jconst_ycc_rgb_convert_avx2):
@@ -43,7 +39,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-sse2.asm
index b3f1fec07eb..40343fe7895 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-sse2.asm
@@ -2,17 +2,13 @@
; jdcolor.asm - colorspace conversion (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
EXTN(jconst_ycc_rgb_convert_sse2):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-avx2.asm
index 9515a17013d..6a5f1daba56 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-avx2.asm
@@ -2,18 +2,14 @@
; jdmerge.asm - merged upsampling/color conversion (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -32,7 +28,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_avx2)
EXTN(jconst_merged_upsample_avx2):
@@ -43,7 +39,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-sse2.asm
index aedccc20f6c..8c269b83d85 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-sse2.asm
@@ -2,17 +2,13 @@
; jdmerge.asm - merged upsampling/color conversion (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_sse2)
EXTN(jconst_merged_upsample_sse2):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-avx2.asm
index 8b264b4f039..01826fb6abb 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-avx2.asm
@@ -2,19 +2,16 @@
; jdmrgext.asm - merged upsampling/color conversion (64-bit AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -34,21 +31,22 @@
; r12d = JDIMENSION in_row_group_ctr
; r13 = JSAMPARRAY output_buf
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
%define WK_NUM 3
align 32
GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)
EXTN(jsimd_h2v1_merged_upsample_avx2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 4
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, SIZEOF_YMMWORD * WK_NUM
+ COLLECT_ARGS 4
push rbx
mov ecx, r10d ; col
@@ -479,9 +477,9 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
.return:
pop rbx
vzeroupper
- uncollect_args 4
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 4
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
@@ -505,10 +503,10 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)
EXTN(jsimd_h2v2_merged_upsample_avx2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 4
+ COLLECT_ARGS 4
push rbx
mov eax, r10d
@@ -587,7 +585,7 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
add rsp, SIZEOF_JSAMPARRAY*4
pop rbx
- uncollect_args 4
+ UNCOLLECT_ARGS 4
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-sse2.asm
index eb3ab9dbd94..abd22e21a73 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-sse2.asm
@@ -2,18 +2,15 @@
; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jcolsamp.inc"
@@ -33,21 +30,22 @@
; r12d = JDIMENSION in_row_group_ctr
; r13 = JSAMPARRAY output_buf
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 3
align 32
GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)
EXTN(jsimd_h2v1_merged_upsample_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 4
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+ COLLECT_ARGS 4
push rbx
mov ecx, r10d ; col
@@ -421,9 +419,9 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
.return:
pop rbx
- uncollect_args 4
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 4
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
@@ -447,10 +445,10 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)
EXTN(jsimd_h2v2_merged_upsample_sse2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 4
+ COLLECT_ARGS 4
push rbx
mov eax, r10d
@@ -529,7 +527,7 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
add rsp, SIZEOF_JSAMPARRAY*4
pop rbx
- uncollect_args 4
+ UNCOLLECT_ARGS 4
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-avx2.asm
index 1e4979f933e..6ae4cf812a7 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-avx2.asm
@@ -2,26 +2,23 @@
; jdsample.asm - upsampling (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_avx2)
EXTN(jconst_fancy_upsample_avx2):
@@ -32,7 +29,7 @@ PW_THREE times 16 dw 3
PW_SEVEN times 16 dw 7
PW_EIGHT times 16 dw 8
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -61,11 +58,11 @@ PW_EIGHT times 16 dw 8
GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
EXTN(jsimd_h2v1_fancy_upsample_avx2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- push_xmm 3
- collect_args 4
+ PUSH_XMM 3
+ COLLECT_ARGS 4
mov eax, r11d ; colctr
test rax, rax
@@ -186,8 +183,8 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
.return:
vzeroupper
- uncollect_args 4
- pop_xmm 3
+ UNCOLLECT_ARGS 4
+ POP_XMM 3
pop rbp
ret
@@ -208,22 +205,23 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
; r12 = JSAMPARRAY input_data
; r13 = JSAMPARRAY *output_data_ptr
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
%define WK_NUM 4
align 32
GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)
EXTN(jsimd_h2v2_fancy_upsample_avx2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
- and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- push_xmm 3
- collect_args 4
+ mov rbp, rsp
+ push r15
+ and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, (SIZEOF_YMMWORD * WK_NUM)
+ PUSH_XMM 3
+ COLLECT_ARGS 4
push rbx
mov eax, r11d ; colctr
@@ -498,10 +496,10 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
.return:
pop rbx
vzeroupper
- uncollect_args 4
- pop_xmm 3
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 4
+ POP_XMM 3
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
@@ -524,10 +522,10 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)
EXTN(jsimd_h2v1_upsample_avx2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 4
+ COLLECT_ARGS 4
mov edx, r11d
add rdx, byte (SIZEOF_YMMWORD-1)
@@ -590,7 +588,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
.return:
vzeroupper
- uncollect_args 4
+ UNCOLLECT_ARGS 4
pop rbp
ret
@@ -613,10 +611,10 @@ EXTN(jsimd_h2v1_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)
EXTN(jsimd_h2v2_upsample_avx2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 4
+ COLLECT_ARGS 4
push rbx
mov edx, r11d
@@ -687,7 +685,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
.return:
pop rbx
vzeroupper
- uncollect_args 4
+ UNCOLLECT_ARGS 4
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-sse2.asm
index 38dbceec269..54c560fc28e 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-sse2.asm
@@ -2,25 +2,22 @@
; jdsample.asm - upsampling (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_sse2)
EXTN(jconst_fancy_upsample_sse2):
@@ -31,7 +28,7 @@ PW_THREE times 8 dw 3
PW_SEVEN times 8 dw 7
PW_EIGHT times 8 dw 8
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -60,10 +57,10 @@ PW_EIGHT times 8 dw 8
GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)
EXTN(jsimd_h2v1_fancy_upsample_sse2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 4
+ COLLECT_ARGS 4
mov eax, r11d ; colctr
test rax, rax
@@ -174,7 +171,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
jg near .rowloop
.return:
- uncollect_args 4
+ UNCOLLECT_ARGS 4
pop rbp
ret
@@ -195,21 +192,22 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
; r12 = JSAMPARRAY input_data
; r13 = JSAMPARRAY *output_data_ptr
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 4
align 32
GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)
EXTN(jsimd_h2v2_fancy_upsample_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 4
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+ COLLECT_ARGS 4
push rbx
mov eax, r11d ; colctr
@@ -472,9 +470,9 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
.return:
pop rbx
- uncollect_args 4
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 4
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
@@ -497,10 +495,10 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)
EXTN(jsimd_h2v1_upsample_sse2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 4
+ COLLECT_ARGS 4
mov edx, r11d
add rdx, byte (2*SIZEOF_XMMWORD)-1
@@ -561,7 +559,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
jg short .rowloop
.return:
- uncollect_args 4
+ UNCOLLECT_ARGS 4
pop rbp
ret
@@ -584,10 +582,10 @@ EXTN(jsimd_h2v1_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)
EXTN(jsimd_h2v2_upsample_sse2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 4
+ COLLECT_ARGS 4
push rbx
mov edx, r11d
@@ -656,7 +654,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
.return:
pop rbx
- uncollect_args 4
+ UNCOLLECT_ARGS 4
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctflt-sse.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctflt-sse.asm
index ef2796649bc..58a1f5570d3 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctflt-sse.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctflt-sse.asm
@@ -2,17 +2,14 @@
; jfdctflt.asm - floating-point FDCT (64-bit SSE)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a floating-point implementation of the forward DCT
; (Discrete Cosine Transform). The following code is based directly on
@@ -34,7 +31,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fdct_float_sse)
EXTN(jconst_fdct_float_sse):
@@ -44,7 +41,7 @@ PD_0_707 times 4 dd 0.707106781186547524400844
PD_0_541 times 4 dd 0.541196100146196984399723
PD_1_306 times 4 dd 1.306562964876376527856643
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -58,21 +55,22 @@ PD_1_306 times 4 dd 1.306562964876376527856643
; r10 = FAST_FLOAT *data
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 2
align 32
GLOBAL_FUNCTION(jsimd_fdct_float_sse)
EXTN(jsimd_fdct_float_sse):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 1
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+ COLLECT_ARGS 1
; ---- Pass 1: process rows.
@@ -344,9 +342,9 @@ EXTN(jsimd_fdct_float_sse):
dec rcx
jnz near .columnloop
- uncollect_args 1
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 1
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctfst-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctfst-sse2.asm
index 2e1bfe6e8c2..3b92d4edaae 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctfst-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctfst-sse2.asm
@@ -2,17 +2,14 @@
; jfdctfst.asm - fast integer FDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a fast, not so accurate integer implementation of
; the forward DCT (Discrete Cosine Transform). The following code is
@@ -49,7 +46,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fdct_ifast_sse2)
EXTN(jconst_fdct_ifast_sse2):
@@ -59,7 +56,7 @@ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -73,21 +70,22 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
; r10 = DCTELEM *data
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 2
align 32
GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)
EXTN(jsimd_fdct_ifast_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 1
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+ COLLECT_ARGS 1
; ---- Pass 1: process rows.
@@ -378,9 +376,9 @@ EXTN(jsimd_fdct_ifast_sse2):
movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6
movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2
- uncollect_args 1
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 1
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-avx2.asm
index e56258b48aa..0c4528612cd 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-avx2.asm
@@ -2,17 +2,13 @@
; jfdctint.asm - accurate integer FDCT (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a slower but more accurate integer implementation of the
; forward DCT (Discrete Cosine Transform). The following code is based
@@ -65,7 +61,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47)
; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57)
; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
@@ -108,7 +104,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%8: Temp registers
; %9: Pass (1 or 2)
-%macro dodct 9
+%macro DODCT 9
vpsubw %5, %1, %4 ; %5=data1_0-data6_7=tmp6_7
vpaddw %6, %1, %4 ; %6=data1_0+data6_7=tmp1_0
vpaddw %7, %2, %3 ; %7=data3_2+data4_5=tmp3_2
@@ -223,7 +219,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_avx2)
EXTN(jconst_fdct_islow_avx2):
@@ -242,7 +238,7 @@ PW_DESCALE_P2X times 16 dw 1 << (PASS1_BITS - 1)
PW_1_NEG1 times 8 dw 1
times 8 dw -1
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -260,10 +256,10 @@ PW_1_NEG1 times 8 dw 1
GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)
EXTN(jsimd_fdct_islow_avx2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 1
+ COLLECT_ARGS 1
; ---- Pass 1: process rows.
@@ -285,9 +281,9 @@ EXTN(jsimd_fdct_islow_avx2):
; ymm2=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
; ymm3=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77)
- dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+ DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
- dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
+ DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
; ---- Pass 2: process columns.
@@ -295,9 +291,9 @@ EXTN(jsimd_fdct_islow_avx2):
vperm2i128 ymm4, ymm1, ymm3, 0x20 ; ymm4=data3_7
vperm2i128 ymm1, ymm1, ymm3, 0x31 ; ymm1=data1_5
- dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+ DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
- dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
+ DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
vperm2i128 ymm3, ymm0, ymm1, 0x30 ; ymm3=data0_1
@@ -311,7 +307,7 @@ EXTN(jsimd_fdct_islow_avx2):
vmovdqu YMMWORD [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm7
vzeroupper
- uncollect_args 1
+ UNCOLLECT_ARGS 1
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-sse2.asm
index ec1f383ccb7..3a6be020cd0 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-sse2.asm
@@ -2,17 +2,14 @@
; jfdctint.asm - accurate integer FDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a slower but more accurate integer implementation of the
; forward DCT (Discrete Cosine Transform). The following code is based
@@ -63,7 +60,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_sse2)
EXTN(jconst_fdct_islow_sse2):
@@ -80,7 +77,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -94,21 +91,22 @@ PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
; r10 = DCTELEM *data
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
%define WK_NUM 6
align 32
GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)
EXTN(jsimd_fdct_islow_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 1
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+ COLLECT_ARGS 1
; ---- Pass 1: process rows.
@@ -608,9 +606,9 @@ EXTN(jsimd_fdct_islow_sse2):
movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1
movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3
- uncollect_args 1
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 1
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctflt-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctflt-sse2.asm
index 60bf9618961..14437340229 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctflt-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctflt-sse2.asm
@@ -2,18 +2,15 @@
; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a floating-point implementation of the inverse DCT
; (Discrete Cosine Transform). The following code is based directly on
@@ -24,18 +21,18 @@
; --------------------------------------------------------------------------
-%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
shufps %1, %2, 0x44
%endmacro
-%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
shufps %1, %2, 0xEE
%endmacro
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_sse2)
EXTN(jconst_idct_float_sse2):
@@ -47,7 +44,7 @@ PD_M2_613 times 4 dd -2.613125929752753055713286
PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -65,8 +62,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
; r12 = JSAMPARRAY output_buf
; r13d = JDIMENSION output_col
-%define original_rbp rbp + 0
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
; xmmword wk[WK_NUM]
%define WK_NUM 2
%define workspace wk(0) - DCTSIZE2 * SIZEOF_FAST_FLOAT
@@ -76,14 +72,15 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_float_sse2)
EXTN(jsimd_idct_float_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
lea rsp, [workspace]
- collect_args 4
+ COLLECT_ARGS 4
push rbx
; ---- Pass 1: process columns from input, store into work array.
@@ -280,11 +277,11 @@ EXTN(jsimd_idct_float_sse2):
unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
movaps xmm3, xmm6 ; transpose coefficients(phase 2)
- unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
- unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
+ UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
+ UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
movaps xmm0, xmm1 ; transpose coefficients(phase 2)
- unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
- unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
+ UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
+ UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
@@ -295,11 +292,11 @@ EXTN(jsimd_idct_float_sse2):
movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
movaps xmm6, xmm5 ; transpose coefficients(phase 2)
- unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
- unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
+ UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
+ UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
movaps xmm3, xmm4 ; transpose coefficients(phase 2)
- unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
- unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
+ UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
+ UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5
movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6
@@ -322,7 +319,6 @@ EXTN(jsimd_idct_float_sse2):
; ---- Pass 2: process rows from work array, store into output array.
- mov rax, [original_rbp]
lea rsi, [workspace] ; FAST_FLOAT *wsptr
mov rdi, r12 ; (JSAMPROW *)
mov eax, r13d
@@ -471,9 +467,9 @@ EXTN(jsimd_idct_float_sse2):
jnz near .rowloop
pop rbx
- uncollect_args 4
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 4
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctfst-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctfst-sse2.asm
index cb97fdfbb24..cffabb8378e 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctfst-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctfst-sse2.asm
@@ -2,18 +2,15 @@
; jidctfst.asm - fast integer IDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a fast, not so accurate integer implementation of
; the inverse DCT (Discrete Cosine Transform). The following code is
@@ -57,7 +54,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_ifast_sse2)
EXTN(jconst_idct_ifast_sse2):
@@ -68,7 +65,7 @@ PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT
PW_F1082 times 8 dw F_1_082 << CONST_SHIFT
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -86,8 +83,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
; r12 = JSAMPARRAY output_buf
; r13d = JDIMENSION output_col
-%define original_rbp rbp + 0
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
; xmmword wk[WK_NUM]
%define WK_NUM 2
@@ -95,14 +91,15 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)
EXTN(jsimd_idct_ifast_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 4
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+ COLLECT_ARGS 4
; ---- Pass 1: process columns from input.
@@ -320,7 +317,6 @@ EXTN(jsimd_idct_ifast_sse2):
; ---- Pass 2: process rows from work array, store into output array.
- mov rax, [original_rbp]
mov rdi, r12 ; (JSAMPROW *)
mov eax, r13d
@@ -479,9 +475,9 @@ EXTN(jsimd_idct_ifast_sse2):
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
- uncollect_args 4
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 4
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-avx2.asm
index ca7e317f6e1..be3b46888e5 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-avx2.asm
@@ -2,18 +2,14 @@
; jidctint.asm - accurate integer IDCT (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a slower but more accurate integer implementation of the
; inverse DCT (Discrete Cosine Transform). The following code is based
@@ -66,7 +62,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
; %5=(00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71)
; %6=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72)
; %7=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75)
@@ -119,7 +115,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%12: Temp registers
; %13: Pass (1 or 2)
-%macro dodct 13
+%macro DODCT 13
; -- Even part
; (Original)
@@ -241,7 +237,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_avx2)
EXTN(jconst_idct_islow_avx2):
@@ -260,7 +256,7 @@ PB_CENTERJSAMP times 32 db CENTERJSAMPLE
PW_1_NEG1 times 8 dw 1
times 8 dw -1
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -282,11 +278,11 @@ PW_1_NEG1 times 8 dw 1
GLOBAL_FUNCTION(jsimd_idct_islow_avx2)
EXTN(jsimd_idct_islow_avx2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
mov rbp, rsp ; rbp = frame pointer
- push_xmm 4
- collect_args 4
+ PUSH_XMM 4
+ COLLECT_ARGS 4
; ---- Pass 1: process columns.
@@ -343,10 +339,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm2, ymm5, ymm7, 0x20 ; ymm2=in2_6
vperm2i128 ymm3, ymm7, ymm6, 0x31 ; ymm3=in7_5
- dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
+ DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
- dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+ DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
.column_end:
@@ -363,10 +359,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm4, ymm3, ymm1, 0x31 ; ymm4=in7_5
vperm2i128 ymm1, ymm3, ymm1, 0x20 ; ymm1=in3_1
- dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
+ DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
- dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+ DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
vpacksswb ymm0, ymm0, ymm1 ; ymm0=data01_45
@@ -408,8 +404,8 @@ EXTN(jsimd_idct_islow_avx2):
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
- uncollect_args 4
- pop_xmm 4
+ UNCOLLECT_ARGS 4
+ POP_XMM 4
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-sse2.asm
index 7aa869bc0b5..b186871ff2a 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-sse2.asm
@@ -2,18 +2,15 @@
; jidctint.asm - accurate integer IDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains a slower but more accurate integer implementation of the
; inverse DCT (Discrete Cosine Transform). The following code is based
@@ -64,7 +61,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_sse2)
EXTN(jconst_idct_islow_sse2):
@@ -81,7 +78,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -99,8 +96,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
; r12 = JSAMPARRAY output_buf
; r13d = JDIMENSION output_col
-%define original_rbp rbp + 0
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
; xmmword wk[WK_NUM]
%define WK_NUM 12
@@ -108,14 +104,15 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_islow_sse2)
EXTN(jsimd_idct_islow_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 4
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, (SIZEOF_XMMWORD * WK_NUM)
+ COLLECT_ARGS 4
; ---- Pass 1: process columns from input.
@@ -512,7 +509,6 @@ EXTN(jsimd_idct_islow_sse2):
; ---- Pass 2: process rows from work array, store into output array.
- mov rax, [original_rbp]
mov rdi, r12 ; (JSAMPROW *)
mov eax, r13d
@@ -836,9 +832,9 @@ EXTN(jsimd_idct_islow_sse2):
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
- uncollect_args 4
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 4
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctred-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctred-sse2.asm
index 4ece9d891cb..6fb7095612e 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctred-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctred-sse2.asm
@@ -2,18 +2,15 @@
; jidctred.asm - reduced-size IDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains inverse-DCT routines that produce reduced-size
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
@@ -70,7 +67,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
; --------------------------------------------------------------------------
SECTION SEG_CONST
- alignz 32
+ ALIGNZ 32
GLOBAL_DATA(jconst_idct_red_sse2)
EXTN(jconst_idct_red_sse2):
@@ -88,7 +85,7 @@ PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2 - 1)
PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
- alignz 32
+ ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -107,8 +104,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
; r12 = JSAMPARRAY output_buf
; r13d = JDIMENSION output_col
-%define original_rbp rbp + 0
-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
; xmmword wk[WK_NUM]
%define WK_NUM 2
@@ -116,14 +112,15 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)
EXTN(jsimd_idct_4x4_sse2):
+ ENDBR64
push rbp
- mov rax, rsp ; rax = original rbp
- sub rsp, byte 4
+ mov rbp, rsp
+ push r15
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
- mov [rsp], rax
- mov rbp, rsp ; rbp = aligned rbp
- lea rsp, [wk(0)]
- collect_args 4
+ ; Allocate stack space for wk array. r15 is used to access it.
+ mov r15, rsp
+ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+ COLLECT_ARGS 4
; ---- Pass 1: process columns from input.
@@ -309,7 +306,6 @@ EXTN(jsimd_idct_4x4_sse2):
; ---- Pass 2: process rows, store into output array.
- mov rax, [original_rbp]
mov rdi, r12 ; (JSAMPROW *)
mov eax, r13d
@@ -389,9 +385,9 @@ EXTN(jsimd_idct_4x4_sse2):
movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
- uncollect_args 4
- mov rsp, rbp ; rsp <- aligned rbp
- pop rsp ; rsp <- original rbp
+ UNCOLLECT_ARGS 4
+ lea rsp, [rbp-8]
+ pop r15
pop rbp
ret
@@ -414,10 +410,10 @@ EXTN(jsimd_idct_4x4_sse2):
GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)
EXTN(jsimd_idct_2x2_sse2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 4
+ COLLECT_ARGS 4
push rbx
; ---- Pass 1: process columns from input.
@@ -565,7 +561,7 @@ EXTN(jsimd_idct_2x2_sse2):
mov word [rsi+rax*SIZEOF_JSAMPLE], cx
pop rbx
- uncollect_args 4
+ UNCOLLECT_ARGS 4
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jquantf-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jquantf-sse2.asm
index ab2e3954f63..64763338f2d 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jquantf-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jquantf-sse2.asm
@@ -2,18 +2,14 @@
; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
%include "jdct.inc"
@@ -37,10 +33,10 @@
GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)
EXTN(jsimd_convsamp_float_sse2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 3
+ COLLECT_ARGS 3
push rbx
pcmpeqw xmm7, xmm7
@@ -89,7 +85,7 @@ EXTN(jsimd_convsamp_float_sse2):
jnz short .convloop
pop rbx
- uncollect_args 3
+ UNCOLLECT_ARGS 3
pop rbp
ret
@@ -110,10 +106,10 @@ EXTN(jsimd_convsamp_float_sse2):
GLOBAL_FUNCTION(jsimd_quantize_float_sse2)
EXTN(jsimd_quantize_float_sse2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 3
+ COLLECT_ARGS 3
mov rsi, r12
mov rdx, r11
@@ -146,7 +142,7 @@ EXTN(jsimd_quantize_float_sse2):
dec rax
jnz short .quantloop
- uncollect_args 3
+ UNCOLLECT_ARGS 3
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-avx2.asm
index 70fe81139cc..7e126e88a88 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-avx2.asm
@@ -2,7 +2,7 @@
; jquanti.asm - sample data conversion and quantization (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2024, D. R. Commander.
; Copyright (C) 2016, Matthieu Darbois.
; Copyright (C) 2018, Matthias Räncker.
;
@@ -10,11 +10,7 @@
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
%include "jdct.inc"
@@ -38,10 +34,10 @@
GLOBAL_FUNCTION(jsimd_convsamp_avx2)
EXTN(jsimd_convsamp_avx2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 3
+ COLLECT_ARGS 3
mov eax, r11d
@@ -84,7 +80,7 @@ EXTN(jsimd_convsamp_avx2):
vmovdqu YMMWORD [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)], ymm3
vzeroupper
- uncollect_args 3
+ UNCOLLECT_ARGS 3
pop rbp
ret
@@ -93,8 +89,8 @@ EXTN(jsimd_convsamp_avx2):
; Quantize/descale the coefficients, and store into coef_block
;
; This implementation is based on an algorithm described in
-; "How to optimize for the Pentium family of microprocessors"
-; (http://www.agner.org/assem/).
+; "Optimizing subroutines in assembly language:
+; An optimization guide for x86 platforms" (https://agner.org/optimize).
;
; GLOBAL(void)
; jsimd_quantize_avx2(JCOEFPTR coef_block, DCTELEM *divisors,
@@ -116,10 +112,10 @@ EXTN(jsimd_convsamp_avx2):
GLOBAL_FUNCTION(jsimd_quantize_avx2)
EXTN(jsimd_quantize_avx2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 3
+ COLLECT_ARGS 3
vmovdqu ymm4, [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
vmovdqu ymm5, [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
@@ -154,7 +150,7 @@ EXTN(jsimd_quantize_avx2):
vmovdqu [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
vzeroupper
- uncollect_args 3
+ UNCOLLECT_ARGS 3
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-sse2.asm
index 3ee442027a5..284b9fea71e 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-sse2.asm
@@ -2,18 +2,14 @@
; jquanti.asm - sample data conversion and quantization (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
%include "jdct.inc"
@@ -37,10 +33,10 @@
GLOBAL_FUNCTION(jsimd_convsamp_sse2)
EXTN(jsimd_convsamp_sse2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 3
+ COLLECT_ARGS 3
push rbx
pxor xmm6, xmm6 ; xmm6=(all 0's)
@@ -84,7 +80,7 @@ EXTN(jsimd_convsamp_sse2):
jnz short .convloop
pop rbx
- uncollect_args 3
+ UNCOLLECT_ARGS 3
pop rbp
ret
@@ -93,8 +89,8 @@ EXTN(jsimd_convsamp_sse2):
; Quantize/descale the coefficients, and store into coef_block
;
; This implementation is based on an algorithm described in
-; "How to optimize for the Pentium family of microprocessors"
-; (http://www.agner.org/assem/).
+; "Optimizing subroutines in assembly language:
+; An optimization guide for x86 platforms" (https://agner.org/optimize).
;
; GLOBAL(void)
; jsimd_quantize_sse2(JCOEFPTR coef_block, DCTELEM *divisors,
@@ -116,10 +112,10 @@ EXTN(jsimd_convsamp_sse2):
GLOBAL_FUNCTION(jsimd_quantize_sse2)
EXTN(jsimd_quantize_sse2):
+ ENDBR64
push rbp
- mov rax, rsp
mov rbp, rsp
- collect_args 3
+ COLLECT_ARGS 3
mov rsi, r12
mov rdx, r11
@@ -179,7 +175,7 @@ EXTN(jsimd_quantize_sse2):
dec rax
jnz near .quantloop
- uncollect_args 3
+ UNCOLLECT_ARGS 3
pop rbp
ret
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jsimd.c b/contrib/libs/libjpeg-turbo/simd/x86_64/jsimd.c
index d51962f3987..9f4e098fddc 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jsimd.c
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jsimd.c
@@ -2,8 +2,8 @@
* jsimd_x86_64.c
*
* Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
- * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2024, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -15,13 +15,12 @@
*/
#define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
#include "../jsimd.h"
-#include "jconfigint.h"
/*
* In the PIC cases, we have no guarantee that constants will keep
@@ -32,13 +31,11 @@
#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
-static unsigned int simd_support = (unsigned int)(~0);
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
+static THREAD_LOCAL unsigned int simd_huffman = 1;
/*
* Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
*/
LOCAL(void)
init_simd(void)
@@ -116,7 +113,9 @@ jsimd_can_ycc_rgb(void)
{
init_simd();
-#ifndef WITH_SANITIZER
+#ifdef WITH_SANITIZER
+ return 0;
+#endif
/* The code is optimised for these values only */
if (BITS_IN_JSAMPLE != 8)
return 0;
@@ -131,7 +130,6 @@ jsimd_can_ycc_rgb(void)
if ((simd_support & JSIMD_SSE2) &&
IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
return 1;
-#endif
return 0;
}
@@ -150,6 +148,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->in_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_extrgb_ycc_convert_avx2;
@@ -199,6 +200,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->in_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_extrgb_gray_convert_avx2;
@@ -248,6 +252,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->out_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_ycc_extrgb_convert_avx2;
@@ -338,6 +345,9 @@ GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
compptr->v_samp_factor,
@@ -354,6 +364,9 @@ GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY output_data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
compptr->v_samp_factor,
@@ -408,6 +421,9 @@ GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
input_data, output_data_ptr);
@@ -420,6 +436,9 @@ GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
input_data, output_data_ptr);
@@ -474,6 +493,9 @@ GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
compptr->downsampled_width, input_data,
@@ -488,6 +510,9 @@ GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
compptr->downsampled_width, input_data,
@@ -547,6 +572,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->out_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
@@ -595,6 +623,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
+ if (simd_support == ~0U)
+ init_simd();
+
switch (cinfo->out_color_space) {
case JCS_EXT_RGB:
avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
@@ -684,6 +715,9 @@ GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
DCTELEM *workspace)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_convsamp_avx2(sample_data, start_col, workspace);
else
@@ -753,6 +787,9 @@ jsimd_can_fdct_float(void)
GLOBAL(void)
jsimd_fdct_islow(DCTELEM *data)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_fdct_islow_avx2(data);
else
@@ -814,6 +851,9 @@ jsimd_can_quantize_float(void)
GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_quantize_avx2(coef_block, divisors, workspace);
else
@@ -944,6 +984,9 @@ jsimd_can_idct_float(void)
{
init_simd();
+#ifdef WITH_SANITIZER
+ return 0;
+#endif
if (DCTSIZE != 8)
return 0;
if (sizeof(JCOEF) != 2)
@@ -968,6 +1011,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
+ if (simd_support == ~0U)
+ init_simd();
+
if (simd_support & JSIMD_AVX2)
jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
output_col);
@@ -999,7 +1045,9 @@ jsimd_can_huff_encode_one_block(void)
{
init_simd();
-#ifndef WITH_SANITIZER
+#ifdef WITH_SANITIZER
+ return 0;
+#endif
if (DCTSIZE != 8)
return 0;
if (sizeof(JCOEF) != 2)
@@ -1008,7 +1056,6 @@ jsimd_can_huff_encode_one_block(void)
if ((simd_support & JSIMD_SSE2) && simd_huffman &&
IS_ALIGNED_SSE(jconst_huff_encode_one_block))
return 1;
-#endif
return 0;
}
@@ -1027,14 +1074,15 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
{
init_simd();
-#ifndef WITH_SANITIZER
+#ifdef WITH_SANITIZER
+ return 0;
+#endif
if (DCTSIZE != 8)
return 0;
if (sizeof(JCOEF) != 2)
return 0;
if (simd_support & JSIMD_SSE2)
return 1;
-#endif
return 0;
}
@@ -1042,7 +1090,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
- int Al, JCOEF *values, size_t *zerobits)
+ int Al, UJCOEF *values, size_t *zerobits)
{
jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
Sl, Al, values, zerobits);
@@ -1053,14 +1101,12 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
{
init_simd();
-#ifndef WITH_SANITIZER
if (DCTSIZE != 8)
return 0;
if (sizeof(JCOEF) != 2)
return 0;
if (simd_support & JSIMD_SSE2)
return 1;
-#endif
return 0;
}
@@ -1068,7 +1114,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
const int *jpeg_natural_order_start, int Sl,
- int Al, JCOEF *absvalues, size_t *bits)
+ int Al, UJCOEF *absvalues, size_t *bits)
{
return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
jpeg_natural_order_start,
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jsimdcpu.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jsimdcpu.asm
index 705f813d7da..b72f3b0b398 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jsimdcpu.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jsimdcpu.asm
@@ -3,17 +3,14 @@
;
; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
%include "jsimdext.inc"
@@ -31,6 +28,8 @@
GLOBAL_FUNCTION(jpeg_simd_cpu_support)
EXTN(jpeg_simd_cpu_support):
+ push rbp
+ mov rbp, rsp
push rbx
push rdi
@@ -79,6 +78,7 @@ EXTN(jpeg_simd_cpu_support):
pop rdi
pop rbx
+ pop rbp
ret
; For some reason, the OS X linker does not honor the request to align the