Intermediate changes

commit_hash:79edafb911368bba0a4d2f7f151a6c8a37c349f3
author: robot-piglet <[email protected]> 2025-06-15 15:44:41 +0300
committer: robot-piglet <[email protected]> 2025-06-15 15:55:30 +0300
commit: ea626d7b15346c0da649291483f80f1ae6e1d7e7 (patch)
tree: 24ae3c2aa7f259f3ba95af8450b5bce9a4bdb10d /contrib/libs/libjpeg-turbo/simd
parent: 726087f32fb38c191ff0c3ef8c6646aa940d987e (diff)
114 files changed, 1417 insertions, 1602 deletions
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jchuff-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jchuff-neon.c
index 19d94f720da..153da1f1c11 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jchuff-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jchuff-neon.c
@@ -2,6 +2,7 @@
  * jchuff-neon.c - Huffman entropy encoding (32-bit Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -24,11 +25,11 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
 #include "../../jsimd.h"
 #include "../jchuff.h"
 #include "neon-compat.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jsimd.c b/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jsimd.c
index 920f7656ebf..7c8ea306bd1 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jsimd.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/aarch32/jsimd.c
@@ -3,8 +3,8 @@
  *
  * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
  * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, 2024, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2019, Google LLC.
  * Copyright (C) 2020, Arm Limited.
  *
@@ -18,17 +18,17 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
 #include "../../jsimd.h"
 
 #include <ctype.h>
 
-static unsigned int simd_support = ~0;
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = ~0;
+static THREAD_LOCAL unsigned int simd_huffman = 1;
 
 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
 
@@ -96,8 +96,6 @@ parse_proc_cpuinfo(int bufsize)
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
@@ -945,7 +943,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -970,7 +968,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_neon(block,
                                                  jpeg_natural_order_start, Sl,
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jchuff-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jchuff-neon.c
index 607a116070c..11bf6dab130 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jchuff-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jchuff-neon.c
@@ -2,7 +2,7 @@
  * jchuff-neon.c - Huffman entropy encoding (64-bit Arm Neon)
  *
  * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, 2022, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2022, 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -25,11 +25,11 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
 #include "../../jsimd.h"
 #include "../align.h"
 #include "../jchuff.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jsimd.c b/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jsimd.c
index 41c06d31801..8a6f30a1a89 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jsimd.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/aarch64/jsimd.c
@@ -3,8 +3,9 @@
  *
  * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
  * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, 2024,
+ *           D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2020, Arm Limited.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
@@ -17,13 +18,12 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../../jinclude.h"
-#include "../../../jpeglib.h"
-#include "../../../jsimd.h"
-#include "../../../jdct.h"
-#include "../../../jsimddct.h"
+#include "../../../src/jinclude.h"
+#include "../../../src/jpeglib.h"
+#include "../../../src/jsimd.h"
+#include "../../../src/jdct.h"
+#include "../../../src/jsimddct.h"
 #include "../../jsimd.h"
-#include "jconfigint.h"
 
 #include <ctype.h>
 
@@ -31,10 +31,10 @@
 #define JSIMD_FASTST3  2
 #define JSIMD_FASTTBL  4
 
-static unsigned int simd_support = ~0;
-static unsigned int simd_huffman = 1;
-static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 |
-                                    JSIMD_FASTTBL;
+static THREAD_LOCAL unsigned int simd_support = ~0;
+static THREAD_LOCAL unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_features = JSIMD_FASTLD3 |
+                                                 JSIMD_FASTST3 | JSIMD_FASTTBL;
 
 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
 
@@ -109,8 +109,6 @@ parse_proc_cpuinfo(int bufsize)
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 
 /*
@@ -1021,7 +1019,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -1048,7 +1046,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_neon(block,
                                                  jpeg_natural_order_start,
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jccolor-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jccolor-neon.c
index 9fcc62dd25c..d14a7bf5018 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jccolor-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jccolor-neon.c
@@ -2,7 +2,7 @@
  * jccolor-neon.c - colorspace conversion (Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -22,11 +22,11 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
 #include "neon-compat.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jcgray-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jcgray-neon.c
index 71c7b2de218..fbcf8214057 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jcgray-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jcgray-neon.c
@@ -2,6 +2,7 @@
  * jcgray-neon.c - grayscale colorspace conversion (Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -21,13 +22,14 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"
 
 #include <arm_neon.h>
 
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jcphuff-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jcphuff-neon.c
index b91c5db478a..435f96ee968 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jcphuff-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jcphuff-neon.c
@@ -2,6 +2,8 @@
  * jcphuff-neon.c - prepare data for progressive Huffman encoding (Arm Neon)
  *
  * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2022, Matthieu Darbois.  All Rights Reserved.
+ * Copyright (C) 2022, 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -21,12 +23,11 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "neon-compat.h"
 
@@ -41,10 +42,10 @@
 
 void jsimd_encode_mcu_AC_first_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits)
+   UJCOEF *values, size_t *zerobits)
 {
-  JCOEF *values_ptr = values;
-  JCOEF *diff_values_ptr = values + DCTSIZE2;
+  UJCOEF *values_ptr = values;
+  UJCOEF *diff_values_ptr = values + DCTSIZE2;
 
   /* Rows of coefficients to zero (since they haven't been processed) */
   int i, rows_to_zero = 8;
@@ -68,23 +69,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
     coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[15], coefs2, 7);
 
     /* Isolate sign of coefficients. */
-    int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15);
-    int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15);
+    uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
+    uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
 
     /* Compute diff values. */
-    int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1);
-    int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2);
+    uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
+    uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
 
     /* Store transformed coefficients and diff values. */
-    vst1q_s16(values_ptr, coefs1);
-    vst1q_s16(values_ptr + DCTSIZE, coefs2);
-    vst1q_s16(diff_values_ptr, diff1);
-    vst1q_s16(diff_values_ptr + DCTSIZE, diff2);
+    vst1q_u16(values_ptr, abs_coefs1);
+    vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
+    vst1q_u16(diff_values_ptr, diff1);
+    vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
     values_ptr += 16;
     diff_values_ptr += 16;
     jpeg_natural_order_start += 16;
@@ -130,23 +131,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
     }
 
     /* Isolate sign of coefficients. */
-    int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15);
-    int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15);
+    uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
+    uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
 
     /* Compute diff values. */
-    int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1);
-    int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2);
+    uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
+    uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
 
     /* Store transformed coefficients and diff values. */
-    vst1q_s16(values_ptr, coefs1);
-    vst1q_s16(values_ptr + DCTSIZE, coefs2);
-    vst1q_s16(diff_values_ptr, diff1);
-    vst1q_s16(diff_values_ptr + DCTSIZE, diff2);
+    vst1q_u16(values_ptr, abs_coefs1);
+    vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
+    vst1q_u16(diff_values_ptr, diff1);
+    vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
     values_ptr += 16;
     diff_values_ptr += 16;
     rows_to_zero -= 2;
@@ -184,17 +185,17 @@ void jsimd_encode_mcu_AC_first_prepare_neon
     }
 
     /* Isolate sign of coefficients. */
-    int16x8_t sign_coefs = vshrq_n_s16(coefs, 15);
+    uint16x8_t sign_coefs = vreinterpretq_u16_s16(vshrq_n_s16(coefs, 15));
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs = vabsq_s16(coefs);
-    coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al));
+    uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
+    abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
 
     /* Compute diff values. */
-    int16x8_t diff = veorq_s16(coefs, sign_coefs);
+    uint16x8_t diff = veorq_u16(abs_coefs, sign_coefs);
 
     /* Store transformed coefficients and diff values. */
-    vst1q_s16(values_ptr, coefs);
-    vst1q_s16(diff_values_ptr, diff);
+    vst1q_u16(values_ptr, abs_coefs);
+    vst1q_u16(diff_values_ptr, diff);
     values_ptr += 8;
     diff_values_ptr += 8;
     rows_to_zero--;
@@ -202,8 +203,8 @@ void jsimd_encode_mcu_AC_first_prepare_neon
 
   /* Zero remaining memory in the values and diff_values blocks. */
   for (i = 0; i < rows_to_zero; i++) {
-    vst1q_s16(values_ptr, vdupq_n_s16(0));
-    vst1q_s16(diff_values_ptr, vdupq_n_s16(0));
+    vst1q_u16(values_ptr, vdupq_n_u16(0));
+    vst1q_u16(diff_values_ptr, vdupq_n_u16(0));
     values_ptr += 8;
     diff_values_ptr += 8;
   }
@@ -211,23 +212,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
   /* Construct zerobits bitmap.  A set bit means that the corresponding
    * coefficient != 0.
    */
-  int16x8_t row0 = vld1q_s16(values + 0 * DCTSIZE);
-  int16x8_t row1 = vld1q_s16(values + 1 * DCTSIZE);
-  int16x8_t row2 = vld1q_s16(values + 2 * DCTSIZE);
-  int16x8_t row3 = vld1q_s16(values + 3 * DCTSIZE);
-  int16x8_t row4 = vld1q_s16(values + 4 * DCTSIZE);
-  int16x8_t row5 = vld1q_s16(values + 5 * DCTSIZE);
-  int16x8_t row6 = vld1q_s16(values + 6 * DCTSIZE);
-  int16x8_t row7 = vld1q_s16(values + 7 * DCTSIZE);
-
-  uint8x8_t row0_eq0 = vmovn_u16(vceqq_s16(row0, vdupq_n_s16(0)));
-  uint8x8_t row1_eq0 = vmovn_u16(vceqq_s16(row1, vdupq_n_s16(0)));
-  uint8x8_t row2_eq0 = vmovn_u16(vceqq_s16(row2, vdupq_n_s16(0)));
-  uint8x8_t row3_eq0 = vmovn_u16(vceqq_s16(row3, vdupq_n_s16(0)));
-  uint8x8_t row4_eq0 = vmovn_u16(vceqq_s16(row4, vdupq_n_s16(0)));
-  uint8x8_t row5_eq0 = vmovn_u16(vceqq_s16(row5, vdupq_n_s16(0)));
-  uint8x8_t row6_eq0 = vmovn_u16(vceqq_s16(row6, vdupq_n_s16(0)));
-  uint8x8_t row7_eq0 = vmovn_u16(vceqq_s16(row7, vdupq_n_s16(0)));
+  uint16x8_t row0 = vld1q_u16(values + 0 * DCTSIZE);
+  uint16x8_t row1 = vld1q_u16(values + 1 * DCTSIZE);
+  uint16x8_t row2 = vld1q_u16(values + 2 * DCTSIZE);
+  uint16x8_t row3 = vld1q_u16(values + 3 * DCTSIZE);
+  uint16x8_t row4 = vld1q_u16(values + 4 * DCTSIZE);
+  uint16x8_t row5 = vld1q_u16(values + 5 * DCTSIZE);
+  uint16x8_t row6 = vld1q_u16(values + 6 * DCTSIZE);
+  uint16x8_t row7 = vld1q_u16(values + 7 * DCTSIZE);
+
+  uint8x8_t row0_eq0 = vmovn_u16(vceqq_u16(row0, vdupq_n_u16(0)));
+  uint8x8_t row1_eq0 = vmovn_u16(vceqq_u16(row1, vdupq_n_u16(0)));
+  uint8x8_t row2_eq0 = vmovn_u16(vceqq_u16(row2, vdupq_n_u16(0)));
+  uint8x8_t row3_eq0 = vmovn_u16(vceqq_u16(row3, vdupq_n_u16(0)));
+  uint8x8_t row4_eq0 = vmovn_u16(vceqq_u16(row4, vdupq_n_u16(0)));
+  uint8x8_t row5_eq0 = vmovn_u16(vceqq_u16(row5, vdupq_n_u16(0)));
+  uint8x8_t row6_eq0 = vmovn_u16(vceqq_u16(row6, vdupq_n_u16(0)));
+  uint8x8_t row7_eq0 = vmovn_u16(vceqq_u16(row7, vdupq_n_u16(0)));
 
   /* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
   const uint8x8_t bitmap_mask =
@@ -274,7 +275,7 @@ void jsimd_encode_mcu_AC_first_prepare_neon
 
 int jsimd_encode_mcu_AC_refine_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits)
+   UJCOEF *absvalues, size_t *bits)
 {
   /* Temporary storage buffers for data used to compute the signbits bitmap and
    * the end-of-block (EOB) position
@@ -282,7 +283,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
   uint8_t coef_sign_bits[64];
   uint8_t coef_eq1_bits[64];
 
-  JCOEF *absvalues_ptr = absvalues;
+  UJCOEF *absvalues_ptr = absvalues;
   uint8_t *coef_sign_bits_ptr = coef_sign_bits;
   uint8_t *eq1_bits_ptr = coef_eq1_bits;
 
@@ -316,18 +317,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
     vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
 
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
-    vst1q_s16(absvalues_ptr, coefs1);
-    vst1q_s16(absvalues_ptr + DCTSIZE, coefs2);
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
+    vst1q_u16(absvalues_ptr, abs_coefs1);
+    vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
 
     /* Test whether transformed coefficient values == 1 (used to find EOB
      * position.)
      */
-    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1)));
-    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1)));
+    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
+    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
     vst1_u8(eq1_bits_ptr, coefs_eq11);
     vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
 
@@ -385,18 +386,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
     vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
 
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
-    vst1q_s16(absvalues_ptr, coefs1);
-    vst1q_s16(absvalues_ptr + DCTSIZE, coefs2);
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
+    vst1q_u16(absvalues_ptr, abs_coefs1);
+    vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
 
     /* Test whether transformed coefficient values == 1 (used to find EOB
      * position.)
      */
-    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1)));
-    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1)));
+    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
+    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
     vst1_u8(eq1_bits_ptr, coefs_eq11);
     vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
 
@@ -444,14 +445,14 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
     vst1_u8(coef_sign_bits_ptr, sign_coefs);
 
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs = vabsq_s16(coefs);
-    coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al));
-    vst1q_s16(absvalues_ptr, coefs);
+    uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
+    abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
+    vst1q_u16(absvalues_ptr, abs_coefs);
 
     /* Test whether transformed coefficient values == 1 (used to find EOB
      * position.)
      */
-    uint8x8_t coefs_eq1 = vmovn_u16(vceqq_s16(coefs, vdupq_n_s16(1)));
+    uint8x8_t coefs_eq1 = vmovn_u16(vceqq_u16(abs_coefs, vdupq_n_u16(1)));
     vst1_u8(eq1_bits_ptr, coefs_eq1);
 
     absvalues_ptr += 8;
@@ -462,7 +463,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
 
   /* Zero remaining memory in blocks. */
   for (i = 0; i < rows_to_zero; i++) {
-    vst1q_s16(absvalues_ptr, vdupq_n_s16(0));
+    vst1q_u16(absvalues_ptr, vdupq_n_u16(0));
     vst1_u8(coef_sign_bits_ptr, vdup_n_u8(0));
     vst1_u8(eq1_bits_ptr, vdup_n_u8(0));
     absvalues_ptr += 8;
@@ -471,23 +472,23 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
   }
 
   /* Construct zerobits bitmap. */
-  int16x8_t abs_row0 = vld1q_s16(absvalues + 0 * DCTSIZE);
-  int16x8_t abs_row1 = vld1q_s16(absvalues + 1 * DCTSIZE);
-  int16x8_t abs_row2 = vld1q_s16(absvalues + 2 * DCTSIZE);
-  int16x8_t abs_row3 = vld1q_s16(absvalues + 3 * DCTSIZE);
-  int16x8_t abs_row4 = vld1q_s16(absvalues + 4 * DCTSIZE);
-  int16x8_t abs_row5 = vld1q_s16(absvalues + 5 * DCTSIZE);
-  int16x8_t abs_row6 = vld1q_s16(absvalues + 6 * DCTSIZE);
-  int16x8_t abs_row7 = vld1q_s16(absvalues + 7 * DCTSIZE);
-
-  uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_s16(abs_row0, vdupq_n_s16(0)));
-  uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_s16(abs_row1, vdupq_n_s16(0)));
-  uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_s16(abs_row2, vdupq_n_s16(0)));
-  uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_s16(abs_row3, vdupq_n_s16(0)));
-  uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_s16(abs_row4, vdupq_n_s16(0)));
-  uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_s16(abs_row5, vdupq_n_s16(0)));
-  uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_s16(abs_row6, vdupq_n_s16(0)));
-  uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_s16(abs_row7, vdupq_n_s16(0)));
+  uint16x8_t abs_row0 = vld1q_u16(absvalues + 0 * DCTSIZE);
+  uint16x8_t abs_row1 = vld1q_u16(absvalues + 1 * DCTSIZE);
+  uint16x8_t abs_row2 = vld1q_u16(absvalues + 2 * DCTSIZE);
+  uint16x8_t abs_row3 = vld1q_u16(absvalues + 3 * DCTSIZE);
+  uint16x8_t abs_row4 = vld1q_u16(absvalues + 4 * DCTSIZE);
+  uint16x8_t abs_row5 = vld1q_u16(absvalues + 5 * DCTSIZE);
+  uint16x8_t abs_row6 = vld1q_u16(absvalues + 6 * DCTSIZE);
+  uint16x8_t abs_row7 = vld1q_u16(absvalues + 7 * DCTSIZE);
+
+  uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_u16(abs_row0, vdupq_n_u16(0)));
+  uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_u16(abs_row1, vdupq_n_u16(0)));
+  uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_u16(abs_row2, vdupq_n_u16(0)));
+  uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_u16(abs_row3, vdupq_n_u16(0)));
+  uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_u16(abs_row4, vdupq_n_u16(0)));
+  uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_u16(abs_row5, vdupq_n_u16(0)));
+  uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_u16(abs_row6, vdupq_n_u16(0)));
+  uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_u16(abs_row7, vdupq_n_u16(0)));
 
   /* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
   const uint8x8_t bitmap_mask =
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jcsample-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jcsample-neon.c
index 8a3e237838e..fd8a93e520b 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jcsample-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jcsample-neon.c
@@ -2,6 +2,7 @@
  * jcsample-neon.c - downsampling (Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -21,13 +22,14 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"
 
 #include <arm_neon.h>
 
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jdcolor-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jdcolor-neon.c
index ea4668f1d30..97bb02a1ed7 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jdcolor-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jdcolor-neon.c
@@ -2,6 +2,7 @@
  * jdcolor-neon.c - colorspace conversion (Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -21,14 +22,14 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"
 
 #include <arm_neon.h>
 
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jdmerge-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jdmerge-neon.c
index e4f91fdc0ef..95e6d32830c 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jdmerge-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jdmerge-neon.c
@@ -2,6 +2,7 @@
  * jdmerge-neon.c - merged upsampling/color conversion (Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -21,14 +22,14 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"
 
 #include <arm_neon.h>
 
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jdsample-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jdsample-neon.c
index 90ec6782c47..a130b1a9581 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jdsample-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jdsample-neon.c
@@ -2,7 +2,7 @@
  * jdsample-neon.c - upsampling (Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -22,12 +22,13 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
+#include "neon-compat.h"
 
 #include <arm_neon.h>
 
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jfdctfst-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jfdctfst-neon.c
index bb371be3999..d6109f11d34 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jfdctfst-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jfdctfst-neon.c
@@ -2,6 +2,7 @@
  * jfdctfst-neon.c - fast integer FDCT (Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -21,13 +22,14 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"
 
 #include <arm_neon.h>
 
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jfdctint-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jfdctint-neon.c
index ccfc07b15d9..bb290ea45d2 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jfdctint-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jfdctint-neon.c
@@ -2,7 +2,7 @@
  * jfdctint-neon.c - accurate integer FDCT (Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -22,11 +22,11 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
 #include "neon-compat.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jidctfst-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jidctfst-neon.c
index a91be5362eb..e789125344a 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jidctfst-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jidctfst-neon.c
@@ -2,6 +2,7 @@
  * jidctfst-neon.c - fast integer IDCT (Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -21,13 +22,14 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
+#include "neon-compat.h"
 
 #include <arm_neon.h>
 
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jidctint-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jidctint-neon.c
index 043b652e6c5..709e0eaf4e9 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jidctint-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jidctint-neon.c
@@ -2,7 +2,7 @@
  * jidctint-neon.c - accurate integer IDCT (Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -22,12 +22,11 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
 #include "neon-compat.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jidctred-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jidctred-neon.c
index be9627e61d4..25b1addc6a9 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jidctred-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jidctred-neon.c
@@ -2,7 +2,7 @@
  * jidctred-neon.c - reduced-size IDCT (Arm Neon)
  *
  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -22,11 +22,11 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
 #include "align.h"
 #include "neon-compat.h"
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/jquanti-neon.c b/contrib/libs/libjpeg-turbo/simd/arm/jquanti-neon.c
index d5d95d89f67..e44fb3d4131 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/jquanti-neon.c
+++ b/contrib/libs/libjpeg-turbo/simd/arm/jquanti-neon.c
@@ -2,6 +2,7 @@
  * jquanti-neon.c - sample data conversion and quantization (Arm Neon)
  *
  * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2024, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -21,12 +22,13 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
+#include "neon-compat.h"
 
 #include <arm_neon.h>
 
diff --git a/contrib/libs/libjpeg-turbo/simd/arm/neon-compat.h b/contrib/libs/libjpeg-turbo/simd/arm/neon-compat.h
index 069c62d8290..992aa5a4f97 100644
--- a/contrib/libs/libjpeg-turbo/simd/arm/neon-compat.h
+++ b/contrib/libs/libjpeg-turbo/simd/arm/neon-compat.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020, 2024, D. R. Commander.  All Rights Reserved.
  * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
@@ -35,3 +35,11 @@
 #else
 #error "Unknown compiler"
 #endif
+
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wdeclaration-after-statement"
+#pragma clang diagnostic ignored "-Wc99-extensions"
+#elif defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-avx2.asm
index c46d684436d..28ac9528079 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-avx2.asm
@@ -2,17 +2,13 @@
 ; jccolext.asm - colorspace conversion (AVX2)
 ;
 ; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -49,15 +45,15 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]
     test        ecx, ecx
@@ -80,9 +76,9 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edx
     push        ebx
     push        edi
@@ -93,11 +89,11 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     mov         edi, JSAMPROW [edi]     ; outptr0
     mov         ebx, JSAMPROW [ebx]     ; outptr1
     mov         edx, JSAMPROW [edx]     ; outptr2
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_YMMWORD
     jae         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -154,7 +150,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
     vmovdqu     ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -278,7 +274,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
     vmovdqu     ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -552,7 +548,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     pop         edi
     pop         ebx
     pop         edx
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-mmx.asm
index 6357a42b2cf..44b62512e91 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-mmx.asm
@@ -2,17 +2,13 @@
 ; jccolext.asm - colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -49,15 +45,15 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]  ; num_cols
     test        ecx, ecx
@@ -80,9 +76,9 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edx
     push        ebx
     push        edi
@@ -93,11 +89,11 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     mov         edi, JSAMPROW [edi]     ; outptr0
     mov         ebx, JSAMPROW [ebx]     ; outptr1
     mov         edx, JSAMPROW [edx]     ; outptr2
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_MMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -143,7 +139,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
     movq        mmG, MMWORD [esi+1*SIZEOF_MMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -211,7 +207,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
     movq        mmF, MMWORD [esi+1*SIZEOF_MMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -449,7 +445,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     pop         edi
     pop         ebx
     pop         edx
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-sse2.asm
index c6c80852ac5..1d8d5f5a205 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolext-sse2.asm
@@ -1,17 +1,13 @@
 ;
 ; jccolext.asm - colorspace conversion (SSE2)
 ;
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -48,15 +44,15 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]
     test        ecx, ecx
@@ -79,9 +75,9 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edx
     push        ebx
     push        edi
@@ -92,11 +88,11 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     mov         edi, JSAMPROW [edi]     ; outptr0
     mov         ebx, JSAMPROW [ebx]     ; outptr1
     mov         edx, JSAMPROW [edx]     ; outptr2
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_XMMWORD
     jae         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -147,7 +143,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
     movdqu      xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -232,7 +228,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
     movdqu      xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -478,7 +474,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     pop         edi
     pop         ebx
     pop         edx
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-avx2.asm
index 14944e952f1..9ad5ea95f80 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-avx2.asm
@@ -1,18 +1,14 @@
 ;
 ; jccolor.asm - colorspace conversion (AVX2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -33,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
 
 EXTN(jconst_rgb_ycc_convert_avx2):
@@ -46,7 +42,7 @@ PD_ONEHALFM1_CJ times 8 dd  (1 << (SCALEBITS - 1)) - 1 + \
                             (CENTERJSAMPLE << SCALEBITS)
 PD_ONEHALF      times 8 dd  (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-mmx.asm
index 8cb399bdc43..0dbec54817e 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-mmx.asm
@@ -2,17 +2,13 @@
 ; jccolor.asm - colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -33,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_ycc_convert_mmx)
 
 EXTN(jconst_rgb_ycc_convert_mmx):
@@ -46,7 +42,7 @@ PD_ONEHALFM1_CJ times 2 dd  (1 << (SCALEBITS - 1)) - 1 + \
                             (CENTERJSAMPLE << SCALEBITS)
 PD_ONEHALF      times 2 dd  (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-sse2.asm
index 686d222ff70..678306a10c3 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jccolor-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jccolor-sse2.asm
@@ -1,17 +1,13 @@
 ;
 ; jccolor.asm - colorspace conversion (SSE2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -32,7 +28,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
 
 EXTN(jconst_rgb_ycc_convert_sse2):
@@ -45,7 +41,7 @@ PD_ONEHALFM1_CJ times 4 dd  (1 << (SCALEBITS - 1)) - 1 + \
                             (CENTERJSAMPLE << SCALEBITS)
 PD_ONEHALF      times 4 dd  (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-avx2.asm
index 560ee0c71e2..ded39567df2 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-avx2.asm
@@ -1,18 +1,14 @@
 ;
 ; jcgray.asm - grayscale colorspace conversion (AVX2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -29,7 +25,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
 
 EXTN(jconst_rgb_gray_convert_avx2):
@@ -38,7 +34,7 @@ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
 PW_F0114_F0250 times 8 dw F_0_114, F_0_250
 PD_ONEHALF     times 8 dd (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-mmx.asm
index 79fdf082a84..d6f031869a0 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-mmx.asm
@@ -2,17 +2,13 @@
 ; jcgray.asm - grayscale colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -29,7 +25,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_gray_convert_mmx)
 
 EXTN(jconst_rgb_gray_convert_mmx):
@@ -38,7 +34,7 @@ PW_F0299_F0337 times 2 dw F_0_299, F_0_337
 PW_F0114_F0250 times 2 dw F_0_114, F_0_250
 PD_ONEHALF     times 2 dd (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-sse2.asm
index cb4b28e8f49..ecc7fa08abb 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgray-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgray-sse2.asm
@@ -1,17 +1,13 @@
 ;
 ; jcgray.asm - grayscale colorspace conversion (SSE2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -28,7 +24,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
 
 EXTN(jconst_rgb_gray_convert_sse2):
@@ -37,7 +33,7 @@ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
 PW_F0114_F0250 times 4 dw F_0_114, F_0_250
 PD_ONEHALF     times 4 dd (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-avx2.asm
index 3fa7973d72b..70df8f80ba4 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-avx2.asm
@@ -1,18 +1,14 @@
 ;
 ; jcgryext.asm - grayscale colorspace conversion (AVX2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -49,15 +45,15 @@ EXTN(jsimd_rgb_gray_convert_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]
     test        ecx, ecx
@@ -76,20 +72,20 @@ EXTN(jsimd_rgb_gray_convert_avx2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edi
     push        esi
     push        ecx                     ; col
 
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr0
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_YMMWORD
     jae         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -146,7 +142,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
     vmovdqu     ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -270,7 +266,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
     vmovdqu     ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -433,7 +429,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
     pop         ecx                     ; col
     pop         esi
     pop         edi
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-mmx.asm
index 8af42e5a332..dd90c3dfb08 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-mmx.asm
@@ -2,17 +2,13 @@
 ; jcgryext.asm - grayscale colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -49,15 +45,15 @@ EXTN(jsimd_rgb_gray_convert_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]  ; num_cols
     test        ecx, ecx
@@ -76,20 +72,20 @@ EXTN(jsimd_rgb_gray_convert_mmx):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edi
     push        esi
     push        ecx                     ; col
 
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr0
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_MMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -135,7 +131,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
     movq        mmG, MMWORD [esi+1*SIZEOF_MMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -203,7 +199,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
     movq        mmF, MMWORD [esi+1*SIZEOF_MMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -330,7 +326,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
     pop         ecx                     ; col
     pop         esi
     pop         edi
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-sse2.asm
index c9d6ff1e351..227295f3072 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcgryext-sse2.asm
@@ -1,17 +1,13 @@
 ;
 ; jcgryext.asm - grayscale colorspace conversion (SSE2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -48,15 +44,15 @@ EXTN(jsimd_rgb_gray_convert_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]
     test        ecx, ecx
@@ -75,20 +71,20 @@ EXTN(jsimd_rgb_gray_convert_sse2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edi
     push        esi
     push        ecx                     ; col
 
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr0
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_XMMWORD
     jae         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -139,7 +135,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
     movdqu      xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -224,7 +220,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
     movdqu      xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -359,7 +355,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
     pop         ecx                     ; col
     pop         esi
     pop         edi
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jchuff-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jchuff-sse2.asm
index 278cf5e83af..ed194dd383d 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jchuff-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jchuff-sse2.asm
@@ -1,7 +1,7 @@
 ;
 ; jchuff-sse2.asm - Huffman entropy encoding (SSE2)
 ;
-; Copyright (C) 2009-2011, 2014-2017, 2019, D. R. Commander.
+; Copyright (C) 2009-2011, 2014-2017, 2019, 2024, D. R. Commander.
 ; Copyright (C) 2015, Matthieu Darbois.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
@@ -9,11 +9,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains an SSE2 implementation for Huffman coding of one block.
 ; The following code is based on jchuff.c; see jchuff.c for more details.
@@ -42,7 +38,7 @@ endstruc
 
 EXTN(jconst_huff_encode_one_block):
 
-    alignz      32
+    ALIGNZ      32
 
 jpeg_mask_bits dq 0x0000, 0x0001, 0x0003, 0x0007
                dq 0x000f, 0x001f, 0x003f, 0x007f
@@ -65,7 +61,8 @@ times 1 <<  2 db  3
 times 1 <<  1 db  2
 times 1 <<  0 db  1
 times 1       db  0
-jpeg_nbits_table:
+GLOBAL_DATA(jpeg_nbits_table)
+EXTN(jpeg_nbits_table):
 times 1       db  0
 times 1 <<  0 db  1
 times 1 <<  1 db  2
@@ -83,14 +80,14 @@ times 1 << 12 db 13
 times 1 << 13 db 14
 times 1 << 14 db 15
 
-    alignz      32
+    ALIGNZ      32
 
 %ifdef PIC
 %define NBITS(x)      nbits_base + x
 %else
-%define NBITS(x)      jpeg_nbits_table + x
+%define NBITS(x)      EXTN(jpeg_nbits_table) + x
 %endif
-%define MASK_BITS(x)  NBITS((x) * 8) + (jpeg_mask_bits - jpeg_nbits_table)
+%define MASK_BITS(x)  NBITS((x) * 8) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -235,7 +232,7 @@ times 1 << 14 db 15
 
 ; If PIC is defined, load the address of a symbol defined in this file into a
 ; register.  Equivalent to
-;   get_GOT     %1
+;   GET_GOT     %1
 ;   lea         %1, [GOTOFF(%1, %2)]
 ; without using the GOT.
 ;
@@ -469,7 +466,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
     pcmpeqw     mm_all_0xff, mm_all_0xff                  ;Z:     all_0xff[i] = 0xFF;
 %endmacro
 
-    GET_SYM     nbits_base, jpeg_nbits_table, GET_SYM_BEFORE, GET_SYM_AFTER
+    GET_SYM     nbits_base, EXTN(jpeg_nbits_table), GET_SYM_BEFORE, GET_SYM_AFTER
 
     psrldq      xmm4, 1 * SIZEOF_WORD                     ;G: w4 = 37 44 45 38 39 46 47 --
     shufpd      xmm1, xmm5, 10b                           ;F: w1 = 36 37 44 45 50 51 58 59
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcphuff-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcphuff-sse2.asm
index c26b48a47d8..19a183fcd83 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcphuff-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcphuff-sse2.asm
@@ -7,11 +7,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains an SSE2 implementation of data preparation for progressive
 ; Huffman encoding.  See jcphuff.c for more details.
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-avx2.asm
index 0a20802dd89..5019829c9ae 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-avx2.asm
@@ -3,17 +3,13 @@
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
 ; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -70,7 +66,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -106,7 +102,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -117,7 +113,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
 
     cmp         ecx, byte SIZEOF_YMMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_r24:
     ; ecx can possibly be 8, 16, 24
@@ -141,7 +137,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
     vpxor       ymm1, ymm1, ymm1
     mov         ecx, SIZEOF_YMMWORD
     jmp         short .downsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -243,7 +239,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -279,7 +275,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -291,7 +287,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
 
     cmp         ecx, byte SIZEOF_YMMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_r24:
     cmp         ecx, 24
@@ -320,7 +316,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
     vpxor       ymm3, ymm3, ymm3
     mov         ecx, SIZEOF_YMMWORD
     jmp         short .downsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymm0, YMMWORD [edx+0*SIZEOF_YMMWORD]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-mmx.asm
index 2c223eebe81..94dd88870a3 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-mmx.asm
@@ -2,17 +2,13 @@
 ; jcsample.asm - downsampling (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -69,7 +65,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -104,7 +100,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -112,7 +108,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
 
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -212,7 +208,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -247,7 +243,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -256,7 +252,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
     mov         edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; inptr0
     mov         esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; inptr1
     mov         edi, JSAMPROW [edi]                    ; outptr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [edx+0*SIZEOF_MMWORD]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-sse2.asm
index 4fea60d2e21..eb8808bea84 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jcsample-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jcsample-sse2.asm
@@ -2,17 +2,13 @@
 ; jcsample.asm - downsampling (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -69,7 +65,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -104,7 +100,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -115,14 +111,14 @@ EXTN(jsimd_h2v1_downsample_sse2):
 
     cmp         ecx, byte SIZEOF_XMMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_r8:
     movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
     pxor        xmm1, xmm1
     mov         ecx, SIZEOF_XMMWORD
     jmp         short .downsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -225,7 +221,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -260,7 +256,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -272,7 +268,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
 
     cmp         ecx, byte SIZEOF_XMMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_r8:
     movdqa      xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
@@ -281,7 +277,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
     pxor        xmm3, xmm3
     mov         ecx, SIZEOF_XMMWORD
     jmp         short .downsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqa      xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-avx2.asm
index 015be0416c5..fd79b79568e 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-avx2.asm
@@ -2,18 +2,14 @@
 ; jdcolext.asm - colorspace conversion (AVX2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -50,15 +46,15 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [out_width(eax)]  ; num_cols
     test        ecx, ecx
@@ -81,7 +77,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax
     push        edi
@@ -94,8 +90,8 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
     mov         ebx, JSAMPROW [ebx]     ; inptr1
     mov         edx, JSAMPROW [edx]     ; inptr2
     mov         edi, JSAMPROW [edi]     ; outptr
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
-    alignx      16, 7
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
+    ALIGNX      16, 7
 .columnloop:
 
     vmovdqu     ymm5, YMMWORD [ebx]     ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -295,7 +291,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
     add         ebx, byte SIZEOF_YMMWORD  ; inptr1
     add         edx, byte SIZEOF_YMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st64:
     lea         ecx, [ecx+ecx*2]            ; imul ecx, RGB_PIXELSIZE
@@ -436,7 +432,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
     add         ebx, byte SIZEOF_YMMWORD  ; inptr1
     add         edx, byte SIZEOF_YMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st64:
     cmp         ecx, byte SIZEOF_YMMWORD/2
@@ -479,7 +475,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
 
 %endif  ; RGB_PIXELSIZE ; ---------------
 
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         ecx
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-mmx.asm
index 5813cfcb66f..636bd6d3fdc 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-mmx.asm
@@ -2,17 +2,13 @@
 ; jdcolext.asm - colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -49,15 +45,15 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [out_width(eax)]  ; num_cols
     test        ecx, ecx
@@ -80,7 +76,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax
     push        edi
@@ -93,8 +89,8 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
     mov         ebx, JSAMPROW [ebx]     ; inptr1
     mov         edx, JSAMPROW [edx]     ; inptr2
     mov         edi, JSAMPROW [edi]     ; outptr
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
-    alignx      16, 7
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm5, MMWORD [ebx]       ; mm5=Cb(01234567)
@@ -255,7 +251,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
     add         edx, byte SIZEOF_MMWORD                ; inptr2
     add         edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD  ; outptr
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st16:
     lea         ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
@@ -344,7 +340,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
     add         edx, byte SIZEOF_MMWORD                ; inptr2
     add         edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD  ; outptr
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st16:
     cmp         ecx, byte SIZEOF_MMWORD/2
@@ -369,7 +365,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
 
 %endif  ; RGB_PIXELSIZE ; ---------------
 
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         ecx
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-sse2.asm
index d5572b32946..0150f2cb69c 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolext-sse2.asm
@@ -2,17 +2,13 @@
 ; jdcolext.asm - colorspace conversion (SSE2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -49,15 +45,15 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [out_width(eax)]  ; num_cols
     test        ecx, ecx
@@ -80,7 +76,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax
     push        edi
@@ -93,8 +89,8 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
     mov         ebx, JSAMPROW [ebx]     ; inptr1
     mov         edx, JSAMPROW [edx]     ; inptr2
     mov         edi, JSAMPROW [edi]     ; outptr
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
-    alignx      16, 7
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
+    ALIGNX      16, 7
 .columnloop:
 
     movdqa      xmm5, XMMWORD [ebx]     ; xmm5=Cb(0123456789ABCDEF)
@@ -275,7 +271,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
     add         ebx, byte SIZEOF_XMMWORD  ; inptr1
     add         edx, byte SIZEOF_XMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st32:
     lea         ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
@@ -387,7 +383,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
     add         ebx, byte SIZEOF_XMMWORD  ; inptr1
     add         edx, byte SIZEOF_XMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st32:
     cmp         ecx, byte SIZEOF_XMMWORD/2
@@ -423,7 +419,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 
 %endif  ; RGB_PIXELSIZE ; ---------------
 
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         ecx
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-avx2.asm
index e05b60d0017..d3a30d63a71 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-avx2.asm
@@ -3,17 +3,13 @@
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
 ; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -32,7 +28,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
 
 EXTN(jconst_ycc_rgb_convert_avx2):
@@ -43,7 +39,7 @@ PW_MF0344_F0285 times 8  dw -F_0_344, F_0_285
 PW_ONE          times 16 dw  1
 PD_ONEHALF      times 8  dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-mmx.asm
index fb7e7bcce4b..6e67e4b72ea 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-mmx.asm
@@ -2,17 +2,13 @@
 ; jdcolor.asm - colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_ycc_rgb_convert_mmx)
 
 EXTN(jconst_ycc_rgb_convert_mmx):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
 PW_ONE          times 4 dw  1
 PD_ONEHALF      times 2 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-sse2.asm
index b736255317e..79c9c6821a4 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdcolor-sse2.asm
@@ -2,17 +2,13 @@
 ; jdcolor.asm - colorspace conversion (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
 
 EXTN(jconst_ycc_rgb_convert_sse2):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
 PW_ONE          times 8 dw  1
 PD_ONEHALF      times 4 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-avx2.asm
index 711e6792d0f..90493fd023b 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-avx2.asm
@@ -2,18 +2,14 @@
 ; jdmerge.asm - merged upsampling/color conversion (AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -32,7 +28,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_merged_upsample_avx2)
 
 EXTN(jconst_merged_upsample_avx2):
@@ -43,7 +39,7 @@ PW_MF0344_F0285 times 8  dw -F_0_344, F_0_285
 PW_ONE          times 16 dw  1
 PD_ONEHALF      times 8  dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-mmx.asm
index 6e8311d4081..0dc204aa8b4 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-mmx.asm
@@ -2,17 +2,13 @@
 ; jdmerge.asm - merged upsampling/color conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_merged_upsample_mmx)
 
 EXTN(jconst_merged_upsample_mmx):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
 PW_ONE          times 4 dw  1
 PD_ONEHALF      times 2 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-sse2.asm
index e32f90aa177..06f07627421 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmerge-sse2.asm
@@ -2,17 +2,13 @@
 ; jdmerge.asm - merged upsampling/color conversion (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_merged_upsample_sse2)
 
 EXTN(jconst_merged_upsample_sse2):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
 PW_ONE          times 8 dw  1
 PD_ONEHALF      times 4 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-avx2.asm
index e35f7282bc4..a7aa930e346 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-avx2.asm
@@ -2,18 +2,14 @@
 ; jdmrgext.asm - merged upsampling/color conversion (AVX2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -50,15 +46,15 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [output_width(eax)]  ; col
     test        ecx, ecx
@@ -79,9 +75,9 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
 
     pop         ecx                     ; col
 
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     vmovdqu     ymm6, YMMWORD [ebx]     ; ymm6=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
     vmovdqu     ymm7, YMMWORD [edx]     ; ymm7=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -168,13 +164,13 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
 
     mov         al, 2                   ; Yctr
     jmp         short .Yloop_1st
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_2nd:
     vmovdqa     ymm0, YMMWORD [wk(1)]   ; ymm0=(R-Y)H
     vmovdqa     ymm2, YMMWORD [wk(2)]   ; ymm2=(G-Y)H
     vmovdqa     ymm4, YMMWORD [wk(0)]   ; ymm4=(B-Y)H
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_1st:
     vmovdqu     ymm7, YMMWORD [esi]     ; ymm7=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -301,7 +297,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     add         ebx, byte SIZEOF_YMMWORD  ; inptr1
     add         edx, byte SIZEOF_YMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st64:
     lea         ecx, [ecx+ecx*2]            ; imul ecx, RGB_PIXELSIZE
@@ -445,7 +441,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     add         ebx, byte SIZEOF_YMMWORD  ; inptr1
     add         edx, byte SIZEOF_YMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st64:
     cmp         ecx, byte SIZEOF_YMMWORD/2
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-mmx.asm
index eb3e36b4759..562758146c6 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-mmx.asm
@@ -2,17 +2,13 @@
 ; jdmrgext.asm - merged upsampling/color conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -47,15 +43,15 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [output_width(eax)]  ; col
     test        ecx, ecx
@@ -76,9 +72,9 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
 
     pop         ecx                     ; col
 
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     movq        mm6, MMWORD [ebx]       ; mm6=Cb(01234567)
     movq        mm7, MMWORD [edx]       ; mm7=Cr(01234567)
@@ -171,13 +167,13 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
 
     mov         al, 2                   ; Yctr
     jmp         short .Yloop_1st
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_2nd:
     movq        mm0, MMWORD [wk(1)]     ; mm0=(R-Y)H
     movq        mm2, MMWORD [wk(2)]     ; mm2=(G-Y)H
     movq        mm4, MMWORD [wk(0)]     ; mm4=(B-Y)H
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_1st:
     movq        mm7, MMWORD [esi]       ; mm7=Y(01234567)
@@ -258,7 +254,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
     add         ebx, byte SIZEOF_MMWORD                ; inptr1
     add         edx, byte SIZEOF_MMWORD                ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st16:
     lea         ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
@@ -350,7 +346,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
     add         ebx, byte SIZEOF_MMWORD                ; inptr1
     add         edx, byte SIZEOF_MMWORD                ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st16:
     cmp         ecx, byte SIZEOF_MMWORD/2
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-sse2.asm
index c113dc4d27e..13e7d980fa6 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdmrgext-sse2.asm
@@ -2,17 +2,13 @@
 ; jdmrgext.asm - merged upsampling/color conversion (SSE2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -49,15 +45,15 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [output_width(eax)]  ; col
     test        ecx, ecx
@@ -78,9 +74,9 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 
     pop         ecx                     ; col
 
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     movdqa      xmm6, XMMWORD [ebx]     ; xmm6=Cb(0123456789ABCDEF)
     movdqa      xmm7, XMMWORD [edx]     ; xmm7=Cr(0123456789ABCDEF)
@@ -173,13 +169,13 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 
     mov         al, 2                   ; Yctr
     jmp         short .Yloop_1st
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_2nd:
     movdqa      xmm0, XMMWORD [wk(1)]   ; xmm0=(R-Y)H
     movdqa      xmm2, XMMWORD [wk(2)]   ; xmm2=(G-Y)H
     movdqa      xmm4, XMMWORD [wk(0)]   ; xmm4=(B-Y)H
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_1st:
     movdqa      xmm7, XMMWORD [esi]     ; xmm7=Y(0123456789ABCDEF)
@@ -280,7 +276,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     add         ebx, byte SIZEOF_XMMWORD  ; inptr1
     add         edx, byte SIZEOF_XMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st32:
     lea         ecx, [ecx+ecx*2]            ; imul ecx, RGB_PIXELSIZE
@@ -395,7 +391,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     add         ebx, byte SIZEOF_XMMWORD  ; inptr1
     add         edx, byte SIZEOF_XMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st32:
     cmp         ecx, byte SIZEOF_XMMWORD/2
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-avx2.asm
index a800c35e083..eba53ef7574 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-avx2.asm
@@ -3,24 +3,20 @@
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
 ; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fancy_upsample_avx2)
 
 EXTN(jconst_fancy_upsample_avx2):
@@ -31,7 +27,7 @@ PW_THREE times 16 dw 3
 PW_SEVEN times 16 dw 7
 PW_EIGHT times 16 dw 8
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -62,13 +58,13 @@ PW_EIGHT times 16 dw 8
 EXTN(jsimd_h2v1_fancy_upsample_avx2):
     push        ebp
     mov         ebp, esp
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     mov         eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
     test        eax, eax
@@ -81,7 +77,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        edi
@@ -104,7 +100,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     and         eax, byte -SIZEOF_YMMWORD
     cmp         eax, byte SIZEOF_YMMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     vpcmpeqb    xmm6, xmm6, xmm6
@@ -112,7 +108,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     vperm2i128  ymm6, ymm6, ymm6, 1             ; (---- ---- ... ---- ---- ff) MSB is ff
     vpand       ymm6, ymm6, YMMWORD [esi+0*SIZEOF_YMMWORD]
     jmp         short .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymm6, YMMWORD [esi+1*SIZEOF_YMMWORD]
@@ -196,7 +192,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     pop         ebp
     ret
 
@@ -234,15 +230,15 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         edx, eax                ; edx = original ebp
     mov         eax, JDIMENSION [downsamp_width(edx)]  ; colctr
@@ -256,7 +252,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     mov         esi, JSAMPARRAY [input_data(edx)]    ; input_data
     mov         edi, POINTER [output_data_ptr(edx)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        ecx
@@ -286,8 +282,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     vmovdqu     ymm1, YMMWORD [ecx+0*SIZEOF_YMMWORD]  ; ymm1=row[-1][0]
     vmovdqu     ymm2, YMMWORD [esi+0*SIZEOF_YMMWORD]  ; ymm2=row[+1][0]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     vpxor       ymm3, ymm3, ymm3        ; ymm3=(all 0's)
 
@@ -328,19 +324,19 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     vmovdqa     YMMWORD [wk(0)], ymm1
     vmovdqa     YMMWORD [wk(1)], ymm2
 
-    poppic      ebx
+    POPPIC      ebx
 
     add         eax, byte SIZEOF_YMMWORD-1
     and         eax, byte -SIZEOF_YMMWORD
     cmp         eax, byte SIZEOF_YMMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     ; -- process the last column block
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     vpcmpeqb    xmm1, xmm1, xmm1
     vpslldq     xmm1, xmm1, (SIZEOF_XMMWORD-2)
@@ -353,7 +349,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     vmovdqa     YMMWORD [wk(3)], ymm2          ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31)
 
     jmp         near .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     ; -- process the next column block
@@ -362,8 +358,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     vmovdqu     ymm1, YMMWORD [ecx+1*SIZEOF_YMMWORD]  ; ymm1=row[-1][1]
     vmovdqu     ymm2, YMMWORD [esi+1*SIZEOF_YMMWORD]  ; ymm2=row[+1][1]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     vpxor       ymm3, ymm3, ymm3        ; ymm3=(all 0's)
 
@@ -516,7 +512,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     vmovdqu     YMMWORD [edi+0*SIZEOF_YMMWORD], ymm1
     vmovdqu     YMMWORD [edi+1*SIZEOF_YMMWORD], ymm0
 
-    poppic      ebx
+    POPPIC      ebx
 
     sub         eax, byte SIZEOF_YMMWORD
     add         ecx, byte 1*SIZEOF_YMMWORD  ; inptr1(above)
@@ -590,7 +586,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -598,7 +594,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr
     mov         eax, edx                ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     cmp         eax, byte SIZEOF_YMMWORD
@@ -629,7 +625,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
     add         esi, byte SIZEOF_YMMWORD    ; inptr
     add         edi, byte 2*SIZEOF_YMMWORD  ; outptr
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
@@ -689,7 +685,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -698,7 +694,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
     mov         ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]  ; outptr0
     mov         edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]  ; outptr1
     mov         eax, edx                               ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     cmp         eax, byte SIZEOF_YMMWORD
@@ -734,7 +730,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
     add         ebx, 2*SIZEOF_YMMWORD     ; outptr0
     add         edi, 2*SIZEOF_YMMWORD     ; outptr1
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-mmx.asm
index 12c49f0eab5..01d09e62d10 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-mmx.asm
@@ -2,24 +2,20 @@
 ; jdsample.asm - upsampling (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fancy_upsample_mmx)
 
 EXTN(jconst_fancy_upsample_mmx):
@@ -30,7 +26,7 @@ PW_THREE times 4 dw 3
 PW_SEVEN times 4 dw 7
 PW_EIGHT times 4 dw 8
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -61,13 +57,13 @@ PW_EIGHT times 4 dw 8
 EXTN(jsimd_h2v1_fancy_upsample_mmx):
     push        ebp
     mov         ebp, esp
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     mov         eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
     test        eax, eax
@@ -80,7 +76,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        edi
@@ -103,14 +99,14 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
     and         eax, byte -SIZEOF_MMWORD
     cmp         eax, byte SIZEOF_MMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     pcmpeqb     mm6, mm6
     psllq       mm6, (SIZEOF_MMWORD-1)*BYTE_BIT
     pand        mm6, MMWORD [esi+0*SIZEOF_MMWORD]
     jmp         short .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movq        mm6, MMWORD [esi+1*SIZEOF_MMWORD]
@@ -187,7 +183,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     pop         ebp
     ret
 
@@ -224,15 +220,15 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         edx, eax                ; edx = original ebp
     mov         eax, JDIMENSION [downsamp_width(edx)]  ; colctr
@@ -246,7 +242,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     mov         esi, JSAMPARRAY [input_data(edx)]    ; input_data
     mov         edi, POINTER [output_data_ptr(edx)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        ecx
@@ -276,8 +272,8 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     movq        mm1, MMWORD [ecx+0*SIZEOF_MMWORD]  ; mm1=row[-1][0]
     movq        mm2, MMWORD [esi+0*SIZEOF_MMWORD]  ; mm2=row[+1][0]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pxor        mm3, mm3                ; mm3=(all 0's)
     movq        mm4, mm0
@@ -312,19 +308,19 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     movq        MMWORD [wk(0)], mm1
     movq        MMWORD [wk(1)], mm2
 
-    poppic      ebx
+    POPPIC      ebx
 
     add         eax, byte SIZEOF_MMWORD-1
     and         eax, byte -SIZEOF_MMWORD
     cmp         eax, byte SIZEOF_MMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     ; -- process the last column block
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pcmpeqb     mm1, mm1
     psllq       mm1, (SIZEOF_MMWORD-2)*BYTE_BIT
@@ -337,7 +333,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     movq        MMWORD [wk(3)], mm2
 
     jmp         short .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     ; -- process the next column block
@@ -346,8 +342,8 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     movq        mm1, MMWORD [ecx+1*SIZEOF_MMWORD]  ; mm1=row[-1][1]
     movq        mm2, MMWORD [esi+1*SIZEOF_MMWORD]  ; mm2=row[+1][1]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pxor        mm3, mm3                ; mm3=(all 0's)
     movq        mm4, mm0
@@ -486,7 +482,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     movq        MMWORD [edi+0*SIZEOF_MMWORD], mm1
     movq        MMWORD [edi+1*SIZEOF_MMWORD], mm0
 
-    poppic      ebx
+    POPPIC      ebx
 
     sub         eax, byte SIZEOF_MMWORD
     add         ecx, byte 1*SIZEOF_MMWORD  ; inptr1(above)
@@ -561,7 +557,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -569,7 +565,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr
     mov         eax, edx                ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -599,7 +595,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
     add         esi, byte 2*SIZEOF_MMWORD  ; inptr
     add         edi, byte 4*SIZEOF_MMWORD  ; outptr
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
@@ -660,7 +656,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -669,7 +665,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
     mov         ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]  ; outptr0
     mov         edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]  ; outptr1
     mov         eax, edx                               ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -704,7 +700,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
     add         ebx, byte 4*SIZEOF_MMWORD  ; outptr0
     add         edi, byte 4*SIZEOF_MMWORD  ; outptr1
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-sse2.asm
index 4e28d2f4b80..b10d9227987 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jdsample-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jdsample-sse2.asm
@@ -2,24 +2,20 @@
 ; jdsample.asm - upsampling (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fancy_upsample_sse2)
 
 EXTN(jconst_fancy_upsample_sse2):
@@ -30,7 +26,7 @@ PW_THREE times 8 dw 3
 PW_SEVEN times 8 dw 7
 PW_EIGHT times 8 dw 8
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -61,13 +57,13 @@ PW_EIGHT times 8 dw 8
 EXTN(jsimd_h2v1_fancy_upsample_sse2):
     push        ebp
     mov         ebp, esp
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     mov         eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
     test        eax, eax
@@ -80,7 +76,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        edi
@@ -103,14 +99,14 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     and         eax, byte -SIZEOF_XMMWORD
     cmp         eax, byte SIZEOF_XMMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     pcmpeqb     xmm6, xmm6
     pslldq      xmm6, (SIZEOF_XMMWORD-1)
     pand        xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD]
     jmp         short .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqa      xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD]
@@ -185,7 +181,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     pop         ebp
     ret
 
@@ -223,15 +219,15 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         edx, eax                ; edx = original ebp
     mov         eax, JDIMENSION [downsamp_width(edx)]  ; colctr
@@ -245,7 +241,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     mov         esi, JSAMPARRAY [input_data(edx)]    ; input_data
     mov         edi, POINTER [output_data_ptr(edx)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        ecx
@@ -275,8 +271,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     movdqa      xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD]  ; xmm1=row[-1][0]
     movdqa      xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD]  ; xmm2=row[+1][0]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pxor        xmm3, xmm3              ; xmm3=(all 0's)
     movdqa      xmm4, xmm0
@@ -311,19 +307,19 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     movdqa      XMMWORD [wk(0)], xmm1
     movdqa      XMMWORD [wk(1)], xmm2
 
-    poppic      ebx
+    POPPIC      ebx
 
     add         eax, byte SIZEOF_XMMWORD-1
     and         eax, byte -SIZEOF_XMMWORD
     cmp         eax, byte SIZEOF_XMMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     ; -- process the last column block
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pcmpeqb     xmm1, xmm1
     pslldq      xmm1, (SIZEOF_XMMWORD-2)
@@ -336,7 +332,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     movdqa      XMMWORD [wk(3)], xmm2   ; xmm2=(-- -- -- -- -- -- -- 15)
 
     jmp         near .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     ; -- process the next column block
@@ -345,8 +341,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     movdqa      xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD]  ; xmm1=row[-1][1]
     movdqa      xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]  ; xmm2=row[+1][1]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pxor        xmm3, xmm3              ; xmm3=(all 0's)
     movdqa      xmm4, xmm0
@@ -485,7 +481,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     movdqa      XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1
     movdqa      XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0
 
-    poppic      ebx
+    POPPIC      ebx
 
     sub         eax, byte SIZEOF_XMMWORD
     add         ecx, byte 1*SIZEOF_XMMWORD  ; inptr1(above)
@@ -558,7 +554,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -566,7 +562,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr
     mov         eax, edx                ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -596,7 +592,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     add         esi, byte 2*SIZEOF_XMMWORD  ; inptr
     add         edi, byte 4*SIZEOF_XMMWORD  ; outptr
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
@@ -655,7 +651,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -664,7 +660,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
     mov         ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]  ; outptr0
     mov         edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]  ; outptr1
     mov         eax, edx                               ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -699,7 +695,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
     add         ebx, byte 4*SIZEOF_XMMWORD  ; outptr0
     add         edi, byte 4*SIZEOF_XMMWORD  ; outptr1
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-3dn.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-3dn.asm
index 322ab163252..0cedc6caf40 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-3dn.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-3dn.asm
@@ -2,17 +2,13 @@
 ; jfdctflt.asm - floating-point FDCT (3DNow!)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the forward DCT
 ; (Discrete Cosine Transform). The following code is based directly on
@@ -24,7 +20,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_float_3dnow)
 
 EXTN(jconst_fdct_float_3dnow):
@@ -34,7 +30,7 @@ PD_0_707 times 2 dd 0.707106781186547524400844
 PD_0_541 times 2 dd 0.541196100146196984399723
 PD_1_306 times 2 dd 1.306562964876376527856643
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -63,19 +59,19 @@ EXTN(jsimd_fdct_float_3dnow):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
     mov         edx, POINTER [data(eax)]  ; (FAST_FLOAT *)
     mov         ecx, DCTSIZE/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     movq        mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
@@ -190,7 +186,7 @@ EXTN(jsimd_fdct_float_3dnow):
 
     mov         edx, POINTER [data(eax)]  ; (FAST_FLOAT *)
     mov         ecx, DCTSIZE/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
@@ -307,7 +303,7 @@ EXTN(jsimd_fdct_float_3dnow):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-sse.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-sse.asm
index 86952c6499c..2cb95335869 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-sse.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctflt-sse.asm
@@ -2,17 +2,13 @@
 ; jfdctflt.asm - floating-point FDCT (SSE)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the forward DCT
 ; (Discrete Cosine Transform). The following code is based directly on
@@ -34,7 +30,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_float_sse)
 
 EXTN(jconst_fdct_float_sse):
@@ -44,7 +40,7 @@ PD_0_707 times 4 dd 0.707106781186547524400844
 PD_0_541 times 4 dd 0.541196100146196984399723
 PD_1_306 times 4 dd 1.306562964876376527856643
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -74,19 +70,19 @@ EXTN(jsimd_fdct_float_sse):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
     mov         edx, POINTER [data(eax)]  ; (FAST_FLOAT *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     movaps      xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
@@ -222,7 +218,7 @@ EXTN(jsimd_fdct_float_sse):
 
     mov         edx, POINTER [data(eax)]  ; (FAST_FLOAT *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movaps      xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
@@ -358,7 +354,7 @@ EXTN(jsimd_fdct_float_sse):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-mmx.asm
index 80645a50d7e..fe16e83ee24 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-mmx.asm
@@ -2,17 +2,13 @@
 ; jfdctfst.asm - fast integer FDCT (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a fast, not so accurate integer implementation of
 ; the forward DCT (Discrete Cosine Transform). The following code is
@@ -49,7 +45,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS)  ; FIX(1.306562965)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_ifast_mmx)
 
 EXTN(jconst_fdct_ifast_mmx):
@@ -59,7 +55,7 @@ PW_F0382 times 4 dw F_0_382 << CONST_SHIFT
 PW_F0541 times 4 dw F_0_541 << CONST_SHIFT
 PW_F1306 times 4 dw F_1_306 << CONST_SHIFT
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -88,19 +84,19 @@ EXTN(jsimd_fdct_ifast_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
     mov         edx, POINTER [data(eax)]  ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     movq        mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -241,7 +237,7 @@ EXTN(jsimd_fdct_ifast_mmx):
 
     mov         edx, POINTER [data(eax)]  ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -384,7 +380,7 @@ EXTN(jsimd_fdct_ifast_mmx):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-sse2.asm
index 446fa7a68f7..890482e0067 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctfst-sse2.asm
@@ -2,17 +2,13 @@
 ; jfdctfst.asm - fast integer FDCT (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a fast, not so accurate integer implementation of
 ; the forward DCT (Discrete Cosine Transform). The following code is
@@ -49,7 +45,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS)  ; FIX(1.306562965)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_ifast_sse2)
 
 EXTN(jconst_fdct_ifast_sse2):
@@ -59,7 +55,7 @@ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
 PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
 PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -89,13 +85,13 @@ EXTN(jsimd_fdct_ifast_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
@@ -392,7 +388,7 @@ EXTN(jsimd_fdct_ifast_sse2):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-avx2.asm
index 23cf733135b..05ea8654850 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-avx2.asm
@@ -2,17 +2,13 @@
 ; jfdctint.asm - accurate integer FDCT (AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; forward DCT (Discrete Cosine Transform). The following code is based
@@ -65,7 +61,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %1-%4: Input/output registers
 ; %5-%8: Temp registers
 
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
     ; %1=(00 01 02 03 04 05 06 07  40 41 42 43 44 45 46 47)
     ; %2=(10 11 12 13 14 15 16 17  50 51 52 53 54 55 56 57)
     ; %3=(20 21 22 23 24 25 26 27  60 61 62 63 64 65 66 67)
@@ -108,7 +104,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %5-%8: Temp registers
 ; %9:    Pass (1 or 2)
 
-%macro dodct 9
+%macro DODCT 9
     vpsubw      %5, %1, %4              ; %5=data1_0-data6_7=tmp6_7
     vpaddw      %6, %1, %4              ; %6=data1_0+data6_7=tmp1_0
     vpaddw      %7, %2, %3              ; %7=data3_2+data4_5=tmp3_2
@@ -223,7 +219,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_islow_avx2)
 
 EXTN(jconst_fdct_islow_avx2):
@@ -242,7 +238,7 @@ PW_DESCALE_P2X             times 16 dw  1 << (PASS1_BITS - 1)
 PW_1_NEG1                  times 8  dw  1
                            times 8  dw -1
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -262,13 +258,13 @@ PW_1_NEG1                  times 8  dw  1
 EXTN(jsimd_fdct_islow_avx2):
     push        ebp
     mov         ebp, esp
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
@@ -292,9 +288,9 @@ EXTN(jsimd_fdct_islow_avx2):
     ; ymm2=(20 21 22 23 24 25 26 27  60 61 62 63 64 65 66 67)
     ; ymm3=(30 31 32 33 34 35 36 37  70 71 72 73 74 75 76 77)
 
-    dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
 
-    dodct       ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
+    DODCT       ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
     ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
 
     ; ---- Pass 2: process columns.
@@ -302,9 +298,9 @@ EXTN(jsimd_fdct_islow_avx2):
     vperm2i128  ymm4, ymm1, ymm3, 0x20  ; ymm4=data3_7
     vperm2i128  ymm1, ymm1, ymm3, 0x31  ; ymm1=data1_5
 
-    dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
 
-    dodct       ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
+    DODCT       ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
     ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
 
     vperm2i128 ymm3, ymm0, ymm1, 0x30   ; ymm3=data0_1
@@ -322,7 +318,7 @@ EXTN(jsimd_fdct_islow_avx2):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     pop         ebp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-mmx.asm
index 34a43b9e5ef..7d4c61cd7d3 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-mmx.asm
@@ -2,17 +2,13 @@
 ; jfdctint.asm - accurate integer FDCT (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; forward DCT (Discrete Cosine Transform). The following code is based
@@ -63,7 +59,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_islow_mmx)
 
 EXTN(jconst_fdct_islow_mmx):
@@ -80,7 +76,7 @@ PD_DESCALE_P1  times 2 dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 2 dd  1 << (DESCALE_P2 - 1)
 PW_DESCALE_P2X times 4 dw  1 << (PASS1_BITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -109,19 +105,19 @@ EXTN(jsimd_fdct_islow_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
     mov         edx, POINTER [data(eax)]  ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     movq        mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -363,7 +359,7 @@ EXTN(jsimd_fdct_islow_mmx):
 
     mov         edx, POINTER [data(eax)]  ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -609,7 +605,7 @@ EXTN(jsimd_fdct_islow_mmx):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-sse2.asm
index 6f8e18cb9d0..7ed5c9501ac 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jfdctint-sse2.asm
@@ -2,17 +2,13 @@
 ; jfdctint.asm - accurate integer FDCT (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; forward DCT (Discrete Cosine Transform). The following code is based
@@ -63,7 +59,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_islow_sse2)
 
 EXTN(jconst_fdct_islow_sse2):
@@ -80,7 +76,7 @@ PD_DESCALE_P1  times 4 dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 4 dd  1 << (DESCALE_P2 - 1)
 PW_DESCALE_P2X times 8 dw  1 << (PASS1_BITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -110,13 +106,13 @@ EXTN(jsimd_fdct_islow_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
@@ -622,7 +618,7 @@ EXTN(jsimd_fdct_islow_sse2):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-3dn.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-3dn.asm
index 87951910d8e..8612eee3a5f 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-3dn.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-3dn.asm
@@ -2,17 +2,13 @@
 ; jidctflt.asm - floating-point IDCT (3DNow! & MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the inverse DCT
 ; (Discrete Cosine Transform). The following code is based directly on
@@ -24,7 +20,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_float_3dnow)
 
 EXTN(jconst_idct_float_3dnow):
@@ -36,7 +32,7 @@ PD_2_613        times 2 dd 2.613125929752753055713286
 PD_RNDINT_MAGIC times 2 dd 100663296.0  ; (float)(0x00C00000 << 3)
 PB_CENTERJSAMP  times 8 db CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -78,7 +74,7 @@ EXTN(jsimd_idct_float_3dnow):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -87,21 +83,21 @@ EXTN(jsimd_idct_float_3dnow):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; FAST_FLOAT *wsptr
     mov         ecx, DCTSIZE/2                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
     or          eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
     jnz         short .columnDCT
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
     mov         ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
     mov         eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
     or          ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
     or          eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
     or          ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
     or          eax, ebx
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
     jnz         short .columnDCT
 
     ; -- AC terms all zero
@@ -127,7 +123,7 @@ EXTN(jsimd_idct_float_3dnow):
     movq        MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1
     movq        MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -293,7 +289,7 @@ EXTN(jsimd_idct_float_3dnow):
     mov         edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
     mov         eax, JDIMENSION [output_col(eax)]
     mov         ecx, DCTSIZE/2                     ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     ; -- Even part
@@ -420,14 +416,14 @@ EXTN(jsimd_idct_float_3dnow):
     punpckldq   mm6, mm4                ; mm6=(00 01 02 03 04 05 06 07)
     punpckhdq   mm7, mm4                ; mm7=(10 11 12 13 14 15 16 17)
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
 
     mov         edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
     mov         ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
     movq        MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
     movq        MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
 
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
 
     add         esi, byte 2*SIZEOF_FAST_FLOAT  ; wsptr
     add         edi, byte 2*SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse.asm
index b27ecfdf46a..caf636b5106 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse.asm
@@ -2,17 +2,13 @@
 ; jidctflt.asm - floating-point IDCT (SSE & MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the inverse DCT
 ; (Discrete Cosine Transform). The following code is based directly on
@@ -23,18 +19,18 @@
 
 ; --------------------------------------------------------------------------
 
-%macro unpcklps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+%macro UNPCKLPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
     shufps      %1, %2, 0x44
 %endmacro
 
-%macro unpckhps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+%macro UNPCKHPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
     shufps      %1, %2, 0xEE
 %endmacro
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_float_sse)
 
 EXTN(jconst_idct_float_sse):
@@ -46,7 +42,7 @@ PD_M2_613      times 4 dd -2.613125929752753055713286
 PD_0_125       times 4 dd  0.125        ; 1/8
 PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -88,7 +84,7 @@ EXTN(jsimd_idct_float_sse):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -97,7 +93,7 @@ EXTN(jsimd_idct_float_sse):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; FAST_FLOAT *wsptr
     mov         ecx, DCTSIZE/4                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -149,7 +145,7 @@ EXTN(jsimd_idct_float_sse):
     movaps      XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
     movaps      XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -325,11 +321,11 @@ EXTN(jsimd_idct_float_sse):
     unpckhps    xmm4, xmm0              ; xmm4=(42 52 43 53)
 
     movaps      xmm3, xmm6              ; transpose coefficients(phase 2)
-    unpcklps2   xmm6, xmm7              ; xmm6=(00 10 20 30)
-    unpckhps2   xmm3, xmm7              ; xmm3=(01 11 21 31)
+    UNPCKLPS2   xmm6, xmm7              ; xmm6=(00 10 20 30)
+    UNPCKHPS2   xmm3, xmm7              ; xmm3=(01 11 21 31)
     movaps      xmm0, xmm1              ; transpose coefficients(phase 2)
-    unpcklps2   xmm1, xmm2              ; xmm1=(02 12 22 32)
-    unpckhps2   xmm0, xmm2              ; xmm0=(03 13 23 33)
+    UNPCKLPS2   xmm1, xmm2              ; xmm1=(02 12 22 32)
+    UNPCKHPS2   xmm0, xmm2              ; xmm0=(03 13 23 33)
 
     movaps      xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
     movaps      xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
@@ -340,11 +336,11 @@ EXTN(jsimd_idct_float_sse):
     movaps      XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
 
     movaps      xmm6, xmm5              ; transpose coefficients(phase 2)
-    unpcklps2   xmm5, xmm7              ; xmm5=(40 50 60 70)
-    unpckhps2   xmm6, xmm7              ; xmm6=(41 51 61 71)
+    UNPCKLPS2   xmm5, xmm7              ; xmm5=(40 50 60 70)
+    UNPCKHPS2   xmm6, xmm7              ; xmm6=(41 51 61 71)
     movaps      xmm3, xmm4              ; transpose coefficients(phase 2)
-    unpcklps2   xmm4, xmm2              ; xmm4=(42 52 62 72)
-    unpckhps2   xmm3, xmm2              ; xmm3=(43 53 63 73)
+    UNPCKLPS2   xmm4, xmm2              ; xmm4=(42 52 62 72)
+    UNPCKHPS2   xmm3, xmm2              ; xmm3=(43 53 63 73)
 
     movaps      XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
     movaps      XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
@@ -372,7 +368,7 @@ EXTN(jsimd_idct_float_sse):
     mov         edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
     mov         eax, JDIMENSION [output_col(eax)]
     mov         ecx, DCTSIZE/4                     ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     ; -- Even part
@@ -536,7 +532,7 @@ EXTN(jsimd_idct_float_sse):
     punpckldq   mm5, mm6                ; mm5=(20 21 22 23 24 25 26 27)
     punpckhdq   mm4, mm6                ; mm4=(30 31 32 33 34 35 36 37)
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
 
     mov         edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
     mov         ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -547,7 +543,7 @@ EXTN(jsimd_idct_float_sse):
     movq        MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
     movq        MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
 
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
 
     add         esi, byte 4*SIZEOF_FAST_FLOAT  ; wsptr
     add         edi, byte 4*SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse2.asm
index c646eaef76e..42703a8efd7 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctflt-sse2.asm
@@ -2,17 +2,13 @@
 ; jidctflt.asm - floating-point IDCT (SSE & SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the inverse DCT
 ; (Discrete Cosine Transform). The following code is based directly on
@@ -23,18 +19,18 @@
 
 ; --------------------------------------------------------------------------
 
-%macro unpcklps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+%macro UNPCKLPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
     shufps      %1, %2, 0x44
 %endmacro
 
-%macro unpckhps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+%macro UNPCKHPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
     shufps      %1, %2, 0xEE
 %endmacro
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_float_sse2)
 
 EXTN(jconst_idct_float_sse2):
@@ -46,7 +42,7 @@ PD_M2_613       times 4  dd -2.613125929752753055713286
 PD_RNDINT_MAGIC times 4  dd  100663296.0  ; (float)(0x00C00000 << 3)
 PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -88,7 +84,7 @@ EXTN(jsimd_idct_float_sse2):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -97,7 +93,7 @@ EXTN(jsimd_idct_float_sse2):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; FAST_FLOAT *wsptr
     mov         ecx, DCTSIZE/4                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -150,7 +146,7 @@ EXTN(jsimd_idct_float_sse2):
     movaps      XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
     movaps      XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -287,11 +283,11 @@ EXTN(jsimd_idct_float_sse2):
     unpckhps    xmm4, xmm0              ; xmm4=(42 52 43 53)
 
     movaps      xmm3, xmm6              ; transpose coefficients(phase 2)
-    unpcklps2   xmm6, xmm7              ; xmm6=(00 10 20 30)
-    unpckhps2   xmm3, xmm7              ; xmm3=(01 11 21 31)
+    UNPCKLPS2   xmm6, xmm7              ; xmm6=(00 10 20 30)
+    UNPCKHPS2   xmm3, xmm7              ; xmm3=(01 11 21 31)
     movaps      xmm0, xmm1              ; transpose coefficients(phase 2)
-    unpcklps2   xmm1, xmm2              ; xmm1=(02 12 22 32)
-    unpckhps2   xmm0, xmm2              ; xmm0=(03 13 23 33)
+    UNPCKLPS2   xmm1, xmm2              ; xmm1=(02 12 22 32)
+    UNPCKHPS2   xmm0, xmm2              ; xmm0=(03 13 23 33)
 
     movaps      xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
     movaps      xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
@@ -302,11 +298,11 @@ EXTN(jsimd_idct_float_sse2):
     movaps      XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
 
     movaps      xmm6, xmm5              ; transpose coefficients(phase 2)
-    unpcklps2   xmm5, xmm7              ; xmm5=(40 50 60 70)
-    unpckhps2   xmm6, xmm7              ; xmm6=(41 51 61 71)
+    UNPCKLPS2   xmm5, xmm7              ; xmm5=(40 50 60 70)
+    UNPCKHPS2   xmm6, xmm7              ; xmm6=(41 51 61 71)
     movaps      xmm3, xmm4              ; transpose coefficients(phase 2)
-    unpcklps2   xmm4, xmm2              ; xmm4=(42 52 62 72)
-    unpckhps2   xmm3, xmm2              ; xmm3=(43 53 63 73)
+    UNPCKLPS2   xmm4, xmm2              ; xmm4=(42 52 62 72)
+    UNPCKHPS2   xmm3, xmm2              ; xmm3=(43 53 63 73)
 
     movaps      XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
     movaps      XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
@@ -334,7 +330,7 @@ EXTN(jsimd_idct_float_sse2):
     mov         edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
     mov         eax, JDIMENSION [output_col(eax)]
     mov         ecx, DCTSIZE/4                     ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     ; -- Even part
@@ -464,7 +460,7 @@ EXTN(jsimd_idct_float_sse2):
     pshufd      xmm5, xmm6, 0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
     pshufd      xmm3, xmm7, 0x4E  ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
 
     mov         edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
     mov         ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
@@ -475,7 +471,7 @@ EXTN(jsimd_idct_float_sse2):
     movq        XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
     movq        XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3
 
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
 
     add         esi, byte 4*SIZEOF_FAST_FLOAT  ; wsptr
     add         edi, byte 4*SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-mmx.asm
index 24622d43693..77d4613d23b 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-mmx.asm
@@ -2,17 +2,13 @@
 ; jidctfst.asm - fast integer IDCT (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a fast, not so accurate integer implementation of
 ; the inverse DCT (Discrete Cosine Transform). The following code is
@@ -56,7 +52,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS))       ; FIX(2.613125930) - FIX(1)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_ifast_mmx)
 
 EXTN(jconst_idct_ifast_mmx):
@@ -67,7 +63,7 @@ PW_MF1613      times 4 dw -F_1_613 << CONST_SHIFT
 PW_F1082       times 4 dw  F_1_082 << CONST_SHIFT
 PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -109,7 +105,7 @@ EXTN(jsimd_idct_ifast_mmx):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -118,7 +114,7 @@ EXTN(jsimd_idct_ifast_mmx):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; JCOEF *wsptr
     mov         ecx, DCTSIZE/4                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -163,7 +159,7 @@ EXTN(jsimd_idct_ifast_mmx):
     movq        MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
     movq        MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -326,7 +322,7 @@ EXTN(jsimd_idct_ifast_mmx):
     mov         edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
     mov         eax, JDIMENSION [output_col(eax)]
     mov         ecx, DCTSIZE/4                     ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     ; -- Even part
@@ -464,7 +460,7 @@ EXTN(jsimd_idct_ifast_mmx):
     punpckldq   mm5, mm4                ; mm5=(20 21 22 23 24 25 26 27)
     punpckhdq   mm1, mm4                ; mm1=(30 31 32 33 34 35 36 37)
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
 
     mov         edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
     mov         ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -475,7 +471,7 @@ EXTN(jsimd_idct_ifast_mmx):
     movq        MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
     movq        MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
 
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
 
     add         esi, byte 4*SIZEOF_JCOEF     ; wsptr
     add         edi, byte 4*SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-sse2.asm
index 19704ffa48f..c2fe34ba8c6 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctfst-sse2.asm
@@ -2,17 +2,13 @@
 ; jidctfst.asm - fast integer IDCT (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a fast, not so accurate integer implementation of
 ; the inverse DCT (Discrete Cosine Transform). The following code is
@@ -56,7 +52,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS))       ; FIX(2.613125930) - FIX(1)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_ifast_sse2)
 
 EXTN(jconst_idct_ifast_sse2):
@@ -67,7 +63,7 @@ PW_MF1613      times 8  dw -F_1_613 << CONST_SHIFT
 PW_F1082       times 8  dw  F_1_082 << CONST_SHIFT
 PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -101,13 +97,13 @@ EXTN(jsimd_idct_ifast_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input.
 
@@ -155,7 +151,7 @@ EXTN(jsimd_idct_ifast_sse2):
     movdqa      XMMWORD [wk(0)], xmm2   ; wk(0)=col1
     movdqa      XMMWORD [wk(1)], xmm0   ; wk(1)=col3
     jmp         near .column_end
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -490,7 +486,7 @@ EXTN(jsimd_idct_ifast_sse2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-avx2.asm
index 199c7df3b69..cb119d3f06d 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-avx2.asm
@@ -2,17 +2,13 @@
 ; jidctint.asm - accurate integer IDCT (AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; inverse DCT (Discrete Cosine Transform). The following code is based
@@ -65,7 +61,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %1-%4: Input/output registers
 ; %5-%8: Temp registers
 
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
     ; %5=(00 10 20 30 40 50 60 70  01 11 21 31 41 51 61 71)
     ; %6=(03 13 23 33 43 53 63 73  02 12 22 32 42 52 62 72)
     ; %7=(04 14 24 34 44 54 64 74  05 15 25 35 45 55 65 75)
@@ -118,7 +114,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %5-%12: Temp registers
 ; %9:     Pass (1 or 2)
 
-%macro dodct 13
+%macro DODCT 13
     ; -- Even part
 
     ; (Original)
@@ -250,7 +246,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_islow_avx2)
 
 EXTN(jconst_idct_islow_avx2):
@@ -269,7 +265,7 @@ PB_CENTERJSAMP             times 32 db  CENTERJSAMPLE
 PW_1_NEG1                  times 8  dw  1
                            times 8  dw -1
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -303,13 +299,13 @@ EXTN(jsimd_idct_islow_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns.
 
@@ -353,7 +349,7 @@ EXTN(jsimd_idct_islow_avx2):
     vpshufd     ymm3, ymm4, 0xFF        ; ymm3=col3_7=(03 03 03 03 03 03 03 03  07 07 07 07 07 07 07 07)
 
     jmp         near .column_end
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -371,10 +367,10 @@ EXTN(jsimd_idct_islow_avx2):
     vperm2i128  ymm2, ymm5, ymm7, 0x20  ; ymm2=in2_6
     vperm2i128  ymm3, ymm7, ymm6, 0x31  ; ymm3=in7_5
 
-    dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
+    DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
     ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
 
-    dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
     ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
 
 .column_end:
@@ -395,10 +391,10 @@ EXTN(jsimd_idct_islow_avx2):
     vperm2i128  ymm4, ymm3, ymm1, 0x31  ; ymm3=in7_5
     vperm2i128  ymm1, ymm3, ymm1, 0x20  ; ymm1=in3_1
 
-    dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
+    DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
     ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
 
-    dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
     ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
 
     vpacksswb   ymm0, ymm0, ymm1        ; ymm0=data01_45
@@ -442,7 +438,7 @@ EXTN(jsimd_idct_islow_avx2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-mmx.asm
index f15c8d34bcb..c2c17f441b8 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-mmx.asm
@@ -2,17 +2,13 @@
 ; jidctint.asm - accurate integer IDCT (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; inverse DCT (Discrete Cosine Transform). The following code is based
@@ -63,7 +59,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_islow_mmx)
 
 EXTN(jconst_idct_islow_mmx):
@@ -80,7 +76,7 @@ PD_DESCALE_P1  times 2 dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 2 dd  1 << (DESCALE_P2 - 1)
 PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -122,7 +118,7 @@ EXTN(jsimd_idct_islow_mmx):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -131,7 +127,7 @@ EXTN(jsimd_idct_islow_mmx):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; JCOEF *wsptr
     mov         ecx, DCTSIZE/4                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -178,7 +174,7 @@ EXTN(jsimd_idct_islow_mmx):
     movq        MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
     movq        MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -513,7 +509,7 @@ EXTN(jsimd_idct_islow_mmx):
     mov         edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
     mov         eax, JDIMENSION [output_col(eax)]
     mov         ecx, DCTSIZE/4                     ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     ; -- Even part
@@ -816,7 +812,7 @@ EXTN(jsimd_idct_islow_mmx):
     punpckldq   mm7, mm5                ; mm7=(20 21 22 23 24 25 26 27)
     punpckhdq   mm4, mm5                ; mm4=(30 31 32 33 34 35 36 37)
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
 
     mov         edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
     mov         ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -827,7 +823,7 @@ EXTN(jsimd_idct_islow_mmx):
     movq        MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7
     movq        MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
 
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
 
     add         esi, byte 4*SIZEOF_JCOEF     ; wsptr
     add         edi, byte 4*SIZEOF_JSAMPROW
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-sse2.asm
index 43e320189b4..70516cadcef 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctint-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctint-sse2.asm
@@ -2,17 +2,13 @@
 ; jidctint.asm - accurate integer IDCT (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; inverse DCT (Discrete Cosine Transform). The following code is based
@@ -63,7 +59,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_islow_sse2)
 
 EXTN(jconst_idct_islow_sse2):
@@ -80,7 +76,7 @@ PD_DESCALE_P1  times 4  dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 4  dd  1 << (DESCALE_P2 - 1)
 PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -114,13 +110,13 @@ EXTN(jsimd_idct_islow_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input.
 
@@ -172,7 +168,7 @@ EXTN(jsimd_idct_islow_sse2):
     movdqa      XMMWORD [wk(10)], xmm3  ; wk(10)=col5
     movdqa      XMMWORD [wk(11)], xmm4  ; wk(11)=col7
     jmp         near .column_end
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -847,7 +843,7 @@ EXTN(jsimd_idct_islow_sse2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctred-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctred-mmx.asm
index e2307e1cb6c..96cda657133 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctred-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctred-mmx.asm
@@ -2,17 +2,13 @@
 ; jidctred.asm - reduced-size IDCT (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains inverse-DCT routines that produce reduced-size
 ; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
@@ -69,7 +65,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS)  ; FIX(3.624509785)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_red_mmx)
 
 EXTN(jconst_idct_red_mmx):
@@ -87,7 +83,7 @@ PD_DESCALE_P1_2 times 2 dd  1 << (DESCALE_P1_2 - 1)
 PD_DESCALE_P2_2 times 2 dd  1 << (DESCALE_P2_2 - 1)
 PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -124,13 +120,13 @@ EXTN(jsimd_idct_4x4_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [workspace]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -139,7 +135,7 @@ EXTN(jsimd_idct_4x4_mmx):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; JCOEF *wsptr
     mov         ecx, DCTSIZE/4                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -181,7 +177,7 @@ EXTN(jsimd_idct_4x4_mmx):
     movq        MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
     movq        MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -479,7 +475,7 @@ EXTN(jsimd_idct_4x4_mmx):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
@@ -512,7 +508,7 @@ EXTN(jsimd_idct_2x2_mmx):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input.
 
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jidctred-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jidctred-sse2.asm
index 6e56494e975..1fe967db199 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jidctred-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jidctred-sse2.asm
@@ -2,17 +2,13 @@
 ; jidctred.asm - reduced-size IDCT (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains inverse-DCT routines that produce reduced-size
 ; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
@@ -69,7 +65,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS)  ; FIX(3.624509785)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_red_sse2)
 
 EXTN(jconst_idct_red_sse2):
@@ -87,7 +83,7 @@ PD_DESCALE_P1_2 times 4  dd  1 << (DESCALE_P1_2 - 1)
 PD_DESCALE_P2_2 times 4  dd  1 << (DESCALE_P2_2 - 1)
 PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -122,13 +118,13 @@ EXTN(jsimd_idct_4x4_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input.
 
@@ -171,7 +167,7 @@ EXTN(jsimd_idct_4x4_sse2):
     pshufd      xmm3, xmm3, 0xFA  ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
 
     jmp         near .column_end
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -400,7 +396,7 @@ EXTN(jsimd_idct_4x4_sse2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
@@ -433,7 +429,7 @@ EXTN(jsimd_idct_2x2_sse2):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input.
 
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquant-3dn.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquant-3dn.asm
index 5cb60caa947..58e0011f70e 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquant-3dn.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquant-3dn.asm
@@ -2,17 +2,13 @@
 ; jquant.asm - sample data conversion and quantization (3DNow! & MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 %include "jdct.inc"
@@ -52,7 +48,7 @@ EXTN(jsimd_convsamp_float_3dnow):
     mov         eax, JDIMENSION [start_col]
     mov         edi, POINTER [workspace]       ; (DCTELEM *)
     mov         ecx, DCTSIZE/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .convloop:
     mov         ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
     mov         edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
@@ -154,7 +150,7 @@ EXTN(jsimd_quantize_float_3dnow):
     mov         edx, POINTER [divisors]
     mov         edi, JCOEFPTR [coef_block]
     mov         eax, DCTSIZE2/16
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop:
     movq        mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
     movq        mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquant-mmx.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquant-mmx.asm
index 61305c625de..4eda95ce12f 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquant-mmx.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquant-mmx.asm
@@ -2,17 +2,13 @@
 ; jquant.asm - sample data conversion and quantization (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 %include "jdct.inc"
@@ -52,7 +48,7 @@ EXTN(jsimd_convsamp_mmx):
     mov         eax, JDIMENSION [start_col]
     mov         edi, POINTER [workspace]       ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .convloop:
     mov         ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
     mov         edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
@@ -120,8 +116,8 @@ EXTN(jsimd_convsamp_mmx):
 ; Quantize/descale the coefficients, and store into coef_block
 ;
 ; This implementation is based on an algorithm described in
-;   "How to optimize for the Pentium family of microprocessors"
-;   (http://www.agner.org/assem/).
+;   "Optimizing subroutines in assembly language:
+;   An optimization guide for x86 platforms" (https://agner.org/optimize).
 ;
 ; GLOBAL(void)
 ; jsimd_quantize_mmx(JCOEFPTR coef_block, DCTELEM *divisors,
@@ -157,10 +153,10 @@ EXTN(jsimd_quantize_mmx):
     mov         edx, POINTER [divisors]
     mov         edi, JCOEFPTR [coef_block]
     mov         ah, 2
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop1:
     mov         al, DCTSIZE2/8/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop2:
     movq        mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
     movq        mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquant-sse.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquant-sse.asm
index 218adc976f3..6cb5f79c215 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquant-sse.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquant-sse.asm
@@ -2,17 +2,13 @@
 ; jquant.asm - sample data conversion and quantization (SSE & MMX)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 %include "jdct.inc"
@@ -52,7 +48,7 @@ EXTN(jsimd_convsamp_float_sse):
     mov         eax, JDIMENSION [start_col]
     mov         edi, POINTER [workspace]       ; (DCTELEM *)
     mov         ecx, DCTSIZE/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .convloop:
     mov         ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
     mov         edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
@@ -150,7 +146,7 @@ EXTN(jsimd_quantize_float_sse):
     mov         edx, POINTER [divisors]
     mov         edi, JCOEFPTR [coef_block]
     mov         eax, DCTSIZE2/16
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop:
     movaps      xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
     movaps      xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquantf-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquantf-sse2.asm
index a881ab50f92..5668f8cb396 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquantf-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquantf-sse2.asm
@@ -2,17 +2,13 @@
 ; jquantf.asm - sample data conversion and quantization (SSE & SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 %include "jdct.inc"
@@ -52,7 +48,7 @@ EXTN(jsimd_convsamp_float_sse2):
     mov         eax, JDIMENSION [start_col]
     mov         edi, POINTER [workspace]       ; (DCTELEM *)
     mov         ecx, DCTSIZE/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .convloop:
     mov         ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
     mov         edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
@@ -127,7 +123,7 @@ EXTN(jsimd_quantize_float_sse2):
     mov         edx, POINTER [divisors]
     mov         edi, JCOEFPTR [coef_block]
     mov         eax, DCTSIZE2/16
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop:
     movaps      xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
     movaps      xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquanti-avx2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquanti-avx2.asm
index 5ed6bec246c..60ae098e9c4 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquanti-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquanti-avx2.asm
@@ -2,18 +2,14 @@
 ; jquanti.asm - sample data conversion and quantization (AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, 2018, D. R. Commander.
+; Copyright (C) 2016, 2018, 2024, D. R. Commander.
 ; Copyright (C) 2016, Matthieu Darbois.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 %include "jdct.inc"
@@ -107,8 +103,8 @@ EXTN(jsimd_convsamp_avx2):
 ; Quantize/descale the coefficients, and store into coef_block
 ;
 ; This implementation is based on an algorithm described in
-;   "How to optimize for the Pentium family of microprocessors"
-;   (http://www.agner.org/assem/).
+;   "Optimizing subroutines in assembly language:
+;   An optimization guide for x86 platforms" (https://agner.org/optimize).
 ;
 ; GLOBAL(void)
 ; jsimd_quantize_avx2(JCOEFPTR coef_block, DCTELEM *divisors,
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jquanti-sse2.asm b/contrib/libs/libjpeg-turbo/simd/i386/jquanti-sse2.asm
index 0a509408aa1..c1edde996e9 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jquanti-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jquanti-sse2.asm
@@ -2,17 +2,13 @@
 ; jquanti.asm - sample data conversion and quantization (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 %include "jdct.inc"
@@ -52,7 +48,7 @@ EXTN(jsimd_convsamp_sse2):
     mov         eax, JDIMENSION [start_col]
     mov         edi, POINTER [workspace]       ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .convloop:
     mov         ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
     mov         edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
@@ -98,8 +94,8 @@ EXTN(jsimd_convsamp_sse2):
 ; Quantize/descale the coefficients, and store into coef_block
 ;
 ; This implementation is based on an algorithm described in
-;   "How to optimize for the Pentium family of microprocessors"
-;   (http://www.agner.org/assem/).
+;   "Optimizing subroutines in assembly language:
+;   An optimization guide for x86 platforms" (https://agner.org/optimize).
 ;
 ; GLOBAL(void)
 ; jsimd_quantize_sse2(JCOEFPTR coef_block, DCTELEM *divisors,
@@ -133,7 +129,7 @@ EXTN(jsimd_quantize_sse2):
     mov         edx, POINTER [divisors]
     mov         edi, JCOEFPTR [coef_block]
     mov         eax, DCTSIZE2/32
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop:
     movdqa      xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
     movdqa      xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jsimd.c b/contrib/libs/libjpeg-turbo/simd/i386/jsimd.c
index 80bc821ff4e..d4786b155b7 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jsimd.c
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jsimd.c
@@ -2,8 +2,8 @@
  * jsimd_i386.c
  *
  * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2024, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -15,13 +15,12 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
-#include "jconfigint.h"
 
 /*
  * In the PIC cases, we have no guarantee that constants will keep
@@ -32,13 +31,11 @@
 #define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
 #define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
 
-static unsigned int simd_support = (unsigned int)(~0);
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
+static THREAD_LOCAL unsigned int simd_huffman = 1;
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
@@ -161,6 +158,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_ycc_convert_avx2;
@@ -220,6 +220,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_gray_convert_avx2;
@@ -279,6 +282,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_ycc_extrgb_convert_avx2;
@@ -382,6 +388,9 @@ GLOBAL(void)
 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -402,6 +411,9 @@ GLOBAL(void)
 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -464,6 +476,9 @@ GLOBAL(void)
 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -479,6 +494,9 @@ GLOBAL(void)
 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -540,6 +558,9 @@ GLOBAL(void)
 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -558,6 +579,9 @@ GLOBAL(void)
 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -626,6 +650,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
@@ -684,6 +711,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
@@ -788,6 +818,9 @@ GLOBAL(void)
 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
                DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_convsamp_avx2(sample_data, start_col, workspace);
   else if (simd_support & JSIMD_SSE2)
@@ -800,6 +833,9 @@ GLOBAL(void)
 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
                      FAST_FLOAT *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_SSE2)
     jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
   else if (simd_support & JSIMD_SSE)
@@ -870,6 +906,9 @@ jsimd_can_fdct_float(void)
 GLOBAL(void)
 jsimd_fdct_islow(DCTELEM *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_fdct_islow_avx2(data);
   else if (simd_support & JSIMD_SSE2)
@@ -881,6 +920,9 @@ jsimd_fdct_islow(DCTELEM *data)
 GLOBAL(void)
 jsimd_fdct_ifast(DCTELEM *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
     jsimd_fdct_ifast_sse2(data);
   else
@@ -890,6 +932,9 @@ jsimd_fdct_ifast(DCTELEM *data)
 GLOBAL(void)
 jsimd_fdct_float(FAST_FLOAT *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
     jsimd_fdct_float_sse(data);
   else if (simd_support & JSIMD_3DNOW)
@@ -945,6 +990,9 @@ jsimd_can_quantize_float(void)
 GLOBAL(void)
 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_quantize_avx2(coef_block, divisors, workspace);
   else if (simd_support & JSIMD_SSE2)
@@ -957,6 +1005,9 @@ GLOBAL(void)
 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
                      FAST_FLOAT *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_SSE2)
     jsimd_quantize_float_sse2(coef_block, divisors, workspace);
   else if (simd_support & JSIMD_SSE)
@@ -1020,6 +1071,9 @@ jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                JCOEFPTR coef_block, JSAMPARRAY output_buf,
                JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
     jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
                         output_col);
@@ -1032,6 +1086,9 @@ jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                JCOEFPTR coef_block, JSAMPARRAY output_buf,
                JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
     jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
                         output_col);
@@ -1126,6 +1183,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -1142,6 +1202,9 @@ jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
     jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -1155,6 +1218,9 @@ jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
     jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -1212,7 +1278,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -1238,7 +1304,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
                                                  jpeg_natural_order_start,
diff --git a/contrib/libs/libjpeg-turbo/simd/i386/jsimdcpu.asm b/contrib/libs/libjpeg-turbo/simd/i386/jsimdcpu.asm
index ddcafa9e213..df80f17f5fa 100644
--- a/contrib/libs/libjpeg-turbo/simd/i386/jsimdcpu.asm
+++ b/contrib/libs/libjpeg-turbo/simd/i386/jsimdcpu.asm
@@ -8,11 +8,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
diff --git a/contrib/libs/libjpeg-turbo/simd/jsimd.h b/contrib/libs/libjpeg-turbo/simd/jsimd.h
index 64747c6360c..a28754adb9d 100644
--- a/contrib/libs/libjpeg-turbo/simd/jsimd.h
+++ b/contrib/libs/libjpeg-turbo/simd/jsimd.h
@@ -2,10 +2,10 @@
  * simd/jsimd.h
  *
  * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
- * Copyright (C) 2011, 2014-2016, 2018, 2020, D. R. Commander.
+ * Copyright (C) 2011, 2014-2016, 2018, 2020, 2022, D. R. Commander.
  * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
  * Copyright (C) 2014, Linaro Limited.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
  * Copyright (C) 2020, Arm Limited.
  *
@@ -1243,16 +1243,16 @@ EXTERN(JOCTET *) jsimd_huff_encode_one_block_neon_slowtbl
 /* Progressive Huffman encoding */
 EXTERN(void) jsimd_encode_mcu_AC_first_prepare_sse2
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits);
+   UJCOEF *values, size_t *zerobits);
 
 EXTERN(void) jsimd_encode_mcu_AC_first_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits);
+   UJCOEF *values, size_t *zerobits);
 
 EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_sse2
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits);
+   UJCOEF *absvalues, size_t *bits);
 
 EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits);
+   UJCOEF *absvalues, size_t *bits);
diff --git a/contrib/libs/libjpeg-turbo/simd/nasm/jsimdcfg.inc.h b/contrib/libs/libjpeg-turbo/simd/nasm/jsimdcfg.inc.h
index bf2a45ad50c..ed3f9c2a693 100644
--- a/contrib/libs/libjpeg-turbo/simd/nasm/jsimdcfg.inc.h
+++ b/contrib/libs/libjpeg-turbo/simd/nasm/jsimdcfg.inc.h
@@ -12,9 +12,9 @@
 
 #define JPEG_INTERNALS
 
-#include "../jpeglib.h"
+#include "../src/jpeglib.h"
 #include "../jconfig.h"
-#include "../jmorecfg.h"
+#include "../src/jmorecfg.h"
 #include "jsimd.h"
 
 ;
diff --git a/contrib/libs/libjpeg-turbo/simd/nasm/jsimdext.inc b/contrib/libs/libjpeg-turbo/simd/nasm/jsimdext.inc
index d8a50ed8e23..674dfb6464c 100644
--- a/contrib/libs/libjpeg-turbo/simd/nasm/jsimdext.inc
+++ b/contrib/libs/libjpeg-turbo/simd/nasm/jsimdext.inc
@@ -2,9 +2,10 @@
 ; jsimdext.inc - common declarations
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2010, 2016, 2018-2019, D. R. Commander.
+; Copyright (C) 2010, 2016, 2018-2019, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthieu Darbois.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
 ;
@@ -75,6 +76,14 @@
 ; mark stack as non-executable
 section .note.GNU-stack noalloc noexec nowrite progbits
 
+%ifdef __CET__
+%ifdef __x86_64__
+section .note.gnu.property note alloc noexec align=8
+    dd 0x00000004, 0x00000010, 0x00000005, 0x00554e47
+    dd 0xc0000002, 0x00000004, 0x00000003, 0x00000000
+%endif
+%endif
+
 ; -- segment definition --
 ;
 %ifdef _x86_64_
@@ -271,7 +280,7 @@ const_base:
 
 %define GOTOFF(got, sym)  (got) + (sym) - const_base
 
-%imacro get_GOT 1
+%imacro GET_GOT 1
     ; NOTE: this macro destroys ecx resister.
     call        %%geteip
     add         ecx, byte (%%ref - $)
@@ -303,7 +312,7 @@ const_base:
 
 %define GOTOFF(got, sym)  (got) + (sym) wrt ..gotoff
 
-%imacro get_GOT 1
+%imacro GET_GOT 1
     extern      GOT_SYMBOL
     call        %%geteip
     add         %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
@@ -316,13 +325,13 @@ const_base:
 
 %endif    ; GOT_SYMBOL == _MACHO_PIC_ ----------------
 
-%imacro pushpic 1.nolist
+%imacro PUSHPIC 1.nolist
     push        %1
 %endmacro
-%imacro poppic  1.nolist
+%imacro POPPIC  1.nolist
     pop         %1
 %endmacro
-%imacro movpic  2.nolist
+%imacro MOVPIC  2.nolist
     mov         %1, %2
 %endmacro
 
@@ -330,13 +339,13 @@ const_base:
 
 %define GOTOFF(got, sym)  (sym)
 
-%imacro get_GOT 1.nolist
+%imacro GET_GOT 1.nolist
 %endmacro
-%imacro pushpic 1.nolist
+%imacro PUSHPIC 1.nolist
 %endmacro
-%imacro poppic  1.nolist
+%imacro POPPIC  1.nolist
 %endmacro
-%imacro movpic  2.nolist
+%imacro MOVPIC  2.nolist
 %endmacro
 
 %endif   ;  PIC -----------------------------------------
@@ -348,7 +357,7 @@ const_base:
 %define MSKLE(x, y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
 %define FILLB(b, n)  (($$-(b)) & ((n)-1))
 
-%imacro alignx 1-2.nolist 0xFFFF
+%imacro ALIGNX 1-2.nolist 0xFFFF
 %%bs: \
   times MSKLE(FILLB(%%bs, %1), %2) & MSKLE(16, FILLB($, %1)) & FILLB($, %1) \
         db 0x90                                      ; nop
@@ -370,7 +379,7 @@ const_base:
 
 ; Align the next data on {2,4,8,16,..}-byte boundary.
 ;
-%imacro alignz 1.nolist
+%imacro ALIGNZ 1.nolist
     align       %1, db 0                ; filling zeros
 %endmacro
 
@@ -378,7 +387,7 @@ const_base:
 
 %ifdef WIN64
 
-%imacro collect_args 1
+%imacro COLLECT_ARGS 1
     sub         rsp, SIZEOF_XMMWORD
     movaps      XMMWORD [rsp], xmm6
     sub         rsp, SIZEOF_XMMWORD
@@ -397,17 +406,17 @@ const_base:
 %endif
 %if %1 > 4
     push        r14
-    mov         r14, [rax+48]
+    mov         r14, [rbp+48]
 %endif
 %if %1 > 5
     push        r15
-    mov         r15, [rax+56]
+    mov         r15, [rbp+56]
 %endif
     push        rsi
     push        rdi
 %endmacro
 
-%imacro uncollect_args 1
+%imacro UNCOLLECT_ARGS 1
     pop         rdi
     pop         rsi
 %if %1 > 5
@@ -428,7 +437,7 @@ const_base:
     add         rsp, SIZEOF_XMMWORD
 %endmacro
 
-%imacro push_xmm 1
+%imacro PUSH_XMM 1
     sub         rsp, %1 * SIZEOF_XMMWORD
     movaps      XMMWORD [rsp+0*SIZEOF_XMMWORD], xmm8
 %if %1 > 1
@@ -442,7 +451,7 @@ const_base:
 %endif
 %endmacro
 
-%imacro pop_xmm 1
+%imacro POP_XMM 1
     movaps      xmm8, XMMWORD [rsp+0*SIZEOF_XMMWORD]
 %if %1 > 1
     movaps      xmm9, XMMWORD [rsp+1*SIZEOF_XMMWORD]
@@ -458,7 +467,7 @@ const_base:
 
 %else
 
-%imacro collect_args 1
+%imacro COLLECT_ARGS 1
     push        r10
     mov         r10, rdi
 %if %1 > 1
@@ -483,7 +492,7 @@ const_base:
 %endif
 %endmacro
 
-%imacro uncollect_args 1
+%imacro UNCOLLECT_ARGS 1
 %if %1 > 5
     pop         r15
 %endif
@@ -502,16 +511,29 @@ const_base:
     pop         r10
 %endmacro
 
-%imacro push_xmm 1
+%imacro PUSH_XMM 1
 %endmacro
 
-%imacro pop_xmm 1
+%imacro POP_XMM 1
 %endmacro
 
 %endif
 
 %endif
 
+%ifdef __CET__
+
+%imacro ENDBR64 0
+    dd 0xfa1e0ff3
+%endmacro
+
+%else
+
+%imacro ENDBR64 0
+%endmacro
+
+%endif
+
 ; --------------------------------------------------------------------------
 ;  Defines picked up from the C headers
 ;
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-avx2.asm
index ffb527db00e..aeeda0a682f 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-avx2.asm
@@ -1,19 +1,16 @@
 ;
 ; jccolext.asm - colorspace conversion (64-bit AVX2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -33,21 +30,22 @@
 ; r13d = JDIMENSION output_row
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
 %define WK_NUM  8
 
     align       32
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)
 
 EXTN(jsimd_rgb_ycc_convert_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, (SIZEOF_YMMWORD * WK_NUM)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d
@@ -548,9 +546,9 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-sse2.asm
index af70ed6010f..f3a1244903b 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolext-sse2.asm
@@ -1,18 +1,15 @@
 ;
 ; jccolext.asm - colorspace conversion (64-bit SSE2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -32,21 +29,22 @@
 ; r13d = JDIMENSION output_row
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  8
 
     align       32
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)
 
 EXTN(jsimd_rgb_ycc_convert_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d
@@ -473,9 +471,9 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-avx2.asm
index 16b78298dc4..e2628917336 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-avx2.asm
@@ -1,18 +1,14 @@
 ;
 ; jccolor.asm - colorspace conversion (64-bit AVX2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -33,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
 
 EXTN(jconst_rgb_ycc_convert_avx2):
@@ -46,7 +42,7 @@ PD_ONEHALFM1_CJ times 8 dd  (1 << (SCALEBITS - 1)) - 1 + \
                             (CENTERJSAMPLE << SCALEBITS)
 PD_ONEHALF      times 8 dd  (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-sse2.asm
index e2955c21340..cc9edb4cebc 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jccolor-sse2.asm
@@ -1,17 +1,13 @@
 ;
 ; jccolor.asm - colorspace conversion (64-bit SSE2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -32,7 +28,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
 
 EXTN(jconst_rgb_ycc_convert_sse2):
@@ -45,7 +41,7 @@ PD_ONEHALFM1_CJ times 4 dd  (1 << (SCALEBITS - 1)) - 1 + \
                             (CENTERJSAMPLE << SCALEBITS)
 PD_ONEHALF      times 4 dd  (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-avx2.asm
index 591255bb112..267ec5142a4 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-avx2.asm
@@ -1,18 +1,14 @@
 ;
 ; jcgray.asm - grayscale colorspace conversion (64-bit AVX2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -29,7 +25,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
 
 EXTN(jconst_rgb_gray_convert_avx2):
@@ -38,7 +34,7 @@ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
 PW_F0114_F0250 times 8 dw F_0_114, F_0_250
 PD_ONEHALF     times 8 dd (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-sse2.asm
index e389904f2f8..4b94d7b8a28 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgray-sse2.asm
@@ -1,17 +1,13 @@
 ;
 ; jcgray.asm - grayscale colorspace conversion (64-bit SSE2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -28,7 +24,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
 
 EXTN(jconst_rgb_gray_convert_sse2):
@@ -37,7 +33,7 @@ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
 PW_F0114_F0250 times 4 dw F_0_114, F_0_250
 PD_ONEHALF     times 4 dd (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-avx2.asm
index ddcc2c0a2fe..77e85f768f9 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-avx2.asm
@@ -1,19 +1,16 @@
 ;
 ; jcgryext.asm - grayscale colorspace conversion (64-bit AVX2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -33,21 +30,22 @@
 ; r13d = JDIMENSION output_row
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)
 
 EXTN(jsimd_rgb_gray_convert_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_YMMWORD * WK_NUM)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d
@@ -427,9 +425,9 @@ EXTN(jsimd_rgb_gray_convert_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-sse2.asm
index f1d399a63b8..3e8087c39bc 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcgryext-sse2.asm
@@ -1,18 +1,15 @@
 ;
 ; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -32,21 +29,22 @@
 ; r13d = JDIMENSION output_row
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)
 
 EXTN(jsimd_rgb_gray_convert_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d
@@ -352,9 +350,9 @@ EXTN(jsimd_rgb_gray_convert_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jchuff-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jchuff-sse2.asm
index 9ea6df946ef..b18b7f5d651 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jchuff-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jchuff-sse2.asm
@@ -1,19 +1,16 @@
 ;
 ; jchuff-sse2.asm - Huffman entropy encoding (64-bit SSE2)
 ;
-; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, D. R. Commander.
+; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, 2023-2024, D. R. Commander.
 ; Copyright (C) 2015, Matthieu Darbois.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains an SSE2 implementation for Huffman coding of one block.
 ; The following code is based on jchuff.c; see jchuff.c for more details.
@@ -38,7 +35,7 @@ endstruc
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_huff_encode_one_block)
 
 EXTN(jconst_huff_encode_one_block):
@@ -48,7 +45,7 @@ jpeg_mask_bits dd 0x0000, 0x0001, 0x0003, 0x0007
                dd 0x00ff, 0x01ff, 0x03ff, 0x07ff
                dd 0x0fff, 0x1fff, 0x3fff, 0x7fff
 
-    alignz      32
+    ALIGNZ      32
 
 times 1 << 14 db 15
 times 1 << 13 db 14
@@ -66,7 +63,8 @@ times 1 <<  2 db  3
 times 1 <<  1 db  2
 times 1 <<  0 db  1
 times 1       db  0
-jpeg_nbits_table:
+GLOBAL_DATA(jpeg_nbits_table)
+EXTN(jpeg_nbits_table):
 times 1       db  0
 times 1 <<  0 db  1
 times 1 <<  1 db  2
@@ -85,10 +83,10 @@ times 1 << 13 db 14
 times 1 << 14 db 15
 times 1 << 15 db 16
 
-    alignz      32
+    ALIGNZ      32
 
 %define NBITS(x)      nbits_base + x
-%define MASK_BITS(x)  NBITS((x) * 4) + (jpeg_mask_bits - jpeg_nbits_table)
+%define MASK_BITS(x)  NBITS((x) * 4) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -208,15 +206,15 @@ times 1 << 15 db 16
 ; rax - buffer
 ; rbx - temp
 ; rcx - nbits
-; rdx - block --> free_bits
+; rdx - code
 ; rsi - nbits_base
 ; rdi - t
-; rbp - code
 ; r8  - dctbl --> code_temp
 ; r9  - actbl
 ; r10 - state
 ; r11 - index
 ; r12 - put_buffer
+; r15 - block --> free_bits
 
 %define buffer       rax
 %ifdef WIN64
@@ -231,12 +229,11 @@ times 1 << 15 db 16
 %define nbitsq       rcx
 %define nbits        ecx
 %define nbitsb       cl
-%define block        rdx
+%define codeq        rdx
+%define code         edx
 %define nbits_base   rsi
 %define t            rdi
 %define td           edi
-%define codeq        rbp
-%define code         ebp
 %define dctbl        r8
 %define actbl        r9
 %define state        r10
@@ -244,6 +241,7 @@ times 1 << 15 db 16
 %define indexd       r11d
 %define put_buffer   r12
 %define put_bufferd  r12d
+%define block        r15
 
 ; Step 1: Re-arrange input data according to jpeg_natural_order
 ; xx 01 02 03 04 05 06 07      xx 01 08 16 09 02 03 10
@@ -259,6 +257,9 @@ times 1 << 15 db 16
     GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)
 
 EXTN(jsimd_huff_encode_one_block_sse2):
+    ENDBR64
+    push        rbp
+    mov         rbp, rsp
 
 %ifdef WIN64
 
@@ -266,15 +267,15 @@ EXTN(jsimd_huff_encode_one_block_sse2):
 ; rdx = JOCTET *buffer
 ; r8 = JCOEFPTR block
 ; r9 = int last_dc_val
-; [rax+48] = c_derived_tbl *dctbl
-; [rax+56] = c_derived_tbl *actbl
+; [rbp+48] = c_derived_tbl *dctbl
+; [rbp+56] = c_derived_tbl *actbl
 
                                                           ;X: X = code stream
     mov         buffer, rdx
+    push        r15
     mov         block, r8
     movups      xmm3, XMMWORD [block + 0 * SIZEOF_WORD]   ;D: w3 = xx 01 02 03 04 05 06 07
     push        rbx
-    push        rbp
     movdqa      xmm0, xmm3                                ;A: w0 = xx 01 02 03 04 05 06 07
     push        rsi
     push        rdi
@@ -284,12 +285,10 @@ EXTN(jsimd_huff_encode_one_block_sse2):
     movsx       code, word [block]                        ;Z:     code = block[0];
     pxor        xmm4, xmm4                                ;A: w4[i] = 0;
     sub         code, r9d                                 ;Z:     code -= last_dc_val;
-    mov         dctbl, POINTER [rsp+6*8+4*8]
-    mov         actbl, POINTER [rsp+6*8+5*8]
+    mov         dctbl, POINTER [rbp+48]
+    mov         actbl, POINTER [rbp+56]
     punpckldq   xmm0, xmm1                                ;A: w0 = xx 01 08 09 02 03 10 11
-    lea         nbits_base, [rel jpeg_nbits_table]
-    add         rsp, -DCTSIZE2 * SIZEOF_WORD
-    mov         t, rsp
+    lea         nbits_base, [rel EXTN(jpeg_nbits_table)]
 
 %else
 
@@ -301,23 +300,27 @@ EXTN(jsimd_huff_encode_one_block_sse2):
 ; r9 = c_derived_tbl *actbl
 
                                                           ;X: X = code stream
+    push        r15
+    mov         block, rdx
     movups      xmm3, XMMWORD [block + 0 * SIZEOF_WORD]   ;D: w3 = xx 01 02 03 04 05 06 07
     push        rbx
-    push        rbp
     movdqa      xmm0, xmm3                                ;A: w0 = xx 01 02 03 04 05 06 07
     push        r12
     mov         state, rdi
     mov         buffer, rsi
     movups      xmm1, XMMWORD [block + 8 * SIZEOF_WORD]   ;B: w1 = 08 09 10 11 12 13 14 15
     movsx       codeq, word [block]                       ;Z:     code = block[0];
-    lea         nbits_base, [rel jpeg_nbits_table]
+    lea         nbits_base, [rel EXTN(jpeg_nbits_table)]
     pxor        xmm4, xmm4                                ;A: w4[i] = 0;
     sub         codeq, rcx                                ;Z:     code -= last_dc_val;
     punpckldq   xmm0, xmm1                                ;A: w0 = xx 01 08 09 02 03 10 11
-    lea         t, [rsp - DCTSIZE2 * SIZEOF_WORD]         ;   use red zone for t_
 
 %endif
 
+    ; Allocate stack space for t array, and realign stack.
+    add         rsp, -DCTSIZE2 * SIZEOF_WORD - 8
+    mov         t, rsp
+
     pshuflw     xmm0, xmm0, 11001001b                     ;A: w0 = 01 08 xx 09 02 03 10 11
     pinsrw      xmm0, word [block + 16 * SIZEOF_WORD], 2  ;A: w0 = 01 08 16 09 02 03 10 11
     punpckhdq   xmm3, xmm1                                ;D: w3 = 04 05 12 13 06 07 14 15
@@ -443,9 +446,9 @@ EXTN(jsimd_huff_encode_one_block_sse2):
     pinsrw      xmm5, word [block + 29 * SIZEOF_WORD], 7  ;E: w5 = 42 49 56 57 50 43 36 29
                                                           ;        (Row 4, offset 1)
 %undef block
-%define free_bitsq  rdx
-%define free_bitsd  edx
-%define free_bitsb  dl
+%define free_bitsq  r15
+%define free_bitsd  r15d
+%define free_bitsb  r15b
     pcmpeqw     xmm1, xmm0                                ;F: w1[i] = (w1[i] == 0 ? -1 : 0);
     shl         tempq, 48                                 ;Z:     temp <<= 48;
     pxor        xmm2, xmm2                                ;E: w2[i] = 0;
@@ -534,12 +537,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
     test        index, index
     jnz         .BLOOP                                    ;   } while (index != 0);
 .ELOOP:                                                   ; }  /* index != 0 */
-    sub         td, esp                                   ; t -= (WIN64: &t_[0], UNIX: &t_[64]);
-%ifdef WIN64
+    sub         td, esp                                   ; t -= &t_[0];
     cmp         td, (DCTSIZE2 - 2) * SIZEOF_WORD          ; if (t != 62)
-%else
-    cmp         td, -2 * SIZEOF_WORD                      ; if (t != -2)
-%endif
     je          .EFN                                      ; {
     movzx       nbits, byte [actbl + c_derived_tbl.ehufsi + 0]
                                                           ;   nbits = actbl->ehufsi[0];
@@ -556,18 +555,17 @@ EXTN(jsimd_huff_encode_one_block_sse2):
                                                           ; state->cur.put_buffer.simd = put_buffer;
     mov         byte [state + working_state.cur.free_bits], free_bitsb
                                                           ; state->cur.free_bits = free_bits;
-%ifdef WIN64
-    sub         rsp, -DCTSIZE2 * SIZEOF_WORD
+    sub         rsp, -DCTSIZE2 * SIZEOF_WORD - 8
     pop         r12
+%ifdef WIN64
     pop         rdi
     pop         rsi
-    pop         rbp
     pop         rbx
 %else
-    pop         r12
-    pop         rbp
     pop         rbx
 %endif
+    pop         r15
+    pop         rbp
     ret
 
 ; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcphuff-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcphuff-sse2.asm
index 01b5c0235fa..c9ac59f2f1c 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcphuff-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcphuff-sse2.asm
@@ -3,16 +3,14 @@
 ; (64-bit SSE2)
 ;
 ; Copyright (C) 2016, 2018, Matthieu Darbois
+; Copyright (C) 2023, Aliaksiej Kandracienka.
+; Copyright (C) 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains an SSE2 implementation of data preparation for progressive
 ; Huffman encoding.  See jcphuff.c for more details.
@@ -281,16 +279,13 @@
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)
 
 EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [rbp - 16]
-    collect_args 6
-
-    movdqa      XMMWORD [rbp - 16], ZERO
+    sub         rsp, SIZEOF_XMMWORD
+    movdqa      XMMWORD [rsp], ZERO
+    COLLECT_ARGS 6
 
     movd        AL, r13d
     pxor        ZERO, ZERO
@@ -384,10 +379,9 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
 
     REDUCE0
 
-    movdqa      ZERO, XMMWORD [rbp - 16]
-    uncollect_args 6
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 6
+    movdqa      ZERO, XMMWORD [rsp]
+    mov         rsp, rbp
     pop         rbp
     ret
 
@@ -449,16 +443,13 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)
 
 EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [rbp - 16]
-    collect_args 6
-
-    movdqa      XMMWORD [rbp - 16], ZERO
+    sub         rsp, SIZEOF_XMMWORD
+    movdqa      XMMWORD [rsp], ZERO
+    COLLECT_ARGS 6
 
     xor         SIGN, SIGN
     xor         EOB, EOB
@@ -606,10 +597,9 @@ EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
     REDUCE0
 
     mov         eax, EOB
-    movdqa      ZERO, XMMWORD [rbp - 16]
-    uncollect_args 6
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 6
+    movdqa      ZERO, XMMWORD [rsp]
+    mov         rsp, rbp
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-avx2.asm
index b32527aebea..53afc7d77fa 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-avx2.asm
@@ -2,7 +2,7 @@
 ; jcsample.asm - downsampling (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
@@ -10,11 +10,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -44,10 +40,10 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)
 
 EXTN(jsimd_h2v1_downsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 6
+    COLLECT_ARGS 6
 
     mov         ecx, r13d
     shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -178,7 +174,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
 
 .return:
     vzeroupper
-    uncollect_args 6
+    UNCOLLECT_ARGS 6
     pop         rbp
     ret
 
@@ -206,10 +202,10 @@ EXTN(jsimd_h2v1_downsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)
 
 EXTN(jsimd_h2v2_downsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 6
+    COLLECT_ARGS 6
 
     mov         ecx, r13d
     shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -358,7 +354,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
 
 .return:
     vzeroupper
-    uncollect_args 6
+    UNCOLLECT_ARGS 6
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-sse2.asm
index 2fcfe4567ab..d7ffa930e82 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jcsample-sse2.asm
@@ -2,18 +2,14 @@
 ; jcsample.asm - downsampling (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -43,10 +39,10 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
 
 EXTN(jsimd_h2v1_downsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 6
+    COLLECT_ARGS 6
 
     mov         ecx, r13d
     shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -160,7 +156,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
     jg          near .rowloop
 
 .return:
-    uncollect_args 6
+    UNCOLLECT_ARGS 6
     pop         rbp
     ret
 
@@ -188,10 +184,10 @@ EXTN(jsimd_h2v1_downsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)
 
 EXTN(jsimd_h2v2_downsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 6
+    COLLECT_ARGS 6
 
     mov         ecx, r13d
     shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -321,7 +317,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
     jg          near .rowloop
 
 .return:
-    uncollect_args 6
+    UNCOLLECT_ARGS 6
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-avx2.asm
index 2370fda6424..7b8a084398d 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-avx2.asm
@@ -2,19 +2,16 @@
 ; jdcolext.asm - colorspace conversion (64-bit AVX2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -34,21 +31,22 @@
 ; r13 = JSAMPARRAY output_buf
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)
 
 EXTN(jsimd_ycc_rgb_convert_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (WK_NUM * SIZEOF_YMMWORD)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d               ; num_cols
@@ -485,9 +483,9 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-sse2.asm
index e07c8d75188..261f74da5d2 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolext-sse2.asm
@@ -2,18 +2,15 @@
 ; jdcolext.asm - colorspace conversion (64-bit SSE2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -33,21 +30,22 @@
 ; r13 = JSAMPARRAY output_buf
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)
 
 EXTN(jsimd_ycc_rgb_convert_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d               ; num_cols
@@ -428,9 +426,9 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-avx2.asm
index 43de9db04dc..bd5aa00b95c 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-avx2.asm
@@ -2,18 +2,14 @@
 ; jdcolor.asm - colorspace conversion (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -32,7 +28,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
 
 EXTN(jconst_ycc_rgb_convert_avx2):
@@ -43,7 +39,7 @@ PW_MF0344_F0285 times 8  dw -F_0_344, F_0_285
 PW_ONE          times 16 dw  1
 PD_ONEHALF      times 8  dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-sse2.asm
index b3f1fec07eb..40343fe7895 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdcolor-sse2.asm
@@ -2,17 +2,13 @@
 ; jdcolor.asm - colorspace conversion (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
 
 EXTN(jconst_ycc_rgb_convert_sse2):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
 PW_ONE          times 8 dw  1
 PD_ONEHALF      times 4 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-avx2.asm
index 9515a17013d..6a5f1daba56 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-avx2.asm
@@ -2,18 +2,14 @@
 ; jdmerge.asm - merged upsampling/color conversion (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -32,7 +28,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_merged_upsample_avx2)
 
 EXTN(jconst_merged_upsample_avx2):
@@ -43,7 +39,7 @@ PW_MF0344_F0285 times 8  dw -F_0_344, F_0_285
 PW_ONE          times 16 dw  1
 PD_ONEHALF      times 8  dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-sse2.asm
index aedccc20f6c..8c269b83d85 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmerge-sse2.asm
@@ -2,17 +2,13 @@
 ; jdmerge.asm - merged upsampling/color conversion (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -31,7 +27,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_merged_upsample_sse2)
 
 EXTN(jconst_merged_upsample_sse2):
@@ -42,7 +38,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
 PW_ONE          times 8 dw  1
 PD_ONEHALF      times 4 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-avx2.asm
index 8b264b4f039..01826fb6abb 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-avx2.asm
@@ -2,19 +2,16 @@
 ; jdmrgext.asm - merged upsampling/color conversion (64-bit AVX2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -34,21 +31,22 @@
 ; r12d = JDIMENSION in_row_group_ctr
 ; r13 = JSAMPARRAY output_buf
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
 %define WK_NUM  3
 
     align       32
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)
 
 EXTN(jsimd_h2v1_merged_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, SIZEOF_YMMWORD * WK_NUM
+    COLLECT_ARGS 4
     push        rbx
 
     mov         ecx, r10d               ; col
@@ -479,9 +477,9 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
@@ -505,10 +503,10 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)
 
 EXTN(jsimd_h2v2_merged_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     mov         eax, r10d
@@ -587,7 +585,7 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
     add         rsp, SIZEOF_JSAMPARRAY*4
 
     pop         rbx
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-sse2.asm
index eb3ab9dbd94..abd22e21a73 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdmrgext-sse2.asm
@@ -2,18 +2,15 @@
 ; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jcolsamp.inc"
 
@@ -33,21 +30,22 @@
 ; r12d = JDIMENSION in_row_group_ctr
 ; r13 = JSAMPARRAY output_buf
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  3
 
     align       32
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)
 
 EXTN(jsimd_h2v1_merged_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 4
     push        rbx
 
     mov         ecx, r10d               ; col
@@ -421,9 +419,9 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
@@ -447,10 +445,10 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)
 
 EXTN(jsimd_h2v2_merged_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     mov         eax, r10d
@@ -529,7 +527,7 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
     add         rsp, SIZEOF_JSAMPARRAY*4
 
     pop         rbx
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-avx2.asm
index 1e4979f933e..6ae4cf812a7 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-avx2.asm
@@ -2,26 +2,23 @@
 ; jdsample.asm - upsampling (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fancy_upsample_avx2)
 
 EXTN(jconst_fancy_upsample_avx2):
@@ -32,7 +29,7 @@ PW_THREE times 16 dw 3
 PW_SEVEN times 16 dw 7
 PW_EIGHT times 16 dw 8
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -61,11 +58,11 @@ PW_EIGHT times 16 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
 
 EXTN(jsimd_h2v1_fancy_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    push_xmm    3
-    collect_args 4
+    PUSH_XMM    3
+    COLLECT_ARGS 4
 
     mov         eax, r11d               ; colctr
     test        rax, rax
@@ -186,8 +183,8 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
 
 .return:
     vzeroupper
-    uncollect_args 4
-    pop_xmm     3
+    UNCOLLECT_ARGS 4
+    POP_XMM     3
     pop         rbp
     ret
 
@@ -208,22 +205,23 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
 ; r12 = JSAMPARRAY input_data
 ; r13 = JSAMPARRAY *output_data_ptr
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
 %define WK_NUM  4
 
     align       32
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)
 
 EXTN(jsimd_h2v2_fancy_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
-    and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    push_xmm    3
-    collect_args 4
+    mov         rbp, rsp
+    push        r15
+    and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, (SIZEOF_YMMWORD * WK_NUM)
+    PUSH_XMM    3
+    COLLECT_ARGS 4
     push        rbx
 
     mov         eax, r11d               ; colctr
@@ -498,10 +496,10 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 4
-    pop_xmm     3
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    POP_XMM     3
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
@@ -524,10 +522,10 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)
 
 EXTN(jsimd_h2v1_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
 
     mov         edx, r11d
     add         rdx, byte (SIZEOF_YMMWORD-1)
@@ -590,7 +588,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
 
 .return:
     vzeroupper
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
@@ -613,10 +611,10 @@ EXTN(jsimd_h2v1_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)
 
 EXTN(jsimd_h2v2_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     mov         edx, r11d
@@ -687,7 +685,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-sse2.asm
index 38dbceec269..54c560fc28e 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jdsample-sse2.asm
@@ -2,25 +2,22 @@
 ; jdsample.asm - upsampling (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fancy_upsample_sse2)
 
 EXTN(jconst_fancy_upsample_sse2):
@@ -31,7 +28,7 @@ PW_THREE times 8 dw 3
 PW_SEVEN times 8 dw 7
 PW_EIGHT times 8 dw 8
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -60,10 +57,10 @@ PW_EIGHT times 8 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v1_fancy_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
 
     mov         eax, r11d               ; colctr
     test        rax, rax
@@ -174,7 +171,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     jg          near .rowloop
 
 .return:
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
@@ -195,21 +192,22 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
 ; r12 = JSAMPARRAY input_data
 ; r13 = JSAMPARRAY *output_data_ptr
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  4
 
     align       32
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v2_fancy_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 4
     push        rbx
 
     mov         eax, r11d               ; colctr
@@ -472,9 +470,9 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
@@ -497,10 +495,10 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)
 
 EXTN(jsimd_h2v1_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
 
     mov         edx, r11d
     add         rdx, byte (2*SIZEOF_XMMWORD)-1
@@ -561,7 +559,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     jg          short .rowloop
 
 .return:
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
@@ -584,10 +582,10 @@ EXTN(jsimd_h2v1_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)
 
 EXTN(jsimd_h2v2_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     mov         edx, r11d
@@ -656,7 +654,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctflt-sse.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctflt-sse.asm
index ef2796649bc..58a1f5570d3 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctflt-sse.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctflt-sse.asm
@@ -2,17 +2,14 @@
 ; jfdctflt.asm - floating-point FDCT (64-bit SSE)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the forward DCT
 ; (Discrete Cosine Transform). The following code is based directly on
@@ -34,7 +31,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_float_sse)
 
 EXTN(jconst_fdct_float_sse):
@@ -44,7 +41,7 @@ PD_0_707 times 4 dd 0.707106781186547524400844
 PD_0_541 times 4 dd 0.541196100146196984399723
 PD_1_306 times 4 dd 1.306562964876376527856643
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -58,21 +55,22 @@ PD_1_306 times 4 dd 1.306562964876376527856643
 
 ; r10 = FAST_FLOAT *data
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_fdct_float_sse)
 
 EXTN(jsimd_fdct_float_sse):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 1
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 1
 
     ; ---- Pass 1: process rows.
 
@@ -344,9 +342,9 @@ EXTN(jsimd_fdct_float_sse):
     dec         rcx
     jnz         near .columnloop
 
-    uncollect_args 1
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 1
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctfst-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctfst-sse2.asm
index 2e1bfe6e8c2..3b92d4edaae 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctfst-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctfst-sse2.asm
@@ -2,17 +2,14 @@
 ; jfdctfst.asm - fast integer FDCT (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a fast, not so accurate integer implementation of
 ; the forward DCT (Discrete Cosine Transform). The following code is
@@ -49,7 +46,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS)  ; FIX(1.306562965)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_ifast_sse2)
 
 EXTN(jconst_fdct_ifast_sse2):
@@ -59,7 +56,7 @@ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
 PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
 PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -73,21 +70,22 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
 
 ; r10 = DCTELEM *data
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)
 
 EXTN(jsimd_fdct_ifast_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 1
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 1
 
     ; ---- Pass 1: process rows.
 
@@ -378,9 +376,9 @@ EXTN(jsimd_fdct_ifast_sse2):
     movdqa      XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6
     movdqa      XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2
 
-    uncollect_args 1
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 1
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-avx2.asm
index e56258b48aa..0c4528612cd 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-avx2.asm
@@ -2,17 +2,13 @@
 ; jfdctint.asm - accurate integer FDCT (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; forward DCT (Discrete Cosine Transform). The following code is based
@@ -65,7 +61,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %1-%4: Input/output registers
 ; %5-%8: Temp registers
 
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
     ; %1=(00 01 02 03 04 05 06 07  40 41 42 43 44 45 46 47)
     ; %2=(10 11 12 13 14 15 16 17  50 51 52 53 54 55 56 57)
     ; %3=(20 21 22 23 24 25 26 27  60 61 62 63 64 65 66 67)
@@ -108,7 +104,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %5-%8: Temp registers
 ; %9:    Pass (1 or 2)
 
-%macro dodct 9
+%macro DODCT 9
     vpsubw      %5, %1, %4              ; %5=data1_0-data6_7=tmp6_7
     vpaddw      %6, %1, %4              ; %6=data1_0+data6_7=tmp1_0
     vpaddw      %7, %2, %3              ; %7=data3_2+data4_5=tmp3_2
@@ -223,7 +219,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_islow_avx2)
 
 EXTN(jconst_fdct_islow_avx2):
@@ -242,7 +238,7 @@ PW_DESCALE_P2X             times 16 dw  1 << (PASS1_BITS - 1)
 PW_1_NEG1                  times 8  dw  1
                            times 8  dw -1
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -260,10 +256,10 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)
 
 EXTN(jsimd_fdct_islow_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 1
+    COLLECT_ARGS 1
 
     ; ---- Pass 1: process rows.
 
@@ -285,9 +281,9 @@ EXTN(jsimd_fdct_islow_avx2):
     ; ymm2=(20 21 22 23 24 25 26 27  60 61 62 63 64 65 66 67)
     ; ymm3=(30 31 32 33 34 35 36 37  70 71 72 73 74 75 76 77)
 
-    dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
 
-    dodct       ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
+    DODCT       ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
     ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
 
     ; ---- Pass 2: process columns.
@@ -295,9 +291,9 @@ EXTN(jsimd_fdct_islow_avx2):
     vperm2i128  ymm4, ymm1, ymm3, 0x20  ; ymm4=data3_7
     vperm2i128  ymm1, ymm1, ymm3, 0x31  ; ymm1=data1_5
 
-    dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
 
-    dodct       ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
+    DODCT       ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
     ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
 
     vperm2i128 ymm3, ymm0, ymm1, 0x30   ; ymm3=data0_1
@@ -311,7 +307,7 @@ EXTN(jsimd_fdct_islow_avx2):
     vmovdqu     YMMWORD [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm7
 
     vzeroupper
-    uncollect_args 1
+    UNCOLLECT_ARGS 1
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-sse2.asm
index ec1f383ccb7..3a6be020cd0 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jfdctint-sse2.asm
@@ -2,17 +2,14 @@
 ; jfdctint.asm - accurate integer FDCT (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; forward DCT (Discrete Cosine Transform). The following code is based
@@ -63,7 +60,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_islow_sse2)
 
 EXTN(jconst_fdct_islow_sse2):
@@ -80,7 +77,7 @@ PD_DESCALE_P1  times 4 dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 4 dd  1 << (DESCALE_P2 - 1)
 PW_DESCALE_P2X times 8 dw  1 << (PASS1_BITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -94,21 +91,22 @@ PW_DESCALE_P2X times 8 dw  1 << (PASS1_BITS - 1)
 
 ; r10 = DCTELEM *data
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  6
 
     align       32
     GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)
 
 EXTN(jsimd_fdct_islow_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 1
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 1
 
     ; ---- Pass 1: process rows.
 
@@ -608,9 +606,9 @@ EXTN(jsimd_fdct_islow_sse2):
     movdqa      XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1
     movdqa      XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3
 
-    uncollect_args 1
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 1
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctflt-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctflt-sse2.asm
index 60bf9618961..14437340229 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctflt-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctflt-sse2.asm
@@ -2,18 +2,15 @@
 ; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a floating-point implementation of the inverse DCT
 ; (Discrete Cosine Transform). The following code is based directly on
@@ -24,18 +21,18 @@
 
 ; --------------------------------------------------------------------------
 
-%macro unpcklps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+%macro UNPCKLPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
     shufps      %1, %2, 0x44
 %endmacro
 
-%macro unpckhps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+%macro UNPCKHPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
     shufps      %1, %2, 0xEE
 %endmacro
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_float_sse2)
 
 EXTN(jconst_idct_float_sse2):
@@ -47,7 +44,7 @@ PD_M2_613       times 4  dd -2.613125929752753055713286
 PD_RNDINT_MAGIC times 4  dd  100663296.0  ; (float)(0x00C00000 << 3)
 PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -65,8 +62,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 ; r12 = JSAMPARRAY output_buf
 ; r13d = JDIMENSION output_col
 
-%define original_rbp  rbp + 0
-%define wk(i)         rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i)         r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
                                         ; xmmword wk[WK_NUM]
 %define WK_NUM        2
 %define workspace     wk(0) - DCTSIZE2 * SIZEOF_FAST_FLOAT
@@ -76,14 +72,15 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_float_sse2)
 
 EXTN(jsimd_idct_float_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
     lea         rsp, [workspace]
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     ; ---- Pass 1: process columns from input, store into work array.
@@ -280,11 +277,11 @@ EXTN(jsimd_idct_float_sse2):
     unpckhps    xmm4, xmm0              ; xmm4=(42 52 43 53)
 
     movaps      xmm3, xmm6              ; transpose coefficients(phase 2)
-    unpcklps2   xmm6, xmm7              ; xmm6=(00 10 20 30)
-    unpckhps2   xmm3, xmm7              ; xmm3=(01 11 21 31)
+    UNPCKLPS2   xmm6, xmm7              ; xmm6=(00 10 20 30)
+    UNPCKHPS2   xmm3, xmm7              ; xmm3=(01 11 21 31)
     movaps      xmm0, xmm1              ; transpose coefficients(phase 2)
-    unpcklps2   xmm1, xmm2              ; xmm1=(02 12 22 32)
-    unpckhps2   xmm0, xmm2              ; xmm0=(03 13 23 33)
+    UNPCKLPS2   xmm1, xmm2              ; xmm1=(02 12 22 32)
+    UNPCKHPS2   xmm0, xmm2              ; xmm0=(03 13 23 33)
 
     movaps      xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
     movaps      xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
@@ -295,11 +292,11 @@ EXTN(jsimd_idct_float_sse2):
     movaps      XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
 
     movaps      xmm6, xmm5              ; transpose coefficients(phase 2)
-    unpcklps2   xmm5, xmm7              ; xmm5=(40 50 60 70)
-    unpckhps2   xmm6, xmm7              ; xmm6=(41 51 61 71)
+    UNPCKLPS2   xmm5, xmm7              ; xmm5=(40 50 60 70)
+    UNPCKHPS2   xmm6, xmm7              ; xmm6=(41 51 61 71)
     movaps      xmm3, xmm4              ; transpose coefficients(phase 2)
-    unpcklps2   xmm4, xmm2              ; xmm4=(42 52 62 72)
-    unpckhps2   xmm3, xmm2              ; xmm3=(43 53 63 73)
+    UNPCKLPS2   xmm4, xmm2              ; xmm4=(42 52 62 72)
+    UNPCKHPS2   xmm3, xmm2              ; xmm3=(43 53 63 73)
 
     movaps      XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5
     movaps      XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6
@@ -322,7 +319,6 @@ EXTN(jsimd_idct_float_sse2):
 
     ; ---- Pass 2: process rows from work array, store into output array.
 
-    mov         rax, [original_rbp]
     lea         rsi, [workspace]        ; FAST_FLOAT *wsptr
     mov         rdi, r12                ; (JSAMPROW *)
     mov         eax, r13d
@@ -471,9 +467,9 @@ EXTN(jsimd_idct_float_sse2):
     jnz         near .rowloop
 
     pop         rbx
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctfst-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctfst-sse2.asm
index cb97fdfbb24..cffabb8378e 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctfst-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctfst-sse2.asm
@@ -2,18 +2,15 @@
 ; jidctfst.asm - fast integer IDCT (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a fast, not so accurate integer implementation of
 ; the inverse DCT (Discrete Cosine Transform). The following code is
@@ -57,7 +54,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS))         ; FIX(2.613125930) - FIX(1)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_ifast_sse2)
 
 EXTN(jconst_idct_ifast_sse2):
@@ -68,7 +65,7 @@ PW_MF1613      times 8  dw -F_1_613 << CONST_SHIFT
 PW_F1082       times 8  dw  F_1_082 << CONST_SHIFT
 PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -86,8 +83,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 ; r12 = JSAMPARRAY output_buf
 ; r13d = JDIMENSION output_col
 
-%define original_rbp  rbp + 0
-%define wk(i)         rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i)         r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
                                         ; xmmword wk[WK_NUM]
 %define WK_NUM        2
 
@@ -95,14 +91,15 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)
 
 EXTN(jsimd_idct_ifast_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 4
 
     ; ---- Pass 1: process columns from input.
 
@@ -320,7 +317,6 @@ EXTN(jsimd_idct_ifast_sse2):
 
     ; ---- Pass 2: process rows from work array, store into output array.
 
-    mov         rax, [original_rbp]
     mov         rdi, r12                ; (JSAMPROW *)
     mov         eax, r13d
 
@@ -479,9 +475,9 @@ EXTN(jsimd_idct_ifast_sse2):
     movq        XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
     movq        XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
 
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-avx2.asm
index ca7e317f6e1..be3b46888e5 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-avx2.asm
@@ -2,18 +2,14 @@
 ; jidctint.asm - accurate integer IDCT (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; inverse DCT (Discrete Cosine Transform). The following code is based
@@ -66,7 +62,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %1-%4: Input/output registers
 ; %5-%8: Temp registers
 
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
     ; %5=(00 10 20 30 40 50 60 70  01 11 21 31 41 51 61 71)
     ; %6=(03 13 23 33 43 53 63 73  02 12 22 32 42 52 62 72)
     ; %7=(04 14 24 34 44 54 64 74  05 15 25 35 45 55 65 75)
@@ -119,7 +115,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %5-%12: Temp registers
 ; %9:     Pass (1 or 2)
 
-%macro dodct 13
+%macro DODCT 13
     ; -- Even part
 
     ; (Original)
@@ -241,7 +237,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_islow_avx2)
 
 EXTN(jconst_idct_islow_avx2):
@@ -260,7 +256,7 @@ PB_CENTERJSAMP             times 32 db  CENTERJSAMPLE
 PW_1_NEG1                  times 8  dw  1
                            times 8  dw -1
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -282,11 +278,11 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_idct_islow_avx2)
 
 EXTN(jsimd_idct_islow_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
     mov         rbp, rsp                     ; rbp = aligned rbp
-    push_xmm    4
-    collect_args 4
+    PUSH_XMM    4
+    COLLECT_ARGS 4
 
     ; ---- Pass 1: process columns.
 
@@ -343,10 +339,10 @@ EXTN(jsimd_idct_islow_avx2):
     vperm2i128  ymm2, ymm5, ymm7, 0x20  ; ymm2=in2_6
     vperm2i128  ymm3, ymm7, ymm6, 0x31  ; ymm3=in7_5
 
-    dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
+    DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
     ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
 
-    dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
     ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
 
 .column_end:
@@ -363,10 +359,10 @@ EXTN(jsimd_idct_islow_avx2):
     vperm2i128  ymm4, ymm3, ymm1, 0x31  ; ymm3=in7_5
     vperm2i128  ymm1, ymm3, ymm1, 0x20  ; ymm1=in3_1
 
-    dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
+    DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
     ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
 
-    dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
     ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
 
     vpacksswb   ymm0, ymm0, ymm1        ; ymm0=data01_45
@@ -408,8 +404,8 @@ EXTN(jsimd_idct_islow_avx2):
     movq        XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
     movq        XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
 
-    uncollect_args 4
-    pop_xmm     4
+    UNCOLLECT_ARGS 4
+    POP_XMM     4
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-sse2.asm
index 7aa869bc0b5..b186871ff2a 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctint-sse2.asm
@@ -2,18 +2,15 @@
 ; jidctint.asm - accurate integer IDCT (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains a slower but more accurate integer implementation of the
 ; inverse DCT (Discrete Cosine Transform). The following code is based
@@ -64,7 +61,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_islow_sse2)
 
 EXTN(jconst_idct_islow_sse2):
@@ -81,7 +78,7 @@ PD_DESCALE_P1  times 4  dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 4  dd  1 << (DESCALE_P2 - 1)
 PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -99,8 +96,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 ; r12 = JSAMPARRAY output_buf
 ; r13d = JDIMENSION output_col
 
-%define original_rbp  rbp + 0
-%define wk(i)         rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i)         r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
                                         ; xmmword wk[WK_NUM]
 %define WK_NUM        12
 
@@ -108,14 +104,15 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_islow_sse2)
 
 EXTN(jsimd_idct_islow_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 4
 
     ; ---- Pass 1: process columns from input.
 
@@ -512,7 +509,6 @@ EXTN(jsimd_idct_islow_sse2):
 
     ; ---- Pass 2: process rows from work array, store into output array.
 
-    mov         rax, [original_rbp]
     mov         rdi, r12                ; (JSAMPROW *)
     mov         eax, r13d
 
@@ -836,9 +832,9 @@ EXTN(jsimd_idct_islow_sse2):
     movq        XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
     movq        XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
 
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctred-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctred-sse2.asm
index 4ece9d891cb..6fb7095612e 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jidctred-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jidctred-sse2.asm
@@ -2,18 +2,15 @@
 ; jidctred.asm - reduced-size IDCT (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 ;
 ; This file contains inverse-DCT routines that produce reduced-size
 ; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
@@ -70,7 +67,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS)  ; FIX(3.624509785)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_red_sse2)
 
 EXTN(jconst_idct_red_sse2):
@@ -88,7 +85,7 @@ PD_DESCALE_P1_2 times 4  dd  1 << (DESCALE_P1_2 - 1)
 PD_DESCALE_P2_2 times 4  dd  1 << (DESCALE_P2_2 - 1)
 PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -107,8 +104,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 ; r12 = JSAMPARRAY output_buf
 ; r13d = JDIMENSION output_col
 
-%define original_rbp  rbp + 0
-%define wk(i)         rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i)         r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
                                         ; xmmword wk[WK_NUM]
 %define WK_NUM        2
 
@@ -116,14 +112,15 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)
 
 EXTN(jsimd_idct_4x4_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 4
 
     ; ---- Pass 1: process columns from input.
 
@@ -309,7 +306,6 @@ EXTN(jsimd_idct_4x4_sse2):
 
     ; ---- Pass 2: process rows, store into output array.
 
-    mov         rax, [original_rbp]
     mov         rdi, r12                ; (JSAMPROW *)
     mov         eax, r13d
 
@@ -389,9 +385,9 @@ EXTN(jsimd_idct_4x4_sse2):
     movd        XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
     movd        XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
 
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
@@ -414,10 +410,10 @@ EXTN(jsimd_idct_4x4_sse2):
     GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)
 
 EXTN(jsimd_idct_2x2_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     ; ---- Pass 1: process columns from input.
@@ -565,7 +561,7 @@ EXTN(jsimd_idct_2x2_sse2):
     mov         word [rsi+rax*SIZEOF_JSAMPLE], cx
 
     pop         rbx
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jquantf-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jquantf-sse2.asm
index ab2e3954f63..64763338f2d 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jquantf-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jquantf-sse2.asm
@@ -2,18 +2,14 @@
 ; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 %include "jdct.inc"
@@ -37,10 +33,10 @@
     GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)
 
 EXTN(jsimd_convsamp_float_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
     push        rbx
 
     pcmpeqw     xmm7, xmm7
@@ -89,7 +85,7 @@ EXTN(jsimd_convsamp_float_sse2):
     jnz         short .convloop
 
     pop         rbx
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
@@ -110,10 +106,10 @@ EXTN(jsimd_convsamp_float_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_float_sse2)
 
 EXTN(jsimd_quantize_float_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
 
     mov         rsi, r12
     mov         rdx, r11
@@ -146,7 +142,7 @@ EXTN(jsimd_quantize_float_sse2):
     dec         rax
     jnz         short .quantloop
 
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-avx2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-avx2.asm
index 70fe81139cc..7e126e88a88 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-avx2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-avx2.asm
@@ -2,7 +2,7 @@
 ; jquanti.asm - sample data conversion and quantization (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2024, D. R. Commander.
 ; Copyright (C) 2016, Matthieu Darbois.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
@@ -10,11 +10,7 @@
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 %include "jdct.inc"
@@ -38,10 +34,10 @@
     GLOBAL_FUNCTION(jsimd_convsamp_avx2)
 
 EXTN(jsimd_convsamp_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
 
     mov         eax, r11d
 
@@ -84,7 +80,7 @@ EXTN(jsimd_convsamp_avx2):
     vmovdqu     YMMWORD [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)], ymm3
 
     vzeroupper
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
@@ -93,8 +89,8 @@ EXTN(jsimd_convsamp_avx2):
 ; Quantize/descale the coefficients, and store into coef_block
 ;
 ; This implementation is based on an algorithm described in
-;   "How to optimize for the Pentium family of microprocessors"
-;   (http://www.agner.org/assem/).
+;   "Optimizing subroutines in assembly language:
+;   An optimization guide for x86 platforms" (https://agner.org/optimize).
 ;
 ; GLOBAL(void)
 ; jsimd_quantize_avx2(JCOEFPTR coef_block, DCTELEM *divisors,
@@ -116,10 +112,10 @@ EXTN(jsimd_convsamp_avx2):
     GLOBAL_FUNCTION(jsimd_quantize_avx2)
 
 EXTN(jsimd_quantize_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
 
     vmovdqu     ymm4, [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
     vmovdqu     ymm5, [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
@@ -154,7 +150,7 @@ EXTN(jsimd_quantize_avx2):
     vmovdqu     [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
 
     vzeroupper
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-sse2.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-sse2.asm
index 3ee442027a5..284b9fea71e 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-sse2.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jquanti-sse2.asm
@@ -2,18 +2,14 @@
 ; jquanti.asm - sample data conversion and quantization (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 %include "jdct.inc"
@@ -37,10 +33,10 @@
     GLOBAL_FUNCTION(jsimd_convsamp_sse2)
 
 EXTN(jsimd_convsamp_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
     push        rbx
 
     pxor        xmm6, xmm6              ; xmm6=(all 0's)
@@ -84,7 +80,7 @@ EXTN(jsimd_convsamp_sse2):
     jnz         short .convloop
 
     pop         rbx
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
@@ -93,8 +89,8 @@ EXTN(jsimd_convsamp_sse2):
 ; Quantize/descale the coefficients, and store into coef_block
 ;
 ; This implementation is based on an algorithm described in
-;   "How to optimize for the Pentium family of microprocessors"
-;   (http://www.agner.org/assem/).
+;   "Optimizing subroutines in assembly language:
+;   An optimization guide for x86 platforms" (https://agner.org/optimize).
 ;
 ; GLOBAL(void)
 ; jsimd_quantize_sse2(JCOEFPTR coef_block, DCTELEM *divisors,
@@ -116,10 +112,10 @@ EXTN(jsimd_convsamp_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_sse2)
 
 EXTN(jsimd_quantize_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
 
     mov         rsi, r12
     mov         rdx, r11
@@ -179,7 +175,7 @@ EXTN(jsimd_quantize_sse2):
     dec         rax
     jnz         near .quantloop
 
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jsimd.c b/contrib/libs/libjpeg-turbo/simd/x86_64/jsimd.c
index d51962f3987..9f4e098fddc 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jsimd.c
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jsimd.c
@@ -2,8 +2,8 @@
  * jsimd_x86_64.c
  *
  * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
- * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2024, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -15,13 +15,12 @@
  */
 
 #define JPEG_INTERNALS
-#include "../../jinclude.h"
-#include "../../jpeglib.h"
-#include "../../jsimd.h"
-#include "../../jdct.h"
-#include "../../jsimddct.h"
+#include "../../src/jinclude.h"
+#include "../../src/jpeglib.h"
+#include "../../src/jsimd.h"
+#include "../../src/jdct.h"
+#include "../../src/jsimddct.h"
 #include "../jsimd.h"
-#include "jconfigint.h"
 
 /*
  * In the PIC cases, we have no guarantee that constants will keep
@@ -32,13 +31,11 @@
 #define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
 #define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
 
-static unsigned int simd_support = (unsigned int)(~0);
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
+static THREAD_LOCAL unsigned int simd_huffman = 1;
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
@@ -116,7 +113,9 @@ jsimd_can_ycc_rgb(void)
 {
   init_simd();
 
-#ifndef WITH_SANITIZER
+#ifdef WITH_SANITIZER
+  return 0;
+#endif
   /* The code is optimised for these values only */
   if (BITS_IN_JSAMPLE != 8)
     return 0;
@@ -131,7 +130,6 @@ jsimd_can_ycc_rgb(void)
   if ((simd_support & JSIMD_SSE2) &&
       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
     return 1;
-#endif
 
   return 0;
 }
@@ -150,6 +148,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_ycc_convert_avx2;
@@ -199,6 +200,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_gray_convert_avx2;
@@ -248,6 +252,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_ycc_extrgb_convert_avx2;
@@ -338,6 +345,9 @@ GLOBAL(void)
 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -354,6 +364,9 @@ GLOBAL(void)
 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -408,6 +421,9 @@ GLOBAL(void)
 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -420,6 +436,9 @@ GLOBAL(void)
 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -474,6 +493,9 @@ GLOBAL(void)
 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -488,6 +510,9 @@ GLOBAL(void)
 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -547,6 +572,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
@@ -595,6 +623,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
@@ -684,6 +715,9 @@ GLOBAL(void)
 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
                DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_convsamp_avx2(sample_data, start_col, workspace);
   else
@@ -753,6 +787,9 @@ jsimd_can_fdct_float(void)
 GLOBAL(void)
 jsimd_fdct_islow(DCTELEM *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_fdct_islow_avx2(data);
   else
@@ -814,6 +851,9 @@ jsimd_can_quantize_float(void)
 GLOBAL(void)
 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_quantize_avx2(coef_block, divisors, workspace);
   else
@@ -944,6 +984,9 @@ jsimd_can_idct_float(void)
 {
   init_simd();
 
+#ifdef WITH_SANITIZER
+  return 0;
+#endif
   if (DCTSIZE != 8)
     return 0;
   if (sizeof(JCOEF) != 2)
@@ -968,6 +1011,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -999,7 +1045,9 @@ jsimd_can_huff_encode_one_block(void)
 {
   init_simd();
 
-#ifndef WITH_SANITIZER
+#ifdef WITH_SANITIZER
+  return 0;
+#endif
   if (DCTSIZE != 8)
     return 0;
   if (sizeof(JCOEF) != 2)
@@ -1008,7 +1056,6 @@ jsimd_can_huff_encode_one_block(void)
   if ((simd_support & JSIMD_SSE2) && simd_huffman &&
       IS_ALIGNED_SSE(jconst_huff_encode_one_block))
     return 1;
-#endif
 
   return 0;
 }
@@ -1027,14 +1074,15 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 {
   init_simd();
 
-#ifndef WITH_SANITIZER
+#ifdef WITH_SANITIZER
+  return 0;
+#endif
   if (DCTSIZE != 8)
     return 0;
   if (sizeof(JCOEF) != 2)
     return 0;
   if (simd_support & JSIMD_SSE2)
     return 1;
-#endif
 
   return 0;
 }
@@ -1042,7 +1090,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -1053,14 +1101,12 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 {
   init_simd();
 
-#ifndef WITH_SANITIZER
   if (DCTSIZE != 8)
     return 0;
   if (sizeof(JCOEF) != 2)
     return 0;
   if (simd_support & JSIMD_SSE2)
     return 1;
-#endif
 
   return 0;
 }
@@ -1068,7 +1114,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
                                                  jpeg_natural_order_start,
diff --git a/contrib/libs/libjpeg-turbo/simd/x86_64/jsimdcpu.asm b/contrib/libs/libjpeg-turbo/simd/x86_64/jsimdcpu.asm
index 705f813d7da..b72f3b0b398 100644
--- a/contrib/libs/libjpeg-turbo/simd/x86_64/jsimdcpu.asm
+++ b/contrib/libs/libjpeg-turbo/simd/x86_64/jsimdcpu.asm
@@ -3,17 +3,14 @@
 ;
 ; Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
 ; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on
 ; x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
 ;
-; This file should be assembled with NASM (Netwide Assembler),
-; can *not* be assembled with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ or
-; http://sourceforge.net/project/showfiles.php?group_id=6208
+; This file should be assembled with NASM (Netwide Assembler) or Yasm.
 
 %include "jsimdext.inc"
 
@@ -31,6 +28,8 @@
     GLOBAL_FUNCTION(jpeg_simd_cpu_support)
 
 EXTN(jpeg_simd_cpu_support):
+    push        rbp
+    mov         rbp, rsp
     push        rbx
     push        rdi
 
@@ -79,6 +78,7 @@ EXTN(jpeg_simd_cpu_support):
 
     pop         rdi
     pop         rbx
+    pop         rbp
     ret
 
 ; For some reason, the OS X linker does not honor the request to align the
author	robot-piglet <[email protected]>	2025-06-15 15:44:41 +0300
committer	robot-piglet <[email protected]>	2025-06-15 15:55:30 +0300
commit	ea626d7b15346c0da649291483f80f1ae6e1d7e7 (patch)
tree	24ae3c2aa7f259f3ba95af8450b5bce9a4bdb10d /contrib/libs/libjpeg-turbo/simd
parent	726087f32fb38c191ff0c3ef8c6646aa940d987e (diff)