diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2025-02-06 09:44:56 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-02-06 10:00:51 +0300 |
commit | a6c9550f60a1663b0dfa661f909c3f78d9435be5 (patch) | |
tree | 4eb505d8cfd4eec3162795035b46cfa32e5331fb | |
parent | ed56e2e58e962572b47d0e7c9abca6a751178168 (diff) | |
download | ydb-a6c9550f60a1663b0dfa661f909c3f78d9435be5.tar.gz |
Intermediate changes
commit_hash:b431fc94a39861d987569e88bb97ba7e6187f7e5
24 files changed, 1092 insertions, 139 deletions
diff --git a/contrib/libs/openjpeg/.yandex_meta/override.nix b/contrib/libs/openjpeg/.yandex_meta/override.nix index aef28cef968..d173012e1fb 100644 --- a/contrib/libs/openjpeg/.yandex_meta/override.nix +++ b/contrib/libs/openjpeg/.yandex_meta/override.nix @@ -1,11 +1,11 @@ pkgs: attrs: with pkgs; with attrs; rec { - version = "2.5.2"; + version = "2.5.3"; src = fetchFromGitHub { owner = "uclouvain"; repo = "openjpeg"; rev = "v${version}"; - sha256 = "sha256-mQ9B3MJY2/bg0yY/7jUJrAXM6ozAHT5fmwES5Q1SGxw="; + sha256 = "sha256-ONPahcQ80e3ahYRQU+Tu8Z7ZTARjRlpXqPAYpUlX5sY="; }; patches = []; diff --git a/contrib/libs/openjpeg/CHANGELOG.md b/contrib/libs/openjpeg/CHANGELOG.md index 54aeb1555b4..926e6f1f4f3 100644 --- a/contrib/libs/openjpeg/CHANGELOG.md +++ b/contrib/libs/openjpeg/CHANGELOG.md @@ -1,5 +1,57 @@ # Changelog +## [v2.5.3](https://github.com/uclouvain/openjpeg/releases/v2.5.3) (2024-12-09) + +[Full Changelog](https://github.com/uclouvain/openjpeg/compare/v2.5.2...v2.5.3) + +**Closed issues:** + +- Memory Leak When Using Invalid Output Path in opj\_compress [\#1567](https://github.com/uclouvain/openjpeg/issues/1567) +- heap-buffer-overflow at lib/openjp2/j2k.c:8460:84 in opj\_j2k\_add\_tlmarker in openjpeg/opj\_decompress [\#1564](https://github.com/uclouvain/openjpeg/issues/1564) +- heap-buffer-overflow at bin/common/color.c:215:42 in sycc422\_to\_rgb in openjpeg/opj\_decompress [\#1563](https://github.com/uclouvain/openjpeg/issues/1563) +- Can not open libjpeg [\#1550](https://github.com/uclouvain/openjpeg/issues/1550) +- \[ERROR\] Wrong values for: w\(525\) h\(700\) numcomps\(0\) \(ihdr\) [\#1545](https://github.com/uclouvain/openjpeg/issues/1545) +- Failed to Open a Specific JP2 file [\#1544](https://github.com/uclouvain/openjpeg/issues/1544) +- Outdated File in OpenJPEG Project Leading to Vulnerability \(CVE-2016-9534\) [\#1539](https://github.com/uclouvain/openjpeg/issues/1539) +- Heap-buffer-overflow in in opj\_mqc\_init\_dec\_common when disabling 
strict mode [\#1535](https://github.com/uclouvain/openjpeg/issues/1535) +- Heap-buffer-overflow in in opj\_t1\_decode\_cblk when disabling strict mode [\#1533](https://github.com/uclouvain/openjpeg/issues/1533) +- opj\_decode\_tile\_data takes a long time to decode a very small file [\#1524](https://github.com/uclouvain/openjpeg/issues/1524) +- Release v2.5.2 tag is outside of the repository [\#1521](https://github.com/uclouvain/openjpeg/issues/1521) +- Website broken [\#1513](https://github.com/uclouvain/openjpeg/issues/1513) +- Guard Bits In CINEMA2K Profile [\#1340](https://github.com/uclouvain/openjpeg/issues/1340) +- Support for OSX on ARM \(M1\) [\#1289](https://github.com/uclouvain/openjpeg/issues/1289) +- Building on Windows creates \_\_stdcall-convention static lib/dll regardless of compiler settings [\#722](https://github.com/uclouvain/openjpeg/issues/722) + +**Merged pull requests:** + +- sycc422\_to\_rgb\(\): fix out-of-bounds read accesses when 2 \* width\_component\_1\_or\_2 + 1 == with\_component\_0 [\#1566](https://github.com/uclouvain/openjpeg/pull/1566) ([rouault](https://github.com/rouault)) +- opj\_j2k\_add\_tlmarker\(\): validate that current tile-part number if smaller that total number of tile-parts [\#1565](https://github.com/uclouvain/openjpeg/pull/1565) ([rouault](https://github.com/rouault)) +- Amend fix of PR 1530 regarding m\_sot\_length check [\#1561](https://github.com/uclouvain/openjpeg/pull/1561) ([rouault](https://github.com/rouault)) +- Do not turn on 'TPsot==TNsot detection fix' when TNsot==1, and [\#1560](https://github.com/uclouvain/openjpeg/pull/1560) ([rouault](https://github.com/rouault)) +- opj\_j2k\_setup\_encoder\(\): set numgbits = 1 for Cinema2K [\#1559](https://github.com/uclouvain/openjpeg/pull/1559) ([rouault](https://github.com/rouault)) +- bench\_dwt: Add assert for memory allocation failure [\#1555](https://github.com/uclouvain/openjpeg/pull/1555) ([hleft](https://github.com/hleft)) +- Add AVX2 and AVX512 
optimization [\#1552](https://github.com/uclouvain/openjpeg/pull/1552) ([tszumski](https://github.com/tszumski)) +- Updated softprops/action-gh-release to v2 [\#1551](https://github.com/uclouvain/openjpeg/pull/1551) ([radarhere](https://github.com/radarhere)) +- Updated softprops/action-gh-release to v2 [\#1549](https://github.com/uclouvain/openjpeg/pull/1549) ([radarhere](https://github.com/radarhere)) +- fix: abi check [\#1548](https://github.com/uclouvain/openjpeg/pull/1548) ([mayeut](https://github.com/mayeut)) +- fix: when EPH markers are specified, they are required. [\#1547](https://github.com/uclouvain/openjpeg/pull/1547) ([mayeut](https://github.com/mayeut)) +- CI: add macOS arm64 [\#1546](https://github.com/uclouvain/openjpeg/pull/1546) ([mayeut](https://github.com/mayeut)) +- thirdparty/libz: update to zlib-1.3.1 [\#1542](https://github.com/uclouvain/openjpeg/pull/1542) ([rouault](https://github.com/rouault)) +- thirdparty/libpng: update to libpng-1.6.43 [\#1541](https://github.com/uclouvain/openjpeg/pull/1541) ([rouault](https://github.com/rouault)) +- thirdparty/libtiff: update to libtiff 4.6.0 [\#1540](https://github.com/uclouvain/openjpeg/pull/1540) ([rouault](https://github.com/rouault)) +- Use TLM \(Tile Length Marker\) segments to optimize decoding [\#1538](https://github.com/uclouvain/openjpeg/pull/1538) ([rouault](https://github.com/rouault)) +- Add new test for file with non-consecutive tilepart and TLM marker [\#1537](https://github.com/uclouvain/openjpeg/pull/1537) ([rouault](https://github.com/rouault)) +- Avoid heap-buffer-overflow read on corrupted image in non-strict mode [\#1536](https://github.com/uclouvain/openjpeg/pull/1536) ([rouault](https://github.com/rouault)) +- opj\_j2k\_read\_sod\(\): validate opj\_stream\_read\_data\(\) return to avoid … [\#1534](https://github.com/uclouvain/openjpeg/pull/1534) ([rouault](https://github.com/rouault)) +- Fixed typos [\#1532](https://github.com/uclouvain/openjpeg/pull/1532) 
([radarhere](https://github.com/radarhere)) +- CI: pin macos job to macos-13 to get x86\_64 [\#1531](https://github.com/uclouvain/openjpeg/pull/1531) ([rouault](https://github.com/rouault)) +- Integer Overflow at j2k.c:9614 [\#1530](https://github.com/uclouvain/openjpeg/pull/1530) ([headshog](https://github.com/headshog)) +- Support setting enumcs for CMYK and EYCC color space [\#1529](https://github.com/uclouvain/openjpeg/pull/1529) ([radarhere](https://github.com/radarhere)) +- opj\_j2k\_decode\_tiles\(\): avoid use of uninitialized l\_current\_tile\_no variable [\#1528](https://github.com/uclouvain/openjpeg/pull/1528) ([rouault](https://github.com/rouault)) +- Updated actions/upload-artifact to v4 [\#1527](https://github.com/uclouvain/openjpeg/pull/1527) ([radarhere](https://github.com/radarhere)) +- Do not allow header length to be zero in non-zero length packet [\#1526](https://github.com/uclouvain/openjpeg/pull/1526) ([radarhere](https://github.com/radarhere)) +- Fix building on OpenBSD big endian hosts [\#1520](https://github.com/uclouvain/openjpeg/pull/1520) ([brad0](https://github.com/brad0)) + ## [v2.5.2](https://github.com/uclouvain/openjpeg/releases/v2.5.2) (2024-02-28) [Full Changelog](https://github.com/uclouvain/openjpeg/compare/v2.5.1...v2.5.2) @@ -38,7 +90,7 @@ - Cannot determine library version at compile time [\#1428](https://github.com/uclouvain/openjpeg/issues/1428) - ARM builds on Windows unsupported with Version 2.5.0 [\#1422](https://github.com/uclouvain/openjpeg/issues/1422) - opj\_decompress heap overflow Denial of Service issue [\#1413](https://github.com/uclouvain/openjpeg/issues/1413) -- Color chanel swapping for some JPEG2000 pictures [\#1382](https://github.com/uclouvain/openjpeg/issues/1382) +- Color channel swapping for some JPEG2000 pictures [\#1382](https://github.com/uclouvain/openjpeg/issues/1382) - Heap-buffer-overflow in color.c:379:42 in sycc420\_to\_rgb [\#1347](https://github.com/uclouvain/openjpeg/issues/1347) - No 
colorspace information after opj\_read\_header [\#570](https://github.com/uclouvain/openjpeg/issues/570) @@ -297,7 +349,7 @@ - LINUX install doesn't work when building shared libraries is disabled [\#1155](https://github.com/uclouvain/openjpeg/issues/1155) - OPENJPEG null ptr dereference in openjpeg-2.3.0/src/bin/jp2/convert.c:2243 [\#1152](https://github.com/uclouvain/openjpeg/issues/1152) - How to drop certain subbands/layers in DWT [\#1147](https://github.com/uclouvain/openjpeg/issues/1147) -- where is the MQ-Coder ouput stream in t2.c? [\#1146](https://github.com/uclouvain/openjpeg/issues/1146) +- where is the MQ-Coder output stream in t2.c? [\#1146](https://github.com/uclouvain/openjpeg/issues/1146) - OpenJPEG 2.3 \(and 2.2?\) multi component image fails to decode with KDU v7.10 [\#1132](https://github.com/uclouvain/openjpeg/issues/1132) - Missing checks for header\_info.height and header\_info.width in function pnmtoimage in src/bin/jpwl/convert.c, which can lead to heap buffer overflow [\#1126](https://github.com/uclouvain/openjpeg/issues/1126) - Assertion Failure in jp2.c [\#1125](https://github.com/uclouvain/openjpeg/issues/1125) diff --git a/contrib/libs/openjpeg/NEWS.md b/contrib/libs/openjpeg/NEWS.md index 20e9184a4fb..9e8b6b72a81 100644 --- a/contrib/libs/openjpeg/NEWS.md +++ b/contrib/libs/openjpeg/NEWS.md @@ -2,6 +2,36 @@ More details in the [CHANGELOG](https://github.com/uclouvain/openjpeg/blob/master/CHANGELOG.md) +## OpenJPEG 2.5.3 (Dec 2024) + +No API/ABI break compared to v2.5.2 + +### New Features + +* Use TLM \(Tile Length Marker\) segments to optimize decoding [\#1538](https://github.com/uclouvain/openjpeg/pull/1538) +* Add AVX2 and AVX512 optimization [\#1552](https://github.com/uclouvain/openjpeg/pull/1552) +* Support setting enumcs for CMYK and EYCC color space [\#1529](https://github.com/uclouvain/openjpeg/pull/1529) + +### Bug fixes + +* Do not turn on 'TPsot==TNsot detection fix' when TNsot==1, and add a 
OPJ_DPARAMETERS_DISABLE_TPSOT_FIX flag to disable it [\#1560](https://github.com/uclouvain/openjpeg/pull/1560) +* opj\_j2k\_setup\_encoder\(\): set numgbits = 1 for Cinema2K [\#1559](https://github.com/uclouvain/openjpeg/pull/1559) +* fix: when EPH markers are specified, they are required. [\#1547](https://github.com/uclouvain/openjpeg/pull/1547) +* sycc422\_to\_rgb\(\): fix out-of-bounds read accesses when 2 \* width\_component\_1\_or\_2 + 1 == with\_component\_0 [\#1566](https://github.com/uclouvain/openjpeg/pull/1566) +* Avoid heap-buffer-overflow read on corrupted image in non-strict mode [\#1536](https://github.com/uclouvain/openjpeg/pull/1536) +* opj\_j2k\_read\_sod\(\): validate opj\_stream\_read\_data\(\) return to avoid potential later heap-buffer-overflow in in opj_t1_decode_cblk when disabling strict mode [\#1534](https://github.com/uclouvain/openjpeg/pull/1534) +* fix integer Overflow at j2k.c:9614 [\#1530](https://github.com/uclouvain/openjpeg/pull/1530) +* Memory leak fixes in error code path of opj\_compress [\#1567](https://github.com/uclouvain/openjpeg/issues/1567) +* opj\_j2k\_decode\_tiles\(\): avoid use of uninitialized l\_current\_tile\_no variable [\#1528](https://github.com/uclouvain/openjpeg/pull/1528) +* Do not allow header length to be zero in non-zero length packet [\#1526](https://github.com/uclouvain/openjpeg/pull/1526) +* Fix building on OpenBSD big endian hosts [\#1520](https://github.com/uclouvain/openjpeg/pull/1520) + +### Changes in third party components + +* thirdparty/libz: update to zlib-1.3.1 [\#1542](https://github.com/uclouvain/openjpeg/pull/1542) +* thirdparty/libpng: update to libpng-1.6.43 [\#1541](https://github.com/uclouvain/openjpeg/pull/1541) +* thirdparty/libtiff: update to libtiff 4.6.0 [\#1540](https://github.com/uclouvain/openjpeg/pull/1540) + ## OpenJPEG 2.5.2 (Feb 2024) No API/ABI break compared to v2.5.1 diff --git a/contrib/libs/openjpeg/dwt.c b/contrib/libs/openjpeg/dwt.c index 6b18c5dd6e9..11aae472def 100644 
--- a/contrib/libs/openjpeg/dwt.c +++ b/contrib/libs/openjpeg/dwt.c @@ -52,7 +52,7 @@ #ifdef __SSSE3__ #include <tmmintrin.h> #endif -#ifdef __AVX2__ +#if (defined(__AVX2__) || defined(__AVX512F__)) #include <immintrin.h> #endif @@ -66,7 +66,10 @@ #define OPJ_WS(i) v->mem[(i)*2] #define OPJ_WD(i) v->mem[(1+(i)*2)] -#ifdef __AVX2__ +#if defined(__AVX512F__) +/** Number of int32 values in a AVX512 register */ +#define VREG_INT_COUNT 16 +#elif defined(__AVX2__) /** Number of int32 values in a AVX2 register */ #define VREG_INT_COUNT 8 #else @@ -331,6 +334,51 @@ static void opj_dwt_decode_1(const opj_dwt_t *v) #endif /* STANDARD_SLOW_VERSION */ +#if defined(__AVX512F__) +static int32_t loop_short_sse(int32_t len, const int32_t** lf_ptr, + const int32_t** hf_ptr, int32_t** out_ptr, + int32_t* prev_even) +{ + int32_t next_even; + __m128i odd, even_m1, unpack1, unpack2; + const int32_t batch = (len - 2) / 8; + const __m128i two = _mm_set1_epi32(2); + + for (int32_t i = 0; i < batch; i++) { + const __m128i lf_ = _mm_loadu_si128((__m128i*)(*lf_ptr + 1)); + const __m128i hf1_ = _mm_loadu_si128((__m128i*)(*hf_ptr)); + const __m128i hf2_ = _mm_loadu_si128((__m128i*)(*hf_ptr + 1)); + + __m128i even = _mm_add_epi32(hf1_, hf2_); + even = _mm_add_epi32(even, two); + even = _mm_srai_epi32(even, 2); + even = _mm_sub_epi32(lf_, even); + + next_even = _mm_extract_epi32(even, 3); + even_m1 = _mm_bslli_si128(even, 4); + even_m1 = _mm_insert_epi32(even_m1, *prev_even, 0); + + //out[0] + out[2] + odd = _mm_add_epi32(even_m1, even); + odd = _mm_srai_epi32(odd, 1); + odd = _mm_add_epi32(odd, hf1_); + + unpack1 = _mm_unpacklo_epi32(even_m1, odd); + unpack2 = _mm_unpackhi_epi32(even_m1, odd); + + _mm_storeu_si128((__m128i*)(*out_ptr + 0), unpack1); + _mm_storeu_si128((__m128i*)(*out_ptr + 4), unpack2); + + *prev_even = next_even; + + *out_ptr += 8; + *lf_ptr += 4; + *hf_ptr += 4; + } + return batch; +} +#endif + #if !defined(STANDARD_SLOW_VERSION) static void opj_idwt53_h_cas0(OPJ_INT32* tmp, 
const OPJ_INT32 sn, @@ -364,6 +412,145 @@ static void opj_idwt53_h_cas0(OPJ_INT32* tmp, tmp[len - 1] = in_odd[(len - 1) / 2] + tmp[len - 2]; } #else +#if defined(__AVX512F__) + OPJ_INT32* out_ptr = tmp; + int32_t prev_even = in_even[0] - ((in_odd[0] + 1) >> 1); + + const __m512i permutevar_mask = _mm512_setr_epi32( + 0x10, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e); + const __m512i store1_perm = _mm512_setr_epi64(0x00, 0x01, 0x08, 0x09, 0x02, + 0x03, 0x0a, 0x0b); + const __m512i store2_perm = _mm512_setr_epi64(0x04, 0x05, 0x0c, 0x0d, 0x06, + 0x07, 0x0e, 0x0f); + + const __m512i two = _mm512_set1_epi32(2); + + int32_t simd_batch_512 = (len - 2) / 32; + int32_t leftover; + + for (i = 0; i < simd_batch_512; i++) { + const __m512i lf_avx2 = _mm512_loadu_si512((__m512i*)(in_even + 1)); + const __m512i hf1_avx2 = _mm512_loadu_si512((__m512i*)(in_odd)); + const __m512i hf2_avx2 = _mm512_loadu_si512((__m512i*)(in_odd + 1)); + int32_t next_even; + __m512i duplicate, even_m1, odd, unpack1, unpack2, store1, store2; + + __m512i even = _mm512_add_epi32(hf1_avx2, hf2_avx2); + even = _mm512_add_epi32(even, two); + even = _mm512_srai_epi32(even, 2); + even = _mm512_sub_epi32(lf_avx2, even); + + next_even = _mm_extract_epi32(_mm512_extracti32x4_epi32(even, 3), 3); + + duplicate = _mm512_set1_epi32(prev_even); + even_m1 = _mm512_permutex2var_epi32(even, permutevar_mask, duplicate); + + //out[0] + out[2] + odd = _mm512_add_epi32(even_m1, even); + odd = _mm512_srai_epi32(odd, 1); + odd = _mm512_add_epi32(odd, hf1_avx2); + + unpack1 = _mm512_unpacklo_epi32(even_m1, odd); + unpack2 = _mm512_unpackhi_epi32(even_m1, odd); + + store1 = _mm512_permutex2var_epi64(unpack1, store1_perm, unpack2); + store2 = _mm512_permutex2var_epi64(unpack1, store2_perm, unpack2); + + _mm512_storeu_si512(out_ptr, store1); + _mm512_storeu_si512(out_ptr + 16, store2); + + prev_even = next_even; + + out_ptr += 32; + in_even += 16; + in_odd += 16; + } + + leftover = 
len - simd_batch_512 * 32; + if (leftover > 8) { + leftover -= 8 * loop_short_sse(leftover, &in_even, &in_odd, &out_ptr, + &prev_even); + } + out_ptr[0] = prev_even; + + for (j = 1; j < (leftover - 2); j += 2) { + out_ptr[2] = in_even[1] - ((in_odd[0] + (in_odd[1]) + 2) >> 2); + out_ptr[1] = in_odd[0] + ((out_ptr[0] + out_ptr[2]) >> 1); + in_even++; + in_odd++; + out_ptr += 2; + } + + if (len & 1) { + out_ptr[2] = in_even[1] - ((in_odd[0] + 1) >> 1); + out_ptr[1] = in_odd[0] + ((out_ptr[0] + out_ptr[2]) >> 1); + } else { //!(len & 1) + out_ptr[1] = in_odd[0] + out_ptr[0]; + } +#elif defined(__AVX2__) + OPJ_INT32* out_ptr = tmp; + int32_t prev_even = in_even[0] - ((in_odd[0] + 1) >> 1); + + const __m256i reg_permutevar_mask_move_right = _mm256_setr_epi32(0x00, 0x00, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + const __m256i two = _mm256_set1_epi32(2); + + int32_t simd_batch = (len - 2) / 16; + int32_t next_even; + __m256i even_m1, odd, unpack1_avx2, unpack2_avx2; + + for (i = 0; i < simd_batch; i++) { + const __m256i lf_avx2 = _mm256_loadu_si256((__m256i*)(in_even + 1)); + const __m256i hf1_avx2 = _mm256_loadu_si256((__m256i*)(in_odd)); + const __m256i hf2_avx2 = _mm256_loadu_si256((__m256i*)(in_odd + 1)); + + __m256i even = _mm256_add_epi32(hf1_avx2, hf2_avx2); + even = _mm256_add_epi32(even, two); + even = _mm256_srai_epi32(even, 2); + even = _mm256_sub_epi32(lf_avx2, even); + + next_even = _mm_extract_epi32(_mm256_extracti128_si256(even, 1), 3); + even_m1 = _mm256_permutevar8x32_epi32(even, reg_permutevar_mask_move_right); + even_m1 = _mm256_blend_epi32(even_m1, _mm256_set1_epi32(prev_even), (1 << 0)); + + //out[0] + out[2] + odd = _mm256_add_epi32(even_m1, even); + odd = _mm256_srai_epi32(odd, 1); + odd = _mm256_add_epi32(odd, hf1_avx2); + + unpack1_avx2 = _mm256_unpacklo_epi32(even_m1, odd); + unpack2_avx2 = _mm256_unpackhi_epi32(even_m1, odd); + + _mm_storeu_si128((__m128i*)(out_ptr + 0), _mm256_castsi256_si128(unpack1_avx2)); + _mm_storeu_si128((__m128i*)(out_ptr 
+ 4), _mm256_castsi256_si128(unpack2_avx2)); + _mm_storeu_si128((__m128i*)(out_ptr + 8), _mm256_extracti128_si256(unpack1_avx2, + 0x1)); + _mm_storeu_si128((__m128i*)(out_ptr + 12), + _mm256_extracti128_si256(unpack2_avx2, 0x1)); + + prev_even = next_even; + + out_ptr += 16; + in_even += 8; + in_odd += 8; + } + out_ptr[0] = prev_even; + for (j = simd_batch * 16 + 1; j < (len - 2); j += 2) { + out_ptr[2] = in_even[1] - ((in_odd[0] + in_odd[1] + 2) >> 2); + out_ptr[1] = in_odd[0] + ((out_ptr[0] + out_ptr[2]) >> 1); + in_even++; + in_odd++; + out_ptr += 2; + } + + if (len & 1) { + out_ptr[2] = in_even[1] - ((in_odd[0] + 1) >> 1); + out_ptr[1] = in_odd[0] + ((out_ptr[0] + out_ptr[2]) >> 1); + } else { //!(len & 1) + out_ptr[1] = in_odd[0] + out_ptr[0]; + } +#else OPJ_INT32 d1c, d1n, s1n, s0c, s0n; assert(len > 1); @@ -397,7 +584,8 @@ static void opj_idwt53_h_cas0(OPJ_INT32* tmp, } else { tmp[len - 1] = d1n + s0n; } -#endif +#endif /*(__AVX512F__ || __AVX2__)*/ +#endif /*TWO_PASS_VERSION*/ memcpy(tiledp, tmp, (OPJ_UINT32)len * sizeof(OPJ_INT32)); } @@ -511,10 +699,20 @@ static void opj_idwt53_h(const opj_dwt_t *dwt, #endif } -#if (defined(__SSE2__) || defined(__AVX2__)) && !defined(STANDARD_SLOW_VERSION) +#if (defined(__SSE2__) || defined(__AVX2__) || defined(__AVX512F__)) && !defined(STANDARD_SLOW_VERSION) /* Conveniency macros to improve the readability of the formulas */ -#if __AVX2__ +#if defined(__AVX512F__) +#define VREG __m512i +#define LOAD_CST(x) _mm512_set1_epi32(x) +#define LOAD(x) _mm512_loadu_si512((const VREG*)(x)) +#define LOADU(x) _mm512_loadu_si512((const VREG*)(x)) +#define STORE(x,y) _mm512_storeu_si512((VREG*)(x),(y)) +#define STOREU(x,y) _mm512_storeu_si512((VREG*)(x),(y)) +#define ADD(x,y) _mm512_add_epi32((x),(y)) +#define SUB(x,y) _mm512_sub_epi32((x),(y)) +#define SAR(x,y) _mm512_srai_epi32((x),(y)) +#elif defined(__AVX2__) #define VREG __m256i #define LOAD_CST(x) _mm256_set1_epi32(x) #define LOAD(x) _mm256_load_si256((const VREG*)(x)) @@ -576,7 
+774,10 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( const VREG two = LOAD_CST(2); assert(len > 1); -#if __AVX2__ +#if defined(__AVX512F__) + assert(PARALLEL_COLS_53 == 32); + assert(VREG_INT_COUNT == 16); +#elif defined(__AVX2__) assert(PARALLEL_COLS_53 == 16); assert(VREG_INT_COUNT == 8); #else @@ -584,10 +785,13 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( assert(VREG_INT_COUNT == 4); #endif +//For AVX512 code aligned load/store is set to it's unaligned equivalents +#if !defined(__AVX512F__) /* Note: loads of input even/odd values must be done in a unaligned */ /* fashion. But stores in tmp can be done with aligned store, since */ /* the temporary buffer is properly aligned */ assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0); +#endif s1n_0 = LOADU(in_even + 0); s1n_1 = LOADU(in_even + VREG_INT_COUNT); @@ -678,7 +882,10 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( const OPJ_INT32* in_odd = &tiledp_col[0]; assert(len > 2); -#if __AVX2__ +#if defined(__AVX512F__) + assert(PARALLEL_COLS_53 == 32); + assert(VREG_INT_COUNT == 16); +#elif defined(__AVX2__) assert(PARALLEL_COLS_53 == 16); assert(VREG_INT_COUNT == 8); #else @@ -686,10 +893,13 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( assert(VREG_INT_COUNT == 4); #endif +//For AVX512 code aligned load/store is set to it's unaligned equivalents +#if !defined(__AVX512F__) /* Note: loads of input even/odd values must be done in a unaligned */ /* fashion. But stores in tmp can be done with aligned store, since */ /* the temporary buffer is properly aligned */ assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0); +#endif s1_0 = LOADU(in_even + stride); /* in_odd[0] - ((in_even[0] + s1 + 2) >> 2); */ diff --git a/contrib/libs/openjpeg/ht_dec.c b/contrib/libs/openjpeg/ht_dec.c index a554b24a6a2..2984f56098b 100644 --- a/contrib/libs/openjpeg/ht_dec.c +++ b/contrib/libs/openjpeg/ht_dec.c @@ -901,7 +901,7 @@ typedef struct frwd_struct { * X controls this value. 
* * Unstuffing prevent sequences that are more than 0xFF7F from appearing - * in the conpressed sequence. So whenever a value of 0xFF is coded, the + * in the compressed sequence. So whenever a value of 0xFF is coded, the * MSB of the next byte is set 0 and must be ignored during decoding. * * Reading can go beyond the end of buffer by up to 3 bytes. @@ -1032,7 +1032,7 @@ OPJ_UINT32 frwd_fetch(frwd_struct_t *msp) //************************************************************************/ /** @brief Allocates T1 buffers * - * @param [in, out] t1 is codeblock cofficients storage + * @param [in, out] t1 is codeblock coefficients storage * @param [in] w is codeblock width * @param [in] h is codeblock height */ @@ -1120,7 +1120,7 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1, /** @brief Decodes one codeblock, processing the cleanup, siginificance * propagation, and magnitude refinement pass * - * @param [in, out] t1 is codeblock cofficients storage + * @param [in, out] t1 is codeblock coefficients storage * @param [in] cblk is codeblock properties * @param [in] orient is the subband to which the codeblock belongs (not needed) * @param [in] roishift is region of interest shift diff --git a/contrib/libs/openjpeg/j2k.c b/contrib/libs/openjpeg/j2k.c index c0551870b2b..a2014c89b02 100644 --- a/contrib/libs/openjpeg/j2k.c +++ b/contrib/libs/openjpeg/j2k.c @@ -2484,6 +2484,11 @@ static OPJ_BOOL opj_j2k_read_siz(opj_j2k_t *p_j2k, ++l_current_tile_param; } + /*Allocate and initialize some elements of codestrem index*/ + if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) { + return OPJ_FALSE; + } + p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_MH; opj_image_comp_header_update(l_image, l_cp); @@ -3657,21 +3662,29 @@ static OPJ_BOOL opj_j2k_read_tlm(opj_j2k_t *p_j2k, opj_event_mgr_t * p_manager ) { - OPJ_UINT32 l_Ztlm, l_Stlm, l_ST, l_SP, l_tot_num_tp_remaining, l_quotient, - l_Ptlm_size; + OPJ_UINT32 l_Ztlm, l_Stlm, l_ST, l_SP, + l_Ptlm_size, l_entry_size, 
l_num_tileparts; + OPJ_UINT32 i; + opj_j2k_tlm_tile_part_info_t* l_tile_part_infos; + opj_j2k_tlm_info_t* l_tlm; + /* preconditions */ assert(p_header_data != 00); assert(p_j2k != 00); assert(p_manager != 00); - OPJ_UNUSED(p_j2k); + l_tlm = &(p_j2k->m_specific_param.m_decoder.m_tlm); if (p_header_size < 2) { - opj_event_msg(p_manager, EVT_ERROR, "Error reading TLM marker\n"); + opj_event_msg(p_manager, EVT_ERROR, "Error reading TLM marker.\n"); return OPJ_FALSE; } p_header_size -= 2; + if (l_tlm->m_is_invalid) { + return OPJ_TRUE; + } + opj_read_bytes(p_header_data, &l_Ztlm, 1); /* Ztlm */ ++p_header_data; @@ -3680,27 +3693,83 @@ static OPJ_BOOL opj_j2k_read_tlm(opj_j2k_t *p_j2k, ++p_header_data; l_ST = ((l_Stlm >> 4) & 0x3); + if (l_ST == 3) { + l_tlm->m_is_invalid = OPJ_TRUE; + opj_event_msg(p_manager, EVT_WARNING, + "opj_j2k_read_tlm(): ST = 3 is invalid.\n"); + return OPJ_TRUE; + } l_SP = (l_Stlm >> 6) & 0x1; l_Ptlm_size = (l_SP + 1) * 2; - l_quotient = l_Ptlm_size + l_ST; + l_entry_size = l_Ptlm_size + l_ST; - l_tot_num_tp_remaining = p_header_size % l_quotient; + if ((p_header_size % l_entry_size) != 0) { + l_tlm->m_is_invalid = OPJ_TRUE; + opj_event_msg(p_manager, EVT_WARNING, + "opj_j2k_read_tlm(): TLM marker not of expected size.\n"); + return OPJ_TRUE; + } - if (l_tot_num_tp_remaining != 0) { - opj_event_msg(p_manager, EVT_ERROR, "Error reading TLM marker\n"); - return OPJ_FALSE; + l_num_tileparts = p_header_size / l_entry_size; + if (l_num_tileparts == 0) { + /* not totally sure if this is valid... 
*/ + return OPJ_TRUE; } - /* FIXME Do not care of this at the moment since only local variables are set here */ - /* - for - (i = 0; i < l_tot_num_tp; ++i) - { - opj_read_bytes(p_header_data,&l_Ttlm_i,l_ST); // Ttlm_i + + /* Highly unlikely, unless there are gazillions of TLM markers */ + if (l_tlm->m_entries_count > UINT32_MAX - l_num_tileparts || + l_tlm->m_entries_count + l_num_tileparts > UINT32_MAX / sizeof( + opj_j2k_tlm_tile_part_info_t)) { + l_tlm->m_is_invalid = OPJ_TRUE; + opj_event_msg(p_manager, EVT_WARNING, + "opj_j2k_read_tlm(): too many TLM markers.\n"); + return OPJ_TRUE; + } + + l_tile_part_infos = (opj_j2k_tlm_tile_part_info_t*)opj_realloc( + l_tlm->m_tile_part_infos, + (l_tlm->m_entries_count + l_num_tileparts) * sizeof( + opj_j2k_tlm_tile_part_info_t)); + if (!l_tile_part_infos) { + l_tlm->m_is_invalid = OPJ_TRUE; + opj_event_msg(p_manager, EVT_WARNING, + "opj_j2k_read_tlm(): cannot allocate m_tile_part_infos.\n"); + return OPJ_TRUE; + } + + l_tlm->m_tile_part_infos = l_tile_part_infos; + + for (i = 0; i < l_num_tileparts; ++ i) { + OPJ_UINT32 l_tile_index; + OPJ_UINT32 l_length; + + /* Read Ttlm_i */ + if (l_ST == 0) { + l_tile_index = l_tlm->m_entries_count; + } else { + opj_read_bytes(p_header_data, &l_tile_index, l_ST); p_header_data += l_ST; - opj_read_bytes(p_header_data,&l_Ptlm_i,l_Ptlm_size); // Ptlm_i - p_header_data += l_Ptlm_size; - }*/ + } + + if (l_tile_index >= p_j2k->m_cp.tw * p_j2k->m_cp.th) { + l_tlm->m_is_invalid = OPJ_TRUE; + opj_event_msg(p_manager, EVT_WARNING, + "opj_j2k_read_tlm(): invalid tile number %d\n", + l_tile_index); + return OPJ_TRUE; + } + + /* Read Ptlm_i */ + opj_read_bytes(p_header_data, &l_length, l_Ptlm_size); + p_header_data += l_Ptlm_size; + + l_tile_part_infos[l_tlm->m_entries_count].m_tile_index = + (OPJ_UINT16)l_tile_index; + l_tile_part_infos[l_tlm->m_entries_count].m_length = l_length; + ++l_tlm->m_entries_count; + } + return OPJ_TRUE; } @@ -4583,14 +4652,26 @@ static OPJ_BOOL 
opj_j2k_read_sot(opj_j2k_t *p_j2k, } /* Index */ - if (p_j2k->cstr_index) { + { assert(p_j2k->cstr_index->tile_index != 00); p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].tileno = p_j2k->m_current_tile_number; p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].current_tpsno = l_current_part; - if (l_num_parts != 0) { + if (!p_j2k->m_specific_param.m_decoder.m_tlm.m_is_invalid && + l_num_parts > + p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].nb_tps) { + opj_event_msg(p_manager, EVT_WARNING, + "SOT marker for tile %u declares more tile-parts than found in TLM marker.", + p_j2k->m_current_tile_number); + p_j2k->m_specific_param.m_decoder.m_tlm.m_is_invalid = OPJ_TRUE; + } + + if (!p_j2k->m_specific_param.m_decoder.m_tlm.m_is_invalid) { + /* do nothing */ + } else if (l_num_parts != 0) { + p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].nb_tps = l_num_parts; p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].current_nb_tps = @@ -4661,33 +4742,6 @@ static OPJ_BOOL opj_j2k_read_sot(opj_j2k_t *p_j2k, } - /* FIXME move this onto a separate method to call before reading any SOT, remove part about main_end header, use a index struct inside p_j2k */ - /* if (p_j2k->cstr_info) { - if (l_tcp->first) { - if (tileno == 0) { - p_j2k->cstr_info->main_head_end = p_stream_tell(p_stream) - 13; - } - - p_j2k->cstr_info->tile[tileno].tileno = tileno; - p_j2k->cstr_info->tile[tileno].start_pos = p_stream_tell(p_stream) - 12; - p_j2k->cstr_info->tile[tileno].end_pos = p_j2k->cstr_info->tile[tileno].start_pos + totlen - 1; - p_j2k->cstr_info->tile[tileno].num_tps = numparts; - - if (numparts) { - p_j2k->cstr_info->tile[tileno].tp = (opj_tp_info_t *) opj_malloc(numparts * sizeof(opj_tp_info_t)); - } - else { - p_j2k->cstr_info->tile[tileno].tp = (opj_tp_info_t *) opj_malloc(10 * sizeof(opj_tp_info_t)); // Fixme (10) - } - } - else { - p_j2k->cstr_info->tile[tileno].end_pos += totlen; - } - - 
p_j2k->cstr_info->tile[tileno].tp[partno].tp_start_pos = p_stream_tell(p_stream) - 12; - p_j2k->cstr_info->tile[tileno].tp[partno].tp_end_pos = - p_j2k->cstr_info->tile[tileno].tp[partno].tp_start_pos + totlen - 1; - }*/ return OPJ_TRUE; } @@ -5023,7 +5077,7 @@ static OPJ_BOOL opj_j2k_read_sod(opj_j2k_t *p_j2k, /* Index */ l_cstr_index = p_j2k->cstr_index; - if (l_cstr_index) { + { OPJ_OFF_T l_current_pos = opj_stream_tell(p_stream) - 2; OPJ_UINT32 l_current_tile_part = @@ -5059,6 +5113,11 @@ static OPJ_BOOL opj_j2k_read_sod(opj_j2k_t *p_j2k, } if (l_current_read_size != p_j2k->m_specific_param.m_decoder.m_sot_length) { + if (l_current_read_size == (OPJ_SIZE_T)(-1)) { + /* Avoid issue of https://github.com/uclouvain/openjpeg/issues/1533 */ + opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n"); + return OPJ_FALSE; + } p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_NEOC; } else { p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_TPHSOT; @@ -6705,6 +6764,9 @@ void opj_j2k_decoder_set_strict_mode(opj_j2k_t *j2k, OPJ_BOOL strict) { if (j2k) { j2k->m_cp.strict = strict; + if (strict) { + j2k->m_specific_param.m_decoder.m_nb_tile_parts_correction_checked = 1; + } } } @@ -8251,7 +8313,14 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, tccp->qmfbid = parameters->irreversible ? 0 : 1; tccp->qntsty = parameters->irreversible ? 
J2K_CCP_QNTSTY_SEQNT : J2K_CCP_QNTSTY_NOQNT; - tccp->numgbits = 2; + + if (OPJ_IS_CINEMA(parameters->rsiz) && + parameters->rsiz == OPJ_PROFILE_CINEMA_2K) { + /* From https://github.com/uclouvain/openjpeg/issues/1340 */ + tccp->numgbits = 1; + } else { + tccp->numgbits = 2; + } if ((OPJ_INT32)i == parameters->roi_compno) { tccp->roishift = parameters->roi_shift; @@ -8390,7 +8459,8 @@ static OPJ_BOOL opj_j2k_add_tlmarker(OPJ_UINT32 tileno, if (type == J2K_MS_SOT) { OPJ_UINT32 l_current_tile_part = cstr_index->tile_index[tileno].current_tpsno; - if (cstr_index->tile_index[tileno].tp_index) { + if (cstr_index->tile_index[tileno].tp_index && + l_current_tile_part < cstr_index->tile_index[tileno].nb_tps) { cstr_index->tile_index[tileno].tp_index[l_current_tile_part].start_pos = pos; } @@ -8467,13 +8537,6 @@ OPJ_BOOL opj_j2k_read_header(opj_stream_private_t *p_stream, /* Copy codestream image information to the output image */ opj_copy_image_header(p_j2k->m_private_image, *p_image); - /*Allocate and initialize some elements of codestrem index*/ - if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) { - opj_image_destroy(*p_image); - *p_image = NULL; - return OPJ_FALSE; - } - return OPJ_TRUE; } @@ -8825,6 +8888,87 @@ static OPJ_BOOL opj_j2k_decoding_validation(opj_j2k_t *p_j2k, return l_is_valid; } +/** Fill p_j2k->cstr_index->tp_index[].start_pos/end_pos fields from TLM marker segments */ +static void opj_j2k_build_tp_index_from_tlm(opj_j2k_t* p_j2k, + opj_event_mgr_t * p_manager) +{ + opj_j2k_tlm_info_t* l_tlm; + OPJ_UINT32 i; + OPJ_OFF_T l_cur_offset; + + assert(p_j2k->cstr_index->main_head_end > 0); + assert(p_j2k->cstr_index->nb_of_tiles > 0); + assert(p_j2k->cstr_index->tile_index != NULL); + + l_tlm = &(p_j2k->m_specific_param.m_decoder.m_tlm); + + if (l_tlm->m_entries_count == 0) { + l_tlm->m_is_invalid = OPJ_TRUE; + return; + } + + if (l_tlm->m_is_invalid) { + return; + } + + /* Initial pass to count the number of tile-parts per tile */ + for (i = 0; i < 
l_tlm->m_entries_count; ++i) { + OPJ_UINT32 l_tile_index_no = l_tlm->m_tile_part_infos[i].m_tile_index; + assert(l_tile_index_no < p_j2k->cstr_index->nb_of_tiles); + p_j2k->cstr_index->tile_index[l_tile_index_no].tileno = l_tile_index_no; + ++p_j2k->cstr_index->tile_index[l_tile_index_no].current_nb_tps; + } + + /* Now check that all tiles have at least one tile-part */ + for (i = 0; i < p_j2k->cstr_index->nb_of_tiles; ++i) { + if (p_j2k->cstr_index->tile_index[i].current_nb_tps == 0) { + opj_event_msg(p_manager, EVT_ERROR, + "opj_j2k_build_tp_index_from_tlm(): tile %d has no " + "registered tile-part in TLM marker segments.\n", i); + goto error; + } + } + + /* Final pass to fill p_j2k->cstr_index */ + l_cur_offset = p_j2k->cstr_index->main_head_end; + for (i = 0; i < l_tlm->m_entries_count; ++i) { + OPJ_UINT32 l_tile_index_no = l_tlm->m_tile_part_infos[i].m_tile_index; + opj_tile_index_t* l_tile_index = & + (p_j2k->cstr_index->tile_index[l_tile_index_no]); + if (!l_tile_index->tp_index) { + l_tile_index->tp_index = (opj_tp_index_t *) opj_calloc( + l_tile_index->current_nb_tps, sizeof(opj_tp_index_t)); + if (! 
l_tile_index->tp_index) { + opj_event_msg(p_manager, EVT_ERROR, + "opj_j2k_build_tp_index_from_tlm(): tile index allocation failed\n"); + goto error; + } + } + + assert(l_tile_index->nb_tps < l_tile_index->current_nb_tps); + l_tile_index->tp_index[l_tile_index->nb_tps].start_pos = l_cur_offset; + /* We don't know how to set the tp_index[].end_header field, but this is not really needed */ + /* If there would be no markers between SOT and SOD, that would be : */ + /* l_tile_index->tp_index[l_tile_index->nb_tps].end_header = l_cur_offset + 12; */ + l_tile_index->tp_index[l_tile_index->nb_tps].end_pos = l_cur_offset + + l_tlm->m_tile_part_infos[i].m_length; + ++l_tile_index->nb_tps; + + l_cur_offset += l_tlm->m_tile_part_infos[i].m_length; + } + + return; + +error: + l_tlm->m_is_invalid = OPJ_TRUE; + for (i = 0; i < l_tlm->m_entries_count; ++i) { + OPJ_UINT32 l_tile_index = l_tlm->m_tile_part_infos[i].m_tile_index; + p_j2k->cstr_index->tile_index[l_tile_index].current_nb_tps = 0; + opj_free(p_j2k->cstr_index->tile_index[l_tile_index].tp_index); + p_j2k->cstr_index->tile_index[l_tile_index].tp_index = NULL; + } +} + static OPJ_BOOL opj_j2k_read_header_procedure(opj_j2k_t *p_j2k, opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager) @@ -9004,6 +9148,9 @@ static OPJ_BOOL opj_j2k_read_header_procedure(opj_j2k_t *p_j2k, /* Position of the last element if the main header */ p_j2k->cstr_index->main_head_end = (OPJ_UINT32) opj_stream_tell(p_stream) - 2; + /* Build tile-part index from TLM information */ + opj_j2k_build_tp_index_from_tlm(p_j2k, p_manager); + /* Next step: read a tile-part header */ p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_TPHSOT; @@ -9227,6 +9374,12 @@ void opj_j2k_destroy(opj_j2k_t *p_j2k) p_j2k->m_specific_param.m_decoder.m_comps_indices_to_decode = 00; p_j2k->m_specific_param.m_decoder.m_numcomps_to_decode = 0; + opj_free(p_j2k->m_specific_param.m_decoder.m_tlm.m_tile_part_infos); + 
p_j2k->m_specific_param.m_decoder.m_tlm.m_tile_part_infos = NULL; + + opj_free(p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset); + p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset = NULL; + } else { if (p_j2k->m_specific_param.m_encoder.m_encoded_tile_data) { @@ -9577,6 +9730,39 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, while ((!p_j2k->m_specific_param.m_decoder.m_can_decode) && (l_current_marker != J2K_MS_EOC)) { + if (p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts > 0 && + p_j2k->m_specific_param.m_decoder.m_idx_intersecting_tile_parts < + p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts) { + OPJ_OFF_T next_tp_sot_pos; + + next_tp_sot_pos = + p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset[p_j2k->m_specific_param.m_decoder.m_idx_intersecting_tile_parts]; + ++p_j2k->m_specific_param.m_decoder.m_idx_intersecting_tile_parts; + if (!(opj_stream_read_seek(p_stream, + next_tp_sot_pos, + p_manager))) { + opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n"); + return OPJ_FALSE; + } + + /* Try to read 2 bytes (the marker ID) from stream and copy them into the buffer */ + if (opj_stream_read_data(p_stream, + p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) { + opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n"); + return OPJ_FALSE; + } + + /* Read 2 bytes from the buffer as the marker ID */ + opj_read_bytes(p_j2k->m_specific_param.m_decoder.m_header_data, + &l_current_marker, + 2); + + if (l_current_marker != J2K_MS_SOT) { + opj_event_msg(p_manager, EVT_ERROR, "Did not get expected SOT marker\n"); + return OPJ_FALSE; + } + } + /* Try to read until the Start Of Data is detected */ while (l_current_marker != J2K_MS_SOD) { @@ -9610,7 +9796,13 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, } /* Why this condition? 
FIXME */ - if (p_j2k->m_specific_param.m_decoder.m_state & J2K_STATE_TPH) { + if ((p_j2k->m_specific_param.m_decoder.m_state & J2K_STATE_TPH) && + p_j2k->m_specific_param.m_decoder.m_sot_length != 0) { + if (p_j2k->m_specific_param.m_decoder.m_sot_length < l_marker_size + 2) { + opj_event_msg(p_manager, EVT_ERROR, + "Sot length is less than marker size + marker ID\n"); + return OPJ_FALSE; + } p_j2k->m_specific_param.m_decoder.m_sot_length -= (l_marker_size + 2); } l_marker_size -= 2; /* Subtract the size of the marker ID already read */ @@ -9720,14 +9912,78 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, if (! opj_j2k_read_sod(p_j2k, p_stream, p_manager)) { return OPJ_FALSE; } + + /* Check if we can use the TLM index to access the next tile-part */ + if (!p_j2k->m_specific_param.m_decoder.m_can_decode && + p_j2k->m_specific_param.m_decoder.m_tile_ind_to_dec >= 0 && + p_j2k->m_current_tile_number == (OPJ_UINT32) + p_j2k->m_specific_param.m_decoder.m_tile_ind_to_dec && + !p_j2k->m_specific_param.m_decoder.m_tlm.m_is_invalid && + opj_stream_has_seek(p_stream)) { + l_tcp = p_j2k->m_cp.tcps + p_j2k->m_current_tile_number; + if (l_tcp->m_nb_tile_parts == + p_j2k->cstr_index->tile_index[p_j2k->m_current_tile_number].nb_tps && + (OPJ_UINT32)l_tcp->m_current_tile_part_number + 1 < l_tcp->m_nb_tile_parts) { + const OPJ_OFF_T next_tp_sot_pos = p_j2k->cstr_index->tile_index[ + p_j2k->m_current_tile_number].tp_index[l_tcp->m_current_tile_part_number + + 1].start_pos; + + if (next_tp_sot_pos != opj_stream_tell(p_stream)) { +#if 0 + opj_event_msg(p_manager, EVT_INFO, + "opj_j2k_read_tile_header(tile=%u): seek to tile part %u at %" PRId64 "\n", + p_j2k->m_current_tile_number, + l_tcp->m_current_tile_part_number + 1, + next_tp_sot_pos); +#endif + + if (!(opj_stream_read_seek(p_stream, + next_tp_sot_pos, + p_manager))) { + opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n"); + return OPJ_FALSE; + } + } + + /* Try to read 2 bytes (the marker ID) from stream 
and copy them into the buffer */ + if (opj_stream_read_data(p_stream, + p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) { + opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n"); + return OPJ_FALSE; + } + + /* Read 2 bytes from the buffer as the marker ID */ + opj_read_bytes(p_j2k->m_specific_param.m_decoder.m_header_data, + &l_current_marker, + 2); + + if (l_current_marker != J2K_MS_SOT) { + opj_event_msg(p_manager, EVT_ERROR, "Did not get expected SOT marker\n"); + return OPJ_FALSE; + } + + continue; + } + } + if (p_j2k->m_specific_param.m_decoder.m_can_decode && !p_j2k->m_specific_param.m_decoder.m_nb_tile_parts_correction_checked) { /* Issue 254 */ - OPJ_BOOL l_correction_needed; + OPJ_BOOL l_correction_needed = OPJ_FALSE; p_j2k->m_specific_param.m_decoder.m_nb_tile_parts_correction_checked = 1; - if (!opj_j2k_need_nb_tile_parts_correction(p_stream, - p_j2k->m_current_tile_number, &l_correction_needed, p_manager)) { + if (p_j2k->m_cp.tcps[p_j2k->m_current_tile_number].m_nb_tile_parts == 1) { + /* Skip opj_j2k_need_nb_tile_parts_correction() if there is + * only a single tile part declared. The + * opj_j2k_need_nb_tile_parts_correction() hack was needed + * for files with 5 declared tileparts (where they were + * actually 6). 
+ * Doing it systematically hurts performance when reading + * Sentinel2 L1C JPEG2000 files as explained in + * https://lists.osgeo.org/pipermail/gdal-dev/2024-November/059805.html + */ + } else if (!opj_j2k_need_nb_tile_parts_correction(p_stream, + p_j2k->m_current_tile_number, &l_correction_needed, p_manager)) { opj_event_msg(p_manager, EVT_ERROR, "opj_j2k_apply_nb_tile_parts_correction error\n"); return OPJ_FALSE; @@ -11303,6 +11559,17 @@ static void opj_j2k_dump_MH_index(opj_j2k_t* p_j2k, FILE* out_stream) OPJ_UINT32 l_acc_nb_of_tile_part = 0; for (it_tile = 0; it_tile < cstr_index->nb_of_tiles ; it_tile++) { l_acc_nb_of_tile_part += cstr_index->tile_index[it_tile].nb_tps; + + /* To avoid regenerating expected opj_dump results from the test */ + /* suite when there is a TLM marker present */ + if (cstr_index->tile_index[it_tile].nb_tps && + cstr_index->tile_index[it_tile].tp_index && + cstr_index->tile_index[it_tile].tp_index[0].start_pos > 0 && + cstr_index->tile_index[it_tile].tp_index[0].end_header == 0 && + getenv("OJP_DO_NOT_DISPLAY_TILE_INDEX_IF_TLM") != NULL) { + l_acc_nb_of_tile_part = 0; + break; + } } if (l_acc_nb_of_tile_part) { @@ -11666,6 +11933,18 @@ static OPJ_BOOL opj_j2k_are_all_used_components_decoded(opj_j2k_t *p_j2k, return OPJ_TRUE; } +static int CompareOffT(const void* a, const void* b) +{ + const OPJ_OFF_T offA = *(const OPJ_OFF_T*)a; + const OPJ_OFF_T offB = *(const OPJ_OFF_T*)b; + if (offA < offB) { + return -1; + } + if (offA == offB) { + return 0; + } + return 1; +} static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, opj_stream_private_t *p_stream, @@ -11676,6 +11955,7 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, OPJ_INT32 l_tile_x0, l_tile_y0, l_tile_x1, l_tile_y1; OPJ_UINT32 l_nb_comps; OPJ_UINT32 nr_tiles = 0; + OPJ_OFF_T end_pos = 0; /* Particular case for whole single tile decoding */ /* We can avoid allocating intermediate tile buffers */ @@ -11698,8 +11978,9 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t 
*p_j2k, return OPJ_FALSE; } - if (! opj_j2k_decode_tile(p_j2k, l_current_tile_no, NULL, 0, - p_stream, p_manager)) { + if (!l_go_on || + ! opj_j2k_decode_tile(p_j2k, l_current_tile_no, NULL, 0, + p_stream, p_manager)) { opj_event_msg(p_manager, EVT_ERROR, "Failed to decode tile 1/1\n"); return OPJ_FALSE; } @@ -11717,6 +11998,77 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, return OPJ_TRUE; } + p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts = 0; + p_j2k->m_specific_param.m_decoder.m_idx_intersecting_tile_parts = 0; + opj_free(p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset); + p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset = NULL; + + /* If the area to decode only intersects a subset of tiles, and we have + * valid TLM information, then use it to plan the tilepart offsets to + * seek to. + */ + if (!(p_j2k->m_specific_param.m_decoder.m_start_tile_x == 0 && + p_j2k->m_specific_param.m_decoder.m_start_tile_y == 0 && + p_j2k->m_specific_param.m_decoder.m_end_tile_x == p_j2k->m_cp.tw && + p_j2k->m_specific_param.m_decoder.m_end_tile_y == p_j2k->m_cp.th) && + !p_j2k->m_specific_param.m_decoder.m_tlm.m_is_invalid && + opj_stream_has_seek(p_stream)) { + OPJ_UINT32 m_num_intersecting_tile_parts = 0; + + OPJ_UINT32 j; + for (j = 0; j < p_j2k->m_cp.tw * p_j2k->m_cp.th; ++j) { + if (p_j2k->cstr_index->tile_index[j].nb_tps > 0 && + p_j2k->cstr_index->tile_index[j].tp_index[ + p_j2k->cstr_index->tile_index[j].nb_tps - 1].end_pos > end_pos) { + end_pos = p_j2k->cstr_index->tile_index[j].tp_index[ + p_j2k->cstr_index->tile_index[j].nb_tps - 1].end_pos; + } + } + + for (j = p_j2k->m_specific_param.m_decoder.m_start_tile_y; + j < p_j2k->m_specific_param.m_decoder.m_end_tile_y; ++j) { + OPJ_UINT32 i; + for (i = p_j2k->m_specific_param.m_decoder.m_start_tile_x; + i < p_j2k->m_specific_param.m_decoder.m_end_tile_x; ++i) { + const OPJ_UINT32 tile_number = j * p_j2k->m_cp.tw + i; + m_num_intersecting_tile_parts += + 
p_j2k->cstr_index->tile_index[tile_number].nb_tps; + } + } + + p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset = + (OPJ_OFF_T*) + opj_malloc(m_num_intersecting_tile_parts * sizeof(OPJ_OFF_T)); + if (m_num_intersecting_tile_parts > 0 && + p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset) { + OPJ_UINT32 idx = 0; + for (j = p_j2k->m_specific_param.m_decoder.m_start_tile_y; + j < p_j2k->m_specific_param.m_decoder.m_end_tile_y; ++j) { + OPJ_UINT32 i; + for (i = p_j2k->m_specific_param.m_decoder.m_start_tile_x; + i < p_j2k->m_specific_param.m_decoder.m_end_tile_x; ++i) { + const OPJ_UINT32 tile_number = j * p_j2k->m_cp.tw + i; + OPJ_UINT32 k; + for (k = 0; k < p_j2k->cstr_index->tile_index[tile_number].nb_tps; ++k) { + const OPJ_OFF_T next_tp_sot_pos = + p_j2k->cstr_index->tile_index[tile_number].tp_index[k].start_pos; + p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset[idx] = + next_tp_sot_pos; + ++idx; + } + } + } + + p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts = idx; + + /* Sort by increasing offset */ + qsort(p_j2k->m_specific_param.m_decoder.m_intersecting_tile_parts_offset, + p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts, + sizeof(OPJ_OFF_T), + CompareOffT); + } + } + for (;;) { if (p_j2k->m_cp.tw == 1 && p_j2k->m_cp.th == 1 && p_j2k->m_cp.tcps[0].m_data != NULL) { @@ -11776,6 +12128,12 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, if (++nr_tiles == p_j2k->m_cp.th * p_j2k->m_cp.tw) { break; } + if (p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts > 0 && + p_j2k->m_specific_param.m_decoder.m_idx_intersecting_tile_parts == + p_j2k->m_specific_param.m_decoder.m_num_intersecting_tile_parts) { + opj_stream_seek(p_stream, end_pos + 2, p_manager); + break; + } } if (! 
opj_j2k_are_all_used_components_decoded(p_j2k, p_manager)) { @@ -11819,12 +12177,6 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, OPJ_UINT32 l_nb_tiles; OPJ_UINT32 i; - /*Allocate and initialize some elements of codestrem index if not already done*/ - if (!p_j2k->cstr_index->tile_index) { - if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) { - return OPJ_FALSE; - } - } /* Move into the codestream to the first SOT used to decode the desired tile */ l_tile_no_to_dec = (OPJ_UINT32) p_j2k->m_specific_param.m_decoder.m_tile_ind_to_dec; @@ -11839,12 +12191,38 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, return OPJ_FALSE; } } else { + OPJ_OFF_T sot_pos = + p_j2k->cstr_index->tile_index[l_tile_no_to_dec].tp_index[0].start_pos; + OPJ_UINT32 l_marker; + +#if 0 + opj_event_msg(p_manager, EVT_INFO, + "opj_j2k_decode_one_tile(%u): seek to %" PRId64 "\n", + l_tile_no_to_dec, + sot_pos); +#endif if (!(opj_stream_read_seek(p_stream, - p_j2k->cstr_index->tile_index[l_tile_no_to_dec].tp_index[0].start_pos + 2, + sot_pos, p_manager))) { opj_event_msg(p_manager, EVT_ERROR, "Problem with seek function\n"); return OPJ_FALSE; } + + /* Try to read 2 bytes (the marker ID) from stream and copy them into the buffer */ + if (opj_stream_read_data(p_stream, + p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) { + opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n"); + return OPJ_FALSE; + } + + /* Read 2 bytes from the buffer as the marker ID */ + opj_read_bytes(p_j2k->m_specific_param.m_decoder.m_header_data, &l_marker, + 2); + + if (l_marker != J2K_MS_SOT) { + opj_event_msg(p_manager, EVT_ERROR, "Did not get expected SOT marker\n"); + return OPJ_FALSE; + } } /* Special case if we have previously read the EOC marker (if the previous tile getted is the last ) */ if (p_j2k->m_specific_param.m_decoder.m_state == J2K_STATE_EOC) { diff --git a/contrib/libs/openjpeg/j2k.h b/contrib/libs/openjpeg/j2k.h index e0b9688a353..bcf70a419c6 100644 --- 
a/contrib/libs/openjpeg/j2k.h +++ b/contrib/libs/openjpeg/j2k.h @@ -466,6 +466,24 @@ typedef struct opj_cp { /* <<UniPG */ } opj_cp_t; +/** Entry of a TLM marker segment */ +typedef struct opj_j2k_tlm_tile_part_info { + /** Tile index of the tile part. Ttlmi field */ + OPJ_UINT16 m_tile_index; + /** Length in bytes, from the beginning of the SOT marker to the end of + * the bit stream data for that tile-part. Ptlmi field */ + OPJ_UINT32 m_length; +} opj_j2k_tlm_tile_part_info_t; + +/** Information got from the concatenation of TLM marker semgnets. */ +typedef struct opj_j2k_tlm_info { + /** Number of entries in m_tile_part_infos. */ + OPJ_UINT32 m_entries_count; + /** Array of m_entries_count values. */ + opj_j2k_tlm_tile_part_info_t* m_tile_part_infos; + + OPJ_BOOL m_is_invalid; +} opj_j2k_tlm_info_t; typedef struct opj_j2k_dec { /** locate in which part of the codestream the decoder is (main header, tile header, end) */ @@ -499,6 +517,18 @@ typedef struct opj_j2k_dec { OPJ_UINT32 m_numcomps_to_decode; OPJ_UINT32 *m_comps_indices_to_decode; + opj_j2k_tlm_info_t m_tlm; + + /** Below if used when there's TLM information available and we use + * opj_set_decoded_area() to a subset of all tiles. + */ + /* Current index in m_intersecting_tile_parts_offset[] to seek to */ + OPJ_UINT32 m_idx_intersecting_tile_parts; + /* Number of elements of m_intersecting_tile_parts_offset[] */ + OPJ_UINT32 m_num_intersecting_tile_parts; + /* Start offset of contributing tile parts */ + OPJ_OFF_T* m_intersecting_tile_parts_offset; + /** to tell that a tile can be decoded. 
*/ OPJ_BITFIELD m_can_decode : 1; OPJ_BITFIELD m_discard_tiles : 1; diff --git a/contrib/libs/openjpeg/jp2.c b/contrib/libs/openjpeg/jp2.c index 6015190e1f5..4df055a542a 100644 --- a/contrib/libs/openjpeg/jp2.c +++ b/contrib/libs/openjpeg/jp2.c @@ -1989,12 +1989,16 @@ OPJ_BOOL opj_jp2_setup_encoder(opj_jp2_t *jp2, jp2->enumcs = 0; } else { jp2->meth = 1; - if (image->color_space == 1) { + if (image->color_space == OPJ_CLRSPC_SRGB) { jp2->enumcs = 16; /* sRGB as defined by IEC 61966-2-1 */ - } else if (image->color_space == 2) { - jp2->enumcs = 17; /* greyscale */ - } else if (image->color_space == 3) { + } else if (image->color_space == OPJ_CLRSPC_GRAY) { + jp2->enumcs = 17; + } else if (image->color_space == OPJ_CLRSPC_SYCC) { jp2->enumcs = 18; /* YUV */ + } else if (image->color_space == OPJ_CLRSPC_EYCC) { + jp2->enumcs = 24; + } else if (image->color_space == OPJ_CLRSPC_CMYK) { + jp2->enumcs = 12; } } diff --git a/contrib/libs/openjpeg/openjpeg.h b/contrib/libs/openjpeg/openjpeg.h index 67d168bb578..59abd323aed 100644 --- a/contrib/libs/openjpeg/openjpeg.h +++ b/contrib/libs/openjpeg/openjpeg.h @@ -546,7 +546,7 @@ typedef struct opj_cparameters { } opj_cparameters_t; #define OPJ_DPARAMETERS_IGNORE_PCLR_CMAP_CDEF_FLAG 0x0001 -#define OPJ_DPARAMETERS_DUMP_FLAG 0x0002 +#define OPJ_DPARAMETERS_DUMP_FLAG 0x0002 /** * Decompression parameters @@ -772,7 +772,7 @@ typedef struct opj_packet_info { OPJ_OFF_T end_ph_pos; /** packet end position */ OPJ_OFF_T end_pos; - /** packet distorsion */ + /** packet distortion */ double disto; } opj_packet_info_t; @@ -1348,9 +1348,13 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_setup_decoder(opj_codec_t *p_codec, opj_dparameters_t *parameters); /** - * Set strict decoding parameter for this decoder. If strict decoding is enabled, partial bit - * streams will fail to decode. If strict decoding is disabled, the decoder will decode partial - * bitstreams as much as possible without erroring + * Set strict decoding parameter for this decoder. 
+ * If strict decoding is enabled, partial bit streams will fail to decode, and + * the check for invalid TPSOT values added in https://github.com/uclouvain/openjpeg/pull/514 + * will be disabled. + * If strict decoding is disabled, the decoder will decode partial + * bitstreams as much as possible without erroring, and the TPSOT fixing logic + * will be enabled. * * @param p_codec decompressor handler * @param strict OPJ_TRUE to enable strict decoding, OPJ_FALSE to disable diff --git a/contrib/libs/openjpeg/opj_common.h b/contrib/libs/openjpeg/opj_common.h index ee8adf4725c..2923a35b7fe 100644 --- a/contrib/libs/openjpeg/opj_common.h +++ b/contrib/libs/openjpeg/opj_common.h @@ -28,8 +28,8 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef OPJ_COMMMON_H -#define OPJ_COMMMON_H +#ifndef OPJ_COMMON_H +#define OPJ_COMMON_H /* ========================================================== @@ -44,4 +44,4 @@ #define OPJ_COMP_PARAM_DEFAULT_PROG_ORDER OPJ_LRCP #define OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION 6 -#endif /* OPJ_COMMMON_H */ +#endif /* OPJ_COMMON_H */ diff --git a/contrib/libs/openjpeg/opj_config.h b/contrib/libs/openjpeg/opj_config.h index e495440f4fb..50d156357da 100644 --- a/contrib/libs/openjpeg/opj_config.h +++ b/contrib/libs/openjpeg/opj_config.h @@ -9,6 +9,6 @@ /* Version number. 
*/ #define OPJ_VERSION_MAJOR 2 #define OPJ_VERSION_MINOR 5 -#define OPJ_VERSION_BUILD 2 +#define OPJ_VERSION_BUILD 3 #endif diff --git a/contrib/libs/openjpeg/opj_config_private-linux.h b/contrib/libs/openjpeg/opj_config_private-linux.h index 81a15df7f27..8076e7d32a6 100644 --- a/contrib/libs/openjpeg/opj_config_private-linux.h +++ b/contrib/libs/openjpeg/opj_config_private-linux.h @@ -1,6 +1,6 @@ /* create opj_config_private.h for CMake */ -#define OPJ_PACKAGE_VERSION "2.5.2" +#define OPJ_PACKAGE_VERSION "2.5.3" /* Not used by openjp2*/ /*#define HAVE_MEMORY_H 1*/ diff --git a/contrib/libs/openjpeg/t1.c b/contrib/libs/openjpeg/t1.c index 52e466eb974..98dce47f556 100644 --- a/contrib/libs/openjpeg/t1.c +++ b/contrib/libs/openjpeg/t1.c @@ -47,6 +47,9 @@ #ifdef __SSE2__ #include <emmintrin.h> #endif +#if (defined(__AVX2__) || defined(__AVX512F__)) +#include <immintrin.h> +#endif #if defined(__GNUC__) #pragma GCC poison malloc calloc realloc free @@ -1796,6 +1799,39 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; for (j = 0; j < cblk_h; ++j) { + //positive -> round down aka. (83)/2 = 41.5 -> 41 + //negative -> round up aka. 
(-83)/2 = -41.5 -> -41 +#if defined(__AVX512F__) + OPJ_INT32* ptr_in = datap + (j * cblk_w); + OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w); + for (i = 0; i < cblk_w / 16; ++i) { + __m512i in_avx = _mm512_loadu_si512((__m512i*)(ptr_in)); + const __m512i add_avx = _mm512_srli_epi32(in_avx, 31); + in_avx = _mm512_add_epi32(in_avx, add_avx); + _mm512_storeu_si512((__m512i*)(ptr_out), _mm512_srai_epi32(in_avx, 1)); + ptr_in += 16; + ptr_out += 16; + } + + for (i = 0; i < cblk_w % 16; ++i) { + ptr_out[i] = ptr_in[i] / 2; + } +#elif defined(__AVX2__) + OPJ_INT32* ptr_in = datap + (j * cblk_w); + OPJ_INT32* ptr_out = tiledp + (j * (OPJ_SIZE_T)tile_w); + for (i = 0; i < cblk_w / 8; ++i) { + __m256i in_avx = _mm256_loadu_si256((__m256i*)(ptr_in)); + const __m256i add_avx = _mm256_srli_epi32(in_avx, 31); + in_avx = _mm256_add_epi32(in_avx, add_avx); + _mm256_storeu_si256((__m256i*)(ptr_out), _mm256_srai_epi32(in_avx, 1)); + ptr_in += 8; + ptr_out += 8; + } + + for (i = 0; i < cblk_w % 8; ++i) { + ptr_out[i] = ptr_in[i] / 2; + } +#else i = 0; for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) { OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U]; @@ -1811,6 +1847,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) OPJ_INT32 tmp = datap[(j * cblk_w) + i]; ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2; } +#endif } } else { /* if (tccp->qmfbid == 0) */ const float stepsize = 0.5f * band->stepsize; @@ -2006,10 +2043,16 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3); opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4); + if (cblk->corrupted) { + assert(cblk->numchunks == 0); + return OPJ_TRUE; + } + /* Even if we have a single chunk, in multi-threaded decoding */ /* the insertion of our synthetic marker might potentially override */ /* valid codestream of other codeblocks decoded in parallel. 
*/ - if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) { + if (cblk->numchunks > 1 || (t1->mustuse_cblkdatabuffer && + cblk->numchunks > 0)) { OPJ_UINT32 i; OPJ_UINT32 cblk_len; @@ -2124,7 +2167,7 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, opj_mutex_lock(p_manager_mutex); } opj_event_msg(p_manager, EVT_WARNING, - "PTERM check failure: %d synthetized 0xFF markers read\n", + "PTERM check failure: %d synthesized 0xFF markers read\n", mqc->end_of_byte_stream_counter); if (p_manager_mutex) { opj_mutex_unlock(p_manager_mutex); @@ -2227,6 +2270,111 @@ static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls) OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data; /* Change from "natural" order to "zigzag" order of T1 passes */ for (j = 0; j < (cblk_h & ~3U); j += 4) { +#if defined(__AVX512F__) + const __m512i perm1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 12, 13); + const __m512i perm2 = _mm512_setr_epi64(6, 7, 14, 15, 0, 0, 0, 0); + OPJ_UINT32* ptr = tiledp_u; + for (i = 0; i < cblk_w / 16; ++i) { + // INPUT OUTPUT + // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + // 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + // 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F 08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + // 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F + __m512i in1 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr + + (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS); + __m512i in2 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr + + (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS); + __m512i in3 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr + + (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS); + __m512i in4 = _mm512_slli_epi32(_mm512_loadu_si512((__m512i*)(ptr + + (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS); + + __m512i tmp1 = _mm512_unpacklo_epi32(in1, in2); + __m512i tmp2 = 
_mm512_unpacklo_epi32(in3, in4); + __m512i tmp3 = _mm512_unpackhi_epi32(in1, in2); + __m512i tmp4 = _mm512_unpackhi_epi32(in3, in4); + + in1 = _mm512_unpacklo_epi64(tmp1, tmp2); + in2 = _mm512_unpacklo_epi64(tmp3, tmp4); + in3 = _mm512_unpackhi_epi64(tmp1, tmp2); + in4 = _mm512_unpackhi_epi64(tmp3, tmp4); + + _mm_storeu_si128((__m128i*)(t1data + 0), _mm512_castsi512_si128(in1)); + _mm_storeu_si128((__m128i*)(t1data + 4), _mm512_castsi512_si128(in3)); + _mm_storeu_si128((__m128i*)(t1data + 8), _mm512_castsi512_si128(in2)); + _mm_storeu_si128((__m128i*)(t1data + 12), _mm512_castsi512_si128(in4)); + + tmp1 = _mm512_permutex2var_epi64(in1, perm1, in3); + tmp2 = _mm512_permutex2var_epi64(in2, perm1, in4); + + _mm256_storeu_si256((__m256i*)(t1data + 16), _mm512_castsi512_si256(tmp1)); + _mm256_storeu_si256((__m256i*)(t1data + 24), _mm512_castsi512_si256(tmp2)); + _mm256_storeu_si256((__m256i*)(t1data + 32), _mm512_extracti64x4_epi64(tmp1, + 0x1)); + _mm256_storeu_si256((__m256i*)(t1data + 40), _mm512_extracti64x4_epi64(tmp2, + 0x1)); + _mm256_storeu_si256((__m256i*)(t1data + 48), + _mm512_castsi512_si256(_mm512_permutex2var_epi64(in1, perm2, in3))); + _mm256_storeu_si256((__m256i*)(t1data + 56), + _mm512_castsi512_si256(_mm512_permutex2var_epi64(in2, perm2, in4))); + t1data += 64; + ptr += 16; + } + for (i = 0; i < cblk_w % 16; ++i) { + t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS; + t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS; + t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS; + t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS; + t1data += 4; + ptr += 1; + } +#elif defined(__AVX2__) + OPJ_UINT32* ptr = tiledp_u; + for (i = 0; i < cblk_w / 8; ++i) { + // INPUT OUTPUT + // 00 01 02 03 04 05 06 07 00 10 20 30 01 11 21 31 + // 10 11 12 13 14 15 16 17 02 12 22 32 03 13 23 33 + // 20 21 22 23 24 25 26 27 04 14 24 34 05 15 25 35 + // 30 31 32 33 34 35 36 37 06 16 26 36 07 17 27 37 + __m256i in1 = 
_mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr + + (j + 0) * tile_w)), T1_NMSEDEC_FRACBITS); + __m256i in2 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr + + (j + 1) * tile_w)), T1_NMSEDEC_FRACBITS); + __m256i in3 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr + + (j + 2) * tile_w)), T1_NMSEDEC_FRACBITS); + __m256i in4 = _mm256_slli_epi32(_mm256_loadu_si256((__m256i*)(ptr + + (j + 3) * tile_w)), T1_NMSEDEC_FRACBITS); + + __m256i tmp1 = _mm256_unpacklo_epi32(in1, in2); + __m256i tmp2 = _mm256_unpacklo_epi32(in3, in4); + __m256i tmp3 = _mm256_unpackhi_epi32(in1, in2); + __m256i tmp4 = _mm256_unpackhi_epi32(in3, in4); + + in1 = _mm256_unpacklo_epi64(tmp1, tmp2); + in2 = _mm256_unpacklo_epi64(tmp3, tmp4); + in3 = _mm256_unpackhi_epi64(tmp1, tmp2); + in4 = _mm256_unpackhi_epi64(tmp3, tmp4); + + _mm_storeu_si128((__m128i*)(t1data + 0), _mm256_castsi256_si128(in1)); + _mm_storeu_si128((__m128i*)(t1data + 4), _mm256_castsi256_si128(in3)); + _mm_storeu_si128((__m128i*)(t1data + 8), _mm256_castsi256_si128(in2)); + _mm_storeu_si128((__m128i*)(t1data + 12), _mm256_castsi256_si128(in4)); + _mm256_storeu_si256((__m256i*)(t1data + 16), _mm256_permute2x128_si256(in1, in3, + 0x31)); + _mm256_storeu_si256((__m256i*)(t1data + 24), _mm256_permute2x128_si256(in2, in4, + 0x31)); + t1data += 32; + ptr += 8; + } + for (i = 0; i < cblk_w % 8; ++i) { + t1data[0] = ptr[(j + 0) * tile_w] << T1_NMSEDEC_FRACBITS; + t1data[1] = ptr[(j + 1) * tile_w] << T1_NMSEDEC_FRACBITS; + t1data[2] = ptr[(j + 2) * tile_w] << T1_NMSEDEC_FRACBITS; + t1data[3] = ptr[(j + 3) * tile_w] << T1_NMSEDEC_FRACBITS; + t1data += 4; + ptr += 1; + } +#else for (i = 0; i < cblk_w; ++i) { t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS; t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS; @@ -2234,6 +2382,7 @@ static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls) t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS; t1data += 4; } +#endif } if 
(j < cblk_h) { for (i = 0; i < cblk_w; ++i) { diff --git a/contrib/libs/openjpeg/t2.c b/contrib/libs/openjpeg/t2.c index 781a6a59a16..4e8cf601828 100644 --- a/contrib/libs/openjpeg/t2.c +++ b/contrib/libs/openjpeg/t2.c @@ -1111,6 +1111,7 @@ static OPJ_BOOL opj_t2_read_packet_header(opj_t2_t* p_t2, /* SOP markers */ if (p_tcp->csty & J2K_CP_CSTY_SOP) { + /* SOP markers are allowed (i.e. optional), just warn */ if (p_max_length < 6) { opj_event_msg(p_manager, EVT_WARNING, "Not enough space for expected SOP marker\n"); @@ -1163,12 +1164,15 @@ static OPJ_BOOL opj_t2_read_packet_header(opj_t2_t* p_t2, /* EPH markers */ if (p_tcp->csty & J2K_CP_CSTY_EPH) { + /* EPH markers are required */ if ((*l_modified_length_ptr - (OPJ_UINT32)(l_header_data - *l_header_data_start)) < 2U) { - opj_event_msg(p_manager, EVT_WARNING, - "Not enough space for expected EPH marker\n"); + opj_event_msg(p_manager, EVT_ERROR, + "Not enough space for required EPH marker\n"); + return OPJ_FALSE; } else if ((*l_header_data) != 0xff || (*(l_header_data + 1) != 0x92)) { - opj_event_msg(p_manager, EVT_WARNING, "Expected EPH marker\n"); + opj_event_msg(p_manager, EVT_ERROR, "Expected EPH marker\n"); + return OPJ_FALSE; } else { l_header_data += 2; } @@ -1340,12 +1344,15 @@ static OPJ_BOOL opj_t2_read_packet_header(opj_t2_t* p_t2, /* EPH markers */ if (p_tcp->csty & J2K_CP_CSTY_EPH) { + /* EPH markers are required */ if ((*l_modified_length_ptr - (OPJ_UINT32)(l_header_data - *l_header_data_start)) < 2U) { - opj_event_msg(p_manager, EVT_WARNING, - "Not enough space for expected EPH marker\n"); + opj_event_msg(p_manager, EVT_ERROR, + "Not enough space for required EPH marker\n"); + return OPJ_FALSE; } else if ((*l_header_data) != 0xff || (*(l_header_data + 1) != 0x92)) { - opj_event_msg(p_manager, EVT_WARNING, "Expected EPH marker\n"); + opj_event_msg(p_manager, EVT_ERROR, "Expected EPH marker\n"); + return OPJ_FALSE; } else { l_header_data += 2; } @@ -1353,6 +1360,9 @@ static OPJ_BOOL 
opj_t2_read_packet_header(opj_t2_t* p_t2, l_header_length = (OPJ_UINT32)(l_header_data - *l_header_data_start); JAS_FPRINTF(stderr, "hdrlen=%d \n", l_header_length); + if (!l_header_length) { + return OPJ_FALSE; + } JAS_FPRINTF(stderr, "packet body\n"); *l_modified_length_ptr -= l_header_length; *l_header_data_start += l_header_length; @@ -1404,18 +1414,21 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2, l_nb_code_blocks = l_prc->cw * l_prc->ch; l_cblk = l_prc->cblks.dec; - for (cblkno = 0; cblkno < l_nb_code_blocks; ++cblkno) { + for (cblkno = 0; cblkno < l_nb_code_blocks; ++cblkno, ++l_cblk) { opj_tcd_seg_t *l_seg = 00; - // if we have a partial data stream, set numchunks to zero - // since we have no data to actually decode. - if (partial_buffer) { - l_cblk->numchunks = 0; - } - if (!l_cblk->numnewpasses) { /* nothing to do */ - ++l_cblk; + continue; + } + + if (partial_buffer || l_cblk->corrupted) { + /* if a previous segment in this packet couldn't be decoded, + * or if this code block was corrupted in a previous layer, + * then mark it as corrupted. 
+ */ + l_cblk->numchunks = 0; + l_cblk->corrupted = OPJ_TRUE; continue; } @@ -1448,18 +1461,13 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2, "read: segment too long (%d) with max (%d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n", l_seg->newlen, p_max_length, cblkno, p_pi->precno, bandno, p_pi->resno, p_pi->compno); - // skip this codeblock since it is a partial read + /* skip this codeblock (and following ones in this + * packet) since it is a partial read + */ partial_buffer = OPJ_TRUE; + l_cblk->corrupted = OPJ_TRUE; l_cblk->numchunks = 0; - - l_seg->numpasses += l_seg->numnewpasses; - l_cblk->numnewpasses -= l_seg->numnewpasses; - if (l_cblk->numnewpasses > 0) { - ++l_seg; - ++l_cblk->numsegs; - break; - } - continue; + break; } } @@ -1516,7 +1524,7 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2, } while (l_cblk->numnewpasses > 0); l_cblk->real_num_segs = l_cblk->numsegs; - ++l_cblk; + } /* next code_block */ ++l_band; @@ -1600,6 +1608,8 @@ static OPJ_BOOL opj_t2_skip_packet_data(opj_t2_t* p_t2, "skip: segment too long (%d) with max (%d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n", l_seg->newlen, p_max_length, cblkno, p_pi->precno, bandno, p_pi->resno, p_pi->compno); + + *p_data_read = p_max_length; return OPJ_TRUE; } } diff --git a/contrib/libs/openjpeg/tcd.c b/contrib/libs/openjpeg/tcd.c index 687aa61bb09..8ca259b71dc 100644 --- a/contrib/libs/openjpeg/tcd.c +++ b/contrib/libs/openjpeg/tcd.c @@ -243,7 +243,7 @@ void opj_tcd_rateallocate_fixed(opj_tcd_t *tcd) /* ----------------------------------------------------------------------- */ /** Returns OPJ_TRUE if the layer allocation is unchanged w.r.t to the previous - * invokation with a different threshold */ + * invocation with a different threshold */ static OPJ_BOOL opj_tcd_makelayer(opj_tcd_t *tcd, OPJ_UINT32 layno, @@ -2861,12 +2861,12 @@ OPJ_BOOL opj_tcd_is_subband_area_of_interest(opj_tcd_t *tcd, return intersects; } -/** Returns whether a tile componenent is fully decoded, taking 
into account +/** Returns whether a tile component is fully decoded, taking into account * p_tcd->win_* members. * * @param p_tcd TCD handle. * @param compno Component number - * @return OPJ_TRUE whether the tile componenent is fully decoded + * @return OPJ_TRUE whether the tile component is fully decoded */ static OPJ_BOOL opj_tcd_is_whole_tilecomp_decoding(opj_tcd_t *p_tcd, OPJ_UINT32 compno) diff --git a/contrib/libs/openjpeg/tcd.h b/contrib/libs/openjpeg/tcd.h index f659869a134..3371b08cb27 100644 --- a/contrib/libs/openjpeg/tcd.h +++ b/contrib/libs/openjpeg/tcd.h @@ -141,6 +141,7 @@ typedef struct opj_tcd_cblk_dec { OPJ_UINT32 numchunksalloc; /* Number of chunks item allocated */ /* Decoded code-block. Only used for subtile decoding. Otherwise tilec->data is directly updated */ OPJ_INT32* decoded_data; + OPJ_BOOL corrupted; /* whether the code block data is corrupted */ } opj_tcd_cblk_dec_t; /** Precinct structure */ @@ -312,7 +313,7 @@ typedef struct opj_tcd_marker_info { /** Dump the content of a tcd structure */ -/*void tcd_dump(FILE *fd, opj_tcd_t *tcd, opj_tcd_image_t *img);*/ /* TODO MSD shoul use the new v2 structures */ +/*void tcd_dump(FILE *fd, opj_tcd_t *tcd, opj_tcd_image_t *img);*/ /* TODO MSD should use the new v2 structures */ /** Create a new TCD handle @@ -443,7 +444,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd, OPJ_SIZE_T opj_tcd_get_encoder_input_buffer_size(opj_tcd_t *p_tcd); /** - * Initialize the tile coder and may reuse some meory. + * Initialize the tile coder and may reuse some memory. * * @param p_tcd TCD handle. * @param p_tile_no current tile index to encode. 
@@ -491,7 +492,7 @@ void opj_tcd_reinit_segment(opj_tcd_seg_t* seg); * @param y0 Upper left y in subband coordinates * @param x1 Lower right x in subband coordinates * @param y1 Lower right y in subband coordinates - * @return OPJ_TRUE whether the sub-band region contributs to the area of + * @return OPJ_TRUE whether the sub-band region contributes to the area of * interest. */ OPJ_BOOL opj_tcd_is_subband_area_of_interest(opj_tcd_t *tcd, diff --git a/contrib/libs/openjpeg/ya.make b/contrib/libs/openjpeg/ya.make index e065d61445b..ebcca46a5fc 100644 --- a/contrib/libs/openjpeg/ya.make +++ b/contrib/libs/openjpeg/ya.make @@ -9,9 +9,9 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(2.5.2) +VERSION(2.5.3) -ORIGINAL_SOURCE(https://github.com/uclouvain/openjpeg/archive/v2.5.2.tar.gz) +ORIGINAL_SOURCE(https://github.com/uclouvain/openjpeg/archive/v2.5.3.tar.gz) ADDINCL( contrib/libs/openjpeg diff --git a/contrib/python/executing/.dist-info/METADATA b/contrib/python/executing/.dist-info/METADATA index 45ff9aa8813..6e4ac6516d3 100644 --- a/contrib/python/executing/.dist-info/METADATA +++ b/contrib/python/executing/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: executing -Version: 2.1.0 +Version: 2.2.0 Summary: Get the currently executing AST node of a frame, and other information Home-page: https://github.com/alexmojaki/executing Author: Alex Hall @@ -42,7 +42,7 @@ This mini-package lets you get information about what a frame is currently doing * [How does it work?](#how-does-it-work) * [Is it reliable?](#is-it-reliable) * [Which nodes can it identify?](#which-nodes-can-it-identify) -* [Libraries that use this](#libraries-that-use-this) +* [Projects that use this](#projects-that-use-this) ## Usage diff --git a/contrib/python/executing/README.md b/contrib/python/executing/README.md index 1b4dbd8f074..61943dd7f54 100644 --- a/contrib/python/executing/README.md +++ b/contrib/python/executing/README.md @@ -13,7 +13,7 @@ This mini-package lets 
you get information about what a frame is currently doing * [How does it work?](#how-does-it-work) * [Is it reliable?](#is-it-reliable) * [Which nodes can it identify?](#which-nodes-can-it-identify) -* [Libraries that use this](#libraries-that-use-this) +* [Projects that use this](#projects-that-use-this) ## Usage diff --git a/contrib/python/executing/executing/__init__.py b/contrib/python/executing/executing/__init__.py index b6451973917..e5181a5c326 100644 --- a/contrib/python/executing/executing/__init__.py +++ b/contrib/python/executing/executing/__init__.py @@ -10,6 +10,9 @@ Get information about what a frame is currently doing. Typical usage: from collections import namedtuple _VersionInfo = namedtuple('_VersionInfo', ('major', 'minor', 'micro')) from .executing import Source, Executing, only, NotOneValueFound, cache, future_flags + +from ._pytest_utils import is_pytest_compatible + try: from .version import __version__ # type: ignore[import] if "dev" in __version__: @@ -22,4 +25,4 @@ else: __version_info__ = _VersionInfo(*map(int, __version__.split('.'))) -__all__ = ["Source"] +__all__ = ["Source","is_pytest_compatible"] diff --git a/contrib/python/executing/executing/_position_node_finder.py b/contrib/python/executing/executing/_position_node_finder.py index 7a814150da6..0f8344106f2 100644 --- a/contrib/python/executing/executing/_position_node_finder.py +++ b/contrib/python/executing/executing/_position_node_finder.py @@ -242,6 +242,66 @@ class PositionNodeFinder(object): # keeping the old behaviour makes it possible to distinguish both cases. 
return node.parent + + if ( + sys.version_info >= (3, 12, 6) + and instruction.opname in ("GET_ITER", "FOR_ITER") + and isinstance( + node.parent.parent, + (ast.ListComp, ast.SetComp, ast.DictComp, ast.GeneratorExp), + ) + and isinstance(node.parent,ast.comprehension) + and node is node.parent.iter + ): + # same as above but only for comprehensions, see: + # https://github.com/python/cpython/issues/123142 + + return node.parent.parent + + if sys.version_info >= (3, 12,6) and instruction.opname == "CALL": + before = self.instruction_before(instruction) + if ( + before is not None + and before.opname == "LOAD_CONST" + and before.positions == instruction.positions + and isinstance(node.parent, ast.withitem) + and node is node.parent.context_expr + ): + # node positions for with-statements have changed + # and are now equal to the expression which created the context-manager + # https://github.com/python/cpython/pull/120763 + + # with context_manager: + # ... + + # but there is one problem to distinguish call-expressions from __exit__() + + # with context_manager(): + # ... 
+ + # the call for __exit__ + + # 20 1:5 1:22 LOAD_CONST(None) + # 22 1:5 1:22 LOAD_CONST(None) + # 24 1:5 1:22 LOAD_CONST(None) + # 26 1:5 1:22 CALL() # <-- same source range as context_manager() + + # but we can use the fact that the previous load for None + # has the same source range as the call, which cannot happen for normal calls + + # we return the same ast.With statement at the end to preserve backward compatibility + + return node.parent.parent + + if ( + sys.version_info >= (3, 12,6) + and instruction.opname == "BEFORE_WITH" + and isinstance(node.parent, ast.withitem) + and node is node.parent.context_expr + ): + # handle position changes for __enter__ + return node.parent.parent + return node def known_issues(self, node: EnhancedAST, instruction: dis.Instruction) -> None: @@ -880,6 +940,11 @@ class PositionNodeFinder(object): def instruction(self, index: int) -> Optional[dis.Instruction]: return self.bc_dict.get(index,None) + def instruction_before( + self, instruction: dis.Instruction + ) -> Optional[dis.Instruction]: + return self.bc_dict.get(instruction.offset - 2, None) + def opname(self, index: int) -> str: i=self.instruction(index) if i is None: diff --git a/contrib/python/executing/executing/_pytest_utils.py b/contrib/python/executing/executing/_pytest_utils.py new file mode 100644 index 00000000000..fab8693baf2 --- /dev/null +++ b/contrib/python/executing/executing/_pytest_utils.py @@ -0,0 +1,16 @@ +import sys + + + +def is_pytest_compatible() -> bool: + """ returns true if executing can be used for expressions inside assert statements which are rewritten by pytest + """ + if sys.version_info < (3, 11): + return False + + try: + import pytest + except ImportError: + return False + + return pytest.version_tuple >= (8, 3, 4) diff --git a/contrib/python/executing/executing/version.py b/contrib/python/executing/executing/version.py index b15121b0fec..e212710ff7b 100644 --- a/contrib/python/executing/executing/version.py +++ 
b/contrib/python/executing/executing/version.py @@ -1 +1 @@ -__version__ = '2.1.0'
\ No newline at end of file +__version__ = '2.2.0'
\ No newline at end of file diff --git a/contrib/python/executing/ya.make b/contrib/python/executing/ya.make index b437b26981c..32d111cc478 100644 --- a/contrib/python/executing/ya.make +++ b/contrib/python/executing/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(2.1.0) +VERSION(2.2.0) LICENSE(MIT) @@ -13,6 +13,7 @@ PY_SRCS( executing/__init__.py executing/_exceptions.py executing/_position_node_finder.py + executing/_pytest_utils.py executing/executing.py executing/version.py ) |