/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef MASKED_MOVE_H
#define MASKED_MOVE_H

#include "arch.h"

#if defined(HAVE_AVX2)

#include "unaligned.h"
#include "simd_utils.h"

#ifdef __cplusplus
extern "C" {
#endif
extern const u32 mm_mask_mask[16];
extern const u32 mm_shuffle_end[32][8];
#ifdef __cplusplus
}
#endif

/* load mask for len bytes from start of buffer */
static really_inline m256
_get_mm_mask_end(u32 len) {
    assert(len <= 32);
    const u8 *masky = (const u8 *)mm_mask_mask;
    m256 mask = load256(masky + 32);
    /* Shift each dword lane left by (8 - len/4) bits: _mm256_maskload_epi32
     * keys off the per-lane sign bit, so after the shift only the first
     * len / 4 dword lanes stay selected. */
    mask = _mm256_sll_epi32(mask, _mm_cvtsi32_si128(8 - (len >> 2)));
    return mask;
}

/*
 * masked_move256_len: loads len bytes from *buf into an m256
 * _______________________________
 * |0<----len---->|            32|
 * -------------------------------
 */
static really_inline m256
masked_move256_len(const u8 *buf, const u32 len) {
    assert(len >= 4);

    m256 lmask = _get_mm_mask_end(len);

    /* The masked load covers whole dwords only (len / 4 of them), so fetch
     * the final four bytes separately to pick up any trailing len % 4
     * bytes. */
    u32 end = unaligned_load_u32(buf + len - 4);
    m256 preshufend = _mm256_broadcastq_epi64(_mm_cvtsi32_si128(end));
    m256 v = _mm256_maskload_epi32((const int *)buf, lmask);
    /* Shuffle the tail dword into byte positions len-4 .. len-1 and merge it
     * with the masked load. */
    m256 shufend = pshufb_m256(preshufend,
                               loadu256(&mm_shuffle_end[len - 4]));
    m256 target = or256(v, shufend);

    return target;
}

#endif /* AVX2 */
#endif /* MASKED_MOVE_H */
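
/*
 * Usage sketch (illustrative only, not part of the original header): load a
 * short buffer into a vector register without reading past its end, falling
 * back to a plain unaligned load once a full 32 bytes are available. The
 * wrapper name load_short256() is hypothetical; u8, m256, loadu256() and
 * masked_move256_len() come from this header and the headers it includes.
 *
 *   #if defined(HAVE_AVX2)
 *   static really_inline
 *   m256 load_short256(const u8 *buf, u32 len) {
 *       assert(len >= 4);
 *       if (len >= 32) {
 *           return loadu256(buf); // a full vector is available
 *       }
 *       // 4 <= len < 32: the masked load stays within [buf, buf + len)
 *       return masked_move256_len(buf, len);
 *   }
 *   #endif
 */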