| author | thegeorg <thegeorg@yandex-team.com> | 2023-10-03 11:19:48 +0300 |
|---|---|---|
| committer | thegeorg <thegeorg@yandex-team.com> | 2023-10-03 11:43:28 +0300 |
| commit | cda0c13f23f6b169fb0a49dc504b40a0aaecea09 (patch) | |
| tree | 26476e92e5af2c856e017afb1df8f8dff42495bf /contrib/libs/pcre2/src/sljit | |
| parent | 4854116da9c5e3c95bb8440f2ea997c54b6e1a61 (diff) | |
| download | ydb-cda0c13f23f6b169fb0a49dc504b40a0aaecea09.tar.gz | |
Move contrib/tools/jdk to build/platform/java/jdk/testing
Diffstat (limited to 'contrib/libs/pcre2/src/sljit')
24 files changed, 37372 insertions, 0 deletions
diff --git a/contrib/libs/pcre2/src/sljit/sljitConfig.h b/contrib/libs/pcre2/src/sljit/sljitConfig.h new file mode 100644 index 0000000000..5fba7aa638 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitConfig.h @@ -0,0 +1,162 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SLJIT_CONFIG_H_ +#define SLJIT_CONFIG_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + This file contains the basic configuration options for the SLJIT compiler + and their default values. These options can be overridden in the + sljitConfigPre.h header file when SLJIT_HAVE_CONFIG_PRE is set to a + non-zero value. +*/ + +/* --------------------------------------------------------------------- */ +/* Architecture */ +/* --------------------------------------------------------------------- */ + +/* Architecture selection. */ +/* #define SLJIT_CONFIG_X86_32 1 */ +/* #define SLJIT_CONFIG_X86_64 1 */ +/* #define SLJIT_CONFIG_ARM_V5 1 */ +/* #define SLJIT_CONFIG_ARM_V7 1 */ +/* #define SLJIT_CONFIG_ARM_THUMB2 1 */ +/* #define SLJIT_CONFIG_ARM_64 1 */ +/* #define SLJIT_CONFIG_PPC_32 1 */ +/* #define SLJIT_CONFIG_PPC_64 1 */ +/* #define SLJIT_CONFIG_MIPS_32 1 */ +/* #define SLJIT_CONFIG_MIPS_64 1 */ +/* #define SLJIT_CONFIG_RISCV_32 1 */ +/* #define SLJIT_CONFIG_RISCV_64 1 */ +/* #define SLJIT_CONFIG_S390X 1 */ + +/* #define SLJIT_CONFIG_AUTO 1 */ +/* #define SLJIT_CONFIG_UNSUPPORTED 1 */ + +/* --------------------------------------------------------------------- */ +/* Utilities */ +/* --------------------------------------------------------------------- */ + +/* Implements a stack like data structure (by using mmap / VirtualAlloc */ +/* or a custom allocator). */ +#ifndef SLJIT_UTIL_STACK +/* Enabled by default */ +#define SLJIT_UTIL_STACK 1 +#endif + +/* Uses user provided allocator to allocate the stack (see SLJIT_UTIL_STACK) */ +#ifndef SLJIT_UTIL_SIMPLE_STACK_ALLOCATION +/* Disabled by default */ +#define SLJIT_UTIL_SIMPLE_STACK_ALLOCATION 0 +#endif + +/* Single threaded application. Does not require any locks. */ +#ifndef SLJIT_SINGLE_THREADED +/* Disabled by default. 
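   [Editor's note, not part of the upstream patch: per the note at the top of
   this file, any of these defaults can be overridden in sljitConfigPre.h when
   the build defines SLJIT_HAVE_CONFIG_PRE to a non-zero value. A minimal
   sketch of such a file; the chosen values are purely illustrative:

       #define SLJIT_SINGLE_THREADED 1
       #define SLJIT_DEBUG 0
       #define SLJIT_VERBOSE 0
   ]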
*/ +#define SLJIT_SINGLE_THREADED 0 +#endif + +/* --------------------------------------------------------------------- */ +/* Configuration */ +/* --------------------------------------------------------------------- */ + +/* If SLJIT_STD_MACROS_DEFINED is not defined, the application should + define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMCPY, and NULL. */ +#ifndef SLJIT_STD_MACROS_DEFINED +/* Disabled by default. */ +#define SLJIT_STD_MACROS_DEFINED 0 +#endif + +/* Executable code allocation: + If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should + define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET. */ +#ifndef SLJIT_EXECUTABLE_ALLOCATOR +/* Enabled by default. */ +#define SLJIT_EXECUTABLE_ALLOCATOR 1 + +/* When SLJIT_PROT_EXECUTABLE_ALLOCATOR is enabled SLJIT uses + an allocator which does not set writable and executable + permission flags at the same time. + Instead, it creates a shared memory segment (usually backed by a file) + and maps it twice, with different permissions, depending on the use + case. + The trade-off is increased use of virtual memory, incompatibility with + fork(), and some possible additional security risks by the use of + publicly accessible files for the generated code. */ +#ifndef SLJIT_PROT_EXECUTABLE_ALLOCATOR +/* Disabled by default. */ +#define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0 +#endif + +/* When SLJIT_WX_EXECUTABLE_ALLOCATOR is enabled SLJIT uses an + allocator which does not set writable and executable permission + flags at the same time. + Instead, it creates a new independent map on each invocation and + switches permissions at the underlying pages as needed. + The trade-off is increased memory use and degraded performance. */ +#ifndef SLJIT_WX_EXECUTABLE_ALLOCATOR +/* Disabled by default. */ +#define SLJIT_WX_EXECUTABLE_ALLOCATOR 0 +#endif + +#endif /* !SLJIT_EXECUTABLE_ALLOCATOR */ + +/* Return with error when an invalid argument is passed. */ +#ifndef SLJIT_ARGUMENT_CHECKS +/* Disabled by default */ +#define SLJIT_ARGUMENT_CHECKS 0 +#endif + +/* Debug checks (assertions, etc.). */ +#ifndef SLJIT_DEBUG +/* Enabled by default */ +#define SLJIT_DEBUG 1 +#endif + +/* Verbose operations. */ +#ifndef SLJIT_VERBOSE +/* Enabled by default */ +#define SLJIT_VERBOSE 1 +#endif + +/* + SLJIT_IS_FPU_AVAILABLE + The availability of the FPU can be controlled by SLJIT_IS_FPU_AVAILABLE. + zero value - FPU is NOT present. + nonzero value - FPU is present. +*/ + +/* For further configurations, see the beginning of sljitConfigInternal.h */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SLJIT_CONFIG_H_ */ diff --git a/contrib/libs/pcre2/src/sljit/sljitConfigInternal.h b/contrib/libs/pcre2/src/sljit/sljitConfigInternal.h new file mode 100644 index 0000000000..cd3ce69734 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitConfigInternal.h @@ -0,0 +1,851 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SLJIT_CONFIG_INTERNAL_H_ +#define SLJIT_CONFIG_INTERNAL_H_ + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE))) +#include <stdio.h> +#endif + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG \ + && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) || !defined(SLJIT_HALT_PROCESS))) +#include <stdlib.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + SLJIT defines the following architecture dependent types and macros: + + Types: + sljit_s8, sljit_u8 : signed and unsigned 8 bit integer type + sljit_s16, sljit_u16 : signed and unsigned 16 bit integer type + sljit_s32, sljit_u32 : signed and unsigned 32 bit integer type + sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer + sljit_p : unsigned pointer value (usually the same as sljit_uw, but + some 64 bit ABIs may use 32 bit pointers) + sljit_f32 : 32 bit single precision floating point value + sljit_f64 : 64 bit double precision floating point value + + Macros for feature detection (boolean): + SLJIT_32BIT_ARCHITECTURE : 32 bit architecture + SLJIT_64BIT_ARCHITECTURE : 64 bit architecture + SLJIT_LITTLE_ENDIAN : little endian architecture + SLJIT_BIG_ENDIAN : big endian architecture + SLJIT_UNALIGNED : unaligned memory accesses for non-fpu operations are supported + SLJIT_FPU_UNALIGNED : unaligned memory accesses for fpu operations are supported + SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information + + Constants: + SLJIT_NUMBER_OF_REGISTERS : number of available registers + SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers + SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers + SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers + SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers + SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index + SLJIT_F32_SHIFT : the shift required to apply when accessing + a single precision floating point array by index + SLJIT_F64_SHIFT : the shift required to apply when accessing + a double precision floating point array by index + SLJIT_PREF_SHIFT_REG : x86 systems prefer ecx for shifting by a register; + the scratch register index of ecx is stored in this variable + SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET) + SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address + + Other macros: + SLJIT_FUNC : calling convention attribute for both 
calling JIT from C and C calling back from JIT + SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper) +*/ + +/*****************/ +/* Sanity check. */ +/*****************/ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + + (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ + + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + + (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + + (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + + (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \ + + (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ + + (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ + + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2 +#error "Multiple architectures are selected" +#endif + +#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + && !(defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ + && !(defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + && !(defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + && !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + && !(defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \ + && !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + && !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) \ + && !(defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) +#if defined SLJIT_CONFIG_AUTO && !SLJIT_CONFIG_AUTO +#error "An architecture must be selected" +#else /* SLJIT_CONFIG_AUTO */ +#define SLJIT_CONFIG_AUTO 1 +#endif /* !SLJIT_CONFIG_AUTO */ +#endif /* !SLJIT_CONFIG */ + +/********************************************************/ +/* Automatic CPU detection (requires compiler support). 
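   [Editor's note: auto-detection can be bypassed by selecting exactly one
   architecture up front, e.g. compiling with -DSLJIT_CONFIG_X86_64=1 (any
   single option from the selection list in sljitConfig.h works the same
   way); the sanity check above rejects builds that select more than one.]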
*/ +/********************************************************/ + +#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) + +#ifndef _WIN32 + +#if defined(__i386__) || defined(__i386) +#define SLJIT_CONFIG_X86_32 1 +#elif defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif defined(__arm__) || defined(__ARM__) +#ifdef __thumb2__ +#define SLJIT_CONFIG_ARM_THUMB2 1 +#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) +#define SLJIT_CONFIG_ARM_V7 1 +#else +#define SLJIT_CONFIG_ARM_V5 1 +#endif +#elif defined (__aarch64__) +#define SLJIT_CONFIG_ARM_64 1 +#elif defined(__ppc64__) || defined(__powerpc64__) || (defined(_ARCH_PPC64) && defined(__64BIT__)) || (defined(_POWER) && defined(__64BIT__)) +#define SLJIT_CONFIG_PPC_64 1 +#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER) +#define SLJIT_CONFIG_PPC_32 1 +#elif defined(__mips__) && !defined(_LP64) +#define SLJIT_CONFIG_MIPS_32 1 +#elif defined(__mips64) +#define SLJIT_CONFIG_MIPS_64 1 +#elif defined (__riscv_xlen) && (__riscv_xlen == 32) +#define SLJIT_CONFIG_RISCV_32 1 +#elif defined (__riscv_xlen) && (__riscv_xlen == 64) +#define SLJIT_CONFIG_RISCV_64 1 +#elif defined(__s390x__) +#define SLJIT_CONFIG_S390X 1 +#else +/* Unsupported architecture */ +#define SLJIT_CONFIG_UNSUPPORTED 1 +#endif + +#else /* _WIN32 */ + +#if defined(_M_X64) || defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__) +#define SLJIT_CONFIG_ARM_THUMB2 1 +#elif (defined(_M_ARM) && _M_ARM >= 7) +#define SLJIT_CONFIG_ARM_V7 1 +#elif defined(_ARM_) +#define SLJIT_CONFIG_ARM_V5 1 +#elif defined(_M_ARM64) || defined(__aarch64__) +#define SLJIT_CONFIG_ARM_64 1 +#else +#define SLJIT_CONFIG_X86_32 1 +#endif + +#endif /* !_WIN32 */ +#endif /* SLJIT_CONFIG_AUTO */ + +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#undef SLJIT_EXECUTABLE_ALLOCATOR +#endif + +/******************************/ +/* CPU family type detection. */ +/******************************/ + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_CONFIG_ARM_32 1 +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#define SLJIT_CONFIG_X86 1 +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +#define SLJIT_CONFIG_ARM 1 +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_CONFIG_PPC 1 +#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#define SLJIT_CONFIG_MIPS 1 +#elif (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) +#define SLJIT_CONFIG_RISCV 1 +#endif + +/***********************************************************/ +/* Intel Control-flow Enforcement Technology (CET) support. 
*/ +/***********************************************************/ + +#ifdef SLJIT_CONFIG_X86 + +#if defined(__CET__) && !(defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) +#define SLJIT_CONFIG_X86_CET 1 +#endif + +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined(__GNUC__) +#include <x86intrin.h> +#endif + +#endif /* SLJIT_CONFIG_X86 */ + +/**********************************/ +/* External function definitions. */ +/**********************************/ + +/* General macros: + Note: SLJIT is designed to be as independent from them as possible. + + In release mode (SLJIT_DEBUG is not defined) only the following + external functions are needed: +*/ + +#ifndef SLJIT_MALLOC +#define SLJIT_MALLOC(size, allocator_data) malloc(size) +#endif + +#ifndef SLJIT_FREE +#define SLJIT_FREE(ptr, allocator_data) free(ptr) +#endif + +#ifndef SLJIT_MEMCPY +#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len) +#endif + +#ifndef SLJIT_MEMMOVE +#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len) +#endif + +#ifndef SLJIT_ZEROMEM +#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len) +#endif + +/***************************/ +/* Compiler helper macros. */ +/***************************/ + +#if !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) + +#if defined(__GNUC__) && (__GNUC__ >= 3) +#define SLJIT_LIKELY(x) __builtin_expect((x), 1) +#define SLJIT_UNLIKELY(x) __builtin_expect((x), 0) +#else +#define SLJIT_LIKELY(x) (x) +#define SLJIT_UNLIKELY(x) (x) +#endif + +#endif /* !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) */ + +#ifndef SLJIT_INLINE +/* Inline functions. Some old compilers do not support them. */ +#ifdef __SUNPRO_C +#if __SUNPRO_C < 0x560 +#define SLJIT_INLINE +#else +#define SLJIT_INLINE inline +#endif /* __SUNPRO_C */ +#else +#define SLJIT_INLINE __inline +#endif +#endif /* !SLJIT_INLINE */ + +#ifndef SLJIT_NOINLINE +/* Not inline functions. */ +#if defined(__GNUC__) +#define SLJIT_NOINLINE __attribute__ ((noinline)) +#else +#define SLJIT_NOINLINE +#endif +#endif /* !SLJIT_NOINLINE */ + +#ifndef SLJIT_UNUSED_ARG +/* Unused arguments. */ +#define SLJIT_UNUSED_ARG(arg) (void)arg +#endif + +/*********************************/ +/* Type of public API functions. */ +/*********************************/ + +#ifndef SLJIT_API_FUNC_ATTRIBUTE +#if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) +/* Static ABI functions. For all-in-one programs. */ + +#if defined(__GNUC__) +/* Disable unused warnings in gcc. */ +#define SLJIT_API_FUNC_ATTRIBUTE static __attribute__((unused)) +#else +#define SLJIT_API_FUNC_ATTRIBUTE static +#endif + +#else +#define SLJIT_API_FUNC_ATTRIBUTE +#endif /* (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) */ +#endif /* defined SLJIT_API_FUNC_ATTRIBUTE */ + +/****************************/ +/* Instruction cache flush. */ +/****************************/ + +/* + * TODO: + * + * clang >= 15 could be safe to enable below + * older versions are known to abort in some targets + * https://github.com/PhilipHazel/pcre2/issues/92 + * + * beware some vendors (ex: Microsoft, Apple) are known to have + * removed the code to support this builtin even if the call for + * __has_builtin reports it is available. + * + * make sure linking doesn't fail because __clear_cache() is + * missing before changing it or add an exception so that the + * system provided method that should be defined below is used + * instead. 
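   [Editor's note: a project can also sidestep this selection logic entirely
   by predefining the macro before this header is processed, since every
   branch below is guarded on SLJIT_CACHE_FLUSH being undefined. A hedged
   one-line sketch forcing the compiler's helper:

       #define SLJIT_CACHE_FLUSH(from, to) __clear_cache((char*)(from), (char*)(to))
   ]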
+ */ +#if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) +#if __has_builtin(__builtin___clear_cache) && !defined(__clang__) + +/* + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248 + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811 + * gcc's clear_cache builtin for power is broken + */ +#if !defined(SLJIT_CONFIG_PPC) +#define SLJIT_CACHE_FLUSH(from, to) \ + __builtin___clear_cache((char*)(from), (char*)(to)) +#endif + +#endif /* gcc >= 10 */ +#endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */ + +#ifndef SLJIT_CACHE_FLUSH + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +/* Not required to implement on archs with unified caches. */ +#define SLJIT_CACHE_FLUSH(from, to) + +#elif defined __APPLE__ + +/* Supported by all macs since Mac OS 10.5. + However, it does not work on non-jailbroken iOS devices, + although the compilation is successful. */ +#include <libkern/OSCacheControl.h> +#define SLJIT_CACHE_FLUSH(from, to) \ + sys_icache_invalidate((void*)(from), (size_t)((char*)(to) - (char*)(from))) + +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */ +#define SLJIT_CACHE_FLUSH(from, to) \ + ppc_cache_flush((from), (to)) +#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 + +#elif defined(_WIN32) + +#define SLJIT_CACHE_FLUSH(from, to) \ + FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from)) + +#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__) + +#define SLJIT_CACHE_FLUSH(from, to) \ + __builtin___clear_cache((char*)(from), (char*)(to)) + +#elif defined __ANDROID__ + +/* Android ARMv7 with gcc lacks __clear_cache; use cacheflush instead. */ +#include <sys/cachectl.h> +#define SLJIT_CACHE_FLUSH(from, to) \ + cacheflush((long)(from), (long)(to), 0) + +#else + +/* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. */ +#define SLJIT_CACHE_FLUSH(from, to) \ + __clear_cache((char*)(from), (char*)(to)) + +#endif + +#endif /* !SLJIT_CACHE_FLUSH */ + +/******************************************************/ +/* Integer and floating point type definitions. */ +/******************************************************/ + +/* 8 bit byte type. */ +typedef unsigned char sljit_u8; +typedef signed char sljit_s8; + +/* 16 bit half-word type. */ +typedef unsigned short int sljit_u16; +typedef signed short int sljit_s16; + +/* 32 bit integer type. */ +typedef unsigned int sljit_u32; +typedef signed int sljit_s32; + +/* Machine word type. Enough for storing a pointer. + 32 bit for 32 bit machines. + 64 bit for 64 bit machines. */ +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +/* Just to have something. 
*/ +#define SLJIT_WORD_SHIFT 0 +typedef unsigned long int sljit_uw; +typedef long int sljit_sw; +#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + && !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_32BIT_ARCHITECTURE 1 +#define SLJIT_WORD_SHIFT 2 +typedef unsigned int sljit_uw; +typedef int sljit_sw; +#else +#define SLJIT_64BIT_ARCHITECTURE 1 +#define SLJIT_WORD_SHIFT 3 +#ifdef _WIN32 +#ifdef __GNUC__ +/* These types do not require windows.h */ +typedef unsigned long long sljit_uw; +typedef long long sljit_sw; +#else +typedef unsigned __int64 sljit_uw; +typedef __int64 sljit_sw; +#endif +#else /* !_WIN32 */ +typedef unsigned long int sljit_uw; +typedef long int sljit_sw; +#endif /* _WIN32 */ +#endif + +typedef sljit_uw sljit_p; + +/* Floating point types. */ +typedef float sljit_f32; +typedef double sljit_f64; + +/* Shift for pointer sized data. */ +#define SLJIT_POINTER_SHIFT SLJIT_WORD_SHIFT + +/* Shift for double precision sized data. */ +#define SLJIT_F32_SHIFT 2 +#define SLJIT_F64_SHIFT 3 + +#ifndef SLJIT_W + +/* Defining long constants. */ +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#define SLJIT_W(w) (w##l) +#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#ifdef _WIN64 +#define SLJIT_W(w) (w##ll) +#else /* !windows */ +#define SLJIT_W(w) (w##l) +#endif /* windows */ +#else /* 32 bit */ +#define SLJIT_W(w) (w) +#endif /* unknown */ + +#endif /* !SLJIT_W */ + +/*************************/ +/* Endianness detection. */ +/*************************/ + +#if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) + +/* These macros are mostly useful for the applications. */ +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +#ifdef __LITTLE_ENDIAN__ +#define SLJIT_LITTLE_ENDIAN 1 +#else +#define SLJIT_BIG_ENDIAN 1 +#endif + +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + +#ifdef __MIPSEL__ +#define SLJIT_LITTLE_ENDIAN 1 +#else +#define SLJIT_BIG_ENDIAN 1 +#endif + +#ifndef SLJIT_MIPS_REV + +/* Auto detecting mips revision. */ +#if (defined __mips_isa_rev) && (__mips_isa_rev >= 6) +#define SLJIT_MIPS_REV 6 +#elif (defined __mips_isa_rev && __mips_isa_rev >= 1) \ + || (defined __clang__ && defined _MIPS_ARCH_OCTEON) \ + || (defined __clang__ && defined _MIPS_ARCH_P5600) +/* clang either forgets to define (clang-7) __mips_isa_rev at all + * or sets it to zero (clang-8,-9) for -march=octeon (MIPS64 R2+) + * and -march=p5600 (MIPS32 R5). + * It also sets the __mips macro to 64 or 32 for -mipsN when N <= 5 + * (should be set to N exactly) so we cannot rely on this too. + */ +#define SLJIT_MIPS_REV 1 +#endif + +#endif /* !SLJIT_MIPS_REV */ + +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +#define SLJIT_BIG_ENDIAN 1 + +#else +#define SLJIT_LITTLE_ENDIAN 1 +#endif + +#endif /* !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) */ + +/* Sanity check. 
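   [Editor's note on SLJIT_W above: it makes 64 bit immediates portable; on a
   64 bit Windows target it appends the ll suffix, on other 64 bit targets l,
   and on 32 bit targets it expands to the bare constant. A sketch of typical
   use with the public API declared in sljitLir.h (not part of this hunk):

       sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, SLJIT_W(42));
   ]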
*/ +#if (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#error "Exactly one endianness must be selected" +#endif + +#if !(defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && !(defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#error "Exactly one endianness must be selected" +#endif + +#ifndef SLJIT_UNALIGNED + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ + || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_UNALIGNED 1 +#endif + +#endif /* !SLJIT_UNALIGNED */ + +#ifndef SLJIT_FPU_UNALIGNED + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ + || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_FPU_UNALIGNED 1 +#endif + +#endif /* !SLJIT_FPU_UNALIGNED */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +/* Auto detect SSE2 support using CPUID. + On 64 bit x86 cpus, sse2 must be present. */ +#define SLJIT_DETECT_SSE2 1 +#endif + +/*****************************************************************************************/ +/* Calling convention of functions generated by SLJIT or called from the generated code. */ +/*****************************************************************************************/ + +#ifndef SLJIT_FUNC +#define SLJIT_FUNC +#endif /* !SLJIT_FUNC */ + +#ifndef SLJIT_INDIRECT_CALL +#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (!defined _CALL_ELF || _CALL_ELF == 1)) \ + || ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX) +/* It seems certain ppc compilers use an indirect addressing for functions + which makes things complicated. */ +#define SLJIT_INDIRECT_CALL 1 +#endif +#endif /* SLJIT_INDIRECT_CALL */ + +/* The offset which needs to be subtracted from the return address to +determine the next executed instruction after return. */ +#ifndef SLJIT_RETURN_ADDRESS_OFFSET +#define SLJIT_RETURN_ADDRESS_OFFSET 0 +#endif /* SLJIT_RETURN_ADDRESS_OFFSET */ + +/***************************************************/ +/* Functions of the built-in executable allocator. 
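   [Editor's note: a minimal lifecycle sketch for the declarations below,
   assuming the built-in allocator is enabled; code_size and machine_code are
   placeholders, and error checks plus SLJIT_CACHE_FLUSH are elided. With the
   default allocator SLJIT_EXEC_OFFSET(ptr) is 0, so both views coincide:

       void *code = sljit_malloc_exec(code_size);          <- writable view
       memcpy(code, machine_code, code_size);
       entry = (char*)code + SLJIT_EXEC_OFFSET(code);      <- executable view
       ...
       sljit_free_exec(code);
   ]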
*/ +/***************************************************/ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size); +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr); +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); +#define SLJIT_BUILTIN_MALLOC_EXEC(size, exec_allocator_data) sljit_malloc_exec(size) +#define SLJIT_BUILTIN_FREE_EXEC(ptr, exec_allocator_data) sljit_free_exec(ptr) + +#ifndef SLJIT_MALLOC_EXEC +#define SLJIT_MALLOC_EXEC(size, exec_allocator_data) SLJIT_BUILTIN_MALLOC_EXEC((size), (exec_allocator_data)) +#endif /* SLJIT_MALLOC_EXEC */ + +#ifndef SLJIT_FREE_EXEC +#define SLJIT_FREE_EXEC(ptr, exec_allocator_data) SLJIT_BUILTIN_FREE_EXEC((ptr), (exec_allocator_data)) +#endif /* SLJIT_FREE_EXEC */ + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); +#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr) +#else +#define SLJIT_EXEC_OFFSET(ptr) 0 +#endif + +#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ + +/**********************************************/ +/* Registers and locals offset determination. */ +/**********************************************/ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw)) +#define SLJIT_PREF_SHIFT_REG SLJIT_R2 + +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#define SLJIT_NUMBER_OF_REGISTERS 13 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 +#ifndef _WIN64 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#define SLJIT_LOCALS_OFFSET_BASE 0 +#else /* _WIN64 */ +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10 +#define SLJIT_LOCALS_OFFSET_BASE (4 * SSIZE_OF(sw)) +#endif /* !_WIN64 */ +#define SLJIT_PREF_SHIFT_REG SLJIT_R3 + +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + +#define SLJIT_NUMBER_OF_REGISTERS 26 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw)) + +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +#define SLJIT_NUMBER_OF_REGISTERS 23 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18 +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX) +#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw)) +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +/* Add +1 for double alignment. 
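(editor's note: the extra word keeps SLJIT_LOCALS_OFFSET_BASE a multiple of 8
   on the 4-byte-word ppc32 ABI, so sljit_f64 locals stay naturally aligned)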
*/ +#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * (sljit_s32)sizeof(sljit_sw)) +#else +#define SLJIT_LOCALS_OFFSET_BASE (3 * (sljit_s32)sizeof(sljit_sw)) +#endif /* SLJIT_CONFIG_PPC_64 || _AIX */ + +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + +#define SLJIT_NUMBER_OF_REGISTERS 21 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw)) +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 13 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 6 +#else +#define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 29 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#endif + +#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) + +#define SLJIT_NUMBER_OF_REGISTERS 23 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 12 +#define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 + +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +/* + * https://refspecs.linuxbase.org/ELF/zSeries/lzsabi0_zSeries.html#STACKFRAME + * + * 160 + * .. FR6 + * .. FR4 + * .. FR2 + * 128 FR0 + * 120 R15 (used for SP) + * 112 R14 + * 104 R13 + * 96 R12 + * .. + * 48 R6 + * .. + * 16 R2 + * 8 RESERVED + * 0 SP + */ +#define SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE 160 + +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + +#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#define SLJIT_NUMBER_OF_REGISTERS 0 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#endif + +#define SLJIT_LOCALS_OFFSET (SLJIT_LOCALS_OFFSET_BASE) + +#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \ + (SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS) + +#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ + (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) + +/********************************/ +/* CPU status flags management. */ +/********************************/ + +#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ + || (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ + || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_HAS_STATUS_FLAGS_STATE 1 +#endif + +/*************************************/ +/* Debug and verbose related macros. */ +/*************************************/ + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + +#if !defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) + +/* SLJIT_HALT_PROCESS must halt the process. */ +#ifndef SLJIT_HALT_PROCESS +#define SLJIT_HALT_PROCESS() \ + abort(); +#endif /* !SLJIT_HALT_PROCESS */ + +#endif /* !SLJIT_ASSERT || !SLJIT_UNREACHABLE */ + +/* Feel free to redefine these two macros. 
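   [Editor's note: a sketch of such a redefinition, routing failures into an
   application's own handler when the macros are defined before this header
   is included; my_report_failure is hypothetical, not part of sljit:

       #define SLJIT_ASSERT(x) \
           do { if (!(x)) my_report_failure(__FILE__, __LINE__); } while (0)
       #define SLJIT_UNREACHABLE() my_report_failure(__FILE__, __LINE__)
   ]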
*/ +#ifndef SLJIT_ASSERT + +#define SLJIT_ASSERT(x) \ + do { \ + if (SLJIT_UNLIKELY(!(x))) { \ + printf("Assertion failed at " __FILE__ ":%d\n", __LINE__); \ + SLJIT_HALT_PROCESS(); \ + } \ + } while (0) + +#endif /* !SLJIT_ASSERT */ + +#ifndef SLJIT_UNREACHABLE + +#define SLJIT_UNREACHABLE() \ + do { \ + printf("Should never be reached " __FILE__ ":%d\n", __LINE__); \ + SLJIT_HALT_PROCESS(); \ + } while (0) + +#endif /* !SLJIT_UNREACHABLE */ + +#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ + +/* Forcing empty, but valid statements. */ +#undef SLJIT_ASSERT +#undef SLJIT_UNREACHABLE + +#define SLJIT_ASSERT(x) \ + do { } while (0) +#define SLJIT_UNREACHABLE() \ + do { } while (0) + +#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ + +#ifndef SLJIT_COMPILE_ASSERT + +#define SLJIT_COMPILE_ASSERT(x, description) \ + switch(0) { case 0: case ((x) ? 1 : 0): break; } + +#endif /* !SLJIT_COMPILE_ASSERT */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* SLJIT_CONFIG_INTERNAL_H_ */ diff --git a/contrib/libs/pcre2/src/sljit/sljitExecAllocator.c b/contrib/libs/pcre2/src/sljit/sljitExecAllocator.c new file mode 100644 index 0000000000..92d940ddc2 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitExecAllocator.c @@ -0,0 +1,411 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + This file contains a simple executable memory allocator + + It is assumed that executable code blocks are usually medium (or sometimes + large) memory blocks, and the allocator is not too frequently called (less + optimized than other allocators). Thus, using it as a generic allocator is + not suggested. + + How does it work: + Memory is allocated in contiguous memory areas called chunks by alloc_chunk() + Chunk format: + [ block ][ block ] ... [ block ][ block terminator ] + + All blocks and the block terminator start with a block_header. The block + header contains the size of the previous and the next block. These sizes + can also contain special values. + Block size: + 0 - The block is a free_block, with a different size member. + 1 - The block is a block terminator. 
+ n - The block is used at the moment, and the value contains its size. + Previous block size: + 0 - This is the first block of the memory chunk. + n - The size of the previous block. + + Using these size values we can go forward or backward on the block chain. + The unused blocks are stored in a chain list pointed to by free_blocks. This + list is useful if we need to find a suitable memory area when the allocator + is called. + + When a block is freed, the new free block is connected to its adjacent free + blocks if possible. + + [ free block ][ used block ][ free block ] + If "used block" is freed, the three blocks are connected together: + [ one big free block ] +*/ + +/* --------------------------------------------------------------------- */ +/* System (OS) functions */ +/* --------------------------------------------------------------------- */ + +/* 64 KByte. */ +#define CHUNK_SIZE (sljit_uw)0x10000u + +/* + alloc_chunk / free_chunk : + * allocate executable system memory chunks + * the size is always divisible by CHUNK_SIZE + SLJIT_ALLOCATOR_LOCK / SLJIT_ALLOCATOR_UNLOCK : + * provided as part of sljitUtils + * only the allocator requires this lock, sljit is fully thread safe + as it only uses local variables +*/ + +#ifdef _WIN32 +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) + +static SLJIT_INLINE void* alloc_chunk(sljit_uw size) +{ + return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); +} + +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) +{ + SLJIT_UNUSED_ARG(size); + VirtualFree(chunk, 0, MEM_RELEASE); +} + +#else /* POSIX */ + +#if defined(__APPLE__) && defined(MAP_JIT) +/* + On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a + version where it's OK to have more than one JIT block or where MAP_JIT is + required. + On non-macOS systems, returns MAP_JIT if it is defined. 
+*/ +#include <TargetConditionals.h> +#if TARGET_OS_OSX +#if defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86 +#ifdef MAP_ANON +#include <sys/utsname.h> +#include <stdlib.h> + +#define SLJIT_MAP_JIT (get_map_jit_flag()) + +static SLJIT_INLINE int get_map_jit_flag() +{ + size_t page_size; + void *ptr; + struct utsname name; + static int map_jit_flag = -1; + + if (map_jit_flag < 0) { + map_jit_flag = 0; + uname(&name); + + /* Kernel version for 10.14.0 (Mojave) or later */ + if (atoi(name.release) >= 18) { + page_size = get_page_alignment() + 1; + /* Only use MAP_JIT if a hardened runtime is used */ + ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANON, -1, 0); + + if (ptr != MAP_FAILED) + munmap(ptr, page_size); + else + map_jit_flag = MAP_JIT; + } + } + return map_jit_flag; +} +#endif /* MAP_ANON */ +#else /* !SLJIT_CONFIG_X86 */ +#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#error "Unsupported architecture" +#endif /* SLJIT_CONFIG_ARM */ +#include <AvailabilityMacros.h> +#include <pthread.h> + +#define SLJIT_MAP_JIT (MAP_JIT) +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \ + apple_update_wx_flags(enable_exec) + +static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec) +{ +#if MAC_OS_X_VERSION_MIN_REQUIRED >= 110000 + pthread_jit_write_protect_np(enable_exec); +#else +#error "Must target Big Sur or newer" +#endif /* BigSur */ +} +#endif /* SLJIT_CONFIG_X86 */ +#else /* !TARGET_OS_OSX */ +#define SLJIT_MAP_JIT (MAP_JIT) +#endif /* TARGET_OS_OSX */ +#endif /* __APPLE__ && MAP_JIT */ +#ifndef SLJIT_UPDATE_WX_FLAGS +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) +#endif /* !SLJIT_UPDATE_WX_FLAGS */ +#ifndef SLJIT_MAP_JIT +#define SLJIT_MAP_JIT (0) +#endif /* !SLJIT_MAP_JIT */ + +static SLJIT_INLINE void* alloc_chunk(sljit_uw size) +{ + void *retval; + int prot = PROT_READ | PROT_WRITE | PROT_EXEC; + int flags = MAP_PRIVATE; + int fd = -1; + +#ifdef PROT_MAX + prot |= PROT_MAX(prot); +#endif + +#ifdef MAP_ANON + flags |= MAP_ANON | SLJIT_MAP_JIT; +#else /* !MAP_ANON */ + if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero())) + return NULL; + + fd = dev_zero; +#endif /* MAP_ANON */ + + retval = mmap(NULL, size, prot, flags, fd, 0); + if (retval == MAP_FAILED) + return NULL; + +#ifdef __FreeBSD__ + /* HardenedBSD's mmap lies, so check permissions again */ + if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) { + munmap(retval, size); + return NULL; + } +#endif /* FreeBSD */ + + SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0); + + return retval; +} + +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) +{ + munmap(chunk, size); +} + +#endif /* windows */ + +/* --------------------------------------------------------------------- */ +/* Common functions */ +/* --------------------------------------------------------------------- */ + +#define CHUNK_MASK (~(CHUNK_SIZE - 1)) + +struct block_header { + sljit_uw size; + sljit_uw prev_size; +}; + +struct free_block { + struct block_header header; + struct free_block *next; + struct free_block *prev; + sljit_uw size; +}; + +#define AS_BLOCK_HEADER(base, offset) \ + ((struct block_header*)(((sljit_u8*)base) + offset)) +#define AS_FREE_BLOCK(base, offset) \ + ((struct free_block*)(((sljit_u8*)base) + offset)) +#define MEM_START(base) ((void*)(((sljit_u8*)base) + sizeof(struct block_header))) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7) + +static struct free_block* free_blocks; +static sljit_uw allocated_size; +static 
sljit_uw total_size; + +static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size) +{ + free_block->header.size = 0; + free_block->size = size; + + free_block->next = free_blocks; + free_block->prev = NULL; + if (free_blocks) + free_blocks->prev = free_block; + free_blocks = free_block; +} + +static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block) +{ + if (free_block->next) + free_block->next->prev = free_block->prev; + + if (free_block->prev) + free_block->prev->next = free_block->next; + else { + SLJIT_ASSERT(free_blocks == free_block); + free_blocks = free_block->next; + } +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) +{ + struct block_header *header; + struct block_header *next_header; + struct free_block *free_block; + sljit_uw chunk_size; + + SLJIT_ALLOCATOR_LOCK(); + if (size < (64 - sizeof(struct block_header))) + size = (64 - sizeof(struct block_header)); + size = ALIGN_SIZE(size); + + free_block = free_blocks; + while (free_block) { + if (free_block->size >= size) { + chunk_size = free_block->size; + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); + if (chunk_size > size + 64) { + /* We just cut a block from the end of the free block. */ + chunk_size -= size; + free_block->size = chunk_size; + header = AS_BLOCK_HEADER(free_block, chunk_size); + header->prev_size = chunk_size; + AS_BLOCK_HEADER(header, size)->prev_size = size; + } + else { + sljit_remove_free_block(free_block); + header = (struct block_header*)free_block; + size = chunk_size; + } + allocated_size += size; + header->size = size; + SLJIT_ALLOCATOR_UNLOCK(); + return MEM_START(header); + } + free_block = free_block->next; + } + + chunk_size = (size + sizeof(struct block_header) + CHUNK_SIZE - 1) & CHUNK_MASK; + header = (struct block_header*)alloc_chunk(chunk_size); + if (!header) { + SLJIT_ALLOCATOR_UNLOCK(); + return NULL; + } + + chunk_size -= sizeof(struct block_header); + total_size += chunk_size; + + header->prev_size = 0; + if (chunk_size > size + 64) { + /* Cut the allocated space into a free and a used block. */ + allocated_size += size; + header->size = size; + chunk_size -= size; + + free_block = AS_FREE_BLOCK(header, size); + free_block->header.prev_size = size; + sljit_insert_free_block(free_block, chunk_size); + next_header = AS_BLOCK_HEADER(free_block, chunk_size); + } + else { + /* All space belongs to this allocation. */ + allocated_size += chunk_size; + header->size = chunk_size; + next_header = AS_BLOCK_HEADER(header, chunk_size); + } + next_header->size = 1; + next_header->prev_size = chunk_size; + SLJIT_ALLOCATOR_UNLOCK(); + return MEM_START(header); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +{ + struct block_header *header; + struct free_block* free_block; + + SLJIT_ALLOCATOR_LOCK(); + header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header)); + allocated_size -= header->size; + + /* Connecting free blocks together if possible. */ + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); + + /* If header->prev_size == 0, free_block will be equal to header. + In this case, free_block->header.size will be > 0. 
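   [Editor's note, in the diagram style used at the top of this file:

       [ free_block ][ block being freed ][ ... ]
           -> the if branch below extends free_block over the freed block;
       [ used block ][ block being freed ][ ... ]
           -> the else branch turns the freed block itself into a new
              free_block and links it into the free list.
   ]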
*/ + free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size); + if (SLJIT_UNLIKELY(!free_block->header.size)) { + free_block->size += header->size; + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + else { + free_block = (struct free_block*)header; + sljit_insert_free_block(free_block, header->size); + } + + header = AS_BLOCK_HEADER(free_block, free_block->size); + if (SLJIT_UNLIKELY(!header->size)) { + free_block->size += ((struct free_block*)header)->size; + sljit_remove_free_block((struct free_block*)header); + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + + /* The whole chunk is free. */ + if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) { + /* If this block is freed, we still have (allocated_size / 2) free space. */ + if (total_size - free_block->size > (allocated_size * 3 / 2)) { + total_size -= free_block->size; + sljit_remove_free_block(free_block); + free_chunk(free_block, free_block->size + sizeof(struct block_header)); + } + } + + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); + SLJIT_ALLOCATOR_UNLOCK(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) +{ + struct free_block* free_block; + struct free_block* next_free_block; + + SLJIT_ALLOCATOR_LOCK(); + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); + + free_block = free_blocks; + while (free_block) { + next_free_block = free_block->next; + if (!free_block->header.prev_size && + AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) { + total_size -= free_block->size; + sljit_remove_free_block(free_block); + free_chunk(free_block, free_block->size + sizeof(struct block_header)); + } + free_block = next_free_block; + } + + SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks)); + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); + SLJIT_ALLOCATOR_UNLOCK(); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitLir.c b/contrib/libs/pcre2/src/sljit/sljitLir.c new file mode 100644 index 0000000000..abafe1add9 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitLir.c @@ -0,0 +1,3136 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sljitLir.h" + +#ifdef _WIN32 + +#include <windows.h> + +#endif /* _WIN32 */ + +#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) + +/* These libraries are needed for the macros below. */ +#include <stdlib.h> +#include <string.h> + +#endif /* SLJIT_STD_MACROS_DEFINED */ + +#define CHECK_ERROR() \ + do { \ + if (SLJIT_UNLIKELY(compiler->error)) \ + return compiler->error; \ + } while (0) + +#define CHECK_ERROR_PTR() \ + do { \ + if (SLJIT_UNLIKELY(compiler->error)) \ + return NULL; \ + } while (0) + +#define FAIL_IF(expr) \ + do { \ + if (SLJIT_UNLIKELY(expr)) \ + return compiler->error; \ + } while (0) + +#define PTR_FAIL_IF(expr) \ + do { \ + if (SLJIT_UNLIKELY(expr)) \ + return NULL; \ + } while (0) + +#define FAIL_IF_NULL(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_ALLOC_FAILED; \ + return SLJIT_ERR_ALLOC_FAILED; \ + } \ + } while (0) + +#define PTR_FAIL_IF_NULL(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_ALLOC_FAILED; \ + return NULL; \ + } \ + } while (0) + +#define PTR_FAIL_WITH_EXEC_IF(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_EX_ALLOC_FAILED; \ + return NULL; \ + } \ + } while (0) + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#define SSIZE_OF(type) ((sljit_s32)sizeof(sljit_ ## type)) + +#define VARIABLE_FLAG_SHIFT (10) +#define VARIABLE_FLAG_MASK (0x3f << VARIABLE_FLAG_SHIFT) +#define GET_FLAG_TYPE(op) ((op) >> VARIABLE_FLAG_SHIFT) + +#define GET_OPCODE(op) \ + ((op) & ~(SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + +#define HAS_FLAGS(op) \ + ((op) & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + +#define GET_ALL_FLAGS(op) \ + ((op) & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define TYPE_CAST_NEEDED(op) \ + ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S32) +#else /* !SLJIT_64BIT_ARCHITECTURE */ +#define TYPE_CAST_NEEDED(op) \ + ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) +#endif /* SLJIT_64BIT_ARCHITECTURE */ + +#define BUF_SIZE 4096 + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) +#define ABUF_SIZE 2048 +#else +#define ABUF_SIZE 4096 +#endif + +/* Parameter parsing. */ +#define REG_MASK 0x3f +#define OFFS_REG(reg) (((reg) >> 8) & REG_MASK) +#define OFFS_REG_MASK (REG_MASK << 8) +#define TO_OFFS_REG(reg) ((reg) << 8) +/* When reg cannot be unused. */ +#define FAST_IS_REG(reg) ((reg) <= REG_MASK) + +/* Mask for argument types. */ +#define SLJIT_ARG_MASK 0x7 +#define SLJIT_ARG_FULL_MASK (SLJIT_ARG_MASK | SLJIT_ARG_TYPE_SCRATCH_REG) + +/* Mask for sljit_emit_mem. */ +#define REG_PAIR_MASK 0xff00 +#define REG_PAIR_FIRST(reg) ((reg) & 0xff) +#define REG_PAIR_SECOND(reg) ((reg) >> 8) + +/* Mask for sljit_emit_enter. */ +#define SLJIT_KEPT_SAVEDS_COUNT(options) ((options) & 0x3) + +/* Jump flags. */ +#define JUMP_LABEL 0x1 +#define JUMP_ADDR 0x2 +/* SLJIT_REWRITABLE_JUMP is 0x1000. 
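   [Editor's note on the parameter-parsing macros above: a memory operand
   packs two registers into one sljit_s32; the public SLJIT_MEM2(r1, r2)
   form from sljitLir.h (not part of this hunk) is SLJIT_MEM | r1 |
   TO_OFFS_REG(r2), so reg & REG_MASK recovers r1 and OFFS_REG(reg)
   recovers r2.]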
*/ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +# define PATCH_MB 0x4 +# define PATCH_MW 0x8 +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +# define PATCH_MD 0x10 +#endif +# define TYPE_SHIFT 13 +#endif /* SLJIT_CONFIG_X86 */ + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# define IS_BL 0x4 +# define PATCH_B 0x8 +#endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */ + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +# define CPOOL_SIZE 512 +#endif /* SLJIT_CONFIG_ARM_V5 */ + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +# define IS_COND 0x04 +# define IS_BL 0x08 + /* conditional + imm8 */ +# define PATCH_TYPE1 0x10 + /* conditional + imm20 */ +# define PATCH_TYPE2 0x20 + /* IT + imm24 */ +# define PATCH_TYPE3 0x30 + /* imm11 */ +# define PATCH_TYPE4 0x40 + /* imm24 */ +# define PATCH_TYPE5 0x50 + /* BL + imm24 */ +# define PATCH_BL 0x60 + /* 0xf00 cc code for branches */ +#endif /* SLJIT_CONFIG_ARM_THUMB2 */ + +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +# define IS_COND 0x004 +# define IS_CBZ 0x008 +# define IS_BL 0x010 +# define PATCH_B 0x020 +# define PATCH_COND 0x040 +# define PATCH_ABS48 0x080 +# define PATCH_ABS64 0x100 +#endif /* SLJIT_CONFIG_ARM_64 */ + +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +# define IS_COND 0x004 +# define IS_CALL 0x008 +# define PATCH_B 0x010 +# define PATCH_ABS_B 0x020 +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +# define PATCH_ABS32 0x040 +# define PATCH_ABS48 0x080 +#endif /* SLJIT_CONFIG_PPC_64 */ +# define REMOVE_COND 0x100 +#endif /* SLJIT_CONFIG_PPC */ + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +# define IS_MOVABLE 0x004 +# define IS_JAL 0x008 +# define IS_CALL 0x010 +# define IS_BIT26_COND 0x020 +# define IS_BIT16_COND 0x040 +# define IS_BIT23_COND 0x080 + +# define IS_COND (IS_BIT26_COND | IS_BIT16_COND | IS_BIT23_COND) + +# define PATCH_B 0x100 +# define PATCH_J 0x200 + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +# define PATCH_ABS32 0x400 +# define PATCH_ABS48 0x800 +#endif /* SLJIT_CONFIG_MIPS_64 */ + + /* instruction types */ +# define MOVABLE_INS 0 + /* 1 - 31 last destination register */ + /* no destination (i.e: store) */ +# define UNMOVABLE_INS 32 + /* FPU status register */ +# define FCSR_FCC 33 +#endif /* SLJIT_CONFIG_MIPS */ + +#if (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) +# define IS_COND 0x004 +# define IS_CALL 0x008 + +# define PATCH_B 0x010 +# define PATCH_J 0x020 + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) +# define PATCH_REL32 0x040 +# define PATCH_ABS32 0x080 +# define PATCH_ABS44 0x100 +# define PATCH_ABS52 0x200 +#else /* !SLJIT_CONFIG_RISCV_64 */ +# define PATCH_REL32 0x0 +#endif /* SLJIT_CONFIG_RISCV_64 */ +#endif /* SLJIT_CONFIG_RISCV */ + +/* Stack management. */ + +#define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \ + (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \ + (saveds) + (sljit_s32)(extra)) * (sljit_s32)sizeof(sljit_sw)) + +#define GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, size) \ + (((fscratches < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS ? 
0 : (fscratches - SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)) + \ + (fsaveds)) * (sljit_s32)(size)) + +#define ADJUST_LOCAL_OFFSET(p, i) \ + if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + (i) += SLJIT_LOCALS_OFFSET; + +#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */ + +/* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. */ +#include "sljitUtils.c" + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +#include "sljitProtExecAllocator.c" +#elif (defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR) +#include "sljitWXExecAllocator.c" +#else +#include "sljitExecAllocator.c" +#endif + +#endif + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr) + (exec_offset)) +#else +#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr)) +#endif + +#ifndef SLJIT_UPDATE_WX_FLAGS +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) +#endif + +/* Argument checking features. */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +/* Returns with error when an invalid argument is passed. */ + +#define CHECK_ARGUMENT(x) \ + do { \ + if (SLJIT_UNLIKELY(!(x))) \ + return 1; \ + } while (0) + +#define CHECK_RETURN_TYPE sljit_s32 +#define CHECK_RETURN_OK return 0 + +#define CHECK(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + compiler->error = SLJIT_ERR_BAD_ARGUMENT; \ + return SLJIT_ERR_BAD_ARGUMENT; \ + } \ + } while (0) + +#define CHECK_PTR(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + compiler->error = SLJIT_ERR_BAD_ARGUMENT; \ + return NULL; \ + } \ + } while (0) + +#define CHECK_REG_INDEX(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + return -2; \ + } \ + } while (0) + +#elif (defined SLJIT_DEBUG && SLJIT_DEBUG) + +/* An assertion failure occurs if an invalid argument is passed. */ +#undef SLJIT_ARGUMENT_CHECKS +#define SLJIT_ARGUMENT_CHECKS 1 + +#define CHECK_ARGUMENT(x) SLJIT_ASSERT(x) +#define CHECK_RETURN_TYPE void +#define CHECK_RETURN_OK return +#define CHECK(x) x +#define CHECK_PTR(x) x +#define CHECK_REG_INDEX(x) x + +#elif (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +/* Arguments are not checked. */ +#define CHECK_RETURN_TYPE void +#define CHECK_RETURN_OK return +#define CHECK(x) x +#define CHECK_PTR(x) x +#define CHECK_REG_INDEX(x) x + +#else + +/* Arguments are not checked. */ +#define CHECK(x) +#define CHECK_PTR(x) +#define CHECK_REG_INDEX(x) + +#endif /* SLJIT_ARGUMENT_CHECKS */ + +/* --------------------------------------------------------------------- */ +/* Public functions */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#define SLJIT_NEEDS_COMPILER_INIT 1 +static sljit_s32 compiler_initialized = 0; +/* A thread-safe initialization. 
*/ +static void init_compiler(void); +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data, void *exec_allocator_data) +{ + struct sljit_compiler *compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler), allocator_data); + if (!compiler) + return NULL; + SLJIT_ZEROMEM(compiler, sizeof(struct sljit_compiler)); + + SLJIT_COMPILE_ASSERT( + sizeof(sljit_s8) == 1 && sizeof(sljit_u8) == 1 + && sizeof(sljit_s16) == 2 && sizeof(sljit_u16) == 2 + && sizeof(sljit_s32) == 4 && sizeof(sljit_u32) == 4 + && (sizeof(sljit_p) == 4 || sizeof(sljit_p) == 8) + && sizeof(sljit_p) <= sizeof(sljit_sw) + && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8) + && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8), + invalid_integer_types); + SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_32, + rewritable_jump_and_single_op_must_not_be_the_same); + SLJIT_COMPILE_ASSERT(!(SLJIT_EQUAL & 0x1) && !(SLJIT_LESS & 0x1) && !(SLJIT_F_EQUAL & 0x1) && !(SLJIT_JUMP & 0x1), + conditional_flags_must_be_even_numbers); + + /* Only the non-zero members must be set. */ + compiler->error = SLJIT_SUCCESS; + + compiler->allocator_data = allocator_data; + compiler->exec_allocator_data = exec_allocator_data; + compiler->buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, allocator_data); + compiler->abuf = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, allocator_data); + + if (!compiler->buf || !compiler->abuf) { + if (compiler->buf) + SLJIT_FREE(compiler->buf, allocator_data); + if (compiler->abuf) + SLJIT_FREE(compiler->abuf, allocator_data); + SLJIT_FREE(compiler, allocator_data); + return NULL; + } + + compiler->buf->next = NULL; + compiler->buf->used_size = 0; + compiler->abuf->next = NULL; + compiler->abuf->used_size = 0; + + compiler->scratches = -1; + compiler->saveds = -1; + compiler->fscratches = -1; + compiler->fsaveds = -1; + compiler->local_size = -1; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->args_size = -1; +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw) + + CPOOL_SIZE * sizeof(sljit_u8), allocator_data); + if (!compiler->cpool) { + SLJIT_FREE(compiler->buf, allocator_data); + SLJIT_FREE(compiler->abuf, allocator_data); + SLJIT_FREE(compiler, allocator_data); + return NULL; + } + compiler->cpool_unique = (sljit_u8*)(compiler->cpool + CPOOL_SIZE); + compiler->cpool_diff = 0xffffffff; +#endif + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + compiler->delay_slot = UNMOVABLE_INS; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->last_flags = 0; + compiler->last_return = -1; + compiler->logical_local_size = 0; +#endif + +#if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) + if (!compiler_initialized) { + init_compiler(); + compiler_initialized = 1; + } +#endif + + return compiler; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *curr; + void *allocator_data = compiler->allocator_data; + SLJIT_UNUSED_ARG(allocator_data); + + buf = compiler->buf; + while (buf) { + curr = buf; + buf = buf->next; + SLJIT_FREE(curr, allocator_data); + } + + buf = compiler->abuf; + while (buf) { + curr = buf; + buf = buf->next; + SLJIT_FREE(curr, allocator_data); + } + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + 
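The constructor above zeroes the whole struct and then sets only the members that must be non-zero, so a failed allocation can always be unwound with plain SLJIT_FREE calls. From the caller's side only the final result needs testing, because CHECK_ERROR() makes every later emitter return early once compiler->error is set. A minimal lifecycle sketch (assumed driver code, not from this file):

	#include "sljitLir.h"

	static void *generate_stub(void)
	{
		struct sljit_compiler *compiler = sljit_create_compiler(NULL, NULL);
		void *code;

		if (!compiler)
			return NULL;

		/* sljit_emit_enter / sljit_emit_op* calls would go here; each
		   becomes a no-op once compiler->error is set, so one check at
		   the end covers the whole sequence. */
		code = sljit_generate_code(compiler);

		sljit_free_compiler(compiler);
		return code;	/* NULL on any argument or allocation failure */
	}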
SLJIT_FREE(compiler->cpool, allocator_data); +#endif + SLJIT_FREE(compiler, allocator_data); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler) +{ + if (compiler->error == SLJIT_SUCCESS) + compiler->error = SLJIT_ERR_ALLOC_FAILED; +} + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) +{ + SLJIT_UNUSED_ARG(exec_allocator_data); + + /* Remove thumb mode flag. */ + SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~(sljit_uw)0x1), exec_allocator_data); +} +#elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) +{ + SLJIT_UNUSED_ARG(exec_allocator_data); + + /* Resolve indirection. */ + code = (void*)(*(sljit_uw*)code); + SLJIT_FREE_EXEC(code, exec_allocator_data); +} +#else +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) +{ + SLJIT_UNUSED_ARG(exec_allocator_data); + + SLJIT_FREE_EXEC(code, exec_allocator_data); +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) +{ + if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) { + jump->flags &= (sljit_uw)~JUMP_ADDR; + jump->flags |= JUMP_LABEL; + jump->u.label = label; + } +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) +{ + if (SLJIT_LIKELY(!!jump)) { + jump->flags &= (sljit_uw)~JUMP_LABEL; + jump->flags |= JUMP_ADDR; + jump->u.target = target; + } +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label) +{ + if (SLJIT_LIKELY(!!put_label)) + put_label->label = label; +} + +#define SLJIT_CURRENT_FLAGS_ALL \ + (SLJIT_CURRENT_FLAGS_32 | SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE) + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(current_flags); + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + compiler->status_flags_state = current_flags; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = 0; + if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_ALL)) == 0) { + compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_32 | SLJIT_SET_Z)); + } +#endif +} + +/* --------------------------------------------------------------------- */ +/* Private functions */ +/* --------------------------------------------------------------------- */ + +static void* ensure_buf(struct sljit_compiler *compiler, sljit_uw size) +{ + sljit_u8 *ret; + struct sljit_memory_fragment *new_frag; + + SLJIT_ASSERT(size <= 256); + if (compiler->buf->used_size + size <= (BUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) { + ret = compiler->buf->memory + compiler->buf->used_size; + compiler->buf->used_size += size; + return ret; + } + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, compiler->allocator_data); + PTR_FAIL_IF_NULL(new_frag); + new_frag->next = compiler->buf; + compiler->buf = new_frag; + new_frag->used_size = size; + return new_frag->memory; +} + +static void* ensure_abuf(struct sljit_compiler *compiler, sljit_uw size) +{ + sljit_u8 *ret; + struct sljit_memory_fragment *new_frag; + + SLJIT_ASSERT(size <= 256); + if 
(compiler->abuf->used_size + size <= (ABUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) { + ret = compiler->abuf->memory + compiler->abuf->used_size; + compiler->abuf->used_size += size; + return ret; + } + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, compiler->allocator_data); + PTR_FAIL_IF_NULL(new_frag); + new_frag->next = compiler->abuf; + compiler->abuf = new_frag; + new_frag->used_size = size; + return new_frag->memory; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size) +{ + CHECK_ERROR_PTR(); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + if (size <= 0 || size > 128) + return NULL; + size = (size + 7) & ~7; +#else + if (size <= 0 || size > 64) + return NULL; + size = (size + 3) & ~3; +#endif + return ensure_abuf(compiler, (sljit_uw)size); +} + +static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf = compiler->buf; + struct sljit_memory_fragment *prev = NULL; + struct sljit_memory_fragment *tmp; + + do { + tmp = buf->next; + buf->next = prev; + prev = buf; + buf = tmp; + } while (buf != NULL); + + compiler->buf = prev; +} + +/* Only used in RISC architectures where the instruction size is constant */ +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +static SLJIT_INLINE sljit_uw compute_next_addr(struct sljit_label *label, struct sljit_jump *jump, + struct sljit_const *const_, struct sljit_put_label *put_label) +{ + sljit_uw result = ~(sljit_uw)0; + + if (label) + result = label->size; + + if (jump && jump->addr < result) + result = jump->addr; + + if (const_ && const_->addr < result) + result = const_->addr; + + if (put_label && put_label->addr < result) + result = put_label->addr; + + return result; +} + +#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_S390X */ + +static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(local_size); + + compiler->options = options; + compiler->scratches = scratches; + compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_return = args & SLJIT_ARG_MASK; + compiler->logical_local_size = local_size; +#endif +} + +static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(local_size); + + compiler->options = options; + compiler->scratches = scratches; + compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_return = args & SLJIT_ARG_MASK; + compiler->logical_local_size = local_size; +#endif +} + +static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compiler *compiler) +{ + label->next = NULL; + label->size = compiler->size; + if (compiler->last_label) + compiler->last_label->next = label; + else + compiler->labels = label; + compiler->last_label = label; +} + +static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, 
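sljit_alloc_memory above rounds each request up to the target's word alignment before carving it out of the current fragment, growing the abuf chain only when the fragment is full. The rounding arithmetic on its own (an illustrative stand-alone check, not code from this file):

	#include <assert.h>

	int main(void)
	{
		/* 64-bit rule from sljit_alloc_memory: pad to a multiple of 8. */
		assert(((20 + 7) & ~7) == 24);
		assert(((24 + 7) & ~7) == 24);	/* already aligned: unchanged */
		/* 32-bit rule: pad to a multiple of 4. */
		assert(((5 + 3) & ~3) == 8);
		return 0;
	}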
sljit_u32 flags) +{ + jump->next = NULL; + jump->flags = flags; + if (compiler->last_jump) + compiler->last_jump->next = jump; + else + compiler->jumps = jump; + compiler->last_jump = jump; +} + +static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_compiler *compiler) +{ + const_->next = NULL; + const_->addr = compiler->size; + if (compiler->last_const) + compiler->last_const->next = const_; + else + compiler->consts = const_; + compiler->last_const = const_; +} + +static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct sljit_compiler *compiler, sljit_uw offset) +{ + put_label->next = NULL; + put_label->label = NULL; + put_label->addr = compiler->size - offset; + put_label->flags = 0; + if (compiler->last_put_label) + compiler->last_put_label->next = put_label; + else + compiler->put_labels = put_label; + compiler->last_put_label = put_label; +} + +#define ADDRESSING_DEPENDS_ON(exp, reg) \ + (((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg)) + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches) +{ + sljit_s32 word_arg_count, scratch_arg_end, saved_arg_count, float_arg_count, curr_type; + + curr_type = (arg_types & SLJIT_ARG_FULL_MASK); + + if (curr_type >= SLJIT_ARG_TYPE_F64) { + if (curr_type > SLJIT_ARG_TYPE_F32 || fscratches == 0) + return 0; + } else if (curr_type >= SLJIT_ARG_TYPE_W) { + if (scratches == 0) + return 0; + } + + arg_types >>= SLJIT_ARG_SHIFT; + + word_arg_count = 0; + scratch_arg_end = 0; + saved_arg_count = 0; + float_arg_count = 0; + while (arg_types != 0) { + if (word_arg_count + float_arg_count >= 4) + return 0; + + curr_type = (arg_types & SLJIT_ARG_MASK); + + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + if (saveds == -1 || curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_P) + return 0; + + word_arg_count++; + scratch_arg_end = word_arg_count; + } else { + if (curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_F32) + return 0; + + if (curr_type < SLJIT_ARG_TYPE_F64) { + word_arg_count++; + saved_arg_count++; + } else + float_arg_count++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (saveds == -1) + return (word_arg_count <= scratches && float_arg_count <= fscratches); + + return (saved_arg_count <= saveds && scratch_arg_end <= scratches && float_arg_count <= fscratches); +} + +#define FUNCTION_CHECK_IS_REG(r) \ + (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) \ + || ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0)) + +#define FUNCTION_CHECK_IS_FREG(fr) \ + (((fr) >= SLJIT_FR0 && (fr) < (SLJIT_FR0 + compiler->fscratches)) \ + || ((fr) > (SLJIT_FS0 - compiler->fsaveds) && (fr) <= SLJIT_FS0)) + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define CHECK_IF_VIRTUAL_REGISTER(p) ((p) <= SLJIT_S3 && (p) >= SLJIT_S8) +#else +#define CHECK_IF_VIRTUAL_REGISTER(p) 0 +#endif + +static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (compiler->scratches == -1 || compiler->saveds == -1) + return 0; + + if (!(p & SLJIT_MEM)) + return 0; + + if (p == SLJIT_MEM1(SLJIT_SP)) + return (i >= 0 && i < compiler->logical_local_size); + + if (!(!(p & REG_MASK) || FUNCTION_CHECK_IS_REG(p & REG_MASK))) + return 0; + + if (CHECK_IF_VIRTUAL_REGISTER(p & REG_MASK)) + return 0; + + if (p & OFFS_REG_MASK) { + if (!(p & REG_MASK)) + return 0; + + if 
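function_check_arguments above walks arg_types in SLJIT_ARG_SHIFT-wide groups: the lowest group holds the return type, each following group one argument, optionally tagged with SLJIT_ARG_TYPE_SCRATCH_REG. A hand-packed descriptor for a function returning a word and taking (word, f64) would look like this (sketch; assumes SLJIT_ARG_SHIFT == 4 as defined in sljitLir.h, which also provides SLJIT_ARGS0()..SLJIT_ARGS4() helpers for the same packing):

	sljit_s32 arg_types = SLJIT_ARG_TYPE_W	/* return type, lowest group */
		| (SLJIT_ARG_TYPE_W << SLJIT_ARG_SHIFT)	/* first argument */
		| (SLJIT_ARG_TYPE_F64 << (2 * SLJIT_ARG_SHIFT));	/* second argument */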
(!(FUNCTION_CHECK_IS_REG(OFFS_REG(p)))) + return 0; + + if (CHECK_IF_VIRTUAL_REGISTER(OFFS_REG(p))) + return 0; + + if ((i & ~0x3) != 0) + return 0; + } + + return (p & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK)) == 0; +} + +#define FUNCTION_CHECK_SRC_MEM(p, i) \ + CHECK_ARGUMENT(function_check_src_mem(compiler, p, i)); + +static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (compiler->scratches == -1 || compiler->saveds == -1) + return 0; + + if (FUNCTION_CHECK_IS_REG(p)) + return (i == 0); + + if (p == SLJIT_IMM) + return 1; + + return function_check_src_mem(compiler, p, i); +} + +#define FUNCTION_CHECK_SRC(p, i) \ + CHECK_ARGUMENT(function_check_src(compiler, p, i)); + +static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (compiler->scratches == -1 || compiler->saveds == -1) + return 0; + + if (FUNCTION_CHECK_IS_REG(p)) + return (i == 0); + + return function_check_src_mem(compiler, p, i); +} + +#define FUNCTION_CHECK_DST(p, i) \ + CHECK_ARGUMENT(function_check_dst(compiler, p, i)); + +static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (compiler->scratches == -1 || compiler->saveds == -1) + return 0; + + if (FUNCTION_CHECK_IS_FREG(p)) + return (i == 0); + + return function_check_src_mem(compiler, p, i); +} + +#define FUNCTION_FCHECK(p, i) \ + CHECK_ARGUMENT(function_fcheck(compiler, p, i)); + +#endif /* SLJIT_ARGUMENT_CHECKS */ + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose) +{ + compiler->verbose = verbose; +} + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#ifdef _WIN64 +#ifdef __GNUC__ +# define SLJIT_PRINT_D "ll" +#else +# define SLJIT_PRINT_D "I64" +#endif +#else +# define SLJIT_PRINT_D "l" +#endif +#else +# define SLJIT_PRINT_D "" +#endif + +static void sljit_verbose_reg(struct sljit_compiler *compiler, sljit_s32 r) +{ + if (r < (SLJIT_R0 + compiler->scratches)) + fprintf(compiler->verbose, "r%d", r - SLJIT_R0); + else if (r != SLJIT_SP) + fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - r); + else + fprintf(compiler->verbose, "sp"); +} + +static void sljit_verbose_freg(struct sljit_compiler *compiler, sljit_s32 r) +{ + if (r < (SLJIT_FR0 + compiler->fscratches)) + fprintf(compiler->verbose, "fr%d", r - SLJIT_FR0); + else + fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - r); +} + +static void sljit_verbose_param(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if ((p) & SLJIT_IMM) + fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); + else if ((p) & SLJIT_MEM) { + if ((p) & REG_MASK) { + fputc('[', compiler->verbose); + sljit_verbose_reg(compiler, (p) & REG_MASK); + if ((p) & OFFS_REG_MASK) { + fprintf(compiler->verbose, " + "); + sljit_verbose_reg(compiler, OFFS_REG(p)); + if (i) + fprintf(compiler->verbose, " * %d", 1 << (i)); + } + else if (i) + fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); + fputc(']', compiler->verbose); + } + else + fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); + } else + sljit_verbose_reg(compiler, p); +} + +static void sljit_verbose_fparam(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if ((p) & SLJIT_MEM) { + if ((p) & REG_MASK) { + fputc('[', compiler->verbose); + sljit_verbose_reg(compiler, (p) & REG_MASK); + if ((p) & OFFS_REG_MASK) { + fprintf(compiler->verbose, " + "); + 
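Both printers decode the operand packing defined near the top of this file: the base register sits in the low six bits and an optional index register in bits 8..13, with the shift amount carried in the immediate. A stand-alone round-trip of those macros (illustrative; the defines are restated so the snippet compiles on its own):

	#include <stdio.h>

	#define REG_MASK	0x3f
	#define OFFS_REG(reg)	(((reg) >> 8) & REG_MASK)
	#define TO_OFFS_REG(reg)	((reg) << 8)

	int main(void)
	{
		int mem = 3 | TO_OFFS_REG(5);	/* hypothetical: base reg 3, index reg 5 */
		/* prints "base=3 index=5"; with a shift i, sljit_verbose_param
		   renders such an operand as "[rB + rI * (1 << i)]" */
		printf("base=%d index=%d\n", mem & REG_MASK, OFFS_REG(mem));
		return 0;
	}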
sljit_verbose_reg(compiler, OFFS_REG(p)); + if (i) + fprintf(compiler->verbose, " * %d", 1 << (i)); + } + else if (i) + fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); + fputc(']', compiler->verbose); + } + else + fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); + } + else + sljit_verbose_freg(compiler, p); +} + +static const char* op0_names[] = { + "breakpoint", "nop", "lmul.uw", "lmul.sw", + "divmod.u", "divmod.s", "div.u", "div.s", + "endbr", "skip_frames_before_return" +}; + +static const char* op1_names[] = { + "", ".u8", ".s8", ".u16", + ".s16", ".u32", ".s32", "32", + ".p", "not", "clz", "ctz" +}; + +static const char* op2_names[] = { + "add", "addc", "sub", "subc", + "mul", "and", "or", "xor", + "shl", "mshl", "lshr", "mlshr", + "ashr", "mashr", "rotl", "rotr" +}; + +static const char* op_src_names[] = { + "fast_return", "skip_frames_before_fast_return", + "prefetch_l1", "prefetch_l2", + "prefetch_l3", "prefetch_once", +}; + +static const char* fop1_names[] = { + "mov", "conv", "conv", "conv", + "conv", "conv", "cmp", "neg", + "abs", +}; + +static const char* fop2_names[] = { + "add", "sub", "mul", "div" +}; + +static const char* jump_names[] = { + "equal", "not_equal", + "less", "greater_equal", + "greater", "less_equal", + "sig_less", "sig_greater_equal", + "sig_greater", "sig_less_equal", + "overflow", "not_overflow", + "carry", "", + "f_equal", "f_not_equal", + "f_less", "f_greater_equal", + "f_greater", "f_less_equal", + "unordered", "ordered", + "ordered_equal", "unordered_or_not_equal", + "ordered_less", "unordered_or_greater_equal", + "ordered_greater", "unordered_or_less_equal", + "unordered_or_equal", "ordered_not_equal", + "unordered_or_less", "ordered_greater_equal", + "unordered_or_greater", "ordered_less_equal", + "jump", "fast_call", + "call", "call_reg_arg" +}; + +static const char* call_arg_names[] = { + "void", "w", "32", "p", "f64", "f32" +}; + +#endif /* SLJIT_VERBOSE */ + +/* --------------------------------------------------------------------- */ +/* Arch dependent */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +#define SLJIT_SKIP_CHECKS(compiler) (compiler)->skip_checks = 1 + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_compiler *compiler) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + struct sljit_jump *jump; +#endif + + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->size > 0); + jump = compiler->jumps; + while (jump) { + /* All jumps have a target. 
*/ + CHECK_ARGUMENT(jump->flags & (JUMP_LABEL | JUMP_ADDR)); + jump = jump->next; + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + if (options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT(!(options & ~(0x3 | SLJIT_ENTER_REG_ARG))); + } else { + CHECK_ARGUMENT(options == 0); + } + CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds); + CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); + CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); + CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); + CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) <= SLJIT_ARG_TYPE_F32); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, (options & SLJIT_ENTER_REG_ARG) ? 0 : saveds, fscratches)); + + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " enter ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK], + (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? 
"_r" : ""); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + + fprintf(compiler->verbose, "],"); + + if (options & SLJIT_ENTER_REG_ARG) { + fprintf(compiler->verbose, " enter:reg_arg,"); + + if (SLJIT_KEPT_SAVEDS_COUNT(options) > 0) + fprintf(compiler->verbose, " keep:%d,", SLJIT_KEPT_SAVEDS_COUNT(options)); + } + + fprintf(compiler->verbose, "scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", + scratches, saveds, fscratches, fsaveds, local_size); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + if (options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT(!(options & ~(0x3 | SLJIT_ENTER_REG_ARG))); + } else { + CHECK_ARGUMENT(options == 0); + } + CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds); + CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); + CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); + CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); + CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, (options & SLJIT_ENTER_REG_ARG) ? 0 : saveds, fscratches)); + + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " set_context ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK], + (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? 
"_r" : ""); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + + fprintf(compiler->verbose, "],"); + + if (options & SLJIT_ENTER_REG_ARG) { + fprintf(compiler->verbose, " enter:reg_arg,"); + + if (SLJIT_KEPT_SAVEDS_COUNT(options) > 0) + fprintf(compiler->verbose, " keep:%d,", SLJIT_KEPT_SAVEDS_COUNT(options)); + } + + fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", + scratches, saveds, fscratches, fsaveds, local_size); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_void(struct sljit_compiler *compiler) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->last_return == SLJIT_ARG_TYPE_VOID); +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " return_void\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->scratches >= 0); + + switch (compiler->last_return) { + case SLJIT_ARG_TYPE_W: + CHECK_ARGUMENT(op >= SLJIT_MOV && op <= SLJIT_MOV_S32); + break; + case SLJIT_ARG_TYPE_32: + CHECK_ARGUMENT(op == SLJIT_MOV32 || (op >= SLJIT_MOV32_U8 && op <= SLJIT_MOV32_S16)); + break; + case SLJIT_ARG_TYPE_P: + CHECK_ARGUMENT(op == SLJIT_MOV_P); + break; + case SLJIT_ARG_TYPE_F64: + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(op == SLJIT_MOV_F64); + break; + case SLJIT_ARG_TYPE_F32: + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(op == SLJIT_MOV_F32); + break; + default: + /* Context not initialized, void, etc. */ + CHECK_ARGUMENT(0); + break; + } + + if (GET_OPCODE(op) < SLJIT_MOV_F64) { + FUNCTION_CHECK_SRC(src, srcw); + } else { + FUNCTION_FCHECK(src, srcw); + } + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (GET_OPCODE(op) < SLJIT_MOV_F64) { + fprintf(compiler->verbose, " return%s%s ", !(op & SLJIT_32) ? "" : "32", + op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE]); + sljit_verbose_param(compiler, src, srcw); + } else { + fprintf(compiler->verbose, " return%s ", !(op & SLJIT_32) ? 
".f64" : ".f32"); + sljit_verbose_fparam(compiler, src, srcw); + } + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " return_to "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fast_enter "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW) + || ((op & ~SLJIT_32) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_32) <= SLJIT_DIV_SW) + || (op >= SLJIT_ENDBR && op <= SLJIT_SKIP_FRAMES_BEFORE_RETURN)); + CHECK_ARGUMENT(GET_OPCODE(op) < SLJIT_LMUL_UW || GET_OPCODE(op) >= SLJIT_ENDBR || compiler->scratches >= 2); + if ((GET_OPCODE(op) >= SLJIT_LMUL_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) || op == SLJIT_SKIP_FRAMES_BEFORE_RETURN) + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + { + fprintf(compiler->verbose, " %s", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]); + if (GET_OPCODE(op) >= SLJIT_DIVMOD_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) { + fprintf(compiler->verbose, (op & SLJIT_32) ? "32" : "w"); + } + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CTZ); + + switch (GET_OPCODE(op)) { + case SLJIT_NOT: + /* Only SLJIT_32 and SLJIT_SET_Z are allowed. */ + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); + break; + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_P: + /* Nothing allowed */ + CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + break; + default: + /* Only SLJIT_32 is allowed. */ + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + break; + } + + FUNCTION_CHECK_DST(dst, dstw); + FUNCTION_CHECK_SRC(src, srcw); + + if (GET_OPCODE(op) >= SLJIT_NOT) { + CHECK_ARGUMENT(src != SLJIT_IMM); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (GET_OPCODE(op) <= SLJIT_MOV_P) + { + fprintf(compiler->verbose, " mov%s%s ", !(op & SLJIT_32) ? 
"" : "32", + op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE]); + } + else + { + fprintf(compiler->verbose, " %s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_32) ? "" : "32", + !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", + !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]); + } + + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 unset, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ROTR); + + switch (GET_OPCODE(op)) { + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); + break; + case SLJIT_MUL: + CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); + break; + case SLJIT_ADD: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY) + || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); + break; + case SLJIT_SUB: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_OVERFLOW) + || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + break; + case SLJIT_ADDC: + case SLJIT_SUBC: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + CHECK_ARGUMENT((compiler->last_flags & 0xff) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + CHECK_ARGUMENT((op & SLJIT_32) == (compiler->last_flags & SLJIT_32)); + break; + case SLJIT_ROTL: + case SLJIT_ROTR: + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + break; + default: + SLJIT_UNREACHABLE(); + break; + } + + if (unset) { + CHECK_ARGUMENT(HAS_FLAGS(op)); + } else { + FUNCTION_CHECK_DST(dst, dstw); + } + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_32) ? "" : "32", + !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", + !(op & VARIABLE_FLAG_MASK) ? 
"" : jump_names[GET_FLAG_TYPE(op)]); + if (unset) + fprintf(compiler->verbose, "unset"); + else + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_LSHR + || GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR); + CHECK_ARGUMENT((op & ~(0xff | SLJIT_32 | SLJIT_SHIFT_INTO_NON_ZERO)) == 0); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src_dst)); + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.into%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_32) ? "" : "32", + (op & SLJIT_SHIFT_INTO_NON_ZERO) ? ".nz" : ""); + + sljit_verbose_reg(compiler, src_dst); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(op >= SLJIT_FAST_RETURN && op <= SLJIT_PREFETCH_ONCE); + FUNCTION_CHECK_SRC(src, srcw); + + if (op == SLJIT_FAST_RETURN || op == SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN) + { + CHECK_ARGUMENT(src != SLJIT_IMM); + compiler->last_flags = 0; + } + else if (op >= SLJIT_PREFETCH_L1 && op <= SLJIT_PREFETCH_ONCE) + { + CHECK_ARGUMENT(src & SLJIT_MEM); + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s ", op_src_names[op - SLJIT_OP_SRC_BASE]); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 reg) +{ + SLJIT_UNUSED_ARG(reg); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit_s32 reg) +{ + SLJIT_UNUSED_ARG(reg); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + sljit_u32 i; +#endif + + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(instruction); + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + CHECK_ARGUMENT(size > 0 && size < 16); +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + CHECK_ARGUMENT((size == 2 && (((sljit_sw)instruction) & 0x1) == 0) + || (size == 4 && 
(((sljit_sw)instruction) & 0x3) == 0)); +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + CHECK_ARGUMENT(size == 2 || size == 4 || size == 6); +#else + CHECK_ARGUMENT(size == 4 && (((sljit_sw)instruction) & 0x3) == 0); +#endif + + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " op_custom"); + for (i = 0; i < size; i++) + fprintf(compiler->verbose, " 0x%x", ((sljit_u8*)instruction)[i]); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_FCHECK(src, srcw); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + fprintf(compiler->verbose, " %s%s ", fop1_names[SLJIT_CONV_F64_FROM_F32 - SLJIT_FOP1_BASE], + (op & SLJIT_32) ? ".f32.from.f64" : ".f64.from.f32"); + else + fprintf(compiler->verbose, " %s%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (op & SLJIT_32) ? ".f32" : ".f64"); + + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = GET_FLAG_TYPE(op) | (op & SLJIT_32); +#endif + + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_CMP_F64); + CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); + CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK) + || (GET_FLAG_TYPE(op) >= SLJIT_F_EQUAL && GET_FLAG_TYPE(op) <= SLJIT_ORDERED_LESS_EQUAL)); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_32) ? 
".f32" : ".f64"); + if (op & VARIABLE_FLAG_MASK) { + fprintf(compiler->verbose, ".%s", jump_names[GET_FLAG_TYPE(op)]); + } + fprintf(compiler->verbose, " "); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_FCHECK(src, srcw); + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? ".s32" : ".sw", + (op & SLJIT_32) ? ".f32" : ".f64"); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_SW && GET_OPCODE(op) <= SLJIT_CONV_F64_FROM_S32); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_CHECK_SRC(src, srcw); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (op & SLJIT_32) ? ".f32" : ".f64", + (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? ".s32" : ".sw"); + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s ", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE], (op & SLJIT_32) ? 
".f32" : ".f64"); + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = 0; +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, "label:\n"); +#endif + CHECK_RETURN_OK; +} + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#define CHECK_UNORDERED(type, last_flags) \ + ((((type) & 0xff) == SLJIT_UNORDERED || ((type) & 0xff) == SLJIT_ORDERED) && \ + ((last_flags) & 0xff) >= SLJIT_UNORDERED && ((last_flags) & 0xff) <= SLJIT_ORDERED_LESS_EQUAL) +#else +#define CHECK_UNORDERED(type, last_flags) 0 +#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_FAST_CALL); + + if ((type & 0xff) < SLJIT_JUMP) { + if ((type & 0xff) <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_CARRY || (type & 0xff) == SLJIT_NOT_CARRY); + compiler->last_flags = 0; + } else + CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) + || CHECK_UNORDERED(type, compiler->last_flags)); + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, " jump%s %s\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + jump_names[type & 0xff]); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_CALL_RETURN))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_CALL && (type & 0xff) <= SLJIT_CALL_REG_ARG); + CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); + + if (type & SLJIT_CALL_RETURN) { + CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); + + if (compiler->options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL_REG_ARG); + } else { + CHECK_ARGUMENT((type & 0xff) != SLJIT_CALL_REG_ARG); + } + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s ret[%s", jump_names[type & 0xff], + !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + !(type & SLJIT_CALL_RETURN) ? 
"" : ".ret", + call_arg_names[arg_types & SLJIT_ARG_MASK]); + + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + fprintf(compiler->verbose, "]\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL); + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " cmp%s%s %s, ", (type & SLJIT_32) ? "32" : "", + !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_F_EQUAL && (type & 0xff) <= SLJIT_ORDERED_LESS_EQUAL + && ((type & 0xff) <= SLJIT_ORDERED || sljit_cmp_info(type & 0xff))); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); + compiler->last_flags = 0; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fcmp%s%s %s, ", (type & SLJIT_32) ? ".f32" : ".f64", + !(type & SLJIT_REWRITABLE_JUMP) ? 
"" : ".r", jump_names[type & 0xff]); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(type >= SLJIT_JUMP && type <= SLJIT_FAST_CALL); + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " ijump.%s ", jump_names[type]); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_CALL_RETURN))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_CALL && (type & 0xff) <= SLJIT_CALL_REG_ARG); + CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); + FUNCTION_CHECK_SRC(src, srcw); + + if (type & SLJIT_CALL_RETURN) { + CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); + + if (compiler->options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL_REG_ARG); + } else { + CHECK_ARGUMENT((type & 0xff) != SLJIT_CALL_REG_ARG); + } + } +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " i%s%s ret[%s", jump_names[type & 0xff], + !(type & SLJIT_CALL_RETURN) ? 
"" : ".ret", + call_arg_names[arg_types & SLJIT_ARG_MASK]); + + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + fprintf(compiler->verbose, "], "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); + CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32 + || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); + + if (type <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else + CHECK_ARGUMENT(type == (compiler->last_flags & 0xff) + || (type == SLJIT_NOT_CARRY && (compiler->last_flags & 0xff) == SLJIT_CARRY) + || (type == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) + || CHECK_UNORDERED(type, compiler->last_flags)); + + FUNCTION_CHECK_DST(dst, dstw); + + if (GET_OPCODE(op) >= SLJIT_ADD) + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " flags.%s%s%s ", + GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], + GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_32) ? "32" : ""), + !(op & SLJIT_SET_Z) ? "" : ".z"); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", %s\n", jump_names[type]); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 cond = type & ~SLJIT_32; + + CHECK_ARGUMENT(cond >= SLJIT_EQUAL && cond <= SLJIT_ORDERED_LESS_EQUAL); + + CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg)); + if (src != SLJIT_IMM) { + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src)); + CHECK_ARGUMENT(srcw == 0); + } + + if (cond <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else + CHECK_ARGUMENT(cond == (compiler->last_flags & 0xff) + || (cond == SLJIT_NOT_CARRY && (compiler->last_flags & 0xff) == SLJIT_CARRY) + || (cond == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) + || CHECK_UNORDERED(cond, compiler->last_flags)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " cmov%s %s, ", + !(type & SLJIT_32) ? 
"" : "32", + jump_names[type & ~SLJIT_32]); + sljit_verbose_reg(compiler, dst_reg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 allowed_flags; + + if (type & SLJIT_MEM_UNALIGNED) { + CHECK_ARGUMENT(!(type & (SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32))); + } else if (type & SLJIT_MEM_UNALIGNED_16) { + CHECK_ARGUMENT(!(type & SLJIT_MEM_UNALIGNED_32)); + } else { + CHECK_ARGUMENT((reg & REG_PAIR_MASK) || (type & SLJIT_MEM_UNALIGNED_32)); + } + + allowed_flags = SLJIT_MEM_UNALIGNED; + + switch (type & 0xff) { + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + allowed_flags = SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16; + break; + case SLJIT_MOV: + case SLJIT_MOV_P: + allowed_flags = SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32; + break; + } + + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | allowed_flags)) == 0); + + if (reg & REG_PAIR_MASK) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(REG_PAIR_FIRST(reg))); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(REG_PAIR_SECOND(reg))); + CHECK_ARGUMENT(REG_PAIR_FIRST(reg) != REG_PAIR_SECOND(reg)); + } else { + CHECK_ARGUMENT((type & 0xff) >= SLJIT_MOV && (type & 0xff) <= SLJIT_MOV_P); + CHECK_ARGUMENT(!(type & SLJIT_32) || ((type & 0xff) >= SLJIT_MOV_U8 && (type & 0xff) <= SLJIT_MOV_S16)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg)); + } + + FUNCTION_CHECK_SRC_MEM(mem, memw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if ((type & 0xff) == SLJIT_MOV32) + fprintf(compiler->verbose, " %s32", + (type & SLJIT_MEM_STORE) ? "store" : "load"); + else + fprintf(compiler->verbose, " %s%s%s", + (type & SLJIT_MEM_STORE) ? "store" : "load", + !(type & SLJIT_32) ? 
"" : "32", + op1_names[(type & 0xff) - SLJIT_OP1_BASE]); + + if (type & SLJIT_MEM_UNALIGNED) + printf(".un"); + else if (type & SLJIT_MEM_UNALIGNED_16) + printf(".un16"); + else if (type & SLJIT_MEM_UNALIGNED_32) + printf(".un32"); + + if (reg & REG_PAIR_MASK) { + fprintf(compiler->verbose, " {"); + sljit_verbose_reg(compiler, REG_PAIR_FIRST(reg)); + fprintf(compiler->verbose, ", "); + sljit_verbose_reg(compiler, REG_PAIR_SECOND(reg)); + fprintf(compiler->verbose, "}, "); + } else { + fprintf(compiler->verbose, " "); + sljit_verbose_reg(compiler, reg); + fprintf(compiler->verbose, ", "); + } + sljit_verbose_param(compiler, mem, memw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((type & 0xff) >= SLJIT_MOV && (type & 0xff) <= SLJIT_MOV_P); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_POST)) == 0); + CHECK_ARGUMENT((mem & REG_MASK) != 0 && (mem & REG_MASK) != reg); + + FUNCTION_CHECK_SRC_MEM(mem, memw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_MEM_SUPP) + CHECK_RETURN_OK; + if (sljit_emit_mem_update(compiler, type | SLJIT_MEM_SUPP, reg, mem, memw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # mem: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + if ((type & 0xff) == SLJIT_MOV32) + fprintf(compiler->verbose, " %s32.%s ", + (type & SLJIT_MEM_STORE) ? "store" : "load", + (type & SLJIT_MEM_POST) ? "post" : "pre"); + else + fprintf(compiler->verbose, " %s%s%s.%s ", + (type & SLJIT_MEM_STORE) ? "store" : "load", + !(type & SLJIT_32) ? "" : "32", + op1_names[(type & 0xff) - SLJIT_OP1_BASE], + (type & SLJIT_MEM_POST) ? "post" : "pre"); + + sljit_verbose_reg(compiler, reg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, mem, memw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64); + + if (type & SLJIT_MEM_UNALIGNED) { + CHECK_ARGUMENT(!(type & (SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32))); + } else if (type & SLJIT_MEM_UNALIGNED_16) { + CHECK_ARGUMENT(!(type & SLJIT_MEM_UNALIGNED_32)); + } else { + CHECK_ARGUMENT(type & SLJIT_MEM_UNALIGNED_32); + CHECK_ARGUMENT(!(type & SLJIT_32)); + } + + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32))); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg)); + FUNCTION_CHECK_SRC_MEM(mem, memw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s.%s", + (type & SLJIT_MEM_STORE) ? "store" : "load", + !(type & SLJIT_32) ? 
"f64" : "f32"); + + if (type & SLJIT_MEM_UNALIGNED) + printf(".un"); + else if (type & SLJIT_MEM_UNALIGNED_16) + printf(".un16"); + else if (type & SLJIT_MEM_UNALIGNED_32) + printf(".un32"); + + fprintf(compiler->verbose, " "); + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, mem, memw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_POST)) == 0); + FUNCTION_CHECK_SRC_MEM(mem, memw); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & SLJIT_MEM_SUPP) + CHECK_RETURN_OK; + if (sljit_emit_fmem_update(compiler, type | SLJIT_MEM_SUPP, freg, mem, memw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " # fmem: unsupported form, no instructions are emitted\n"); + CHECK_RETURN_OK; + } + + fprintf(compiler->verbose, " %s.%s.%s ", + (type & SLJIT_MEM_STORE) ? "store" : "load", + !(type & SLJIT_32) ? "f64" : "f32", + (type & SLJIT_MEM_POST) ? "post" : "pre"); + + sljit_verbose_freg(compiler, freg); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, mem, memw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; + +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + /* Any offset is allowed. 
*/ + SLJIT_UNUSED_ARG(offset); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " local_base "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + SLJIT_UNUSED_ARG(init_value); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " const "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " put_label "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +#else /* !SLJIT_ARGUMENT_CHECKS && !SLJIT_VERBOSE */ + +#define SLJIT_SKIP_CHECKS(compiler) + +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */ + +#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \ + SLJIT_COMPILE_ASSERT(!(SLJIT_CONV_SW_FROM_F64 & 0x1) && !(SLJIT_CONV_F64_FROM_SW & 0x1), \ + invalid_float_opcodes); \ + if (GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CMP_F64) { \ + if (GET_OPCODE(op) == SLJIT_CMP_F64) { \ + CHECK(check_sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \ + } \ + if ((GET_OPCODE(op) | 0x1) == SLJIT_CONV_S32_FROM_F64) { \ + CHECK(check_sljit_emit_fop1_conv_sw_from_f64(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_conv_sw_from_f64(compiler, op, dst, dstw, src, srcw); \ + } \ + CHECK(check_sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_conv_f64_from_sw(compiler, op, dst, dstw, src, srcw); \ + } \ + CHECK(check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ + || ((defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) && !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)) \ + || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + struct sljit_label *label; + struct sljit_jump *jump; + sljit_s32 op = (type & SLJIT_32) ? 
SLJIT_MOV32 : SLJIT_MOV; + + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1); + FAIL_IF(!jump); + + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op1(compiler, op, dst_reg, 0, src, srcw)); + + SLJIT_SKIP_CHECKS(compiler); + label = sljit_emit_label(compiler); + FAIL_IF(!label); + + sljit_set_label(jump, label); + return SLJIT_SUCCESS; +} + +#endif + +#if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)) \ + && !(defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + +static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + SLJIT_SKIP_CHECKS(compiler); + + if (type & SLJIT_MEM_STORE) + return sljit_emit_op1(compiler, type & (0xff | SLJIT_32), mem, memw, reg, 0); + return sljit_emit_op1(compiler, type & (0xff | SLJIT_32), reg, 0, mem, memw); +} + +#endif /* (!SLJIT_CONFIG_MIPS || SLJIT_MIPS_REV >= 6) && !SLJIT_CONFIG_ARM_V5 */ + +#if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)) \ + && !(defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + +static sljit_s32 sljit_emit_fmem_unaligned(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + SLJIT_SKIP_CHECKS(compiler); + + if (type & SLJIT_MEM_STORE) + return sljit_emit_fop1(compiler, type & (0xff | SLJIT_32), mem, memw, freg, 0); + return sljit_emit_fop1(compiler, type & (0xff | SLJIT_32), freg, 0, mem, memw); +} + +#endif /* (!SLJIT_CONFIG_MIPS || SLJIT_MIPS_REV >= 6) && !SLJIT_CONFIG_ARM */ + +/* CPU description section */ + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) +#define SLJIT_CPUINFO_PART1 " 32bit (" +#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define SLJIT_CPUINFO_PART1 " 64bit (" +#else +#error "Internal error: CPU type info missing" +#endif + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define SLJIT_CPUINFO_PART2 "little endian + " +#elif (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) +#define SLJIT_CPUINFO_PART2 "big endian + " +#else +#error "Internal error: CPU type info missing" +#endif + +#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) +#define SLJIT_CPUINFO_PART3 "unaligned)" +#else +#define SLJIT_CPUINFO_PART3 "aligned)" +#endif + +#define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3 + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +# include "sljitNativeX86_common.c" +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +# include "sljitNativeARM_32.c" +#elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# include "sljitNativeARM_32.c" +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +# include "sljitNativeARM_T2_32.c" +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +# include "sljitNativeARM_64.c" +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +# include "sljitNativePPC_common.c" +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +# include "sljitNativeMIPS_common.c" +#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) +# include "sljitNativeRISCV_common.c" +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +# include "sljitNativeS390X.c" +#endif + +static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + /* At the moment the 
pointer size is always equal to sljit_sw. May be changed in the future. */ + if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_P)) + return SLJIT_SUCCESS; +#else + if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P)) + return SLJIT_SUCCESS; +#endif + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); +} + +#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + && !((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && defined __SOFTFP__) + +static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + if (src == SLJIT_FR0) + return SLJIT_SUCCESS; + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw); +} + +#endif /* !SLJIT_CONFIG_X86_32 && !(SLJIT_CONFIG_ARM_32 && __SOFTFP__) */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + if (GET_OPCODE(op) < SLJIT_MOV_F64) { + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + } else { + FAIL_IF(emit_fmov_before_return(compiler, op, src, srcw)); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_return_void(compiler); +} + +#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ + && !(defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* Default compare for most architectures. */ + sljit_s32 flags, tmp_src, condition; + sljit_sw tmp_srcw; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + + condition = type & 0xff; +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + if ((condition == SLJIT_EQUAL || condition == SLJIT_NOT_EQUAL)) { + if ((src1 & SLJIT_IMM) && !src1w) { + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + src2w = 0; + } + if ((src2 & SLJIT_IMM) && !src2w) + return emit_cmp_to0(compiler, type, src1, src1w); + } +#endif + + if (SLJIT_UNLIKELY((src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM))) { + /* Immediate is preferred as second argument by most architectures. 
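+ Swapping the operands is therefore only valid if the condition is mirrored as well (e.g. SLJIT_LESS becomes SLJIT_GREATER), which is what the switch below does before the operands are exchanged. 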
*/ + switch (condition) { + case SLJIT_LESS: + condition = SLJIT_GREATER; + break; + case SLJIT_GREATER_EQUAL: + condition = SLJIT_LESS_EQUAL; + break; + case SLJIT_GREATER: + condition = SLJIT_LESS; + break; + case SLJIT_LESS_EQUAL: + condition = SLJIT_GREATER_EQUAL; + break; + case SLJIT_SIG_LESS: + condition = SLJIT_SIG_GREATER; + break; + case SLJIT_SIG_GREATER_EQUAL: + condition = SLJIT_SIG_LESS_EQUAL; + break; + case SLJIT_SIG_GREATER: + condition = SLJIT_SIG_LESS; + break; + case SLJIT_SIG_LESS_EQUAL: + condition = SLJIT_SIG_GREATER_EQUAL; + break; + } + + type = condition | (type & (SLJIT_32 | SLJIT_REWRITABLE_JUMP)); + tmp_src = src1; + src1 = src2; + src2 = tmp_src; + tmp_srcw = src1w; + src1w = src2w; + src2w = tmp_srcw; + } + + if (condition <= SLJIT_NOT_ZERO) + flags = SLJIT_SET_Z; + else + flags = condition << VARIABLE_FLAG_SHIFT; + + SLJIT_SKIP_CHECKS(compiler); + PTR_FAIL_IF(sljit_emit_op2u(compiler, + SLJIT_SUB | flags | (type & SLJIT_32), src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, condition | (type & (SLJIT_REWRITABLE_JUMP | SLJIT_32))); +} + +#endif /* !SLJIT_CONFIG_MIPS */ + +#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL) + return 0; + + switch (type) { + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 0; + } + + return 1; +} + +#endif /* SLJIT_CONFIG_ARM */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xff) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_32), src1, src1w, src2, src2w); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ + && !(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(reg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + + return SLJIT_ERR_UNSUPPORTED; +} + +#endif /* !SLJIT_CONFIG_ARM && !SLJIT_CONFIG_PPC */ + +#if !(defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + && !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + return sljit_emit_fmem_unaligned(compiler, type, freg, mem, memw); +} + +#endif /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS */ + +#if !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw)); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + 
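+ /* No pre/post-update addressing forms on this target: report the form as unsupported so callers fall back to a plain sljit_emit_fmem load/store. */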
+ return SLJIT_ERR_UNSUPPORTED; +} + +#endif /* !SLJIT_CONFIG_ARM_64 && !SLJIT_CONFIG_PPC */ + +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + CHECK_ERROR(); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); + + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); + + SLJIT_SKIP_CHECKS(compiler); + + if (offset != 0) + return sljit_emit_op2(compiler, SLJIT_ADD, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0); +} + +#endif + +#else /* SLJIT_CONFIG_UNSUPPORTED */ + +/* Empty function bodies for those machines, which are not (yet) supported. */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "unsupported"; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data, void *exec_allocator_data) +{ + SLJIT_UNUSED_ARG(allocator_data); + SLJIT_UNUSED_ARG(exec_allocator_data); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(size); + SLJIT_UNREACHABLE(); + return NULL; +} + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(verbose); + SLJIT_UNREACHABLE(); +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + SLJIT_UNUSED_ARG(feature_type); + SLJIT_UNREACHABLE(); + return 0; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + SLJIT_UNUSED_ARG(type); + SLJIT_UNREACHABLE(); + return 0; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) +{ + SLJIT_UNUSED_ARG(code); + SLJIT_UNUSED_ARG(exec_allocator_data); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); + SLJIT_UNUSED_ARG(arg_types); + SLJIT_UNUSED_ARG(scratches); + SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); + SLJIT_UNUSED_ARG(local_size); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); + SLJIT_UNUSED_ARG(arg_types); + SLJIT_UNUSED_ARG(scratches); + SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); 
+ SLJIT_UNUSED_ARG(local_size); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src_dst); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + SLJIT_UNREACHABLE(); + return reg; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 
size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(instruction); + SLJIT_UNUSED_ARG(size); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(current_flags); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(arg_types); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) +{ + SLJIT_UNUSED_ARG(jump); + SLJIT_UNUSED_ARG(label); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) +{ + SLJIT_UNUSED_ARG(jump); + SLJIT_UNUSED_ARG(target); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label) +{ + SLJIT_UNUSED_ARG(put_label); + SLJIT_UNUSED_ARG(label); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct 
sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(arg_types); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(type); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(dst_reg); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(reg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(reg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 mem, sljit_sw memw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 mem, sljit_sw memw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(freg); + SLJIT_UNUSED_ARG(mem); + SLJIT_UNUSED_ARG(memw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(offset); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw initval) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(initval); + SLJIT_UNREACHABLE(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + SLJIT_UNUSED_ARG(addr); + SLJIT_UNUSED_ARG(new_target); + SLJIT_UNUSED_ARG(executable_offset); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, 
sljit_sw executable_offset) +{ + SLJIT_UNUSED_ARG(addr); + SLJIT_UNUSED_ARG(new_constant); + SLJIT_UNUSED_ARG(executable_offset); + SLJIT_UNREACHABLE(); +} + +#endif /* !SLJIT_CONFIG_UNSUPPORTED */ diff --git a/contrib/libs/pcre2/src/sljit/sljitLir.h b/contrib/libs/pcre2/src/sljit/sljitLir.h new file mode 100644 index 0000000000..a666a2643f --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitLir.h @@ -0,0 +1,1823 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SLJIT_LIR_H_ +#define SLJIT_LIR_H_ + +/* + ------------------------------------------------------------------------ + Stack-Less JIT compiler for multiple architectures (x86, ARM, PowerPC) + ------------------------------------------------------------------------ + + Short description + Advantages: + - The execution can be continued from any LIR instruction. In other + words, it is possible to jump to any label from anywhere, even from + a code fragment, which is compiled later, as long as the compiling + context is the same. See sljit_emit_enter for more details. + - Supports self modifying code: target of any jump and call + instructions and some constant values can be dynamically modified + during runtime. See SLJIT_REWRITABLE_JUMP. + - although it is not suggested to do it frequently + - can be used for inline caching: save an important value once + in the instruction stream + - A fixed stack space can be allocated for local variables + - The compiler is thread-safe + - The compiler is highly configurable through preprocessor macros. + You can disable unneeded features (multithreading in single + threaded applications), and you can use your own system functions + (including memory allocators). See sljitConfig.h. + Disadvantages: + - The compiler is more like a platform independent assembler, so + there is no built-in variable management. Registers and stack must + be managed manually (the name of the compiler refers to this). 
+ In practice: + - This approach is very effective for interpreters + - One of the saved registers typically points to a stack interface + - It can jump to any exception handler anytime (even if it belongs + to another function) + - Hot paths can be modified during runtime reflecting the changes + of the fastest execution path of the dynamic language + - SLJIT supports complex memory addressing modes + - mainly position and context independent code (except some cases) + + For valgrind users: + - pass the --smc-check=all argument to valgrind, since JIT is a "self-modifying code" +*/ + +#if (defined SLJIT_HAVE_CONFIG_PRE && SLJIT_HAVE_CONFIG_PRE) +#error #include "sljitConfigPre.h" +#endif /* SLJIT_HAVE_CONFIG_PRE */ + +#include "sljitConfig.h" + +/* The following header file defines useful macros for fine tuning +SLJIT based code generators. They are listed in the beginning +of sljitConfigInternal.h */ + +#include "sljitConfigInternal.h" + +#if (defined SLJIT_HAVE_CONFIG_POST && SLJIT_HAVE_CONFIG_POST) +#error #include "sljitConfigPost.h" +#endif /* SLJIT_HAVE_CONFIG_POST */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Version numbers. */ +#define SLJIT_MAJOR_VERSION 0 +#define SLJIT_MINOR_VERSION 95 + +/* --------------------------------------------------------------------- */ +/* Error codes */ +/* --------------------------------------------------------------------- */ + +/* Indicates no error. */ +#define SLJIT_SUCCESS 0 +/* After the call of sljit_generate_code(), the error code of the compiler + is set to this value to avoid further code generation. + The compiler should be freed after sljit_generate_code(). */ +#define SLJIT_ERR_COMPILED 1 +/* Cannot allocate non-executable memory. */ +#define SLJIT_ERR_ALLOC_FAILED 2 +/* Cannot allocate executable memory. + Only sljit_generate_code() returns with this error code. */ +#define SLJIT_ERR_EX_ALLOC_FAILED 3 +/* Return value for SLJIT_CONFIG_UNSUPPORTED placeholder architecture. */ +#define SLJIT_ERR_UNSUPPORTED 4 +/* An invalid argument is passed to an SLJIT function. */ +#define SLJIT_ERR_BAD_ARGUMENT 5 + +/* --------------------------------------------------------------------- */ +/* Registers */ +/* --------------------------------------------------------------------- */ + +/* + Scratch (R) registers: registers which may not preserve their values + across function calls. + + Saved (S) registers: registers which preserve their values across + function calls. + + The scratch and saved register sets overlap. The last scratch register + is the first saved register, the one before the last is the second saved + register, and so on. + + If an architecture provides two scratch and three saved registers, + its scratch and saved register sets are the following: + + R0 | | R0 is always a scratch register + R1 | | R1 is always a scratch register + [R2] | S2 | R2 and S2 represent the same physical register + [R3] | S1 | R3 and S1 represent the same physical register + [R4] | S0 | R4 and S0 represent the same physical register + + Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and + SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture. + + Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12 + and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 6. However, 6 registers + are virtual on x86-32. See below. + + The purpose of this definition is convenience: saved registers can + be used as extra scratch registers. 
For example four registers can + be specified as scratch registers and the fifth one as a saved register + on the CPU above, and any user code which requires four scratch + registers can run unmodified. The SLJIT compiler automatically saves + the contents of the two extra scratch registers on the stack. Scratch + registers can also be preserved by saving their values on the stack, + but this needs to be done manually. + + Note: To emphasize that registers assigned to R2-R4 are saved + registers, they are enclosed by square brackets. + + Note: sljit_emit_enter and sljit_set_context define whether a register + is an S or R register. E.g.: when 3 scratches and 1 saved register are + mapped by sljit_emit_enter, the allowed register set will be: R0-R2 and + S0. Although S2 is mapped to the same position as R2, it is not + available in the current configuration. Furthermore the S1 register + is not available at all. +*/ + +/* Scratch registers. */ +#define SLJIT_R0 1 +#define SLJIT_R1 2 +#define SLJIT_R2 3 +/* Note: on x86-32, R3 - R6 (same as S3 - S6) are emulated (they + are allocated on the stack). These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. See sljit_get_register_index(). */ +#define SLJIT_R3 4 +#define SLJIT_R4 5 +#define SLJIT_R5 6 +#define SLJIT_R6 7 +#define SLJIT_R7 8 +#define SLJIT_R8 9 +#define SLJIT_R9 10 +/* All R registers provided by the architecture can be accessed by SLJIT_R(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */ +#define SLJIT_R(i) (1 + (i)) + +/* Saved registers. */ +#define SLJIT_S0 (SLJIT_NUMBER_OF_REGISTERS) +#define SLJIT_S1 (SLJIT_NUMBER_OF_REGISTERS - 1) +#define SLJIT_S2 (SLJIT_NUMBER_OF_REGISTERS - 2) +/* Note: on x86-32, S3 - S6 (same as R3 - R6) are emulated (they + are allocated on the stack). These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. See sljit_get_register_index(). */ +#define SLJIT_S3 (SLJIT_NUMBER_OF_REGISTERS - 3) +#define SLJIT_S4 (SLJIT_NUMBER_OF_REGISTERS - 4) +#define SLJIT_S5 (SLJIT_NUMBER_OF_REGISTERS - 5) +#define SLJIT_S6 (SLJIT_NUMBER_OF_REGISTERS - 6) +#define SLJIT_S7 (SLJIT_NUMBER_OF_REGISTERS - 7) +#define SLJIT_S8 (SLJIT_NUMBER_OF_REGISTERS - 8) +#define SLJIT_S9 (SLJIT_NUMBER_OF_REGISTERS - 9) +/* All S registers provided by the architecture can be accessed by SLJIT_S(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */ +#define SLJIT_S(i) (SLJIT_NUMBER_OF_REGISTERS - (i)) + +/* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1) + +/* The SLJIT_SP provides direct access to the linear stack space allocated by + sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP). + The immediate offset is extended by the relative stack offset automatically. + The sljit_get_local_base can be used to obtain the real address of a value. */ +#define SLJIT_SP (SLJIT_NUMBER_OF_REGISTERS + 1) + +/* Return with machine word. */ + +#define SLJIT_RETURN_REG SLJIT_R0 + +/* --------------------------------------------------------------------- */ +/* Floating point registers */ +/* --------------------------------------------------------------------- */ + +/* Each floating point register can store a 32 or a 64 bit precision + value. 
The FR and FS register sets overlap in the same way as the R + and S register sets. See above. */ + +/* Floating point scratch registers. */ +#define SLJIT_FR0 1 +#define SLJIT_FR1 2 +#define SLJIT_FR2 3 +#define SLJIT_FR3 4 +#define SLJIT_FR4 5 +#define SLJIT_FR5 6 +/* All FR registers provided by the architecture can be accessed by SLJIT_FR(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */ +#define SLJIT_FR(i) (1 + (i)) + +/* Floating point saved registers. */ +#define SLJIT_FS0 (SLJIT_NUMBER_OF_FLOAT_REGISTERS) +#define SLJIT_FS1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1) +#define SLJIT_FS2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2) +#define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3) +#define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4) +#define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5) +/* All FS registers provided by the architecture can be accessed by SLJIT_FS(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */ +#define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i)) + +/* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1) + +/* Return with floating point arg. */ + +#define SLJIT_RETURN_FREG SLJIT_FR0 + +/* --------------------------------------------------------------------- */ +/* Argument type definitions */ +/* --------------------------------------------------------------------- */ + +/* The following argument type definitions are used by sljit_emit_enter, + sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. + + As for sljit_emit_call and sljit_emit_icall, the first integer argument + must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on. + Similarly the first floating point argument must be placed into SLJIT_FR0, + the second one into SLJIT_FR1, and so on. + + As for sljit_emit_enter, the integer arguments can be stored in scratch + or saved registers. The first integer argument without _R postfix is + stored in SLJIT_S0, the next one in SLJIT_S1, and so on. The integer + arguments with _R postfix are placed into scratch registers. The index + of the scratch register is the count of the previous integer arguments + starting from SLJIT_R0. The floating point arguments are always placed + into SLJIT_FR0, SLJIT_FR1, and so on. + + Note: if a function is called by sljit_emit_call/sljit_emit_icall and + an argument is stored in a scratch register by sljit_emit_enter, + that argument uses the same scratch register index for both + integer and floating point arguments. 
+ + Example function definition: + sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a, + sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d); + + Argument type definition: + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_F32) + | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_P, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F64, 2) + | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4) + + Short form of argument type definition: + SLJIT_ARGS4(32, P, F64, 32, F32) + + Argument passing: + arg_a must be placed in SLJIT_R0 + arg_c must be placed in SLJIT_R1 + arg_b must be placed in SLJIT_FR0 + arg_d must be placed in SLJIT_FR1 + + Examples for argument processing by sljit_emit_enter: + SLJIT_ARGS4(VOID, P, 32_R, F32, W) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1 + + SLJIT_ARGS4(VOID, W, W_R, W, W_R) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3 + + SLJIT_ARGS4(VOID, F64, W, F32, W_R) + Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1 + + Note: it is recommended to pass the scratch arguments first + followed by the saved arguments: + + SLJIT_ARGS4(VOID, W_R, W_R, W, W) + Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1 +*/ + +/* The following flag is only allowed for the integer arguments of + sljit_emit_enter. When the flag is set, the integer argument is + stored in a scratch register instead of a saved register. */ +#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8 + +/* Void result, can only be used by SLJIT_ARG_RETURN. */ +#define SLJIT_ARG_TYPE_VOID 0 +/* Machine word sized integer argument or result. */ +#define SLJIT_ARG_TYPE_W 1 +#define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG) +/* 32 bit integer argument or result. */ +#define SLJIT_ARG_TYPE_32 2 +#define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG) +/* Pointer sized integer argument or result. */ +#define SLJIT_ARG_TYPE_P 3 +#define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG) +/* 64 bit floating point argument or result. */ +#define SLJIT_ARG_TYPE_F64 4 +/* 32 bit floating point argument or result. */ +#define SLJIT_ARG_TYPE_F32 5 + +#define SLJIT_ARG_SHIFT 4 +#define SLJIT_ARG_RETURN(type) (type) +#define SLJIT_ARG_VALUE(type, idx) ((type) << ((idx) * SLJIT_ARG_SHIFT)) + +/* Simplified argument list definitions. + + The following definition: + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 1) + + can be shortened to: + SLJIT_ARGS1(W, F32) +*/ + +#define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type + +#define SLJIT_ARGS0(ret) \ + SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret)) + +#define SLJIT_ARGS1(ret, arg1) \ + (SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1)) + +#define SLJIT_ARGS2(ret, arg1, arg2) \ + (SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2)) + +#define SLJIT_ARGS3(ret, arg1, arg2, arg3) \ + (SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3)) + +#define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \ + (SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4)) + +/* --------------------------------------------------------------------- */ +/* Main structures and functions */ +/* --------------------------------------------------------------------- */ + +/* + The following structures are private, and can be changed in the + future. Keeping them here allows code inlining. 
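+ For example, the accessors sljit_get_compiler_error(), + sljit_get_executable_offset() and sljit_get_generated_code_size() + below are static inline functions which read these fields directly. 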
+*/ + +struct sljit_memory_fragment { + struct sljit_memory_fragment *next; + sljit_uw used_size; + /* Must be aligned to sljit_sw. */ + sljit_u8 memory[1]; +}; + +struct sljit_label { + struct sljit_label *next; + sljit_uw addr; + /* The maximum size difference. */ + sljit_uw size; +}; + +struct sljit_jump { + struct sljit_jump *next; + sljit_uw addr; + /* Architecture dependent flags. */ + sljit_uw flags; + union { + sljit_uw target; + struct sljit_label *label; + } u; +}; + +struct sljit_put_label { + struct sljit_put_label *next; + struct sljit_label *label; + sljit_uw addr; + sljit_uw flags; +}; + +struct sljit_const { + struct sljit_const *next; + sljit_uw addr; +}; + +struct sljit_compiler { + sljit_s32 error; + sljit_s32 options; + + struct sljit_label *labels; + struct sljit_jump *jumps; + struct sljit_put_label *put_labels; + struct sljit_const *consts; + struct sljit_label *last_label; + struct sljit_jump *last_jump; + struct sljit_const *last_const; + struct sljit_put_label *last_put_label; + + void *allocator_data; + void *exec_allocator_data; + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *abuf; + + /* Available scratch registers. */ + sljit_s32 scratches; + /* Available saved registers. */ + sljit_s32 saveds; + /* Available float scratch registers. */ + sljit_s32 fscratches; + /* Available float saved registers. */ + sljit_s32 fsaveds; + /* Local stack size. */ + sljit_s32 local_size; + /* Maximum code size. */ + sljit_uw size; + /* Relative offset of the executable mapping from the writable mapping. */ + sljit_sw executable_offset; + /* Executable size for statistical purposes. */ + sljit_uw executable_size; + +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + sljit_s32 status_flags_state; +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 args_size; +#endif + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 mode32; +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + /* Constant pool handling. */ + sljit_uw *cpool; + sljit_u8 *cpool_unique; + sljit_uw cpool_diff; + sljit_uw cpool_fill; + /* Other members. */ + /* Contains pointer, "ldr pc, [...]" pairs. */ + sljit_uw patches; +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + /* Temporary fields. */ + sljit_uw shift_imm; +#endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__) + sljit_uw args_size; +#endif + +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + sljit_u32 imm; +#endif + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + sljit_s32 delay_slot; + sljit_s32 cache_arg; + sljit_sw cache_argw; +#endif + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_uw args_size; +#endif + +#if (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) + sljit_s32 cache_arg; + sljit_sw cache_argw; +#endif + +#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + /* Need to allocate register save area to make calls. */ + sljit_s32 mode; +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + FILE* verbose; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + /* Flags specified by the last arithmetic instruction. + It contains the type of the variable flag. */ + sljit_s32 last_flags; + /* Return value type set by entry functions. 
*/ + sljit_s32 last_return; + /* Local size passed to entry functions. */ + sljit_s32 logical_local_size; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + /* Trust arguments when an API function is called. + Used internally for calling API functions. */ + sljit_s32 skip_checks; +#endif +}; + +/* --------------------------------------------------------------------- */ +/* Main functions */ +/* --------------------------------------------------------------------- */ + +/* Creates an SLJIT compiler. The allocator_data is required by some + custom memory managers. This pointer is passed to SLJIT_MALLOC + and SLJIT_FREE macros. Most allocators (including the default + one) ignore this value, and it is recommended to pass NULL + as a dummy value for allocator_data. The exec_allocator_data + has the same purpose but this one is passed to SLJIT_MALLOC_EXEC / + SLJIT_MALLOC_FREE functions. + + Returns NULL if failed. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data, void *exec_allocator_data); + +/* Frees everything except the compiled machine code. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler); + +/* Returns the current error code. If an error occurs, future calls + which use the same compiler argument return early with the same + error code. Thus there is no need for checking the error after every + call; it is enough to do it after the code is compiled. Removing + these checks increases the performance of the compiling process. */ +static SLJIT_INLINE sljit_s32 sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } + +/* Sets the compiler error code to SLJIT_ERR_ALLOC_FAILED except + if an error was detected before. After the error code is set + the compiler behaves as if the allocation failure happened + during an SLJIT function call. This can greatly simplify error + checking, since it is enough to check the compiler status + after the code is compiled. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler); + +/* + Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit, + and <= 128 bytes on 64 bit architectures. The memory area is owned by the + compiler, and freed by sljit_free_compiler. The returned pointer is + sizeof(sljit_sw) aligned. Excellent for allocating small blocks during + compiling, with no need to worry about freeing them. The size is enough + to contain at most 16 pointers. If the size is outside of this range, + the function will return with NULL. However, this return value does not + indicate that there is no more memory (does not set the current error code + of the compiler to out-of-memory status). +*/ +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +/* Passing NULL disables verbose. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); +#endif + +/* + Create executable code from the instruction stream. This is the final step + of the code generation so no more instructions can be emitted after this call. +*/ + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler); + +/* Free executable code. 
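+ + A minimal lifecycle sketch (an illustration only; it assumes the default + allocators, so NULL is passed for all allocator data): + + struct sljit_compiler *compiler = sljit_create_compiler(NULL, NULL); + ... emit instructions ... + void *code = sljit_generate_code(compiler); + sljit_free_compiler(compiler); + ... the machine code stays valid; execute it, then: ... + sljit_free_code(code, NULL); 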
*/
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data);
+
+/*
+ When the protected executable allocator is used, the JIT code is mapped
+ twice. The first mapping has read/write and the second mapping has read/exec
+ permissions. After the machine code is successfully generated, this function
+ returns with the relative offset of the executable mapping using the
+ writable mapping as the base. The returned value is always 0 for the normal
+ executable allocator, since it uses only one mapping with read/write/exec
+ permissions. Dynamic code modification requires this value.
+
+ Before a successful code generation, this function returns with 0.
+*/
+static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *compiler) { return compiler->executable_offset; }
+
+/*
+ The executable memory consumption of the generated code can be retrieved by
+ this function. The returned value can be used for statistical purposes.
+
+ Before a successful code generation, this function returns with 0.
+*/
+static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; }
+
+/* Returns with non-zero if the feature or limitation type passed as its
+ argument is present on the current CPU. The return value is one if a
+ feature is fully supported and two if it is partially supported.
+
+ Some features (e.g. floating point operations) require hardware (CPU)
+ support while others (e.g. move with update) are emulated if not available.
+ However, even when a feature is emulated, specialized code paths may be
+ faster than the emulation. Some limitations are emulated as well, so the
+ general case is supported at some extra performance cost. */
+
+/* [Not emulated] Floating-point support is available. */
+#define SLJIT_HAS_FPU 0
+/* [Limitation] Some registers are virtual registers. */
+#define SLJIT_HAS_VIRTUAL_REGISTERS 1
+/* [Emulated] Has zero register (setting a memory location to zero is efficient). */
+#define SLJIT_HAS_ZERO_REGISTER 2
+/* [Emulated] Count leading zero is supported. */
+#define SLJIT_HAS_CLZ 3
+/* [Emulated] Count trailing zero is supported. */
+#define SLJIT_HAS_CTZ 4
+/* [Emulated] Rotate left/right is supported. */
+#define SLJIT_HAS_ROT 5
+/* [Emulated] Conditional move is supported. */
+#define SLJIT_HAS_CMOV 6
+/* [Emulated] Prefetch instruction is available (emulated as a nop). */
+#define SLJIT_HAS_PREFETCH 7
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+/* [Not emulated] SSE2 support is available on x86. */
+#define SLJIT_HAS_SSE2 100
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type);
+
+/* If type is between SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL,
+ sljit_cmp_info returns one if the CPU supports the passed floating
+ point comparison type.
+
+ If type is SLJIT_UNORDERED or SLJIT_ORDERED, sljit_cmp_info returns
+ one if the CPU supports checking the unordered comparison result
+ regardless of the comparison type passed to the comparison instruction.
+ The returned value is always one if there is at least one type between
+ SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL where sljit_cmp_info
+ returns with a zero value.
+
+ Otherwise it returns zero. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type);
+
+/* The following functions generate machine code. If there is no
+ error, they return with SLJIT_SUCCESS; otherwise they return
+ with an error code.
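+
+   A sketch of the recommended pattern (the emitter calls shown are
+   placeholders; their individual return values are deliberately ignored
+   and the error state is checked once at the end):
+
+     sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0);
+     (... many further emitter calls without individual checks ...)
+     if (sljit_get_compiler_error(compiler) != SLJIT_SUCCESS) {
+       sljit_free_compiler(compiler);
+       return NULL;
+     }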
*/
+
+/*
+ The executable code is a function from the viewpoint of the C
+ language. The function calls must obey the ABI (Application
+ Binary Interface) of the platform, which specifies the purpose of
+ machine registers and stack handling among other things. The
+ sljit_emit_enter function emits the necessary instructions for
+ setting up a new context for the executable code. This is often
+ called the function prologue. Furthermore the options argument
+ can be used to pass configuration options to the compiler. The
+ available options are listed before sljit_emit_enter.
+
+ The function argument list is specified by the SLJIT_ARGSx
+ (SLJIT_ARGS0 .. SLJIT_ARGS4) macros. Currently at most four
+ arguments are supported. See the description of the SLJIT_ARGSx
+ macros about argument passing. Furthermore the register set
+ used by the function must be declared as well. The number of
+ scratch and saved registers available to the function must
+ be passed to sljit_emit_enter. Only R registers between R0 and
+ R("scratches" - 1) can be used later. E.g. if "scratches"
+ is set to two, the scratch register set will be limited to
+ SLJIT_R0 and SLJIT_R1. The S registers and the floating point
+ registers ("fscratches" and "fsaveds") are specified in a
+ similar manner. sljit_emit_enter is also capable of
+ allocating stack space for local data. The "local_size"
+ argument contains the size in bytes of this local area, and
+ it can be accessed using SLJIT_MEM1(SLJIT_SP). The memory
+ area between SLJIT_SP (inclusive) and SLJIT_SP + local_size
+ (exclusive) can be modified freely until the function returns.
+ The stack space is not initialized to zero.
+
+ Note: the following conditions must be met:
+ 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS
+ 0 <= saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS
+ scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS
+ 0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
+ 0 <= fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS
+ fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
+
+ Note: the compiler can use saved registers as scratch registers,
+ but the opposite is not supported.
+
+ Note: every call of sljit_emit_enter and sljit_set_context
+ overwrites the previous context.
+*/
+
+/* Saved registers between SLJIT_S0 and SLJIT_S(n - 1) (inclusive)
+ are not saved / restored on function enter / return. Instead,
+ these registers can be used to pass / return data (such as
+ global / local context pointers) across function calls. The
+ value of n must be between 1 and 3. This option is only
+ supported by the SLJIT_ENTER_REG_ARG calling convention. */
+#define SLJIT_ENTER_KEEP(n) (n)
+
+/* The compiled function uses an SLJIT specific register argument
+ calling convention. This is a lightweight function call type where
+ both the caller and the called functions must be compiled by
+ SLJIT. The type argument of the call must be SLJIT_CALL_REG_ARG
+ and all arguments must be stored in scratch registers. */
+#define SLJIT_ENTER_REG_ARG 0x00000004
+
+/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */
+#define SLJIT_MAX_LOCAL_SIZE 65536
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size);
+
+/* The SLJIT compiler has a current context (which contains the local
+ stack space size, number of used registers, etc.) which is initialized
+ by sljit_emit_enter.
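+
+   For example, a prologue for a two-word-argument function that also
+   reserves 256 bytes of local space might look like this (a sketch;
+   SLJIT_ARGS2(W, W, W) is assumed to describe a word-returning
+   function taking two word arguments):
+
+     sljit_emit_enter(compiler, 0, SLJIT_ARGS2(W, W, W), 2, 2, 0, 0, 256);
+     (the arguments arrive in SLJIT_S0 / SLJIT_S1, and the local area
+     is reachable through SLJIT_MEM1(SLJIT_SP), e.g.:)
+     sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_S0, 0);
+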
Several functions (such as sljit_emit_return)
+ require this context to be able to generate the appropriate code.
+ However, some code fragments (compiled separately) may have no
+ normal entry point, so their context is unknown to the compiler.
+
+ The sljit_set_context and sljit_emit_enter functions have the same
+ arguments, but sljit_set_context does not generate any machine code.
+
+ Note: every call of sljit_emit_enter and sljit_set_context overwrites
+ the previous context. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size);
+
+/* Return to the caller function. The sljit_emit_return_void function
+ does not return with any value. The sljit_emit_return function returns
+ with a single value loaded from its source operand. The load operation
+ can be between SLJIT_MOV and SLJIT_MOV_P (see sljit_emit_op1) and
+ SLJIT_MOV_F32/SLJIT_MOV_F64 (see sljit_emit_fop1) depending on the
+ return value specified by sljit_emit_enter/sljit_set_context. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler);
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw);
+
+/* Restores the saved registers and frees the stack area, then the execution
+ continues from the address specified by the source operand. This
+ operation is similar to sljit_emit_return, but it ignores the return
+ address. The code where the execution continues should use the same context
+ as the caller function (see sljit_set_context). A word (pointer) value
+ can be passed in the SLJIT_RETURN_REG register. This function can be used
+ to jump to exception handlers. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
+ sljit_s32 src, sljit_sw srcw);
+
+/* Generating entry and exit points for fast call functions (see SLJIT_FAST_CALL).
+ Both the sljit_emit_fast_enter and SLJIT_FAST_RETURN operations preserve the
+ values of all registers and the stack frame. The return address is stored in the
+ dst argument of sljit_emit_fast_enter, and this return address can be passed
+ to SLJIT_FAST_RETURN to continue the execution after the fast call.
+
+ Fast calls are cheap operations (usually only a single call instruction is
+ emitted) but they do not preserve any registers. However the callee function
+ can freely use / update any registers and the local area, which can be
+ efficiently exploited by various optimizations. Registers can be saved
+ and restored manually if needed.
+
+ Although returning to a different address by SLJIT_FAST_RETURN is possible,
+ this address usually cannot be predicted by the return address predictor of
+ modern CPUs, which may reduce performance. Furthermore certain security
+ enhancement technologies such as Intel Control-flow Enforcement Technology
+ (CET) may disallow returning to a different address.
+
+ Flags: - (does not modify flags).
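+
+   A sketch of a fast-called helper and one call site (the register
+   holding the return address, here SLJIT_R2, is an arbitrary choice
+   and must be kept intact by the helper body):
+
+     struct sljit_label *helper = sljit_emit_label(compiler);
+     sljit_emit_fast_enter(compiler, SLJIT_R2, 0);
+     (... helper body ...)
+     sljit_emit_op_src(compiler, SLJIT_FAST_RETURN, SLJIT_R2, 0);
+
+     struct sljit_jump *call = sljit_emit_jump(compiler, SLJIT_FAST_CALL);
+     sljit_set_label(call, helper);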
*/
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw);
+
+/*
+ Source and destination operands for arithmetic instructions
+ imm - a simple immediate value (cannot be used as a destination)
+ reg - any of the available registers (immediate argument must be 0)
+ [imm] - absolute memory address
+ [reg+imm] - indirect memory address
+ [reg+(reg<<imm)] - indirect indexed memory address (shift must be between 0 and 3)
+ useful for accessing arrays (fully supported by both x86 and
+ ARM architectures, and a cheap operation on others)
+*/
+
+/*
+ IMPORTANT NOTE: memory accesses MUST be naturally aligned unless
+ the SLJIT_UNALIGNED macro is defined and its value is 1.
+
+ length | alignment
+ ---------+-----------
+ byte | 1 byte (any physical_address is accepted)
+ half | 2 byte (physical_address & 0x1 == 0)
+ int | 4 byte (physical_address & 0x3 == 0)
+ word | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1
+ | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1
+ pointer | size of sljit_p type (4 byte on 32 bit machines, 4 or 8 byte
+ | on 64 bit machines)
+
+ Note: Different architectures have different addressing limitations.
+ A single instruction is enough for the following addressing
+ modes. Other addressing modes are emulated by instruction
+ sequences. This information could help to improve those code
+ generators which focus on only a few architectures.
+
+ x86: [reg+imm], -2^32+1 <= imm <= 2^32-1 (full address space on x86-32)
+ [reg+(reg<<imm)] is supported
+ [imm], -2^32+1 <= imm <= 2^32-1 is supported
+ Write-back is not supported
+ arm: [reg+imm], -4095 <= imm <= 4095, or -255 <= imm <= 255 for signed
+ bytes, halves, or floating point values
+ [reg+(reg<<imm)] is supported
+ Write-back is supported
+ arm-t2: [reg+imm], -255 <= imm <= 4095
+ [reg+(reg<<imm)] is supported
+ Write-back is supported only for [reg+imm], where -255 <= imm <= 255
+ arm64: [reg+imm], -256 <= imm <= 255, 0 <= aligned imm <= 4095 * alignment
+ [reg+(reg<<imm)] is supported
+ Write-back is supported only for [reg+imm], where -256 <= imm <= 255
+ ppc: [reg+imm], -65536 <= imm <= 65535. 64 bit loads/stores and 32 bit
+ signed loads on 64 bit require immediates divisible by 4.
+ [reg+imm] is not supported for signed 8 bit values.
+ [reg+reg] is supported
+ Write-back is supported except for one instruction: 32 bit signed
+ load with [reg+imm] addressing mode on 64 bit.
+ mips: [reg+imm], -65536 <= imm <= 65535
+ Write-back is not supported
+ riscv: [reg+imm], -2048 <= imm <= 2047
+ Write-back is not supported
+ s390x: [reg+imm], -2^19 <= imm < 2^19
+ [reg+reg] is supported
+ Write-back is not supported
+*/
+
+/* Macros for specifying operand types. */
+#define SLJIT_MEM 0x80
+#define SLJIT_MEM0() (SLJIT_MEM)
+#define SLJIT_MEM1(r1) (SLJIT_MEM | (r1))
+#define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8))
+#define SLJIT_IMM 0x40
+#define SLJIT_REG_PAIR(r1, r2) ((r1) | ((r2) << 8))
+
+/* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on
+ 32 bit CPUs. When this option is set for an arithmetic operation, only
+ the lower 32 bits of the input registers are used, and the CPU status
+ flags are set according to the 32 bit result. Although the higher 32 bits
+ of the input and the result registers are not defined by SLJIT, they might
+ be defined by the CPU architecture (e.g. MIPS).
To satisfy these CPU
+ requirements, all source registers must be the result of those operations
+ where this option was also set. Memory loads read 32 bit values rather
+ than 64 bit ones. In other words, 32 bit and 64 bit operations cannot be
+ mixed. The only exception is SLJIT_MOV32, whose source register can hold
+ any 32 or 64 bit value, and it is converted to a 32 bit compatible format
+ first. When the source and destination registers are the same, this
+ conversion is free (no instructions are emitted) on most CPUs. A 32 bit
+ value can also be converted to a 64 bit value by SLJIT_MOV_S32
+ (sign extension) or SLJIT_MOV_U32 (zero extension).
+
+ As for floating-point operations, this option sets 32 bit single
+ precision mode. Similar to the integer operations, all register arguments
+ must be the result of those operations where this option was also set.
+
+ Note: memory addressing always uses 64 bit values on 64 bit systems, so
+ the result of a 32 bit operation must not be used with SLJIT_MEMx
+ macros.
+
+ This option is part of the instruction name, so there is no need to
+ manually set it. E.g:
+
+ SLJIT_ADD32 == (SLJIT_ADD | SLJIT_32) */
+#define SLJIT_32 0x100
+
+/* Many CPUs (x86, ARM, PPC) have status flag bits which can be set according
+ to the result of an operation. Other CPUs (MIPS) do not have status
+ flag bits, and results must be stored in registers. To cover both
+ architecture types efficiently only two flags are defined by SLJIT:
+
+ * Zero (equal) flag: it is set if the result is zero
+ * Variable flag: its value is defined by the arithmetic operation
+
+ SLJIT instructions can set either or both of these flags. The value of
+ these flags is undefined if the instruction does not specify their
+ value. The description of each instruction contains the list of
+ allowed flag types.
+
+ Note: the desired flags are requested by or-ing the SLJIT_SET_*
+ constants into the opcode, as the examples below show.
+
+ Example: SLJIT_ADD can set the Z, OVERFLOW, CARRY flags, hence
+
+ sljit_op2(..., SLJIT_ADD, ...)
+ Both the zero and variable flags are undefined so they can
+ have any value after the operation is completed.
+
+ sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...)
+ Sets the zero flag if the result is zero, clears it otherwise.
+ The variable flag is undefined.
+
+ sljit_op2(..., SLJIT_ADD | SLJIT_SET_OVERFLOW, ...)
+ Sets the variable flag if an integer overflow occurs, clears
+ it otherwise. The zero flag is undefined.
+
+ sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z | SLJIT_SET_CARRY, ...)
+ Sets the zero flag if the result is zero, clears it otherwise.
+ Sets the variable flag if unsigned overflow (carry) occurs,
+ clears it otherwise.
+
+ Certain instructions (e.g. SLJIT_MOV) do not modify flags, so
+ the status flags are unchanged.
+
+ Example:
+
+ sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...)
+ sljit_op1(..., SLJIT_MOV, ...)
+ Zero flag is set according to the result of SLJIT_ADD.
+
+ sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...)
+ sljit_op2(..., SLJIT_ADD, ...)
+ Zero flag has unknown value.
+
+ These flags can be used for code optimization. E.g. a fast loop can be
+ implemented by decrementing a counter register and setting the zero flag
+ with a single instruction. The zero flag can be used by a
+ conditional jump to restart the loop. A single comparison can set the
+ zero and less flags to check if a value is less, equal, or greater
+ than another value.
+
+ Motivation: although some CPUs can set a large number of flag bits,
+ usually their values are ignored or only a few of them are used.
Emulating
+ a large number of flags on systems without a flag register is complicated,
+ so SLJIT instructions must specify the flag they want to use, and only
+ that flag is computed. The last arithmetic instruction can be repeated if
+ multiple flags need to be checked.
+*/
+
+/* Set Zero status flag. */
+#define SLJIT_SET_Z 0x0200
+/* Set the variable status flag if condition is true.
+ See comparison types (e.g. SLJIT_SET_LESS, SLJIT_SET_F_EQUAL). */
+#define SLJIT_SET(condition) ((condition) << 10)
+
+/* Starting index of opcodes for sljit_emit_op0. */
+#define SLJIT_OP0_BASE 0
+
+/* Flags: - (does not modify flags)
+ Note: the breakpoint instruction is not supported by all architectures
+ (e.g. ppc); it falls back to SLJIT_NOP in those cases. */
+#define SLJIT_BREAKPOINT (SLJIT_OP0_BASE + 0)
+/* Flags: - (does not modify flags)
+ Note: it may or may not cause an extra cycle of waiting;
+ it can even decrease the runtime in a few cases. */
+#define SLJIT_NOP (SLJIT_OP0_BASE + 1)
+/* Flags: - (may destroy flags)
+ Unsigned multiplication of SLJIT_R0 and SLJIT_R1.
+ Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */
+#define SLJIT_LMUL_UW (SLJIT_OP0_BASE + 2)
+/* Flags: - (may destroy flags)
+ Signed multiplication of SLJIT_R0 and SLJIT_R1.
+ Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */
+#define SLJIT_LMUL_SW (SLJIT_OP0_BASE + 3)
+/* Flags: - (may destroy flags)
+ Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
+ The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
+ Note: if SLJIT_R1 is 0, the behaviour is undefined. */
+#define SLJIT_DIVMOD_UW (SLJIT_OP0_BASE + 4)
+#define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_32)
+/* Flags: - (may destroy flags)
+ Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
+ The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
+ Note: if SLJIT_R1 is 0, the behaviour is undefined.
+ Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00),
+ the behaviour is undefined. */
+#define SLJIT_DIVMOD_SW (SLJIT_OP0_BASE + 5)
+#define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_32)
+/* Flags: - (may destroy flags)
+ Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
+ The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
+ Note: if SLJIT_R1 is 0, the behaviour is undefined. */
+#define SLJIT_DIV_UW (SLJIT_OP0_BASE + 6)
+#define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_32)
+/* Flags: - (may destroy flags)
+ Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
+ The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
+ Note: if SLJIT_R1 is 0, the behaviour is undefined.
+ Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00),
+ the behaviour is undefined. */
+#define SLJIT_DIV_SW (SLJIT_OP0_BASE + 7)
+#define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_32)
+/* Flags: - (does not modify flags)
+ ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64
+ when Intel Control-flow Enforcement Technology (CET) is enabled.
+ No instructions are emitted for other architectures. */
+#define SLJIT_ENDBR (SLJIT_OP0_BASE + 8)
+/* Flags: - (may destroy flags)
+ Skip stack frames before return when Intel Control-flow
+ Enforcement Technology (CET) is enabled. No instructions
+ are emitted for other architectures. */
+#define SLJIT_SKIP_FRAMES_BEFORE_RETURN (SLJIT_OP0_BASE + 9)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op);
+
+/* Starting index of opcodes for sljit_emit_op1.
*/
+#define SLJIT_OP1_BASE 32
+
+/* The MOV instruction transfers data from source to destination.
+
+ MOV instruction suffixes:
+
+ U8 - unsigned 8 bit data transfer
+ S8 - signed 8 bit data transfer
+ U16 - unsigned 16 bit data transfer
+ S16 - signed 16 bit data transfer
+ U32 - unsigned int (32 bit) data transfer
+ S32 - signed int (32 bit) data transfer
+ P - pointer (sljit_p) data transfer
+*/
+
+/* Flags: - (does not modify flags) */
+#define SLJIT_MOV (SLJIT_OP1_BASE + 0)
+/* Flags: - (does not modify flags) */
+#define SLJIT_MOV_U8 (SLJIT_OP1_BASE + 1)
+#define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_32)
+/* Flags: - (does not modify flags) */
+#define SLJIT_MOV_S8 (SLJIT_OP1_BASE + 2)
+#define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_32)
+/* Flags: - (does not modify flags) */
+#define SLJIT_MOV_U16 (SLJIT_OP1_BASE + 3)
+#define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_32)
+/* Flags: - (does not modify flags) */
+#define SLJIT_MOV_S16 (SLJIT_OP1_BASE + 4)
+#define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_32)
+/* Flags: - (does not modify flags)
+ Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */
+#define SLJIT_MOV_U32 (SLJIT_OP1_BASE + 5)
+/* Flags: - (does not modify flags)
+ Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */
+#define SLJIT_MOV_S32 (SLJIT_OP1_BASE + 6)
+/* Flags: - (does not modify flags) */
+#define SLJIT_MOV32 (SLJIT_OP1_BASE + 7)
+/* Flags: - (does not modify flags)
+ Note: loads pointer-sized data; useful in x32 mode (a 64 bit mode
+ on x86-64 which uses 32 bit pointers) or similar compilation modes */
+#define SLJIT_MOV_P (SLJIT_OP1_BASE + 8)
+/* Flags: Z
+ Note: immediate source argument is not supported */
+#define SLJIT_NOT (SLJIT_OP1_BASE + 9)
+#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_32)
+/* Count leading zeroes
+ Flags: - (may destroy flags)
+ Note: immediate source argument is not supported */
+#define SLJIT_CLZ (SLJIT_OP1_BASE + 10)
+#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32)
+/* Count trailing zeroes
+ Flags: - (may destroy flags)
+ Note: immediate source argument is not supported */
+#define SLJIT_CTZ (SLJIT_OP1_BASE + 11)
+#define SLJIT_CTZ32 (SLJIT_CTZ | SLJIT_32)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw);
+
+/* Starting index of opcodes for sljit_emit_op2. */
+#define SLJIT_OP2_BASE 96
+
+/* Flags: Z | OVERFLOW | CARRY */
+#define SLJIT_ADD (SLJIT_OP2_BASE + 0)
+#define SLJIT_ADD32 (SLJIT_ADD | SLJIT_32)
+/* Flags: CARRY */
+#define SLJIT_ADDC (SLJIT_OP2_BASE + 1)
+#define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_32)
+/* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL
+ SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER
+ SIG_LESS_EQUAL | OVERFLOW | CARRY */
+#define SLJIT_SUB (SLJIT_OP2_BASE + 2)
+#define SLJIT_SUB32 (SLJIT_SUB | SLJIT_32)
+/* Flags: CARRY */
+#define SLJIT_SUBC (SLJIT_OP2_BASE + 3)
+#define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_32)
+/* Note: integer mul
+ Flags: OVERFLOW */
+#define SLJIT_MUL (SLJIT_OP2_BASE + 4)
+#define SLJIT_MUL32 (SLJIT_MUL | SLJIT_32)
+/* Flags: Z */
+#define SLJIT_AND (SLJIT_OP2_BASE + 5)
+#define SLJIT_AND32 (SLJIT_AND | SLJIT_32)
+/* Flags: Z */
+#define SLJIT_OR (SLJIT_OP2_BASE + 6)
+#define SLJIT_OR32 (SLJIT_OR | SLJIT_32)
+/* Flags: Z */
+#define SLJIT_XOR (SLJIT_OP2_BASE + 7)
+#define SLJIT_XOR32 (SLJIT_XOR | SLJIT_32)
+/* Flags: Z
+ Let bit_length be the length of the shift operation: 32 or 64.
+ If src2 is immediate, src2w is masked by (bit_length - 1).
+ Otherwise, if the content of src2 is outside the range from 0
+ to bit_length - 1, the result is undefined. */
+#define SLJIT_SHL (SLJIT_OP2_BASE + 8)
+#define SLJIT_SHL32 (SLJIT_SHL | SLJIT_32)
+/* Flags: Z
+ Same as SLJIT_SHL, except that the second operand is
+ always masked by the length of the shift operation. */
+#define SLJIT_MSHL (SLJIT_OP2_BASE + 9)
+#define SLJIT_MSHL32 (SLJIT_MSHL | SLJIT_32)
+/* Flags: Z
+ Let bit_length be the length of the shift operation: 32 or 64.
+ If src2 is immediate, src2w is masked by (bit_length - 1).
+ Otherwise, if the content of src2 is outside the range from 0
+ to bit_length - 1, the result is undefined. */
+#define SLJIT_LSHR (SLJIT_OP2_BASE + 10)
+#define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_32)
+/* Flags: Z
+ Same as SLJIT_LSHR, except that the second operand is
+ always masked by the length of the shift operation. */
+#define SLJIT_MLSHR (SLJIT_OP2_BASE + 11)
+#define SLJIT_MLSHR32 (SLJIT_MLSHR | SLJIT_32)
+/* Flags: Z
+ Let bit_length be the length of the shift operation: 32 or 64.
+ If src2 is immediate, src2w is masked by (bit_length - 1).
+ Otherwise, if the content of src2 is outside the range from 0
+ to bit_length - 1, the result is undefined. */
+#define SLJIT_ASHR (SLJIT_OP2_BASE + 12)
+#define SLJIT_ASHR32 (SLJIT_ASHR | SLJIT_32)
+/* Flags: Z
+ Same as SLJIT_ASHR, except that the second operand is
+ always masked by the length of the shift operation. */
+#define SLJIT_MASHR (SLJIT_OP2_BASE + 13)
+#define SLJIT_MASHR32 (SLJIT_MASHR | SLJIT_32)
+/* Flags: - (may destroy flags)
+ Let bit_length be the length of the rotate operation: 32 or 64.
+ The second operand is always masked by (bit_length - 1). */
+#define SLJIT_ROTL (SLJIT_OP2_BASE + 14)
+#define SLJIT_ROTL32 (SLJIT_ROTL | SLJIT_32)
+/* Flags: - (may destroy flags)
+ Let bit_length be the length of the rotate operation: 32 or 64.
+ The second operand is always masked by (bit_length - 1). */
+#define SLJIT_ROTR (SLJIT_OP2_BASE + 15)
+#define SLJIT_ROTR32 (SLJIT_ROTR | SLJIT_32)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w);
+
+/* The sljit_emit_op2u function is the same as sljit_emit_op2
+ except the result is discarded. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w);
+
+/* Emit a left or right shift operation, where the bits shifted
+ in come from a separate source operand. All operands are
+ interpreted as unsigned integers.
+
+ In the following, the value_mask variable is 31 for 32 bit
+ operations and word_size - 1 otherwise.
+
+ op must be one of the following operations:
+ SLJIT_SHL or SLJIT_SHL32:
+ src_dst <<= src2
+ src_dst |= ((src1 >> 1) >> (src2 ^ value_mask))
+ SLJIT_MSHL or SLJIT_MSHL32:
+ src2 &= value_mask
+ perform the SLJIT_SHL or SLJIT_SHL32 operation
+ SLJIT_LSHR or SLJIT_LSHR32:
+ src_dst >>= src2
+ src_dst |= ((src1 << 1) << (src2 ^ value_mask))
+ SLJIT_MLSHR or SLJIT_MLSHR32:
+ src2 &= value_mask
+ perform the SLJIT_LSHR or SLJIT_LSHR32 operation
+
+ op can be combined (or'ed) with SLJIT_SHIFT_INTO_NON_ZERO
+
+ src_dst must be a register whose content is updated after
+ the operation is completed
+ src1 / src1w contains the bits which are shifted into src_dst
+ src2 / src2w contains the shift amount
+
+ Note: a rotate operation can be performed if src_dst and
+ src1 are set to the same register
+
+ Flags: - (may destroy flags) */
+
+/* src2 contains a non-zero value. Improves the generated
+ code on certain architectures, which provides a small
+ performance improvement. */
+#define SLJIT_SHIFT_INTO_NON_ZERO 0x200
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src_dst,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w);
+
+/* Starting index of opcodes for sljit_emit_op_src. */
+#define SLJIT_OP_SRC_BASE 128
+
+/* Note: src cannot be an immediate value
+ Flags: - (does not modify flags) */
+#define SLJIT_FAST_RETURN (SLJIT_OP_SRC_BASE + 0)
+/* Skip stack frames before fast return.
+ Note: src cannot be an immediate value
+ Flags: may destroy flags. */
+#define SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN (SLJIT_OP_SRC_BASE + 1)
+/* Prefetch value into the level 1 data cache
+ Note: if the target CPU does not support data prefetch,
+ no instructions are emitted.
+ Note: this instruction never fails, even if the memory address is invalid.
+ Flags: - (does not modify flags) */
+#define SLJIT_PREFETCH_L1 (SLJIT_OP_SRC_BASE + 2)
+/* Prefetch value into the level 2 data cache
+ Note: same as SLJIT_PREFETCH_L1 if the target CPU
+ does not support this instruction form.
+ Note: this instruction never fails, even if the memory address is invalid.
+ Flags: - (does not modify flags) */
+#define SLJIT_PREFETCH_L2 (SLJIT_OP_SRC_BASE + 3)
+/* Prefetch value into the level 3 data cache
+ Note: same as SLJIT_PREFETCH_L2 if the target CPU
+ does not support this instruction form.
+ Note: this instruction never fails, even if the memory address is invalid.
+ Flags: - (does not modify flags) */
+#define SLJIT_PREFETCH_L3 (SLJIT_OP_SRC_BASE + 4)
+/* Prefetch a value which is only used once (and can be discarded afterwards)
+ Note: same as SLJIT_PREFETCH_L1 if the target CPU
+ does not support this instruction form.
+ Note: this instruction never fails, even if the memory address is invalid.
+ Flags: - (does not modify flags) */
+#define SLJIT_PREFETCH_ONCE (SLJIT_OP_SRC_BASE + 5)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw);
+
+/* Starting index of opcodes for sljit_emit_fop1. */
+#define SLJIT_FOP1_BASE 160
+
+/* Flags: - (does not modify flags) */
+#define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0)
+#define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_32)
+/* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE]
+ SRC/DST TYPE can be: F64, F32, S32, SW
+ Rounding mode when the destination is SW or S32: round towards zero.
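+
+   For example, widening the signed word in SLJIT_R0 into a double held
+   in SLJIT_FR0, then truncating it back towards zero (a sketch):
+
+     sljit_emit_fop1(compiler, SLJIT_CONV_F64_FROM_SW, SLJIT_FR0, 0, SLJIT_R0, 0);
+     sljit_emit_fop1(compiler, SLJIT_CONV_SW_FROM_F64, SLJIT_R0, 0, SLJIT_FR0, 0);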
*/ +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_F64_FROM_F32 (SLJIT_FOP1_BASE + 1) +#define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_SW_FROM_F64 (SLJIT_FOP1_BASE + 2) +#define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_S32_FROM_F64 (SLJIT_FOP1_BASE + 3) +#define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_F64_FROM_SW (SLJIT_FOP1_BASE + 4) +#define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5) +#define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_32) +/* Note: dst is the left and src is the right operand for SLJIT_CMP_F32/64. + Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */ +#define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 6) +#define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 7) +#define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 8) +#define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_32) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw); + +/* Starting index of opcodes for sljit_emit_fop2. */ +#define SLJIT_FOP2_BASE 192 + +/* Flags: - (may destroy flags) */ +#define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0) +#define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_SUB_F64 (SLJIT_FOP2_BASE + 1) +#define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_MUL_F64 (SLJIT_FOP2_BASE + 2) +#define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ +#define SLJIT_DIV_F64 (SLJIT_FOP2_BASE + 3) +#define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_32) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +/* Label and jump instructions. */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler); + +/* Invert (negate) conditional type: xor (^) with 0x1 */ + +/* Integer comparison types. */ +#define SLJIT_EQUAL 0 +#define SLJIT_ZERO SLJIT_EQUAL +#define SLJIT_NOT_EQUAL 1 +#define SLJIT_NOT_ZERO SLJIT_NOT_EQUAL + +#define SLJIT_LESS 2 +#define SLJIT_SET_LESS SLJIT_SET(SLJIT_LESS) +#define SLJIT_GREATER_EQUAL 3 +#define SLJIT_SET_GREATER_EQUAL SLJIT_SET(SLJIT_GREATER_EQUAL) +#define SLJIT_GREATER 4 +#define SLJIT_SET_GREATER SLJIT_SET(SLJIT_GREATER) +#define SLJIT_LESS_EQUAL 5 +#define SLJIT_SET_LESS_EQUAL SLJIT_SET(SLJIT_LESS_EQUAL) +#define SLJIT_SIG_LESS 6 +#define SLJIT_SET_SIG_LESS SLJIT_SET(SLJIT_SIG_LESS) +#define SLJIT_SIG_GREATER_EQUAL 7 +#define SLJIT_SET_SIG_GREATER_EQUAL SLJIT_SET(SLJIT_SIG_GREATER_EQUAL) +#define SLJIT_SIG_GREATER 8 +#define SLJIT_SET_SIG_GREATER SLJIT_SET(SLJIT_SIG_GREATER) +#define SLJIT_SIG_LESS_EQUAL 9 +#define SLJIT_SET_SIG_LESS_EQUAL SLJIT_SET(SLJIT_SIG_LESS_EQUAL) + +#define SLJIT_OVERFLOW 10 +#define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW) +#define SLJIT_NOT_OVERFLOW 11 + +/* Unlike other flags, sljit_emit_jump may destroy the carry flag. 
*/
+#define SLJIT_CARRY 12
+#define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY)
+#define SLJIT_NOT_CARRY 13
+
+/* Basic floating point comparison types.
+
+ Note: when the comparison result is unordered, the behaviour of
+ these comparisons is unspecified. */
+
+#define SLJIT_F_EQUAL 14
+#define SLJIT_SET_F_EQUAL SLJIT_SET(SLJIT_F_EQUAL)
+#define SLJIT_F_NOT_EQUAL 15
+#define SLJIT_SET_F_NOT_EQUAL SLJIT_SET(SLJIT_F_NOT_EQUAL)
+#define SLJIT_F_LESS 16
+#define SLJIT_SET_F_LESS SLJIT_SET(SLJIT_F_LESS)
+#define SLJIT_F_GREATER_EQUAL 17
+#define SLJIT_SET_F_GREATER_EQUAL SLJIT_SET(SLJIT_F_GREATER_EQUAL)
+#define SLJIT_F_GREATER 18
+#define SLJIT_SET_F_GREATER SLJIT_SET(SLJIT_F_GREATER)
+#define SLJIT_F_LESS_EQUAL 19
+#define SLJIT_SET_F_LESS_EQUAL SLJIT_SET(SLJIT_F_LESS_EQUAL)
+
+/* Jumps when either argument contains a NaN value. */
+#define SLJIT_UNORDERED 20
+#define SLJIT_SET_UNORDERED SLJIT_SET(SLJIT_UNORDERED)
+/* Jumps when neither argument contains a NaN value. */
+#define SLJIT_ORDERED 21
+#define SLJIT_SET_ORDERED SLJIT_SET(SLJIT_ORDERED)
+
+/* Ordered / unordered floating point comparison types.
+
+ Note: each comparison type has an ordered and an unordered form. Some
+ architectures support only one of them (see: sljit_cmp_info). */
+
+#define SLJIT_ORDERED_EQUAL 22
+#define SLJIT_SET_ORDERED_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL)
+#define SLJIT_UNORDERED_OR_NOT_EQUAL 23
+#define SLJIT_SET_UNORDERED_OR_NOT_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_NOT_EQUAL)
+#define SLJIT_ORDERED_LESS 24
+#define SLJIT_SET_ORDERED_LESS SLJIT_SET(SLJIT_ORDERED_LESS)
+#define SLJIT_UNORDERED_OR_GREATER_EQUAL 25
+#define SLJIT_SET_UNORDERED_OR_GREATER_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_GREATER_EQUAL)
+#define SLJIT_ORDERED_GREATER 26
+#define SLJIT_SET_ORDERED_GREATER SLJIT_SET(SLJIT_ORDERED_GREATER)
+#define SLJIT_UNORDERED_OR_LESS_EQUAL 27
+#define SLJIT_SET_UNORDERED_OR_LESS_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_LESS_EQUAL)
+
+#define SLJIT_UNORDERED_OR_EQUAL 28
+#define SLJIT_SET_UNORDERED_OR_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL)
+#define SLJIT_ORDERED_NOT_EQUAL 29
+#define SLJIT_SET_ORDERED_NOT_EQUAL SLJIT_SET(SLJIT_ORDERED_NOT_EQUAL)
+#define SLJIT_UNORDERED_OR_LESS 30
+#define SLJIT_SET_UNORDERED_OR_LESS SLJIT_SET(SLJIT_UNORDERED_OR_LESS)
+#define SLJIT_ORDERED_GREATER_EQUAL 31
+#define SLJIT_SET_ORDERED_GREATER_EQUAL SLJIT_SET(SLJIT_ORDERED_GREATER_EQUAL)
+#define SLJIT_UNORDERED_OR_GREATER 32
+#define SLJIT_SET_UNORDERED_OR_GREATER SLJIT_SET(SLJIT_UNORDERED_OR_GREATER)
+#define SLJIT_ORDERED_LESS_EQUAL 33
+#define SLJIT_SET_ORDERED_LESS_EQUAL SLJIT_SET(SLJIT_ORDERED_LESS_EQUAL)
+
+/* Unconditional jump types. */
+#define SLJIT_JUMP 34
+/* Fast calling method. See sljit_emit_fast_enter / SLJIT_FAST_RETURN. */
+#define SLJIT_FAST_CALL 35
+/* Default C calling convention. */
+#define SLJIT_CALL 36
+/* Called function must be compiled by SLJIT.
+ See the SLJIT_ENTER_REG_ARG option. */
+#define SLJIT_CALL_REG_ARG 37
+
+/* The target can be changed during runtime (see: sljit_set_jump_addr). */
+#define SLJIT_REWRITABLE_JUMP 0x1000
+/* When this flag is passed, the execution of the current function ends and
+ the called function returns to the caller of the current function. The
+ stack usage is reduced before the call, but it is not necessarily reduced
+ to zero. In the latter case the compiler needs to allocate space for some
+ arguments and the return address must be stored on the stack as well. */
+#define SLJIT_CALL_RETURN 0x2000
+
+/* Emit a jump instruction. The destination is not set, only the type of the jump.
+ type must be between SLJIT_EQUAL and SLJIT_FAST_CALL
+ type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
+
+ Flags: does not modify flags. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type);
+
+/* Emit a C compiler (ABI) compatible function call.
+ type must be SLJIT_CALL or SLJIT_CALL_REG_ARG
+ type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP and/or SLJIT_CALL_RETURN
+ arg_types can be specified by SLJIT_ARGSx (SLJIT_ARG_RETURN / SLJIT_ARG_VALUE) macros
+
+ Flags: destroy all flags. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types);
+
+/* Basic arithmetic comparison. On most architectures it is implemented as
+ a compare operation followed by a sljit_emit_jump. However some
+ architectures (e.g. ARM64 or MIPS) may employ special optimizations
+ here. It is suggested to use this comparison form when appropriate.
+ type must be between SLJIT_EQUAL and SLJIT_SIG_LESS_EQUAL
+ type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
+
+ Flags: may destroy flags. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w);
+
+/* Basic floating point comparison. On most architectures it is implemented as
+ a SLJIT_CMP_F32/64 operation (setting appropriate flags) followed by a
+ sljit_emit_jump. However some architectures (e.g. MIPS) may employ
+ special optimizations here. It is suggested to use this comparison form
+ when appropriate.
+ type must be between SLJIT_F_EQUAL and SLJIT_ORDERED_LESS_EQUAL
+ type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
+ Flags: destroy flags.
+ Note: when an operand is NaN the behaviour depends on the comparison type. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w);
+
+/* Set the destination of the jump to this label. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label);
+/* Set the destination address of the jump. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target);
+
+/* Emit an indirect jump or fast call.
+ Direct form: set src to SLJIT_IMM and srcw to the address
+ Indirect form: any other valid addressing mode
+ type must be between SLJIT_JUMP and SLJIT_FAST_CALL
+
+ Flags: does not modify flags. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw);
+
+/* Emit a C compiler (ABI) compatible function call.
+ Direct form: set src to SLJIT_IMM and srcw to the address
+ Indirect form: any other valid addressing mode
+ type must be SLJIT_CALL or SLJIT_CALL_REG_ARG
+ type can be combined (or'ed) with SLJIT_CALL_RETURN
+ arg_types can be specified by SLJIT_ARGSx (SLJIT_ARG_RETURN / SLJIT_ARG_VALUE) macros
+
+ Flags: destroy all flags. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw);
+
+/* Perform an operation using the conditional flags as the second argument.
+ type must always be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL.
+ The value represented by the type is 1 if the condition represented
+ by the type is fulfilled, and 0 otherwise.
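+
+   For example, the following sketch materializes the result of a signed
+   comparison into SLJIT_R2 (1 when R0 < R1 as signed words, 0 otherwise):
+
+     sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_SIG_LESS,
+       SLJIT_R0, 0, SLJIT_R1, 0);
+     sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_SIG_LESS);
+
+   The exact behaviour depends on op: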
+
+ When op is SLJIT_MOV or SLJIT_MOV32:
+ Set dst to the value represented by the type (0 or 1).
+ Flags: - (does not modify flags)
+ When op is SLJIT_AND, SLJIT_AND32, SLJIT_OR, SLJIT_OR32, SLJIT_XOR, or SLJIT_XOR32:
+ Performs the binary operation using dst as the first, and the value
+ represented by type as the second argument. Result is written into dst.
+ Flags: Z (may destroy flags) */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 type);
+
+/* Emit a conditional mov instruction which moves source to destination
+ if the condition is satisfied. Unlike other arithmetic operations this
+ instruction does not support memory access.
+
+ type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL
+ type can be combined (or'ed) with SLJIT_32
+ dst_reg must be a valid register
+ src must be a valid register or immediate (SLJIT_IMM)
+
+ Flags: - (does not modify flags) */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw);
+
+/* The following flags are used by sljit_emit_mem(), sljit_emit_mem_update(),
+ sljit_emit_fmem(), and sljit_emit_fmem_update(). */
+
+/* Memory load operation. This is the default. */
+#define SLJIT_MEM_LOAD 0x000000
+/* Memory store operation. */
+#define SLJIT_MEM_STORE 0x000200
+
+/* The following flags are used by sljit_emit_mem() and sljit_emit_fmem(). */
+
+/* Load or store data from an unaligned (byte aligned) address. */
+#define SLJIT_MEM_UNALIGNED 0x000400
+/* Load or store data from a 16 bit aligned address. */
+#define SLJIT_MEM_UNALIGNED_16 0x000800
+/* Load or store data from a 32 bit aligned address. */
+#define SLJIT_MEM_UNALIGNED_32 0x001000
+
+/* The following flags are used by sljit_emit_mem_update(),
+ and sljit_emit_fmem_update(). */
+
+/* Base register is updated before the memory access (default). */
+#define SLJIT_MEM_PRE 0x000000
+/* Base register is updated after the memory access. */
+#define SLJIT_MEM_POST 0x000400
+
+/* When SLJIT_MEM_SUPP is passed, no instructions are emitted.
+ Instead the function returns with SLJIT_SUCCESS if the instruction
+ form is supported and SLJIT_ERR_UNSUPPORTED otherwise. This flag
+ allows runtime checking of available instruction forms. */
+#define SLJIT_MEM_SUPP 0x000800
+
+/* The sljit_emit_mem function emits instructions for various memory operations:
+
+ When SLJIT_MEM_UNALIGNED / SLJIT_MEM_UNALIGNED_16 /
+ SLJIT_MEM_UNALIGNED_32 is set in the type argument:
+ Emit instructions for unaligned memory loads or stores. When
+ SLJIT_UNALIGNED is not defined, the only way to access unaligned
+ memory data is using sljit_emit_mem. Otherwise all operations (e.g.
+ sljit_emit_op1/2, or sljit_emit_fop1/2) support unaligned access.
+ In general, the performance of unaligned memory accesses is often
+ lower than that of aligned accesses, so they should be avoided.
+
+ When a pair of registers is passed in the reg argument:
+ Emit instructions for moving data between a register pair and
+ memory. The register pair can be specified by the SLJIT_REG_PAIR
+ macro. The first register is loaded from or stored into the
+ location specified by the mem/memw arguments, and the end address
+ of this operation is the starting address of the data transfer
+ between the second register and memory. The type argument must
+ be SLJIT_MOV. The SLJIT_MEM_UNALIGNED* options are allowed for
+ this operation.
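+
+   Two sketches: a byte-aligned (fully unaligned) pointer-sized load,
+   and a register pair load starting at the address held in SLJIT_R2:
+
+     sljit_emit_mem(compiler, SLJIT_MOV_P | SLJIT_MEM_UNALIGNED,
+       SLJIT_R0, SLJIT_MEM1(SLJIT_R1), 0);
+     sljit_emit_mem(compiler, SLJIT_MOV,
+       SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), SLJIT_MEM1(SLJIT_R2), 0);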
+
+ type must be between SLJIT_MOV and SLJIT_MOV_P and can be
+ combined (or'ed) with SLJIT_MEM_* flags
+ reg is a register or register pair, which is the source or
+ destination of the operation
+ mem must be a memory operand
+
+ Flags: - (does not modify flags) */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 reg,
+ sljit_s32 mem, sljit_sw memw);
+
+/* Emit a single memory load or store with update instruction.
+ When the requested instruction form is not supported by the CPU,
+ it returns with SLJIT_ERR_UNSUPPORTED instead of emulating the
+ instruction. This allows specializing tight loops based on
+ the supported instruction forms (see the SLJIT_MEM_SUPP flag).
+ Absolute address (SLJIT_MEM0) forms are never supported,
+ and the base (first) register specified by the mem argument
+ must not be SLJIT_SP and must also be different from the
+ register specified by the reg argument.
+
+ type must be between SLJIT_MOV and SLJIT_MOV_P and can be
+ combined (or'ed) with SLJIT_MEM_* flags
+ reg is the source or destination register of the operation
+ mem must be a memory operand
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 reg,
+ sljit_s32 mem, sljit_sw memw);
+
+/* Same as sljit_emit_mem except for the following:
+
+ Loading or storing a pair of registers is not supported.
+
+ type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be
+ combined (or'ed) with SLJIT_MEM_* flags.
+ freg is the source or destination floating point register
+ of the operation
+ mem must be a memory operand
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 mem, sljit_sw memw);
+
+/* Same as sljit_emit_mem_update except for the following:
+
+ type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be
+ combined (or'ed) with SLJIT_MEM_* flags
+ freg is the source or destination floating point register
+ of the operation
+ mem must be a memory operand
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 mem, sljit_sw memw);
+
+/* Copies the base address of SLJIT_SP + offset to dst. The offset can
+ represent the starting address of a value in the local data (stack).
+ The offset is not limited by the local data limits; it can be any value.
+ For example, if an array of bytes is stored on the stack starting at
+ offset 0x40, and R0 contains the offset of an array item plus 0x120,
+ this item can be changed by two SLJIT instructions:
+
+ sljit_get_local_base(compiler, SLJIT_R1, 0, 0x40 - 0x120);
+ sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_IMM, 0x5);
+
+ Flags: - (may destroy flags) */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset);
+
+/* Store a value that can be changed at runtime (see: sljit_get_const_addr / sljit_set_const)
+ Flags: - (does not modify flags) */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value);
+
+/* Store the value of a label (see: sljit_set_put_label)
+ Flags: - (does not modify flags) */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw);
+
+/* Set the value stored by put_label to this label. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label);
+
+/* After code generation, the addresses of label, jump and const instructions
+ are computed. Since these structures are freed by sljit_free_compiler, the
+ addresses must be preserved by the user program elsewhere. */
+static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { return label->addr; }
+static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; }
+static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; }
+
+/* Only the address and executable offset are required to perform dynamic
+ code modifications. See the sljit_get_executable_offset function. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset);
+
+/* --------------------------------------------------------------------- */
+/* CPU specific functions */
+/* --------------------------------------------------------------------- */
+
+/* The following function is a helper function for sljit_emit_op_custom.
+ It returns with the real machine register index ( >= 0 ) of any SLJIT_R,
+ SLJIT_S and SLJIT_SP registers.
+
+ Note: it returns with -1 for virtual registers (only on x86-32). */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg);
+
+/* The following function is a helper function for sljit_emit_op_custom.
+ It returns with the real machine register index ( >= 0 ) of any SLJIT_FR
+ and SLJIT_FS register.
+
+ Note: the index is always an even number on ARM-32 and MIPS. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg);
+
+/* Any instruction can be inserted into the instruction stream by
+ sljit_emit_op_custom. Its purpose is similar to inline assembly.
+ The size parameter must match the instruction size of the target
+ architecture:
+
+ x86: 0 < size <= 15. The instruction argument can be byte aligned.
+ Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
+ if size == 4, the instruction argument must be 4 byte aligned.
+ Otherwise: size must be 4 and the instruction argument must be 4 byte aligned.
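+
+   For example, inserting a raw x86 "nop" (0x90) into the instruction
+   stream (a sketch, meaningful on x86 only):
+
+     sljit_u8 nop = 0x90;
+     sljit_emit_op_custom(compiler, &nop, 1);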
*/
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
+ void *instruction, sljit_u32 size);
+
+/* Flags were set by a 32 bit operation. */
+#define SLJIT_CURRENT_FLAGS_32 SLJIT_32
+
+/* Flags were set by an ADD or ADDC operation. */
+#define SLJIT_CURRENT_FLAGS_ADD 0x01
+/* Flags were set by a SUB, SUBC, or NEG operation. */
+#define SLJIT_CURRENT_FLAGS_SUB 0x02
+
+/* Flags were set by sljit_emit_op2u with the SLJIT_SUB opcode.
+ Must be combined with SLJIT_CURRENT_FLAGS_SUB. */
+#define SLJIT_CURRENT_FLAGS_COMPARE 0x04
+
+/* Define the currently available CPU status flags. It is usually used after
+ sljit_emit_label or sljit_emit_op_custom operations to define which CPU
+ status flags are available.
+
+ The current_flags must be a valid combination of SLJIT_SET_* and
+ SLJIT_CURRENT_FLAGS_* constants. */
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler,
+ sljit_s32 current_flags);
+
+/* --------------------------------------------------------------------- */
+/* Miscellaneous utility functions */
+/* --------------------------------------------------------------------- */
+
+/* Get the human readable name of the platform. This can be useful on
+ platforms like ARM, where ARM and Thumb2 functions can be mixed, and
+ it is useful to know the type of the code generator. */
+SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void);
+
+/* Portable helper function to get an offset of a member. */
+#define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10)
+
+#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
+
+/* The sljit_stack structure and its manipulation functions provide
+ an implementation of a top-down stack. The stack top is stored
+ in the end field of the sljit_stack structure and the stack goes
+ down to the min_start field, so the memory region reserved for
+ this stack is between the min_start (inclusive) and end (exclusive)
+ fields. However the application can only use the region between
+ the start (inclusive) and end (exclusive) fields. The sljit_stack_resize
+ function can be used to extend this region up to min_start.
+
+ This feature uses the "address space reserve" feature of modern
+ operating systems. Instead of allocating a large memory block,
+ applications can allocate a small memory region and extend it
+ later without moving the content of the memory area. Therefore
+ after a successful resize by sljit_stack_resize all pointers into
+ this region are still valid.
+
+ Note:
+ this structure may not be supported by all operating systems.
+ end and min_start fields are aligned to PAGE_SIZE bytes (usually
+ 4 Kbyte or more).
+ stack should grow in larger steps, e.g. 4Kbyte, 16Kbyte or more. */
+
+struct sljit_stack {
+ /* User data, anything can be stored here.
+ Initialized to the same value as the end field. */
+ sljit_u8 *top;
+/* These members are read only. */
+ /* End address of the stack */
+ sljit_u8 *end;
+ /* Current start address of the stack. */
+ sljit_u8 *start;
+ /* Lowest start address of the stack. */
+ sljit_u8 *min_start;
+};
+
+/* Allocates a new stack. Returns NULL if unsuccessful.
+ Note: see sljit_create_compiler for the explanation of allocator_data.
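+
+   A usage sketch (the stack grows downwards, so passing a smaller
+   new_start to sljit_stack_resize extends the usable area; the sizes
+   are arbitrary):
+
+     struct sljit_stack *stack = sljit_allocate_stack(8192, 65536, NULL);
+     if (stack != NULL) {
+       (... use the region between stack->start and stack->end ...)
+       if (sljit_stack_resize(stack, stack->start - 4096) == NULL)
+         (... the grow failed; the old region is unchanged ...)
+       sljit_free_stack(stack, NULL);
+     }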
*/
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data);
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data);
+
+/* Can be used to increase (extend) or decrease (shrink) the stack
+ memory area. Returns with new_start if successful and NULL otherwise.
+ It always fails if new_start is less than min_start or greater than
+ or equal to the end field. The fields of the stack are not changed
+ if the returned value is NULL (the current memory content is never lost). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start);
+
+#endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */
+
+#if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+
+/* Get the entry address of a given function (signed, unsigned result). */
+#define SLJIT_FUNC_ADDR(func_name) ((sljit_sw)func_name)
+#define SLJIT_FUNC_UADDR(func_name) ((sljit_uw)func_name)
+
+#else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
+
+/* All JIT related code should be placed in the same context (library, binary, etc.). */
+
+/* Get the entry address of a given function (signed, unsigned result). */
+#define SLJIT_FUNC_ADDR(func_name) (*(sljit_sw*)(void*)func_name)
+#define SLJIT_FUNC_UADDR(func_name) (*(sljit_uw*)(void*)func_name)
+
+/* On powerpc64, function pointers point to a context descriptor. */
+struct sljit_function_context {
+ sljit_uw addr;
+ sljit_uw r2;
+ sljit_uw r11;
+};
+
+/* Fill the context arguments using the addr and the function.
+ If func_ptr is NULL, it will not be set to the address of the context.
+ If addr is NULL, the function address also comes from the func pointer. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func);
+
+#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+/* Free unused executable memory. The allocator keeps some free memory
+ around to reduce the number of OS executable memory allocations.
+ This improves performance since these calls are costly. However,
+ it is sometimes desirable to free all unused memory regions, e.g.
+ before the application terminates. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* SLJIT_LIR_H_ */
diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeARM_32.c b/contrib/libs/pcre2/src/sljit/sljitNativeARM_32.c
new file mode 100644
index 0000000000..54b8ade063
--- /dev/null
+++ b/contrib/libs/pcre2/src/sljit/sljitNativeARM_32.c
@@ -0,0 +1,3700 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef __SOFTFP__
+#define ARM_ABI_INFO " ABI:softfp"
+#else
+#define ARM_ABI_INFO " ABI:hardfp"
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
+{
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+ return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
+#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+ return "ARMv5" SLJIT_CPUINFO ARM_ABI_INFO;
+#else
+#error "Internal error: Unknown ARM architecture"
+#endif
+}
+
+/* Last register + 1. */
+#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)
+
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
+
+/* In ARM instruction words.
+ Cache lines are usually 32-byte aligned. */
+#define CONST_POOL_ALIGNMENT 8
+#define CONST_POOL_EMPTY 0xffffffff
+
+#define ALIGN_INSTRUCTION(ptr) \
+ (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
+#define MAX_DIFFERENCE(max_diff) \
+ (((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
+
+/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+ 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
+};
+
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7
+};
+
+#define RM(rm) ((sljit_uw)reg_map[rm])
+#define RM8(rm) ((sljit_uw)reg_map[rm] << 8)
+#define RD(rd) ((sljit_uw)reg_map[rd] << 12)
+#define RN(rn) ((sljit_uw)reg_map[rn] << 16)
+
+#define VM(rm) ((sljit_uw)freg_map[rm])
+#define VD(rd) ((sljit_uw)freg_map[rd] << 12)
+#define VN(rn) ((sljit_uw)freg_map[rn] << 16)
+
+/* --------------------------------------------------------------------- */
+/* Instruction forms */
+/* --------------------------------------------------------------------- */
+
+/* The instruction includes the AL condition.
+ INST_NAME - CONDITIONAL removes this flag.
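+ For example, B is 0xea000000 and CONDITIONAL is 0xe0000000, so
+ (B - CONDITIONAL) is 0x0a000000, the raw branch opcode with a zero
+ condition field; the jump patching code below ORs such a value with
+ the condition bits of the original instruction (selected by COND_MASK).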
*/
+#define COND_MASK 0xf0000000
+#define CONDITIONAL 0xe0000000
+#define PUSH_POOL 0xff000000
+
+#define ADC 0xe0a00000
+#define ADD 0xe0800000
+#define AND 0xe0000000
+#define B 0xea000000
+#define BIC 0xe1c00000
+#define BL 0xeb000000
+#define BLX 0xe12fff30
+#define BX 0xe12fff10
+#define CLZ 0xe16f0f10
+#define CMN 0xe1600000
+#define CMP 0xe1400000
+#define BKPT 0xe1200070
+#define EOR 0xe0200000
+#define LDR 0xe5100000
+#define LDR_POST 0xe4100000
+#define MOV 0xe1a00000
+#define MUL 0xe0000090
+#define MVN 0xe1e00000
+#define NOP 0xe1a00000
+#define ORR 0xe1800000
+#define PUSH 0xe92d0000
+#define POP 0xe8bd0000
+#define RBIT 0xe6ff0f30
+#define RSB 0xe0600000
+#define RSC 0xe0e00000
+#define SBC 0xe0c00000
+#define SMULL 0xe0c00090
+#define STR 0xe5000000
+#define SUB 0xe0400000
+#define TST 0xe1000000
+#define UMULL 0xe0800090
+#define VABS_F32 0xeeb00ac0
+#define VADD_F32 0xee300a00
+#define VCMP_F32 0xeeb40a40
+#define VCVT_F32_S32 0xeeb80ac0
+#define VCVT_F64_F32 0xeeb70ac0
+#define VCVT_S32_F32 0xeebd0ac0
+#define VDIV_F32 0xee800a00
+#define VLDR_F32 0xed100a00
+#define VMOV_F32 0xeeb00a40
+#define VMOV 0xee000a10
+#define VMOV2 0xec400a10
+#define VMRS 0xeef1fa10
+#define VMUL_F32 0xee200a00
+#define VNEG_F32 0xeeb10a40
+#define VPOP 0xecbd0b00
+#define VPUSH 0xed2d0b00
+#define VSTR_F32 0xed000a00
+#define VSUB_F32 0xee300a40
+
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+/* ARMv7 specific instructions. */
+#define MOVW 0xe3000000
+#define MOVT 0xe3400000
+#define SXTB 0xe6af0070
+#define SXTH 0xe6bf0070
+#define UXTB 0xe6ef0070
+#define UXTH 0xe6ff0070
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+
+static sljit_s32 push_cpool(struct sljit_compiler *compiler)
+{
+ /* Pushing the constant pool into the instruction stream. */
+ sljit_uw* inst;
+ sljit_uw* cpool_ptr;
+ sljit_uw* cpool_end;
+ sljit_s32 i;
+
+ /* The label could point to the address after the constant pool.
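+ In that case its size is advanced past the pool marker word, the
+ CONST_POOL_ALIGNMENT - 1 alignment words and the cpool_fill literal
+ words pushed below, so it still addresses the first instruction
+ emitted after the pool.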
*/ + if (compiler->last_label && compiler->last_label->size == compiler->size) + compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1; + + SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE); + inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!inst); + compiler->size++; + *inst = 0xff000000 | compiler->cpool_fill; + + for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) { + inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!inst); + compiler->size++; + *inst = 0; + } + + cpool_ptr = compiler->cpool; + cpool_end = cpool_ptr + compiler->cpool_fill; + while (cpool_ptr < cpool_end) { + inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!inst); + compiler->size++; + *inst = *cpool_ptr++; + } + compiler->cpool_diff = CONST_POOL_EMPTY; + compiler->cpool_fill = 0; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) +{ + sljit_uw* ptr; + + if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) + FAIL_IF(push_cpool(compiler)); + + ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) +{ + sljit_uw* ptr; + sljit_uw cpool_index = CPOOL_SIZE; + sljit_uw* cpool_ptr; + sljit_uw* cpool_end; + sljit_u8* cpool_unique_ptr; + + if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) + FAIL_IF(push_cpool(compiler)); + else if (compiler->cpool_fill > 0) { + cpool_ptr = compiler->cpool; + cpool_end = cpool_ptr + compiler->cpool_fill; + cpool_unique_ptr = compiler->cpool_unique; + do { + if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) { + cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool); + break; + } + cpool_ptr++; + cpool_unique_ptr++; + } while (cpool_ptr < cpool_end); + } + + if (cpool_index == CPOOL_SIZE) { + /* Must allocate a new entry in the literal pool. 
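+ (The search above only reuses an existing slot when the same literal
+ was pushed earlier and was not marked unique, so e.g. two loads of
+ the same constant normally share one pool word.)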
*/
+ if (compiler->cpool_fill < CPOOL_SIZE) {
+ cpool_index = compiler->cpool_fill;
+ compiler->cpool_fill++;
+ }
+ else {
+ FAIL_IF(push_cpool(compiler));
+ cpool_index = 0;
+ compiler->cpool_fill = 1;
+ }
+ }
+
+ SLJIT_ASSERT((inst & 0xfff) == 0);
+ ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+ FAIL_IF(!ptr);
+ compiler->size++;
+ *ptr = inst | cpool_index;
+
+ compiler->cpool[cpool_index] = literal;
+ compiler->cpool_unique[cpool_index] = 0;
+ if (compiler->cpool_diff == CONST_POOL_EMPTY)
+ compiler->cpool_diff = compiler->size;
+ return SLJIT_SUCCESS;
+}
+
+static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
+{
+ sljit_uw* ptr;
+ if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
+ FAIL_IF(push_cpool(compiler));
+
+ SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
+ ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+ FAIL_IF(!ptr);
+ compiler->size++;
+ *ptr = inst | compiler->cpool_fill;
+
+ compiler->cpool[compiler->cpool_fill] = literal;
+ compiler->cpool_unique[compiler->cpool_fill] = 1;
+ compiler->cpool_fill++;
+ if (compiler->cpool_diff == CONST_POOL_EMPTY)
+ compiler->cpool_diff = compiler->size;
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
+{
+ /* Room for at least two instructions (it does not matter whether the first has a literal). */
+ if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
+ return push_cpool(compiler);
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
+{
+ /* Must immediately follow the previous instruction (so that instruction can be converted to a bl). */
+ SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
+ SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
+
+ return push_inst(compiler, BLX | RM(TMP_REG1));
+}
+
+static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
+{
+ sljit_uw diff;
+ sljit_uw ind;
+ sljit_uw counter = 0;
+ sljit_uw* clear_const_pool = const_pool;
+ sljit_uw* clear_const_pool_end = const_pool + cpool_size;
+
+ SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
+ /* Set the unused flag for all literals in the constant pool.
+ I.e.: unused literals can belong to branches, which can be encoded as B or BL.
+ We can "compress" the constant pool by discarding these literals. */
+ while (clear_const_pool < clear_const_pool_end)
+ *clear_const_pool++ = (sljit_uw)(-1);
+
+ while (last_pc_patch < code_ptr) {
+ /* Data transfer instruction with Rn == r15. */
+ if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) {
+ diff = (sljit_uw)(const_pool - last_pc_patch);
+ ind = (*last_pc_patch) & 0xfff;
+
+ /* Must be a load instruction with immediate offset.
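+ In this ARM encoding a clear bit 25 selects the immediate-offset
+ form and a set bit 20 marks a load, which is exactly what the
+ assertion below checks.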
*/ + SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20))); + if ((sljit_s32)const_pool[ind] < 0) { + const_pool[ind] = counter; + ind = counter; + counter++; + } + else + ind = const_pool[ind]; + + SLJIT_ASSERT(diff >= 1); + if (diff >= 2 || ind > 0) { + diff = (diff + (sljit_uw)ind - 2) << 2; + SLJIT_ASSERT(diff <= 0xfff); + *last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff; + } + else + *last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004; + } + last_pc_patch++; + } + return counter; +} + +/* In some rare ocasions we may need future patches. The probability is close to 0 in practice. */ +struct future_patch { + struct future_patch* next; + sljit_s32 index; + sljit_s32 value; +}; + +static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) +{ + sljit_u32 value; + struct future_patch *curr_patch, *prev_patch; + + SLJIT_UNUSED_ARG(compiler); + + /* Using the values generated by patch_pc_relative_loads. */ + if (!*first_patch) + value = cpool_start_address[cpool_current_index]; + else { + curr_patch = *first_patch; + prev_patch = NULL; + while (1) { + if (!curr_patch) { + value = cpool_start_address[cpool_current_index]; + break; + } + if ((sljit_uw)curr_patch->index == cpool_current_index) { + value = (sljit_uw)curr_patch->value; + if (prev_patch) + prev_patch->next = curr_patch->next; + else + *first_patch = curr_patch->next; + SLJIT_FREE(curr_patch, compiler->allocator_data); + break; + } + prev_patch = curr_patch; + curr_patch = curr_patch->next; + } + } + + if ((sljit_sw)value >= 0) { + if (value > cpool_current_index) { + curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data); + if (!curr_patch) { + while (*first_patch) { + curr_patch = *first_patch; + *first_patch = (*first_patch)->next; + SLJIT_FREE(curr_patch, compiler->allocator_data); + } + return SLJIT_ERR_ALLOC_FAILED; + } + curr_patch->next = *first_patch; + curr_patch->index = (sljit_sw)value; + curr_patch->value = (sljit_sw)cpool_start_address[value]; + *first_patch = curr_patch; + } + cpool_start_address[value] = *buf_ptr; + } + return SLJIT_SUCCESS; +} + +#else + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) +{ + sljit_uw* ptr; + + ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | ((sljit_u32)imm & 0xfff))); + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | (((sljit_u32)imm >> 16) & 0xfff)); +} + +#endif + +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset) +{ + sljit_sw diff; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (jump->flags & IS_BL) + code_ptr--; + + if (jump->flags & JUMP_ADDR) + diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset); + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)); + } + + /* Branch to Thumb code has not been optimized yet. 
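+ The check below filters such targets: ARM B/BL encode a word-aligned
+ offset, so diff must be a multiple of 4, and the short branch form is
+ only used when the distance fits the 24-bit signed word offset, i.e.
+ byte offsets in [-0x02000000, 0x01ffffff].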
*/ + if (diff & 0x3) + return 0; + + if (jump->flags & IS_BL) { + if (diff <= 0x01ffffff && diff >= -0x02000000) { + *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK); + jump->flags |= PATCH_B; + return 1; + } + } + else { + if (diff <= 0x01ffffff && diff >= -0x02000000) { + *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK); + jump->flags |= PATCH_B; + } + } +#else + if (jump->flags & JUMP_ADDR) + diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset); + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr); + } + + /* Branch to Thumb code has not been optimized yet. */ + if (diff & 0x3) + return 0; + + if (diff <= 0x01ffffff && diff >= -0x02000000) { + code_ptr -= 2; + *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK); + jump->flags |= PATCH_B; + return 1; + } +#endif + return 0; +} + +static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache) +{ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_uw *ptr = (sljit_uw *)jump_ptr; + sljit_uw *inst = (sljit_uw *)ptr[0]; + sljit_uw mov_pc = ptr[1]; + sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC); + sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2); + + SLJIT_UNUSED_ARG(executable_offset); + + if (diff <= 0x7fffff && diff >= -0x800000) { + /* Turn to branch. */ + if (!bl) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff); + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } else { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + } + inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff); + inst[1] = NOP; + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } + } + } else { + /* Get the position of the constant. */ + if (mov_pc & (1 << 23)) + ptr = inst + ((mov_pc & 0xfff) >> 2) + 2; + else + ptr = inst + 1; + + if (*inst != mov_pc) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + (!bl ? 
1 : 2), 0); + } + inst[0] = mov_pc; + if (!bl) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } else { + inst[1] = BLX | RM(TMP_REG1); + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } + } + } + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0); + } + + *ptr = new_addr; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1); + } + } +#else + sljit_uw *inst = (sljit_uw*)jump_ptr; + + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + } + + inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff); + inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff); + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } +#endif +} + +static sljit_uw get_imm(sljit_uw imm); +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm); +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg); + +static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache) +{ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_uw *ptr = (sljit_uw*)addr; + sljit_uw *inst = (sljit_uw*)ptr[0]; + sljit_uw ldr_literal = ptr[1]; + sljit_uw src2; + + SLJIT_UNUSED_ARG(executable_offset); + + src2 = get_imm(new_constant); + if (src2) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + + *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + return; + } + + src2 = get_imm(~new_constant); + if (src2) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + + *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + return; + } + + if (ldr_literal & (1 << 23)) + ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2; + else + ptr = inst + 1; + + if (*inst != ldr_literal) { + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0); + } + + *inst = ldr_literal; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0); + } + + *ptr = new_constant; + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1); + } +#else + sljit_uw *inst = (sljit_uw*)addr; + + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + } + + inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff); + inst[1] = MOVT | (inst[1] & 
0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff); + + if (flush_cache) { + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); + } +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_uw *code; + sljit_uw *code_ptr; + sljit_uw *buf_ptr; + sljit_uw *buf_end; + sljit_uw size; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_uw addr; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_uw cpool_size; + sljit_uw cpool_skip_alignment; + sljit_uw cpool_current_index; + sljit_uw *cpool_start_address; + sljit_uw *last_pc_patch; + struct future_patch *first_patch; +#endif + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + /* Second code generation pass. */ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + size = compiler->size + (compiler->patches << 1); + if (compiler->cpool_fill > 0) + size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1; +#else + size = compiler->size; +#endif + code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + cpool_size = 0; + cpool_skip_alignment = 0; + cpool_current_index = 0; + cpool_start_address = NULL; + first_patch = NULL; + last_pc_patch = code; +#endif + + code_ptr = code; + word_count = 0; + next_addr = 1; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + if (label && label->size == 0) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + label = label->next; + } + + do { + buf_ptr = (sljit_uw*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + word_count++; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (cpool_size > 0) { + if (cpool_skip_alignment > 0) { + buf_ptr++; + cpool_skip_alignment--; + } + else { + if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + SLJIT_FREE_EXEC(code, compiler->exec_allocator_data); + compiler->error = SLJIT_ERR_ALLOC_FAILED; + return NULL; + } + buf_ptr++; + if (++cpool_current_index >= cpool_size) { + SLJIT_ASSERT(!first_patch); + cpool_size = 0; + if (label && label->size == word_count) { + /* Points after the current instruction. */ + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + + next_addr = compute_next_addr(label, jump, const_, put_label); + } + } + } + } + else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { +#endif + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. 
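+ next_addr holds the smallest pending address among them, so the
+ per-structure checks below only run for words that actually carry
+ a label, jump, constant or put_label.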
*/ + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (detect_jump_type(jump, code_ptr, code, executable_offset)) + code_ptr--; + jump->addr = (sljit_uw)code_ptr; +#else + jump->addr = (sljit_uw)(code_ptr - 2); + if (detect_jump_type(jump, code_ptr, code, executable_offset)) + code_ptr -= 2; +#endif + jump = jump->next; + } + if (label && label->size == word_count) { + /* code_ptr can be affected above. */ + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset); + label->size = (sljit_uw)((code_ptr + 1) - code); + label = label->next; + } + if (const_ && const_->addr == word_count) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + const_->addr = (sljit_uw)code_ptr; +#else + const_->addr = (sljit_uw)(code_ptr - 1); +#endif + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr++; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + } + else { + /* Fortunately, no need to shift. */ + cpool_size = *buf_ptr++ & ~PUSH_POOL; + SLJIT_ASSERT(cpool_size > 0); + cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1); + cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); + if (cpool_current_index > 0) { + /* Unconditional branch. */ + *code_ptr = B | (((sljit_uw)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); + code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); + } + cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; + cpool_current_index = 0; + last_pc_patch = code_ptr; + } +#endif + } while (buf_ptr < buf_end); + buf = buf->next; + } while (buf); + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + SLJIT_ASSERT(cpool_size == 0); + if (compiler->cpool_fill > 0) { + cpool_start_address = ALIGN_INSTRUCTION(code_ptr); + cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); + if (cpool_current_index > 0) + code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); + + buf_ptr = compiler->cpool; + buf_end = buf_ptr + compiler->cpool_fill; + cpool_current_index = 0; + while (buf_ptr < buf_end) { + if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + SLJIT_FREE_EXEC(code, compiler->exec_allocator_data); + compiler->error = SLJIT_ERR_ALLOC_FAILED; + return NULL; + } + buf_ptr++; + cpool_current_index++; + } + SLJIT_ASSERT(!first_patch); + } +#endif + + jump = compiler->jumps; + while (jump) { + buf_ptr = (sljit_uw *)jump->addr; + + if (jump->flags & PATCH_B) { + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); + if (!(jump->flags & JUMP_ADDR)) { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - addr) <= 0x01ffffff && (sljit_sw)(jump->u.label->addr - addr) >= -0x02000000); + *buf_ptr |= ((jump->u.label->addr - addr) >> 2) & 0x00ffffff; + } + else { + SLJIT_ASSERT((sljit_sw)(jump->u.target - addr) <= 0x01ffffff && (sljit_sw)(jump->u.target - addr) >= -0x02000000); + *buf_ptr |= ((jump->u.target - addr) >> 2) & 0x00ffffff; + } + } + else if (jump->flags & SLJIT_REWRITABLE_JUMP) { +#if (defined 
SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + jump->addr = (sljit_uw)code_ptr; + code_ptr[0] = (sljit_uw)buf_ptr; + code_ptr[1] = *buf_ptr; + inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + code_ptr += 2; +#else + inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); +#endif + } + else { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (jump->flags & IS_BL) + buf_ptr--; + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; +#else + inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); +#endif + } + jump = jump->next; + } + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + const_ = compiler->consts; + while (const_) { + buf_ptr = (sljit_uw*)const_->addr; + const_->addr = (sljit_uw)code_ptr; + + code_ptr[0] = (sljit_uw)buf_ptr; + code_ptr[1] = *buf_ptr; + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + /* Set the value again (can be a simple constant). */ + inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0); + code_ptr += 2; + + const_ = const_->next; + } +#endif + + put_label = compiler->put_labels; + while (put_label) { + addr = put_label->label->addr; + buf_ptr = (sljit_uw*)put_label->addr; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000); + buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr; +#else + SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT); + buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff); + buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff); +#endif + put_label = put_label->next; + } + + SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size); + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw); + + code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif + + case SLJIT_HAS_CLZ: + case SLJIT_HAS_ROT: + case SLJIT_HAS_CMOV: +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + case SLJIT_HAS_CTZ: + case SLJIT_HAS_PREFETCH: +#endif + return 1; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + case SLJIT_HAS_CTZ: + return 2; +#endif + + default: + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. */ +#define WORD_SIZE 0x00 +#define BYTE_SIZE 0x01 +#define HALF_SIZE 0x02 +#define PRELOAD 0x03 +#define SIGNED 0x04 +#define LOAD_DATA 0x08 + +/* Flag bits for emit_op. 
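+ The ALLOW_* bits tell emit_op which immediate encodings may be tried:
+ e.g. with ALLOW_INV_IMM an AND whose immediate only encodes inverted
+ becomes BIC, and with ALLOW_NEG_IMM an ADD with a negative immediate
+ falls back to SUB (see the neg_op handling in emit_op below).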
*/
+#define ALLOW_IMM 0x10
+#define ALLOW_INV_IMM 0x20
+#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
+#define ALLOW_NEG_IMM 0x40
+
+/* s/l - store/load (1 bit)
+ u/s - signed/unsigned (1 bit)
+ w/b/h/N - word/byte/half/NOT allowed (2 bit)
+ Storing signed and unsigned values is the same operation. */
+
+static const sljit_uw data_transfer_insts[16] = {
+/* s u w */ 0xe5000000 /* str */,
+/* s u b */ 0xe5400000 /* strb */,
+/* s u h */ 0xe10000b0 /* strh */,
+/* s u N */ 0x00000000 /* not allowed */,
+/* s s w */ 0xe5000000 /* str */,
+/* s s b */ 0xe5400000 /* strb */,
+/* s s h */ 0xe10000b0 /* strh */,
+/* s s N */ 0x00000000 /* not allowed */,
+
+/* l u w */ 0xe5100000 /* ldr */,
+/* l u b */ 0xe5500000 /* ldrb */,
+/* l u h */ 0xe11000b0 /* ldrh */,
+/* l u p */ 0xf5500000 /* preload */,
+/* l s w */ 0xe5100000 /* ldr */,
+/* l s b */ 0xe11000d0 /* ldrsb */,
+/* l s h */ 0xe11000f0 /* ldrsh */,
+/* l s N */ 0x00000000 /* not allowed */,
+};
+
+#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
+ (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_uw)(arg))
+
+/* Normal ldr/str instruction.
+ Type2: ldrsb, ldrh, ldrsh */
+#define IS_TYPE1_TRANSFER(type) \
+ (data_transfer_insts[(type) & 0xf] & 0x04000000)
+#define TYPE2_TRANSFER_IMM(imm) \
+ (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
+
+#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
+ ((sljit_uw)(opcode) | (sljit_uw)(mode) | VD(dst) | VM(src1) | VN(src2))
+
+/* Flags for emit_op: */
+ /* Arguments are swapped. */
+#define ARGS_SWAPPED 0x01
+ /* Inverted immediate. */
+#define INV_IMM 0x02
+ /* Both the source and the destination are registers. */
+#define MOVE_REG_CONV 0x04
+ /* Unused return value. */
+#define UNUSED_RETURN 0x08
+/* SET_FLAGS must be (1 << 20) as it is also the value of the S bit (can be used for optimization). */
+#define SET_FLAGS (1 << 20)
+/* dst: reg
+ src1: reg
+ src2: reg or imm (if allowed)
+ SRC2_IMM must be (1 << 25) as it is also the value of the I bit (can be used for optimization). */
+#define SRC2_IMM (1 << 25)
+
+static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w);
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
+{
+ sljit_uw imm, offset;
+ sljit_s32 i, tmp, size, word_arg_count;
+ sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
+#ifdef __SOFTFP__
+ sljit_u32 float_arg_count;
+#else
+ sljit_u32 old_offset, f32_offset;
+ sljit_u32 remap[3];
+ sljit_u32 *remap_ptr = remap;
+#endif
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+
+ imm = 0;
+
+ tmp = SLJIT_S0 - saveds;
+ for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
+ imm |= (sljit_uw)1 << reg_map[i];
+
+ for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
+ imm |= (sljit_uw)1 << reg_map[i];
+
+ SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
+
+ /* Push saved and temporary registers
+ multiple registers: stmdb sp!, {..., lr}
+ single register: str reg, [sp, #-4]!
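+ Here (1 << 14) puts lr into the stmdb register list, while the
+ 0xe52d0004 form below is the pre-indexed single-word store with
+ writeback that pushes one register.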
*/ + if (imm != 0) + FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm)); + else + FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2))); + + /* Stack must be aligned to 8 bytes: */ + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((size & SSIZE_OF(sw)) != 0) { + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw))); + size += SSIZE_OF(sw); + } + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fsaveds > 0) + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + } + } + + local_size = ((size + local_size + 0x7) & ~0x7) - size; + compiler->local_size = local_size; + + if (options & SLJIT_ENTER_REG_ARG) + arg_types = 0; + + arg_types >>= SLJIT_ARG_SHIFT; + word_arg_count = 0; + saved_arg_count = 0; +#ifdef __SOFTFP__ + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + offset = 0; + float_arg_count = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + else + FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + offset += sizeof(sljit_f64) - sizeof(sljit_sw); + break; + case SLJIT_ARG_TYPE_F32: + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10))); + else + FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2)) + tmp = word_arg_count; + else + break; + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2))); + else + FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))); + break; + } + + offset += sizeof(sljit_sw); + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = offset; +#else + offset = SLJIT_FR0; + old_offset = SLJIT_FR0; + f32_offset = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != old_offset) + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0); + old_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0); + f32_offset = 0; + } else { + if (offset != old_offset) + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0); + f32_offset = old_offset; + old_offset++; + } + offset++; + break; + default: + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | 
RM(SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } + + word_arg_count++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap)); + + while (remap_ptr > remap) + FAIL_IF(push_inst(compiler, *(--remap_ptr))); +#endif + + if (local_size > 0) + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); + + if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)) + size += SSIZE_OF(sw); + + compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) +{ + sljit_uw imm2 = get_imm(imm); + + if (imm2 == 0) { + imm2 = (imm & ~(sljit_uw)0x3ff) >> 10; + imm = (imm & 0x3ff) >> 2; + + FAIL_IF(push_inst(compiler, ADD | SRC2_IMM | RD(SLJIT_SP) | RN(SLJIT_SP) | 0xb00 | imm2)); + return push_inst(compiler, ADD | SRC2_IMM | RD(SLJIT_SP) | RN(SLJIT_SP) | 0xf00 | (imm & 0xff)); + } + + return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2); +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size) +{ + sljit_s32 local_size, fscratches, fsaveds, i, tmp; + sljit_s32 restored_reg = 0; + sljit_s32 lr_dst = TMP_PC; + sljit_uw reg_list = 0; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128); + + local_size = compiler->local_size; + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + if (fsaveds > 0) + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + } + + local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7; + } + + if (frame_size < 0) { + lr_dst = TMP_REG2; + frame_size = 0; + } else if (frame_size > 0) { + SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0); + lr_dst = 0; + frame_size &= ~0x7; + } + + if (lr_dst != 0) + reg_list |= (sljit_uw)1 << reg_map[lr_dst]; + + tmp = SLJIT_S0 - compiler->saveds; + i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + if (tmp < i) { + restored_reg = i; + do { + reg_list |= (sljit_uw)1 << reg_map[i]; + } while (--i > tmp); + } + + i = compiler->scratches; + if (i >= SLJIT_FIRST_SAVED_REG) { + restored_reg = i; + do { + reg_list |= (sljit_uw)1 << reg_map[i]; + } while (--i >= SLJIT_FIRST_SAVED_REG); + } + + if (lr_dst == TMP_REG2 && reg_list == 0) { + restored_reg = 
TMP_REG2; + lr_dst = 0; + } + + if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) { + /* The local_size does not include the saved registers. */ + tmp = 0; + if (reg_list != 0) { + tmp = 2; + if (local_size <= 0xfff) { + if (local_size == 0) { + SLJIT_ASSERT(restored_reg != TMP_REG2); + if (frame_size == 0) + return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | 0x800008); + if (frame_size > 2 * SSIZE_OF(sw)) + return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)(frame_size - (2 * SSIZE_OF(sw)))); + } + + FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)local_size)); + tmp = 1; + } else if (frame_size == 0) { + frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw); + tmp = 3; + } + + /* Place for the saved register. */ + if (restored_reg != TMP_REG2) + local_size += SSIZE_OF(sw); + } + + /* Place for the lr register. */ + local_size += SSIZE_OF(sw); + + if (frame_size > local_size) + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_uw)(frame_size - local_size))); + else if (frame_size < local_size) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size))); + + if (tmp <= 1) + return SLJIT_SUCCESS; + + if (tmp == 2) { + frame_size -= SSIZE_OF(sw); + if (restored_reg != TMP_REG2) + frame_size -= SSIZE_OF(sw); + + return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)frame_size); + } + + tmp = (restored_reg == TMP_REG2) ? 0x800004 : 0x800008; + return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)tmp); + } + + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + /* Pop saved and temporary registers + multiple registers: ldmia sp!, {...} + single register: ldr reg, [sp], #4 */ + if ((reg_list & (reg_list - 1)) == 0) { + SLJIT_ASSERT(lr_dst != 0); + SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]); + + return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(lr_dst) | 0x800004); + } + + FAIL_IF(push_inst(compiler, POP | reg_list)); + + if (frame_size > 0) + return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_uw)frame_size - sizeof(sljit_sw))); + + if (lr_dst != 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | sizeof(sljit_sw)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + return emit_stack_frame_release(compiler, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static SLJIT_INLINE sljit_s32 emit_single_op(struct 
sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_uw dst, sljit_uw src1, sljit_uw src2) +{ + sljit_s32 is_masked; + sljit_uw shift_type; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (dst != src2) { + if (src2 & SRC2_IMM) { + return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); + } + return push_inst(compiler, MOV | RD(dst) | RM(src2)); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (flags & MOVE_REG_CONV) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (op == SLJIT_MOV_U8) + return push_inst(compiler, AND | RD(dst) | RN(src2) | SRC2_IMM | 0xff); + FAIL_IF(push_inst(compiler, MOV | RD(dst) | (24 << 7) | RM(src2))); + return push_inst(compiler, MOV | RD(dst) | (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst)); +#else + return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2)); +#endif + } + else if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (flags & MOVE_REG_CONV) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + FAIL_IF(push_inst(compiler, MOV | RD(dst) | (16 << 7) | RM(src2))); + return push_inst(compiler, MOV | RD(dst) | (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst)); +#else + return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2)); +#endif + } + else if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + if (src2 & SRC2_IMM) + return push_inst(compiler, ((flags & INV_IMM) ? MOV : MVN) | (flags & SET_FLAGS) | RD(dst) | src2); + + return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); + FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2))); + return SLJIT_SUCCESS; + + case SLJIT_CTZ: + SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG1) | RN(src2) | 0)); + FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | RM(TMP_REG1))); + FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG2))); + FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32)); + return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f); +#else /* !SLJIT_CONFIG_ARM_V5 */ + FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2))); + return push_inst(compiler, CLZ | RD(dst) | RM(dst)); +#endif /* SLJIT_CONFIG_ARM_V5 */ + + case SLJIT_ADD: + SLJIT_ASSERT(!(flags & INV_IMM)); + + if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) + return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_ADDC: + SLJIT_ASSERT(!(flags & INV_IMM)); + return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? 
src2 : RM(src2))); + + case SLJIT_SUB: + SLJIT_ASSERT(!(flags & INV_IMM)); + + if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) + return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_SUBC: + SLJIT_ASSERT(!(flags & INV_IMM)); + return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & INV_IMM)); + SLJIT_ASSERT(!(src2 & SRC2_IMM)); + compiler->status_flags_state = 0; + + if (!HAS_FLAGS(op)) + return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1)); + + FAIL_IF(push_inst(compiler, SMULL | RN(TMP_REG1) | RD(dst) | RM8(src2) | RM(src1))); + + /* cmp TMP_REG1, dst asr #31. */ + return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0); + + case SLJIT_AND: + if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN) + return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_OR: + SLJIT_ASSERT(!(flags & INV_IMM)); + return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_XOR: + SLJIT_ASSERT(!(flags & INV_IMM)); + return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + + case SLJIT_SHL: + case SLJIT_MSHL: + shift_type = 0; + is_masked = GET_OPCODE(op) == SLJIT_MSHL; + break; + + case SLJIT_LSHR: + case SLJIT_MLSHR: + shift_type = 1; + is_masked = GET_OPCODE(op) == SLJIT_MLSHR; + break; + + case SLJIT_ASHR: + case SLJIT_MASHR: + shift_type = 2; + is_masked = GET_OPCODE(op) == SLJIT_MASHR; + break; + + case SLJIT_ROTL: + if (compiler->shift_imm == 0x20) { + FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0)); + src2 = TMP_REG2; + } else + compiler->shift_imm = (sljit_uw)(-(sljit_sw)compiler->shift_imm) & 0x1f; + /* fallthrough */ + + case SLJIT_ROTR: + shift_type = 3; + is_masked = 0; + break; + + default: + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + } + + SLJIT_ASSERT(!(flags & ARGS_SWAPPED) && !(flags & INV_IMM) && !(src2 & SRC2_IMM)); + + if (compiler->shift_imm != 0x20) { + SLJIT_ASSERT(src1 == TMP_REG1); + + if (compiler->shift_imm != 0) + return push_inst(compiler, MOV | (flags & SET_FLAGS) | + RD(dst) | (compiler->shift_imm << 7) | (shift_type << 5) | RM(src2)); + return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2)); + } + + SLJIT_ASSERT(src1 != TMP_REG2); + + if (is_masked) { + FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | SRC2_IMM | 0x1f)); + src2 = TMP_REG2; + } + + return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) + | RM8(src2) | (sljit_uw)(shift_type << 5) | 0x10 | RM(src1)); +} + +#undef EMIT_SHIFT_INS_AND_RETURN + +/* Tests whether the immediate can be stored in the 12 bit imm field. + Returns with 0 if not possible. 
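+ An ARM data processing immediate is an 8-bit value rotated right by
+ twice the 4-bit rotation field. Worked example: get_imm(0xff000000)
+ returns SRC2_IMM | (4 << 8) | 0xff, i.e. 0xff rotated right by 8.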
*/ +static sljit_uw get_imm(sljit_uw imm) +{ + sljit_u32 rol; + + if (imm <= 0xff) + return SRC2_IMM | imm; + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol = 8; + } + else { + imm = (imm << 24) | (imm >> 8); + rol = 0; + } + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol += 4; + } + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + return SRC2_IMM | (imm >> 24) | (rol << 8); + else + return 0; +} + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm, sljit_s32 positive) +{ + sljit_uw mask; + sljit_uw imm1; + sljit_uw imm2; + sljit_uw rol; + + /* Step1: Search a zero byte (8 continous zero bit). */ + mask = 0xff000000; + rol = 8; + while(1) { + if (!(imm & mask)) { + /* Rol imm by rol. */ + imm = (imm << rol) | (imm >> (32 - rol)); + /* Calculate arm rol. */ + rol = 4 + (rol >> 1); + break; + } + rol += 2; + mask >>= 2; + if (mask & 0x3) { + /* rol by 8. */ + imm = (imm << 8) | (imm >> 24); + mask = 0xff00; + rol = 24; + while (1) { + if (!(imm & mask)) { + /* Rol imm by rol. */ + imm = (imm << rol) | (imm >> (32 - rol)); + /* Calculate arm rol. */ + rol = (rol >> 1) - 8; + break; + } + rol += 2; + mask >>= 2; + if (mask & 0x3) + return 0; + } + break; + } + } + + /* The low 8 bit must be zero. */ + SLJIT_ASSERT(!(imm & 0xff)); + + if (!(imm & 0xff000000)) { + imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8); + imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8); + } + else if (imm & 0xc0000000) { + imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); + imm <<= 8; + rol += 4; + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol += 4; + } + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + else + return 0; + } + else { + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); + imm <<= 8; + rol += 4; + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + else + return 0; + } + + FAIL_IF(push_inst(compiler, (positive ? MOV : MVN) | RD(reg) | imm1)); + FAIL_IF(push_inst(compiler, (positive ? ORR : BIC) | RD(reg) | RN(reg) | imm2)); + return 1; +} +#endif + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm) +{ + sljit_uw tmp; + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + if (!(imm & ~(sljit_uw)0xffff)) + return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)); +#endif + + /* Create imm by 1 inst. */ + tmp = get_imm(imm); + if (tmp) + return push_inst(compiler, MOV | RD(reg) | tmp); + + tmp = get_imm(~imm); + if (tmp) + return push_inst(compiler, MVN | RD(reg) | tmp); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + /* Create imm by 2 inst. */ + FAIL_IF(generate_int(compiler, reg, imm, 1)); + FAIL_IF(generate_int(compiler, reg, ~imm, 0)); + + /* Load integer. 
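+ As a last resort, e.g. for a value like 0x12345678 that has no run
+ of 8 zero bits at an even rotation, load it with a pc-relative ldr
+ from the constant pool.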
*/ + return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm); +#else + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); + if (imm <= 0xffff) + return SLJIT_SUCCESS; + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); +#endif +} + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) +{ + sljit_uw imm, offset_reg, tmp; + sljit_sw mask = IS_TYPE1_TRANSFER(flags) ? 0xfff : 0xff; + sljit_sw sign = IS_TYPE1_TRANSFER(flags) ? 0x1000 : 0x100; + + SLJIT_ASSERT(arg & SLJIT_MEM); + SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -mask && argw <= mask)); + + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) { + tmp = (sljit_uw)(argw & (sign | mask)); + tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask); + + FAIL_IF(load_immediate(compiler, tmp_reg, tmp)); + + argw -= (sljit_sw)tmp; + tmp = 1; + + if (argw < 0) { + argw = -argw; + tmp = 0; + } + + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, tmp, reg, tmp_reg, + (mask == 0xff) ? TYPE2_TRANSFER_IMM(argw) : argw)); + } + + if (arg & OFFS_REG_MASK) { + offset_reg = OFFS_REG(arg); + arg &= REG_MASK; + argw &= 0x3; + + if (argw != 0 && (mask == 0xff)) { + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_uw)argw << 7))); + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0))); + } + + /* Bit 25: RM is offset. */ + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, + RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_uw)argw << 7))); + } + + arg &= REG_MASK; + + if (argw > mask) { + tmp = (sljit_uw)(argw & (sign | mask)); + tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask); + imm = get_imm(tmp); + + if (imm) { + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); + argw -= (sljit_sw)tmp; + arg = tmp_reg; + + SLJIT_ASSERT(argw >= -mask && argw <= mask); + } + } else if (argw < -mask) { + tmp = (sljit_uw)(-argw & (sign | mask)); + tmp = (sljit_uw)((-argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask); + imm = get_imm(tmp); + + if (imm) { + FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); + argw += (sljit_sw)tmp; + arg = tmp_reg; + + SLJIT_ASSERT(argw >= -mask && argw <= mask); + } + } + + if (argw <= mask && argw >= -mask) { + if (argw >= 0) { + if (mask == 0xff) + argw = TYPE2_TRANSFER_IMM(argw); + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw)); + } + + argw = -argw; + + if (mask == 0xff) + argw = TYPE2_TRANSFER_IMM(argw); + + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, argw)); + } + + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, + RM(tmp_reg) | (mask == 0xff ? 0 : (1 << 25)))); +} + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* src1 is reg or TMP_REG1 + src2 is reg, TMP_REG2, or imm + result goes to TMP_REG2, so put result can use TMP_REG1. */ + + /* We prefers register and simple consts. */ + sljit_s32 dst_reg; + sljit_s32 src1_reg; + sljit_s32 src2_reg = 0; + sljit_s32 flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; + sljit_s32 neg_op = 0; + + if (dst == TMP_REG2) + flags |= UNUSED_RETURN; + + SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); + + if (inp_flags & ALLOW_NEG_IMM) { + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUB; + break; + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUBC; + break; + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADD; + break; + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADDC; + break; + } + } + + do { + if (!(inp_flags & ALLOW_IMM)) + break; + + if (src2 & SLJIT_IMM) { + src2_reg = (sljit_s32)get_imm((sljit_uw)src2w); + if (src2_reg) + break; + if (inp_flags & ALLOW_INV_IMM) { + src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w); + if (src2_reg) { + flags |= INV_IMM; + break; + } + } + if (neg_op != 0) { + src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w); + if (src2_reg) { + op = neg_op | GET_ALL_FLAGS(op); + break; + } + } + } + + if (src1 & SLJIT_IMM) { + src2_reg = (sljit_s32)get_imm((sljit_uw)src1w); + if (src2_reg) { + flags |= ARGS_SWAPPED; + src1 = src2; + src1w = src2w; + break; + } + if (inp_flags & ALLOW_INV_IMM) { + src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w); + if (src2_reg) { + flags |= ARGS_SWAPPED | INV_IMM; + src1 = src2; + src1w = src2w; + break; + } + } + if (neg_op >= SLJIT_SUB) { + /* Note: additive operation (commutative). */ + src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w); + if (src2_reg) { + src1 = src2; + src1w = src2w; + op = neg_op | GET_ALL_FLAGS(op); + break; + } + } + } + } while(0); + + /* Source 1. */ + if (FAST_IS_REG(src1)) + src1_reg = src1; + else if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1_reg = TMP_REG1; + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1_reg = TMP_REG1; + } + + /* Destination. */ + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2; + + if (op <= SLJIT_MOV_P) { + if (dst & SLJIT_MEM) { + if (inp_flags & BYTE_SIZE) + inp_flags &= ~SIGNED; + + if (FAST_IS_REG(src2)) + return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2); + } + + if (FAST_IS_REG(src2) && dst_reg != TMP_REG2) + flags |= MOVE_REG_CONV; + } + + /* Source 2. */ + if (src2_reg == 0) { + src2_reg = (op <= SLJIT_MOV_P) ? 
dst_reg : TMP_REG2; + + if (FAST_IS_REG(src2)) + src2_reg = src2; + else if (src2 & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2)); + else + FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w)); + } + + FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg)); + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1); +} + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__GNUC__) +extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator); +extern int __aeabi_idivmod(int numerator, int denominator); +#else +#error "Software divmod functions are needed" +#endif + +#ifdef __cplusplus +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + sljit_uw saved_reg_list[3]; + sljit_sw saved_reg_count; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + FAIL_IF(push_inst(compiler, BKPT)); + break; + case SLJIT_NOP: + FAIL_IF(push_inst(compiler, NOP)); + break; + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) + | RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1)); + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); + SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3); + + saved_reg_count = 0; + if (compiler->scratches >= 4) + saved_reg_list[saved_reg_count++] = 3; + if (compiler->scratches >= 3) + saved_reg_list[saved_reg_count++] = 2; + if (op >= SLJIT_DIV_UW) + saved_reg_list[saved_reg_count++] = 1; + + if (saved_reg_count > 0) { + FAIL_IF(push_inst(compiler, STR | 0x2d0000 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */)); + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst(compiler, STR | 0x8d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */)); + } + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst(compiler, STR | 0x8d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */)); + } + } + +#if defined(__GNUC__) + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod)))); +#else +#error "Software divmod functions are needed" +#endif + + if (saved_reg_count > 0) { + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst(compiler, LDR | 0x8d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */)); + } + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst(compiler, LDR | 0x8d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */)); + } + return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_uw)(saved_reg_count >= 3 ? 
16 : 8) + | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); + } + return SLJIT_SUCCESS; + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_U8: + return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + + case SLJIT_MOV_S8: + return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + + case SLJIT_MOV_U16: + return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + + case SLJIT_MOV_S16: + return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_CLZ: + case SLJIT_CTZ: + return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + case SLJIT_SUB: + case SLJIT_SUBC: + return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + case SLJIT_ROTL: + case SLJIT_ROTR: + if (src2 & SLJIT_IMM) { + compiler->shift_imm = src2w & 0x1f; + return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w); + } else { + compiler->shift_imm = 0x20; + return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); + } + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_dst, + 
sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 is_left; + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w)); + + op = GET_OPCODE(op); + is_left = (op == SLJIT_SHL || op == SLJIT_MSHL); + + if (src_dst == src1) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, src_dst, 0, src_dst, 0, src2, src2w); + } + + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + /* Shift type of ROR is 3. */ + if (src2 & SLJIT_IMM) { + src2w &= 0x1f; + + if (src2w == 0) + return SLJIT_SUCCESS; + } else if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src2, src2w, TMP_REG2)); + src2 = TMP_REG2; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1 = TMP_REG1; + } else if (src1 & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_IMM) { + FAIL_IF(push_inst(compiler, MOV | RD(src_dst) | RM(src_dst) | ((sljit_uw)(is_left ? 0 : 1) << 5) | ((sljit_uw)src2w << 7))); + src2w = (src2w ^ 0x1f) + 1; + return push_inst(compiler, ORR | RD(src_dst) | RN(src_dst) | RM(src1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | ((sljit_uw)src2w << 7)); + } + + if (op == SLJIT_MSHL || op == SLJIT_MLSHR) { + FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0x1f)); + src2 = TMP_REG2; + } + + FAIL_IF(push_inst(compiler, MOV | RD(src_dst) | RM8(src2) | ((sljit_uw)(is_left ? 0 : 1) << 5) | 0x10 | RM(src_dst))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | (1 << 7))); + FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0x1f)); + return push_inst(compiler, ORR | RD(src_dst) | RN(src_dst) | RM(TMP_REG1) | ((sljit_uw)(is_left ? 
1 : 0) << 5) | 0x10 | RM8(TMP_REG2)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src))); + else + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1)); + + return push_inst(compiler, BX | RM(TMP_REG2)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + SLJIT_ASSERT(src & SLJIT_MEM); + return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1); +#else /* !SLJIT_CONFIG_ARM_V7 */ + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_ARM_V7 */ + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return (freg_map[reg] << 1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + SLJIT_UNUSED_ARG(size); + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_uw*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FPU_LOAD (1 << 20) +#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ + ((inst) | (sljit_uw)((add) << 23) | RN(base) | VD(freg) | (sljit_uw)(offs)) + +static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_uw imm; + sljit_uw inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD)); + + SLJIT_ASSERT(arg & SLJIT_MEM); + arg &= ~SLJIT_MEM; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 7))); + arg = TMP_REG2; + argw = 0; + } + + /* Fast loads and stores. 
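   (Editor's note: VLDR/VSTR encode their offset as an unsigned 8-bit word count, so only byte offsets that are multiples of 4 in [0, 0x3fc] fit directly — hence the ~0x3fc masks below; argw = 0x100, for example, is emitted as imm8 = 0x40. Anything else is first folded into TMP_REG2.)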
*/ + if (arg) { + if (!(argw & ~0x3fc)) + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2)); + if (!(-argw & ~0x3fc)) + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2)); + + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); + if (imm) { + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2)); + } + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); + if (imm) { + argw = -argw; + FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2)); + } + } + + if (arg) { + FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2))); + } + else + FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); + + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + op ^= SLJIT_32; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0))); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1)); + + /* Store the integer value from a VFP register. */ + return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + op ^= SLJIT_32; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1))); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. 
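   (Editor's sketch of the whole conversion, with r0/s0/d0 as placeholder names only: vmov s0, r0 followed by vcvt.f64.s32 d0, s0; for a memory source the vmov is replaced by the vldr issued here.)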
*/ + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1))); + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_32, dst_r, TMP_FREG1, 0))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + op ^= SLJIT_32; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0))); + return push_inst(compiler, VMRS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0))); + break; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0))); + op ^= SLJIT_32; + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1))); + break; + + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1))); + break; + + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1))); + break; + + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1))); + break; + } + + if (dst_r == TMP_FREG1) + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw)); + + return SLJIT_SUCCESS; +} + +#undef EMIT_FPU_DATA_TRANSFER + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2)); + + /* Memory. */ + return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ + return 0x00000000; + + case SLJIT_NOT_EQUAL: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. 
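   (Editor's note: every value returned by this function is an ARM condition field in bits 31..28 of an instruction word — 0x0 = EQ, 0x1 = NE, 0x2 = CS/HS, 0x3 = CC/LO, and so on — which callers merge into an instruction after clearing COND_MASK.)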
*/ + return 0x10000000; + + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x20000000; + /* fallthrough */ + + case SLJIT_LESS: + return 0x30000000; + + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x30000000; + /* fallthrough */ + + case SLJIT_GREATER_EQUAL: + return 0x20000000; + + case SLJIT_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + return 0x80000000; + + case SLJIT_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + return 0x90000000; + + case SLJIT_SIG_LESS: + case SLJIT_UNORDERED_OR_LESS: + return 0xb0000000; + + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + return 0xa0000000; + + case SLJIT_SIG_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + return 0xc0000000; + + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return 0xd0000000; + + case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x10000000; + /* fallthrough */ + + case SLJIT_UNORDERED: + return 0x60000000; + + case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x00000000; + /* fallthrough */ + + case SLJIT_ORDERED: + return 0x70000000; + + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + return 0x40000000; + + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return 0x50000000; + + default: + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG); + return 0xe0000000; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (type >= SLJIT_FAST_CALL) + PTR_FAIL_IF(prepare_blx(compiler)); + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0)); + + if (jump->flags & SLJIT_REWRITABLE_JUMP) { + jump->addr = compiler->size; + compiler->patches++; + } + + if (type >= SLJIT_FAST_CALL) { + jump->flags |= IS_BL; + PTR_FAIL_IF(emit_blx(compiler)); + } + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) + jump->addr = compiler->size; +#else + if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_BL; + PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); + PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? 
BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type))); + jump->addr = compiler->size; +#endif + return jump; +} + +#ifdef __SOFTFP__ + +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space) +{ + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; + sljit_u32 word_arg_offset = 0; + sljit_u32 src_offset = 4 * sizeof(sljit_sw); + sljit_u32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_u8 offsets[4]; + sljit_u8 *offset_ptr = offsets; + + if (src && FAST_IS_REG(*src)) + src_offset = (sljit_uw)reg_map[*src] * sizeof(sljit_sw); + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f64); + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f32); + float_arg_count++; + break; + default: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_sw); + word_arg_offset += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + /* Keep lr register on the stack. */ + if (is_tail_call) + offset += sizeof(sljit_sw); + + offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7; + + *extra_space = offset; + + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset)); + else + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset)); + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, -1)); + *extra_space = 0; + } + + /* Process arguments in reversed direction. 
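   (Editor's example under the soft-float ABI handled above: for an (s32, f64, s32) argument list the first pass assigns byte offsets 0, 8 — the double is 8-byte aligned — and 16, so the loop below leaves the first integer in r0, moves the double into r2/r3, and stores the trailing word argument to the stack.)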
*/ + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count--; + offset = *(--offset_ptr); + + SLJIT_ASSERT((offset & 0x7) == 0); + + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + } else + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count--; + offset = *(--offset_ptr); + + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10))); + } else + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + break; + default: + word_arg_offset -= sizeof(sljit_sw); + offset = *(--offset_ptr); + + SLJIT_ASSERT(offset >= word_arg_offset); + + if (offset != word_arg_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + else if (src_offset == word_arg_offset) { + *src = (sljit_s32)(SLJIT_R0 + (offset >> 2)); + src_offset = offset; + } + FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2))); + } else + FAIL_IF(push_inst(compiler, STR | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw)))); + } + break; + } + + types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) + FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12))); + + return SLJIT_SUCCESS; +} + +#else /* !__SOFTFP__ */ + +static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_u32 offset = SLJIT_FR0; + sljit_u32 new_offset = SLJIT_FR0; + sljit_u32 f32_offset = 0; + + /* Remove return value. 
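   (Editor's note: the loop below repacks float arguments into consecutive VFP argument registers as the hard-float ABI expects, back-filling the free single-precision half of a partially used double slot; e.g. for (f32, f32, f64) the singles end up in s0 and s1 and the double is moved from d2 to d1.)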
*/ + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != new_offset) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + SLJIT_32, new_offset, offset, 0))); + + new_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0x400000, f32_offset, offset, 0))); + f32_offset = 0; + } else { + if (offset != new_offset) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0, new_offset, offset, 0))); + f32_offset = new_offset; + new_offset++; + } + offset++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#endif /* __SOFTFP__ */ + +#undef EMIT_FPU_OPERATION + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#ifdef __SOFTFP__ + struct sljit_jump *jump; + sljit_u32 extra_space = (sljit_u32)type; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#ifdef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + + PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2))); + return jump; + } + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); + return jump; + } +#endif /* __SOFTFP__ */ + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + +#ifndef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + + if (!(src & SLJIT_IMM)) { + if (FAST_IS_REG(src)) { + SLJIT_ASSERT(reg_map[src] != 14); + return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src)); + } + + SLJIT_ASSERT(src & SLJIT_MEM); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? 
IS_BL : 0)); + jump->u.target = (sljit_uw)srcw; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (type >= SLJIT_FAST_CALL) + FAIL_IF(prepare_blx(compiler)); + FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0)); + if (type >= SLJIT_FAST_CALL) + FAIL_IF(emit_blx(compiler)); +#else + FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); + FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1))); +#endif + jump->addr = compiler->size; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ +#ifdef __SOFTFP__ + sljit_u32 extra_space = (sljit_u32)type; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); + src = TMP_REG1; + } + +#ifdef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP; + + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + + FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); + + if (type & SLJIT_CALL_RETURN) + return push_inst(compiler, BX | RM(TMP_REG2)); + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + return softfloat_post_call_with_args(compiler, arg_types); + } +#endif /* __SOFTFP__ */ + + if (type & SLJIT_CALL_RETURN) { + FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP; + } + +#ifndef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +#ifdef __SOFTFP__ + +static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + if (compiler->options & SLJIT_ENTER_REG_ARG) { + if (src == SLJIT_FR0) + return SLJIT_SUCCESS; + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw); + } + + if (FAST_IS_REG(src)) { + if (op & SLJIT_32) + return push_inst(compiler, VMOV | (1 << 20) | RD(SLJIT_R0) | VN(src)); + return push_inst(compiler, VMOV2 | (1 << 20) | RD(SLJIT_R0) | RN(SLJIT_R1) | VM(src)); + } + + SLJIT_SKIP_CHECKS(compiler); + + if (op & SLJIT_32) + return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw); + return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw); +} + +#endif /* __SOFTFP__ */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op); + sljit_uw cc, ins; + + CHECK_ERROR(); + 
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + cc = get_cc(compiler, type); + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (op < SLJIT_ADD) { + FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0)); + FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc)); + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; + } + + ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR)); + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2)); + + FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc)); + + if (op == SLJIT_AND) + FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000))); + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2)); + + if (flags & SLJIT_SET_Z) + return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_uw cc, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + cc = get_cc(compiler, type & ~SLJIT_32); + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + tmp = get_imm((sljit_uw)srcw); + if (tmp) + return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc); + + tmp = get_imm(~(sljit_uw)srcw); + if (tmp) + return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc); + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + tmp = (sljit_uw)srcw; + FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff))); + if (tmp <= 0xffff) + return SLJIT_SUCCESS; + return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff)); +#else + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + src = TMP_REG1; +#endif + } + + return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src)) & ~COND_MASK) | cc); +} + +static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset) +{ + sljit_s32 arg = *mem; + sljit_sw argw = *memw; + sljit_uw imm, tmp; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_sw mask = max_offset >= 0xf00 ? 0xfff : 0xff; + sljit_sw sign = max_offset >= 0xf00 ? 0x1000 : 0x100; +#else /* !SLJIT_CONFIG_ARM_V5 */ + sljit_sw mask = 0xfff; + sljit_sw sign = 0x1000; + + SLJIT_ASSERT(max_offset >= 0xf00); +#endif /* SLJIT_CONFIG_ARM_V5 */ + + *mem = TMP_REG1; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + *memw = 0; + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 7)); + } + + arg &= REG_MASK; + + if (arg) { + if (argw <= max_offset && argw >= -mask) { + *mem = arg; + return SLJIT_SUCCESS; + } + + if (argw >= 0) { + tmp = (sljit_uw)(argw & (sign | mask)); + tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 
0 : sign)) & ~mask); + imm = get_imm(tmp); + + if (imm) { + *memw = argw - (sljit_sw)tmp; + SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset); + + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | imm); + } + } else { + tmp = (sljit_uw)(-argw & (sign | mask)); + tmp = (sljit_uw)((-argw + ((tmp <= (sljit_uw)((sign << 1) - max_offset - 1)) ? 0 : sign)) & ~mask); + imm = get_imm(tmp); + + if (imm) { + *memw = argw + (sljit_sw)tmp; + SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset); + + return push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg) | imm); + } + } + } + + tmp = (sljit_uw)(argw & (sign | mask)); + tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask); + *memw = argw - (sljit_sw)tmp; + + FAIL_IF(load_immediate(compiler, TMP_REG1, tmp)); + + if (arg == 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg)); +} + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + +static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags, steps, tmp_reg; + sljit_uw add, shift; + + switch (type & 0xff) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + flags = BYTE_SIZE; + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + if ((type & 0xff) == SLJIT_MOV_S8) + flags |= SIGNED; + + return emit_op_mem(compiler, flags, reg, mem, memw, TMP_REG1); + + case SLJIT_MOV_U16: + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 1)); + flags = BYTE_SIZE; + steps = 1; + break; + + case SLJIT_MOV_S16: + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xff - 1)); + flags = BYTE_SIZE | SIGNED; + steps = 1; + break; + + default: + if (type & SLJIT_MEM_UNALIGNED_32) { + flags = WORD_SIZE; + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + + return emit_op_mem(compiler, flags, reg, mem, memw, TMP_REG1); + } + + if (!(type & SLJIT_MEM_UNALIGNED_16)) { + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 3)); + flags = BYTE_SIZE; + steps = 3; + break; + } + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xff - 2)); + + add = 1; + if (memw < 0) { + add = 0; + memw = -memw; + } + + tmp_reg = reg; + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE, add, reg, mem, TYPE2_TRANSFER_IMM(memw)))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(reg) | (16 << 7) | (2 << 4))); + } else { + if (reg == mem) { + SLJIT_ASSERT(reg != TMP_REG1); + tmp_reg = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE | LOAD_DATA, add, tmp_reg, mem, TYPE2_TRANSFER_IMM(memw)))); + } + + if (!add) { + memw -= 2; + if (memw <= 0) { + memw = -memw; + add = 1; + } + } else + memw += 2; + + if (type & SLJIT_MEM_STORE) + return push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw))); + + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE | LOAD_DATA, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw)))); + return push_inst(compiler, ORR | RD(reg) | RN(tmp_reg) | RM(TMP_REG2) | (16 << 7)); + } + + SLJIT_ASSERT(steps > 0); + + add = 1; + if (memw < 0) { + add = 0; + memw = -memw; + } + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE, add, reg, mem, memw))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(reg) | (8 << 7) | (2 << 4))); + + while (1) { + if (!add) { + memw -= 1; + if (memw == 0) + add = 1; + } else + memw += 1; + + FAIL_IF(push_inst(compiler, 
EMIT_DATA_TRANSFER(BYTE_SIZE, add, TMP_REG2, mem, memw))); + + if (--steps == 0) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(TMP_REG2) | (8 << 7) | (2 << 4))); + } + } + + tmp_reg = reg; + + if (reg == mem) { + SLJIT_ASSERT(reg != TMP_REG1); + tmp_reg = TMP_REG1; + } + + shift = 8; + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE | LOAD_DATA, add, tmp_reg, mem, memw))); + + do { + if (!add) { + memw -= 1; + if (memw == 0) + add = 1; + } else + memw += 1; + + if (steps > 1) { + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE | LOAD_DATA, add, TMP_REG2, mem, memw))); + FAIL_IF(push_inst(compiler, ORR | RD(tmp_reg) | RN(tmp_reg) | RM(TMP_REG2) | (shift << 7))); + shift += 8; + } + } while (--steps != 0); + + flags |= LOAD_DATA; + + if (flags & SIGNED) + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(flags, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw)))); + else + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(flags, add, TMP_REG2, mem, memw))); + + return push_inst(compiler, ORR | RD(reg) | RN(tmp_reg) | RM(TMP_REG2) | (shift << 7)); +} + +#endif /* SLJIT_CONFIG_ARM_V5 */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + ADJUST_LOCAL_OFFSET(mem, memw); +#endif /* SLJIT_CONFIG_ARM_V5 */ + + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + } + + ADJUST_LOCAL_OFFSET(mem, memw); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16)) { + FAIL_IF(update_mem_addr(compiler, &mem, &memw, (type & SLJIT_MEM_UNALIGNED_16) ? 
0xfff - 6 : 0xfff - 7)); + + if (!(type & SLJIT_MEM_STORE) && REG_PAIR_FIRST(reg) == (mem & REG_MASK)) { + FAIL_IF(sljit_emit_mem_unaligned(compiler, type, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw))); + return sljit_emit_mem_unaligned(compiler, type, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw); + } + + FAIL_IF(sljit_emit_mem_unaligned(compiler, type, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw)); + return sljit_emit_mem_unaligned(compiler, type, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw)); + } +#endif /* SLJIT_CONFIG_ARM_V5 */ + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + + flags = WORD_SIZE; + + if (!(type & SLJIT_MEM_STORE)) { + if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1)); + return emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1); + } + + flags = WORD_SIZE | LOAD_DATA; + } + + FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1)); + return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + sljit_uw is_type1_transfer, inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); + + is_type1_transfer = 1; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + case SLJIT_MOV_P: + flags = WORD_SIZE; + break; + case SLJIT_MOV_U8: + flags = BYTE_SIZE; + break; + case SLJIT_MOV_S8: + if (!(type & SLJIT_MEM_STORE)) + is_type1_transfer = 0; + flags = BYTE_SIZE | SIGNED; + break; + case SLJIT_MOV_U16: + is_type1_transfer = 0; + flags = HALF_SIZE; + break; + case SLJIT_MOV_S16: + is_type1_transfer = 0; + flags = HALF_SIZE | SIGNED; + break; + default: + SLJIT_UNREACHABLE(); + flags = WORD_SIZE; + break; + } + + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + + SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags)); + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + if (!is_type1_transfer && memw != 0) + return SLJIT_ERR_UNSUPPORTED; + } else { + if (is_type1_transfer) { + if (memw > 4095 || memw < -4095) + return SLJIT_ERR_UNSUPPORTED; + } else if (memw > 255 || memw < -255) + return SLJIT_ERR_UNSUPPORTED; + } + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + memw &= 0x3; + + inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_uw)memw << 7)); + + if (is_type1_transfer) + inst |= (1 << 25); + + if (type & SLJIT_MEM_POST) + inst ^= (1 << 24); + else + inst |= (1 << 21); + + return push_inst(compiler, inst); + } + + inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0); + + if (type & SLJIT_MEM_POST) + inst ^= (1 << 24); + else + inst |= (1 << 21); + + if (is_type1_transfer) { + if (memw >= 0) + inst |= (1 << 23); + else + memw = -memw; + + return push_inst(compiler, inst | (sljit_uw)memw); + } + + if (memw >= 0) + inst |= (1 << 23); + else + memw = -memw; + + return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_uw)memw)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ +#if (defined 
SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_s32 max_offset; + sljit_s32 dst; +#endif /* SLJIT_CONFIG_ARM_V5 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + if (type & SLJIT_MEM_UNALIGNED_32) + return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2))); + + if (type & SLJIT_32) + return sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw); + + max_offset = 0xfff - 7; + if (type & SLJIT_MEM_UNALIGNED_16) + max_offset++; + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, max_offset)); + mem |= SLJIT_MEM; + + FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw)); + + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2))); + return sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw + 4); + } + + max_offset = (type & SLJIT_32) ? 0xfff - 3 : 0xfff - 7; + if (type & SLJIT_MEM_UNALIGNED_16) + max_offset++; + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, max_offset)); + + dst = TMP_REG1; + + /* Stack offset adjustment is not needed because dst + is not stored on the stack when mem is SLJIT_SP. */ + + if (mem == TMP_REG1) { + dst = SLJIT_R3; + + if (compiler->scratches >= 4) + FAIL_IF(push_inst(compiler, STR | (1 << 21) | RN(SLJIT_SP) | RD(SLJIT_R3) | 8)); + } + + mem |= SLJIT_MEM; + + FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | (type & SLJIT_MEM_UNALIGNED_16), dst, mem, memw)); + FAIL_IF(push_inst(compiler, VMOV | VN(freg) | RD(dst))); + + if (!(type & SLJIT_32)) { + FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | (type & SLJIT_MEM_UNALIGNED_16), dst, mem, memw + 4)); + FAIL_IF(push_inst(compiler, VMOV | VN(freg) | 0x80 | RD(dst))); + } + + if (dst == SLJIT_R3 && compiler->scratches >= 4) + FAIL_IF(push_inst(compiler, (LDR ^ (0x1 << 24)) | (0x1 << 23) | RN(SLJIT_SP) | RD(SLJIT_R3) | 8)); + return SLJIT_SUCCESS; +#else /* !SLJIT_CONFIG_ARM_V5 */ + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2))); + + if (type & SLJIT_32) + return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1); + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2))); + return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1); + } + + if (type & SLJIT_32) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1)); + return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2)); + } + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1)); + return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1)); +#endif /* SLJIT_CONFIG_ARM_V5 */ +} + +#undef FPU_LOAD + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw 
init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, + EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_uw)init_value)); + compiler->patches++; +#else + PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value)); +#endif + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0)); + compiler->patches++; +#else + PTR_FAIL_IF(emit_imm(compiler, dst_r, 0)); +#endif + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1)); + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + inline_set_jump_addr(addr, executable_offset, new_target, 1); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + inline_set_const(addr, executable_offset, (sljit_uw)new_constant, 1); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeARM_64.c b/contrib/libs/pcre2/src/sljit/sljitNativeARM_64.c new file mode 100644 index 0000000000..89f747e7c8 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeARM_64.c @@ -0,0 +1,2417 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "ARM-64" SLJIT_CPUINFO; +} + +/* Length of an instruction word */ +typedef sljit_u32 sljit_ins; + +#define TMP_ZERO (0) + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5) + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) + +/* r18 - platform register, currently not used */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { + 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 15, 14, 13, 12, 11, 10, 9, 8, 30, 31 +}; + +#define W_OP ((sljit_ins)1 << 31) +#define RD(rd) ((sljit_ins)reg_map[rd]) +#define RT(rt) ((sljit_ins)reg_map[rt]) +#define RN(rn) ((sljit_ins)reg_map[rn] << 5) +#define RT2(rt2) ((sljit_ins)reg_map[rt2] << 10) +#define RM(rm) ((sljit_ins)reg_map[rm] << 16) +#define VD(vd) ((sljit_ins)freg_map[vd]) +#define VT(vt) ((sljit_ins)freg_map[vt]) +#define VT2(vt) ((sljit_ins)freg_map[vt] << 10) +#define VN(vn) ((sljit_ins)freg_map[vn] << 5) +#define VM(vm) ((sljit_ins)freg_map[vm] << 16) + +/* --------------------------------------------------------------------- */ +/* Instruction forms */ +/* --------------------------------------------------------------------- */ + +#define ADC 0x9a000000 +#define ADD 0x8b000000 +#define ADDE 0x8b200000 +#define ADDI 0x91000000 +#define AND 0x8a000000 +#define ANDI 0x92000000 +#define ASRV 0x9ac02800 +#define B 0x14000000 +#define B_CC 0x54000000 +#define BL 0x94000000 +#define BLR 0xd63f0000 +#define BR 0xd61f0000 +#define BRK 0xd4200000 +#define CBZ 0xb4000000 +#define CLZ 0xdac01000 +#define CSEL 0x9a800000 +#define CSINC 0x9a800400 +#define EOR 0xca000000 +#define EORI 0xd2000000 +#define EXTR 0x93c00000 +#define FABS 0x1e60c000 +#define FADD 0x1e602800 +#define FCMP 0x1e602000 +#define FCVT 0x1e224000 +#define FCVTZS 0x9e780000 +#define FDIV 0x1e601800 +#define FMOV 0x1e604000 +#define FMUL 0x1e600800 +#define FNEG 0x1e614000 +#define FSUB 0x1e603800 +#define LDRI 0xf9400000 +#define LDRI_F64 0xfd400000 +#define LDRI_POST 0xf8400400 +#define LDP 0xa9400000 +#define LDP_F64 0x6d400000 +#define LDP_POST 0xa8c00000 +#define LDR_PRE 0xf8400c00 +#define LSLV 0x9ac02000 +#define LSRV 0x9ac02400 +#define MADD 0x9b000000 +#define MOVK 0xf2800000 +#define MOVN 0x92800000 +#define MOVZ 0xd2800000 +#define NOP 0xd503201f +#define ORN 0xaa200000 +#define ORR 0xaa000000 +#define ORRI 0xb2000000 +#define RBIT 0xdac00000 +#define RET 0xd65f0000 +#define RORV 0x9ac02c00 +#define SBC 0xda000000 +#define SBFM 0x93000000 +#define SCVTF 0x9e620000 +#define SDIV 0x9ac00c00 +#define SMADDL 
0x9b200000 +#define SMULH 0x9b403c00 +#define STP 0xa9000000 +#define STP_F64 0x6d000000 +#define STP_PRE 0xa9800000 +#define STRB 0x38206800 +#define STRBI 0x39000000 +#define STRI 0xf9000000 +#define STRI_F64 0xfd000000 +#define STR_FI 0x3d000000 +#define STR_FR 0x3c206800 +#define STUR_FI 0x3c000000 +#define STURBI 0x38000000 +#define SUB 0xcb000000 +#define SUBI 0xd1000000 +#define SUBS 0xeb000000 +#define UBFM 0xd3000000 +#define UDIV 0x9ac00800 +#define UMULH 0x9bc03c00 + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) +{ + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm & 0xffff) << 5))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 16) & 0xffff) << 5) | (1 << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 32) & 0xffff) << 5) | (2 << 21))); + return push_inst(compiler, MOVK | RD(dst) | ((sljit_ins)(imm >> 48) << 5) | (3 << 21)); +} + +static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) { + jump->flags |= PATCH_ABS64; + return 0; + } + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + + diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; + + if (jump->flags & IS_COND) { + diff += SSIZE_OF(ins); + if (diff <= 0xfffff && diff >= -0x100000) { + code_ptr[-5] ^= (jump->flags & IS_CBZ) ? 
(0x1 << 24) : 0x1; + jump->addr -= sizeof(sljit_ins); + jump->flags |= PATCH_COND; + return 5; + } + diff -= SSIZE_OF(ins); + } + + if (diff <= 0x7ffffff && diff >= -0x8000000) { + jump->flags |= PATCH_B; + return 4; + } + + if (target_addr < 0x100000000l) { + if (jump->flags & IS_COND) + code_ptr[-5] -= (2 << 5); + code_ptr[-2] = code_ptr[0]; + return 2; + } + + if (target_addr < 0x1000000000000l) { + if (jump->flags & IS_COND) + code_ptr[-5] -= (1 << 5); + jump->flags |= PATCH_ABS48; + code_ptr[-1] = code_ptr[0]; + return 1; + } + + jump->flags |= PATCH_ABS64; + return 0; +} + +static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +{ + if (max_label < 0x100000000l) { + put_label->flags = 0; + return 2; + } + + if (max_label < 0x1000000000000l) { + put_label->flags = 1; + return 1; + } + + put_label->flags = 2; + return 0; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_sw addr; + sljit_u32 dst; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. 
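+ Each list is consumed in lock-step with the copy loop, so resolution is a
+ single pass. Jumps are emitted pessimistically as a full 64-bit immediate
+ load plus an indirect branch; when detect_jump_type sees the target is in
+ range, it rewinds code_ptr so only the short form survives. Roughly
+ (illustrative only, xT stands for the temporary register):
+ movz xT, #a0; movk xT, #a1, lsl #16; movk xT, #a2, lsl #32; movk xT, #a3, lsl #48; br xT
+ shrinks to a single
+ b target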
*/ + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + if (jump && jump->addr == word_count) { + jump->addr = (sljit_uw)(code_ptr - 4); + code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)(code_ptr - 3); + code_ptr -= put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr++; + word_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); + buf_ptr = (sljit_ins *)jump->addr; + + if (jump->flags & PATCH_B) { + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000); + buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (sljit_ins)(addr & 0x3ffffff); + if (jump->flags & IS_COND) + buf_ptr[-1] -= (4 << 5); + break; + } + if (jump->flags & PATCH_COND) { + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000); + buf_ptr[0] = (buf_ptr[0] & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5); + break; + } + + SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff); + SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff); + + dst = buf_ptr[0] & 0x1f; + buf_ptr[0] = MOVZ | dst | (((sljit_ins)addr & 0xffff) << 5); + buf_ptr[1] = MOVK | dst | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21); + if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) + buf_ptr[2] = MOVK | dst | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21); + if (jump->flags & PATCH_ABS64) + buf_ptr[3] = MOVK | dst | ((sljit_ins)(addr >> 48) << 5) | (3 << 21); + } while (0); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { + addr = (sljit_sw)put_label->label->addr; + buf_ptr = (sljit_ins*)put_label->addr; + + buf_ptr[0] |= ((sljit_ins)addr & 0xffff) << 5; + buf_ptr[1] |= ((sljit_ins)(addr >> 16) & 0xffff) << 5; + + if (put_label->flags >= 1) + buf_ptr[2] |= ((sljit_ins)(addr >> 32) & 0xffff) << 5; + + if (put_label->flags >= 2) + buf_ptr[3] |= (sljit_ins)(addr >> 48) << 5; + + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + 
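+ /* AArch64 has split instruction and data caches, so the freshly written
+ buffer must be made visible to the instruction stream (data cache cleaned,
+ instruction cache invalidated) before it is executed; SLJIT_CACHE_FLUSH
+ typically expands to __builtin___clear_cache or a platform equivalent. */ +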
SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif + + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CTZ: + case SLJIT_HAS_ROT: + case SLJIT_HAS_CMOV: + case SLJIT_HAS_PREFETCH: + return 1; + + default: + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Core code generator functions. */ +/* --------------------------------------------------------------------- */ + +#define COUNT_TRAILING_ZERO(value, result) \ + result = 0; \ + if (!(value & 0xffffffff)) { \ + result += 32; \ + value >>= 32; \ + } \ + if (!(value & 0xffff)) { \ + result += 16; \ + value >>= 16; \ + } \ + if (!(value & 0xff)) { \ + result += 8; \ + value >>= 8; \ + } \ + if (!(value & 0xf)) { \ + result += 4; \ + value >>= 4; \ + } \ + if (!(value & 0x3)) { \ + result += 2; \ + value >>= 2; \ + } \ + if (!(value & 0x1)) { \ + result += 1; \ + value >>= 1; \ + } + +#define LOGICAL_IMM_CHECK (sljit_ins)0x100 + +static sljit_ins logical_imm(sljit_sw imm, sljit_u32 len) +{ + sljit_s32 negated; + sljit_u32 ones, right; + sljit_uw mask, uimm; + sljit_ins ins; + + if (len & LOGICAL_IMM_CHECK) { + len &= ~LOGICAL_IMM_CHECK; + if (len == 32 && (imm == 0 || imm == -1)) + return 0; + if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1)) + return 0; + } + + SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1) + || (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1)); + + uimm = (sljit_uw)imm; + while (1) { + if (len <= 0) { + SLJIT_UNREACHABLE(); + return 0; + } + + mask = ((sljit_uw)1 << len) - 1; + if ((uimm & mask) != ((uimm >> len) & mask)) + break; + len >>= 1; + } + + len <<= 1; + + negated = 0; + if (uimm & 0x1) { + negated = 1; + uimm = ~uimm; + } + + if (len < 64) + uimm &= ((sljit_uw)1 << len) - 1; + + /* Unsigned right shift. */ + COUNT_TRAILING_ZERO(uimm, right); + + /* Signed shift. We also know that the highest bit is set. */ + imm = (sljit_sw)~uimm; + SLJIT_ASSERT(imm < 0); + + COUNT_TRAILING_ZERO(imm, ones); + + if (~imm) + return 0; + + if (len == 64) + ins = 1 << 22; + else + ins = (0x3f - ((len << 1) - 1)) << 10; + + if (negated) + return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16); + + return ins | ((ones - 1) << 10) | ((len - right) << 16); +} + +#undef COUNT_TRAILING_ZERO + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm) +{ + sljit_uw imm = (sljit_uw)simm; + sljit_u32 i, zeros, ones, first; + sljit_ins bitmask; + + /* Handling simple immediates first. 
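+ A few illustrative cases (constants chosen here for exposition only):
+ 0x000000000000ffff -> movz dst, #0xffff
+ 0xfffffffffffffedc -> movn dst, #0x123
+ 0x00ff00ff00ff00ff -> orr dst, xzr, #0x00ff00ff00ff00ff (bitmask immediate)
+ Constants that fail every special case below fall back to a movz/movn
+ followed by up to three movk instructions.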
*/
+ if (imm <= 0xffff)
+ return push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)imm << 5));
+
+ if (simm < 0 && simm >= -0x10000)
+ return push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5));
+
+ if (imm <= 0xffffffffl) {
+ if ((imm & 0xffff) == 0)
+ return push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm >> 16) << 5) | (1 << 21));
+ if ((imm & 0xffff0000l) == 0xffff0000)
+ return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5));
+ if ((imm & 0xffff) == 0xffff)
+ return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21));
+
+ bitmask = logical_imm(simm, 16);
+ if (bitmask != 0)
+ return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask);
+
+ FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | (((sljit_ins)imm & 0xffff) << 5)));
+ return push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21));
+ }
+
+ bitmask = logical_imm(simm, 32);
+ if (bitmask != 0)
+ return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask);
+
+ if (simm < 0 && simm >= -0x100000000l) {
+ if ((imm & 0xffff) == 0xffff)
+ return push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21));
+
+ FAIL_IF(push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5)));
+ return push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21));
+ }
+
+ /* A large number of constants can be constructed from ORR and MOVx, but computing them is costly. */
+
+ zeros = 0;
+ ones = 0;
+ for (i = 4; i > 0; i--) {
+ if ((simm & 0xffff) == 0)
+ zeros++;
+ if ((simm & 0xffff) == 0xffff)
+ ones++;
+ simm >>= 16;
+ }
+
+ simm = (sljit_sw)imm;
+ first = 1;
+ if (ones > zeros) {
+ simm = ~simm;
+ for (i = 0; i < 4; i++) {
+ if (!(simm & 0xffff)) {
+ simm >>= 16;
+ continue;
+ }
+ if (first) {
+ first = 0;
+ FAIL_IF(push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21)));
+ }
+ else
+ FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)~simm & 0xffff) << 5) | (i << 21)));
+ simm >>= 16;
+ }
+ return SLJIT_SUCCESS;
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (!(simm & 0xffff)) {
+ simm >>= 16;
+ continue;
+ }
+ if (first) {
+ first = 0;
+ FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21)));
+ }
+ else
+ FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21)));
+ simm >>= 16;
+ }
+ return SLJIT_SUCCESS;
+}
+
+#define ARG1_IMM 0x0010000
+#define ARG2_IMM 0x0020000
+#define INT_OP 0x0040000
+#define SET_FLAGS 0x0080000
+#define UNUSED_RETURN 0x0100000
+
+#define CHECK_FLAGS(flag_bits) \
+ if (flags & SET_FLAGS) { \
+ inv_bits |= flag_bits; \
+ if (flags & UNUSED_RETURN) \
+ dst = TMP_ZERO; \
+ }
+
+static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_sw arg1, sljit_sw arg2)
+{
+ /* dst must be register, TMP_REG1
+ arg1 must be register, TMP_REG1, imm
+ arg2 must be register, TMP_REG2, imm */
+ sljit_ins inv_bits = (flags & INT_OP) ? W_OP : 0;
+ sljit_ins inst_bits;
+ sljit_s32 op = (flags & 0xffff);
+ sljit_s32 reg;
+ sljit_sw imm, nimm;
+
+ if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
+ /* Both are immediates.
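+ At most one immediate can be encoded into an instruction, so arg1 is
+ either replaced by the hard-wired zero register or materialized into
+ TMP_REG1 first; e.g. ADD #5, #7 becomes movz tmp1, #5 followed by
+ add dst, tmp1, #7 (values picked purely for illustration).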
*/ + flags &= ~ARG1_IMM; + if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB) + arg1 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + } + + if (flags & (ARG1_IMM | ARG2_IMM)) { + reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2); + imm = (flags & ARG2_IMM) ? arg2 : arg1; + + switch (op) { + case SLJIT_MUL: + case SLJIT_CLZ: + case SLJIT_CTZ: + case SLJIT_ADDC: + case SLJIT_SUBC: + /* No form with immediate operand (except imm 0, which + is represented by a ZERO register). */ + break; + case SLJIT_MOV: + SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1); + return load_immediate(compiler, dst, imm); + case SLJIT_NOT: + SLJIT_ASSERT(flags & ARG2_IMM); + FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm)); + goto set_flags; + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (flags & ARG1_IMM) + break; + imm = -imm; + /* Fall through. */ + case SLJIT_ADD: + if (op != SLJIT_SUB) + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + + if (imm == 0) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); + } + if (imm > 0 && imm <= 0xfff) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)imm << 10)); + } + nimm = -imm; + if (nimm > 0 && nimm <= 0xfff) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)nimm << 10)); + } + if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22)); + } + if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22)); + } + if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) { + FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)imm & 0xfff) << 10)); + } + if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) { + FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)nimm & 0xfff) << 10)); + } + break; + case SLJIT_AND: + inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32)); + if (!inst_bits) + break; + CHECK_FLAGS(3 << 29); + return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits); + case SLJIT_OR: + case SLJIT_XOR: + inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 
16 : 32)); + if (!inst_bits) + break; + if (op == SLJIT_OR) + inst_bits |= ORRI; + else + inst_bits |= EORI; + FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg))); + goto set_flags; + case SLJIT_SHL: + case SLJIT_MSHL: + if (flags & ARG1_IMM) + break; + + if (flags & INT_OP) { + imm &= 0x1f; + inst_bits = (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10); + } else { + imm &= 0x3f; + inst_bits = ((sljit_ins)1 << 22) | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10); + } + + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits)); + goto set_flags; + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + if (flags & ARG1_IMM) + break; + + if (op >= SLJIT_ASHR) + inv_bits |= 1 << 30; + + if (flags & INT_OP) { + imm &= 0x1f; + inst_bits = ((sljit_ins)imm << 16) | (31 << 10); + } else { + imm &= 0x3f; + inst_bits = ((sljit_ins)1 << 22) | ((sljit_ins)imm << 16) | (63 << 10); + } + + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits)); + goto set_flags; + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & ARG1_IMM) + break; + + if (op == SLJIT_ROTL) + imm = -imm; + + imm &= (flags & INT_OP) ? 0x1f : 0x3f; + return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst) | RN(arg1) | RM(arg1) | ((sljit_ins)imm << 10)); + default: + SLJIT_UNREACHABLE(); + break; + } + + if (flags & ARG2_IMM) { + if (arg2 == 0) + arg2 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG2, arg2)); + arg2 = TMP_REG2; + } + } + else { + if (arg1 == 0) + arg1 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + } + } + + /* Both arguments are registers. */ + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (dst == arg2) + return SLJIT_SUCCESS; + return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + case SLJIT_MOV_U8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (7 << 10)); + case SLJIT_MOV_S8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (!(flags & INT_OP)) + inv_bits |= 1 << 22; + return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10)); + case SLJIT_MOV_U16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (15 << 10)); + case SLJIT_MOV_S16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (!(flags & INT_OP)) + inv_bits |= 1 << 22; + return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); + case SLJIT_MOV32: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (dst == arg2) + return SLJIT_SUCCESS; + /* fallthrough */ + case SLJIT_MOV_U32: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + return push_inst(compiler, (ORR ^ W_OP) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + case SLJIT_MOV_S32: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10)); + case SLJIT_NOT: + SLJIT_ASSERT(arg1 == TMP_REG1); + FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2))); + break; /* Set flags. 
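+ ORN has no flag-setting form on AArch64, so the break falls through to the
+ set_flags tail below, which derives the flags with a compare against zero
+ (subs xzr, dst, xzr, or the wzr form for 32-bit operations).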
*/ + case SLJIT_CLZ: + SLJIT_ASSERT(arg1 == TMP_REG1); + return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); + case SLJIT_CTZ: + SLJIT_ASSERT(arg1 == TMP_REG1); + FAIL_IF(push_inst(compiler, (RBIT ^ inv_bits) | RD(dst) | RN(arg2))); + return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(dst)); + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_MUL: + compiler->status_flags_state = 0; + if (!(flags & SET_FLAGS)) + return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)); + if (flags & INT_OP) { + FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10))); + return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10)); + } + FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2))); + FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO))); + return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10)); + case SLJIT_AND: + CHECK_FLAGS(3 << 29); + return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_OR: + FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + break; /* Set flags. */ + case SLJIT_XOR: + FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + break; /* Set flags. */ + case SLJIT_SHL: + case SLJIT_MSHL: + FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + break; /* Set flags. */ + case SLJIT_LSHR: + case SLJIT_MLSHR: + FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + break; /* Set flags. */ + case SLJIT_ASHR: + case SLJIT_MASHR: + FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + break; /* Set flags. */ + case SLJIT_ROTL: + FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(arg2))); + arg2 = TMP_REG2; + /* fallthrough */ + case SLJIT_ROTR: + return push_inst(compiler, (RORV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + default: + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + } + +set_flags: + if (flags & SET_FLAGS) + return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO)); + return SLJIT_SUCCESS; +} + +#define STORE 0x10 +#define SIGNED 0x20 + +#define BYTE_SIZE 0x0 +#define HALF_SIZE 0x1 +#define INT_SIZE 0x2 +#define WORD_SIZE 0x3 + +#define MEM_SIZE_SHIFT(flags) ((sljit_ins)(flags) & 0x3) + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) +{ + sljit_u32 shift = MEM_SIZE_SHIFT(flags); + sljit_u32 type = (shift << 30); + + if (!(flags & STORE)) + type |= (flags & SIGNED) ? 
0x00800000 : 0x00400000; + + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (argw == 0 || argw == shift) + return push_inst(compiler, STRB | type | RT(reg) + | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); + + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); + return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg)); + } + + arg &= REG_MASK; + + if (!arg) { + FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~(0xfff << shift))); + + argw = (argw >> shift) & 0xfff; + + return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10)); + } + + if ((argw & ((1 << shift) - 1)) == 0) { + if (argw >= 0) { + if ((argw >> shift) <= 0xfff) + return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); + + if (argw <= 0xffffff) { + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); + + argw = ((argw & 0xfff) >> shift); + return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10)); + } + } else if (argw < -256 && argw >= -0xfff000) { + FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)(-argw + 0xfff) >> 12) << 10))); + argw = ((0x1000 + argw) & 0xfff) >> shift; + return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10)); + } + } + + if (argw <= 0xff && argw >= -0x100) + return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); + + if (argw >= 0) { + if (argw <= 0xfff0ff && ((argw + 0x100) & 0xfff) <= 0x1ff) { + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); + return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12)); + } + } else if (argw >= -0xfff100 && ((-argw + 0xff) & 0xfff) <= 0x1ff) { + FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)-argw >> 12) << 10))); + return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12)); + } + + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + + return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg)); +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 prev, fprev, saved_regs_size, i, tmp; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + sljit_ins offs; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 2); + saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); + + local_size = (local_size + saved_regs_size + 0xf) & ~0xf; + compiler->local_size = local_size; + + if (local_size <= 512) { + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | 
(sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); + offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3); + local_size = 0; + } else { + saved_regs_size = ((saved_regs_size - 2 * SSIZE_OF(sw)) + 0xf) & ~0xf; + + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)saved_regs_size << 10))); + offs = (sljit_ins)(saved_regs_size - 2 * SSIZE_OF(sw)) << (15 - 3); + local_size -= saved_regs_size; + SLJIT_ASSERT(local_size > 0); + } + + prev = -1; + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + prev = -1; + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + prev = -1; + } + + fprev = -1; + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + if (fprev != -1) + FAIL_IF(push_inst(compiler, STRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10))); + + if (prev != -1) + FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0))); + + +#ifdef _WIN32 + if (local_size > 4096) + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); +#endif /* _WIN32 */ + + if (!(options & SLJIT_ENTER_REG_ARG)) { + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + tmp = SLJIT_R0; + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - saved_arg_count) | RN(TMP_ZERO) | RM(tmp))); + saved_arg_count++; + } + tmp++; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + } + +#ifdef _WIN32 + if (local_size > 4096) { + if (local_size < 4 * 4096) { + /* No need for a loop. 
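+ Windows commits stack pages lazily behind a guard page, so every 4 KiB
+ page of a large frame has to be touched in order. For frames under four
+ pages the probe is unrolled; the emitted sequence is roughly:
+ ldr xzr, [sp]
+ sub sp, sp, #4096
+ ldr xzr, [sp]
+ sub sp, sp, #4096
+ Larger frames use the counted probe loop in the else branch instead.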
*/ + + if (local_size >= 2 * 4096) { + if (local_size >= 3 * 4096) { + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + } + + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + } + } + else { + FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG1) | ((((sljit_ins)local_size >> 12) - 1) << 5))); + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10))); + FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */)); + } + + local_size &= 0xfff; + + if (local_size > 0) + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + else + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } + + if (local_size > 0) { + if (local_size <= 512) + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); + else { + if (local_size >= 4096) + local_size = (1 << (22 - 10)); + + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } + } + +#else /* !_WIN32 */ + + /* The local_size does not include saved registers size. */ + if (local_size != 0) { + if (local_size > 0xfff) { + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (((sljit_ins)local_size >> 12) << 10) | (1 << 22))); + local_size &= 0xfff; + } + + if (local_size > 512 || local_size == 0) { + if (local_size != 0) + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } else + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); + } + +#endif /* _WIN32 */ + + return push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 saved_regs_size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2); + saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); + + compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to) +{ + sljit_s32 local_size, prev, fprev, i, tmp; + sljit_ins offs; + + local_size = compiler->local_size; + + if (!is_return_to) { + if (local_size > 512 && local_size <= 512 + 496) { + FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3)))); + local_size = 512; + } else + 
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
+ } else {
+ if (local_size > 512 && local_size <= 512 + 248) {
+ FAIL_IF(push_inst(compiler, LDRI_POST | RT(TMP_FP) | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << 12)));
+ local_size = 512;
+ } else
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_FP) | RN(SLJIT_SP) | 0));
+ }
+
+ if (local_size > 512) {
+ local_size -= 512;
+ if (local_size > 0xfff) {
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP)
+ | (((sljit_ins)local_size >> 12) << 10) | (1 << 22)));
+ local_size &= 0xfff;
+ }
+
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10)));
+ local_size = 512;
+ }
+
+ offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3);
+ prev = -1;
+
+ tmp = SLJIT_S0 - compiler->saveds;
+ for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
+ if (prev == -1) {
+ prev = i;
+ continue;
+ }
+ FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
+ offs -= (sljit_ins)2 << 15;
+ prev = -1;
+ }
+
+ for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+ if (prev == -1) {
+ prev = i;
+ continue;
+ }
+ FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
+ offs -= (sljit_ins)2 << 15;
+ prev = -1;
+ }
+
+ fprev = -1;
+
+ tmp = SLJIT_FS0 - compiler->fsaveds;
+ for (i = SLJIT_FS0; i > tmp; i--) {
+ if (fprev == -1) {
+ fprev = i;
+ continue;
+ }
+ FAIL_IF(push_inst(compiler, LDP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs));
+ offs -= (sljit_ins)2 << 15;
+ fprev = -1;
+ }
+
+ for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
+ if (fprev == -1) {
+ fprev = i;
+ continue;
+ }
+ FAIL_IF(push_inst(compiler, LDP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs));
+ offs -= (sljit_ins)2 << 15;
+ fprev = -1;
+ }
+
+ if (fprev != -1)
+ FAIL_IF(push_inst(compiler, LDRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10)));
+
+ if (prev != -1)
+ FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0)));
+
+ /* This and the next call/jump instruction can be executed in parallel.
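+ For a small frame the whole epilogue is roughly (shown for illustration,
+ register numbers follow reg_map):
+ ldp x29, x30, [sp]
+ ldp <saved pair>, [sp, #16] (repeated for each saved pair)
+ add sp, sp, #local_size
+ ret
+ The final stack adjustment has no data dependency on the return address,
+ so a dual-issue core can start both instructions together.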
*/ + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (sljit_ins)(local_size << 10)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + + return push_inst(compiler, RET | RN(TMP_LR)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src))); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + sljit_ins inv_bits = (op & SLJIT_32) ? W_OP : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, BRK); + case SLJIT_NOP: + return push_inst(compiler, NOP); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)); + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r, flags, mem_flags; + sljit_s32 op_flags = GET_ALL_FLAGS(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + op = GET_OPCODE(op); + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { + /* Both operands are registers. */ + if (dst_r != TMP_REG1 && FAST_IS_REG(src)) + return emit_op_imm(compiler, op | ((op_flags & SLJIT_32) ? 
INT_OP : 0), dst_r, TMP_REG1, src); + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + mem_flags = WORD_SIZE; + break; + case SLJIT_MOV_U8: + mem_flags = BYTE_SIZE; + if (src & SLJIT_IMM) + srcw = (sljit_u8)srcw; + break; + case SLJIT_MOV_S8: + mem_flags = BYTE_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (sljit_s8)srcw; + break; + case SLJIT_MOV_U16: + mem_flags = HALF_SIZE; + if (src & SLJIT_IMM) + srcw = (sljit_u16)srcw; + break; + case SLJIT_MOV_S16: + mem_flags = HALF_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (sljit_s16)srcw; + break; + case SLJIT_MOV_U32: + mem_flags = INT_SIZE; + if (src & SLJIT_IMM) + srcw = (sljit_u32)srcw; + break; + case SLJIT_MOV_S32: + case SLJIT_MOV32: + mem_flags = INT_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (sljit_s32)srcw; + break; + default: + SLJIT_UNREACHABLE(); + mem_flags = 0; + break; + } + + if (src & SLJIT_IMM) + FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); + else if (!(src & SLJIT_MEM)) + dst_r = src; + else + FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, src, srcw, TMP_REG1)); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; + } + + flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; + mem_flags = WORD_SIZE; + + if (op_flags & SLJIT_32) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src, srcw, TMP_REG2)); + src = TMP_REG2; + } + + emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, src); + + if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) + return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, flags, mem_flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; + mem_flags = WORD_SIZE; + + if (op & SLJIT_32) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + + if (dst == TMP_REG1) + flags |= UNUSED_RETURN; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, src1, src1w, TMP_REG1)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src2, src2w, TMP_REG2)); + src2 = TMP_REG2; + } + + if (src1 & SLJIT_IMM) + flags |= ARG1_IMM; + else + src1w = src1; + + if (src2 & SLJIT_IMM) + flags |= ARG2_IMM; + else + src2w = src2; + + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins inv_bits, imm; + sljit_s32 is_left; + sljit_sw mask; + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w)); + + is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); + + if (src_dst == src1) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w); + } + + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + inv_bits = (op & SLJIT_32) ? W_OP : 0; + mask = inv_bits ? 0x1f : 0x3f; + + if (src2 & SLJIT_IMM) { + src2w &= mask; + + if (src2w == 0) + return SLJIT_SUCCESS; + } else if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG2, src2, src2w, TMP_REG2)); + src2 = TMP_REG2; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1)); + src1 = TMP_REG1; + } else if (src1 & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_IMM) { + if (is_left) + src2w = (src2w ^ mask) + 1; + + return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(src_dst) + | RN(is_left ? src_dst : src1) | RM(is_left ? src1 : src_dst) | ((sljit_ins)src2w << 10)); + } + + FAIL_IF(push_inst(compiler, ((is_left ? LSLV : LSRV) ^ inv_bits) | RD(src_dst) | RN(src_dst) | RM(src2))); + + if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { + /* Shift left/right by 1. */ + if (is_left) + imm = (sljit_ins)(inv_bits ? ((1 << 16) | (31 << 10)) : ((1 << 16) | (63 << 10) | (1 << 22))); + else + imm = (sljit_ins)(inv_bits ? ((31 << 16) | (30 << 10)) : ((63 << 16) | (62 << 10) | (1 << 22))); + + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(TMP_REG1) | RN(src1) | imm)); + + /* Set imm to mask. */ + imm = (sljit_ins)(inv_bits ? (4 << 10) : ((5 << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, (EORI ^ inv_bits) | RD(TMP_REG2) | RN(src2) | imm)); + + src1 = TMP_REG1; + } else + FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(src2))); + + FAIL_IF(push_inst(compiler, ((is_left ? 
LSRV : LSLV) ^ inv_bits) | RD(TMP_REG1) | RN(src1) | RM(TMP_REG2))); + return push_inst(compiler, (ORR ^ inv_bits) | RD(src_dst) | RN(src_dst) | RM(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src))); + else + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1)); + + return push_inst(compiler, RET | RN(TMP_LR)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4); + + /* The reg_map[op] should provide the appropriate constant. */ + if (op == SLJIT_PREFETCH_L1) + op = 1; + else if (op == SLJIT_PREFETCH_L2) + op = 3; + else if (op == SLJIT_PREFETCH_L3) + op = 5; + else + op = 2; + + /* Signed word sized load is the prefetch instruction. */ + return emit_op_mem(compiler, WORD_SIZE | SIGNED, op, src, srcw, TMP_REG1); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + SLJIT_UNUSED_ARG(size); + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_u32 shift = MEM_SIZE_SHIFT(flags); + sljit_ins type = (shift << 30); + + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (!(flags & STORE)) + type |= 0x00400000; + + if (arg & OFFS_REG_MASK) { + argw &= 3; + if (argw == 0 || argw == shift) + return push_inst(compiler, STR_FR | type | VT(reg) + | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? 
(1 << 12) : 0)); + + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1)); + } + + arg &= REG_MASK; + + if (!arg) { + FAIL_IF(load_immediate(compiler, TMP_REG1, argw & ~(0xfff << shift))); + + argw = (argw >> shift) & 0xfff; + + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); + } + + if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) { + if ((argw >> shift) <= 0xfff) + return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); + + if (argw <= 0xffffff) { + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); + + argw = ((argw & 0xfff) >> shift); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); + } + } + + if (argw <= 255 && argw >= -256) + return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); + return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG1)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) + inv_bits |= W_OP; + + if (src & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + inv_bits |= W_OP; + + if (src & SLJIT_MEM) { + emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1); + src = TMP_REG1; + } else if (src & SLJIT_IMM) { + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, ((op & SLJIT_32) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_32) ? 
(1 << 22) : 0; + + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits; + + CHECK_ERROR(); + + SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x1) == WORD_SIZE, must_be_one_bit_difference); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); + break; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst(compiler, FCVT | (sljit_ins)((op & SLJIT_32) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw); +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR)); + + /* Memory. */ + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw, TMP_REG1); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ + return 0x1; + + case SLJIT_NOT_EQUAL: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. 
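+ AArch64 condition codes cannot separate the ordered and unordered variants
+ of (in)equality after an fcmp, so both variants collapse onto plain eq/ne.
+ Note that the returned values are the hardware encodings of the opposite
+ condition: sljit_emit_jump drops them into a b.cond that skips the long
+ jump sequence when the tested condition fails, and sljit_emit_op_flags
+ uses them in csinc dst, xzr, xzr, cc, which produces 1 exactly when the
+ requested condition holds.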
*/ + return 0x0; + + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x3; + /* fallthrough */ + + case SLJIT_LESS: + return 0x2; + + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x2; + /* fallthrough */ + + case SLJIT_GREATER_EQUAL: + return 0x3; + + case SLJIT_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + return 0x9; + + case SLJIT_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + return 0x8; + + case SLJIT_SIG_LESS: + case SLJIT_UNORDERED_OR_LESS: + return 0xa; + + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + return 0xb; + + case SLJIT_SIG_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + return 0xd; + + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return 0xc; + + case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x0; + /* fallthrough */ + + case SLJIT_UNORDERED: + return 0x7; + + case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x1; + /* fallthrough */ + + case SLJIT_ORDERED: + return 0x6; + + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + return 0x5; + + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return 0x4; + + default: + SLJIT_UNREACHABLE(); + return 0xe; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type < SLJIT_JUMP) { + jump->flags |= IS_COND; + PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(compiler, type))); + } + else if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_BL; + + PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1))); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + sljit_ins inv_bits = (type & SLJIT_32) ? 
W_OP : 0; + + SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL); + ADJUST_LOCAL_OFFSET(src, srcw); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + jump->flags |= IS_CBZ | IS_COND; + + if (src & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + else if (src & SLJIT_IMM) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + SLJIT_ASSERT(FAST_IS_REG(src)); + + if ((type & 0xff) == SLJIT_EQUAL) + inv_bits |= 1 << 24; + + PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src))); + PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1))); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (!(src & SLJIT_IMM)) { + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); + jump->u.target = (sljit_uw)srcw; + + FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src))); + src = TMP_REG1; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP; + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 dst_r, src_r, flags, mem_flags; + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + cc = get_cc(compiler, type); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (GET_OPCODE(op) < SLJIT_ADD) { + FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO))); + + if (dst_r == TMP_REG1) { + mem_flags = (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE; + return emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG2); + } + + return SLJIT_SUCCESS; + } + + flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; + mem_flags = WORD_SIZE; + + if (op & SLJIT_32) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + + src_r = dst; + + if (dst & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG1)); + src_r = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO))); + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src_r, TMP_REG2); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0; + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + if (type & SLJIT_32) + srcw = (sljit_s32)srcw; + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + srcw = 0; + } + + cc = get_cc(compiler, type & ~SLJIT_32); + + return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u32 inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + ADJUST_LOCAL_OFFSET(mem, memw); + + if (!(mem & REG_MASK)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw & ~0x1f8)); + + mem = SLJIT_MEM1(TMP_REG1); + memw &= 0x1f8; + } else if (mem & OFFS_REG_MASK) { + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10))); + + mem = SLJIT_MEM1(TMP_REG1); + memw = 0; + } else if ((memw & 0x7) != 0 || memw > 0x1f8 || memw < -0x200) { + inst = ADDI; + + if (memw < 0) { + /* Remains negative for integer min. */ + memw = -memw; + inst = SUBI; + } else if ((memw & 0x7) == 0 && memw <= 0x7ff0) { + if (!(type & SLJIT_MEM_STORE) && (mem & REG_MASK) == REG_PAIR_FIRST(reg)) { + FAIL_IF(push_inst(compiler, LDRI | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7))); + return push_inst(compiler, LDRI | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7)); + } + + inst = (type & SLJIT_MEM_STORE) ? STRI : LDRI; + + FAIL_IF(push_inst(compiler, inst | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7))); + return push_inst(compiler, inst | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7)); + } + + if ((sljit_uw)memw <= 0xfff) { + FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(mem & REG_MASK) | ((sljit_ins)memw << 10))); + memw = 0; + } else if ((sljit_uw)memw <= 0xffffff) { + FAIL_IF(push_inst(compiler, inst | (1 << 22) | RD(TMP_REG1) | RN(mem & REG_MASK) | (((sljit_ins)memw >> 12) << 10))); + + if ((memw & 0xe07) != 0) { + FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(TMP_REG1) | (((sljit_ins)memw & 0xfff) << 10))); + memw = 0; + } else { + memw &= 0xfff; + } + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); + FAIL_IF(push_inst(compiler, (inst == ADDI ? 
ADD : SUB) | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(TMP_REG1))); + memw = 0; + } + + mem = SLJIT_MEM1(TMP_REG1); + + if (inst == SUBI) + memw = -memw; + } + + SLJIT_ASSERT((memw & 0x7) == 0 && memw <= 0x1f8 && memw >= -0x200); + return push_inst(compiler, ((type & SLJIT_MEM_STORE) ? STP : LDP) | RT(REG_PAIR_FIRST(reg)) | RT2(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x3f8) << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u32 sign = 0, inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); + + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_P: + inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400; + break; + case SLJIT_MOV_S8: + sign = 1; + /* fallthrough */ + case SLJIT_MOV_U8: + inst = STURBI | (MEM_SIZE_SHIFT(BYTE_SIZE) << 30) | 0x400; + break; + case SLJIT_MOV_S16: + sign = 1; + /* fallthrough */ + case SLJIT_MOV_U16: + inst = STURBI | (MEM_SIZE_SHIFT(HALF_SIZE) << 30) | 0x400; + break; + case SLJIT_MOV_S32: + sign = 1; + /* fallthrough */ + case SLJIT_MOV_U32: + case SLJIT_MOV32: + inst = STURBI | (MEM_SIZE_SHIFT(INT_SIZE) << 30) | 0x400; + break; + default: + SLJIT_UNREACHABLE(); + inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400; + break; + } + + if (!(type & SLJIT_MEM_STORE)) + inst |= sign ? 0x00800000 : 0x00400000; + + if (!(type & SLJIT_MEM_POST)) + inst |= 0x800; + + return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u32 inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw)); + + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + inst = STUR_FI | 0x80000400; + + if (!(type & SLJIT_32)) + inst |= 0x40000000; + + if (!(type & SLJIT_MEM_STORE)) + inst |= 0x00400000; + + if (!(type & SLJIT_MEM_POST)) + inst |= 0x800; + + return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + sljit_s32 dst_reg; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); + + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + + /* Not all instruction forms support accessing SP register. */ + if (offset <= 0xffffff && offset >= -0xffffff) { + ins = ADDI; + if (offset < 0) { + offset = -offset; + ins = SUBI; + } + + if (offset <= 0xfff) + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)(offset << 10))); + else { + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)((offset & 0xfff000) >> (12 - 10)) | (1 << 22))); + + offset &= 0xfff; + if (offset != 0) + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (sljit_ins)(offset << 10))); + } + } + else { + FAIL_IF(load_immediate (compiler, dst_reg, offset)); + /* Add extended register form. 
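+	   The option bits 0x3 select UXTX (effectively LSL #0); unlike the shifted-register form, this extended-register ADD accepts SP as an operand.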
*/ + FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg))); + } + + if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) + return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, (sljit_uw)init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, 0)); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 1); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); + + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins* inst = (sljit_ins*)addr; + sljit_u32 dst; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0); + + dst = inst[0] & 0x1f; + SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21))); + inst[0] = MOVZ | dst | (((sljit_u32)new_target & 0xffff) << 5); + inst[1] = MOVK | dst | (((sljit_u32)(new_target >> 16) & 0xffff) << 5) | (1 << 21); + inst[2] = MOVK | dst | (((sljit_u32)(new_target >> 32) & 0xffff) << 5) | (2 << 21); + inst[3] = MOVK | dst | ((sljit_u32)(new_target >> 48) << 5) | (3 << 21); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 4); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeARM_T2_32.c b/contrib/libs/pcre2/src/sljit/sljitNativeARM_T2_32.c new file mode 100644 index 0000000000..7d6bac077e --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeARM_T2_32.c @@ -0,0 +1,3150 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ +#ifdef __SOFTFP__ + return "ARM-Thumb2" SLJIT_CPUINFO " ABI:softfp"; +#else + return "ARM-Thumb2" SLJIT_CPUINFO " ABI:hardfp"; +#endif +} + +/* Length of an instruction word. */ +typedef sljit_u32 sljit_ins; + +/* Last register + 1. */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4) + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) + +/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7 +}; + +#define COPY_BITS(src, from, to, bits) \ + ((from >= to ? ((sljit_ins)(src) >> (from - to)) : ((sljit_ins)(src) << (to - from))) & (((1 << bits) - 1) << to)) + +#define NEGATE(uimm) ((sljit_uw)-(sljit_sw)(uimm)) + +/* Thumb16 encodings. */ +#define RD3(rd) ((sljit_ins)reg_map[rd]) +#define RN3(rn) ((sljit_ins)reg_map[rn] << 3) +#define RM3(rm) ((sljit_ins)reg_map[rm] << 6) +#define RDN3(rdn) ((sljit_ins)reg_map[rdn] << 8) +#define IMM3(imm) ((sljit_ins)imm << 6) +#define IMM8(imm) ((sljit_ins)imm) + +/* Thumb16 helpers. */ +#define SET_REGS44(rd, rn) \ + (((sljit_ins)reg_map[rn] << 3) | ((sljit_ins)reg_map[rd] & 0x7) | (((sljit_ins)reg_map[rd] & 0x8) << 4)) +#define IS_2_LO_REGS(reg1, reg2) \ + (reg_map[reg1] <= 7 && reg_map[reg2] <= 7) +#define IS_3_LO_REGS(reg1, reg2, reg3) \ + (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7) + +/* Thumb32 encodings. */ +#define RD4(rd) ((sljit_ins)reg_map[rd] << 8) +#define RN4(rn) ((sljit_ins)reg_map[rn] << 16) +#define RM4(rm) ((sljit_ins)reg_map[rm]) +#define RT4(rt) ((sljit_ins)reg_map[rt] << 12) +#define DD4(dd) ((sljit_ins)freg_map[dd] << 12) +#define DN4(dn) ((sljit_ins)freg_map[dn] << 16) +#define DM4(dm) ((sljit_ins)freg_map[dm]) +#define IMM5(imm) \ + (COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6)) +#define IMM12(imm) \ + (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((sljit_ins)imm & 0xff)) + +/* --------------------------------------------------------------------- */ +/* Instruction forms */ +/* --------------------------------------------------------------------- */ + +/* dot '.'
changed to _ + I immediate form (possibly followed by number of immediate bits). */ +#define ADCI 0xf1400000 +#define ADCS 0x4140 +#define ADC_W 0xeb400000 +#define ADD 0x4400 +#define ADDS 0x1800 +#define ADDSI3 0x1c00 +#define ADDSI8 0x3000 +#define ADDWI 0xf2000000 +#define ADD_SP 0x4485 +#define ADD_SP_I 0xb000 +#define ADD_W 0xeb000000 +#define ADD_WI 0xf1000000 +#define ANDI 0xf0000000 +#define ANDS 0x4000 +#define AND_W 0xea000000 +#define ASRS 0x4100 +#define ASRSI 0x1000 +#define ASR_W 0xfa40f000 +#define ASR_WI 0xea4f0020 +#define BCC 0xd000 +#define BICI 0xf0200000 +#define BKPT 0xbe00 +#define BLX 0x4780 +#define BX 0x4700 +#define CLZ 0xfab0f080 +#define CMNI_W 0xf1100f00 +#define CMP 0x4280 +#define CMPI 0x2800 +#define CMPI_W 0xf1b00f00 +#define CMP_X 0x4500 +#define CMP_W 0xebb00f00 +#define EORI 0xf0800000 +#define EORS 0x4040 +#define EOR_W 0xea800000 +#define IT 0xbf00 +#define LDR_SP 0x9800 +#define LDR 0xf8d00000 +#define LDRD 0xe9500000 +#define LDRI 0xf8500800 +#define LSLS 0x4080 +#define LSLSI 0x0000 +#define LSL_W 0xfa00f000 +#define LSL_WI 0xea4f0000 +#define LSRS 0x40c0 +#define LSRSI 0x0800 +#define LSR_W 0xfa20f000 +#define LSR_WI 0xea4f0010 +#define MOV 0x4600 +#define MOVS 0x0000 +#define MOVSI 0x2000 +#define MOVT 0xf2c00000 +#define MOVW 0xf2400000 +#define MOV_W 0xea4f0000 +#define MOV_WI 0xf04f0000 +#define MUL 0xfb00f000 +#define MVNS 0x43c0 +#define MVN_W 0xea6f0000 +#define MVN_WI 0xf06f0000 +#define NOP 0xbf00 +#define ORNI 0xf0600000 +#define ORRI 0xf0400000 +#define ORRS 0x4300 +#define ORR_W 0xea400000 +#define POP 0xbc00 +#define POP_W 0xe8bd0000 +#define PUSH 0xb400 +#define PUSH_W 0xe92d0000 +#define RBIT 0xfa90f0a0 +#define RORS 0x41c0 +#define ROR_W 0xfa60f000 +#define ROR_WI 0xea4f0030 +#define RSB_WI 0xf1c00000 +#define RSBSI 0x4240 +#define SBCI 0xf1600000 +#define SBCS 0x4180 +#define SBC_W 0xeb600000 +#define SDIV 0xfb90f0f0 +#define SMULL 0xfb800000 +#define STRD 0xe9400000 +#define STR_SP 0x9000 +#define SUBS 0x1a00 +#define SUBSI3 0x1e00 +#define SUBSI8 0x3800 +#define SUB_W 0xeba00000 +#define SUBWI 0xf2a00000 +#define SUB_SP_I 0xb080 +#define SUB_WI 0xf1a00000 +#define SXTB 0xb240 +#define SXTB_W 0xfa4ff080 +#define SXTH 0xb200 +#define SXTH_W 0xfa0ff080 +#define TST 0x4200 +#define TSTI 0xf0000f00 +#define TST_W 0xea000f00 +#define UDIV 0xfbb0f0f0 +#define UMULL 0xfba00000 +#define UXTB 0xb2c0 +#define UXTB_W 0xfa5ff080 +#define UXTH 0xb280 +#define UXTH_W 0xfa1ff080 +#define VABS_F32 0xeeb00ac0 +#define VADD_F32 0xee300a00 +#define VCMP_F32 0xeeb40a40 +#define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F64_F32 0xeeb70ac0 +#define VCVT_S32_F32 0xeebd0ac0 +#define VDIV_F32 0xee800a00 +#define VLDR_F32 0xed100a00 +#define VMOV_F32 0xeeb00a40 +#define VMOV 0xee000a10 +#define VMOV2 0xec400a10 +#define VMRS 0xeef1fa10 +#define VMUL_F32 0xee200a00 +#define VNEG_F32 0xeeb10a40 +#define VPOP 0xecbd0b00 +#define VPUSH 0xed2d0b00 +#define VSTR_F32 0xed000a00 +#define VSUB_F32 0xee300a40 + +static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_u16 *ptr; + SLJIT_ASSERT(!(inst & 0xffff0000)); + + ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16)); + FAIL_IF(!ptr); + *ptr = (sljit_u16)(inst); + compiler->size++; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr++ = (sljit_u16)(inst >> 16); + *ptr = (sljit_u16)(inst); + compiler->size += 2; + return 
SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) +{ + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) + | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + return push_inst32(compiler, MOVT | RD4(dst) + | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); +} + +static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) +{ + sljit_ins dst = inst[1] & 0x0f00; + SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); + inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1)); + inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff)); + inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1)); + inst[3] = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16)); +} + +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) +{ + sljit_sw diff; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; + + if (jump->flags & JUMP_ADDR) { + /* Branch to ARM code is not optimized yet. */ + if (!(jump->u.target & 0x1)) + return 0; + diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset) >> 1; + } + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1; + } + + if (jump->flags & IS_COND) { + SLJIT_ASSERT(!(jump->flags & IS_BL)); + if (diff <= 127 && diff >= -128) { + jump->flags |= PATCH_TYPE1; + return 5; + } + if (diff <= 524287 && diff >= -524288) { + jump->flags |= PATCH_TYPE2; + return 4; + } + /* +1 comes from the prefix IT instruction. */ + diff--; + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= PATCH_TYPE3; + return 3; + } + } + else if (jump->flags & IS_BL) { + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= PATCH_BL; + return 3; + } + } + else { + if (diff <= 1023 && diff >= -1024) { + jump->flags |= PATCH_TYPE4; + return 4; + } + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= PATCH_TYPE5; + return 3; + } + } + + return 0; +} + +static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw executable_offset) +{ + sljit_s32 type = (jump->flags >> 4) & 0xf; + sljit_sw diff; + sljit_u16 *jump_inst; + sljit_s32 s, j1, j2; + + if (SLJIT_UNLIKELY(type == 0)) { + modify_imm32_const((sljit_u16*)jump->addr, (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target); + return; + } + + if (jump->flags & JUMP_ADDR) { + SLJIT_ASSERT(jump->u.target & 0x1); + diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; + } + else { + SLJIT_ASSERT(jump->u.label->addr & 0x1); + diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; + } + jump_inst = (sljit_u16*)jump->addr; + + switch (type) { + case 1: + /* Encoding T1 of 'B' instruction */ + SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND)); + jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff)); + return; + case 2: + /* Encoding T3 of 'B' instruction */ + SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND)); + jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1)); + jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff)); + return; + case 3: + SLJIT_ASSERT(jump->flags & IS_COND); + *jump_inst++ = (sljit_u16)(IT | ((jump->flags >> 4) & 0xf0) | 0x8); + diff--; + type = 5; + break; + case 4: + /* Encoding T2 of 'B' instruction */ + SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND)); + jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff)); + return; + } + + SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608); + + /* Really complex instruction form for branches. */ + s = (diff >> 23) & 0x1; + j1 = (~(diff >> 22) ^ s) & 0x1; + j2 = (~(diff >> 21) ^ s) & 0x1; + jump_inst[0] = (sljit_u16)(0xf000 | ((sljit_ins)s << 10) | COPY_BITS(diff, 11, 0, 10)); + jump_inst[1] = (sljit_u16)((j1 << 13) | (j2 << 11) | (diff & 0x7ff)); + + /* The others have a common form. */ + if (type == 5) /* Encoding T4 of 'B' instruction */ + jump_inst[1] |= 0x9000; + else if (type == 6) /* Encoding T1 of 'BL' instruction */ + jump_inst[1] |= 0xd000; + else + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_u16 *code; + sljit_u16 *code_ptr; + sljit_u16 *buf_ptr; + sljit_u16 *buf_end; + sljit_uw half_count; + sljit_uw next_addr; + sljit_sw executable_offset; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_u16*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_u16), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + half_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + do { + buf_ptr = (sljit_u16*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 1); + do { + *code_ptr = *buf_ptr++; + if (next_addr == half_count) { + SLJIT_ASSERT(!label || label->size >= half_count); + SLJIT_ASSERT(!jump || jump->addr >= half_count); + SLJIT_ASSERT(!const_ || const_->addr >= half_count); + SLJIT_ASSERT(!put_label || put_label->addr >= half_count); + + /* These structures are ordered by their address. 
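+	   Each list was recorded in emission order, so a single pass over the emitted halfwords resolves every label, jump, constant and put_label.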
*/ + if (label && label->size == half_count) { + label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + if (jump && jump->addr == half_count) { + jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8); + code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); + jump = jump->next; + } + if (const_ && const_->addr == half_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == half_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr++; + half_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == half_count) { + label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + set_jump_instruction(jump, executable_offset); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { + modify_imm32_const((sljit_u16 *)put_label->addr, put_label->label->addr); + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16); + + code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + + /* Set thumb mode flag. */ + return (void*)((sljit_uw)code | 0x1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif + + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CTZ: + case SLJIT_HAS_ROT: + case SLJIT_HAS_CMOV: + case SLJIT_HAS_PREFETCH: + return 1; + + default: + return 0; + } +} + +/* --------------------------------------------------------------------- */ +/* Core code generator functions. */ +/* --------------------------------------------------------------------- */ + +#define INVALID_IMM 0x80000000 +static sljit_uw get_imm(sljit_uw imm) +{ + /* Thumb immediate form. */ + sljit_s32 counter; + + if (imm <= 0xff) + return imm; + + if ((imm & 0xffff) == (imm >> 16)) { + /* Some special cases. */ + if (!(imm & 0xff00)) + return (1 << 12) | (imm & 0xff); + if (!(imm & 0xff)) + return (2 << 12) | ((imm >> 8) & 0xff); + if ((imm & 0xff00) == ((imm & 0xff) << 8)) + return (3 << 12) | (imm & 0xff); + } + + /* Assembly optimization: count leading zeroes? */ + counter = 8; + if (!(imm & 0xffff0000)) { + counter += 16; + imm <<= 16; + } + if (!(imm & 0xff000000)) { + counter += 8; + imm <<= 8; + } + if (!(imm & 0xf0000000)) { + counter += 4; + imm <<= 4; + } + if (!(imm & 0xc0000000)) { + counter += 2; + imm <<= 2; + } + if (!(imm & 0x80000000)) { + counter += 1; + imm <<= 1; + } + /* Since imm >= 128, this must be true. 
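+	   (imm is known to be greater than 0xff at this point, so at most 23 leading zeros are added to the initial 8.)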
*/ + SLJIT_ASSERT(counter <= 31); + + if (imm & 0x00ffffff) + return INVALID_IMM; /* Cannot be encoded. */ + + return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1); +} + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) +{ + sljit_uw tmp; + + /* MOVS cannot be used since it destroys flags. */ + + if (imm >= 0x10000) { + tmp = get_imm(imm); + if (tmp != INVALID_IMM) + return push_inst32(compiler, MOV_WI | RD4(dst) | tmp); + tmp = get_imm(~imm); + if (tmp != INVALID_IMM) + return push_inst32(compiler, MVN_WI | RD4(dst) | tmp); + } + + /* set low 16 bits, set hi 16 bits to 0. */ + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) + | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + + /* set hi 16 bits if needed. */ + if (imm >= 0x10000) + return push_inst32(compiler, MOVT | RD4(dst) + | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); + return SLJIT_SUCCESS; +} + +#define ARG1_IMM 0x0010000 +#define ARG2_IMM 0x0020000 +/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */ +#define SET_FLAGS 0x0100000 +#define UNUSED_RETURN 0x0200000 + +static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2) +{ + /* dst must be register, TMP_REG1 + arg1 must be register, imm + arg2 must be register, imm */ + sljit_s32 reg; + sljit_uw imm, imm2; + + if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { + /* Both are immediates, no temporaries are used. */ + flags &= ~ARG1_IMM; + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + + if (flags & (ARG1_IMM | ARG2_IMM)) { + reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2); + imm = (flags & ARG2_IMM) ? arg2 : arg1; + + switch (flags & 0xffff) { + case SLJIT_CLZ: + case SLJIT_CTZ: + case SLJIT_MUL: + /* No form with immediate operand. */ + break; + case SLJIT_MOV: + SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2); + return load_immediate(compiler, dst, imm); + case SLJIT_NOT: + if (!(flags & SET_FLAGS)) + return load_immediate(compiler, dst, ~imm); + /* Since the flags should be set, we just fall back to the register mode. + Although some clever things could be done here, "NOT IMM" is not worth the effort.
*/ + break; + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + imm2 = NEGATE(imm); + if (IS_2_LO_REGS(reg, dst)) { + if (imm <= 0x7) + return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); + if (imm2 <= 0x7) + return push_inst16(compiler, SUBSI3 | IMM3(imm2) | RD3(dst) | RN3(reg)); + if (reg == dst) { + if (imm <= 0xff) + return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst)); + if (imm2 <= 0xff) + return push_inst16(compiler, SUBSI8 | IMM8(imm2) | RDN3(dst)); + } + } + if (!(flags & SET_FLAGS)) { + if (imm <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm)); + if (imm2 <= 0xfff) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm2)); + } + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2); + imm = get_imm(NEGATE(imm)); + if (imm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (flags & ARG1_IMM) { + if (imm == 0 && IS_2_LO_REGS(reg, dst)) + return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + } + if (flags & UNUSED_RETURN) { + if (imm <= 0xff && reg_map[reg] <= 7) + return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg)); + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, CMPI_W | RN4(reg) | imm2); + imm = get_imm(NEGATE(imm)); + if (imm != INVALID_IMM) + return push_inst32(compiler, CMNI_W | RN4(reg) | imm); + break; + } + imm2 = NEGATE(imm); + if (IS_2_LO_REGS(reg, dst)) { + if (imm <= 0x7) + return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); + if (imm2 <= 0x7) + return push_inst16(compiler, ADDSI3 | IMM3(imm2) | RD3(dst) | RN3(reg)); + if (reg == dst) { + if (imm <= 0xff) + return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst)); + if (imm2 <= 0xff) + return push_inst16(compiler, ADDSI8 | IMM8(imm2) | RDN3(dst)); + } + } + if (!(flags & SET_FLAGS)) { + if (imm <= 0xfff) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm)); + if (imm2 <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm2)); + } + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2); + imm = get_imm(NEGATE(imm)); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (flags & ARG1_IMM) + break; + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_AND: + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ((flags & UNUSED_RETURN) ? 
TSTI : ANDI) | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2); + imm = get_imm(~imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_OR: + imm2 = get_imm(imm); + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2); + imm = get_imm(~imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_XOR: + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & ARG1_IMM) + break; + imm &= 0x1f; + + if (imm == 0) { + if (!(flags & SET_FLAGS)) + return push_inst16(compiler, MOV | SET_REGS44(dst, reg)); + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg)); + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg)); + } + + switch (flags & 0xffff) { + case SLJIT_SHL: + case SLJIT_MSHL: + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + case SLJIT_LSHR: + case SLJIT_MLSHR: + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + case SLJIT_ASHR: + case SLJIT_MASHR: + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + case SLJIT_ROTL: + imm = (imm ^ 0x1f) + 1; + /* fallthrough */ + default: /* SLJIT_ROTR */ + return push_inst32(compiler, ROR_WI | RD4(dst) | RM4(reg) | IMM5(imm)); + } + default: + SLJIT_UNREACHABLE(); + break; + } + + if (flags & ARG2_IMM) { + imm = arg2; + arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm)); + } + else { + imm = arg1; + arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm)); + } + + SLJIT_ASSERT(arg1 != arg2); + } + + /* Both arguments are registers. 
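+	   Any immediate operand was materialized into TMP_REG1 or TMP_REG2 above, so only register-register forms remain.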
*/ + switch (flags & 0xffff) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + case SLJIT_MOV_P: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (dst == (sljit_s32)arg2) + return SLJIT_SUCCESS; + return push_inst16(compiler, MOV | SET_REGS44(dst, arg2)); + case SLJIT_MOV_U8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_S8: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_U16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_S16: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2)); + case SLJIT_NOT: + SLJIT_ASSERT(arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2)); + case SLJIT_CLZ: + SLJIT_ASSERT(arg1 == TMP_REG2); + return push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2)); + case SLJIT_CTZ: + SLJIT_ASSERT(arg1 == TMP_REG2); + FAIL_IF(push_inst32(compiler, RBIT | RN4(arg2) | RD4(dst) | RM4(arg2))); + return push_inst32(compiler, CLZ | RN4(dst) | RD4(dst) | RM4(dst)); + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (IS_3_LO_REGS(dst, arg1, arg2)) + return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); + if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS)) + return push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); + return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (flags & UNUSED_RETURN) { + if (IS_2_LO_REGS(arg1, arg2)) + return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2)); + return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2)); + } + if (IS_3_LO_REGS(dst, arg1, arg2)) + return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); + return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MUL: + compiler->status_flags_state = 0; + if (!(flags & SET_FLAGS)) + return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); + SLJIT_ASSERT(dst != TMP_REG2); + FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2))); + /* cmp TMP_REG2, dst asr 
#31. */ + return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst)); + case SLJIT_AND: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); + if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2)) + return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2)); + return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TST_W : AND_W) | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_OR: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_XOR: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MSHL: + FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f)); + arg2 = TMP_REG2; + /* fallthrough */ + case SLJIT_SHL: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MLSHR: + FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f)); + arg2 = TMP_REG2; + /* fallthrough */ + case SLJIT_LSHR: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MASHR: + FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f)); + arg2 = TMP_REG2; + /* fallthrough */ + case SLJIT_ASHR: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_ROTL: + FAIL_IF(push_inst32(compiler, RSB_WI | RD4(TMP_REG2) | RN4(arg2) | 0)); + arg2 = TMP_REG2; + /* fallthrough */ + case SLJIT_ROTR: + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, RORS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ROR_W | RD4(dst) | RN4(arg1) | RM4(arg2)); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +#define STORE 0x01 +#define SIGNED 0x02 + +#define WORD_SIZE 0x00 +#define BYTE_SIZE 0x04 +#define HALF_SIZE 0x08 +#define PRELOAD 0x0c + +#define IS_WORD_SIZE(flags) (!((flags) & (BYTE_SIZE | HALF_SIZE))) +#define ALIGN_CHECK(argw, imm, shift) (!((argw) & ~((imm) << (shift)))) + +/* + 1st letter: + w = word + b = byte + h = half + + 2nd letter: + s = signed + u = unsigned + + 3rd letter: + l = load + s = store +*/ + +static const sljit_ins sljit_mem16[12] = { +/* w u l */ 0x5800 /* ldr */, +/* w u s */ 0x5000 /* str */, +/* w s l */ 0x5800 /* ldr */, +/* w s s */ 0x5000 /* str */, + +/* b u l */ 0x5c00 /* ldrb */, +/* b u s */ 0x5400 /* strb */, +/* b s l */ 0x5600 /* ldrsb */, +/* b s s */ 0x5400 /* strb */, + +/* h u l */ 0x5a00 /* ldrh */, +/* h u s */ 0x5200 /* strh */, +/* h s l */ 0x5e00 /* ldrsh */, +/* h s s */ 0x5200 /* strh */, +}; + +static const sljit_ins sljit_mem16_imm5[12] = { +/* w u l */ 0x6800 /* ldr imm5 */, +/* w u s */ 0x6000 /* str imm5 */, +/* w s l */ 0x6800 /* ldr imm5 */, +/* w s s */ 0x6000 /* str imm5 */, + +/* b u l */ 0x7800 /* ldrb imm5 */, +/* b u s */ 0x7000 /* strb 
imm5 */, +/* b s l */ 0x0000 /* not allowed */, +/* b s s */ 0x7000 /* strb imm5 */, + +/* h u l */ 0x8800 /* ldrh imm5 */, +/* h u s */ 0x8000 /* strh imm5 */, +/* h s l */ 0x0000 /* not allowed */, +/* h s s */ 0x8000 /* strh imm5 */, +}; + +#define MEM_IMM8 0xc00 +#define MEM_IMM12 0x800000 +static const sljit_ins sljit_mem32[13] = { +/* w u l */ 0xf8500000 /* ldr.w */, +/* w u s */ 0xf8400000 /* str.w */, +/* w s l */ 0xf8500000 /* ldr.w */, +/* w s s */ 0xf8400000 /* str.w */, + +/* b u l */ 0xf8100000 /* ldrb.w */, +/* b u s */ 0xf8000000 /* strb.w */, +/* b s l */ 0xf9100000 /* ldrsb.w */, +/* b s s */ 0xf8000000 /* strb.w */, + +/* h u l */ 0xf8300000 /* ldrh.w */, +/* h u s */ 0xf8200000 /* strh.w */, +/* h s l */ 0xf9300000 /* ldrsh.w */, +/* h s s */ 0xf8200000 /* strh.w */, + +/* p u l */ 0xf8100000 /* pld */, +}; + +/* Helper function. Dst should be reg + value, using at most 1 instruction; flags are not set. */ +static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) +{ + sljit_uw imm; + + if (value >= 0) { + if (value <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value)); + imm = get_imm((sljit_uw)value); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | imm); + } + else { + value = -value; + if (value <= 0xfff) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value)); + imm = get_imm((sljit_uw)value); + if (imm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | imm); + } + return SLJIT_ERR_UNSUPPORTED; +} + +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) +{ + sljit_s32 other_r; + sljit_uw imm, tmp; + + SLJIT_ASSERT(arg & SLJIT_MEM); + SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -0xff && argw <= 0xfff)); + + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) { + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); + if (imm != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | imm)); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff)); + } + + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); + if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags]) + return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg)); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg)); + } + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + other_r = OFFS_REG(arg); + arg &= REG_MASK; + + if (!argw && IS_3_LO_REGS(reg, arg, other_r)) + return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)); + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | ((sljit_ins)argw << 4)); + } + + arg &= REG_MASK; + + if (argw > 0xfff) { + imm = get_imm((sljit_uw)(argw & ~0xfff)); + if (imm != INVALID_IMM) { + push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | imm); + arg = tmp_reg; + argw = argw & 0xfff; + } + } + else if (argw < -0xff) { + tmp = (sljit_uw)((-argw + 0xfff) & ~0xfff); + SLJIT_ASSERT(tmp >= (sljit_uw)-argw); + imm = get_imm(tmp); + + if (imm != INVALID_IMM) { + push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | imm); + arg = tmp_reg; + argw += (sljit_sw)tmp; + + SLJIT_ASSERT(argw >= 0 && argw <= 0xfff); + } + } + + /* 16 bit instruction forms. 
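+	   These require low registers and a small scaled offset: the imm5 field is scaled by 4, 2 or 1 depending on the access size, which is what the ALIGN_CHECK tests below verify.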
*/ + if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) { + tmp = 3; + if (IS_WORD_SIZE(flags)) { + if (ALIGN_CHECK(argw, 0x1f, 2)) + tmp = 2; + } + else if (flags & BYTE_SIZE) + { + if (ALIGN_CHECK(argw, 0x1f, 0)) + tmp = 0; + } + else { + SLJIT_ASSERT(flags & HALF_SIZE); + if (ALIGN_CHECK(argw, 0x1f, 1)) + tmp = 1; + } + + if (tmp < 3) + return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | ((sljit_ins)argw << (6 - tmp))); + } + else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && ALIGN_CHECK(argw, 0xff, 2) && reg_map[reg] <= 7) { + /* SP based immediate. */ + return push_inst16(compiler, STR_SP | (sljit_ins)((flags & STORE) ? 0 : 0x800) | RDN3(reg) | ((sljit_ins)argw >> 2)); + } + + if (argw >= 0 && argw <= 0xfff) + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | (sljit_ins)argw); + else if (argw < 0 && argw >= -0xff) + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | (sljit_ins)-argw); + + SLJIT_ASSERT(arg != tmp_reg); + + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); + if (IS_3_LO_REGS(reg, arg, tmp_reg)) + return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg)); + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg)); +} + +#undef ALIGN_CHECK +#undef IS_WORD_SIZE + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 size, i, tmp, word_arg_count; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + sljit_uw offset; + sljit_uw imm = 0; +#ifdef __SOFTFP__ + sljit_u32 float_arg_count; +#else + sljit_u32 old_offset, f32_offset; + sljit_u32 remap[3]; + sljit_u32 *remap_ptr = remap; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) + imm |= (sljit_uw)1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + imm |= (sljit_uw)1 << reg_map[i]; + + /* At least two registers must be set for PUSH_W and one for PUSH instruction. */ + FAIL_IF((imm & 0xff00) + ? 
push_inst32(compiler, PUSH_W | (1 << 14) | imm) + : push_inst16(compiler, PUSH | (1 << 8) | imm)); + + /* Stack must be aligned to 8 bytes: (LR, R4) */ + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((size & SSIZE_OF(sw)) != 0) { + FAIL_IF(push_inst16(compiler, SUB_SP_I | (sizeof(sljit_sw) >> 2))); + size += SSIZE_OF(sw); + } + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst32(compiler, VPUSH | DD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fsaveds > 0) + FAIL_IF(push_inst32(compiler, VPUSH | DD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst32(compiler, VPUSH | DD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + } + } + + local_size = ((size + local_size + 0x7) & ~0x7) - size; + compiler->local_size = local_size; + + if (options & SLJIT_ENTER_REG_ARG) + arg_types = 0; + + arg_types >>= SLJIT_ARG_SHIFT; + word_arg_count = 0; + saved_arg_count = 0; +#ifdef __SOFTFP__ + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + offset = 0; + float_arg_count = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst32(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + else + FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + offset += sizeof(sljit_f64) - sizeof(sljit_sw); + break; + case SLJIT_ARG_TYPE_F32: + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst32(compiler, VMOV | (float_arg_count << 16) | (offset << 10))); + else + FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2)) + tmp = word_arg_count; + else + break; + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst16(compiler, MOV | ((sljit_ins)reg_map[tmp] & 0x7) | (((sljit_ins)reg_map[tmp] & 0x8) << 4) | (offset << 1))); + else if (reg_map[tmp] <= 7) + FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp) + | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + else + FAIL_IF(push_inst32(compiler, LDR | RT4(tmp) | RN4(SLJIT_SP) + | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw))))); + break; + } + + offset += sizeof(sljit_sw); + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = offset; +#else + offset = SLJIT_FR0; + old_offset = SLJIT_FR0; + f32_offset = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != old_offset) + *remap_ptr++ = VMOV_F32 | SLJIT_32 | DD4(offset) | DM4(old_offset); + old_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + *remap_ptr++ = VMOV_F32 | 0x20 | DD4(offset) | DM4(f32_offset); + f32_offset = 0; + } else { + if (offset != old_offset) + *remap_ptr++ = VMOV_F32 | DD4(offset) | DM4(old_offset); + f32_offset = old_offset; + old_offset++; + } + offset++; + break; + 
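+		/* Word arguments already arrive in r0-r3; only those that must live in saved registers need a move. */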
default: + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } + + word_arg_count++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap)); + + while (remap_ptr > remap) + FAIL_IF(push_inst32(compiler, *(--remap_ptr))); +#endif + +#ifdef _WIN32 + if (local_size >= 4096) { + imm = get_imm(4096); + SLJIT_ASSERT(imm != INVALID_IMM); + + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + + if (local_size < 4 * 4096) { + if (local_size > 2 * 4096) { + if (local_size > 3 * 4096) { + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + } + + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + } + } else { + FAIL_IF(load_immediate(compiler, TMP_REG2, ((sljit_uw)local_size >> 12) - 1)); + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + FAIL_IF(push_inst32(compiler, SUB_WI | SET_FLAGS | RD4(TMP_REG2) | RN4(TMP_REG2) | 1)); + FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-8 & 0xff))); + } + + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + local_size &= 0xfff; + } + + if (local_size >= 256) { + SLJIT_ASSERT(local_size < 4096); + + if (local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size)); + + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + } else if (local_size > 0) + FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | (sljit_uw)local_size)); +#else /* !_WIN32 */ + if (local_size > 0) { + if (local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size)); + } +#endif /* _WIN32 */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); + + if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)) + size += SSIZE_OF(sw); + + compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) +{ + sljit_uw imm2; + + /* The TMP_REG1 register must keep its value. 
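+	   (sljit_emit_return_to may have stashed the branch target in TMP_REG1 before the frame is released, hence only TMP_REG2 is clobbered here.)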
*/ + if (imm <= (127u << 2)) + return push_inst16(compiler, ADD_SP_I | (imm >> 2)); + + if (imm <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | IMM12(imm)); + + imm2 = get_imm(imm); + + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ADD_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm2); + + FAIL_IF(load_immediate(compiler, TMP_REG2, imm)); + return push_inst16(compiler, ADD_SP | RN3(TMP_REG2)); +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size) +{ + sljit_s32 local_size, fscratches, fsaveds, i, tmp; + sljit_s32 restored_reg = 0; + sljit_s32 lr_dst = TMP_PC; + sljit_uw reg_list = 0; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128); + + local_size = compiler->local_size; + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst32(compiler, VPOP | DD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst32(compiler, VPOP | DD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + if (fsaveds > 0) + FAIL_IF(push_inst32(compiler, VPOP | DD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + } + + local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7; + } + + if (frame_size < 0) { + lr_dst = TMP_REG2; + frame_size = 0; + } else if (frame_size > 0) { + SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0); + lr_dst = 0; + frame_size &= ~0x7; + } + + tmp = SLJIT_S0 - compiler->saveds; + i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + if (tmp < i) { + restored_reg = i; + do { + reg_list |= (sljit_uw)1 << reg_map[i]; + } while (--i > tmp); + } + + i = compiler->scratches; + if (i >= SLJIT_FIRST_SAVED_REG) { + restored_reg = i; + do { + reg_list |= (sljit_uw)1 << reg_map[i]; + } while (--i >= SLJIT_FIRST_SAVED_REG); + } + + if (lr_dst == TMP_REG2 && reg_list == 0) { + reg_list |= (sljit_uw)1 << reg_map[TMP_REG2]; + restored_reg = TMP_REG2; + lr_dst = 0; + } + + if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) { + /* The local_size does not include the saved registers. */ + tmp = 0; + if (reg_list != 0) { + tmp = 2; + if (local_size <= 0xfff) { + if (local_size == 0) { + SLJIT_ASSERT(restored_reg != TMP_REG2); + if (frame_size == 0) + return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x308); + if (frame_size > 2 * SSIZE_OF(sw)) + return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x100 | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw)))); + } + + if (reg_map[restored_reg] <= 7 && local_size <= 0x3fc) + FAIL_IF(push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(local_size >> 2))); + else + FAIL_IF(push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)local_size)); + tmp = 1; + } else if (frame_size == 0) { + frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw); + tmp = 3; + } + + /* Place for the saved register. */ + if (restored_reg != TMP_REG2) + local_size += SSIZE_OF(sw); + } + + /* Place for the lr register. 
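   It travels with the saved register, so one extra word is counted below.
   In this fast path, the raw 0x304 / 0x308 offsets OR-ed into LDRI appear
   to be the post-indexed T32 forms "ldr rX, [sp], #4" and
   "ldr rX, [sp], #8" (editor's reading of the encoding), i.e. a single
   instruction that both restores the register and releases the slots.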
*/ + local_size += SSIZE_OF(sw); + + if (frame_size > local_size) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_ins)(frame_size - local_size) >> 2))); + else if (frame_size < local_size) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size))); + + if (tmp <= 1) + return SLJIT_SUCCESS; + + if (tmp == 2) { + frame_size -= SSIZE_OF(sw); + if (restored_reg != TMP_REG2) + frame_size -= SSIZE_OF(sw); + + if (reg_map[restored_reg] <= 7) + return push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(frame_size >> 2)); + + return push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)frame_size); + } + + tmp = (restored_reg == TMP_REG2) ? 0x304 : 0x308; + return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)tmp); + } + + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (!(reg_list & 0xff00) && lr_dst != TMP_REG2) { + if (lr_dst == TMP_PC) + reg_list |= 1u << 8; + + /* At least one register must be set for POP instruction. */ + SLJIT_ASSERT(reg_list != 0); + + FAIL_IF(push_inst16(compiler, POP | reg_list)); + } else { + if (lr_dst != 0) + reg_list |= (sljit_uw)1 << reg_map[lr_dst]; + + /* At least two registers must be set for POP_W instruction. */ + SLJIT_ASSERT((reg_list & (reg_list - 1)) != 0); + + FAIL_IF(push_inst32(compiler, POP_W | reg_list)); + } + + if (frame_size > 0) + return push_inst16(compiler, SUB_SP_I | (((sljit_ins)frame_size - sizeof(sljit_sw)) >> 2)); + + if (lr_dst != 0) + return SLJIT_SUCCESS; + + return push_inst16(compiler, ADD_SP_I | 1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + return emit_stack_frame_release(compiler, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src))); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__) + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _WIN32 +extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator); +extern long long __rt_sdiv(int denominator, int numerator); +#elif defined(__GNUC__) +extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator); +extern int __aeabi_idivmod(int numerator, int denominator); +#else +#error "Software divmod functions are needed" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__) + sljit_uw saved_reg_list[3]; + sljit_uw saved_reg_count; +#endif + + 
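/* Editor's sketch, an assumption spelled out rather than upstream text:
   the EABI helpers declared above return quotient and remainder as a
   pair, quotient in r0 and remainder in r1, which matches the
   SLJIT_DIVMOD_* contract directly. The hardware SDIV/UDIV path below
   has no remainder output, so it recomputes it:
       mov  tmp, r0              ; save the numerator
       udiv r0, r0, r1           ; quotient
       mul  r1, r0, r1           ; quotient * denominator
       sub  r1, tmp, r1          ; remainder
*/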
CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst16(compiler, BKPT); + case SLJIT_NOP: + return push_inst16(compiler, NOP); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) + | RD4(SLJIT_R1) | RT4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)); +#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__) + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0))); + FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1))); + FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1))); + return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1)); + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)); +#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */ + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); + SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3); + + saved_reg_count = 0; + if (compiler->scratches >= 4) + saved_reg_list[saved_reg_count++] = 3; + if (compiler->scratches >= 3) + saved_reg_list[saved_reg_count++] = 2; + if (op >= SLJIT_DIV_UW) + saved_reg_list[saved_reg_count++] = 1; + + if (saved_reg_count > 0) { + FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */)); + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */)); + } + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */)); + } + } + +#ifdef _WIN32 + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1))); + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__rt_udiv) : SLJIT_FUNC_ADDR(__rt_sdiv)))); +#elif defined(__GNUC__) + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod)))); +#else +#error "Software divmod functions are needed" +#endif + + if (saved_reg_count > 0) { + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */)); + } + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */)); + } + return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 
16 : 8) + | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); + } + return SLJIT_SUCCESS; +#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */ + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r, flags; + sljit_s32 op_flags = GET_ALL_FLAGS(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + op = GET_OPCODE(op); + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + case SLJIT_MOV_P: + flags = WORD_SIZE; + break; + case SLJIT_MOV_U8: + flags = BYTE_SIZE; + if (src & SLJIT_IMM) + srcw = (sljit_u8)srcw; + break; + case SLJIT_MOV_S8: + flags = BYTE_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (sljit_s8)srcw; + break; + case SLJIT_MOV_U16: + flags = HALF_SIZE; + if (src & SLJIT_IMM) + srcw = (sljit_u16)srcw; + break; + case SLJIT_MOV_S16: + flags = HALF_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (sljit_s16)srcw; + break; + default: + SLJIT_UNREACHABLE(); + flags = 0; + break; + } + + if (src & SLJIT_IMM) + FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw)); + else if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1)); + } else { + if (dst_r != TMP_REG1) + return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src); + dst_r = src; + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); + } + + flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, (sljit_uw)src); + + if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) + return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_reg, flags, src2_reg; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + + if (dst == TMP_REG1) + flags |= UNUSED_RETURN; + + if (src1 & SLJIT_IMM) + flags |= ARG1_IMM; + else if (src1 & SLJIT_MEM) { + emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1); + src1w = TMP_REG1; + } + else + src1w = src1; + + if (src2 & SLJIT_IMM) + flags |= ARG2_IMM; + else if (src2 & SLJIT_MEM) { + src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? 
TMP_REG2 : TMP_REG1; + emit_op_mem(compiler, WORD_SIZE, src2_reg, src2, src2w, src2_reg); + src2w = src2_reg; + } + else + src2w = src2; + + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w); + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 is_left; + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w)); + + op = GET_OPCODE(op); + is_left = (op == SLJIT_SHL || op == SLJIT_MSHL); + + if (src_dst == src1) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, src_dst, 0, src_dst, 0, src2, src2w); + } + + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (src2 & SLJIT_IMM) { + src2w &= 0x1f; + + if (src2w == 0) + return SLJIT_SUCCESS; + } else if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src2, src2w, TMP_REG2)); + src2 = TMP_REG2; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1)); + src1 = TMP_REG1; + } else if (src1 & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_IMM) { + if (reg_map[src_dst] <= 7) + FAIL_IF(push_inst16(compiler, (is_left ? LSLSI : LSRSI) | RD3(src_dst) | RN3(src_dst) | ((sljit_ins)src2w << 6))); + else + FAIL_IF(push_inst32(compiler, (is_left ? LSL_WI : LSR_WI) | RD4(src_dst) | RM4(src_dst) | IMM5(src2w))); + + src2w = (src2w ^ 0x1f) + 1; + return push_inst32(compiler, ORR_W | RD4(src_dst) | RN4(src_dst) | RM4(src1) | (is_left ? 0x10 : 0x0) | IMM5(src2w)); + } + + if (op == SLJIT_MSHL || op == SLJIT_MLSHR) { + FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(src2) | 0x1f)); + src2 = TMP_REG2; + } + + if (IS_2_LO_REGS(src_dst, src2)) + FAIL_IF(push_inst16(compiler, (is_left ? LSLS : LSRS) | RD3(src_dst) | RN3(src2))); + else + FAIL_IF(push_inst32(compiler, (is_left ? LSL_W : LSR_W) | RD4(src_dst) | RN4(src_dst) | RM4(src2))); + + FAIL_IF(push_inst32(compiler, (is_left ? LSR_WI : LSL_WI) | RD4(TMP_REG1) | RM4(src1) | (1 << 6))); + FAIL_IF(push_inst32(compiler, EORI | RD4(TMP_REG2) | RN4(src2) | 0x1f)); + FAIL_IF(push_inst32(compiler, (is_left ? 
LSR_W : LSL_W) | RD4(TMP_REG1) | RN4(TMP_REG1) | RM4(TMP_REG2))); + return push_inst32(compiler, ORR_W | RD4(src_dst) | RN4(src_dst) | RM4(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src))); + else + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2)); + + return push_inst16(compiler, BX | RN3(TMP_REG2)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return (freg_map[reg] << 1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + if (size == 2) + return push_inst16(compiler, *(sljit_u16*)instruction); + return push_inst32(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FPU_LOAD (1 << 20) + +static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_uw imm; + sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD)); + + SLJIT_ASSERT(arg & SLJIT_MEM); + + /* Fast loads and stores. 
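   VSTR/VLDR take only an 8-bit, word-scaled offset, so the fast path
   below handles word-aligned displacements within +/-1020 bytes (0x3fc);
   argw == 256, for instance, is encoded as imm8 == 64. The 0x800000 bit
   is the U (add) flag, so negative offsets simply clear it and negate
   argw instead of needing a separate instruction.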
*/ + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 6))); + arg = SLJIT_MEM | TMP_REG1; + argw = 0; + } + + if ((arg & REG_MASK) && (argw & 0x3) == 0) { + if (!(argw & ~0x3fc)) + return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | ((sljit_uw)argw >> 2)); + if (!(-argw & ~0x3fc)) + return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | ((sljit_uw)-argw >> 2)); + } + + if (arg & REG_MASK) { + if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg)); + } + + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); + if (imm != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); + } + + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); + if (imm != INVALID_IMM) { + argw = -argw; + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); + return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); + } + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw)); + if (arg & REG_MASK) + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK)))); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + op ^= SLJIT_32; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | DD4(TMP_FREG1) | DM4(src))); + + if (FAST_IS_REG(dst)) + return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1)); + + /* Store the integer value from a VFP register. */ + return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + op ^= SLJIT_32; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1))); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. 
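   The point is to let the conversion run entirely inside the FPU. The
   emitted sequence is roughly (editor's sketch, for a double result):
       vldr  s<tmp>, [mem]          ; or: vmov s<tmp>, r<src>
       vcvt.f64.s32 d<dst>, s<tmp>
   An immediate source is first materialized in TMP_REG1 and then moved
   across with vmov, as the else branch below shows.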
*/ + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1))); + } + + FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_32) | DD4(dst_r) | DM4(TMP_FREG1))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + op ^= SLJIT_32; + + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | DD4(src1) | DM4(src2))); + return push_inst32(compiler, VMRS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + if (src & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); + break; + case SLJIT_CONV_F64_FROM_F32: + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); + op ^= SLJIT_32; + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_SUB_F64: + FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_MUL_F64: + FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_DIV_F64: + FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + if (FAST_IS_REG(dst)) + return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2)); + + /* Memory. */ + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ + return 0x0; + + case SLJIT_NOT_EQUAL: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. 
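   Editor's note on why: after a VCMP, the ARM NE condition is also true
   for unordered (NaN) operands, so "ordered not-equal" has no exact
   single condition code and deliberately shares 0x1 with the plain
   not-equal tests; the same approximation applies to the equal group
   above.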
*/ + return 0x1; + + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x2; + /* fallthrough */ + + case SLJIT_LESS: + return 0x3; + + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x3; + /* fallthrough */ + + case SLJIT_GREATER_EQUAL: + return 0x2; + + case SLJIT_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + return 0x8; + + case SLJIT_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + return 0x9; + + case SLJIT_SIG_LESS: + case SLJIT_UNORDERED_OR_LESS: + return 0xb; + + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + return 0xa; + + case SLJIT_SIG_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + return 0xc; + + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return 0xd; + + case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x1; + /* fallthrough */ + + case SLJIT_UNORDERED: + return 0x6; + + case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) + return 0x0; + /* fallthrough */ + + case SLJIT_ORDERED: + return 0x7; + + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + return 0x4; + + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return 0x5; + + default: /* SLJIT_JUMP */ + SLJIT_UNREACHABLE(); + return 0xe; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins cc; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); + if (type < SLJIT_JUMP) { + jump->flags |= IS_COND; + cc = get_cc(compiler, type); + jump->flags |= cc << 8; + PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + } + + jump->addr = compiler->size; + if (type <= SLJIT_JUMP) + PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1))); + else { + jump->flags |= IS_BL; + PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1))); + } + + return jump; +} + +#ifdef __SOFTFP__ + +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space) +{ + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; + sljit_u32 word_arg_offset = 0; + sljit_u32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_u32 src_offset = 4 * sizeof(sljit_sw); + sljit_u8 offsets[4]; + sljit_u8 *offset_ptr = offsets; + + if (src && FAST_IS_REG(*src)) + src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw); + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + 
offset += sizeof(sljit_sw); + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f64); + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f32); + float_arg_count++; + break; + default: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_sw); + word_arg_offset += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + /* Keep lr register on the stack. */ + if (is_tail_call) + offset += sizeof(sljit_sw); + + offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7; + + *extra_space = offset; + + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset)); + else + FAIL_IF(push_inst16(compiler, SUB_SP_I | (offset >> 2))); + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, -1)); + *extra_space = 0; + } + + SLJIT_ASSERT(reg_map[TMP_REG1] == 12); + + /* Process arguments in reversed direction. */ + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count--; + offset = *(--offset_ptr); + + SLJIT_ASSERT((offset & 0x7) == 0); + + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + } else + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count--; + offset = *(--offset_ptr); + + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10))); + } else + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + break; + default: + word_arg_offset -= sizeof(sljit_sw); + offset = *(--offset_ptr); + + SLJIT_ASSERT(offset >= word_arg_offset); + + if (offset != word_arg_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + else if (src_offset == word_arg_offset) { + *src = (sljit_s32)(1 + (offset >> 2)); + src_offset = offset; + } + FAIL_IF(push_inst16(compiler, MOV | (offset >> 2) | (word_arg_offset << 1))); + } else + FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + } + break; + } + + types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) + FAIL_IF(push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12))); + + return SLJIT_SUCCESS; +} + +#else + +static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_u32 offset = SLJIT_FR0; + sljit_u32 new_offset = SLJIT_FR0; + sljit_u32 f32_offset = 0; + + /* Remove return value. 
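   That is, the first shift below drops the return-type slot, leaving
   only the argument types. The loop then performs the inverse of the
   remapping done in sljit_emit_enter: sljit keeps each float argument in
   its own d-register, and the outgoing values are packed back into
   consecutive AAPCS-VFP slots, with f32_offset tracking a
   single-precision half left open by an earlier double so a later float
   can back-fill it.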
*/ + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != new_offset) + FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | DD4(new_offset) | DM4(offset))); + + new_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(f32_offset) | DM4(offset))); + f32_offset = 0; + } else { + if (offset != new_offset) + FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(new_offset) | DM4(offset))); + f32_offset = new_offset; + new_offset++; + } + offset++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#ifdef __SOFTFP__ + struct sljit_jump *jump; + sljit_u32 extra_space = (sljit_u32)type; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#ifdef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) + | RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw)))); + + PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2))); + return jump; + } + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); + return jump; + } +#endif /* __SOFTFP__ */ + + if (type & SLJIT_CALL_RETURN) { + /* ldmia sp!, {..., lr} */ + PTR_FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + +#ifndef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + + if (!(src & SLJIT_IMM)) { + if (FAST_IS_REG(src)) { + SLJIT_ASSERT(reg_map[src] != 14); + return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src)); + } + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1)); + if (type >= SLJIT_FAST_CALL) + return push_inst16(compiler, BLX | RN3(TMP_REG1)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); + jump->u.target = (sljit_uw)srcw; + + FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + return push_inst16(compiler, (type <= SLJIT_JUMP ? 
BX : BLX) | RN3(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ +#ifdef __SOFTFP__ + sljit_u32 extra_space = (sljit_u32)type; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) { + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src))); + src = TMP_REG1; + } + +#ifdef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP; + + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) + | RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw)))); + + FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); + + if (type & SLJIT_CALL_RETURN) + return push_inst16(compiler, BX | RN3(TMP_REG2)); + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + return softfloat_post_call_with_args(compiler, arg_types); + } +#endif /* __SOFTFP__ */ + + if (type & SLJIT_CALL_RETURN) { + /* ldmia sp!, {..., lr} */ + FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP; + } + +#ifndef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +#ifdef __SOFTFP__ + +static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + if (compiler->options & SLJIT_ENTER_REG_ARG) { + if (src == SLJIT_FR0) + return SLJIT_SUCCESS; + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw); + } + + if (FAST_IS_REG(src)) { + if (op & SLJIT_32) + return push_inst32(compiler, VMOV | (1 << 20) | DN4(src) | RT4(SLJIT_R0)); + return push_inst32(compiler, VMOV2 | (1 << 20) | DM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1)); + } + + SLJIT_SKIP_CHECKS(compiler); + + if (op & SLJIT_32) + return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw); + return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw); +} + +#endif /* __SOFTFP__ */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 dst_r, flags = GET_ALL_FLAGS(op); + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + cc = get_cc(compiler, type); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (op < SLJIT_ADD) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); + if (reg_map[dst_r] > 7) { + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1)); + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0)); + } else { + /* The movsi (immediate) instruction does not set flags in IT block. 
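   This is the general Thumb rule that 16-bit data-processing encodings
   set flags only when used outside an IT block, so the MOVSI pair below
   cannot disturb the condition that the IT instruction just armed; high
   registers take the 32-bit MOV_WI route above because no 16-bit MOV
   immediate can reach r8-r12.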
*/ + FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1)); + FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0)); + } + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2); + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2)); + + if (op == SLJIT_AND) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); + FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1)); + FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0)); + } + else { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1)); + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2)); + + if (!(flags & SLJIT_SET_Z)) + return SLJIT_SUCCESS; + + /* The condition must always be set, even if the ORR/EORI is not executed above. */ + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_uw cc, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + cc = get_cc(compiler, type & ~SLJIT_32); + + if (!(src & SLJIT_IMM)) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src)); + } + + tmp = (sljit_uw) srcw; + + if (tmp < 0x10000) { + /* set low 16 bits, set hi 16 bits to 0. */ + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst32(compiler, MOVW | RD4(dst_reg) + | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)); + } + + tmp = get_imm((sljit_uw)srcw); + if (tmp != INVALID_IMM) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp); + } + + tmp = get_imm(~(sljit_uw)srcw); + if (tmp != INVALID_IMM) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp); + } + + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4)); + + tmp = (sljit_uw) srcw; + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg) + | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff))); + return push_inst32(compiler, MOVT | RD4(dst_reg) + | COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + sljit_uw imm, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32)) { + if ((mem & REG_MASK) == 0) { + if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) { + imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff)); + + if (imm != INVALID_IMM) + memw = (memw & 0xfff) - 0x1000; + } else { + imm = get_imm((sljit_uw)(memw & ~0xfff)); + + if (imm != INVALID_IMM) + memw &= 0xff; + } + + if (imm == INVALID_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, 
(sljit_uw)memw)); + memw = 0; + } else + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm)); + + mem = SLJIT_MEM1(TMP_REG1); + } else if (mem & OFFS_REG_MASK) { + FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6))); + memw = 0; + mem = SLJIT_MEM1(TMP_REG1); + } else if (memw < -0xff) { + /* Zero value can be included in the first case. */ + if ((-memw & 0xfff) <= SSIZE_OF(sw)) + tmp = (sljit_uw)((-memw + 0x7ff) & ~0x7ff); + else + tmp = (sljit_uw)((-memw + 0xfff) & ~0xfff); + + SLJIT_ASSERT(tmp >= (sljit_uw)-memw); + imm = get_imm(tmp); + + if (imm != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm)); + memw += (sljit_sw)tmp; + SLJIT_ASSERT(memw >= 0 && memw <= 0xfff - SSIZE_OF(sw)); + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK))); + memw = 0; + } + + mem = SLJIT_MEM1(TMP_REG1); + } else if (memw >= (0x1000 - SSIZE_OF(sw))) { + if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) { + imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff)); + + if (imm != INVALID_IMM) + memw = (memw & 0xfff) - 0x1000; + } else { + imm = get_imm((sljit_uw)(memw & ~0xfff)); + + if (imm != INVALID_IMM) + memw &= 0xfff; + } + + if (imm != INVALID_IMM) { + SLJIT_ASSERT(memw >= -0xff && memw <= 0xfff); + FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm)); + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK))); + memw = 0; + } + + mem = SLJIT_MEM1(TMP_REG1); + } + + flags = WORD_SIZE; + + SLJIT_ASSERT(memw <= 0xfff - SSIZE_OF(sw) && memw >= -0xff); + + if (type & SLJIT_MEM_STORE) { + flags |= STORE; + } else if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2)); + return emit_op_mem(compiler, WORD_SIZE, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2); + } + + FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2)); + return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2); + } + + flags = 1 << 23; + + if ((mem & REG_MASK) == 0) { + tmp = (sljit_uw)(memw & 0x7fc); + imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc)); + + if (imm == INVALID_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + memw = 0; + } else { + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm)); + memw = (memw & 0x3fc) >> 2; + + if (tmp > 0x400) { + memw = 0x100 - memw; + flags = 0; + } + + SLJIT_ASSERT(memw >= 0 && memw <= 0xff); + } + + mem = SLJIT_MEM1(TMP_REG1); + } else if (mem & OFFS_REG_MASK) { + FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6))); + memw = 0; + mem = SLJIT_MEM1(TMP_REG1); + } else if (memw < 0) { + if ((-memw & ~0x3fc) == 0) { + flags = 0; + memw = -memw >> 2; + } else { + tmp = (sljit_uw)(-memw & 0x7fc); + imm = get_imm((sljit_uw)((-memw + (tmp <= 0x400 ? 
0 : 0x400)) & ~0x3fc)); + + if (imm != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm)); + memw = (-memw & 0x3fc) >> 2; + + if (tmp <= 0x400) + flags = 0; + else + memw = 0x100 - memw; + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK))); + memw = 0; + } + + mem = SLJIT_MEM1(TMP_REG1); + } + } else if ((memw & ~0x3fc) != 0) { + tmp = (sljit_uw)(memw & 0x7fc); + imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc)); + + if (imm != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm)); + memw = (memw & 0x3fc) >> 2; + + if (tmp > 0x400) { + memw = 0x100 - memw; + flags = 0; + } + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw)); + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK))); + memw = 0; + } + + mem = SLJIT_MEM1(TMP_REG1); + } else + memw >>= 2; + + SLJIT_ASSERT(memw >= 0 && memw <= 0xff); + return push_inst32(compiler, ((type & SLJIT_MEM_STORE) ? STRD : LDRD) | (sljit_ins)flags | RN4(mem & REG_MASK) | RT4(REG_PAIR_FIRST(reg)) | RD4(REG_PAIR_SECOND(reg)) | (sljit_ins)memw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); + + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + case SLJIT_MOV_P: + flags = WORD_SIZE; + break; + case SLJIT_MOV_U8: + flags = BYTE_SIZE; + break; + case SLJIT_MOV_S8: + flags = BYTE_SIZE | SIGNED; + break; + case SLJIT_MOV_U16: + flags = HALF_SIZE; + break; + case SLJIT_MOV_S16: + flags = HALF_SIZE | SIGNED; + break; + default: + SLJIT_UNREACHABLE(); + flags = WORD_SIZE; + break; + } + + if (type & SLJIT_MEM_STORE) + flags |= STORE; + + inst = sljit_mem32[flags] | 0x900; + + if (!(type & SLJIT_MEM_POST)) + inst |= 0x400; + + if (memw >= 0) + inst |= 0x200; + else + memw = -memw; + + return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | (sljit_ins)memw); +} + +static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset) +{ + sljit_s32 arg = *mem; + sljit_sw argw = *memw; + sljit_uw imm; + + *mem = TMP_REG1; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + *memw = 0; + return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 6)); + } + + arg &= REG_MASK; + + if (arg) { + if (argw <= max_offset && argw >= -0xff) { + *mem = arg; + return SLJIT_SUCCESS; + } + + if (argw < 0) { + imm = get_imm((sljit_uw)(-argw & ~0xff)); + + if (imm) { + *memw = -(-argw & 0xff); + return push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg) | imm); + } + } else if ((argw & 0xfff) <= max_offset) { + imm = get_imm((sljit_uw)(argw & ~0xfff)); + + if (imm) { + *memw = argw & 0xfff; + return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm); + } + } else { + imm = get_imm((sljit_uw)((argw | 0xfff) + 1)); + + if (imm) { + *memw = (argw & 0xfff) - 0x1000; + return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm); + } + } + } + + imm 
= (sljit_uw)(argw & ~0xfff); + + if ((argw & 0xfff) > max_offset) { + imm += 0x1000; + *memw = (argw & 0xfff) - 0x1000; + } else + *memw = argw & 0xfff; + + FAIL_IF(load_immediate(compiler, TMP_REG1, imm)); + + if (arg == 0) + return SLJIT_SUCCESS; + + return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, arg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + if (type & SLJIT_MEM_UNALIGNED_32) + return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | DN4(freg) | RT4(TMP_REG2))); + + if (type & SLJIT_32) + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1); + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | DN4(freg) | 0x80 | RT4(TMP_REG2))); + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw + 4, TMP_REG1); + } + + if (type & SLJIT_32) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1)); + return push_inst32(compiler, VMOV | DN4(freg) | RT4(TMP_REG2)); + } + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, mem, memw + 4, TMP_REG1)); + return push_inst32(compiler, VMOV2 | DM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1)); +} + +#undef FPU_LOAD + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, (sljit_uw)init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, 0)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_u16 *inst = (sljit_u16*)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0); + modify_imm32_const(inst, new_target); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1); + inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 4); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeMIPS_32.c b/contrib/libs/pcre2/src/sljit/sljitNativeMIPS_32.c new file mode 100644 index 0000000000..e6853c98f6 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeMIPS_32.c @@ -0,0 +1,314 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* mips 32-bit arch dependent functions. */ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) +{ + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + if (imm < 0 && imm >= SIMM_MIN) + return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar)); + return (imm & 0xffff) ? 
push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst))); + return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + SLJIT_ASSERT((inst[0] & 0xffe00000) == LUI && (inst[1] & 0xfc000000) == ORI); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr, sljit_u32 *extra_space) +{ + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 types = 0; + sljit_ins prev_ins = NOP; + sljit_ins ins = NOP; + sljit_u8 offsets[4]; + sljit_u8 *offsets_ptr = offsets; + + SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); + + arg_types >>= SLJIT_ARG_SHIFT; + + /* See ABI description in sljit_emit_enter. */ + + while (arg_types) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + *offsets_ptr = (sljit_u8)offset; + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) { + offset += sizeof(sljit_sw); + *offsets_ptr = (sljit_u8)offset; + } + + if (word_arg_count == 0 && float_arg_count <= 1) + *offsets_ptr = (sljit_u8)(254 + float_arg_count); + + offset += sizeof(sljit_f64); + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + if (word_arg_count == 0 && float_arg_count <= 1) + *offsets_ptr = (sljit_u8)(254 + float_arg_count); + + offset += sizeof(sljit_f32); + float_arg_count++; + break; + default: + offset += sizeof(sljit_sw); + word_arg_count++; + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + offsets_ptr++; + } + + /* Stack is aligned to 16 bytes. */ + SLJIT_ASSERT(offset <= 8 * sizeof(sljit_sw)); + + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + if (is_tail_call) { + offset = (offset + sizeof(sljit_sw) + 15) & ~(sljit_uw)0xf; + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset, &prev_ins)); + *extra_space = offset; + } else { + FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP))); + *extra_space = 16; + } + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, 0, &prev_ins)); + *extra_space = 0; + } + + while (types) { + --offsets_ptr; + + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (*offsets_ptr < 4 * sizeof (sljit_sw)) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + + /* Must be preceded by at least one other argument, + * and its starting offset must be 8 because of alignment. 
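	 * Editor's addition: under the MIPS o32 ABI the first 16 bytes of
	 * arguments ride in $a0-$a3, and a double passed in GPRs must start
	 * at an even register, hence the offset-8 assertion below. The MFC1
	 * pair with TA(6)/TA(7) then moves the double's two halves into
	 * $a2/$a3 (GPRs 6 and 7), with the (1 << 11) apparently selecting
	 * the companion odd register of the even/odd FPR pair.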
*/ + SLJIT_ASSERT((*offsets_ptr >> 2) == 2); + + prev_ins = MFC1 | TA(6) | FS(float_arg_count) | (1 << 11); + ins = MFC1 | TA(7) | FS(float_arg_count); + } else if (*offsets_ptr < 254) + ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr); + else if (*offsets_ptr == 254) + ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); + + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + if (*offsets_ptr < 4 * sizeof (sljit_sw)) + ins = MFC1 | TA(4 + (*offsets_ptr >> 2)) | FS(float_arg_count); + else if (*offsets_ptr < 254) + ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr); + else if (*offsets_ptr == 254) + ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); + + float_arg_count--; + break; + default: + if (*offsets_ptr >= 4 * sizeof (sljit_sw)) + ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(*offsets_ptr); + else if ((*offsets_ptr >> 2) != word_arg_count - 1) + ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (*offsets_ptr >> 2)); + else if (*offsets_ptr == 0) + ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4); + + word_arg_count--; + break; + } + + if (ins != NOP) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = ins; + ins = NOP; + } + + types >>= SLJIT_ARG_SHIFT; + } + + *ins_ptr = prev_ins; + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + struct sljit_jump *jump; + sljit_u32 extra_space = 0; + sljit_ins ins = NOP; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + extra_space = (sljit_u32)type; + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space)); + } else if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + + if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) { + jump->flags |= IS_JAL; + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + jump->flags |= IS_CALL; + + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + } else + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += 2; + + if (extra_space == 0) + return jump; + + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, + SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw)))); + + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + + PTR_FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space), + (type & SLJIT_CALL_RETURN) ? 
UNMOVABLE_INS : DR(SLJIT_SP))); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u32 extra_space = (sljit_u32)type; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + src = PIC_ADDR_REG; + srcw = 0; + } + + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + src = PIC_ADDR_REG; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + if (ins != NOP) + FAIL_IF(push_inst(compiler, ins, MOVABLE_INS)); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); + } + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); + else if (src != PIC_ADDR_REG) + FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + + FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space)); + + /* Register input. */ + if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); + + if (extra_space == 0) + return SLJIT_SUCCESS; + + if (type & SLJIT_CALL_RETURN) + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, + SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw)))); + + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + + return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space), + (type & SLJIT_CALL_RETURN) ? UNMOVABLE_INS : DR(SLJIT_SP)); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeMIPS_64.c b/contrib/libs/pcre2/src/sljit/sljitNativeMIPS_64.c new file mode 100644 index 0000000000..d2a5924f8e --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeMIPS_64.c @@ -0,0 +1,319 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* mips 64-bit arch dependent functions. */ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm) +{ + sljit_s32 shift = 32; + sljit_s32 shift2; + sljit_s32 inv = 0; + sljit_ins ins; + sljit_uw uimm; + + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + if (imm < 0 && imm >= SIMM_MIN) + return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + if (imm <= 0x7fffffffl && imm >= -0x80000000l) { + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar)); + return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS; + } + + /* Zero extended number. */ + uimm = (sljit_uw)imm; + if (imm < 0) { + uimm = ~(sljit_uw)imm; + inv = 1; + } + + while (!(uimm & 0xff00000000000000l)) { + shift -= 8; + uimm <<= 8; + } + + if (!(uimm & 0xf000000000000000l)) { + shift -= 4; + uimm <<= 4; + } + + if (!(uimm & 0xc000000000000000l)) { + shift -= 2; + uimm <<= 2; + } + + if ((sljit_sw)uimm < 0) { + uimm >>= 1; + shift += 1; + } + SLJIT_ASSERT(((uimm & 0xc000000000000000l) == 0x4000000000000000l) && (shift > 0) && (shift <= 32)); + + if (inv) + uimm = ~uimm; + + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(uimm >> 48), dst_ar)); + if (uimm & 0x0000ffff00000000l) + FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 32), dst_ar)); + + imm &= (1l << shift) - 1; + if (!(imm & ~0xffff)) { + ins = (shift == 32) ? DSLL32 : DSLL; + if (shift < 32) + ins |= SH_IMM(shift); + FAIL_IF(push_inst(compiler, ins | TA(dst_ar) | DA(dst_ar), dst_ar)); + return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar); + } + + /* Double shifts need to be performed. */ + uimm <<= 32; + shift2 = shift - 16; + + while (!(uimm & 0xf000000000000000l)) { + shift2 -= 4; + uimm <<= 4; + } + + if (!(uimm & 0xc000000000000000l)) { + shift2 -= 2; + uimm <<= 2; + } + + if (!(uimm & 0x8000000000000000l)) { + shift2--; + uimm <<= 1; + } + + SLJIT_ASSERT((uimm & 0x8000000000000000l) && (shift2 > 0) && (shift2 <= 16)); + + FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift - shift2), dst_ar)); + FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 48), dst_ar)); + FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift2), dst_ar)); + + imm &= (1l << shift2) - 1; + return !(imm & 0xffff) ? 
SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar); +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst))); + FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value >> 32), DR(dst))); + FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst))); + FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value >> 16), DR(dst))); + FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 0); + inst[0] = (inst[0] & 0xffff0000) | ((sljit_ins)(new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((sljit_ins)(new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((sljit_ins)(new_target >> 16) & 0xffff); + inst[5] = (inst[5] & 0xffff0000) | ((sljit_ins)new_target & 0xffff); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 6); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) +{ + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_ins prev_ins = *ins_ptr; + sljit_ins ins = NOP; + + SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: + arg_count++; + float_arg_count++; + break; + default: + arg_count++; + word_arg_count++; + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (arg_count != float_arg_count) + ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count); + else if (arg_count == 1) + ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); + arg_count--; + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + if (arg_count != float_arg_count) + ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count); + else if (arg_count == 1) + ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); + arg_count--; + float_arg_count--; + break; + default: + if (arg_count != word_arg_count) + ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count); + else if (arg_count == 1) + ins = DADDU | S(SLJIT_R0) | TA(0) | DA(4); + arg_count--; + word_arg_count--; + break; + } + + if (ins != NOP) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = ins; + ins = NOP; + } + + types >>= SLJIT_ARG_SHIFT; + } + + *ins_ptr = prev_ins; + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + struct sljit_jump *jump; + sljit_ins ins = NOP; + + CHECK_ERROR_PTR(); + 
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + + if (!(type & SLJIT_CALL_RETURN)) { + jump->flags |= IS_JAL; + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + jump->flags |= IS_CALL; + + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + } else + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); + + /* Maximum number of instructions required for generating a constant. */ + compiler->size += 6; + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins = NOP; + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + src = PIC_ADDR_REG; + srcw = 0; + } + + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + src = PIC_ADDR_REG; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + if (ins != NOP) + FAIL_IF(push_inst(compiler, ins, MOVABLE_INS)); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); + } + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); + else if (src != PIC_ADDR_REG) + FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + + if (type & SLJIT_CALL_RETURN) + FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + FAIL_IF(call_with_args(compiler, arg_types, &ins)); + + /* Register input. */ + if (!(type & SLJIT_CALL_RETURN)) + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, ins, UNMOVABLE_INS); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeMIPS_common.c b/contrib/libs/pcre2/src/sljit/sljitNativeMIPS_common.c new file mode 100644 index 0000000000..9afe901c38 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeMIPS_common.c @@ -0,0 +1,3720 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Latest MIPS architecture. */ + +#ifndef __mips_hard_float +/* Disable automatic detection, covers both -msoft-float and -mno-float */ +#undef SLJIT_IS_FPU_AVAILABLE +#define SLJIT_IS_FPU_AVAILABLE 0 +#endif + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R6" SLJIT_CPUINFO; +#else /* !SLJIT_CONFIG_MIPS_32 */ + return "MIPS64-R6" SLJIT_CPUINFO; +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R2" SLJIT_CPUINFO; +#else /* !SLJIT_CONFIG_MIPS_32 */ + return "MIPS64-R2" SLJIT_CPUINFO; +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R1" SLJIT_CPUINFO; +#else /* !SLJIT_CONFIG_MIPS_32 */ + return "MIPS64-R1" SLJIT_CPUINFO; +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#else /* SLJIT_MIPS_REV < 1 */ + return "MIPS III" SLJIT_CPUINFO; +#endif /* SLJIT_MIPS_REV >= 6 */ +} + +/* Length of an instruction word + Both for mips-32 and mips-64 */ +typedef sljit_u32 sljit_ins; + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) + +/* For position independent code, t9 must contain the function address. */ +#define PIC_ADDR_REG TMP_REG2 + +/* Floating point status register. */ +#define FCSR_REG 31 +/* Return address register. */ +#define RETURN_ADDR_REG 31 + +/* Flags are kept in volatile registers. 
*/ +#define EQUAL_FLAG 3 +#define OTHER_FLAG 1 + +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) +#define TMP_FREG3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3) + +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31 +}; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { + 0, 0, 14, 2, 4, 6, 8, 18, 30, 28, 26, 24, 22, 20, 12, 10, 16 +}; + +#else + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { + 0, 0, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 30, 29, 28, 27, 26, 25, 24, 12, 11, 10 +}; + +#endif + +/* --------------------------------------------------------------------- */ +/* Instruction forms */ +/* --------------------------------------------------------------------- */ + +#define S(s) ((sljit_ins)reg_map[s] << 21) +#define T(t) ((sljit_ins)reg_map[t] << 16) +#define D(d) ((sljit_ins)reg_map[d] << 11) +#define FT(t) ((sljit_ins)freg_map[t] << 16) +#define FS(s) ((sljit_ins)freg_map[s] << 11) +#define FD(d) ((sljit_ins)freg_map[d] << 6) +/* Absolute registers. */ +#define SA(s) ((sljit_ins)(s) << 21) +#define TA(t) ((sljit_ins)(t) << 16) +#define DA(d) ((sljit_ins)(d) << 11) +#define IMM(imm) ((sljit_ins)(imm) & 0xffff) +#define SH_IMM(imm) ((sljit_ins)(imm) << 6) + +#define DR(dr) (reg_map[dr]) +#define FR(dr) (freg_map[dr]) +#define HI(opcode) ((sljit_ins)(opcode) << 26) +#define LO(opcode) ((sljit_ins)(opcode)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +/* CMP.cond.fmt */ +/* S = (20 << 21) D = (21 << 21) */ +#define CMP_FMT_S (20 << 21) +#endif /* SLJIT_MIPS_REV >= 6 */ +/* S = (16 << 21) D = (17 << 21) */ +#define FMT_S (16 << 21) +#define FMT_D (17 << 21) + +#define ABS_S (HI(17) | FMT_S | LO(5)) +#define ADD_S (HI(17) | FMT_S | LO(0)) +#define ADDIU (HI(9)) +#define ADDU (HI(0) | LO(33)) +#define AND (HI(0) | LO(36)) +#define ANDI (HI(12)) +#define B (HI(4)) +#define BAL (HI(1) | (17 << 16)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define BC1EQZ (HI(17) | (9 << 21) | FT(TMP_FREG3)) +#define BC1NEZ (HI(17) | (13 << 21) | FT(TMP_FREG3)) +#else /* SLJIT_MIPS_REV < 6 */ +#define BC1F (HI(17) | (8 << 21)) +#define BC1T (HI(17) | (8 << 21) | (1 << 16)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define BEQ (HI(4)) +#define BGEZ (HI(1) | (1 << 16)) +#define BGTZ (HI(7)) +#define BLEZ (HI(6)) +#define BLTZ (HI(1) | (0 << 16)) +#define BNE (HI(5)) +#define BREAK (HI(0) | LO(13)) +#define CFC1 (HI(17) | (2 << 21)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define C_EQ_S (HI(17) | CMP_FMT_S | LO(2)) +#define C_OLE_S (HI(17) | CMP_FMT_S | LO(6)) +#define C_OLT_S (HI(17) | CMP_FMT_S | LO(4)) +#define C_UEQ_S (HI(17) | CMP_FMT_S | LO(3)) +#define C_ULE_S (HI(17) | CMP_FMT_S | LO(7)) +#define C_ULT_S (HI(17) | CMP_FMT_S | LO(5)) +#define C_UN_S (HI(17) | CMP_FMT_S | LO(1)) +#define C_FD (FD(TMP_FREG3)) +#else /* SLJIT_MIPS_REV < 6 */ +#define C_EQ_S (HI(17) | FMT_S | LO(50)) +#define C_OLE_S (HI(17) | FMT_S | LO(54)) +#define C_OLT_S (HI(17) | FMT_S | LO(52)) +#define C_UEQ_S (HI(17) | FMT_S | LO(51)) +#define C_ULE_S (HI(17) | FMT_S | LO(55)) +#define C_ULT_S (HI(17) | FMT_S | LO(53)) +#define C_UN_S (HI(17) | FMT_S | LO(49)) +#define C_FD (0) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define CVT_S_S (HI(17) | FMT_S | LO(32)) +#define DADDIU (HI(25)) +#define DADDU 
(HI(0) | LO(45)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define DDIV (HI(0) | (2 << 6) | LO(30)) +#define DDIVU (HI(0) | (2 << 6) | LO(31)) +#define DMOD (HI(0) | (3 << 6) | LO(30)) +#define DMODU (HI(0) | (3 << 6) | LO(31)) +#define DIV (HI(0) | (2 << 6) | LO(26)) +#define DIVU (HI(0) | (2 << 6) | LO(27)) +#define DMUH (HI(0) | (3 << 6) | LO(28)) +#define DMUHU (HI(0) | (3 << 6) | LO(29)) +#define DMUL (HI(0) | (2 << 6) | LO(28)) +#define DMULU (HI(0) | (2 << 6) | LO(29)) +#else /* SLJIT_MIPS_REV < 6 */ +#define DDIV (HI(0) | LO(30)) +#define DDIVU (HI(0) | LO(31)) +#define DIV (HI(0) | LO(26)) +#define DIVU (HI(0) | LO(27)) +#define DMULT (HI(0) | LO(28)) +#define DMULTU (HI(0) | LO(29)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define DIV_S (HI(17) | FMT_S | LO(3)) +#define DINSU (HI(31) | LO(6)) +#define DROTR (HI(0) | (1 << 21) | LO(58)) +#define DROTR32 (HI(0) | (1 << 21) | LO(62)) +#define DROTRV (HI(0) | (1 << 6) | LO(22)) +#define DSLL (HI(0) | LO(56)) +#define DSLL32 (HI(0) | LO(60)) +#define DSLLV (HI(0) | LO(20)) +#define DSRA (HI(0) | LO(59)) +#define DSRA32 (HI(0) | LO(63)) +#define DSRAV (HI(0) | LO(23)) +#define DSRL (HI(0) | LO(58)) +#define DSRL32 (HI(0) | LO(62)) +#define DSRLV (HI(0) | LO(22)) +#define DSUBU (HI(0) | LO(47)) +#define J (HI(2)) +#define JAL (HI(3)) +#define JALR (HI(0) | LO(9)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define JR (HI(0) | LO(9)) +#else /* SLJIT_MIPS_REV < 6 */ +#define JR (HI(0) | LO(8)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define LD (HI(55)) +#define LDL (HI(26)) +#define LDR (HI(27)) +#define LDC1 (HI(53)) +#define LUI (HI(15)) +#define LW (HI(35)) +#define LWL (HI(34)) +#define LWR (HI(38)) +#define LWC1 (HI(49)) +#define MFC1 (HI(17)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define MOD (HI(0) | (3 << 6) | LO(26)) +#define MODU (HI(0) | (3 << 6) | LO(27)) +#else /* SLJIT_MIPS_REV < 6 */ +#define MFHI (HI(0) | LO(16)) +#define MFLO (HI(0) | LO(18)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define MOV_S (HI(17) | FMT_S | LO(6)) +#define MTC1 (HI(17) | (4 << 21)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define MUH (HI(0) | (3 << 6) | LO(24)) +#define MUHU (HI(0) | (3 << 6) | LO(25)) +#define MUL (HI(0) | (2 << 6) | LO(24)) +#define MULU (HI(0) | (2 << 6) | LO(25)) +#else /* SLJIT_MIPS_REV < 6 */ +#define MULT (HI(0) | LO(24)) +#define MULTU (HI(0) | LO(25)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define MUL_S (HI(17) | FMT_S | LO(2)) +#define NEG_S (HI(17) | FMT_S | LO(7)) +#define NOP (HI(0) | LO(0)) +#define NOR (HI(0) | LO(39)) +#define OR (HI(0) | LO(37)) +#define ORI (HI(13)) +#define ROTR (HI(0) | (1 << 21) | LO(2)) +#define ROTRV (HI(0) | (1 << 6) | LO(6)) +#define SD (HI(63)) +#define SDL (HI(44)) +#define SDR (HI(45)) +#define SDC1 (HI(61)) +#define SLT (HI(0) | LO(42)) +#define SLTI (HI(10)) +#define SLTIU (HI(11)) +#define SLTU (HI(0) | LO(43)) +#define SLL (HI(0) | LO(0)) +#define SLLV (HI(0) | LO(4)) +#define SRL (HI(0) | LO(2)) +#define SRLV (HI(0) | LO(6)) +#define SRA (HI(0) | LO(3)) +#define SRAV (HI(0) | LO(7)) +#define SUB_S (HI(17) | FMT_S | LO(1)) +#define SUBU (HI(0) | LO(35)) +#define SW (HI(43)) +#define SWL (HI(42)) +#define SWR (HI(46)) +#define SWC1 (HI(57)) +#define TRUNC_W_S (HI(17) | FMT_S | LO(13)) +#define XOR (HI(0) | LO(38)) +#define XORI (HI(14)) + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +#define CLZ (HI(28) | LO(32)) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define DCLZ (LO(18)) +#else /* SLJIT_MIPS_REV < 6 */ +#define DCLZ (HI(28) | 
LO(36)) +#define MOVF (HI(0) | (0 << 16) | LO(1)) +#define MOVN (HI(0) | LO(11)) +#define MOVT (HI(0) | (1 << 16) | LO(1)) +#define MOVZ (HI(0) | LO(10)) +#define MUL (HI(28) | LO(2)) +#endif /* SLJIT_MIPS_REV >= 6 */ +#define PREF (HI(51)) +#define PREFX (HI(19) | LO(15)) +#define SEB (HI(31) | (16 << 6) | LO(32)) +#define SEH (HI(31) | (24 << 6) | LO(32)) +#endif /* SLJIT_MIPS_REV >= 1 */ + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define ADDU_W ADDU +#define ADDIU_W ADDIU +#define SLL_W SLL +#define SRA_W SRA +#define SUBU_W SUBU +#define STORE_W SW +#define LOAD_W LW +#else +#define ADDU_W DADDU +#define ADDIU_W DADDIU +#define SLL_W DSLL +#define SRA_W DSRA +#define SUBU_W DSUBU +#define STORE_W SD +#define LOAD_W LD +#endif + +#define SIMM_MAX (0x7fff) +#define SIMM_MIN (-0x8000) +#define UIMM_MAX (0xffff) + +/* dest_reg is the absolute name of the register + Useful for reordering instructions in the delay slot. */ +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS + || (sljit_ins)delay_slot == ((ins >> 11) & 0x1f) + || (sljit_ins)delay_slot == ((ins >> 16) & 0x1f)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + compiler->delay_slot = delay_slot; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_ins invert_branch(sljit_uw flags) +{ + if (flags & IS_BIT26_COND) + return (1 << 26); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + if (flags & IS_BIT23_COND) + return (1 << 23); +#endif /* SLJIT_MIPS_REV >= 6 */ + return (1 << 16); +} + +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_ins *inst; + sljit_ins saved_inst; + + inst = (sljit_ins *)jump->addr; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) + goto exit; +#else + if (jump->flags & SLJIT_REWRITABLE_JUMP) + goto exit; +#endif + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + + if (jump->flags & IS_COND) + inst--; + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (jump->flags & IS_CALL) + goto preserve_addr; +#endif + + /* B instructions. */ + if (jump->flags & IS_MOVABLE) { + diff = ((sljit_sw)target_addr - (sljit_sw)inst - executable_offset) >> 2; + if (diff <= SIMM_MAX && diff >= SIMM_MIN) { + jump->flags |= PATCH_B; + + if (!(jump->flags & IS_COND)) { + inst[0] = inst[-1]; + inst[-1] = (jump->flags & IS_JAL) ? BAL : B; + jump->addr -= sizeof(sljit_ins); + return inst; + } + saved_inst = inst[0]; + inst[0] = inst[-1]; + inst[-1] = saved_inst ^ invert_branch(jump->flags); + jump->addr -= 2 * sizeof(sljit_ins); + return inst; + } + } else { + diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1) - executable_offset) >> 2; + if (diff <= SIMM_MAX && diff >= SIMM_MIN) { + jump->flags |= PATCH_B; + + if (!(jump->flags & IS_COND)) { + inst[0] = (jump->flags & IS_JAL) ? 
BAL : B; + /* Keep inst[1] */ + return inst + 1; + } + inst[0] ^= invert_branch(jump->flags); + inst[1] = NOP; + jump->addr -= sizeof(sljit_ins); + return inst + 1; + } + } + + if (jump->flags & IS_COND) { + if ((jump->flags & IS_MOVABLE) && (target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { + jump->flags |= PATCH_J; + saved_inst = inst[0]; + inst[0] = inst[-1]; + inst[-1] = (saved_inst & 0xffff0000) | 3; + inst[1] = J; + inst[2] = NOP; + return inst + 2; + } + else if ((target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = (inst[0] & 0xffff0000) | 3; + inst[1] = NOP; + inst[2] = J; + inst[3] = NOP; + jump->addr += sizeof(sljit_ins); + return inst + 3; + } + } + else { + /* J instructions. */ + if ((jump->flags & IS_MOVABLE) && (target_addr & ~(sljit_uw)0xfffffff) == (jump->addr & ~(sljit_uw)0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = inst[-1]; + inst[-1] = (jump->flags & IS_JAL) ? JAL : J; + jump->addr -= sizeof(sljit_ins); + return inst; + } + + if ((target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = (jump->flags & IS_JAL) ? JAL : J; + /* Keep inst[1] */ + return inst + 1; + } + } + + if (jump->flags & IS_COND) + inst++; + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +preserve_addr: + if (target_addr <= 0x7fffffff) { + jump->flags |= PATCH_ABS32; + if (jump->flags & IS_COND) + inst[-1] -= 4; + + inst[2] = inst[0]; + inst[3] = inst[1]; + return inst + 3; + } + if (target_addr <= 0x7fffffffffffl) { + jump->flags |= PATCH_ABS48; + if (jump->flags & IS_COND) + inst[-1] -= 2; + + inst[4] = inst[0]; + inst[5] = inst[1]; + return inst + 5; + } +#endif + +exit: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + inst[2] = inst[0]; + inst[3] = inst[1]; + return inst + 3; +#else + inst[6] = inst[0]; + inst[7] = inst[1]; + return inst + 7; +#endif +} + +#ifdef __GNUC__ +static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ptr) +{ + SLJIT_CACHE_FLUSH(code, code_ptr); +} +#endif + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + +static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +{ + if (max_label < 0x80000000l) { + put_label->flags = PATCH_ABS32; + return 1; + } + + if (max_label < 0x800000000000l) { + put_label->flags = PATCH_ABS48; + return 3; + } + + put_label->flags = 0; + return 5; +} + +#endif /* SLJIT_CONFIG_MIPS_64 */ + +static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) +{ + struct sljit_jump *jump; + struct sljit_put_label *put_label; + sljit_uw flags; + sljit_ins *inst; + sljit_uw addr; + + if (reg != 0) { + jump = (struct sljit_jump*)dst; + flags = jump->flags; + inst = (sljit_ins*)jump->addr; + addr = (flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; + } else { + put_label = (struct sljit_put_label*)dst; +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + flags = put_label->flags; +#endif + inst = (sljit_ins*)put_label->addr; + addr = put_label->label->addr; + reg = *inst; + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + inst[0] = LUI | T(reg) | IMM(addr >> 16); +#else /* !SLJIT_CONFIG_MIPS_32 */ + if (flags & PATCH_ABS32) { + SLJIT_ASSERT(addr < 0x80000000l); + inst[0] = LUI | T(reg) | IMM(addr >> 16); + } + else if (flags & PATCH_ABS48) { + SLJIT_ASSERT(addr < 0x800000000000l); + inst[0] = LUI | T(reg) | IMM(addr >> 32); + inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); + inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); + inst += 2; + } + else { + inst[0] = LUI | T(reg) | IMM(addr >> 48); + inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff); + inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16); + inst[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); + inst[4] = DSLL | T(reg) | D(reg) | SH_IMM(16); + inst += 4; + } +#endif /* SLJIT_CONFIG_MIPS_32 */ + + inst[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. 
*/ + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + word_count += 2; +#else + word_count += 6; +#endif + jump->addr = (sljit_uw)(code_ptr - 1); + code_ptr = detect_jump_type(jump, code, executable_offset); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + code_ptr += 1; + word_count += 1; +#else + code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); + word_count += 5; +#endif + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr++; + word_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins *)jump->addr; + + if (jump->flags & PATCH_B) { + addr = (sljit_uw)((sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) - sizeof(sljit_ins)) >> 2); + SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((sljit_ins)addr & 0xffff); + break; + } + if (jump->flags & PATCH_J) { + SLJIT_ASSERT((addr & ~(sljit_uw)0xfffffff) + == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)); + buf_ptr[0] |= (sljit_ins)(addr >> 2) & 0x03ffffff; + break; + } + + load_addr_to_reg(jump, PIC_ADDR_REG); + } while (0); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { + load_addr_to_reg(put_label, 0); + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + +#ifndef __GNUC__ + SLJIT_CACHE_FLUSH(code, code_ptr); +#else + /* GCC workaround for invalid code generation with -O2. 
*/ + sljit_cache_flush(code, code_ptr); +#endif + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ +#if defined(__GNUC__) && !defined(SLJIT_IS_FPU_AVAILABLE) + sljit_sw fir = 0; +#endif /* __GNUC__ && !SLJIT_IS_FPU_AVAILABLE */ + + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#elif defined(__GNUC__) + __asm__ ("cfc1 %0, $0" : "=r"(fir)); + return (fir >> 22) & 0x1; +#else +#error "FIR check is not implemented for this architecture" +#endif + case SLJIT_HAS_ZERO_REGISTER: + return 1; +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: + case SLJIT_HAS_PREFETCH: + return 1; + + case SLJIT_HAS_CTZ: + return 2; +#endif /* SLJIT_MIPS_REV >= 1 */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + case SLJIT_HAS_ROT: + return 1; +#endif /* SLJIT_MIPS_REV >= 2 */ + default: + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + return (type >= SLJIT_ORDERED_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. */ +#define LOAD_DATA 0x01 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +/* Separates integer and floating point registers */ +#define GPR_REG 0x0f +#define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 + +#define MEM_MASK 0x1f + +#define ARG_TEST 0x00020 +#define ALT_KEEP_CACHE 0x00040 +#define CUMULATIVE_OP 0x00080 +#define LOGICAL_OP 0x00100 +#define IMM_OP 0x00200 +#define MOVE_OP 0x00400 +#define SRC2_IMM 0x00800 + +#define UNUSED_DEST 0x01000 +#define REG_DEST 0x02000 +#define REG1_SOURCE 0x04000 +#define REG2_SOURCE 0x08000 +#define SLOW_SRC1 0x10000 +#define SLOW_SRC2 0x20000 +#define SLOW_DEST 0x40000 + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw); +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr); + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#include "sljitNativeMIPS_32.c" +#else +#include "sljitNativeMIPS_64.c" +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_ins base; + sljit_s32 i, tmp, offset; + sljit_s32 arg_count, word_arg_count, float_arg_count; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } + + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; +#else 
+ local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + local_size = (local_size + SLJIT_LOCALS_OFFSET + 31) & ~0x1f; +#endif + compiler->local_size = local_size; + + offset = 0; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (!(options & SLJIT_ENTER_REG_ARG)) { + tmp = arg_types >> SLJIT_ARG_SHIFT; + arg_count = 0; + + while (tmp) { + offset = arg_count; + if ((tmp & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) { + if ((arg_count & 0x1) != 0) + arg_count++; + arg_count++; + } + + arg_count++; + tmp >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = (sljit_uw)arg_count << 2; + offset = (offset >= 4) ? (offset << 2) : 0; + } +#endif /* SLJIT_CONFIG_MIPS_32 */ + + if (local_size + offset <= -SIMM_MIN) { + /* Frequent case. */ + FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP))); + base = S(SLJIT_SP); + offset = local_size - SSIZE_OF(sw); + } else { + FAIL_IF(load_immediate(compiler, OTHER_FLAG, local_size)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | TA(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP))); + base = S(TMP_REG2); + offset = -SSIZE_OF(sw); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + local_size = 0; +#endif + } + + FAIL_IF(push_inst(compiler, STORE_W | base | TA(RETURN_ADDR_REG) | IMM(offset), UNMOVABLE_INS)); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STORE_W | base | T(i) | IMM(offset), MOVABLE_INS)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STORE_W | base | T(i) | IMM(offset), MOVABLE_INS)); + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* This alignment is valid because offset is not used after storing FPU regs. */ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, SDC1 | base | FT(i) | IMM(offset), MOVABLE_INS)); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, SDC1 | base | FT(i) | IMM(offset), MOVABLE_INS)); + } + + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + + arg_types >>= SLJIT_ARG_SHIFT; + arg_count = 0; + word_arg_count = 0; + float_arg_count = 0; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* The first maximum two floating point arguments are passed in floating point + registers if no integer argument precedes them. The first 16 byte data is + passed in four integer registers, the rest is placed onto the stack. + The floating point registers are also part of the first 16 byte data, so + their corresponding integer registers are not used when they are present. 
*/ + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + if ((arg_count & 0x1) != 0) + arg_count++; + + if (word_arg_count == 0 && float_arg_count <= 2) { + if (float_arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + } else if (arg_count < 4) { + FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MTC1 | TA(5 + arg_count) | FS(float_arg_count) | (1 << 11), MOVABLE_INS)); + } else + FAIL_IF(push_inst(compiler, LDC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS)); + arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + + if (word_arg_count == 0 && float_arg_count <= 2) { + if (float_arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + } else if (arg_count < 4) + FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, LWC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS)); + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count != arg_count + 1 || arg_count == 0) + tmp = word_arg_count; + else + break; + + if (arg_count < 4) + FAIL_IF(push_inst(compiler, ADDU_W | SA(4 + arg_count) | TA(0) | D(tmp), DR(tmp))); + else + FAIL_IF(push_inst(compiler, LW | base | T(tmp) | IMM(local_size + (arg_count << 2)), DR(tmp))); + break; + } + arg_count++; + arg_types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT(compiler->args_size == (sljit_uw)arg_count << 2); +#else /* !SLJIT_CONFIG_MIPS_32 */ + while (arg_types) { + arg_count++; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + if (arg_count != float_arg_count) + FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); + else if (arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + if (arg_count != float_arg_count) + FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); + else if (arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count != arg_count || word_arg_count <= 1) + tmp = word_arg_count; + else + break; + + FAIL_IF(push_inst(compiler, ADDU_W | SA(3 + arg_count) | TA(0) | D(tmp), DR(tmp))); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } +#endif /* SLJIT_CONFIG_MIPS_32 */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); +#if (defined 
SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } + + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; +#else + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 31) & ~0x1f; +#endif + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr) +{ + sljit_s32 local_size, i, tmp, offset; + sljit_s32 load_return_addr = (frame_size == 0); + sljit_s32 scratches = compiler->scratches; + sljit_s32 saveds = compiler->saveds; + sljit_s32 fsaveds = compiler->fsaveds; + sljit_s32 fscratches = compiler->fscratches; + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + + SLJIT_ASSERT(frame_size == 1 || (frame_size & 0xf) == 0); + frame_size &= ~0xf; + + local_size = compiler->local_size; + + tmp = GET_SAVED_REGISTERS_SIZE(scratches, saveds - kept_saveds_count, 1); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((tmp & SSIZE_OF(sw)) != 0) + tmp += SSIZE_OF(sw); + tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } +#else + tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); +#endif + + if (local_size <= SIMM_MAX) { + if (local_size < frame_size) { + FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(local_size - frame_size), DR(SLJIT_SP))); + local_size = frame_size; + } + } else { + if (tmp < frame_size) + tmp = frame_size; + + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size - tmp)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP))); + local_size = tmp; + } + + SLJIT_ASSERT(local_size >= frame_size); + + offset = local_size - SSIZE_OF(sw); + if (load_return_addr) + FAIL_IF(push_inst(compiler, LOAD_W | S(SLJIT_SP) | TA(RETURN_ADDR_REG) | IMM(offset), RETURN_ADDR_REG)); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - kept_saveds_count; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, LOAD_W | S(SLJIT_SP) | T(i) | IMM(offset), MOVABLE_INS)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, LOAD_W | S(SLJIT_SP) | T(i) | IMM(offset), MOVABLE_INS)); + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* This alignment is valid because offset is not used after storing FPU regs. 
*/ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LDC1 | S(SLJIT_SP) | FT(i) | IMM(offset), MOVABLE_INS)); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LDC1 | S(SLJIT_SP) | FT(i) | IMM(offset), MOVABLE_INS)); + } + + if (local_size > frame_size) + *ins_ptr = ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(local_size - frame_size); + else + *ins_ptr = NOP; + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + emit_stack_frame_release(compiler, 0, &ins); + + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, ins, UNMOVABLE_INS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + src = PIC_ADDR_REG; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + src = PIC_ADDR_REG; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1, &ins)); + + if (!(src & SLJIT_IMM)) { + FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS)); + return push_inst(compiler, ins, UNMOVABLE_INS); + } + + if (ins != NOP) + FAIL_IF(push_inst(compiler, ins, MOVABLE_INS)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define ARCH_32_64(a, b) a +#else +#define ARCH_32_64(a, b) b +#endif + +static const sljit_ins data_transfer_insts[16 + 4] = { +/* u w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), +/* u w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), +/* u b s */ HI(40) /* sb */, +/* u b l */ HI(36) /* lbu */, +/* u h s */ HI(41) /* sh */, +/* u h l */ HI(37) /* lhu */, +/* u i s */ HI(43) /* sw */, +/* u i l */ ARCH_32_64(HI(35) /* lw */, HI(39) /* lwu */), + +/* s w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), +/* s w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), +/* s b s */ HI(40) /* sb */, +/* s b l */ HI(32) /* lb */, +/* s h s */ HI(41) /* sh */, +/* s h l */ HI(33) /* lh */, +/* s i s */ HI(43) /* sw */, +/* s i l */ HI(35) /* lw */, + +/* d s */ HI(61) /* sdc1 */, +/* d l */ HI(53) /* ldc1 */, +/* s s */ HI(57) /* swc1 */, +/* s l */ HI(49) /* lwc1 */, +}; + +#undef ARCH_32_64 + +/* reg_ar is an absolute register! */ + +/* Can perform an operation using at most 1 instruction. */ +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) { + /* Works for both absolute and relative addresses. 
*/ + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & REG_MASK) + | TA(reg_ar) | IMM(argw), ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? reg_ar : MOVABLE_INS)); + return -1; + } + return 0; +} + +#define TO_ARGW_HI(argw) (((argw) & ~0xffff) + (((argw) & 0x8000) ? 0x10000 : 0)) + +/* See getput_arg below. + Note: can_cache is called only for binary operators. */ +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + /* Simple operation except for updates. */ + if (arg & OFFS_REG_MASK) { + argw &= 0x3; + next_argw &= 0x3; + if (argw && argw == next_argw && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK))) + return 1; + return 0; + } + + if (arg == next_arg) { + if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) + || TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw)) + return 1; + return 0; + } + + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + sljit_s32 tmp_ar, base, delay_slot; + sljit_sw offset, argw_hi; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + /* Since tmp can be the same as base or offset registers, + * these might be unavailable after modifying tmp. */ + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { + tmp_ar = reg_ar; + delay_slot = reg_ar; + } + else { + tmp_ar = DR(TMP_REG1); + delay_slot = MOVABLE_INS; + } + base = arg & REG_MASK; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + /* Using the cache. */ + if (argw == compiler->cache_argw) { + if (arg == compiler->cache_arg) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + } + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + } + + if (SLJIT_UNLIKELY(argw)) { + compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3))); + } + + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); + tmp_ar = DR(TMP_REG3); + } + else + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? 
OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar) | IMM(argw - compiler->cache_argw), delay_slot); + + if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) { + offset = argw - compiler->cache_argw; + } else { + compiler->cache_arg = SLJIT_MEM; + + argw_hi = TO_ARGW_HI(argw); + + if (next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN && argw_hi != TO_ARGW_HI(next_argw)) { + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); + compiler->cache_argw = argw; + offset = 0; + } else { + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw_hi)); + compiler->cache_argw = argw_hi; + offset = argw & 0xffff; + argw = argw_hi; + } + } + + if (!base) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar) | IMM(offset), delay_slot); + + if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) { + compiler->cache_arg = arg; + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar) | IMM(offset), delay_slot); + } + + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar) | IMM(offset), delay_slot); +} + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) +{ + sljit_s32 tmp_ar, base, delay_slot; + + if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) + return compiler->error; + + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { + tmp_ar = reg_ar; + delay_slot = reg_ar; + } + else { + tmp_ar = DR(TMP_REG1); + delay_slot = MOVABLE_INS; + } + base = arg & REG_MASK; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (SLJIT_UNLIKELY(argw)) { + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | DA(tmp_ar) | SH_IMM(argw), tmp_ar)); + FAIL_IF(push_inst(compiler, ADDU_W | SA(tmp_ar) | T(base) | DA(tmp_ar), tmp_ar)); + } + else + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + + FAIL_IF(load_immediate(compiler, tmp_ar, TO_ARGW_HI(argw))); + + if (base != 0) + FAIL_IF(push_inst(compiler, ADDU_W | SA(tmp_ar) | T(base) | DA(tmp_ar), tmp_ar)); + + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar) | IMM(argw), delay_slot); +} + +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +#define EMIT_LOGICAL(op_imm, op_reg) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + 
FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_reg | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_reg | S(src1) | T(src2) | D(dst), DR(dst))); \ + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + +#define SELECT_OP(a, b) (b) + +#define EMIT_SHIFT(dimm, dimm32, imm, dv, v) \ + op_imm = (imm); \ + op_v = (v); + +#else /* !SLJIT_CONFIG_MIPS_32 */ + +#define SELECT_OP(a, b) \ + (!(op & SLJIT_32) ? a : b) + +#define EMIT_SHIFT(dimm, dimm32, imm, dv, v) \ + op_dimm = (dimm); \ + op_dimm32 = (dimm32); \ + op_imm = (imm); \ + op_dv = (dv); \ + op_v = (v); + +#endif /* SLJIT_CONFIG_MIPS_32 */ + +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV < 1) + +static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) +{ + sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ); +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_ins max = (op & SLJIT_32) ? 32 : 64; +#else /* !SLJIT_CONFIG_MIPS_64 */ + sljit_ins max = 32; +#endif /* SLJIT_CONFIG_MIPS_64 */ + + /* The TMP_REG2 is the next value. */ + if (src != TMP_REG2) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG2) | TA(0) | IMM(is_clz ? 13 : 14), UNMOVABLE_INS)); + /* The OTHER_FLAG is the counter. Delay slot. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(max), OTHER_FLAG)); + + if (!is_clz) { + FAIL_IF(push_inst(compiler, ANDI | S(TMP_REG2) | T(TMP_REG1) | IMM(1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, BNE | S(TMP_REG1) | TA(0) | IMM(11), UNMOVABLE_INS)); + } else + FAIL_IF(push_inst(compiler, BLTZ | S(TMP_REG2) | TA(0) | IMM(11), UNMOVABLE_INS)); + + /* Delay slot. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(0), OTHER_FLAG)); + + /* The TMP_REG1 is the next shift. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(TMP_REG1) | IMM(max), DR(TMP_REG1))); + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(TMP_REG2) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1))); + + FAIL_IF(push_inst(compiler, (is_clz ? SELECT_OP(DSRLV, SRLV) : SELECT_OP(DSLLV, SLLV)) | S(TMP_REG1) | TA(EQUAL_FLAG) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, BNE | S(TMP_REG2) | TA(0) | IMM(-4), UNMOVABLE_INS)); + /* Delay slot. */ + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(TMP_REG1) | T(TMP_REG2) | IMM(-1), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, (is_clz ? SELECT_OP(DSRLV, SRLV) : SELECT_OP(DSLLV, SLLV)) | S(TMP_REG2) | TA(EQUAL_FLAG) | D(TMP_REG2), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG2) | TA(0) | IMM(-7), UNMOVABLE_INS)); + /* Delay slot. 
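   Taken together, the sequence emitted above implements a software
   bit scan for MIPS cores that predate the CLZ instruction
   (SLJIT_MIPS_REV < 1). A behavioral C sketch of the 32-bit CLZ
   case, not the literal instruction sequence (the generated code
   keeps the counter in OTHER_FLAG and the probe shift in TMP_REG1):

       #include <stdint.h>

       // Same result the generated code leaves in dst: the number of
       // leading zero bits, or 32 for a zero input.
       static unsigned soft_clz32(uint32_t v)
       {
           unsigned n = 0;
           if (v == 0)
               return 32;
           while (!(v & 0x80000000u)) {
               v <<= 1;
               n++;
           }
           return n;
       }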
*/ + FAIL_IF(push_inst(compiler, OR | SA(OTHER_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG)); + + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | SA(OTHER_FLAG) | TA(0) | D(dst), DR(dst)); +} + +#endif /* SLJIT_MIPS_REV < 1 */ + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) +{ + sljit_s32 is_overflow, is_carry, carry_src_ar, is_handled; + sljit_ins op_imm, op_v; +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_ins ins, op_dimm, op_dimm32, op_dv; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ +#else /* !SLJIT_CONFIG_MIPS_32 */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_32) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst)); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ +#else /* !SLJIT_CONFIG_MIPS_32 */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_32) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + case SLJIT_MOV_U32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & 
SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + if (dst == src2) + return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); + return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_MIPS_64 */ + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); + return SLJIT_SUCCESS; + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + return push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 6 */ + return push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 6 */ + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(0) | T(src2) | D(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, AND | S(src2) | T(TMP_REG1) | D(dst), DR(dst))); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(dst) | D(dst), DR(dst))); +#else /* SLJIT_MIPS_REV < 6 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(dst) | T(dst) | D(dst), DR(dst))); +#endif /* SLJIT_MIPS_REV >= 6 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(TMP_REG1) | IMM(SELECT_OP(-64, -32)), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(SELECT_OP(26, 27)), DR(TMP_REG1))); + return push_inst(compiler, XOR | S(dst) | T(TMP_REG1) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 1 */ + case SLJIT_CLZ: + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return emit_clz_ctz(compiler, op, dst, src2); +#endif /* SLJIT_MIPS_REV >= 1 */ + + case SLJIT_ADD: + /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */ + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + carry_src_ar = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + + /* Only the zero flag is needed. 
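   The overflow rule quoted at the top of this case can be checked in
   isolation. A minimal sketch of the same computation on 32-bit
   values (hypothetical helper, mirroring the XOR/SRL/XOR sequence
   emitted further below):

       #include <stdint.h>

       // Signed-overflow test for a + b built from sign bits and the
       // unsigned carry: overflow = sa ^ sb ^ sr ^ carry.
       static int add_overflows32(uint32_t a, uint32_t b)
       {
           uint32_t r = a + b;
           unsigned carry = r < a; // unsigned wrap-around of the add
           return (int)((a >> 31) ^ (b >> 31) ^ (r >> 31) ^ carry);
       }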
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (is_overflow || carry_src_ar != 0) { + if (src1 != dst) + carry_src_ar = DR(src1); + else if (src2 != dst) + carry_src_ar = DR(src2); + else { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | TA(0) | DA(OTHER_FLAG), OTHER_FLAG)); + carry_src_ar = OTHER_FLAG; + } + } + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (is_overflow || carry_src_ar != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTIU | S(dst) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + else + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(carry_src_ar) | DA(OTHER_FLAG), OTHER_FLAG)); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + return push_inst(compiler, XOR | S(TMP_REG1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_ADDC: + carry_src_ar = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); + } else { + if (carry_src_ar != 0) { + if (src1 != dst) + carry_src_ar = DR(src1); + else if (src2 != dst) + carry_src_ar = DR(src2); + else { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + carry_src_ar = EQUAL_FLAG; + } + } + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (carry_src_ar != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTIU | S(dst) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(carry_src_ar) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + + if (carry_src_ar == 0) + return SLJIT_SUCCESS; + + /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + /* Set carry flag. 
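   The OR below merges the two partial carries. A behavioral sketch
   of the whole add-with-carry step (hypothetical helper):

       #include <stdint.h>

       // res = a + b + carry-in; the carry-out is the carry of
       // either addition (they can never both be set).
       static uint32_t addc32(uint32_t a, uint32_t b, unsigned *carry)
       {
           uint32_t sum = a + b;
           unsigned c1 = sum < a;   // carry of a + b
           uint32_t res = sum + *carry;
           unsigned c2 = res < sum; // carry of sum + carry-in
           *carry = c1 | c2;
           return res;
       }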
*/ + return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_handled = 0; + + if (flags & SRC2_IMM) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_LESS: + case SLJIT_GREATER_EQUAL: + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + case SLJIT_GREATER: + case SLJIT_LESS_EQUAL: + FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER_EQUAL: + FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + case SLJIT_SIG_GREATER: + case SLJIT_SIG_LESS_EQUAL: + FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)); + } + else { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)); + } + return SLJIT_SUCCESS; + } + + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + + /* Only the zero flag is needed. 
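   MIPS has no flags register, so the SLTU above materializes the
   borrow of src1 - src2 as a 0/1 register value, and the XOR chain
   emitted below derives signed overflow from it. A stand-alone
   sketch of the same rule (hypothetical helper):

       #include <stdint.h>

       // Signed-overflow test for a - b:
       // overflow = sa ^ sb ^ sr ^ borrow, where borrow = (a < b).
       static int sub_overflows32(uint32_t a, uint32_t b)
       {
           uint32_t r = a - b;
           unsigned borrow = a < b; // exactly what SLTU computes
           return (int)((a >> 31) ^ (b >> 31) ^ (r >> 31) ^ borrow);
       }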
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + return push_inst(compiler, XOR | S(TMP_REG1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + + if (!is_carry) + return SLJIT_SUCCESS; + + return push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)); +#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); +#else /* !SLJIT_CONFIG_MIPS_32 */ + if (op & SLJIT_32) + return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); + FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#endif /* SLJIT_CONFIG_MIPS_32 */ +#else /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 6 */ + } + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + FAIL_IF(push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, SELECT_OP(DMUH, MUH) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); +#else /* SLJIT_MIPS_REV < 6 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); +#endif /* SLJIT_MIPS_REV >= 6 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG)); + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + 
EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + case SLJIT_MSHL: + EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV); + break; + + case SLJIT_LSHR: + case SLJIT_MLSHR: + EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV); + break; + + case SLJIT_ASHR: + case SLJIT_MASHR: + EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV); + break; + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + case SLJIT_ROTL: + if ((flags & SRC2_IMM) || src2 == 0) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + src2 = -src2 & 0x1f; +#else /* !SLJIT_CONFIG_MIPS_32 */ + src2 = -src2 & ((op & SLJIT_32) ? 0x1f : 0x3f); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } else { + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(0) | T(src2) | D(TMP_REG2), DR(TMP_REG2))); + src2 = TMP_REG2; + } + /* fallthrough */ + + case SLJIT_ROTR: + EMIT_SHIFT(DROTR, DROTR32, ROTR, DROTRV, ROTRV); + break; +#else /* SLJIT_MIPS_REV < 2 */ + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & SRC2_IMM) { + SLJIT_ASSERT(src2 != 0); +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (!(op & SLJIT_32)) { + if (GET_OPCODE(op) == SLJIT_ROTL) + op_imm = ((src2 < 32) ? DSLL : DSLL32); + else + op_imm = ((src2 < 32) ? DSRL : DSRL32); + + FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(OTHER_FLAG) | (((sljit_ins)src2 & 0x1f) << 6), OTHER_FLAG)); + + src2 = 64 - src2; + if (GET_OPCODE(op) == SLJIT_ROTL) + op_imm = ((src2 < 32) ? DSRL : DSRL32); + else + op_imm = ((src2 < 32) ? DSLL : DSLL32); + + FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | (((sljit_ins)src2 & 0x1f) << 6), DR(dst))); + return push_inst(compiler, OR | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)); + } +#endif /* SLJIT_CONFIG_MIPS_64 */ + + op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? SLL : SRL; + FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(OTHER_FLAG) | ((sljit_ins)src2 << 6), OTHER_FLAG)); + + src2 = 32 - src2; + op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? SRL : SLL; + FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | (((sljit_ins)src2 & 0x1f) << 6), DR(dst))); + return push_inst(compiler, OR | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)); + } + + if (src2 == 0) { + if (dst != src1) + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | TA(0) | D(dst), DR(dst)); + return SLJIT_SUCCESS; + } + + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(0) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (!(op & SLJIT_32)) { + op_v = (GET_OPCODE(op) == SLJIT_ROTL) ? DSLLV : DSRLV; + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + op_v = (GET_OPCODE(op) == SLJIT_ROTL) ? DSRLV : DSLLV; + FAIL_IF(push_inst(compiler, op_v | SA(EQUAL_FLAG) | T(src1) | D(dst), DR(dst))); + return push_inst(compiler, OR | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)); + } +#endif /* SLJIT_CONFIG_MIPS_64 */ + + op_v = (GET_OPCODE(op) == SLJIT_ROTL) ? SLLV : SRLV; + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + op_v = (GET_OPCODE(op) == SLJIT_ROTL) ? 
SRLV : SLLV; + FAIL_IF(push_inst(compiler, op_v | SA(EQUAL_FLAG) | T(src1) | D(dst), DR(dst))); + return push_inst(compiler, OR | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ + + default: + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if ((flags & SRC2_IMM) || src2 == 0) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst)); + } + + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst)); +#else /* !SLJIT_CONFIG_MIPS_32 */ + if ((flags & SRC2_IMM) || src2 == 0) { + if (src2 >= 32) { + SLJIT_ASSERT(!(op & SLJIT_32)); + ins = op_dimm32; + src2 -= 32; + } + else + ins = (op & SLJIT_32) ? op_imm : op_dimm; + + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst)); + } + + ins = (op & SLJIT_32) ? op_v : op_dv; + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst)); +#endif /* SLJIT_CONFIG_MIPS_32 */ +} + +#define CHECK_IMM(flags, srcw) \ + ((!((flags) & LOGICAL_OP) && ((srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)) \ + || (((flags) & LOGICAL_OP) && !((srcw) & ~UIMM_MAX))) + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_sw src2_r = 0; + sljit_s32 sugg_src2_r = TMP_REG2; + + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + if (dst == TMP_REG2) { + SLJIT_ASSERT(HAS_FLAGS(op)); + flags |= UNUSED_DEST; + } + else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (flags & MOVE_OP) + sugg_src2_r = dst_r; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if ((src2 & SLJIT_IMM) && src2w != 0 && CHECK_IMM(flags, src2w)) { + flags |= SRC2_IMM; + src2_r = src2w; + } else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && CHECK_IMM(flags, src1w)) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + + /* Source 1. 
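   The immediate handling above relies on CHECK_IMM: arithmetic
   immediates (ADDIU, SLTI, ...) occupy a sign-extended 16-bit field,
   while logical immediates (ANDI, ORI, XORI) are zero-extended, so
   the two classes accept different ranges; for commutative
   operations the operands were swapped above so a fitting immediate
   can take the src2 slot. A stand-alone sketch of the encoding test
   (hypothetical helper):

       #include <stdint.h>

       // Nonzero when w fits the immediate field of the chosen
       // instruction class.
       static int imm_encodable(int is_logical_op, int64_t w)
       {
           if (is_logical_op)
               return (w & ~(int64_t)0xffff) == 0; // 0..65535 (UIMM_MAX)
           return w >= -32768 && w <= 32767;       // SIMM_MIN..SIMM_MAX
       }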
*/ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 & SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); + src1_r = TMP_REG1; + } + else + src1_r = 0; + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. */ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) + dst_r = (sljit_s32)src2_r; + } + else if (src2 & SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w) { + FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w)); + src2_r = sugg_src2_r; + } + else { + src2_r = 0; + if (flags & MOVE_OP) { + if (dst & SLJIT_MEM) + dst_r = 0; + else + op = SLJIT_MOV; + } + } + } + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = sugg_src2_r; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, DR(dst_r), dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, DR(dst_r), dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +#undef CHECK_IMM + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_s32 int_op = op & SLJIT_32; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, BREAK, UNMOVABLE_INS); + case SLJIT_NOP: + return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULU : DMUL) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMUHU : DMUH) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MULU : MUL) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? 
MUHU : MUH) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1)); +#else /* SLJIT_MIPS_REV < 6 */ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); +#endif /* SLJIT_MIPS_REV >= 6 */ + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (int_op) { + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? MODU : MOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); + } + else { + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DMODU : DMOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); + } +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? MODU : MOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1))); +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0))); + return (op >= SLJIT_DIV_UW) ? SLJIT_SUCCESS : push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1)); +#else /* SLJIT_MIPS_REV < 6 */ +#if !(defined SLJIT_MIPS_REV) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif /* !SLJIT_MIPS_REV */ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (int_op) + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#else /* !SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#endif /* SLJIT_CONFIG_MIPS_64 */ + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return (op >= SLJIT_DIV_UW) ? 
SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); +#endif /* SLJIT_MIPS_REV >= 6 */ + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) +static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + if (!(src & OFFS_REG_MASK)) { + if (srcw <= SIMM_MAX && srcw >= SIMM_MIN) + return push_inst(compiler, PREF | S(src & REG_MASK) | IMM(srcw), MOVABLE_INS); + + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS); + } + + srcw &= 0x3; + + if (SLJIT_UNLIKELY(srcw != 0)) { + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(src)) | D(TMP_REG1) | SH_IMM(srcw), DR(TMP_REG1))); + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS); + } + + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(OFFS_REG(src)), MOVABLE_INS); +} +#endif /* SLJIT_MIPS_REV >= 1 */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (op & SLJIT_32) + flags = INT_DATA | SIGNED_DATA; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: +#endif + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + case SLJIT_MOV_U32: + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw); + + case SLJIT_MOV_S32: + case SLJIT_MOV32: + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); +#endif + + case SLJIT_MOV_U8: + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + + case SLJIT_MOV_S8: + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + + case SLJIT_MOV_U16: + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + + case SLJIT_MOV_S16: + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_CLZ: + case SLJIT_CTZ: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (op & SLJIT_32) { + flags |= INT_DATA | SIGNED_DATA; + if (src1 & SLJIT_IMM) + src1w = (sljit_s32)src1w; + if (src2 & SLJIT_IMM) + src2w = (sljit_s32)src2w; + } +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + compiler->status_flags_state = 0; + return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + case SLJIT_ROTL: + case SLJIT_ROTR: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (src2 & SLJIT_IMM) + src2w &= 0x1f; +#else + if (src2 & SLJIT_IMM) { + if (op & SLJIT_32) + src2w &= 0x1f; + else + src2w &= 0x3f; + } +#endif + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#define SELECT_OP3(op, src2w, D, D32, W) (((op & SLJIT_32) ? (W) : ((src2w) < 32) ? (D) : (D32)) | (((sljit_ins)src2w & 0x1f) << 6)) +#define SELECT_OP2(op, D, W) ((op & SLJIT_32) ? (W) : (D)) +#else /* !SLJIT_CONFIG_MIPS_64 */ +#define SELECT_OP3(op, src2w, D, D32, W) ((W) | ((sljit_ins)(src2w) << 6)) +#define SELECT_OP2(op, D, W) (W) +#endif /* SLJIT_CONFIG_MIPS_64 */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 is_left; + sljit_ins ins1, ins2, ins3; +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + sljit_sw bit_length = (op & SLJIT_32) ? 
32 : 64; +#else /* !SLJIT_CONFIG_MIPS_64 */ + sljit_s32 inp_flags = WORD_DATA | LOAD_DATA; + sljit_sw bit_length = 32; +#endif /* SLJIT_CONFIG_MIPS_64 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w)); + + is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); + + if (src_dst == src1) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w); + } + + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (src2 & SLJIT_IMM) { + src2w &= bit_length - 1; + + if (src2w == 0) + return SLJIT_SUCCESS; + } else if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src2, src2w)); + src2 = TMP_REG2; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG1), src1, src1w)); + src1 = TMP_REG1; + } else if (src1 & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_IMM) { + if (is_left) { + ins1 = SELECT_OP3(op, src2w, DSLL, DSLL32, SLL); + src2w = bit_length - src2w; + ins2 = SELECT_OP3(op, src2w, DSRL, DSRL32, SRL); + } else { + ins1 = SELECT_OP3(op, src2w, DSRL, DSRL32, SRL); + src2w = bit_length - src2w; + ins2 = SELECT_OP3(op, src2w, DSLL, DSLL32, SLL); + } + + FAIL_IF(push_inst(compiler, ins1 | T(src_dst) | D(src_dst), DR(src_dst))); + FAIL_IF(push_inst(compiler, ins2 | T(src1) | D(TMP_REG1), DR(TMP_REG1))); + return push_inst(compiler, OR | S(src_dst) | T(TMP_REG1) | D(src_dst), DR(src_dst)); + } + + if (is_left) { + ins1 = SELECT_OP2(op, DSRL, SRL); + ins2 = SELECT_OP2(op, DSLLV, SLLV); + ins3 = SELECT_OP2(op, DSRLV, SRLV); + } else { + ins1 = SELECT_OP2(op, DSLL, SLL); + ins2 = SELECT_OP2(op, DSRLV, SRLV); + ins3 = SELECT_OP2(op, DSLLV, SLLV); + } + + FAIL_IF(push_inst(compiler, ins2 | S(src2) | T(src_dst) | D(src_dst), DR(src_dst))); + + if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { + FAIL_IF(push_inst(compiler, ins1 | T(src1) | D(TMP_REG1) | (1 << 6), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XORI | S(src2) | T(TMP_REG2) | ((sljit_ins)bit_length - 1), DR(TMP_REG2))); + src1 = TMP_REG1; + } else + FAIL_IF(push_inst(compiler, SELECT_OP2(op, DSUBU, SUBU) | SA(0) | T(src2) | D(TMP_REG2), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, ins3 | S(TMP_REG2) | T(src1) | D(TMP_REG1), DR(TMP_REG1))); + return push_inst(compiler, OR | S(src_dst) | T(TMP_REG1) | D(src_dst), DR(src_dst)); +} + +#undef SELECT_OP3 +#undef SELECT_OP2 + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG)); + else + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw)); + + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return emit_prefetch(compiler, src, srcw); +#else /* SLJIT_MIPS_REV < 1 */ + return SLJIT_SUCCESS; +#endif /* 
SLJIT_MIPS_REV >= 1 */ + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return FR(reg); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7)) +#define FMT(op) ((((sljit_ins)op & SLJIT_32) ^ SLJIT_32) << (21 - 8)) + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags (sljit_u32)0 +#else + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21; +#endif + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS)); + + if (FAST_IS_REG(dst)) { + FAIL_IF(push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS)); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + return SLJIT_SUCCESS; + } + + /* Store the integer value from a floating point register. */ + return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, FR(TMP_FREG1), dst, dstw, 0, 0); + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef flags +#endif +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags (sljit_u32)0 +#else + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21; +#endif + + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (FAST_IS_REG(src)) { + FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + } else if (src & SLJIT_MEM) { + /* Load the integer value into a floating point register. */ + FAIL_IF(emit_op_mem2(compiler, (flags ? 
DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw)); + } + else { +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + } + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((((sljit_ins)op & SLJIT_32) ^ SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef flags +#endif +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins inst; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + inst = C_EQ_S; + break; + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + inst = C_UEQ_S; + break; + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + inst = C_OLT_S; + break; + case SLJIT_F_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_ORDERED_GREATER_EQUAL: + inst = C_ULT_S; + break; + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + inst = C_ULE_S; + break; + case SLJIT_F_LESS_EQUAL: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED_LESS_EQUAL: + inst = C_OLE_S; + break; + default: + SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_UNORDERED || GET_FLAG_TYPE(op) == SLJIT_ORDERED); + inst = C_UN_S; + break; + } + return push_inst(compiler, inst | FMT(op) | FT(src2) | FS(src1) | C_FD, UNMOVABLE_INS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(dst_r), src, srcw, dst, dstw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, NEG_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_CONV_F64_FROM_F32: + /* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */ + FAIL_IF(push_inst(compiler, CVT_S_S | (sljit_ins)((op & SLJIT_32) ? 1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); + op ^= SLJIT_32; + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(dst_r), dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, dstw)); + + if (flags & SLOW_SRC1) + src1 = TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + } + + if (dst_r == TMP_FREG2) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG2), dst, dstw, 0, 
0)); + + return SLJIT_SUCCESS; +} + +#undef FLOAT_DATA +#undef FMT + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), UNMOVABLE_INS); + + /* Memory. */ + FAIL_IF(emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw)); + compiler->delay_slot = UNMOVABLE_INS; + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + compiler->delay_slot = UNMOVABLE_INS; + return label; +} + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define BRANCH_LENGTH 4 +#else +#define BRANCH_LENGTH 8 +#endif + +#define BR_Z(src) \ + inst = BEQ | SA(src) | TA(0) | BRANCH_LENGTH; \ + flags = IS_BIT26_COND; \ + delay_check = src; + +#define BR_NZ(src) \ + inst = BNE | SA(src) | TA(0) | BRANCH_LENGTH; \ + flags = IS_BIT26_COND; \ + delay_check = src; + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + +#define BR_T() \ + inst = BC1NEZ; \ + flags = IS_BIT23_COND; \ + delay_check = FCSR_FCC; +#define BR_F() \ + inst = BC1EQZ; \ + flags = IS_BIT23_COND; \ + delay_check = FCSR_FCC; + +#else /* SLJIT_MIPS_REV < 6 */ + +#define BR_T() \ + inst = BC1T | BRANCH_LENGTH; \ + flags = IS_BIT16_COND; \ + delay_check = FCSR_FCC; +#define BR_F() \ + inst = BC1F | BRANCH_LENGTH; \ + flags = IS_BIT16_COND; \ + delay_check = FCSR_FCC; + +#endif /* SLJIT_MIPS_REV >= 6 */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins inst; + sljit_u32 flags = 0; + sljit_s32 delay_check = UNMOVABLE_INS; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + switch (type) { + case SLJIT_EQUAL: + BR_NZ(EQUAL_FLAG); + break; + case SLJIT_NOT_EQUAL: + BR_Z(EQUAL_FLAG); + break; + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + case SLJIT_CARRY: + BR_Z(OTHER_FLAG); + break; + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + case SLJIT_NOT_CARRY: + BR_NZ(OTHER_FLAG); + break; + case SLJIT_F_NOT_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER: + 
case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED: + BR_T(); + break; + case SLJIT_F_EQUAL: + case SLJIT_F_LESS: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED: + BR_F(); + break; + default: + /* Not conditional branch. */ + inst = 0; + break; + } + + jump->flags |= flags; + if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != delay_check)) + jump->flags |= IS_MOVABLE; + + if (inst) + PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS)); + + if (type <= SLJIT_JUMP) + PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + else { + jump->flags |= IS_JAL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + } + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + + /* Maximum number of instructions required for generating a constant. */ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 2; +#else + compiler->size += 6; +#endif + return jump; +} + +#define RESOLVE_IMM1() \ + if (src1 & SLJIT_IMM) { \ + if (src1w) { \ + PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \ + src1 = TMP_REG1; \ + } \ + else \ + src1 = 0; \ + } + +#define RESOLVE_IMM2() \ + if (src2 & SLJIT_IMM) { \ + if (src2w) { \ + PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \ + src2 = TMP_REG2; \ + } \ + else \ + src2 = 0; \ + } + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + struct sljit_jump *jump; + sljit_s32 flags; + sljit_ins inst; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + flags = WORD_DATA | LOAD_DATA; +#else /* !SLJIT_CONFIG_MIPS_32 */ + flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; +#endif /* SLJIT_CONFIG_MIPS_32 */ + + if (src1 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0)); + src2 = TMP_REG2; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type <= SLJIT_NOT_EQUAL) { + RESOLVE_IMM1(); + RESOLVE_IMM2(); + jump->flags |= IS_BIT26_COND; + if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2))) + jump->flags |= IS_MOVABLE; + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? 
BNE : BEQ) | S(src1) | T(src2) | BRANCH_LENGTH, UNMOVABLE_INS)); + } + else if (type >= SLJIT_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) { + inst = NOP; + if ((src1 & SLJIT_IMM) && (src1w == 0)) { + RESOLVE_IMM2(); + switch (type) { + case SLJIT_SIG_LESS: + inst = BLEZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BGTZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_GREATER: + inst = BGEZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BLTZ; + jump->flags |= IS_BIT16_COND; + break; + } + src1 = src2; + } + else { + RESOLVE_IMM1(); + switch (type) { + case SLJIT_SIG_LESS: + inst = BGEZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BLTZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_GREATER: + inst = BLEZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BGTZ; + jump->flags |= IS_BIT26_COND; + break; + } + } + PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | BRANCH_LENGTH, UNMOVABLE_INS)); + } + else { + if (type == SLJIT_LESS || type == SLJIT_GREATER_EQUAL || type == SLJIT_SIG_LESS || type == SLJIT_SIG_GREATER_EQUAL) { + RESOLVE_IMM1(); + if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN) + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1))); + else { + RESOLVE_IMM2(); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src1) | T(src2) | D(TMP_REG1), DR(TMP_REG1))); + } + type = (type == SLJIT_LESS || type == SLJIT_SIG_LESS) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; + } + else { + RESOLVE_IMM2(); + if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN) + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1))); + else { + RESOLVE_IMM1(); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src2) | T(src1) | D(TMP_REG1), DR(TMP_REG1))); + } + type = (type == SLJIT_GREATER || type == SLJIT_SIG_GREATER) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; + } + + jump->flags |= IS_BIT26_COND; + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | BRANCH_LENGTH, UNMOVABLE_INS)); + } + + PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + + /* Maximum number of instructions required for generating a constant. */ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 2; +#else + compiler->size += 6; +#endif + return jump; +} + +#undef RESOLVE_IMM1 +#undef RESOLVE_IMM2 + +#undef BRANCH_LENGTH +#undef BR_Z +#undef BR_NZ +#undef BR_T +#undef BR_F + +#undef FLOAT_DATA +#undef FMT + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump = NULL; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (src & SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? 
IS_JAL : 0)); + jump->u.target = (sljit_uw)srcw; + + if (compiler->delay_slot != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + + src = TMP_REG2; + } else if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(TMP_REG2), src, srcw)); + src = TMP_REG2; + } + + if (type <= SLJIT_JUMP) + FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, JALR | S(src) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + + if (jump != NULL) { + jump->addr = compiler->size; + + /* Maximum number of instructions required for generating a constant. */ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 2; +#else + compiler->size += 6; +#endif + } + + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 src_ar, dst_ar, invert; + sljit_s32 saved_op = op; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_s32 mem_type = WORD_DATA; +#else + sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), dst, dstw, dst, dstw)); + + if (type < SLJIT_F_EQUAL) { + src_ar = OTHER_FLAG; + invert = type & 0x1; + + switch (type) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: + FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + break; + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { + src_ar = OTHER_FLAG; + break; + } + FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + invert ^= 0x1; + break; + } + } else { + invert = 0; + + switch (type) { + case SLJIT_F_NOT_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED: + invert = 1; + break; + } + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + FAIL_IF(push_inst(compiler, MFC1 | TA(dst_ar) | FS(TMP_FREG3), dst_ar)); +#else /* SLJIT_MIPS_REV < 6 */ + FAIL_IF(push_inst(compiler, CFC1 | TA(dst_ar) | DA(FCSR_REG), dst_ar)); +#endif /* SLJIT_MIPS_REV >= 6 */ + FAIL_IF(push_inst(compiler, SRL | TA(dst_ar) | DA(dst_ar) | SH_IMM(23), dst_ar)); + FAIL_IF(push_inst(compiler, ANDI | SA(dst_ar) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + } + + if (invert) { + FAIL_IF(push_inst(compiler, XORI | SA(src_ar) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + } + + if (op < SLJIT_ADD) { + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_type, src_ar, dst, dstw); + + if (src_ar != dst_ar) + return push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | DA(dst_ar), dst_ar); + return SLJIT_SUCCESS; + } + + /* OTHER_FLAG cannot be specified as src2 argument at the moment. 
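+   The flag value is therefore copied into TMP_REG2 first (ADDU_W with
+   the zero register acts as a plain move) and TMP_REG2 is used as the
+   second source of the arithmetic operation below. Illustrative
+   sequence, not from the original source, for op == SLJIT_AND:
+     addu t2, flag, $zero
+     and  dst, dst, t2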
*/ + if (DR(TMP_REG2) != src_ar) + FAIL_IF(push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + + mem_type |= CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE; + + if (dst & SLJIT_MEM) + return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); + return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, TMP_REG2, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + sljit_ins ins; +#endif /* SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6) + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (type & SLJIT_32) + srcw = (sljit_s32)srcw; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + src = TMP_REG1; + srcw = 0; + } + + switch (type & ~SLJIT_32) { + case SLJIT_EQUAL: + ins = MOVZ | TA(EQUAL_FLAG); + break; + case SLJIT_NOT_EQUAL: + ins = MOVN | TA(EQUAL_FLAG); + break; + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + ins = MOVN | TA(OTHER_FLAG); + break; + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + ins = MOVZ | TA(OTHER_FLAG); + break; + case SLJIT_F_EQUAL: + case SLJIT_F_LESS: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED: + ins = MOVT; + break; + case SLJIT_F_NOT_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED: + ins = MOVF; + break; + default: + ins = MOVZ | TA(OTHER_FLAG); + SLJIT_UNREACHABLE(); + break; + } + + return push_inst(compiler, ins | S(src) | D(dst_reg), DR(dst_reg)); + +#else /* SLJIT_MIPS_REV < 1 || SLJIT_MIPS_REV >= 6 */ + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); +#endif /* SLJIT_MIPS_REV >= 1 */ +} + +static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s16 max_offset) +{ + sljit_s32 arg = *mem; + sljit_sw argw = *memw; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (SLJIT_UNLIKELY(argw)) { + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG1) | SH_IMM(argw), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG1) | T(arg & REG_MASK) | D(TMP_REG1), DR(TMP_REG1))); + } else + FAIL_IF(push_inst(compiler, ADDU_W | S(arg & REG_MASK) | T(OFFS_REG(arg)) | D(TMP_REG1), DR(TMP_REG1))); + + *mem = TMP_REG1; + *memw = 0; + + return SLJIT_SUCCESS; + } + + if (argw <= max_offset && argw >= SIMM_MIN) { + *mem = arg & REG_MASK; + return SLJIT_SUCCESS; + } + + *mem = TMP_REG1; + + if ((sljit_s16)argw > max_offset) { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), argw)); + *memw = 0; + } else { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), TO_ARGW_HI(argw))); + *memw = (sljit_s16)argw; + } 
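+ /* Here TMP_REG1 holds either the whole offset (*memw == 0) or its high
+    part, with the sign-extended low 16 bits left in *memw; the base
+    register, if any, is added below. Worked example (illustrative, not
+    from the original source): argw == 0x12345 puts TO_ARGW_HI(argw) ==
+    0x10000 into TMP_REG1 and leaves *memw == 0x2345. */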
+ + if ((arg & REG_MASK) == 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADDU_W | S(TMP_REG1) | T(arg & REG_MASK) | D(TMP_REG1), DR(TMP_REG1)); +} + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define MEM16_IMM_FIRST(memw) IMM((memw) + 1) +#define MEM16_IMM_SECOND(memw) IMM(memw) +#define MEMF64_FS_FIRST(freg) FS(freg) +#define MEMF64_FS_SECOND(freg) (FS(freg) | ((sljit_ins)1 << 11)) +#else /* !SLJIT_LITTLE_ENDIAN */ +#define MEM16_IMM_FIRST(memw) IMM(memw) +#define MEM16_IMM_SECOND(memw) IMM((memw) + 1) +#define MEMF64_FS_FIRST(freg) (FS(freg) | ((sljit_ins)1 << 11)) +#define MEMF64_FS_SECOND(freg) FS(freg) +#endif /* SLJIT_LITTLE_ENDIAN */ + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16)) +#else /* !SLJIT_CONFIG_MIPS_32 */ +#define MEM_CHECK_UNALIGNED(type) ((type) & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16 | SLJIT_MEM_UNALIGNED_32)) +#endif /* SLJIT_CONFIG_MIPS_32 */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 op = type & 0xff; + sljit_s32 flags = 0; + sljit_ins ins; +#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + sljit_ins ins_right; +#endif /* !(SLJIT_MIPS_REV >= 6) */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (reg & REG_PAIR_MASK) { + ADJUST_LOCAL_OFFSET(mem, memw); + +#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + if (MEM_CHECK_UNALIGNED(type)) { + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - (2 * SSIZE_OF(sw) - 1))); + + if (!(type & SLJIT_MEM_STORE) && (mem == REG_PAIR_FIRST(reg) || mem == REG_PAIR_SECOND(reg))) { + FAIL_IF(push_inst(compiler, ADDU_W | S(mem) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + mem = TMP_REG1; + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + ins = ((type & SLJIT_MEM_STORE) ? SWL : LWL) | S(mem); + ins_right = ((type & SLJIT_MEM_STORE) ? SWR : LWR) | S(mem); +#else /* !SLJIT_CONFIG_MIPS_32 */ + ins = ((type & SLJIT_MEM_STORE) ? SDL : LDL) | S(mem); + ins_right = ((type & SLJIT_MEM_STORE) ? SDR : LDR) | S(mem); +#endif /* SLJIT_CONFIG_MIPS_32 */ + + FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM(memw), DR(REG_PAIR_FIRST(reg)))); + FAIL_IF(push_inst(compiler, ins_right | T(REG_PAIR_FIRST(reg)) | IMM(memw + (SSIZE_OF(sw) - 1)), DR(REG_PAIR_FIRST(reg)))); + FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg)))); + return push_inst(compiler, ins_right | T(REG_PAIR_SECOND(reg)) | IMM((memw + 2 * SSIZE_OF(sw) - 1)), DR(REG_PAIR_SECOND(reg))); + } +#endif /* !(SLJIT_MIPS_REV >= 6) */ + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - SSIZE_OF(sw))); + + ins = ((type & SLJIT_MEM_STORE) ? 
STORE_W : LOAD_W) | S(mem); + + if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) { + FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg)))); + return push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM(memw), DR(REG_PAIR_FIRST(reg))); + } + + FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM(memw), DR(REG_PAIR_FIRST(reg)))); + return push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg))); + } + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); +#else /* !(SLJIT_MIPS_REV >= 6) */ + ADJUST_LOCAL_OFFSET(mem, memw); + + switch (op) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + flags = BYTE_DATA; + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + + if (op == SLJIT_MOV_S8) + flags |= SIGNED_DATA; + + return emit_op_mem(compiler, flags, DR(reg), mem, memw); + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - 1)); + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, SRA_W | T(reg) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(TMP_REG2) | MEM16_IMM_FIRST(memw), MOVABLE_INS)); + return push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(reg) | MEM16_IMM_SECOND(memw), MOVABLE_INS); + } + + flags = BYTE_DATA | LOAD_DATA; + + if (op == SLJIT_MOV_S16) + flags |= SIGNED_DATA; + + FAIL_IF(push_inst(compiler, data_transfer_insts[flags] | S(mem) | T(TMP_REG2) | MEM16_IMM_FIRST(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA | LOAD_DATA] | S(mem) | T(reg) | MEM16_IMM_SECOND(memw), DR(reg))); + FAIL_IF(push_inst(compiler, SLL_W | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2))); + return push_inst(compiler, OR | S(reg) | T(TMP_REG2) | D(reg), DR(reg)); + + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (type & SLJIT_MEM_UNALIGNED_32) { + flags = WORD_DATA; + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + + return emit_op_mem(compiler, flags, DR(reg), mem, memw); + } +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - 7)); + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, SDL | S(mem) | T(reg) | IMM(memw), MOVABLE_INS)); + return push_inst(compiler, SDR | S(mem) | T(reg) | IMM(memw + 7), MOVABLE_INS); + } + + if (mem == reg) { + FAIL_IF(push_inst(compiler, ADDU_W | S(mem) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + mem = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, LDL | S(mem) | T(reg) | IMM(memw), DR(reg))); + return push_inst(compiler, LDR | S(mem) | T(reg) | IMM(memw + 7), DR(reg)); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - 3)); + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(reg) | IMM(memw), MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(reg) | IMM(memw + 3), MOVABLE_INS); + } + + if (mem == reg) { + FAIL_IF(push_inst(compiler, ADDU_W | S(mem) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + mem = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(reg) | IMM(memw), DR(reg))); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) 
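+ /* LWL and LWR work as a pair: each transfers the bytes of the unaligned
+    word that fall into its own aligned memory word and merges them into
+    the destination register; endianness decides which of the two supplies
+    the high-order bytes. A hypothetical load at address 0x1002 touches
+    the aligned words at 0x1000 and 0x1004. */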
+ return push_inst(compiler, LWR | S(mem) | T(reg) | IMM(memw + 3), DR(reg)); +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(reg) | IMM(memw + 3), DR(reg))); + + if (op != SLJIT_MOV_U32) + return SLJIT_SUCCESS; + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + return push_inst(compiler, DINSU | T(reg) | SA(0) | (31 << 11) | (0 << 11), DR(reg)); +#else /* SLJIT_MIPS_REV < 2 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg))); + return push_inst(compiler, DSRL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg)); +#endif /* SLJIT_MIPS_REV >= 2 */ +#endif /* SLJIT_CONFIG_MIPS_32 */ +#endif /* SLJIT_MIPS_REV >= 6 */ +} + +#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - ((type & SLJIT_32) ? 3 : 7))); + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + if (type & SLJIT_32) { + FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), MOVABLE_INS); + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | MEMF64_FS_FIRST(freg), DR(TMP_REG2))); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), MOVABLE_INS)); + + FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | MEMF64_FS_SECOND(freg), DR(TMP_REG2))); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw + 4), MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 7), MOVABLE_INS); +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, MFC1 | (1 << 21) | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + FAIL_IF(push_inst(compiler, SDL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS)); + return push_inst(compiler, SDR | S(mem) | T(TMP_REG2) | IMM(memw + 7), MOVABLE_INS); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + + if (type & SLJIT_32) { + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS)); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | MEMF64_FS_FIRST(freg), 
MOVABLE_INS)); + + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw + 4), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 7), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | MEMF64_FS_SECOND(freg), MOVABLE_INS)); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, LDL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LDR | S(mem) | T(TMP_REG2) | IMM(memw + 7), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, MTC1 | (1 << 21) | T(TMP_REG2) | FS(freg), MOVABLE_INS)); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif +#endif /* SLJIT_CONFIG_MIPS_32 */ + return SLJIT_SUCCESS; +} + +#endif /* !SLJIT_MIPS_REV || SLJIT_MIPS_REV < 6 */ + +#undef MEM16_IMM_FIRST +#undef MEM16_IMM_SECOND +#undef MEMF64_FS_FIRST +#undef MEMF64_FS_SECOND +#undef MEM_CHECK_UNALIGNED + +#undef TO_ARGW_HI + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, DR(TMP_REG2), dst, dstw)); + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r, UNMOVABLE_INS)); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 1; +#else + compiler->size += 5; +#endif + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, DR(TMP_REG2), dst, dstw)); + + return put_label; +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativePPC_32.c b/contrib/libs/pcre2/src/sljit/sljitNativePPC_32.c new file mode 100644 index 0000000000..9449e4b9d7 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativePPC_32.c @@ -0,0 +1,340 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ppc 32-bit arch dependent functions. */ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm)); + + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; +} + +/* Simplified mnemonics: clrlwi. */ +#define INS_CLEAR_LEFT(dst, src, from) \ + (RLWINM | S(src) | A(dst) | RLWI_MBE(from, 31)) + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) +{ + sljit_u32 imm; + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S16) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, CNTLZW | S(src2) | A(dst)); + + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_REG1); + FAIL_IF(push_inst(compiler, NEG | D(TMP_REG1) | A(src2))); + FAIL_IF(push_inst(compiler, AND | S(src2) | A(dst) | B(TMP_REG1))); + FAIL_IF(push_inst(compiler, CNTLZW | S(dst) | A(dst))); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(dst) | IMM(-32))); + /* The highest bits are set, if dst < 32, zero otherwise. 
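+   The logical shift below turns that into 31 (0x1f) when a set bit was
+   found and into 0 for a zero input, so the final XOR converts cntlzw's
+   31 - position into the trailing zero count. Worked example
+   (illustrative): src2 == 0x10 isolates bit 4, cntlzw returns 27 and
+   27 ^ 31 == 4; src2 == 0 keeps cntlzw's 32 unchanged.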
*/ + FAIL_IF(push_inst(compiler, SRWI(27) | S(TMP_REG1) | A(TMP_REG1))); + return push_inst(compiler, XOR | S(dst) | A(dst) | B(TMP_REG1)); + + case SLJIT_ADD: + if (flags & ALT_FORM1) { + /* Setting XER SO is not enough, CR SO is also needed. */ + return push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + } + + if (flags & ALT_FORM2) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + + if (flags & ALT_FORM3) + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + + imm = compiler->imm; + + if (flags & ALT_FORM4) { + FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((imm >> 16) & 0xffff) + ((imm >> 15) & 0x1)))); + src1 = dst; + } + + return push_inst(compiler, ADDI | D(dst) | A(src1) | (imm & 0xffff)); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); + } + SLJIT_ASSERT(!(flags & ALT_FORM4)); + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); + if (flags & ALT_FORM5) + return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); + + case SLJIT_ADDC: + return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); + + case SLJIT_SUB: + if (flags & ALT_FORM1) { + if (flags & ALT_FORM2) { + FAIL_IF(push_inst(compiler, CMPLI | CRD(0) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMPL | CRD(0) | A(src1) | B(src2))); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM2) { + if (flags & ALT_FORM3) { + FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2))); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM3) { + /* Setting XER SO is not enough, CR SO is also needed. */ + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); + } + + if (flags & ALT_FORM4) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. 
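+   SUBFIC writes the borrow into XER[CA] only; no CR field is modified.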
*/ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); + } + + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM5) + return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); + + case SLJIT_SUBC: + return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); + + case SLJIT_MUL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); + } + return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_AND: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); + } + return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_OR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(imm))); + return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(imm >> 16)); + } + return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_XOR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(imm))); + return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(imm >> 16)); + } + return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_SHL: + case SLJIT_MSHL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm & 0x1f; + return push_inst(compiler, SLWI(imm) | RC(flags) | S(src1) | A(dst)); + } + + if (op == SLJIT_MSHL) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f)); + src2 = TMP_REG2; + } + + return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_LSHR: + case SLJIT_MLSHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm & 0x1f; + /* Since imm can be 0, SRWI() cannot be used. 
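+   SRWI(n) expands to a rotate left by 32 - n, and 32 does not fit the
+   5-bit SH field. Masking the rotate amount with 0x1f makes the
+   imm == 0 case degenerate into a rotate by zero with a full 0..31
+   mask, i.e. a plain move, while every other value behaves exactly
+   like srwi.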
*/ + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | RLWI_SH((32 - imm) & 0x1f) | RLWI_MBE(imm, 31)); + } + + if (op == SLJIT_MLSHR) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f)); + src2 = TMP_REG2; + } + + return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ASHR: + case SLJIT_MASHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm & 0x1f; + return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (imm << 11)); + } + + if (op == SLJIT_MASHR) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f)); + src2 = TMP_REG2; + } + + return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + if (op == SLJIT_ROTR) + imm = (sljit_u32)(-(sljit_s32)imm); + + imm &= 0x1f; + return push_inst(compiler, RLWINM | S(src1) | A(dst) | RLWI_SH(imm) | RLWI_MBE(0, 31)); + } + + if (op == SLJIT_ROTR) { + FAIL_IF(push_inst(compiler, SUBFIC | D(TMP_REG2) | A(src2) | 0)); + src2 = TMP_REG2; + } + + return push_inst(compiler, RLWNM | S(src1) | A(dst) | B(src2) | RLWI_MBE(0, 31)); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); + SLJIT_ASSERT((inst[0] & 0xfc1f0000) == ADDIS && (inst[1] & 0xfc000000) == ORI); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativePPC_64.c b/contrib/libs/pcre2/src/sljit/sljitNativePPC_64.c new file mode 100644 index 0000000000..80549108bf --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativePPC_64.c @@ -0,0 +1,579 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ppc 64-bit arch dependent functions. */ + +#if defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM) +#define ASM_SLJIT_CLZ(src, dst) \ + __asm__ volatile ( "cntlzd %0, %1" : "=r"(dst) : "r"(src) ) +#elif defined(__xlc__) +#error "Please enable GCC syntax for inline assembly statements" +#else +#error "Must implement count leading zeroes" +#endif + +/* Computes SLDI(63 - shift). */ +#define PUSH_SLDI_NEG(reg, shift) \ + push_inst(compiler, RLDICR | S(reg) | A(reg) | RLDI_SH(63 - shift) | RLDI_ME(shift)) + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + sljit_uw tmp; + sljit_uw shift; + sljit_uw tmp2; + sljit_uw shift2; + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm)); + + if (imm <= 0x7fffffffl && imm >= -0x80000000l) { + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; + } + + /* Count leading zeroes. */ + tmp = (sljit_uw)((imm >= 0) ? imm : ~imm); + ASM_SLJIT_CLZ(tmp, shift); + SLJIT_ASSERT(shift > 0); + shift--; + tmp = ((sljit_uw)imm << shift); + + if ((tmp & ~0xffff000000000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); + shift += 15; + return PUSH_SLDI_NEG(reg, shift); + } + + if ((tmp & ~0xffffffff00000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32))); + shift += 31; + return PUSH_SLDI_NEG(reg, shift); + } + + /* Cut out the 16 bit from immediate. */ + shift += 15; + tmp2 = (sljit_uw)imm & (((sljit_uw)1 << (63 - shift)) - 1); + + if (tmp2 <= 0xffff) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); + FAIL_IF(PUSH_SLDI_NEG(reg, shift)); + return push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)tmp2); + } + + if (tmp2 <= 0xffffffff) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(PUSH_SLDI_NEG(reg, shift)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS; + } + + ASM_SLJIT_CLZ(tmp2, shift2); + tmp2 <<= shift2; + + if ((tmp2 & ~0xffff000000000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); + shift2 += 15; + shift += (63 - shift2); + FAIL_IF(PUSH_SLDI_NEG(reg, shift)); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 48))); + return PUSH_SLDI_NEG(reg, shift2); + } + + /* The general version. 
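+   Five instructions materialize an arbitrary 64-bit constant: lis
+   produces bits 63..48, ori adds bits 47..32, an sldi by 32 moves them
+   into the upper half, then oris and ori fill bits 31..16 and 15..0.
+   Illustrative expansion, not from the original source, for
+   imm == 0x1122334455667788:
+     lis  reg, 0x1122
+     ori  reg, reg, 0x3344
+     sldi reg, reg, 32
+     oris reg, reg, 0x5566
+     ori  reg, reg, 0x7788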
*/ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)((sljit_uw)imm >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32))); + FAIL_IF(PUSH_SLDI_NEG(reg, 31)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)); +} + +#undef PUSH_SLDI_NEG + +#define CLRLDI(dst, src, n) \ + (RLDICL | S(src) | A(dst) | RLDI_SH(0) | RLDI_MB(n)) + +/* Sign extension for integer operations. */ +#define UN_EXTS() \ + if ((flags & (ALT_SIGN_EXT | REG2_SOURCE)) == (ALT_SIGN_EXT | REG2_SOURCE)) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ + src2 = TMP_REG2; \ + } + +#define BIN_EXTS() \ + if (flags & ALT_SIGN_EXT) { \ + if (flags & REG1_SOURCE) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ + src1 = TMP_REG1; \ + } \ + if (flags & REG2_SOURCE) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ + src2 = TMP_REG2; \ + } \ + } + +#define BIN_IMM_EXTS() \ + if ((flags & (ALT_SIGN_EXT | REG1_SOURCE)) == (ALT_SIGN_EXT | REG1_SOURCE)) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ + src1 = TMP_REG1; \ + } + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) +{ + sljit_u32 imm; + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S32) + return push_inst(compiler, EXTSW | S(src2) | A(dst)); + return push_inst(compiler, CLRLDI(dst, src2, 32)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, CLRLDI(dst, src2, 56)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_S16) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, CLRLDI(dst, src2, 48)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1); + UN_EXTS(); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, ((flags & ALT_FORM1) ? CNTLZW : CNTLZD) | S(src2) | A(dst)); + + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_REG1); + FAIL_IF(push_inst(compiler, NEG | D(TMP_REG1) | A(src2))); + FAIL_IF(push_inst(compiler, AND | S(src2) | A(dst) | B(TMP_REG1))); + FAIL_IF(push_inst(compiler, ((flags & ALT_FORM1) ? CNTLZW : CNTLZD) | S(dst) | A(dst))); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(dst) | IMM((flags & ALT_FORM1) ? -32 : -64))); + /* The highest bits are set, if dst < bit width, zero otherwise. */ + FAIL_IF(push_inst(compiler, ((flags & ALT_FORM1) ? 
SRWI(27) : SRDI(58)) | S(TMP_REG1) | A(TMP_REG1))); + return push_inst(compiler, XOR | S(dst) | A(dst) | B(TMP_REG1)); + + case SLJIT_ADD: + if (flags & ALT_FORM1) { + if (flags & ALT_SIGN_EXT) { + FAIL_IF(push_inst(compiler, SLDI(32) | S(src1) | A(TMP_REG1))); + src1 = TMP_REG1; + FAIL_IF(push_inst(compiler, SLDI(32) | S(src2) | A(TMP_REG2))); + src2 = TMP_REG2; + } + /* Setting XER SO is not enough, CR SO is also needed. */ + FAIL_IF(push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2))); + if (flags & ALT_SIGN_EXT) + return push_inst(compiler, SRDI(32) | S(dst) | A(dst)); + return SLJIT_SUCCESS; + } + + if (flags & ALT_FORM2) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + + if (flags & ALT_FORM3) + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + + imm = compiler->imm; + + if (flags & ALT_FORM4) { + FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((imm >> 16) & 0xffff) + ((imm >> 15) & 0x1)))); + src1 = dst; + } + + return push_inst(compiler, ADDI | D(dst) | A(src1) | (imm & 0xffff)); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + BIN_IMM_EXTS(); + return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM4) { + if (flags & ALT_FORM5) + FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm)); + else + FAIL_IF(push_inst(compiler, ADD | D(dst) | A(src1) | B(src2))); + return push_inst(compiler, CMPI | A(dst) | 0); + } + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); + BIN_EXTS(); + if (flags & ALT_FORM5) + return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); + + case SLJIT_ADDC: + BIN_EXTS(); + return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); + + case SLJIT_SUB: + if (flags & ALT_FORM1) { + if (flags & ALT_FORM2) { + FAIL_IF(push_inst(compiler, CMPLI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMPL | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM2) { + if (flags & ALT_FORM3) { + FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM3) { + if (flags & ALT_SIGN_EXT) { + if (src1 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, SLDI(32) | S(src1) | A(TMP_REG1))); + src1 = TMP_REG1; + } + if (src2 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, SLDI(32) | S(src2) | A(TMP_REG2))); + src2 = TMP_REG2; + } + } + + /* Setting XER SO is not enough, CR SO is also needed. 
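+   OE makes the instruction update XER[SO/OV], while Rc copies the
+   summary overflow into CR0, the bit the conditional branches actually
+   test. For the 32-bit variants the operands were shifted into the
+   upper word above, so the 64-bit subtract overflows exactly when the
+   32-bit one would.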
*/ + if (src1 != TMP_ZERO) + FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + else + FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2))); + + if (flags & ALT_SIGN_EXT) + return push_inst(compiler, SRDI(32) | S(dst) | A(dst)); + return SLJIT_SUCCESS; + } + + if (flags & ALT_FORM4) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); + } + + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + BIN_EXTS(); + if (flags & ALT_FORM5) + return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); + + case SLJIT_SUBC: + BIN_EXTS(); + return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); + + case SLJIT_MUL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); + } + BIN_EXTS(); + if (flags & ALT_FORM2) + return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, MULLD | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_AND: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); + } + return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_OR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(imm))); + return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(imm >> 16)); + } + return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_XOR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(imm))); + return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(imm >> 16)); + } + return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_SHL: + case SLJIT_MSHL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + if (flags & ALT_FORM2) { + imm &= 0x1f; + return push_inst(compiler, SLWI(imm) | RC(flags) | S(src1) | A(dst)); + } + + imm &= 0x3f; + return push_inst(compiler, SLDI(imm) | RC(flags) | S(src1) | A(dst)); + } + + if (op == SLJIT_MSHL) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 
0x1f : 0x3f))); + src2 = TMP_REG2; + } + + return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_LSHR: + case SLJIT_MLSHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + if (flags & ALT_FORM2) { + imm &= 0x1f; + /* Since imm can be 0, SRWI() cannot be used. */ + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | RLWI_SH((32 - imm) & 0x1f) | RLWI_MBE(imm, 31)); + } + + imm &= 0x3f; + /* Since imm can be 0, SRDI() cannot be used. */ + return push_inst(compiler, RLDICL | RC(flags) | S(src1) | A(dst) | RLDI_SH((64 - imm) & 0x3f) | RLDI_MB(imm)); + } + + if (op == SLJIT_MLSHR) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 0x1f : 0x3f))); + src2 = TMP_REG2; + } + + return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ASHR: + case SLJIT_MASHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + if (flags & ALT_FORM2) { + imm &= 0x1f; + return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (imm << 11)); + } + + imm &= 0x3f; + return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | RLDI_SH(imm)); + } + + if (op == SLJIT_MASHR) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 0x1f : 0x3f))); + src2 = TMP_REG2; + } + + return push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + imm = compiler->imm; + + if (op == SLJIT_ROTR) + imm = (sljit_u32)(-(sljit_s32)imm); + + if (flags & ALT_FORM2) { + imm &= 0x1f; + return push_inst(compiler, RLWINM | S(src1) | A(dst) | RLWI_SH(imm) | RLWI_MBE(0, 31)); + } + + imm &= 0x3f; + return push_inst(compiler, RLDICL | S(src1) | A(dst) | RLDI_SH(imm)); + } + + if (op == SLJIT_ROTR) { + FAIL_IF(push_inst(compiler, SUBFIC | D(TMP_REG2) | A(src2) | 0)); + src2 = TMP_REG2; + } + + return push_inst(compiler, ((flags & ALT_FORM2) ? 
(RLWNM | RLWI_MBE(0, 31)) : (RLDCL | RLDI_MB(0))) | S(src1) | A(dst) | B(src2)); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +{ + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 reg = 0; + + if (src) + reg = *src & REG_MASK; + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: + arg_count++; + break; + default: + arg_count++; + word_arg_count++; + + if (arg_count != word_arg_count && arg_count == reg) { + FAIL_IF(push_inst(compiler, OR | S(reg) | A(TMP_CALL_REG) | B(reg))); + *src = TMP_CALL_REG; + } + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: + arg_count--; + break; + default: + if (arg_count != word_arg_count) + FAIL_IF(push_inst(compiler, OR | S(word_arg_count) | A(arg_count) | B(word_arg_count))); + + arg_count--; + word_arg_count--; + break; + } + + types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32))); + FAIL_IF(push_inst(compiler, SLDI(32) | S(reg) | A(reg))); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(init_value >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins*)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0); + inst[0] = (inst[0] & 0xffff0000u) | ((sljit_ins)(new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000u) | ((sljit_ins)(new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000u) | ((sljit_ins)(new_target >> 16) & 0xffff); + inst[4] = (inst[4] & 0xffff0000u) | ((sljit_ins)new_target & 0xffff); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 5); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativePPC_common.c b/contrib/libs/pcre2/src/sljit/sljitNativePPC_common.c new file mode 100644 index 0000000000..790faffe46 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativePPC_common.c @@ -0,0 +1,2851 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "PowerPC" SLJIT_CPUINFO; +} + +/* Length of an instruction word. + Both for ppc-32 and ppc-64. */ +typedef sljit_u32 sljit_ins; + +#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_PPC_STACK_FRAME_V2 1 +#endif + +#ifdef _AIX +#error #include <sys/cache.h> +#endif + +#if (defined _CALL_ELF && _CALL_ELF == 2) +#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1 +#endif + +#if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) + +static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) +{ +#ifdef _AIX + _sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from)); +#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM) +# if defined(_ARCH_PWR) || defined(_ARCH_PWR2) + /* Cache flush for POWER architecture. */ + while (from < to) { + __asm__ volatile ( + "clf 0, %0\n" + "dcs\n" + : : "r"(from) + ); + from++; + } + __asm__ volatile ( "ics" ); +# elif defined(_ARCH_COM) && !defined(_ARCH_PPC) +# error "Cache flush is not implemented for PowerPC/POWER common mode." +# else + /* Cache flush for PowerPC architecture. */ + while (from < to) { + __asm__ volatile ( + "dcbf 0, %0\n" + "sync\n" + "icbi 0, %0\n" + : : "r"(from) + ); + from++; + } + __asm__ volatile ( "isync" ); +# endif +# ifdef __xlc__ +# warning "This file may fail to compile if -qfuncsect is used" +# endif +#elif defined(__xlc__) +#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc" +#else +#error "This platform requires a cache flush implementation." 
+#endif /* _AIX */
+}
+
+#endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
+
+#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_ZERO (SLJIT_NUMBER_OF_REGISTERS + 4)
+
+#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
+#define TMP_CALL_REG (SLJIT_NUMBER_OF_REGISTERS + 5)
+#else
+#define TMP_CALL_REG TMP_REG2
+#endif
+
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
+
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
+ 0, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 9, 10, 31, 12
+};
+
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 0, 13
+};
+
+/* --------------------------------------------------------------------- */
+/* Instruction forms */
+/* --------------------------------------------------------------------- */
+#define D(d) ((sljit_ins)reg_map[d] << 21)
+#define S(s) ((sljit_ins)reg_map[s] << 21)
+#define A(a) ((sljit_ins)reg_map[a] << 16)
+#define B(b) ((sljit_ins)reg_map[b] << 11)
+#define C(c) ((sljit_ins)reg_map[c] << 6)
+#define FD(fd) ((sljit_ins)freg_map[fd] << 21)
+#define FS(fs) ((sljit_ins)freg_map[fs] << 21)
+#define FA(fa) ((sljit_ins)freg_map[fa] << 16)
+#define FB(fb) ((sljit_ins)freg_map[fb] << 11)
+#define FC(fc) ((sljit_ins)freg_map[fc] << 6)
+#define IMM(imm) ((sljit_ins)(imm) & 0xffff)
+#define CRD(d) ((sljit_ins)(d) << 21)
+
+/* Instruction bit sections.
+ OE and Rc flag (see ALT_SET_FLAGS). */
+#define OE(flags) ((flags) & ALT_SET_FLAGS)
+/* Rc flag (see ALT_SET_FLAGS). 
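+ ALT_SET_FLAGS is 0x400, the same value as the OE bit of the XO
+ instruction form, which is why OE() above can use the flag directly;
+ RC() shifts that bit down by 10 to reach the Rc bit (bit 0).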
*/ +#define RC(flags) (((flags) & ALT_SET_FLAGS) >> 10) +#define HI(opcode) ((sljit_ins)(opcode) << 26) +#define LO(opcode) ((sljit_ins)(opcode) << 1) + +#define ADD (HI(31) | LO(266)) +#define ADDC (HI(31) | LO(10)) +#define ADDE (HI(31) | LO(138)) +#define ADDI (HI(14)) +#define ADDIC (HI(13)) +#define ADDIS (HI(15)) +#define ADDME (HI(31) | LO(234)) +#define AND (HI(31) | LO(28)) +#define ANDI (HI(28)) +#define ANDIS (HI(29)) +#define Bx (HI(18)) +#define BCx (HI(16)) +#define BCCTR (HI(19) | LO(528) | (3 << 11)) +#define BLR (HI(19) | LO(16) | (0x14 << 21)) +#define CNTLZD (HI(31) | LO(58)) +#define CNTLZW (HI(31) | LO(26)) +#define CMP (HI(31) | LO(0)) +#define CMPI (HI(11)) +#define CMPL (HI(31) | LO(32)) +#define CMPLI (HI(10)) +#define CROR (HI(19) | LO(449)) +#define DCBT (HI(31) | LO(278)) +#define DIVD (HI(31) | LO(489)) +#define DIVDU (HI(31) | LO(457)) +#define DIVW (HI(31) | LO(491)) +#define DIVWU (HI(31) | LO(459)) +#define EXTSB (HI(31) | LO(954)) +#define EXTSH (HI(31) | LO(922)) +#define EXTSW (HI(31) | LO(986)) +#define FABS (HI(63) | LO(264)) +#define FADD (HI(63) | LO(21)) +#define FADDS (HI(59) | LO(21)) +#define FCFID (HI(63) | LO(846)) +#define FCMPU (HI(63) | LO(0)) +#define FCTIDZ (HI(63) | LO(815)) +#define FCTIWZ (HI(63) | LO(15)) +#define FDIV (HI(63) | LO(18)) +#define FDIVS (HI(59) | LO(18)) +#define FMR (HI(63) | LO(72)) +#define FMUL (HI(63) | LO(25)) +#define FMULS (HI(59) | LO(25)) +#define FNEG (HI(63) | LO(40)) +#define FRSP (HI(63) | LO(12)) +#define FSUB (HI(63) | LO(20)) +#define FSUBS (HI(59) | LO(20)) +#define LD (HI(58) | 0) +#define LFD (HI(50)) +#define LWZ (HI(32)) +#define MFCR (HI(31) | LO(19)) +#define MFLR (HI(31) | LO(339) | 0x80000) +#define MFXER (HI(31) | LO(339) | 0x10000) +#define MTCTR (HI(31) | LO(467) | 0x90000) +#define MTLR (HI(31) | LO(467) | 0x80000) +#define MTXER (HI(31) | LO(467) | 0x10000) +#define MULHD (HI(31) | LO(73)) +#define MULHDU (HI(31) | LO(9)) +#define MULHW (HI(31) | LO(75)) +#define MULHWU (HI(31) | LO(11)) +#define MULLD (HI(31) | LO(233)) +#define MULLI (HI(7)) +#define MULLW (HI(31) | LO(235)) +#define NEG (HI(31) | LO(104)) +#define NOP (HI(24)) +#define NOR (HI(31) | LO(124)) +#define OR (HI(31) | LO(444)) +#define ORI (HI(24)) +#define ORIS (HI(25)) +#define RLDCL (HI(30) | LO(8)) +#define RLDICL (HI(30) | LO(0 << 1)) +#define RLDICR (HI(30) | LO(1 << 1)) +#define RLDIMI (HI(30) | LO(3 << 1)) +#define RLWIMI (HI(20)) +#define RLWINM (HI(21)) +#define RLWNM (HI(23)) +#define SLD (HI(31) | LO(27)) +#define SLW (HI(31) | LO(24)) +#define SRAD (HI(31) | LO(794)) +#define SRADI (HI(31) | LO(413 << 1)) +#define SRAW (HI(31) | LO(792)) +#define SRAWI (HI(31) | LO(824)) +#define SRD (HI(31) | LO(539)) +#define SRW (HI(31) | LO(536)) +#define STD (HI(62) | 0) +#define STDU (HI(62) | 1) +#define STDUX (HI(31) | LO(181)) +#define STFD (HI(54)) +#define STFIWX (HI(31) | LO(983)) +#define STW (HI(36)) +#define STWU (HI(37)) +#define STWUX (HI(31) | LO(183)) +#define SUBF (HI(31) | LO(40)) +#define SUBFC (HI(31) | LO(8)) +#define SUBFE (HI(31) | LO(136)) +#define SUBFIC (HI(8)) +#define XOR (HI(31) | LO(316)) +#define XORI (HI(26)) +#define XORIS (HI(27)) + +#define SIMM_MAX (0x7fff) +#define SIMM_MIN (-0x8000) +#define UIMM_MAX (0xffff) + +/* Shift helpers. 
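+ Immediate logical shifts are encoded as rotate-and-mask instructions:
+ e.g. SLWI(3) expands to rlwinm rD,rS,3,0,28 and SRWI(3) to
+ rlwinm rD,rS,29,3,31.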
*/ +#define RLWI_SH(sh) ((sljit_ins)(sh) << 11) +#define RLWI_MBE(mb, me) (((sljit_ins)(mb) << 6) | ((sljit_ins)(me) << 1)) +#define RLDI_SH(sh) ((((sljit_ins)(sh) & 0x1f) << 11) | (((sljit_ins)(sh) & 0x20) >> 4)) +#define RLDI_MB(mb) ((((sljit_ins)(mb) & 0x1f) << 6) | ((sljit_ins)(mb) & 0x20)) +#define RLDI_ME(me) RLDI_MB(me) + +#define SLWI(shift) (RLWINM | RLWI_SH(shift) | RLWI_MBE(0, 31 - (shift))) +#define SLDI(shift) (RLDICR | RLDI_SH(shift) | RLDI_ME(63 - (shift))) +/* shift > 0 */ +#define SRWI(shift) (RLWINM | RLWI_SH(32 - (shift)) | RLWI_MBE((shift), 31)) +#define SRDI(shift) (RLDICL | RLDI_SH(64 - (shift)) | RLDI_MB(shift)) + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#define SLWI_W(shift) SLWI(shift) +#else /* !SLJIT_CONFIG_PPC_32 */ +#define SLWI_W(shift) SLDI(shift) +#endif /* SLJIT_CONFIG_PPC_32 */ + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func) +{ + sljit_uw* ptrs; + + if (func_ptr) + *func_ptr = (void*)context; + + ptrs = (sljit_uw*)func; + context->addr = addr ? addr : ptrs[0]; + context->r2 = ptrs[1]; + context->r11 = ptrs[2]; +} +#endif + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_uw extra_jump_flags; + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) + return 0; +#else + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; +#endif + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (jump->flags & IS_CALL) + goto keep_address; +#endif + + diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l; + + extra_jump_flags = 0; + if (jump->flags & IS_COND) { + if (diff <= 0x7fff && diff >= -0x8000) { + jump->flags |= PATCH_B; + return 1; + } + if (target_addr <= 0xffff) { + jump->flags |= PATCH_B | PATCH_ABS_B; + return 1; + } + extra_jump_flags = REMOVE_COND; + + diff -= SSIZE_OF(ins); + } + + if (diff <= 0x01ffffff && diff >= -0x02000000) { + jump->flags |= PATCH_B | extra_jump_flags; + return 1; + } + + if (target_addr <= 0x03ffffff) { + jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags; + return 1; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) +keep_address: +#endif + if (target_addr <= 0x7fffffff) { + jump->flags |= PATCH_ABS32; + return 1; + } + + if (target_addr <= 0x7fffffffffffl) { + jump->flags |= PATCH_ABS48; + return 1; + } +#endif + + return 0; +} + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + +static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +{ + if (max_label < 0x100000000l) { + put_label->flags = 0; + 
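+ /* The address fits in 32 bits and is loaded with oris+ori,
+ so one extra word is needed besides the already emitted one. */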
return 1; + } + + if (max_label < 0x1000000000000l) { + put_label->flags = 1; + return 3; + } + + put_label->flags = 2; + return 4; +} + +static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) +{ + sljit_uw addr = put_label->label->addr; + sljit_ins *inst = (sljit_ins *)put_label->addr; + sljit_u32 reg = *inst; + + if (put_label->flags == 0) { + SLJIT_ASSERT(addr < 0x100000000l); + inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 16); + } + else { + if (put_label->flags == 1) { + SLJIT_ASSERT(addr < 0x1000000000000l); + inst[0] = ORI | S(TMP_ZERO) | A(reg) | IMM(addr >> 32); + } + else { + inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 48); + inst[1] = ORI | S(reg) | A(reg) | IMM((addr >> 32) & 0xffff); + inst++; + } + + inst[1] = SLDI(32) | S(reg) | A(reg); + inst[2] = ORIS | S(reg) | A(reg) | IMM((addr >> 16) & 0xffff); + inst += 2; + } + + inst[1] = ORI | S(reg) | A(reg) | IMM(addr & 0xffff); +} + +#endif /* SLJIT_CONFIG_PPC_64 */ + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); +#else + compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); +#endif +#endif + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + /* Just recording the address. 
*/ + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + jump->addr = (sljit_uw)(code_ptr - 3); +#else + jump->addr = (sljit_uw)(code_ptr - 6); +#endif + if (detect_jump_type(jump, code_ptr, code, executable_offset)) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + code_ptr[-3] = code_ptr[0]; + code_ptr -= 3; +#else + if (jump->flags & PATCH_ABS32) { + code_ptr -= 3; + code_ptr[-1] = code_ptr[2]; + code_ptr[0] = code_ptr[3]; + } + else if (jump->flags & PATCH_ABS48) { + code_ptr--; + code_ptr[-1] = code_ptr[0]; + code_ptr[0] = code_ptr[1]; + /* rldicr rX,rX,32,31 -> rX,rX,16,47 */ + SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6); + code_ptr[-3] ^= 0x8422; + /* oris -> ori */ + code_ptr[-2] ^= 0x4000000; + } + else { + code_ptr[-6] = code_ptr[0]; + code_ptr -= 6; + } +#endif + if (jump->flags & REMOVE_COND) { + code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001); + code_ptr++; + jump->addr += sizeof(sljit_ins); + code_ptr[0] = Bx; + jump->flags -= IS_COND; + } + } + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); + word_count += 4; +#endif + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr++; + word_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)(compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)))); +#else + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); +#endif + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins *)jump->addr; + + if (jump->flags & PATCH_B) { + if (jump->flags & IS_COND) { + if (!(jump->flags & PATCH_ABS_B)) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); + *buf_ptr = BCx | ((sljit_ins)addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); + } + else { + SLJIT_ASSERT(addr <= 0xffff); + *buf_ptr = BCx | ((sljit_ins)addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); + } + } + else { + if (!(jump->flags & PATCH_ABS_B)) { + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); + SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); + *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | ((*buf_ptr) & 0x1); + } + else { + SLJIT_ASSERT(addr <= 0x03ffffff); + *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); + } + } + break; + } + + /* Set the fields of immediate loads. 
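+ On 64 bit targets the load sequence is addis/ori/sldi/oris/ori, so
+ the 16 bit immediate fields live in words 0, 1, 3 and 4 (word 2,
+ the sldi, carries no immediate).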
*/ +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[1] |= (sljit_ins)addr & 0xffff; +#else + if (jump->flags & PATCH_ABS32) { + SLJIT_ASSERT(addr <= 0x7fffffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[1] |= (sljit_ins)addr & 0xffff; + break; + } + + if (jump->flags & PATCH_ABS48) { + SLJIT_ASSERT(addr <= 0x7fffffffffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 32) & 0xffff; + buf_ptr[1] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[3] |= (sljit_ins)addr & 0xffff; + break; + } + + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3] | buf_ptr[4]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 48) & 0xffff; + buf_ptr[1] |= (sljit_ins)(addr >> 32) & 0xffff; + buf_ptr[3] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[4] |= (sljit_ins)addr & 0xffff; +#endif + } while (0); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + addr = put_label->label->addr; + buf_ptr = (sljit_ins *)put_label->addr; + + SLJIT_ASSERT((buf_ptr[0] & 0xfc1f0000) == ADDIS && (buf_ptr[1] & 0xfc000000) == ORI); + buf_ptr[0] |= (addr >> 16) & 0xffff; + buf_ptr[1] |= addr & 0xffff; +#else + put_label_set(put_label); +#endif + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (((sljit_sw)code_ptr) & 0x4) + code_ptr++; +#endif + sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_uw)code, (void*)sljit_generate_code); +#endif + + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + return code_ptr; +#else + return code; +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif + + /* A saved register is set to a zero value. */ + case SLJIT_HAS_ZERO_REGISTER: + case SLJIT_HAS_CLZ: + case SLJIT_HAS_ROT: + case SLJIT_HAS_PREFETCH: + return 1; + + case SLJIT_HAS_CTZ: + return 2; + + default: + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL); +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* inp_flags: */ + +/* Creates an index in data_transfer_insts array. */ +#define LOAD_DATA 0x01 +#define INDEXED 0x02 +#define SIGNED_DATA 0x04 + +#define WORD_DATA 0x00 +#define BYTE_DATA 0x08 +#define HALF_DATA 0x10 +#define INT_DATA 0x18 +/* Separates integer and floating point registers */ +#define GPR_REG 0x1f +#define DOUBLE_DATA 0x20 + +#define MEM_MASK 0x7f + +/* Other inp_flags. 
*/
+
+/* Integer operation and set flags -> requires exts on 64 bit systems. */
+#define ALT_SIGN_EXT 0x000100
+/* This flag affects the RC() and OERC() macros. */
+#define ALT_SET_FLAGS 0x000400
+#define ALT_FORM1 0x001000
+#define ALT_FORM2 0x002000
+#define ALT_FORM3 0x004000
+#define ALT_FORM4 0x008000
+#define ALT_FORM5 0x010000
+
+/* Source and destination are registers. */
+#define REG_DEST 0x000001
+#define REG1_SOURCE 0x000002
+#define REG2_SOURCE 0x000004
+/*
+ALT_SIGN_EXT 0x000100
+ALT_SET_FLAGS 0x000400
+ALT_FORM1 0x001000
+...
+ALT_FORM5 0x010000 */
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#include "sljitNativePPC_32.c"
+#else
+#include "sljitNativePPC_64.c"
+#endif
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#define STACK_STORE STW
+#define STACK_LOAD LWZ
+#else
+#define STACK_STORE STD
+#define STACK_LOAD LD
+#endif
+
+#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
+#define LR_SAVE_OFFSET 2 * SSIZE_OF(sw)
+#else
+#define LR_SAVE_OFFSET SSIZE_OF(sw)
+#endif
+
+#define STACK_MAX_DISTANCE (0x8000 - SSIZE_OF(sw) - LR_SAVE_OFFSET)
+
+static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg,
+ sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
+{
+ sljit_s32 i, tmp, base, offset;
+ sljit_s32 word_arg_count = 0;
+ sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ sljit_s32 arg_count = 0;
+#endif
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+
+ local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 0)
+ + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
+
+ if (!(options & SLJIT_ENTER_REG_ARG))
+ local_size += SSIZE_OF(sw);
+
+ local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
+ compiler->local_size = local_size;
+
+ FAIL_IF(push_inst(compiler, MFLR | D(0)));
+
+ base = SLJIT_SP;
+ offset = local_size;
+
+ if (local_size <= STACK_MAX_DISTANCE) {
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+ FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
+#else
+ FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
+#endif
+ } else {
+ base = TMP_REG1;
+ FAIL_IF(push_inst(compiler, OR | S(SLJIT_SP) | A(TMP_REG1) | B(SLJIT_SP)));
+ FAIL_IF(load_immediate(compiler, TMP_REG2, -local_size));
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+ FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(TMP_REG2)));
+#else
+ FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(TMP_REG2)));
+#endif
+ local_size = 0;
+ offset = 0;
+ }
+
+ tmp = SLJIT_FS0 - fsaveds;
+ for (i = SLJIT_FS0; i > tmp; i--) {
+ offset -= SSIZE_OF(f64);
+ FAIL_IF(push_inst(compiler, STFD | FS(i) | A(base) | IMM(offset)));
+ }
+
+ for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
+ offset -= SSIZE_OF(f64);
+ FAIL_IF(push_inst(compiler, STFD | FS(i) | A(base) | IMM(offset)));
+ }
+
+ if (!(options & SLJIT_ENTER_REG_ARG)) {
+ offset -= SSIZE_OF(sw);
+ FAIL_IF(push_inst(compiler, 
STACK_STORE | S(TMP_ZERO) | A(base) | IMM(offset))); + } + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset))); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset))); + } + + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(base) | IMM(local_size + LR_SAVE_OFFSET))); + + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); + + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + do { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (arg_count != word_arg_count) + tmp = SLJIT_R0 + word_arg_count; + else + break; + + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + arg_count) | A(tmp) | B(SLJIT_R0 + arg_count))); + } while (0); +#else + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + word_arg_count) | A(SLJIT_S0 - saved_arg_count) | B(SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } +#endif + word_arg_count++; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + arg_count++; +#endif + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 0) + + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + + if (!(options & SLJIT_ENTER_REG_ARG)) + local_size += SSIZE_OF(sw); + + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to) +{ + sljit_s32 i, tmp, base, offset; + sljit_s32 local_size = compiler->local_size; + + base = SLJIT_SP; + if (local_size > STACK_MAX_DISTANCE) { + base = TMP_REG1; + if (local_size > 2 * STACK_MAX_DISTANCE + LR_SAVE_OFFSET) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(base) | A(SLJIT_SP) | IMM(0))); + local_size = 0; + } else { + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(SLJIT_SP) | IMM(local_size - STACK_MAX_DISTANCE))); + local_size = STACK_MAX_DISTANCE; + } + } + + offset = local_size; + if (!is_return_to) + FAIL_IF(push_inst(compiler, STACK_LOAD | S(0) | A(base) | IMM(offset + LR_SAVE_OFFSET))); + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | IMM(offset))); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | IMM(offset))); + } + + if (!(compiler->options & SLJIT_ENTER_REG_ARG)) { + offset -= SSIZE_OF(sw); + 
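+ /* Reload TMP_ZERO, which sljit_emit_enter saved into this slot. */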
FAIL_IF(push_inst(compiler, STACK_LOAD | S(TMP_ZERO) | A(base) | IMM(offset))); + } + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset))); + } + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset))); + } + + if (!is_return_to) + push_inst(compiler, MTLR | S(0)); + + if (local_size > 0) + return push_inst(compiler, ADDI | D(SLJIT_SP) | A(base) | IMM(local_size)); + + SLJIT_ASSERT(base == TMP_REG1); + return push_inst(compiler, OR | S(base) | A(SLJIT_SP) | B(base)); +} + +#undef STACK_STORE +#undef STACK_LOAD + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + return push_inst(compiler, BLR); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG)); + src = TMP_CALL_REG; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src = TMP_CALL_REG; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +/* s/l - store/load (1 bit) + i/x - immediate/indexed form + u/s - signed/unsigned (1 bit) + w/b/h/i - word/byte/half/int allowed (2 bit) + + Some opcodes are repeated (e.g. store signed / unsigned byte is the same instruction). */ + +/* 64 bit only: [reg+imm] must be aligned to 4 bytes. */ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define INT_ALIGNED 0x10000 +#endif + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#define ARCH_32_64(a, b) a +#define INST_CODE_AND_DST(inst, flags, reg) \ + ((sljit_ins)(inst) | (sljit_ins)(((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) +#else +#define ARCH_32_64(a, b) b +#define INST_CODE_AND_DST(inst, flags, reg) \ + (((sljit_ins)(inst) & ~(sljit_ins)INT_ALIGNED) | (sljit_ins)(((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) +#endif + +static const sljit_ins data_transfer_insts[64 + 16] = { + +/* -------- Integer -------- */ + +/* Word. 
*/ + +/* w u i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */), +/* w u i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */), +/* w u x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), +/* w u x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), + +/* w s i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */), +/* w s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */), +/* w s x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), +/* w s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), + +/* Byte. */ + +/* b u i s */ HI(38) /* stb */, +/* b u i l */ HI(34) /* lbz */, +/* b u x s */ HI(31) | LO(215) /* stbx */, +/* b u x l */ HI(31) | LO(87) /* lbzx */, + +/* b s i s */ HI(38) /* stb */, +/* b s i l */ HI(34) /* lbz */ /* EXTS_REQ */, +/* b s x s */ HI(31) | LO(215) /* stbx */, +/* b s x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */, + +/* Half. */ + +/* h u i s */ HI(44) /* sth */, +/* h u i l */ HI(40) /* lhz */, +/* h u x s */ HI(31) | LO(407) /* sthx */, +/* h u x l */ HI(31) | LO(279) /* lhzx */, + +/* h s i s */ HI(44) /* sth */, +/* h s i l */ HI(42) /* lha */, +/* h s x s */ HI(31) | LO(407) /* sthx */, +/* h s x l */ HI(31) | LO(343) /* lhax */, + +/* Int. */ + +/* i u i s */ HI(36) /* stw */, +/* i u i l */ HI(32) /* lwz */, +/* i u x s */ HI(31) | LO(151) /* stwx */, +/* i u x l */ HI(31) | LO(23) /* lwzx */, + +/* i s i s */ HI(36) /* stw */, +/* i s i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */), +/* i s x s */ HI(31) | LO(151) /* stwx */, +/* i s x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */), + +/* -------- Floating point -------- */ + +/* d i s */ HI(54) /* stfd */, +/* d i l */ HI(50) /* lfd */, +/* d x s */ HI(31) | LO(727) /* stfdx */, +/* d x l */ HI(31) | LO(599) /* lfdx */, + +/* s i s */ HI(52) /* stfs */, +/* s i l */ HI(48) /* lfs */, +/* s x s */ HI(31) | LO(663) /* stfsx */, +/* s x l */ HI(31) | LO(535) /* lfsx */, +}; + +static const sljit_ins updated_data_transfer_insts[64] = { + +/* -------- Integer -------- */ + +/* Word. */ + +/* w u i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */), +/* w u i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */), +/* w u x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), +/* w u x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), + +/* w s i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */), +/* w s i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */), +/* w s x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), +/* w s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), + +/* Byte. */ + +/* b u i s */ HI(39) /* stbu */, +/* b u i l */ HI(35) /* lbzu */, +/* b u x s */ HI(31) | LO(247) /* stbux */, +/* b u x l */ HI(31) | LO(119) /* lbzux */, + +/* b s i s */ HI(39) /* stbu */, +/* b s i l */ 0 /* no such instruction */, +/* b s x s */ HI(31) | LO(247) /* stbux */, +/* b s x l */ 0 /* no such instruction */, + +/* Half. 
*/ + +/* h u i s */ HI(45) /* sthu */, +/* h u i l */ HI(41) /* lhzu */, +/* h u x s */ HI(31) | LO(439) /* sthux */, +/* h u x l */ HI(31) | LO(311) /* lhzux */, + +/* h s i s */ HI(45) /* sthu */, +/* h s i l */ HI(43) /* lhau */, +/* h s x s */ HI(31) | LO(439) /* sthux */, +/* h s x l */ HI(31) | LO(375) /* lhaux */, + +/* Int. */ + +/* i u i s */ HI(37) /* stwu */, +/* i u i l */ HI(33) /* lwzu */, +/* i u x s */ HI(31) | LO(183) /* stwux */, +/* i u x l */ HI(31) | LO(55) /* lwzux */, + +/* i s i s */ HI(37) /* stwu */, +/* i s i l */ ARCH_32_64(HI(33) /* lwzu */, 0 /* no such instruction */), +/* i s x s */ HI(31) | LO(183) /* stwux */, +/* i s x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */), + +/* -------- Floating point -------- */ + +/* d i s */ HI(55) /* stfdu */, +/* d i l */ HI(51) /* lfdu */, +/* d x s */ HI(31) | LO(759) /* stfdux */, +/* d x l */ HI(31) | LO(631) /* lfdux */, + +/* s i s */ HI(53) /* stfsu */, +/* s i l */ HI(49) /* lfsu */, +/* s x s */ HI(31) | LO(695) /* stfsux */, +/* s x l */ HI(31) | LO(567) /* lfsux */, +}; + +#undef ARCH_32_64 + +/* Simple cases, (no caching is required). */ +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) +{ + sljit_ins inst; + sljit_s32 offs_reg; + + /* Should work when (arg & REG_MASK) == 0. */ + SLJIT_ASSERT(A(0) == 0); + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + offs_reg = OFFS_REG(arg); + + if (argw != 0) { + FAIL_IF(push_inst(compiler, SLWI_W(argw) | S(OFFS_REG(arg)) | A(tmp_reg))); + offs_reg = tmp_reg; + } + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + SLJIT_ASSERT(!(inst & INT_ALIGNED)); +#endif /* SLJIT_CONFIG_PPC_64 */ + + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(offs_reg)); + } + + inst = data_transfer_insts[inp_flags & MEM_MASK]; + arg &= REG_MASK; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((inst & INT_ALIGNED) && (argw & 0x3) != 0) { + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg)); + } +#endif /* SLJIT_CONFIG_PPC_64 */ + + if (argw <= SIMM_MAX && argw >= SIMM_MIN) + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | IMM(argw)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (argw <= 0x7fff7fffl && argw >= -0x80000000l) { +#endif /* SLJIT_CONFIG_PPC_64 */ + FAIL_IF(push_inst(compiler, ADDIS | D(tmp_reg) | A(arg) | IMM((argw + 0x8000) >> 16))); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_reg) | IMM(argw)); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + } + + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg)); +#endif /* SLJIT_CONFIG_PPC_64 */ +} + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + result goes to TMP_REG2, so put result can use TMP_REG1. 
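+ (i.e. when the result is stored back to memory, TMP_REG1 is still
+ free to compute the destination address).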
*/ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_s32 src2_r; + sljit_s32 sugg_src2_r = TMP_REG2; + sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS); + + /* Destination check. */ + if (FAST_IS_REG(dst)) { + dst_r = dst; + /* The REG_DEST is only used by SLJIT_MOV operations, although + * it is set for op2 operations with unset destination. */ + flags |= REG_DEST; + + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) + sugg_src2_r = dst_r; + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 & SLJIT_IMM) { + src1_r = TMP_ZERO; + if (src1w != 0) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } + } + else { + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1_r = TMP_REG1; + } + + /* Source 2. */ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) + dst_r = src2_r; + } + else if (src2 & SLJIT_IMM) { + src2_r = TMP_ZERO; + if (src2w != 0) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); + src2_r = sugg_src2_r; + } + } + else { + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2)); + src2_r = sugg_src2_r; + } + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + return emit_op_mem(compiler, input_flags, dst_r, dst, dstw, TMP_REG1); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 int_op = op & SLJIT_32; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + case SLJIT_NOP: + return push_inst(compiler, NOP); + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); +#else + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); +#endif + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); +#else + FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); +#endif + return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? 
DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); +#else + return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); +#endif + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + if (!(src & OFFS_REG_MASK)) { + if (srcw == 0 && (src & REG_MASK)) + return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + /* Works with SLJIT_MEM0() case as well. */ + return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1)); + } + + srcw &= 0x3; + + if (srcw == 0) + return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src))); + + FAIL_IF(push_inst(compiler, SLWI_W(srcw) | S(OFFS_REG(src)) | A(TMP_REG1))); + return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1)); +} + +#define EMIT_MOV(type, type_flags, type_cast) \ + emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0; + sljit_s32 op_flags = GET_ALL_FLAGS(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + op = GET_OPCODE(op); + + if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW) + FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); + + if (op < SLJIT_NOT && FAST_IS_REG(src) && src == dst) { + if (!TYPE_CAST_NEEDED(op)) + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op_flags & SLJIT_32) { + if (op < SLJIT_NOT) { + if (src & SLJIT_MEM) { + if (op == SLJIT_MOV_S32) + op = SLJIT_MOV_U32; + } + else if (src & SLJIT_IMM) { + if (op == SLJIT_MOV_U32) + op = SLJIT_MOV_S32; + } + } + else { + /* Most operations expect sign extended arguments. */ + flags |= INT_DATA | SIGNED_DATA; + if (HAS_FLAGS(op_flags)) + flags |= ALT_SIGN_EXT; + } + } +#endif + + switch (op) { + case SLJIT_MOV: +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: +#endif + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + case SLJIT_MOV_U32: + return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32)); + + case SLJIT_MOV_S32: + case SLJIT_MOV32: + return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32)); +#endif + + case SLJIT_MOV_U8: + return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8)); + + case SLJIT_MOV_S8: + return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8)); + + case SLJIT_MOV_U16: + return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16)); + + case SLJIT_MOV_S16: + return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16)); + + case SLJIT_NOT: + return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_CLZ: + case SLJIT_CTZ: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + return emit_op(compiler, op, flags | (!(op_flags & SLJIT_32) ? 
0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); +#else + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); +#endif + } + + return SLJIT_SUCCESS; +} + +#undef EMIT_MOV + +#define TEST_SL_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN) + +#define TEST_UL_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~0xffff)) + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_SH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l) +#else +#define TEST_SH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & 0xffff)) +#endif + +#define TEST_UH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000)) + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_ADD_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l) +#else +#define TEST_ADD_IMM(src, srcw) \ + ((src) & SLJIT_IMM) +#endif + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_UI_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff)) +#else +#define TEST_UI_IMM(src, srcw) \ + ((src) & SLJIT_IMM) +#endif + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_ADD_FORM1(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ + || (op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_32 | SLJIT_SET_Z | SLJIT_SET_CARRY)) +#define TEST_SUB_FORM2(op) \ + ((GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) \ + || (op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_32 | SLJIT_SET_Z)) +#define TEST_SUB_FORM3(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ + || (op & (SLJIT_32 | SLJIT_SET_Z)) == (SLJIT_32 | SLJIT_SET_Z)) +#else +#define TEST_ADD_FORM1(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) +#define TEST_SUB_FORM2(op) \ + (GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) +#define TEST_SUB_FORM3(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_32) { + /* Most operations expect sign extended arguments. 
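+ Immediate operands are therefore narrowed to sljit_s32 below before
+ they are used.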
*/ + flags |= INT_DATA | SIGNED_DATA; + if (src1 & SLJIT_IMM) + src1w = (sljit_s32)(src1w); + if (src2 & SLJIT_IMM) + src2w = (sljit_s32)(src2w); + if (HAS_FLAGS(op)) + flags |= ALT_SIGN_EXT; + } +#endif + if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) + FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + + if (TEST_ADD_FORM1(op)) + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); + + if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = (sljit_ins)src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = (sljit_ins)src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + } + if (TEST_SH_IMM(src2, src2w)) { + compiler->imm = (sljit_ins)(src2w >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SH_IMM(src1, src1w)) { + compiler->imm = (sljit_ins)(src1w >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + } + /* Range between -1 and -32768 is covered above. */ + if (TEST_ADD_IMM(src2, src2w)) { + compiler->imm = (sljit_ins)src2w & 0xffffffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_ADD_IMM(src1, src1w)) { + compiler->imm = (sljit_ins)src1w & 0xffffffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((op & (SLJIT_32 | SLJIT_SET_Z)) == (SLJIT_32 | SLJIT_SET_Z)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = (sljit_ins)src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = (sljit_ins)src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src2, src2w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + } +#endif + if (HAS_FLAGS(op)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = (sljit_ins)src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = (sljit_ins)src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? 
ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + + if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) { + if (dst == TMP_REG2) { + if (TEST_UL_IMM(src2, src2w)) { + compiler->imm = (sljit_ins)src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); + } + + if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) { + compiler->imm = (sljit_ins)src2w; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); + } + + if (dst == TMP_REG2 && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = (sljit_ins)src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w); + } + + if (TEST_SUB_FORM2(op)) { + if ((src2 & SLJIT_IMM) && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) { + compiler->imm = (sljit_ins)src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + } + + if (TEST_SUB_FORM3(op)) + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); + + if (TEST_SL_IMM(src2, -src2w)) { + compiler->imm = (sljit_ins)(-src2w) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | (!HAS_FLAGS(op) ? ALT_FORM2 : ALT_FORM3), dst, dstw, src1, src1w, TMP_REG2, 0); + } + + if (TEST_SL_IMM(src1, src1w) && !(op & SLJIT_SET_Z)) { + compiler->imm = (sljit_ins)src1w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); + } + + if (!HAS_FLAGS(op)) { + if (TEST_SH_IMM(src2, -src2w)) { + compiler->imm = (sljit_ins)((-src2w) >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + /* Range between -1 and -32768 is covered above. */ + if (TEST_ADD_IMM(src2, -src2w)) { + compiler->imm = (sljit_ins)-src2w; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + } + } + + /* We know ALT_SIGN_EXT is set if it is an SLJIT_32 on 64 bit systems. */ + return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? 
ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_32) + flags |= ALT_FORM2; +#endif + if (!HAS_FLAGS(op)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = (sljit_ins)src2w & 0xffff; + return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = (sljit_ins)src1w & 0xffff; + return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + else + FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); + return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + /* Commutative unsigned operations. */ + if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) { + if (TEST_UL_IMM(src2, src2w)) { + compiler->imm = (sljit_ins)src2w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UL_IMM(src1, src1w)) { + compiler->imm = (sljit_ins)src1w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + if (TEST_UH_IMM(src2, src2w)) { + compiler->imm = (sljit_ins)(src2w >> 16) & 0xffff; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UH_IMM(src1, src1w)) { + compiler->imm = (sljit_ins)(src1w >> 16) & 0xffff; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + if (!HAS_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) { + /* Unlike or and xor, the and resets unwanted bits as well. 
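+ An or/xor immediate can be applied as two independent 16 bit halves
+ (ori+oris / xori+xoris), but andi.+andis. would each clear the bits
+ kept by the other half, so AND cannot take this path.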
*/ + if (TEST_UI_IMM(src2, src2w)) { + compiler->imm = (sljit_ins)src2w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UI_IMM(src1, src1w)) { + compiler->imm = (sljit_ins)src1w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + case SLJIT_ROTL: + case SLJIT_ROTR: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_32) + flags |= ALT_FORM2; +#endif + if (src2 & SLJIT_IMM) { + compiler->imm = (sljit_ins)src2w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + +#undef TEST_ADD_FORM1 +#undef TEST_SUB_FORM2 +#undef TEST_SUB_FORM3 + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 is_right; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64; +#else /* !SLJIT_CONFIG_PPC_64 */ + sljit_s32 inp_flags = WORD_DATA | LOAD_DATA; + sljit_sw bit_length = 32; +#endif /* SLJIT_CONFIG_PPC_64 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w)); + + is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR); + + if (src_dst == src1) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w); + } + + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (src2 & SLJIT_IMM) { + src2w &= bit_length - 1; + + if (src2w == 0) + return SLJIT_SUCCESS; + } else if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src2, src2w, TMP_REG2)); + src2 = TMP_REG2; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG1, src1, src1w, TMP_REG1)); + src1 = TMP_REG1; + } else if (src1 & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (!(op & SLJIT_32)) { + if (is_right) { + FAIL_IF(push_inst(compiler, SRDI(src2w) | S(src_dst) | A(src_dst))); + return push_inst(compiler, RLDIMI | S(src1) | A(src_dst) | RLDI_SH(64 - src2w) | RLDI_MB(0)); + } + + FAIL_IF(push_inst(compiler, SLDI(src2w) | S(src_dst) | A(src_dst))); + /* Computes SRDI(64 - src2w). 
*/ + FAIL_IF(push_inst(compiler, RLDICL | S(src1) | A(TMP_REG1) | RLDI_SH(src2w) | RLDI_MB(64 - src2w))); + return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1)); + } +#endif /* SLJIT_CONFIG_PPC_64 */ + + if (is_right) { + FAIL_IF(push_inst(compiler, SRWI(src2w) | S(src_dst) | A(src_dst))); + return push_inst(compiler, RLWIMI | S(src1) | A(src_dst) | RLWI_SH(32 - src2w) | RLWI_MBE(0, src2w - 1)); + } + + FAIL_IF(push_inst(compiler, SLWI(src2w) | S(src_dst) | A(src_dst))); + return push_inst(compiler, RLWIMI | S(src1) | A(src_dst) | RLWI_SH(src2w) | RLWI_MBE(32 - src2w, 31)); + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (!(op & SLJIT_32)) { + if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x3f)); + src2 = TMP_REG2; + } + + FAIL_IF(push_inst(compiler, (is_right ? SRD : SLD) | S(src_dst) | A(src_dst) | B(src2))); + FAIL_IF(push_inst(compiler, (is_right ? SLDI(1) : SRDI(1)) | S(src1) | A(TMP_REG1))); + FAIL_IF(push_inst(compiler, XORI | S(src2) | A(TMP_REG2) | 0x3f)); + FAIL_IF(push_inst(compiler, (is_right ? SLD : SRD) | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2))); + return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1)); + } +#endif /* SLJIT_CONFIG_PPC_64 */ + + if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) { + FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f)); + src2 = TMP_REG2; + } + + FAIL_IF(push_inst(compiler, (is_right ? SRW : SLW) | S(src_dst) | A(src_dst) | B(src2))); + FAIL_IF(push_inst(compiler, (is_right ? SLWI(1) : SRWI(1)) | S(src1) | A(TMP_REG1))); + FAIL_IF(push_inst(compiler, XORI | S(src2) | A(TMP_REG2) | 0x1f)); + FAIL_IF(push_inst(compiler, (is_right ? SLW : SRW) | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2))); + return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, MTLR | S(src))); + else { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG2)); + FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2))); + } + + return push_inst(compiler, BLR); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + return emit_prefetch(compiler, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FLOAT_DATA(op) (DOUBLE_DATA | 
((op & SLJIT_32) >> 6)) +#define SELECT_FOP(op, single, double) ((sljit_ins)((op & SLJIT_32) ? single : double)) + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw)) +#else +#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw)) + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw)) +#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw)) +#else +#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw)) +#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw)) +#endif + +#endif /* SLJIT_CONFIG_PPC_64 */ + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + /* We can ignore the temporary data store on the stack from caching point of view. */ + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1)); + src = TMP_FREG1; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + op = GET_OPCODE(op); + FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src))); + + if (op == SLJIT_CONV_SW_FROM_F64) { + if (FAST_IS_REG(dst)) { + FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1)); + return emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1); + } + return emit_op_mem(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, TMP_REG1); + } +#else + FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src))); +#endif + + if (FAST_IS_REG(dst)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1))); + return emit_op_mem(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1); + } + + SLJIT_ASSERT(dst & SLJIT_MEM); + + if (dst & OFFS_REG_MASK) { + dstw &= 0x3; + if (dstw) { + FAIL_IF(push_inst(compiler, SLWI_W(dstw) | S(OFFS_REG(dst)) | A(TMP_REG1))); + dstw = TMP_REG1; + } + else + dstw = OFFS_REG(dst); + } + else { + if ((dst & REG_MASK) && !dstw) { + dstw = dst & REG_MASK; + dst = 0; + } + else { + /* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */ + FAIL_IF(load_immediate(compiler, TMP_REG1, dstw)); + dstw = TMP_REG1; + } + } + + return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1;
+
+ if (src & SLJIT_IMM) {
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
+ srcw = (sljit_s32)srcw;
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ src = TMP_REG1;
+ }
+ else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
+ else
+ FAIL_IF(emit_op_mem(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
+ src = TMP_REG1;
+ }
+
+ if (FAST_IS_REG(src)) {
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
+ FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1));
+ }
+ else
+ FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, TMP_REG1));
+
+ FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1);
+ if (op & SLJIT_32)
+ return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
+ return SLJIT_SUCCESS;
+
+#else
+
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+ sljit_s32 invert_sign = 1;
+
+ if (src & SLJIT_IMM) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ (sljit_sw)0x80000000));
+ src = TMP_REG1;
+ invert_sign = 0;
+ }
+ else if (!FAST_IS_REG(src)) {
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
+ src = TMP_REG1;
+ }
+
+ /* First, a special double precision floating point value is constructed: 2^53 + (input xor 2^31).
+ The double precision format has exactly 53 bits of precision, so the lower 32 bits of the mantissa
+ hold the lower 32 bits of that value. The xor with 2^31 has the same effect as adding 0x80000000
+ to the input, shifting it into the 0 - 0xffffffff range. To get the converted floating point
+ value, 2^53 + 2^31 must be subtracted from the constructed value. 
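+ Worked example: for the input -1 (bit pattern 0xffffffff), the xor gives 0x7fffffff, the constructed double is 2^53 + 0x7fffffff, and subtracting 2^53 + 2^31 leaves exactly -1.0; both values fit in the 53 bit mantissa, so no rounding occurs.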
*/ + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330)); + if (invert_sign) + FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000)); + FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2)); + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000)); + FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, TMP_REG2)); + FAIL_IF(emit_op_mem(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, TMP_REG1)); + + FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2))); + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); + if (op & SLJIT_32) + return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); + return SLJIT_SUCCESS; + +#endif +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2)); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2))); + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return push_inst(compiler, CROR | ((4 + 2) << 21) | ((4 + 2) << 16) | ((4 + 3) << 11)); + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_ORDERED_GREATER_EQUAL: + return push_inst(compiler, CROR | ((4 + 0) << 21) | ((4 + 0) << 16) | ((4 + 3) << 11)); + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED_LESS_EQUAL: + return push_inst(compiler, CROR | ((4 + 1) << 21) | ((4 + 1) << 16) | ((4 + 3) << 11)); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, TMP_REG1)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_CONV_F64_FROM_F32: + op ^= SLJIT_32; + if (op & SLJIT_32) { + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src))); + break; + } + /* Fall through. 
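+ (converting f32 to f64 needs no instruction of its own on PPC, since every FPR already holds a double; it reduces to the register move handled below.)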
*/ + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src))); + break; + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op), dst_r, dst, dstw, TMP_REG1)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, TMP_REG1)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, TMP_REG2)); + src2 = TMP_FREG2; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2))); + break; + + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2))); + break; + + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */)); + break; + + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2))); + break; + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, TMP_REG1)); + + return SLJIT_SUCCESS; +} + +#undef SELECT_FOP + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, MFLR | D(dst)); + + /* Memory. 
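+ (the link register cannot be stored to memory directly, so it is moved to TMP_REG2 first and then stored through the generic move path.)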
*/ + FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2))); + return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type) +{ + switch (type) { + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (4 << 21) | (2 << 16); + /* fallthrough */ + + case SLJIT_EQUAL: + return (12 << 21) | (2 << 16); + + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (12 << 21) | (2 << 16); + /* fallthrough */ + + case SLJIT_NOT_EQUAL: + return (4 << 21) | (2 << 16); + + case SLJIT_LESS: + case SLJIT_SIG_LESS: + return (12 << 21) | (0 << 16); + + case SLJIT_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + return (4 << 21) | (0 << 16); + + case SLJIT_GREATER: + case SLJIT_SIG_GREATER: + return (12 << 21) | (1 << 16); + + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + return (4 << 21) | (1 << 16); + + case SLJIT_OVERFLOW: + return (12 << 21) | (3 << 16); + + case SLJIT_NOT_OVERFLOW: + return (4 << 21) | (3 << 16); + + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: + return (12 << 21) | ((4 + 0) << 16); + + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return (4 << 21) | ((4 + 0) << 16); + + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + return (12 << 21) | ((4 + 1) << 16); + + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return (4 << 21) | ((4 + 1) << 16); + + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + return (12 << 21) | ((4 + 2) << 16); + + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + return (4 << 21) | ((4 + 2) << 16); + + case SLJIT_UNORDERED: + return (12 << 21) | ((4 + 3) << 16); + + case SLJIT_ORDERED: + return (4 << 21) | ((4 + 3) << 16); + + default: + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG); + return (20 << 21); + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins bo_bi_flags; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + bo_bi_flags = get_bo_bi_flags(compiler, type & 0xff); + if (!bo_bi_flags) + return NULL; + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, (sljit_u32)type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type == SLJIT_CARRY || type == SLJIT_NOT_CARRY) + PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO))); + + /* In PPC, we don't need to touch the arguments. 
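+ (that is, no argument registers need to be rearranged before emitting the branch on this target.)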
*/ + if (type < SLJIT_JUMP) + jump->flags |= IS_COND; +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL) + jump->flags |= IS_CALL; +#endif + + PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); + PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG))); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0))); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); +#endif + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump = NULL; + sljit_s32 src_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (FAST_IS_REG(src)) { +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL && src != TMP_CALL_REG) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src_r = TMP_CALL_REG; + } + else + src_r = src; +#else /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + src_r = src; +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + } else if (src & SLJIT_IMM) { + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR); + jump->u.target = (sljit_uw)srcw; + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL) + jump->flags |= IS_CALL; +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + + FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); + src_r = TMP_CALL_REG; + } else { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG)); + src_r = TMP_CALL_REG; + } + + FAIL_IF(push_inst(compiler, MTCTR | S(src_r))); + if (jump) + jump->addr = compiler->size; + return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 
1 : 0)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG)); + src = TMP_CALL_REG; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src = TMP_CALL_REG; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + FAIL_IF(call_with_args(compiler, arg_types, &src)); +#endif + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 reg, invert; + sljit_u32 bit, from_xer; + sljit_s32 saved_op = op; + sljit_sw saved_dstw = dstw; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 input_flags = ((op & SLJIT_32) || op == SLJIT_MOV32) ? INT_DATA : WORD_DATA; +#else + sljit_s32 input_flags = WORD_DATA; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG1)); + + invert = 0; + bit = 0; + from_xer = 0; + + switch (type) { + case SLJIT_LESS: + case SLJIT_SIG_LESS: + break; + + case SLJIT_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + invert = 1; + break; + + case SLJIT_GREATER: + case SLJIT_SIG_GREATER: + bit = 1; + break; + + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + bit = 1; + invert = 1; + break; + + case SLJIT_EQUAL: + bit = 2; + break; + + case SLJIT_NOT_EQUAL: + bit = 2; + invert = 1; + break; + + case SLJIT_OVERFLOW: + from_xer = 1; + bit = 1; + break; + + case SLJIT_NOT_OVERFLOW: + from_xer = 1; + bit = 1; + invert = 1; + break; + + case SLJIT_CARRY: + from_xer = 1; + bit = 2; + invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) != 0; + break; + + case SLJIT_NOT_CARRY: + from_xer = 1; + bit = 2; + invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) != 0; + break; + + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: + bit = 4 + 0; + break; + + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + bit = 4 + 0; + invert = 1; + break; + + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + bit = 4 + 1; + break; + + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + bit = 4 + 1; + invert = 1; + break; + + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + bit = 4 + 2; + break; + + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + bit = 4 + 2; + invert = 1; + break; + + case SLJIT_UNORDERED: + bit = 4 + 3; + break; + + case SLJIT_ORDERED: + bit = 4 + 3; + invert = 1; + 
break;
+
+ default:
+ SLJIT_UNREACHABLE();
+ break;
+ }
+
+ FAIL_IF(push_inst(compiler, (from_xer ? MFXER : MFCR) | D(reg)));
+ /* Simplified mnemonics: extrwi. */
+ FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | RLWI_SH(1 + bit) | RLWI_MBE(31, 31)));
+
+ if (invert)
+ FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1));
+
+ if (op < SLJIT_ADD) {
+ if (!(dst & SLJIT_MEM))
+ return SLJIT_SUCCESS;
+ return emit_op_mem(compiler, input_flags, reg, dst, dstw, TMP_REG1);
+ }
+
+ SLJIT_SKIP_CHECKS(compiler);
+
+ if (dst & SLJIT_MEM)
+ return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0);
+ return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+ return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
+}
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+
+#define EMIT_MEM_LOAD_IMM(inst, mem, memw) \
+ ((sljit_s16)(memw) > SIMM_MAX - SSIZE_OF(sw))
+
+#else /* !SLJIT_CONFIG_PPC_32 */
+
+#define EMIT_MEM_LOAD_IMM(inst, mem, memw) \
+ ((((inst) & INT_ALIGNED) && ((memw) & 0x3) != 0) \
+ || ((sljit_s16)(memw) > SIMM_MAX - SSIZE_OF(sw)) \
+ || ((memw) > 0x7fff7fffl || (memw) < -0x80000000l))
+
+#endif /* SLJIT_CONFIG_PPC_32 */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 reg,
+ sljit_s32 mem, sljit_sw memw)
+{
+ sljit_ins inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
+
+ if (!(reg & REG_PAIR_MASK))
+ return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
+
+ ADJUST_LOCAL_OFFSET(mem, memw);
+
+ inst = data_transfer_insts[WORD_DATA | ((type & SLJIT_MEM_STORE) ? 
0 : LOAD_DATA)]; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + memw &= 0x3; + + if (memw != 0) { + FAIL_IF(push_inst(compiler, SLWI_W(memw) | S(OFFS_REG(mem)) | A(TMP_REG1))); + FAIL_IF(push_inst(compiler, ADD | D(TMP_REG1) | A(TMP_REG1) | B(mem & REG_MASK))); + } else + FAIL_IF(push_inst(compiler, ADD | D(TMP_REG1) | A(mem & REG_MASK) | B(OFFS_REG(mem)))); + + mem = TMP_REG1; + memw = 0; + } else { + if (EMIT_MEM_LOAD_IMM(inst, mem, memw)) { + if ((mem & REG_MASK) != 0) { + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_op2(compiler, SLJIT_ADD, TMP_REG1, 0, mem & REG_MASK, 0, SLJIT_IMM, memw)); + } else + FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); + + memw = 0; + mem = TMP_REG1; + } else if (memw > SIMM_MAX || memw < SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(mem & REG_MASK) | IMM((memw + 0x8000) >> 16))); + + memw &= 0xffff; + mem = TMP_REG1; + } else { + memw &= 0xffff; + mem &= REG_MASK; + } + } + + SLJIT_ASSERT((memw >= 0 && memw <= SIMM_MAX - SSIZE_OF(sw)) || (memw >= 0x8000 && memw <= 0xffff)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + inst &= (sljit_ins)~INT_ALIGNED; +#endif /* SLJIT_CONFIG_PPC_64 */ + + if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) { + FAIL_IF(push_inst(compiler, inst | D(REG_PAIR_SECOND(reg)) | A(mem) | IMM(memw + SSIZE_OF(sw)))); + return push_inst(compiler, inst | D(REG_PAIR_FIRST(reg)) | A(mem) | IMM(memw)); + } + + FAIL_IF(push_inst(compiler, inst | D(REG_PAIR_FIRST(reg)) | A(mem) | IMM(memw))); + return push_inst(compiler, inst | D(REG_PAIR_SECOND(reg)) | A(mem) | IMM(memw + SSIZE_OF(sw))); +} + +#undef EMIT_MEM_LOAD_IMM + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 mem_flags; + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); + + if (type & SLJIT_MEM_POST) + return SLJIT_ERR_UNSUPPORTED; + + switch (type & 0xff) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: +#endif + mem_flags = WORD_DATA; + break; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + case SLJIT_MOV_U32: + case SLJIT_MOV32: + mem_flags = INT_DATA; + break; + + case SLJIT_MOV_S32: + mem_flags = INT_DATA; + + if (!(type & SLJIT_MEM_STORE) && !(type & SLJIT_32)) { + if (mem & OFFS_REG_MASK) + mem_flags |= SIGNED_DATA; + else + return SLJIT_ERR_UNSUPPORTED; + } + break; +#endif + + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + mem_flags = BYTE_DATA; + break; + + case SLJIT_MOV_U16: + mem_flags = HALF_DATA; + break; + + case SLJIT_MOV_S16: + mem_flags = HALF_DATA | SIGNED_DATA; + break; + + default: + SLJIT_UNREACHABLE(); + mem_flags = WORD_DATA; + break; + } + + if (!(type & SLJIT_MEM_STORE)) + mem_flags |= LOAD_DATA; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + if (memw != 0) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + inst = updated_data_transfer_insts[mem_flags | INDEXED]; + FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | B(OFFS_REG(mem)))); + } + else { + if (memw > SIMM_MAX || memw < SIMM_MIN) + return SLJIT_ERR_UNSUPPORTED; + + inst = updated_data_transfer_insts[mem_flags]; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((inst & INT_ALIGNED) && (memw & 0x3) != 0) + return SLJIT_ERR_UNSUPPORTED; +#endif + + if (type & 
SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, 0, reg) | A(mem & REG_MASK) | IMM(memw))); + } + + if ((mem_flags & LOAD_DATA) && (type & 0xff) == SLJIT_MOV_S8) + return push_inst(compiler, EXTSB | S(reg) | A(reg)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 mem_flags; + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw)); + + if (type & SLJIT_MEM_POST) + return SLJIT_ERR_UNSUPPORTED; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + if (memw != 0) + return SLJIT_ERR_UNSUPPORTED; + } + else { + if (memw > SIMM_MAX || memw < SIMM_MIN) + return SLJIT_ERR_UNSUPPORTED; + } + + if (type & SLJIT_MEM_SUPP) + return SLJIT_SUCCESS; + + mem_flags = FLOAT_DATA(type); + + if (!(type & SLJIT_MEM_STORE)) + mem_flags |= LOAD_DATA; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + inst = updated_data_transfer_insts[mem_flags | INDEXED]; + return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | B(OFFS_REG(mem))); + } + + inst = updated_data_transfer_insts[mem_flags]; + return push_inst(compiler, INST_CODE_AND_DST(inst, DOUBLE_DATA, freg) | A(mem & REG_MASK) | IMM(memw)); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); +#else + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); + compiler->size += 4; +#endif + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeRISCV_32.c b/contrib/libs/pcre2/src/sljit/sljitNativeRISCV_32.c new file mode 100644 index 0000000000..b38e6924c8 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeRISCV_32.c @@ -0,0 +1,73 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r) +{ + SLJIT_UNUSED_ARG(tmp_r); + SLJIT_ASSERT(dst_r != tmp_r); + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)); + + if (imm & 0x800) + imm += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); + + if ((imm & 0xfff) == 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins) +{ + if ((init_value & 0x800) != 0) + init_value += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(dst) | (sljit_ins)(init_value & ~0xfff))); + return push_inst(compiler, last_ins | RS1(dst) | IMM_I(init_value)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins*)addr; + SLJIT_UNUSED_ARG(executable_offset); + + if ((new_target & 0x800) != 0) + new_target += 0x1000; + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0); + + SLJIT_ASSERT((inst[0] & 0x7f) == LUI); + inst[0] = (inst[0] & 0xfff) | (sljit_ins)((sljit_sw)new_target & ~0xfff); + SLJIT_ASSERT((inst[1] & 0x707f) == ADDI || (inst[1] & 0x707f) == JALR); + inst[1] = (inst[1] & 0xfffff) | IMM_I(new_target); + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 5); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeRISCV_64.c b/contrib/libs/pcre2/src/sljit/sljitNativeRISCV_64.c new file mode 100644 index 0000000000..32cec7848d --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeRISCV_64.c @@ -0,0 +1,183 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r) +{ + sljit_sw high; + + SLJIT_ASSERT(dst_r != tmp_r); + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)); + + if (imm <= 0x7fffffffl && imm >= S32_MIN) { + if (imm > S32_MAX) { + SLJIT_ASSERT((imm & 0x800) != 0); + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); + return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); + } + + if ((imm & 0x800) != 0) + imm += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); + + if ((imm & 0xfff) == 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); + } + + /* Trailing zeroes could be used to produce shifted immediates. 
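+ (e.g. 0x7ab0000000 could be materialized as an ADDI of 0x7ab followed by a single SLLI of 28, instead of the longer generic sequence below; that optimization is not implemented here.)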
*/ + + if (imm <= 0x7ffffffffffl && imm >= -0x80000000000l) { + high = imm >> 12; + + if (imm & 0x800) + high = ~high; + + if (high > S32_MAX) { + SLJIT_ASSERT((high & 0x800) != 0); + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); + FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(high))); + } else { + if ((high & 0x800) != 0) + high += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(high & ~0xfff))); + + if ((high & 0xfff) != 0) + FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(high))); + } + + FAIL_IF(push_inst(compiler, SLLI | RD(dst_r) | RS1(dst_r) | IMM_I(12))); + + if ((imm & 0xfff) != 0) + return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); + + return SLJIT_SUCCESS; + } + + high = imm >> 32; + imm = (sljit_s32)imm; + + if ((imm & 0x80000000l) != 0) + high = ~high; + + if (high <= 0x7ffff && high >= -0x80000) { + FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high << 12))); + high = 0x1000; + } else { + if ((high & 0x800) != 0) + high += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high & ~0xfff))); + high &= 0xfff; + } + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm))); + imm = 0; + } else if (imm > S32_MAX) { + SLJIT_ASSERT((imm & 0x800) != 0); + + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); + imm = 0x1000 | (imm & 0xfff); + } else { + if ((imm & 0x800) != 0) + imm += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); + imm &= 0xfff; + } + + if ((high & 0xfff) != 0) + FAIL_IF(push_inst(compiler, ADDI | RD(tmp_r) | RS1(tmp_r) | IMM_I(high))); + + if (imm & 0x1000) + FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm))); + else if (imm != 0) + FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm))); + + FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(tmp_r) | IMM_I((high & 0x1000) ? 
20 : 32))); + return push_inst(compiler, XOR | RD(dst_r) | RS1(dst_r) | RS2(tmp_r)); +} + +static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins) +{ + sljit_sw high; + + if ((init_value & 0x800) != 0) + init_value += 0x1000; + + high = init_value >> 32; + + if ((init_value & 0x80000000l) != 0) + high = ~high; + + if ((high & 0x800) != 0) + high += 0x1000; + + FAIL_IF(push_inst(compiler, LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff))); + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high))); + FAIL_IF(push_inst(compiler, LUI | RD(dst) | (sljit_ins)(init_value & ~0xfff))); + FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(32))); + FAIL_IF(push_inst(compiler, XOR | RD(dst) | RS1(dst) | RS2(TMP_REG3))); + return push_inst(compiler, last_ins | RS1(dst) | IMM_I(init_value)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + sljit_ins *inst = (sljit_ins*)addr; + sljit_sw high; + SLJIT_UNUSED_ARG(executable_offset); + + if ((new_target & 0x800) != 0) + new_target += 0x1000; + + high = (sljit_sw)new_target >> 32; + + if ((new_target & 0x80000000l) != 0) + high = ~high; + + if ((high & 0x800) != 0) + high += 0x1000; + + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0); + + SLJIT_ASSERT((inst[0] & 0x7f) == LUI); + inst[0] = (inst[0] & 0xfff) | (sljit_ins)(high & ~0xfff); + SLJIT_ASSERT((inst[1] & 0x707f) == ADDI); + inst[1] = (inst[1] & 0xfffff) | IMM_I(high); + SLJIT_ASSERT((inst[2] & 0x7f) == LUI); + inst[2] = (inst[2] & 0xfff) | (sljit_ins)((sljit_sw)new_target & ~0xfff); + SLJIT_ASSERT((inst[5] & 0x707f) == ADDI || (inst[5] & 0x707f) == JALR); + inst[5] = (inst[5] & 0xfffff) | IMM_I(new_target); + SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1); + + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); + SLJIT_CACHE_FLUSH(inst, inst + 5); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeRISCV_common.c b/contrib/libs/pcre2/src/sljit/sljitNativeRISCV_common.c new file mode 100644 index 0000000000..58a48c649c --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeRISCV_common.c @@ -0,0 +1,2762 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
+{
+#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
+ return "RISC-V-32" SLJIT_CPUINFO;
+#else /* !SLJIT_CONFIG_RISCV_32 */
+ return "RISC-V-64" SLJIT_CPUINFO;
+#endif /* SLJIT_CONFIG_RISCV_32 */
+}
+
+/* Length of an instruction word, both for riscv-32 and riscv-64. */
+typedef sljit_u32 sljit_ins;
+
+#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_ZERO 0
+
+/* Flags are kept in volatile registers. */
+#define EQUAL_FLAG (SLJIT_NUMBER_OF_REGISTERS + 5)
+#define RETURN_ADDR_REG TMP_REG2
+#define OTHER_FLAG (SLJIT_NUMBER_OF_REGISTERS + 6)
+
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
+
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
+ 0, 10, 11, 12, 13, 14, 15, 16, 17, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 2, 6, 1, 7, 5, 28
+};
+
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 10, 11, 12, 13, 14, 15, 16, 17, 2, 3, 4, 5, 6, 7, 28, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 0, 1,
+};
+
+/* --------------------------------------------------------------------- */
+/* Instruction forms */
+/* --------------------------------------------------------------------- */
+
+#define RD(rd) ((sljit_ins)reg_map[rd] << 7)
+#define RS1(rs1) ((sljit_ins)reg_map[rs1] << 15)
+#define RS2(rs2) ((sljit_ins)reg_map[rs2] << 20)
+#define FRD(rd) ((sljit_ins)freg_map[rd] << 7)
+#define FRS1(rs1) ((sljit_ins)freg_map[rs1] << 15)
+#define FRS2(rs2) ((sljit_ins)freg_map[rs2] << 20)
+#define IMM_I(imm) ((sljit_ins)(imm) << 20)
+#define IMM_S(imm) ((((sljit_ins)(imm) & 0xfe0) << 20) | (((sljit_ins)(imm) & 0x1f) << 7))
+
+/* Represents funct(i) parts of the instructions. */
+#define OPC(o) ((sljit_ins)(o))
+#define F3(f) ((sljit_ins)(f) << 12)
+#define F12(f) ((sljit_ins)(f) << 20)
+#define F7(f) ((sljit_ins)(f) << 25)
+
+#define ADD (F7(0x0) | F3(0x0) | OPC(0x33))
+#define ADDI (F3(0x0) | OPC(0x13))
+#define AND (F7(0x0) | F3(0x7) | OPC(0x33))
+#define ANDI (F3(0x7) | OPC(0x13))
+#define AUIPC (OPC(0x17))
+#define BEQ (F3(0x0) | OPC(0x63))
+#define BNE (F3(0x1) | OPC(0x63))
+#define BLT (F3(0x4) | OPC(0x63))
+#define BGE (F3(0x5) | OPC(0x63))
+#define BLTU (F3(0x6) | OPC(0x63))
+#define BGEU (F3(0x7) | OPC(0x63))
+#define DIV (F7(0x1) | F3(0x4) | OPC(0x33))
+#define DIVU (F7(0x1) | F3(0x5) | OPC(0x33))
+#define EBREAK (F12(0x1) | F3(0x0) | OPC(0x73))
+#define FADD_S (F7(0x0) | F3(0x7) | OPC(0x53))
+#define FDIV_S (F7(0xc) | F3(0x7) | OPC(0x53))
+#define FEQ_S (F7(0x50) | F3(0x2) | OPC(0x53))
+#define FLD (F3(0x3) | OPC(0x7))
+#define FLE_S (F7(0x50) | F3(0x0) | OPC(0x53))
+#define FLT_S (F7(0x50) | F3(0x1) | OPC(0x53))
+#define FSD (F3(0x3) | OPC(0x27))
+/* These conversion opcodes are partly defined. 
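+ (the fields left out here, such as the single vs. double format bits, appear to be or-ed in at the emit sites depending on SLJIT_32.)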
*/ +#define FCVT_S_D (F7(0x20) | OPC(0x53)) +#define FCVT_S_W (F7(0x68) | OPC(0x53)) +#define FCVT_W_S (F7(0x60) | F3(0x1) | OPC(0x53)) +#define FMUL_S (F7(0x8) | F3(0x7) | OPC(0x53)) +#define FSGNJ_S (F7(0x10) | F3(0x0) | OPC(0x53)) +#define FSGNJN_S (F7(0x10) | F3(0x1) | OPC(0x53)) +#define FSGNJX_S (F7(0x10) | F3(0x2) | OPC(0x53)) +#define FSUB_S (F7(0x4) | F3(0x7) | OPC(0x53)) +#define JAL (OPC(0x6f)) +#define JALR (F3(0x0) | OPC(0x67)) +#define LD (F3(0x3) | OPC(0x3)) +#define LUI (OPC(0x37)) +#define LW (F3(0x2) | OPC(0x3)) +#define MUL (F7(0x1) | F3(0x0) | OPC(0x33)) +#define MULH (F7(0x1) | F3(0x1) | OPC(0x33)) +#define MULHU (F7(0x1) | F3(0x3) | OPC(0x33)) +#define OR (F7(0x0) | F3(0x6) | OPC(0x33)) +#define ORI (F3(0x6) | OPC(0x13)) +#define REM (F7(0x1) | F3(0x6) | OPC(0x33)) +#define REMU (F7(0x1) | F3(0x7) | OPC(0x33)) +#define SD (F3(0x3) | OPC(0x23)) +#define SLL (F7(0x0) | F3(0x1) | OPC(0x33)) +#define SLLI (IMM_I(0x0) | F3(0x1) | OPC(0x13)) +#define SLT (F7(0x0) | F3(0x2) | OPC(0x33)) +#define SLTI (F3(0x2) | OPC(0x13)) +#define SLTU (F7(0x0) | F3(0x3) | OPC(0x33)) +#define SLTUI (F3(0x3) | OPC(0x13)) +#define SRL (F7(0x0) | F3(0x5) | OPC(0x33)) +#define SRLI (IMM_I(0x0) | F3(0x5) | OPC(0x13)) +#define SRA (F7(0x20) | F3(0x5) | OPC(0x33)) +#define SRAI (IMM_I(0x400) | F3(0x5) | OPC(0x13)) +#define SUB (F7(0x20) | F3(0x0) | OPC(0x33)) +#define SW (F3(0x2) | OPC(0x23)) +#define XOR (F7(0x0) | F3(0x4) | OPC(0x33)) +#define XORI (F3(0x4) | OPC(0x13)) + +#define SIMM_MAX (0x7ff) +#define SIMM_MIN (-0x800) +#define BRANCH_MAX (0xfff) +#define BRANCH_MIN (-0x1000) +#define JUMP_MAX (0xfffff) +#define JUMP_MIN (-0x100000) + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) +#define S32_MAX (0x7ffff7ffl) +#define S32_MIN (-0x80000000l) +#define S44_MAX (0x7fffffff7ffl) +#define S52_MAX (0x7ffffffffffffl) +#endif + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_imm_s_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_sw imm) +{ + return push_inst(compiler, ins | IMM_S(imm)); +} + +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_ins *inst; + + inst = (sljit_ins *)jump->addr; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + goto exit; + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + + diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset; + + if (jump->flags & IS_COND) { + inst--; + diff += SSIZE_OF(ins); + + if (diff >= BRANCH_MIN && diff <= BRANCH_MAX) { + jump->flags |= PATCH_B; + inst[0] = (inst[0] & 0x1fff07f) ^ 0x1000; + jump->addr = (sljit_uw)inst; + return inst; + } + + inst++; + diff -= SSIZE_OF(ins); + } + + if (diff >= JUMP_MIN && diff <= JUMP_MAX) { + if (jump->flags & IS_COND) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + inst[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7; +#else + inst[-1] -= (sljit_ins)(5 * sizeof(sljit_ins)) << 7; +#endif + } + + jump->flags |= PATCH_J; + return inst; + } + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (diff >= S32_MIN && diff <= S32_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= 
(sljit_ins)(4 * sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_REL32; + inst[1] = inst[0]; + return inst + 1; + } + + if (target_addr <= (sljit_uw)S32_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(4 * sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_ABS32; + inst[1] = inst[0]; + return inst + 1; + } + + if (target_addr <= S44_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(2 * sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_ABS44; + inst[3] = inst[0]; + return inst + 3; + } + + if (target_addr <= S52_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_ABS52; + inst[4] = inst[0]; + return inst + 4; + } +#endif + +exit: +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + inst[1] = inst[0]; + return inst + 1; +#else + inst[5] = inst[0]; + return inst + 5; +#endif +} + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + +static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +{ + if (max_label <= (sljit_uw)S32_MAX) { + put_label->flags = PATCH_ABS32; + return 1; + } + + if (max_label <= S44_MAX) { + put_label->flags = PATCH_ABS44; + return 3; + } + + if (max_label <= S52_MAX) { + put_label->flags = PATCH_ABS52; + return 4; + } + + put_label->flags = 0; + return 5; +} + +#endif /* SLJIT_CONFIG_RISCV_64 */ + +static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) +{ + struct sljit_jump *jump = NULL; + struct sljit_put_label *put_label; + sljit_uw flags; + sljit_ins *inst; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_sw high; +#endif + sljit_uw addr; + + if (reg != 0) { + jump = (struct sljit_jump*)dst; + flags = jump->flags; + inst = (sljit_ins*)jump->addr; + addr = (flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + } else { + put_label = (struct sljit_put_label*)dst; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + flags = put_label->flags; +#endif + inst = (sljit_ins*)put_label->addr; + addr = put_label->label->addr; + reg = *inst; + } + + if ((addr & 0x800) != 0) + addr += 0x1000; + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); +#else /* !SLJIT_CONFIG_RISCV_32 */ + + if (flags & PATCH_ABS32) { + SLJIT_ASSERT(addr <= S32_MAX); + inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + } else if (flags & PATCH_ABS44) { + high = (sljit_sw)addr >> 12; + SLJIT_ASSERT((sljit_uw)high <= 0x7fffffff); + + if (high > S32_MAX) { + SLJIT_ASSERT((high & 0x800) != 0); + inst[0] = LUI | RD(reg) | (sljit_ins)0x80000000u; + inst[1] = XORI | RD(reg) | RS1(reg) | IMM_I(high); + } else { + if ((high & 0x800) != 0) + high += 0x1000; + + inst[0] = LUI | RD(reg) | (sljit_ins)(high & ~0xfff); + inst[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(high); + } + + inst[2] = SLLI | RD(reg) | RS1(reg) | IMM_I(12); + inst += 2; + } else { + high = (sljit_sw)addr >> 32; + + if ((addr & 0x80000000l) != 0) + high = ~high; + + if ((high & 0x800) != 0) + high += 0x1000; + + if (flags & PATCH_ABS52) { + SLJIT_ASSERT(addr <= S52_MAX); + inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high << 12); + } else { + inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff); + inst[1] = ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high); + inst++; + } + + inst[1] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + inst[2] = SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I((flags & PATCH_ABS52) ? 
20 : 32); + inst[3] = XOR | RD(reg) | RS1(reg) | RS2(TMP_REG3); + inst += 3; + } +#endif /* !SLJIT_CONFIG_RISCV_32 */ + + if (jump != NULL) { + SLJIT_ASSERT((inst[1] & 0x707f) == JALR); + inst[1] = (inst[1] & 0xfffff) | IMM_I(addr); + } else + inst[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + word_count += 1; +#else + word_count += 5; +#endif + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code, executable_offset); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + code_ptr += 1; + word_count += 1; +#else + code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); + word_count += 5; +#endif + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr++; + word_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + if (!(jump->flags & (PATCH_B | PATCH_J | PATCH_REL32))) { + load_addr_to_reg(jump, TMP_REG1); + break; + } + + addr = (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins *)jump->addr; + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); + + if (jump->flags & PATCH_B) { + SLJIT_ASSERT((sljit_sw)addr >= BRANCH_MIN && (sljit_sw)addr <= BRANCH_MAX); + addr = ((addr & 0x800) >> 4) | ((addr & 0x1e) << 7) | ((addr & 0x7e0) << 20) | ((addr & 0x1000) << 19); + buf_ptr[0] |= (sljit_ins)addr; + break; + } + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (jump->flags & PATCH_REL32) { + SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX); + + if ((addr & 0x800) != 0) + addr += 0x1000; + + buf_ptr[0] = AUIPC | RD(TMP_REG1) | (sljit_ins)((sljit_sw)addr & ~0xfff); + SLJIT_ASSERT((buf_ptr[1] & 0x707f) == JALR); + buf_ptr[1] |= IMM_I(addr); + break; + } +#endif + + SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX); + addr = (addr & 0xff000) | ((addr & 0x800) << 9) | ((addr & 0x7fe) << 20) | ((addr & 0x100000) << 11); + buf_ptr[0] = JAL | RD((jump->flags & IS_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | (sljit_ins)addr; + } while (0); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { + load_addr_to_reg(put_label, 0); + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: + case SLJIT_HAS_ZERO_REGISTER: + return 1; + default: + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + return (type >= SLJIT_ORDERED_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. 
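   The index combines the defines that follow: bit 0 (LOAD_DATA) picks load vs. store,
   bits 1-2 pick the access size (word/byte/half/int), bit 3 (SIGNED_DATA) picks sign-
   vs. zero-extension, and entries above GPR_REG are the floating-point rows; MEM_MASK
   spans the whole index.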
*/ +#define LOAD_DATA 0x01 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +/* Separates integer and floating point registers */ +#define GPR_REG 0x0f +#define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 + +#define MEM_MASK 0x1f + +#define ARG_TEST 0x00020 +#define ALT_KEEP_CACHE 0x00040 +#define CUMULATIVE_OP 0x00080 +#define IMM_OP 0x00100 +#define MOVE_OP 0x00200 +#define SRC2_IMM 0x00400 + +#define UNUSED_DEST 0x00800 +#define REG_DEST 0x01000 +#define REG1_SOURCE 0x02000 +#define REG2_SOURCE 0x04000 +#define SLOW_SRC1 0x08000 +#define SLOW_SRC2 0x10000 +#define SLOW_DEST 0x20000 + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#define STACK_STORE SW +#define STACK_LOAD LW +#else +#define STACK_STORE SD +#define STACK_LOAD LD +#endif + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#include "sljitNativeRISCV_32.c" +#else +#include "sljitNativeRISCV_64.c" +#endif + +#define STACK_MAX_DISTANCE (-SIMM_MIN) + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw); + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 i, tmp, offset; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } +#else + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); +#endif + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + compiler->local_size = local_size; + + if (local_size <= STACK_MAX_DISTANCE) { + /* Frequent case. */ + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-local_size))); + offset = local_size - SSIZE_OF(sw); + local_size = 0; + } else { + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(STACK_MAX_DISTANCE))); + local_size -= STACK_MAX_DISTANCE; + + if (local_size > STACK_MAX_DISTANCE) + FAIL_IF(load_immediate(compiler, TMP_REG1, local_size, TMP_REG3)); + offset = STACK_MAX_DISTANCE - SSIZE_OF(sw); + } + + FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(RETURN_ADDR_REG), offset)); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(i), offset)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(i), offset)); + } + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + /* This alignment is valid because offset is not used after storing FPU regs. 
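   (On RV32 the sljit_f64 stores below need an 8-byte-aligned offset, so a leftover
   word-sized slot is skipped instead of repacked.)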
*/ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_imm_s_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(i), offset)); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_imm_s_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(i), offset)); + } + + if (local_size > STACK_MAX_DISTANCE) + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RS1(SLJIT_SP) | RS2(TMP_REG1))); + else if (local_size > 0) + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-local_size))); + + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + tmp = SLJIT_R0; + + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_S0 - saved_arg_count) | RS1(tmp) | IMM_I(0))); + saved_arg_count++; + } + tmp++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#undef STACK_MAX_DISTANCE + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } +#else + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); +#endif + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + + return SLJIT_SUCCESS; +} + +#define STACK_MAX_DISTANCE (-SIMM_MIN - 16) + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to) +{ + sljit_s32 i, tmp, offset; + sljit_s32 local_size = compiler->local_size; + + if (local_size > STACK_MAX_DISTANCE) { + local_size -= STACK_MAX_DISTANCE; + + if (local_size > STACK_MAX_DISTANCE) { + FAIL_IF(load_immediate(compiler, TMP_REG2, local_size, TMP_REG3)); + FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RS1(SLJIT_SP) | RS2(TMP_REG2))); + } else + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size))); + + local_size = STACK_MAX_DISTANCE; + } + + SLJIT_ASSERT(local_size > 0); + + offset = local_size - SSIZE_OF(sw); + if (!is_return_to) + FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RS1(SLJIT_SP) | IMM_I(offset))); + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) 
+ /* This alignment is valid because offset is not used after storing FPU regs. */ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, FLD | FRD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, FLD | FRD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size)); +} + +#undef STACK_MAX_DISTANCE + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + return push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(RETURN_ADDR_REG) | IMM_I(0)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src) | IMM_I(0))); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#define ARCH_32_64(a, b) a +#else +#define ARCH_32_64(a, b) b +#endif + +static const sljit_ins data_transfer_insts[16 + 4] = { +/* u w s */ ARCH_32_64(F3(0x2) | OPC(0x23) /* sw */, F3(0x3) | OPC(0x23) /* sd */), +/* u w l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x3) | OPC(0x3) /* ld */), +/* u b s */ F3(0x0) | OPC(0x23) /* sb */, +/* u b l */ F3(0x4) | OPC(0x3) /* lbu */, +/* u h s */ F3(0x1) | OPC(0x23) /* sh */, +/* u h l */ F3(0x5) | OPC(0x3) /* lhu */, +/* u i s */ F3(0x2) | OPC(0x23) /* sw */, +/* u i l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x6) | OPC(0x3) /* lwu */), + +/* s w s */ ARCH_32_64(F3(0x2) | OPC(0x23) /* sw */, F3(0x3) | OPC(0x23) /* sd */), +/* s w l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x3) | OPC(0x3) /* ld */), +/* s b s */ F3(0x0) | OPC(0x23) /* sb */, +/* s b l */ F3(0x0) | OPC(0x3) /* lb */, +/* s h s */ F3(0x1) | OPC(0x23) /* sh */, +/* s h l */ F3(0x1) | OPC(0x3) /* lh */, +/* s i s */ F3(0x2) | OPC(0x23) /* sw */, +/* s i l */ F3(0x2) | OPC(0x3) /* lw */, + +/* d s */ F3(0x3) | OPC(0x27) /* fsd */, +/* d l */ F3(0x3) | OPC(0x7) /* fld */, +/* s s */ F3(0x2) | OPC(0x27) /* fsw */, +/* s l */ F3(0x2) | OPC(0x7) /* flw */, +}; + +#undef ARCH_32_64 + +static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 base, sljit_sw offset) +{ + sljit_ins ins; + + SLJIT_ASSERT(FAST_IS_REG(base) && offset <= 0xfff && offset >= SIMM_MIN); + + ins = data_transfer_insts[flags & MEM_MASK] | RS1(base); + if (flags & LOAD_DATA) + ins |= ((flags & MEM_MASK) <= GPR_REG ? 
RD(reg) : FRD(reg)) | IMM_I(offset);
+    else
+        ins |= ((flags & MEM_MASK) <= GPR_REG ? RS2(reg) : FRS2(reg)) | IMM_S(offset);
+
+    return push_inst(compiler, ins);
+}
+
+/* Can perform an operation using at most 1 instruction. */
+static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+{
+
+    SLJIT_ASSERT(arg & SLJIT_MEM);
+
+    if (!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) {
+        /* Works for both absolute and relative addresses. */
+        if (SLJIT_UNLIKELY(flags & ARG_TEST))
+            return 1;
+
+        FAIL_IF(push_mem_inst(compiler, flags, reg, arg & REG_MASK, argw));
+        return -1;
+    }
+    return 0;
+}
+
+#define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0))
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators. */
+static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
+{
+    SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+    /* Simple operation except for updates. */
+    if (arg & OFFS_REG_MASK) {
+        argw &= 0x3;
+        next_argw &= 0x3;
+        if (argw && argw == next_argw && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)))
+            return 1;
+        return 0;
+    }
+
+    if (arg == next_arg) {
+        if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN)
+                || TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw))
+            return 1;
+        return 0;
+    }
+
+    return 0;
+}
+
+/* Emit the necessary instructions. See can_cache above. */
+static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
+{
+    sljit_s32 base = arg & REG_MASK;
+    sljit_s32 tmp_r = TMP_REG1;
+    sljit_sw offset, argw_hi;
+
+    SLJIT_ASSERT(arg & SLJIT_MEM);
+    if (!(next_arg & SLJIT_MEM)) {
+        next_arg = 0;
+        next_argw = 0;
+    }
+
+    /* Since tmp can be the same as base or offset registers,
+     * these might be unavailable after modifying tmp. */
+    if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
+        tmp_r = reg;
+
+    if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+        argw &= 0x3;
+
+        /* Using the cache. */
+        if (argw == compiler->cache_argw) {
+            if (arg == compiler->cache_arg)
+                return push_mem_inst(compiler, flags, reg, TMP_REG3, 0);
+
+            if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
+                if (arg == next_arg && argw == (next_argw & 0x3)) {
+                    compiler->cache_arg = arg;
+                    compiler->cache_argw = argw;
+                    FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(TMP_REG3) | RS2(base)));
+                    return push_mem_inst(compiler, flags, reg, TMP_REG3, 0);
+                }
+                FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(TMP_REG3)));
+                return push_mem_inst(compiler, flags, reg, tmp_r, 0);
+            }
+        }
+
+        if (SLJIT_UNLIKELY(argw)) {
+            compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
+            compiler->cache_argw = argw;
+            FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG3) | RS1(OFFS_REG(arg)) | IMM_I(argw)));
+        }
+
+        if (arg == next_arg && argw == (next_argw & 0x3)) {
+            compiler->cache_arg = arg;
+            compiler->cache_argw = argw;
+            FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(base) | RS2(!argw ? OFFS_REG(arg) : TMP_REG3)));
+            tmp_r = TMP_REG3;
+        }
+        else
+            FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(!argw ? 
OFFS_REG(arg) : TMP_REG3))); + return push_mem_inst(compiler, flags, reg, tmp_r, 0); + } + + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) + return push_mem_inst(compiler, flags, reg, TMP_REG3, argw - compiler->cache_argw); + + if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= SIMM_MAX) && (argw - compiler->cache_argw >= SIMM_MIN)) { + offset = argw - compiler->cache_argw; + } else { + compiler->cache_arg = SLJIT_MEM; + + argw_hi = TO_ARGW_HI(argw); + + if (next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN && argw_hi != TO_ARGW_HI(next_argw)) { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw, tmp_r)); + compiler->cache_argw = argw; + offset = 0; + } else { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi, tmp_r)); + compiler->cache_argw = argw_hi; + offset = argw & 0xfff; + argw = argw_hi; + } + } + + if (!base) + return push_mem_inst(compiler, flags, reg, TMP_REG3, offset); + + if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) { + compiler->cache_arg = arg; + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(TMP_REG3) | RS2(base))); + return push_mem_inst(compiler, flags, reg, TMP_REG3, offset); + } + + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(TMP_REG3) | RS2(base))); + return push_mem_inst(compiler, flags, reg, tmp_r, offset); +} + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_s32 base = arg & REG_MASK; + sljit_s32 tmp_r = TMP_REG1; + + if (getput_arg_fast(compiler, flags, reg, arg, argw)) + return compiler->error; + + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) + tmp_r = reg; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (SLJIT_UNLIKELY(argw)) { + FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(OFFS_REG(arg)) | IMM_I(argw))); + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(tmp_r) | RS2(base))); + } + else + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(OFFS_REG(arg)))); + + argw = 0; + } else { + FAIL_IF(load_immediate(compiler, tmp_r, TO_ARGW_HI(argw), TMP_REG3)); + + if (base != 0) + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(tmp_r) | RS2(base))); + } + + return push_mem_inst(compiler, flags, reg, tmp_r, argw & 0xfff); +} + +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#define WORD 0 +#define IMM_EXTEND(v) (IMM_I(v)) +#else /* !SLJIT_CONFIG_RISCV_32 */ +#define WORD word +#define IMM_EXTEND(v) (IMM_I((op & SLJIT_32) ? (v) : (32 + (v)))) +#endif /* SLJIT_CONFIG_RISCV_32 */ + +static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src) +{ + sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ); +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (op & SLJIT_32) >> 5; + sljit_ins max = (op & SLJIT_32) ? 32 : 64; +#else /* !SLJIT_CONFIG_RISCV_64 */ + sljit_ins max = 32; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + SLJIT_ASSERT(WORD == 0 || WORD == 0x8); + + /* The OTHER_FLAG is the counter. 
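   The sequence below uses only base-ISA shifts, compares and branches, so it does not
   assume the Zbb CLZ/CTZ instructions.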
*/ + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(max))); + + /* The TMP_REG2 is the next value. */ + if (src != TMP_REG2) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG2) | RS1(src) | IMM_I(0))); + + FAIL_IF(push_inst(compiler, BEQ | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)((is_clz ? 4 : 5) * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20))); + + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(0))); + if (!is_clz) { + FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG1) | RS1(TMP_REG2) | IMM_I(1))); + FAIL_IF(push_inst(compiler, BNE | RS1(TMP_REG1) | RS2(TMP_ZERO) | ((sljit_ins)(2 * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20))); + } else + FAIL_IF(push_inst(compiler, BLT | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)(2 * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20))); + + /* The TMP_REG1 is the next shift. */ + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG1) | RS1(TMP_ZERO) | IMM_I(max))); + + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(TMP_REG2) | IMM_I(0))); + FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(1))); + + FAIL_IF(push_inst(compiler, (is_clz ? SRL : SLL) | WORD | RD(TMP_REG2) | RS1(EQUAL_FLAG) | RS2(TMP_REG1))); + FAIL_IF(push_inst(compiler, BNE | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)0xfe000e80 - ((2 * SSIZE_OF(ins)) << 7)))); + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG2) | RS1(TMP_REG1) | IMM_I(-1))); + FAIL_IF(push_inst(compiler, (is_clz ? SRL : SLL) | WORD | RD(TMP_REG2) | RS1(EQUAL_FLAG) | RS2(TMP_REG2))); + FAIL_IF(push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1))); + FAIL_IF(push_inst(compiler, BEQ | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)0xfe000e80 - ((5 * SSIZE_OF(ins)) << 7)))); + + return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(OTHER_FLAG) | IMM_I(0)); +} + +#define EMIT_LOGICAL(op_imm, op_reg) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_imm | RD(dst) | RS1(src1) | IMM_I(src2))); \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RS1(src1) | RS2(src2))); \ + } + +#define EMIT_SHIFT(imm, reg) \ + op_imm = (imm); \ + op_reg = (reg); + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) +{ + sljit_s32 is_overflow, is_carry, carry_src_r, is_handled; + sljit_ins op_imm, op_reg; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (op & SLJIT_32) >> 5; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + SLJIT_ASSERT(WORD == 0 || WORD == 0x8); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, ADDI | RD(dst) | RS1(src2) | IMM_I(0)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | RD(dst) | RS1(src2) | IMM_I(0xff)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + 
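/* Sign-extension idiom: SLLI then SRAI by the same distance replicates the sign bit of the narrow value; e.g. the 32-bit variant turns 0x00000080 into 0xffffff80 via << 24 then arithmetic >> 24. */ +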
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(24))); + return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(24)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); + return push_inst(compiler, SRLI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); + return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + case SLJIT_MOV_U32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(src2) | IMM_I(32))); + return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ADDI | 0x8 | RD(dst) | RS1(src2) | IMM_I(0)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + case SLJIT_CLZ: + case SLJIT_CTZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return emit_clz_ctz(compiler, op, dst, src2); + + case SLJIT_ADD: + /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */ + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + carry_src_r = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + else + FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1))); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADD | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + + if (is_overflow || carry_src_r != 0) { + if (src1 != dst) + carry_src_r = (sljit_s32)src1; + else if (src2 != dst) + carry_src_r = (sljit_s32)src2; + else { + FAIL_IF(push_inst(compiler, ADDI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(0))); + carry_src_r = OTHER_FLAG; + } + } + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. 
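   Equivalently: after dst = src1 + src2, an unsigned wrap happened exactly when dst is
   unsigned-less-than an addend, which the SLTU/SLTUI below computes. The XOR/SRLI/XOR
   tail then evaluates overflow = sign(src1) ^ sign(src2) ^ sign(dst) ^ carry, with
   EQUAL_FLAG prepared above so that its sign bit is sign(src1) ^ sign(src2).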
*/ + if (is_overflow || carry_src_r != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(src2))); + else + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(dst) | RS2(carry_src_r))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0))); + FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31))); + return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG)); + + case SLJIT_ADDC: + carry_src_r = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + } else { + if (carry_src_r != 0) { + if (src1 != dst) + carry_src_r = (sljit_s32)src1; + else if (src2 != dst) + carry_src_r = (sljit_s32)src2; + else { + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + carry_src_r = EQUAL_FLAG; + } + } + + FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (carry_src_r != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(src2))); + else + FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(dst) | RS2(carry_src_r))); + } + + FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG))); + + if (carry_src_r == 0) + return SLJIT_SUCCESS; + + /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(dst) | RS2(OTHER_FLAG))); + /* Set carry flag. 
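   The result is carry(src1 + src2) | carry(intermediate sum + old carry); at most one
   of the two can be set.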
*/ + return push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(EQUAL_FLAG)); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_handled = 0; + + if (flags & SRC2_IMM) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); + is_handled = 1; + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_LESS: + case SLJIT_GREATER_EQUAL: + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + break; + case SLJIT_GREATER: + case SLJIT_LESS_EQUAL: + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src2) | RS2(src1))); + break; + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER_EQUAL: + FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + break; + case SLJIT_SIG_GREATER: + case SLJIT_SIG_LESS_EQUAL: + FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src2) | RS2(src1))); + break; + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2))); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2)); + } + else { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2)); + } + return SLJIT_SUCCESS; + } + + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + else + FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1))); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2))); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + + /* Only the zero flag is needed. 
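   The SUB into dst can still be skipped when the destination is unused and no variable
   flag depends on the result.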
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0))); + FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31))); + return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG)); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); + + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2))); + } + else { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + + FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RS1(dst) | RS2(OTHER_FLAG))); + + FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG))); + + if (!is_carry) + return SLJIT_SUCCESS; + + return push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(EQUAL_FLAG) | RS2(TMP_REG1)); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) + return push_inst(compiler, MUL | WORD | RD(dst) | RS1(src1) | RS2(src2)); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (word) { + FAIL_IF(push_inst(compiler, MUL | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + FAIL_IF(push_inst(compiler, MUL | 0x8 | RD(dst) | RS1(src1) | RS2(src2))); + return push_inst(compiler, SUB | RD(OTHER_FLAG) | RS1(dst) | RS2(OTHER_FLAG)); + } +#endif /* SLJIT_CONFIG_RISCV_64 */ + + FAIL_IF(push_inst(compiler, MULH | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + FAIL_IF(push_inst(compiler, MUL | RD(dst) | RS1(src1) | RS2(src2))); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + FAIL_IF(push_inst(compiler, SRAI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(31))); +#else /* !SLJIT_CONFIG_RISCV_32 */ + FAIL_IF(push_inst(compiler, SRAI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(63))); +#endif /* SLJIT_CONFIG_RISCV_32 */ + return push_inst(compiler, SUB | RD(OTHER_FLAG) | RS1(EQUAL_FLAG) | RS2(OTHER_FLAG)); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + case SLJIT_MSHL: + EMIT_SHIFT(SLLI, SLL); + break; + + case SLJIT_LSHR: + case SLJIT_MLSHR: + EMIT_SHIFT(SRLI, SRL); + break; + + case SLJIT_ASHR: + case SLJIT_MASHR: + EMIT_SHIFT(SRAI, SRA); + break; + + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & SRC2_IMM) { + SLJIT_ASSERT(src2 != 0); + + op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? SLLI : SRLI; + FAIL_IF(push_inst(compiler, op_imm | WORD | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + src2 = ((op & SLJIT_32) ? 32 : 64) - src2; +#else /* !SLJIT_CONFIG_RISCV_64 */ + src2 = 32 - src2; +#endif /* SLJIT_CONFIG_RISCV_64 */ + op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? 
SRLI : SLLI; + FAIL_IF(push_inst(compiler, op_imm | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)); + } + + if (src2 == TMP_ZERO) { + if (dst != src1) + return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(0)); + return SLJIT_SUCCESS; + } + + FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(TMP_ZERO) | RS2(src2))); + op_reg = (GET_OPCODE(op) == SLJIT_ROTL) ? SLL : SRL; + FAIL_IF(push_inst(compiler, op_reg | WORD | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + op_reg = (GET_OPCODE(op) == SLJIT_ROTL) ? SRL : SLL; + FAIL_IF(push_inst(compiler, op_reg | WORD | RD(dst) | RS1(src1) | RS2(EQUAL_FLAG))); + return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)); + + default: + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; + } + + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_imm | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_imm | WORD | RD(dst) | RS1(src1) | IMM_I(src2)); + } + + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, op_reg | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + + if (flags & UNUSED_DEST) + return SLJIT_SUCCESS; + return push_inst(compiler, op_reg | WORD | RD(dst) | RS1(src1) | RS2(src2)); +} + +#undef IMM_EXTEND + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_sw src2_r = 0; + sljit_s32 sugg_src2_r = TMP_REG2; + + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + if (dst == TMP_REG2) { + SLJIT_ASSERT(HAS_FLAGS(op)); + flags |= UNUSED_DEST; + } + else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (flags & MOVE_OP) + sugg_src2_r = dst_r; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if ((src2 & SLJIT_IMM) && src2w != 0 && src2w <= SIMM_MAX && src2w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src2w; + } + else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && src1w <= SIMM_MAX && src1w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 & SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); + src1_r = TMP_REG1; + } + else + src1_r = TMP_ZERO; + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. 
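   Mirrors the Source 1 handling, except that for move operations the value may be
   routed straight into the destination register (sugg_src2_r).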
*/ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) + dst_r = (sljit_s32)src2_r; + } + else if (src2 & SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w, TMP_REG3)); + src2_r = sugg_src2_r; + } + else { + src2_r = TMP_ZERO; + if (flags & MOVE_OP) { + if (dst & SLJIT_MEM) + dst_r = 0; + else + op = SLJIT_MOV; + } + } + } + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = sugg_src2_r; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, dst_r, dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (op & SLJIT_32) >> 5; + + SLJIT_ASSERT(word == 0 || word == 0x8); +#endif /* SLJIT_CONFIG_RISCV_64 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + switch (GET_OPCODE(op)) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, EBREAK); + case SLJIT_NOP: + return push_inst(compiler, ADDI | RD(TMP_ZERO) | RS1(TMP_ZERO) | IMM_I(0)); + case SLJIT_LMUL_UW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R1) | IMM_I(0))); + FAIL_IF(push_inst(compiler, MULHU | RD(SLJIT_R1) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, MUL | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(TMP_REG1)); + case SLJIT_LMUL_SW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R1) | IMM_I(0))); + FAIL_IF(push_inst(compiler, MULH | RD(SLJIT_R1) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, MUL | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(TMP_REG1)); + case SLJIT_DIVMOD_UW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R0) | IMM_I(0))); + FAIL_IF(push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, REMU | WORD | RD(SLJIT_R1) | RS1(TMP_REG1) | RS2(SLJIT_R1)); + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R0) | IMM_I(0))); + FAIL_IF(push_inst(compiler, DIV | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, REM | WORD | RD(SLJIT_R1) | RS1(TMP_REG1) | RS2(SLJIT_R1)); + case SLJIT_DIV_UW: + return push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)); + case SLJIT_DIV_SW: + return push_inst(compiler, DIV | WORD | 
RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)); + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (op & SLJIT_32) + flags = INT_DATA | SIGNED_DATA; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: +#endif + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + case SLJIT_MOV_U32: + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw); + + case SLJIT_MOV_S32: + /* Logical operators have no W variant, so sign extended input is necessary for them. */ + case SLJIT_MOV32: + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); +#endif + + case SLJIT_MOV_U8: + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + + case SLJIT_MOV_S8: + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + + case SLJIT_MOV_U16: + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + + case SLJIT_MOV_S16: + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, SLJIT_XOR | (op & (SLJIT_32 | SLJIT_SET_Z)), flags, dst, dstw, src, srcw, SLJIT_IMM, -1); + + case SLJIT_CLZ: + case SLJIT_CTZ: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (op & SLJIT_32) { + flags |= INT_DATA | SIGNED_DATA; + if (src1 & SLJIT_IMM) + src1w = (sljit_s32)src1w; + if (src2 & SLJIT_IMM) + src2w = (sljit_s32)src2w; + } +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + compiler->status_flags_state = 0; + return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + case SLJIT_ROTL: + case SLJIT_ROTR: + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + src2w &= 0x1f; +#else /* !SLJIT_CONFIG_RISCV_32 */ + if (op & SLJIT_32) + src2w &= 0x1f; + else + src2w &= 0x3f; +#endif /* SLJIT_CONFIG_RISCV_32 */ + } + + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 is_left; + sljit_ins ins1, ins2, ins3; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (op & SLJIT_32) >> 5; + sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; + sljit_sw bit_length = (op & SLJIT_32) ? 
32 : 64; +#else /* !SLJIT_CONFIG_RISCV_64 */ + sljit_s32 inp_flags = WORD_DATA | LOAD_DATA; + sljit_sw bit_length = 32; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + SLJIT_ASSERT(WORD == 0 || WORD == 0x8); + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w)); + + is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); + + if (src_dst == src1) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w); + } + + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (src2 & SLJIT_IMM) { + src2w &= bit_length - 1; + + if (src2w == 0) + return SLJIT_SUCCESS; + } else if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src2, src2w)); + src2 = TMP_REG2; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG1, src1, src1w)); + src1 = TMP_REG1; + } else if (src1 & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_IMM) { + if (is_left) { + ins1 = SLLI | WORD | IMM_I(src2w); + src2w = bit_length - src2w; + ins2 = SRLI | WORD | IMM_I(src2w); + } else { + ins1 = SRLI | WORD | IMM_I(src2w); + src2w = bit_length - src2w; + ins2 = SLLI | WORD | IMM_I(src2w); + } + + FAIL_IF(push_inst(compiler, ins1 | RD(src_dst) | RS1(src_dst))); + FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RS1(src1))); + return push_inst(compiler, OR | RD(src_dst) | RS1(src_dst) | RS2(TMP_REG1)); + } + + if (is_left) { + ins1 = SLL; + ins2 = SRLI; + ins3 = SRL; + } else { + ins1 = SRL; + ins2 = SLLI; + ins3 = SLL; + } + + FAIL_IF(push_inst(compiler, ins1 | WORD | RD(src_dst) | RS1(src_dst) | RS2(src2))); + + if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { + FAIL_IF(push_inst(compiler, ins2 | WORD | RD(TMP_REG1) | RS1(src1) | IMM_I(1))); + FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RS1(src2) | IMM_I((sljit_ins)bit_length - 1))); + src1 = TMP_REG1; + } else + FAIL_IF(push_inst(compiler, SUB | WORD | RD(TMP_REG2) | RS1(TMP_ZERO) | RS2(src2))); + + FAIL_IF(push_inst(compiler, ins3 | WORD | RD(TMP_REG1) | RS1(src1) | RS2(TMP_REG2))); + return push_inst(compiler, OR | RD(src_dst) | RS1(src_dst) | RS2(TMP_REG1)); +} + +#undef WORD + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ADDI | RD(RETURN_ADDR_REG) | RS1(src) | IMM_I(0))); + else + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw)); + + return push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(RETURN_ADDR_REG) | IMM_I(0)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct 
sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7)) +#define FMT(op) ((sljit_ins)((op & SLJIT_32) ^ SLJIT_32) << 17) + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +# define flags (sljit_u32)0 +#else + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21; +#endif + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, FCVT_W_S | FMT(op) | flags | RD(dst_r) | FRS1(src))); + + /* Store the integer value from a VFP register. */ + if (dst & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + return emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0); +#else + return emit_op_mem2(compiler, flags ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0); +#endif + } + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +# undef flags +#endif +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inst; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21; +#endif + + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); +#else + FAIL_IF(emit_op_mem2(compiler, (flags ? 
WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); +#endif + src = TMP_REG1; + } else if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; +#endif + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw, TMP_REG3)); + src = TMP_REG1; + } + + inst = FCVT_S_W | FMT(op) | FRD(dst_r) | RS1(src); + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (op & SLJIT_32) + inst |= F3(0x7); +#else + inst |= flags; + + if (op != SLJIT_CONV_F64_FROM_S32) + inst |= F3(0x7); +#endif + + FAIL_IF(push_inst(compiler, inst)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins inst; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_F_EQUAL: + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2); + break; + case SLJIT_F_LESS: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2); + break; + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1); + break; + case SLJIT_F_GREATER: + case SLJIT_F_LESS_EQUAL: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED_LESS_EQUAL: + inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2); + break; + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_ORDERED_GREATER_EQUAL: + inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1); + break; + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ + case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */ + FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2))); + FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(TMP_REG1) | FRS1(src2) | FRS2(src1))); + inst = OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1); + break; + default: /* SLJIT_UNORDERED, SLJIT_ORDERED */ + FAIL_IF(push_inst(compiler, FADD_S | FMT(op) | FRD(TMP_FREG1) | FRS1(src1) | FRS2(src2))); + inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(TMP_FREG1) | FRS2(TMP_FREG1); + break; + } + + return push_inst(compiler, inst); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src))); + else + dst_r = src; + } + break; + case SLJIT_NEG_F64: + FAIL_IF(push_inst(compiler, FSGNJN_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src))); + break; + case SLJIT_ABS_F64: + FAIL_IF(push_inst(compiler, FSGNJX_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src))); + break; + case SLJIT_CONV_F64_FROM_F32: + /* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */ + FAIL_IF(push_inst(compiler, FCVT_S_D | ((op & SLJIT_32) ? (1 << 25) : ((1 << 20) | F3(7))) | FRD(dst_r) | FRS1(src))); + op ^= SLJIT_32; + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r, flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + + if (flags & SLOW_SRC1) + src1 = TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(push_inst(compiler, FADD_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2))); + break; + + case SLJIT_SUB_F64: + FAIL_IF(push_inst(compiler, FSUB_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2))); + break; + + case SLJIT_MUL_F64: + FAIL_IF(push_inst(compiler, FMUL_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2))); + break; + + case SLJIT_DIV_F64: + FAIL_IF(push_inst(compiler, FDIV_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2))); + break; + } + + if (dst_r == TMP_FREG2) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); + + return SLJIT_SUCCESS; +} + +#undef FLOAT_DATA +#undef 
FMT + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, ADDI | RD(dst) | RS1(RETURN_ADDR_REG) | IMM_I(0)); + + /* Memory. */ + return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#define BRANCH_LENGTH ((sljit_ins)(3 * sizeof(sljit_ins)) << 7) +#else +#define BRANCH_LENGTH ((sljit_ins)(7 * sizeof(sljit_ins)) << 7) +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + struct sljit_jump *jump; + sljit_ins inst; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + switch (type) { + case SLJIT_EQUAL: + inst = BNE | RS1(EQUAL_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; + break; + case SLJIT_NOT_EQUAL: + inst = BEQ | RS1(EQUAL_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; + break; + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + case SLJIT_CARRY: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */ + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_ORDERED_GREATER: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED: + inst = BEQ | RS1(OTHER_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; + break; + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + case SLJIT_NOT_CARRY: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ + case SLJIT_F_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED: + inst = BNE | RS1(OTHER_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; + break; + default: + /* Not conditional branch. 
*/ + inst = 0; + break; + } + + if (inst != 0) { + PTR_FAIL_IF(push_inst(compiler, inst)); + jump->flags |= IS_COND; + } + + jump->addr = compiler->size; + inst = JALR | RS1(TMP_REG1) | IMM_I(0); + + if (type >= SLJIT_FAST_CALL) { + jump->flags |= IS_CALL; + inst |= RD(RETURN_ADDR_REG); + } + + PTR_FAIL_IF(push_inst(compiler, inst)); + + /* Maximum number of instructions required for generating a constant. */ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + compiler->size += 1; +#else + compiler->size += 5; +#endif + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + struct sljit_jump *jump; + sljit_s32 flags; + sljit_ins inst; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + flags = WORD_DATA | LOAD_DATA; +#else /* !SLJIT_CONFIG_RISCV_32 */ + flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; +#endif /* SLJIT_CONFIG_RISCV_32 */ + + if (src1 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG2, src2, src2w, 0, 0)); + src2 = TMP_REG2; + } + + if (src1 & SLJIT_IMM) { + if (src1w != 0) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); + src1 = TMP_REG1; + } + else + src1 = TMP_ZERO; + } + + if (src2 & SLJIT_IMM) { + if (src2w != 0) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w, TMP_REG3)); + src2 = TMP_REG2; + } + else + src2 = TMP_ZERO; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND)); + type &= 0xff; + + switch (type) { + case SLJIT_EQUAL: + inst = BNE | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_NOT_EQUAL: + inst = BEQ | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_LESS: + inst = BGEU | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_GREATER_EQUAL: + inst = BLTU | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_GREATER: + inst = BGEU | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + case SLJIT_LESS_EQUAL: + inst = BLTU | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + case SLJIT_SIG_LESS: + inst = BGE | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BLT | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_SIG_GREATER: + inst = BGE | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BLT | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + } + + PTR_FAIL_IF(push_inst(compiler, inst)); + + jump->addr = compiler->size; + 
PTR_FAIL_IF(push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0))); + + /* Maximum number of instructions required for generating a constant. */ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + compiler->size += 1; +#else + compiler->size += 5; +#endif + return jump; +} + +#undef BRANCH_LENGTH + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (!(src & SLJIT_IMM)) { + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + return push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(src) | IMM_I(0)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0)); + jump->u.target = (sljit_uw)srcw; + + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0))); + + /* Maximum number of instructions required for generating a constant. */ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + compiler->size += 1; +#else + compiler->size += 5; +#endif + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src) | IMM_I(0))); + src = TMP_REG1; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + type = SLJIT_JUMP; + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 src_r, dst_r, invert; + sljit_s32 saved_op = op; +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + sljit_s32 mem_type = WORD_DATA; +#else + sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? 
dst : TMP_REG2; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw)); + + if (type < SLJIT_F_EQUAL) { + src_r = OTHER_FLAG; + invert = type & 0x1; + + switch (type) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RS1(EQUAL_FLAG) | IMM_I(1))); + src_r = dst_r; + break; + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { + src_r = OTHER_FLAG; + break; + } + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RS1(OTHER_FLAG) | IMM_I(1))); + src_r = dst_r; + invert ^= 0x1; + break; + } + } else { + invert = 0; + src_r = OTHER_FLAG; + + switch (type) { + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ + case SLJIT_F_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED: + invert = 1; + break; + } + } + + if (invert) { + FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(src_r) | IMM_I(1))); + src_r = dst_r; + } + + if (op < SLJIT_ADD) { + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_type, src_r, dst, dstw); + + if (src_r != dst_r) + return push_inst(compiler, ADDI | RD(dst_r) | RS1(src_r) | IMM_I(0)); + return SLJIT_SUCCESS; + } + + mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE; + + if (dst & SLJIT_MEM) + return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0); + return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + memw &= 0x3; + + if (SLJIT_UNLIKELY(memw != 0)) { + FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG1) | RS1(OFFS_REG(mem)) | IMM_I(memw))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(mem & REG_MASK))); + } else + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(mem & REG_MASK) | RS2(OFFS_REG(mem)))); + + mem = TMP_REG1; + memw = 0; + } else if (memw > SIMM_MAX - SSIZE_OF(sw) || memw < SIMM_MIN) { + if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw), TMP_REG3)); + memw &= 0xfff; + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw, TMP_REG3)); + memw = 0; + } + + if (mem & REG_MASK) + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(mem & REG_MASK))); + + mem = TMP_REG1; + } else { + mem &= REG_MASK; + memw &= 0xfff; + } + + SLJIT_ASSERT((memw >= 0 && memw <= SIMM_MAX - SSIZE_OF(sw)) || (memw > SIMM_MAX && memw <= 0xfff)); + + if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) { + 
FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), mem, (memw + SSIZE_OF(sw)) & 0xfff)); + return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), mem, memw); + } + + flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0); + + FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), mem, memw)); + return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), mem, (memw + SSIZE_OF(sw)) & 0xfff); +} + +#undef TO_ARGW_HI + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, ADDI | RD(dst_r))); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + compiler->size += 1; +#else + compiler->size += 5; +#endif + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeS390X.c b/contrib/libs/pcre2/src/sljit/sljitNativeS390X.c new file mode 100644 index 0000000000..8b51bad9bc --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeS390X.c @@ -0,0 +1,3747 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/auxv.h> + +#ifdef __ARCH__ +#define ENABLE_STATIC_FACILITY_DETECTION 1 +#else +#define ENABLE_STATIC_FACILITY_DETECTION 0 +#endif +#define ENABLE_DYNAMIC_FACILITY_DETECTION 1 + +SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) +{ + return "s390x" SLJIT_CPUINFO; +} + +/* Instructions. */ +typedef sljit_uw sljit_ins; + +/* Instruction tags (most significant halfword). */ +static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) + +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 +}; + +/* there are also a[2-15] available, but they are slower to access and + * their use is limited as mundaym explained: + * https://github.com/zherczeg/sljit/pull/91#discussion_r486895689 + */ + +/* General Purpose Registers [0-15]. */ +typedef sljit_uw sljit_gpr; + +/* + * WARNING + * the following code is non standard and should be improved for + * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based + * registers because r0 and r1 are the ABI recommended volatiles. + * there is a gpr() function that maps sljit to physical register numbers + * that should be used instead of the usual index into reg_map[] and + * will be retired ASAP (TODO: carenas) + */ + +static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */ +static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */ +static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */ +static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */ +static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */ +static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */ +static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */ +static const sljit_gpr r7 = 7; /* reg_map[6] */ +static const sljit_gpr r8 = 8; /* reg_map[7] */ +static const sljit_gpr r9 = 9; /* reg_map[8] */ +static const sljit_gpr r10 = 10; /* reg_map[9] */ +static const sljit_gpr r11 = 11; /* reg_map[10] */ +static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */ +static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */ +static const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */ +static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */ + +/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */ +/* TODO(carenas): r12 might conflict in PIC code, reserve? */ +/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp + * like we do now might be faster though, reserve? + */ + +/* TODO(carenas): should be named TMP_REG[1-2] for consistency */ +#define tmp0 r0 +#define tmp1 r1 + +/* TODO(carenas): flags should move to a different register so that + * link register doesn't need to change + */ + +/* When reg cannot be unused. 
*/ +#define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP) + +/* Link register. */ +static const sljit_gpr link_r = 14; /* r14 */ + +#define TMP_FREG1 (0) + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { + 1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, +}; + +#define R0A(r) (r) +#define R4A(r) ((r) << 4) +#define R8A(r) ((r) << 8) +#define R12A(r) ((r) << 12) +#define R16A(r) ((r) << 16) +#define R20A(r) ((r) << 20) +#define R28A(r) ((r) << 28) +#define R32A(r) ((r) << 32) +#define R36A(r) ((r) << 36) + +#define R0(r) ((sljit_ins)reg_map[r]) + +#define F0(r) ((sljit_ins)freg_map[r]) +#define F4(r) (R4A((sljit_ins)freg_map[r])) +#define F20(r) (R20A((sljit_ins)freg_map[r])) +#define F36(r) (R36A((sljit_ins)freg_map[r])) + +struct sljit_s390x_const { + struct sljit_const const_; /* must be first */ + sljit_sw init_value; /* required to build literal pool */ +}; + +/* Convert SLJIT register to hardware register. */ +static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r) +{ + SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0]))); + return reg_map[r]; +} + +static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r) +{ + SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map[0]))); + return freg_map[r]; +} + +/* Size of instruction in bytes. Tags must already be cleared. */ +static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins) +{ + /* keep faulting instructions */ + if (ins == 0) + return 2; + + if ((ins & 0x00000000ffffL) == ins) + return 2; + if ((ins & 0x0000ffffffffL) == ins) + return 4; + if ((ins & 0xffffffffffffL) == ins) + return 6; + + SLJIT_UNREACHABLE(); + return (sljit_uw)-1; +} + +static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ibuf); + *ibuf = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static sljit_s32 encode_inst(void **ptr, sljit_ins ins) +{ + sljit_u16 *ibuf = (sljit_u16 *)*ptr; + sljit_uw size = sizeof_ins(ins); + + SLJIT_ASSERT((size & 6) == size); + switch (size) { + case 6: + *ibuf++ = (sljit_u16)(ins >> 32); + /* fallthrough */ + case 4: + *ibuf++ = (sljit_u16)(ins >> 16); + /* fallthrough */ + case 2: + *ibuf++ = (sljit_u16)(ins); + } + *ptr = (void*)ibuf; + return SLJIT_SUCCESS; +} + +#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \ + (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \ + && !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE)) + +/* Map the given type to a 4-bit condition code mask. 
*/ +static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) { + const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */ + const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */ + const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */ + const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */ + + switch (type) { + case SLJIT_EQUAL: + if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) { + sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state); + if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL) + return cc0; + if (type == SLJIT_OVERFLOW) + return (cc0 | cc3); + return (cc0 | cc2); + } + /* fallthrough */ + + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + return cc0; + + case SLJIT_NOT_EQUAL: + if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) { + sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state); + if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL) + return (cc1 | cc2 | cc3); + if (type == SLJIT_OVERFLOW) + return (cc1 | cc2); + return (cc1 | cc3); + } + /* fallthrough */ + + case SLJIT_UNORDERED_OR_NOT_EQUAL: + return (cc1 | cc2 | cc3); + + case SLJIT_LESS: + return cc1; + + case SLJIT_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return (cc0 | cc2 | cc3); + + case SLJIT_GREATER: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE) + return cc2; + return cc3; + + case SLJIT_LESS_EQUAL: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE) + return (cc0 | cc1); + return (cc0 | cc1 | cc2); + + case SLJIT_SIG_LESS: + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + return cc1; + + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc2 | cc3); + /* fallthrough */ + + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + return (cc0 | cc1); + + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc0 | cc1); + /* fallthrough */ + + case SLJIT_SIG_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + /* Overflow is considered greater, see SLJIT_SUB. */ + return cc2 | cc3; + + case SLJIT_SIG_GREATER_EQUAL: + return (cc0 | cc2 | cc3); + + case SLJIT_OVERFLOW: + if (compiler->status_flags_state & SLJIT_SET_Z) + return (cc2 | cc3); + /* fallthrough */ + + case SLJIT_UNORDERED: + return cc3; + + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & SLJIT_SET_Z) + return (cc0 | cc1); + /* fallthrough */ + + case SLJIT_ORDERED: + return (cc0 | cc1 | cc2); + + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return (cc1 | cc2); + + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + return cc2; + + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + return (cc0 | cc2); + + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return (cc0 | cc1 | cc3); + + case SLJIT_UNORDERED_OR_EQUAL: + return (cc0 | cc3); + + case SLJIT_UNORDERED_OR_LESS: + return (cc1 | cc3); + } + + SLJIT_UNREACHABLE(); + return (sljit_u8)-1; +} + +/* Facility to bit index mappings. + Note: some facilities share the same bit index. 
*/ +typedef sljit_uw facility_bit; +#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7 +#define FAST_LONG_DISPLACEMENT_FACILITY 19 +#define EXTENDED_IMMEDIATE_FACILITY 21 +#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34 +#define DISTINCT_OPERAND_FACILITY 45 +#define HIGH_WORD_FACILITY 45 +#define POPULATION_COUNT_FACILITY 45 +#define LOAD_STORE_ON_CONDITION_1_FACILITY 45 +#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49 +#define LOAD_STORE_ON_CONDITION_2_FACILITY 53 +#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58 +#define VECTOR_FACILITY 129 +#define VECTOR_ENHANCEMENTS_1_FACILITY 135 + +/* Report whether a facility is known to be present due to the compiler + settings. This function should always be compiled to a constant + value given a constant argument. */ +static SLJIT_INLINE int have_facility_static(facility_bit x) +{ +#if ENABLE_STATIC_FACILITY_DETECTION + switch (x) { + case FAST_LONG_DISPLACEMENT_FACILITY: + return (__ARCH__ >= 6 /* z990 */); + case EXTENDED_IMMEDIATE_FACILITY: + case STORE_FACILITY_LIST_EXTENDED_FACILITY: + return (__ARCH__ >= 7 /* z9-109 */); + case GENERAL_INSTRUCTION_EXTENSION_FACILITY: + return (__ARCH__ >= 8 /* z10 */); + case DISTINCT_OPERAND_FACILITY: + return (__ARCH__ >= 9 /* z196 */); + case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY: + return (__ARCH__ >= 10 /* zEC12 */); + case LOAD_STORE_ON_CONDITION_2_FACILITY: + case VECTOR_FACILITY: + return (__ARCH__ >= 11 /* z13 */); + case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY: + case VECTOR_ENHANCEMENTS_1_FACILITY: + return (__ARCH__ >= 12 /* z14 */); + default: + SLJIT_UNREACHABLE(); + } +#endif + return 0; +} + +static SLJIT_INLINE unsigned long get_hwcap() +{ + static unsigned long hwcap = 0; + if (SLJIT_UNLIKELY(!hwcap)) { + hwcap = getauxval(AT_HWCAP); + SLJIT_ASSERT(hwcap != 0); + } + return hwcap; +} + +static SLJIT_INLINE int have_stfle() +{ + if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY)) + return 1; + + return (get_hwcap() & HWCAP_S390_STFLE); +} + +/* Report whether the given facility is available. This function always + performs a runtime check. */ +static int have_facility_dynamic(facility_bit x) +{ +#if ENABLE_DYNAMIC_FACILITY_DETECTION + static struct { + sljit_uw bits[4]; + } cpu_features; + size_t size = sizeof(cpu_features); + const sljit_uw word_index = x >> 6; + const sljit_uw bit_index = ((1UL << 63) >> (x & 63)); + + SLJIT_ASSERT(x < size * 8); + if (SLJIT_UNLIKELY(!have_stfle())) + return 0; + + if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) { + __asm__ __volatile__ ( + "lgr %%r0, %0;" + "stfle 0(%1);" + /* outputs */: + /* inputs */: "d" ((size / 8) - 1), "a" (&cpu_features) + /* clobbers */: "r0", "cc", "memory" + ); + SLJIT_ASSERT(cpu_features.bits[0] != 0); + } + return (cpu_features.bits[word_index] & bit_index) != 0; +#else + return 0; +#endif +} + +#define HAVE_FACILITY(name, bit) \ +static SLJIT_INLINE int name() \ +{ \ + static int have = -1; \ + /* Static check first. May allow the function to be optimized away. */ \ + if (have_facility_static(bit)) \ + have = 1; \ + else if (SLJIT_UNLIKELY(have < 0)) \ + have = have_facility_dynamic(bit) ? 
1 : 0; \ +\ + return have; \ +} + +HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY) +HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY) +HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY) +HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY) +HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY) +HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY) +#undef HAVE_FACILITY + +#define is_u12(d) (0 <= (d) && (d) <= 0x00000fffL) +#define is_u32(d) (0 <= (d) && (d) <= 0xffffffffL) + +#define CHECK_SIGNED(v, bitlen) \ + ((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1))) + +#define is_s8(d) CHECK_SIGNED((d), 8) +#define is_s16(d) CHECK_SIGNED((d), 16) +#define is_s20(d) CHECK_SIGNED((d), 20) +#define is_s32(d) ((d) == (sljit_s32)(d)) + +static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d) +{ + SLJIT_ASSERT(is_s20(d)); + + sljit_uw dh = (d >> 12) & 0xff; + sljit_uw dl = (d << 8) & 0xfff00; + return (dh | dl) << 8; +} + +/* TODO(carenas): variadic macro is not strictly needed */ +#define SLJIT_S390X_INSTRUCTION(op, ...) \ +static SLJIT_INLINE sljit_ins op(__VA_ARGS__) + +/* RR form instructions. */ +#define SLJIT_S390X_RR(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ +{ \ + return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ +} + +/* AND */ +SLJIT_S390X_RR(nr, 0x1400) + +/* BRANCH AND SAVE */ +SLJIT_S390X_RR(basr, 0x0d00) + +/* BRANCH ON CONDITION */ +SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? */ + +/* DIVIDE */ +SLJIT_S390X_RR(dr, 0x1d00) + +/* EXCLUSIVE OR */ +SLJIT_S390X_RR(xr, 0x1700) + +/* LOAD */ +SLJIT_S390X_RR(lr, 0x1800) + +/* LOAD COMPLEMENT */ +SLJIT_S390X_RR(lcr, 0x1300) + +/* OR */ +SLJIT_S390X_RR(or, 0x1600) + +#undef SLJIT_S390X_RR + +/* RRE form instructions */ +#define SLJIT_S390X_RRE(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ +{ \ + return (pattern) | R4A(dst) | R0A(src); \ +} + +/* AND */ +SLJIT_S390X_RRE(ngr, 0xb9800000) + +/* DIVIDE LOGICAL */ +SLJIT_S390X_RRE(dlr, 0xb9970000) +SLJIT_S390X_RRE(dlgr, 0xb9870000) + +/* DIVIDE SINGLE */ +SLJIT_S390X_RRE(dsgr, 0xb90d0000) + +/* EXCLUSIVE OR */ +SLJIT_S390X_RRE(xgr, 0xb9820000) + +/* LOAD */ +SLJIT_S390X_RRE(lgr, 0xb9040000) +SLJIT_S390X_RRE(lgfr, 0xb9140000) + +/* LOAD BYTE */ +SLJIT_S390X_RRE(lbr, 0xb9260000) +SLJIT_S390X_RRE(lgbr, 0xb9060000) + +/* LOAD COMPLEMENT */ +SLJIT_S390X_RRE(lcgr, 0xb9030000) + +/* LOAD HALFWORD */ +SLJIT_S390X_RRE(lhr, 0xb9270000) +SLJIT_S390X_RRE(lghr, 0xb9070000) + +/* LOAD LOGICAL */ +SLJIT_S390X_RRE(llgfr, 0xb9160000) + +/* LOAD LOGICAL CHARACTER */ +SLJIT_S390X_RRE(llcr, 0xb9940000) +SLJIT_S390X_RRE(llgcr, 0xb9840000) + +/* LOAD LOGICAL HALFWORD */ +SLJIT_S390X_RRE(llhr, 0xb9950000) +SLJIT_S390X_RRE(llghr, 0xb9850000) + +/* MULTIPLY LOGICAL */ +SLJIT_S390X_RRE(mlgr, 0xb9860000) + +/* MULTIPLY SINGLE */ +SLJIT_S390X_RRE(msgfr, 0xb91c0000) + +/* OR */ +SLJIT_S390X_RRE(ogr, 0xb9810000) + +/* SUBTRACT */ +SLJIT_S390X_RRE(sgr, 0xb9090000) + +#undef SLJIT_S390X_RRE + +/* RI-a form instructions */ +#define SLJIT_S390X_RIA(name, pattern, imm_type) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ +{ \ + return (pattern) | R20A(reg) | (imm & 0xffff); \ +} + +/* ADD HALFWORD IMMEDIATE */ +SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16) + +/* LOAD HALFWORD IMMEDIATE */ +SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16) +SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16) + +/* LOAD LOGICAL IMMEDIATE */ +SLJIT_S390X_RIA(llihh, 0xa50c0000, 
sljit_u16) +SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16) +SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16) +SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16) + +/* MULTIPLY HALFWORD IMMEDIATE */ +SLJIT_S390X_RIA(mhi, 0xa70c0000, sljit_s16) +SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16) + +/* OR IMMEDIATE */ +SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16) + +#undef SLJIT_S390X_RIA + +/* RIL-a form instructions (requires extended immediate facility) */ +#define SLJIT_S390X_RILA(name, pattern, imm_type) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ +{ \ + SLJIT_ASSERT(have_eimm()); \ + return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \ +} + +/* ADD IMMEDIATE */ +SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32) + +/* ADD IMMEDIATE HIGH */ +SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */ + +/* AND IMMEDIATE */ +SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32) + +/* EXCLUSIVE OR IMMEDIATE */ +SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32) + +/* INSERT IMMEDIATE */ +SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32) +SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32) + +/* LOAD IMMEDIATE */ +SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32) + +/* LOAD LOGICAL IMMEDIATE */ +SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32) +SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32) + +/* SUBTRACT LOGICAL IMMEDIATE */ +SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32) + +#undef SLJIT_S390X_RILA + +/* RX-a form instructions */ +#define SLJIT_S390X_RXA(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ +{ \ + SLJIT_ASSERT((d & 0xfff) == d); \ +\ + return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \ +} + +/* LOAD */ +SLJIT_S390X_RXA(l, 0x58000000) + +/* LOAD ADDRESS */ +SLJIT_S390X_RXA(la, 0x41000000) + +/* LOAD HALFWORD */ +SLJIT_S390X_RXA(lh, 0x48000000) + +/* MULTIPLY SINGLE */ +SLJIT_S390X_RXA(ms, 0x71000000) + +/* STORE */ +SLJIT_S390X_RXA(st, 0x50000000) + +/* STORE CHARACTER */ +SLJIT_S390X_RXA(stc, 0x42000000) + +/* STORE HALFWORD */ +SLJIT_S390X_RXA(sth, 0x40000000) + +#undef SLJIT_S390X_RXA + +/* RXY-a instructions */ +#define SLJIT_S390X_RXYA(name, pattern, cond) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ +{ \ + SLJIT_ASSERT(cond); \ +\ + return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \ +} + +/* LOAD */ +SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp()) +SLJIT_S390X_RXYA(lg, 0xe30000000004, 1) +SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1) + +/* LOAD BYTE */ +SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp()) +SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp()) + +/* LOAD HALFWORD */ +SLJIT_S390X_RXYA(lhy, 0xe30000000078, have_ldisp()) +SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1) + +/* LOAD LOGICAL */ +SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1) + +/* LOAD LOGICAL CHARACTER */ +SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm()) +SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1) + +/* LOAD LOGICAL HALFWORD */ +SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm()) +SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1) + +/* MULTIPLY SINGLE */ +SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp()) +SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1) + +/* STORE */ +SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp()) +SLJIT_S390X_RXYA(stg, 0xe30000000024, 1) + +/* STORE CHARACTER */ +SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp()) + +/* STORE HALFWORD */ +SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp()) + +#undef SLJIT_S390X_RXYA + +/* 
RSY-a instructions */ +#define SLJIT_S390X_RSYA(name, pattern, cond) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \ +{ \ + SLJIT_ASSERT(cond); \ +\ + return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \ +} + +/* LOAD MULTIPLE */ +SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1) + +/* SHIFT LEFT LOGICAL */ +SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1) + +/* SHIFT RIGHT SINGLE */ +SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1) + +/* STORE MULTIPLE */ +SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1) + +#undef SLJIT_S390X_RSYA + +/* RIE-f instructions (require general-instructions-extension facility) */ +#define SLJIT_S390X_RIEF(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \ +{ \ + sljit_ins i3, i4, i5; \ +\ + SLJIT_ASSERT(have_genext()); \ + i3 = (sljit_ins)start << 24; \ + i4 = (sljit_ins)end << 16; \ + i5 = (sljit_ins)rot << 8; \ +\ + return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \ +} + +/* ROTATE THEN AND SELECTED BITS */ +/* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */ + +/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */ +/* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */ + +/* ROTATE THEN OR SELECTED BITS */ +SLJIT_S390X_RIEF(rosbg, 0xec0000000056) + +/* ROTATE THEN INSERT SELECTED BITS */ +/* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */ +/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */ + +/* ROTATE THEN INSERT SELECTED BITS HIGH */ +SLJIT_S390X_RIEF(risbhg, 0xec000000005d) + +/* ROTATE THEN INSERT SELECTED BITS LOW */ +/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */ + +#undef SLJIT_S390X_RIEF + +/* RRF-c instructions (require load/store-on-condition 1 facility) */ +#define SLJIT_S390X_RRFC(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \ +{ \ + sljit_ins m3; \ +\ + SLJIT_ASSERT(have_lscond1()); \ + m3 = (sljit_ins)(mask & 0xf) << 12; \ +\ + return (pattern) | m3 | R4A(dst) | R0A(src); \ +} + +/* LOAD ON CONDITION */ +SLJIT_S390X_RRFC(locr, 0xb9f20000) +SLJIT_S390X_RRFC(locgr, 0xb9e20000) + +#undef SLJIT_S390X_RRFC + +/* RIE-g instructions (require load/store-on-condition 2 facility) */ +#define SLJIT_S390X_RIEG(name, pattern) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \ +{ \ + sljit_ins m3, i2; \ +\ + SLJIT_ASSERT(have_lscond2()); \ + m3 = (sljit_ins)(mask & 0xf) << 32; \ + i2 = (sljit_ins)(imm & 0xffffL) << 16; \ +\ + return (pattern) | R36A(reg) | m3 | i2; \ +} + +/* LOAD HALFWORD IMMEDIATE ON CONDITION */ +SLJIT_S390X_RIEG(lochi, 0xec0000000042) +SLJIT_S390X_RIEG(locghi, 0xec0000000046) + +#undef SLJIT_S390X_RIEG + +#define SLJIT_S390X_RILB(name, pattern, cond) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \ +{ \ + SLJIT_ASSERT(cond); \ +\ + return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \ +} + +/* BRANCH RELATIVE AND SAVE LONG */ +SLJIT_S390X_RILB(brasl, 0xc00500000000, 1) + +/* LOAD ADDRESS RELATIVE LONG */ +SLJIT_S390X_RILB(larl, 0xc00000000000, 1) + +/* LOAD RELATIVE LONG */ +SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext()) + +#undef SLJIT_S390X_RILB + +SLJIT_S390X_INSTRUCTION(br, sljit_gpr target) +{ + return 0x07f0 | target; +} + +SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target) +{ + sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20; + sljit_ins ri2 = (sljit_ins)target & 0xffff; + return 0xa7040000L | m1 | ri2; +} + +SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target) +{ + sljit_ins m1 = (sljit_ins)(mask & 0xf) 
<< 36; + sljit_ins ri2 = (sljit_ins)target & 0xffffffff; + return 0xc00400000000L | m1 | ri2; +} + +SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src) +{ + SLJIT_ASSERT(have_eimm()); + return 0xb9830000 | R8A(dst) | R0A(src); +} + +/* INSERT PROGRAM MASK */ +SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst) +{ + return 0xb2220000 | R4A(dst); +} + +/* SET PROGRAM MASK */ +SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst) +{ + return 0x0400 | R4A(dst); +} + +/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */ +SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) +{ + return risbhg(dst, src, start, 0x8 | end, rot); +} + +#undef SLJIT_S390X_INSTRUCTION + +static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r) +{ + /* Condition codes: bits 18 and 19. + Transformation: + 0 (zero and no overflow) : unchanged + 1 (non-zero and no overflow) : unchanged + 2 (zero and overflow) : decreased by 1 + 3 (non-zero and overflow) : decreased by 1 if non-zero */ + FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1))); + FAIL_IF(push_inst(compiler, ipm(tmp1))); + FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r))); + FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3))); + FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000))); + FAIL_IF(push_inst(compiler, spm(tmp1))); + return SLJIT_SUCCESS; +} + +/* load 64-bit immediate into register without clobbering flags */ +static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v) +{ + /* 4 byte instructions */ + if (is_s16(v)) + return push_inst(compiler, lghi(target, (sljit_s16)v)); + + if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0) + return push_inst(compiler, llill(target, (sljit_u16)v)); + + if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0) + return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16))); + + if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0) + return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32))); + + if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0) + return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48))); + + /* 6 byte instructions (requires extended immediate facility) */ + if (have_eimm()) { + if (is_s32(v)) + return push_inst(compiler, lgfi(target, (sljit_s32)v)); + + if (((sljit_uw)v >> 32) == 0) + return push_inst(compiler, llilf(target, (sljit_u32)v)); + + if (((sljit_uw)v << 32) == 0) + return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32))); + + FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v))); + return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32))); + } + + /* TODO(mundaym): instruction sequences that don't use extended immediates */ + abort(); +} + +struct addr { + sljit_gpr base; + sljit_gpr index; + sljit_s32 offset; +}; + +/* transform memory operand into D(X,B) form with a signed 20-bit offset */ +static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler, + struct addr *addr, sljit_s32 mem, sljit_sw off, + sljit_gpr tmp /* clobbered, must not be r0 */) +{ + sljit_gpr base = r0; + sljit_gpr index = r0; + + SLJIT_ASSERT(tmp != r0); + if (mem & REG_MASK) + base = gpr(mem & REG_MASK); + + if (mem & OFFS_REG_MASK) { + index = gpr(OFFS_REG(mem)); + if (off != 0) { + /* shift and put the result into tmp */ + SLJIT_ASSERT(0 <= off && off < 64); + FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0))); + index = tmp; + off = 0; /* clear offset */ + } + 
} + else if (!is_s20(off)) { + FAIL_IF(push_load_imm_inst(compiler, tmp, off)); + index = tmp; + off = 0; /* clear offset */ + } + addr->base = base; + addr->index = index; + addr->offset = (sljit_s32)off; + return SLJIT_SUCCESS; +} + +/* transform memory operand into D(X,B) form with an unsigned 12-bit offset */ +static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, + struct addr *addr, sljit_s32 mem, sljit_sw off, + sljit_gpr tmp /* clobbered, must not be r0 */) +{ + sljit_gpr base = r0; + sljit_gpr index = r0; + + SLJIT_ASSERT(tmp != r0); + if (mem & REG_MASK) + base = gpr(mem & REG_MASK); + + if (mem & OFFS_REG_MASK) { + index = gpr(OFFS_REG(mem)); + if (off != 0) { + /* shift and put the result into tmp */ + SLJIT_ASSERT(0 <= off && off < 64); + FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0))); + index = tmp; + off = 0; /* clear offset */ + } + } + else if (!is_u12(off)) { + FAIL_IF(push_load_imm_inst(compiler, tmp, off)); + index = tmp; + off = 0; /* clear offset */ + } + addr->base = base; + addr->index = index; + addr->offset = (sljit_s32)off; + return SLJIT_SUCCESS; +} + +#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base) +#define WHEN(cond, r, i1, i2, addr) \ + (cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr) + +/* May clobber tmp1. */ +static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r, + sljit_s32 src, sljit_sw srcw, + sljit_s32 is_32bit) +{ + struct addr addr; + sljit_ins ins; + + SLJIT_ASSERT(src & SLJIT_MEM); + + if (is_32bit && ((src & OFFS_REG_MASK) || is_u12(srcw) || !is_s20(srcw))) { + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); + return push_inst(compiler, 0x58000000 /* l */ | R20A(dst_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset); + } + + FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1)); + + ins = is_32bit ? 0xe30000000058 /* ly */ : 0xe30000000004 /* lg */; + return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); +} + +/* May clobber tmp1. */ +static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r, + sljit_s32 src, sljit_sw srcw, + sljit_s32 is_32bit) +{ + struct addr addr; + sljit_ins ins; + + SLJIT_ASSERT(src & SLJIT_MEM); + + FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1)); + + ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */; + return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); +} + +/* May clobber tmp1. */ +static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 is_32bit) +{ + struct addr addr; + sljit_ins ins; + + SLJIT_ASSERT(dst & SLJIT_MEM); + + if (is_32bit && ((dst & OFFS_REG_MASK) || is_u12(dstw) || !is_s20(dstw))) { + FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1)); + return push_inst(compiler, 0x50000000 /* st */ | R20A(src_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset); + } + + FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1)); + + ins = is_32bit ? 
0xe30000000050 /* sty */ : 0xe30000000024 /* stg */; + return push_inst(compiler, ins | R36A(src_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); +} + +#undef WHEN + +static sljit_s32 emit_move(struct sljit_compiler *compiler, + sljit_gpr dst_r, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK)); + + if (src & SLJIT_IMM) + return push_load_imm_inst(compiler, dst_r, srcw); + + if (src & SLJIT_MEM) + return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0); + + sljit_gpr src_r = gpr(src & REG_MASK); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r)); +} + +static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = tmp0; + sljit_gpr src_r = tmp1; + sljit_s32 needs_move = 1; + + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); + + if (dst == src1) + needs_move = 0; + else if (dst == src2) { + dst_r = tmp0; + needs_move = 2; + } + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (FAST_IS_REG(src2)) + src_r = gpr(src2); + else + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + + FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r))); + + if (needs_move != 2) + return SLJIT_SUCCESS; + + dst_r = gpr(dst & REG_MASK); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + sljit_gpr src_r = tmp1; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp1, src1, src1w)); + + return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)); +} + +static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr src1_r = tmp0; + sljit_gpr src2_r = tmp1; + + if (FAST_IS_REG(src1)) + src1_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (FAST_IS_REG(src2)) + src2_r = gpr(src2); + else + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + + return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r)); +} + +typedef enum { + RI_A, + RIL_A, +} emit_ril_type; + +static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_sw src2w, + emit_ril_type type) +{ + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); + + if (dst == src1) + needs_move = 0; + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (type == RIL_A) + return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff)); + return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff)); +} + +static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_sw src2w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? 
gpr(dst) : tmp0; + sljit_gpr src_r = tmp0; + + if (!FAST_IS_REG(src1)) + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + else + src_r = gpr(src1 & REG_MASK); + + return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16); +} + +typedef enum { + RX_A, + RXY_A, +} emit_rx_type; + +static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w, + emit_rx_type type) +{ + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + sljit_gpr base, index; + + SLJIT_ASSERT(src2 & SLJIT_MEM); + + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); + + if (dst == src1) + needs_move = 0; + else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) { + dst_r = tmp0; + needs_move = 2; + } + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + base = gpr(src2 & REG_MASK); + index = tmp0; + + if (src2 & OFFS_REG_MASK) { + index = gpr(OFFS_REG(src2)); + + if (src2w != 0) { + FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0))); + src2w = 0; + index = tmp1; + } + } else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w)); + + if (src2 & REG_MASK) + index = tmp1; + else + base = tmp1; + src2w = 0; + } + + if (type == RX_A) + ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w; + else + ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w); + + FAIL_IF(push_inst(compiler, ins)); + + if (needs_move != 2) + return SLJIT_SUCCESS; + + dst_r = gpr(dst); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, sljit_sw dstw, + sljit_sw srcw) +{ + SLJIT_ASSERT(dst & SLJIT_MEM); + + sljit_gpr dst_r = tmp1; + + if (dst & OFFS_REG_MASK) { + sljit_gpr index = tmp1; + + if ((dstw & 0x3) == 0) + index = gpr(OFFS_REG(dst)); + else + FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0))); + + FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, index))); + dstw = 0; + } + else if (!is_s20(dstw)) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw)); + + if (dst & REG_MASK) + FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, tmp1))); + + dstw = 0; + } + else + dst_r = gpr(dst & REG_MASK); + + return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw)); +} + +struct ins_forms { + sljit_ins op_r; + sljit_ins op_gr; + sljit_ins op_rk; + sljit_ins op_grk; + sljit_ins op; + sljit_ins op_y; + sljit_ins op_g; +}; + +static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mode = compiler->mode; + sljit_ins ins, ins_k; + + if ((src1 | src2) & SLJIT_MEM) { + sljit_ins ins12, ins20; + + if (mode & SLJIT_32) { + ins12 = forms->op; + ins20 = forms->op_y; + } + else { + ins12 = 0; + ins20 = forms->op_g; + } + + if (ins12 && ins20) { + /* Extra instructions needed for address computation can be executed independently. */ + if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM) + || ((src1 & OFFS_REG_MASK) ? 
(src1w & 0x3) == 0 : is_s20(src1w)))) { + if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w)) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + + if (src1 & SLJIT_MEM) { + if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w)) + return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A); + + return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A); + } + } + else if (ins12 || ins20) { + emit_rx_type rx_type; + + if (ins12) { + rx_type = RX_A; + ins = ins12; + } + else { + rx_type = RXY_A; + ins = ins20; + } + + if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM) + || ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w))))) + return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type); + + if (src1 & SLJIT_MEM) + return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type); + } + } + + if (mode & SLJIT_32) { + ins = forms->op_r; + ins_k = forms->op_rk; + } + else { + ins = forms->op_gr; + ins_k = forms->op_grk; + } + + SLJIT_ASSERT(ins != 0 || ins_k != 0); + + if (ins && FAST_IS_REG(dst)) { + if (dst == src1) + return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); + + if (dst == src2) + return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w); + } + + if (ins_k == 0) + return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); + + return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w); +} + +static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mode = compiler->mode; + sljit_ins ins; + + if (src2 & SLJIT_MEM) { + sljit_ins ins12, ins20; + + if (mode & SLJIT_32) { + ins12 = forms->op; + ins20 = forms->op_y; + } + else { + ins12 = 0; + ins20 = forms->op_g; + } + + if (ins12 && ins20) { + if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w)) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + else if (ins12) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + else if (ins20) + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + + ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk; + + if (ins == 0 || (FAST_IS_REG(dst) && dst == src1)) + return emit_rr(compiler, (mode & SLJIT_32) ? 
forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w); + + return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_s390x_const *const_; + struct sljit_put_label *put_label; + sljit_sw executable_offset; + sljit_uw ins_size = 0; /* instructions */ + sljit_uw pool_size = 0; /* literal pool */ + sljit_uw pad_size; + sljit_uw i, j = 0; + struct sljit_memory_fragment *buf; + void *code, *code_ptr; + sljit_uw *pool, *pool_ptr; + sljit_sw source, offset; /* TODO(carenas): only need 32 bit */ + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + /* branch handling */ + label = compiler->labels; + jump = compiler->jumps; + put_label = compiler->put_labels; + + /* TODO(carenas): compiler->executable_size could be calculated + * before to avoid the following loop (except for + * pool_size) + */ + /* calculate the size of the code */ + for (buf = compiler->buf; buf != NULL; buf = buf->next) { + sljit_uw len = buf->used_size / sizeof(sljit_ins); + sljit_ins *ibuf = (sljit_ins *)buf->memory; + for (i = 0; i < len; ++i, ++j) { + sljit_ins ins = ibuf[i]; + + /* TODO(carenas): instruction tag vs size/addr == j + * using instruction tags for const is creative + * but unlike all other architectures, and is not + * done consistently for all other objects. + * This might need reviewing later. + */ + if (ins & sljit_ins_const) { + pool_size += sizeof(*pool); + ins &= ~sljit_ins_const; + } + if (label && label->size == j) { + label->size = ins_size; + label = label->next; + } + if (jump && jump->addr == j) { + if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) { + /* encoded: */ + /* brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */ + /* replace with: */ + /* lgrl %r1, <pool_addr> */ + /* bras %r14, %r1 (or bcr <mask>, %r1) */ + pool_size += sizeof(*pool); + ins_size += 2; + } + jump = jump->next; + } + if (put_label && put_label->addr == j) { + pool_size += sizeof(*pool); + put_label = put_label->next; + } + ins_size += sizeof_ins(ins); + } + } + + /* emit trailing label */ + if (label && label->size == j) { + label->size = ins_size; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!put_label); + + /* pad code size to 8 bytes so it is accessible with half word offsets */ + /* the literal pool needs to be doubleword aligned */ + pad_size = ((ins_size + 7UL) & ~7UL) - ins_size; + SLJIT_ASSERT(pad_size < 8UL); + + /* allocate target buffer */ + code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size, + compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + code_ptr = code; + executable_offset = SLJIT_EXEC_OFFSET(code); + + /* TODO(carenas): pool is optional, and the ABI recommends it to + * be created before the function code, instead of + * globally; if generated code is too big it could + * need offsets bigger than 32bit words and assert() + */ + pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size); + pool_ptr = pool; + const_ = (struct sljit_s390x_const *)compiler->consts; + + /* update label addresses */ + label = compiler->labels; + while (label) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET( + (sljit_uw)code_ptr + label->size, executable_offset); + label = label->next; + } + + /* reset jumps */ + jump = compiler->jumps; + put_label = compiler->put_labels; + + /* emit the code */ + j = 0; + for (buf = 
compiler->buf; buf != NULL; buf = buf->next) { + sljit_uw len = buf->used_size / sizeof(sljit_ins); + sljit_ins *ibuf = (sljit_ins *)buf->memory; + for (i = 0; i < len; ++i, ++j) { + sljit_ins ins = ibuf[i]; + if (ins & sljit_ins_const) { + /* clear the const tag */ + ins &= ~sljit_ins_const; + + /* update instruction with relative address of constant */ + source = (sljit_sw)code_ptr; + offset = (sljit_sw)pool_ptr - source; + + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; /* halfword (not byte) offset */ + SLJIT_ASSERT(is_s32(offset)); + + ins |= (sljit_ins)offset & 0xffffffff; + + /* update address */ + const_->const_.addr = (sljit_uw)pool_ptr; + + /* store initial value into pool and update pool address */ + *(pool_ptr++) = (sljit_uw)const_->init_value; + + /* move to next constant */ + const_ = (struct sljit_s390x_const *)const_->const_.next; + } + if (jump && jump->addr == j) { + sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); + if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) { + jump->addr = (sljit_uw)pool_ptr; + + /* load address into tmp1 */ + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); + + encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff)); + + /* store jump target into pool and update pool address */ + *(pool_ptr++) = (sljit_uw)target; + + /* branch to tmp1 */ + sljit_ins op = (ins >> 32) & 0xf; + sljit_ins arg = (ins >> 36) & 0xf; + switch (op) { + case 4: /* brcl -> bcr */ + ins = bcr(arg, tmp1); + break; + case 5: /* brasl -> basr */ + ins = basr(arg, tmp1); + break; + default: + abort(); + } + } + else { + jump->addr = (sljit_uw)code_ptr + 2; + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + offset = target - source; + + /* offset must be halfword aligned */ + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */ + + /* patch jump target */ + ins |= (sljit_ins)offset & 0xffffffff; + } + jump = jump->next; + } + if (put_label && put_label->addr == j) { + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; + + /* store target into pool */ + *pool_ptr = put_label->label->addr; + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + pool_ptr++; + + SLJIT_ASSERT(!(offset & 1)); + offset >>= 1; + SLJIT_ASSERT(is_s32(offset)); + ins |= (sljit_ins)offset & 0xffffffff; + + put_label = put_label->next; + } + encode_inst(&code_ptr, ins); + } + } + SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr); + SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr); + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = ins_size; + code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + /* TODO(mundaym): implement all */ + switch (feature_type) { + case SLJIT_HAS_FPU: + case SLJIT_HAS_CLZ: + case SLJIT_HAS_ROT: + case SLJIT_HAS_PREFETCH: + return 1; + case SLJIT_HAS_CTZ: + return 2; + case 
SLJIT_HAS_CMOV: + return have_lscond1() ? 1 : 0; + } + return 0; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL); +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + sljit_s32 offset, i, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* Saved registers are stored in callee allocated save area. */ + SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13); + + offset = 2 * SSIZE_OF(sw); + if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { + if (saved_arg_count == 0) { + FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15))); + offset += 9 * SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15))); + offset += (8 - saved_arg_count) * SSIZE_OF(sw); + } + } else { + if (scratches == SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (scratches > SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15))); + offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); + } + + if (saved_arg_count == 0) { + if (saveds == 0) { + FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); + offset += (saveds + 1) * SSIZE_OF(sw); + } + } else if (saveds > saved_arg_count) { + if (saveds == saved_arg_count + 1) { + FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15))); + offset += (saveds - saved_arg_count) * SSIZE_OF(sw); + } + } + } + + if (saved_arg_count > 0) { + FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; + compiler->local_size = local_size; + + FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size))); + + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + tmp = 0; + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + 
tmp)))); + saved_arg_count++; + } + tmp++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg) +{ + sljit_s32 offset, i, tmp; + sljit_s32 local_size = compiler->local_size; + sljit_s32 saveds = compiler->saveds; + sljit_s32 scratches = compiler->scratches; + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + + if (is_u12(local_size)) + FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size)); + else + FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size))); + + offset = 2 * SSIZE_OF(sw); + if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { + if (kept_saveds_count == 0) { + FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15))); + offset += 9 * SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15))); + offset += (8 - kept_saveds_count) * SSIZE_OF(sw); + } + } else { + if (scratches == SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (scratches > SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15))); + offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); + } + + if (kept_saveds_count == 0) { + if (saveds == 0) { + if (last_reg == r14) + FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (saveds == 1 && last_reg == r13) { + FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15))); + offset += 2 * SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15))); + offset += (saveds + 1) * SSIZE_OF(sw); + } + } else if (saveds > kept_saveds_count) { + if (saveds == kept_saveds_count + 1) { + FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15))); + offset += (saveds - kept_saveds_count) * SSIZE_OF(sw); + } + } + } + + if (kept_saveds_count > 0) { + if (last_reg == r14) + FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + 
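/* the epilogue below releases the stack frame, then branches to the return address saved in r14 */ +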
CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler, r14)); + return push_inst(compiler, br(r14)); /* return */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */)); + src = TMP_REG2; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src)))); + src = TMP_REG2; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, r13)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + sljit_gpr arg0 = gpr(SLJIT_R0); + sljit_gpr arg1 = gpr(SLJIT_R1); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op) | (op & SLJIT_32); + switch (op) { + case SLJIT_BREAKPOINT: + /* The following invalid instruction is emitted by gdb. */ + return push_inst(compiler, 0x0001 /* 2-byte trap */); + case SLJIT_NOP: + return push_inst(compiler, 0x0700 /* 2-byte nop */); + case SLJIT_LMUL_UW: + FAIL_IF(push_inst(compiler, mlgr(arg0, arg0))); + break; + case SLJIT_LMUL_SW: + /* signed multiplication from: */ + /* Hacker's Delight, Second Edition: Chapter 8-3. */ + FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0))); + FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0))); + FAIL_IF(push_inst(compiler, ngr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, ngr(tmp1, arg0))); + + /* unsigned multiplication */ + FAIL_IF(push_inst(compiler, mlgr(arg0, arg0))); + + FAIL_IF(push_inst(compiler, sgr(arg0, tmp0))); + FAIL_IF(push_inst(compiler, sgr(arg0, tmp1))); + break; + case SLJIT_DIV_U32: + case SLJIT_DIVMOD_U32: + FAIL_IF(push_inst(compiler, lhi(tmp0, 0))); + FAIL_IF(push_inst(compiler, lr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dlr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_U32) + return push_inst(compiler, lr(arg1, tmp0)); /* remainder */ + + return SLJIT_SUCCESS; + case SLJIT_DIV_S32: + case SLJIT_DIVMOD_S32: + FAIL_IF(push_inst(compiler, lhi(tmp0, 0))); + FAIL_IF(push_inst(compiler, lr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_S32) + return push_inst(compiler, lr(arg1, tmp0)); /* remainder */ + + return SLJIT_SUCCESS; + case SLJIT_DIV_UW: + case SLJIT_DIVMOD_UW: + FAIL_IF(push_inst(compiler, lghi(tmp0, 0))); + FAIL_IF(push_inst(compiler, lgr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_UW) + return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */ + + return SLJIT_SUCCESS; + case SLJIT_DIV_SW: + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, lgr(tmp1, arg0))); + FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1))); + FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */ + if (op == SLJIT_DIVMOD_SW) + return push_inst(compiler, lgr(arg1, tmp0)); /* remainder 
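(the dsgr above leaves the remainder in tmp0 and the quotient in tmp1) 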
*/ + + return SLJIT_SUCCESS; + case SLJIT_ENDBR: + return SLJIT_SUCCESS; + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + default: + SLJIT_UNREACHABLE(); + } + /* swap result registers */ + FAIL_IF(push_inst(compiler, lgr(tmp0, arg0))); + FAIL_IF(push_inst(compiler, lgr(arg0, arg1))); + return push_inst(compiler, lgr(arg1, tmp0)); +} + +static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r) +{ + sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ); + + if ((op & SLJIT_32) && src_r != tmp0) { + FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r))); + src_r = tmp0; + } + + if (is_ctz) { + FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r))); + + if (src_r == tmp0) + FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1))); + else + FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r))); + + src_r = tmp0; + } + + FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r))); + + if (is_ctz) + FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16))); + + if (op & SLJIT_32) { + if (!is_ctz && dst_r != tmp0) + return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16)); + + FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32)); + } + + if (is_ctz) + FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8))); + + if (dst_r == tmp0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0)); +} + +/* LEVAL will be defined later with different parameters as needed */ +#define WHEN2(cond, i1, i2) (cond) ? 
LEVAL(i1) : LEVAL(i2) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + struct addr mem; + sljit_gpr dst_r; + sljit_gpr src_r; + sljit_s32 opcode = GET_OPCODE(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) { + /* LOAD REGISTER */ + if (FAST_IS_REG(dst) && FAST_IS_REG(src)) { + dst_r = gpr(dst); + src_r = gpr(src); + switch (opcode | (op & SLJIT_32)) { + /* 32-bit */ + case SLJIT_MOV32_U8: + ins = llcr(dst_r, src_r); + break; + case SLJIT_MOV32_S8: + ins = lbr(dst_r, src_r); + break; + case SLJIT_MOV32_U16: + ins = llhr(dst_r, src_r); + break; + case SLJIT_MOV32_S16: + ins = lhr(dst_r, src_r); + break; + case SLJIT_MOV32: + if (dst_r == src_r) + return SLJIT_SUCCESS; + ins = lr(dst_r, src_r); + break; + /* 64-bit */ + case SLJIT_MOV_U8: + ins = llgcr(dst_r, src_r); + break; + case SLJIT_MOV_S8: + ins = lgbr(dst_r, src_r); + break; + case SLJIT_MOV_U16: + ins = llghr(dst_r, src_r); + break; + case SLJIT_MOV_S16: + ins = lghr(dst_r, src_r); + break; + case SLJIT_MOV_U32: + ins = llgfr(dst_r, src_r); + break; + case SLJIT_MOV_S32: + ins = lgfr(dst_r, src_r); + break; + case SLJIT_MOV: + case SLJIT_MOV_P: + if (dst_r == src_r) + return SLJIT_SUCCESS; + ins = lgr(dst_r, src_r); + break; + default: + ins = 0; + SLJIT_UNREACHABLE(); + break; + } + FAIL_IF(push_inst(compiler, ins)); + return SLJIT_SUCCESS; + } + /* LOAD IMMEDIATE */ + if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) { + switch (opcode) { + case SLJIT_MOV_U8: + srcw = (sljit_sw)((sljit_u8)(srcw)); + break; + case SLJIT_MOV_S8: + srcw = (sljit_sw)((sljit_s8)(srcw)); + break; + case SLJIT_MOV_U16: + srcw = (sljit_sw)((sljit_u16)(srcw)); + break; + case SLJIT_MOV_S16: + srcw = (sljit_sw)((sljit_s16)(srcw)); + break; + case SLJIT_MOV_U32: + srcw = (sljit_sw)((sljit_u32)(srcw)); + break; + case SLJIT_MOV_S32: + case SLJIT_MOV32: + srcw = (sljit_sw)((sljit_s32)(srcw)); + break; + } + return push_load_imm_inst(compiler, gpr(dst), srcw); + } + /* LOAD */ + /* TODO(carenas): avoid reg being defined later */ + #define LEVAL(i) EVAL(i, reg, mem) + if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) { + sljit_gpr reg = gpr(dst); + + FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1)); + /* TODO(carenas): convert all calls below to LEVAL */ + switch (opcode | (op & SLJIT_32)) { + case SLJIT_MOV32_U8: + ins = llc(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV32_S8: + ins = lb(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV32_U16: + ins = llh(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV32_S16: + ins = WHEN2(is_u12(mem.offset), lh, lhy); + break; + case SLJIT_MOV32: + ins = WHEN2(is_u12(mem.offset), l, ly); + break; + case SLJIT_MOV_U8: + ins = LEVAL(llgc); + break; + case SLJIT_MOV_S8: + ins = lgb(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV_U16: + ins = LEVAL(llgh); + break; + case SLJIT_MOV_S16: + ins = lgh(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV_U32: + ins = LEVAL(llgf); + break; + case SLJIT_MOV_S32: + ins = lgf(reg, mem.offset, mem.index, mem.base); + break; + case SLJIT_MOV_P: + case SLJIT_MOV: + ins = lg(reg, mem.offset, mem.index, mem.base); + break; + default: + ins = 0; + SLJIT_UNREACHABLE(); + break; + } + FAIL_IF(push_inst(compiler, ins)); + return 
SLJIT_SUCCESS; + } + /* STORE and STORE IMMEDIATE */ + if ((dst & SLJIT_MEM) + && (FAST_IS_REG(src) || (src & SLJIT_IMM))) { + sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0; + if (src & SLJIT_IMM) { + /* TODO(mundaym): MOVE IMMEDIATE? */ + FAIL_IF(push_load_imm_inst(compiler, reg, srcw)); + } + struct addr mem; + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + switch (opcode) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + return push_inst(compiler, + WHEN2(is_u12(mem.offset), stc, stcy)); + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + return push_inst(compiler, + WHEN2(is_u12(mem.offset), sth, sthy)); + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + return push_inst(compiler, + WHEN2(is_u12(mem.offset), st, sty)); + case SLJIT_MOV_P: + case SLJIT_MOV: + FAIL_IF(push_inst(compiler, LEVAL(stg))); + return SLJIT_SUCCESS; + default: + SLJIT_UNREACHABLE(); + } + } + #undef LEVAL + /* MOVE CHARACTERS */ + if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) { + struct addr mem; + FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1)); + switch (opcode) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + FAIL_IF(push_inst(compiler, + EVAL(llgc, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + return push_inst(compiler, + EVAL(stcy, tmp0, mem)); + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + FAIL_IF(push_inst(compiler, + EVAL(llgh, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + return push_inst(compiler, + EVAL(sthy, tmp0, mem)); + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + FAIL_IF(push_inst(compiler, + EVAL(ly, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + return push_inst(compiler, + EVAL(sty, tmp0, mem)); + case SLJIT_MOV_P: + case SLJIT_MOV: + FAIL_IF(push_inst(compiler, + EVAL(lg, tmp0, mem))); + FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); + FAIL_IF(push_inst(compiler, + EVAL(stg, tmp0, mem))); + return SLJIT_SUCCESS; + default: + SLJIT_UNREACHABLE(); + } + } + SLJIT_UNREACHABLE(); + } + + SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */ + + dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0; + src_r = FAST_IS_REG(src) ? 
gpr(REG_MASK & src) : tmp0; + + compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); + + /* TODO(mundaym): optimize loads and stores */ + switch (opcode) { + case SLJIT_NOT: + if (src & SLJIT_MEM) + FAIL_IF(load_word(compiler, src_r, src, srcw, op & SLJIT_32)); + + /* emulate ~x with x^-1 */ + if (!(op & SLJIT_32)) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, -1)); + if (src_r != dst_r) + FAIL_IF(push_inst(compiler, lgr(dst_r, src_r))); + + FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1))); + break; + } + + if (have_eimm()) + FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff))); + else { + FAIL_IF(push_load_imm_inst(compiler, tmp1, -1)); + if (src_r != dst_r) + FAIL_IF(push_inst(compiler, lr(dst_r, src_r))); + + FAIL_IF(push_inst(compiler, xr(dst_r, tmp1))); + } + break; + case SLJIT_CLZ: + case SLJIT_CTZ: + if (src & SLJIT_MEM) + FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32)); + + FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r)); + break; + default: + SLJIT_UNREACHABLE(); + } + + if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW)) + FAIL_IF(update_zero_overflow(compiler, op, dst_r)); + + if (dst & SLJIT_MEM) + return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE int is_commutative(sljit_s32 op) +{ + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + case SLJIT_MUL: + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return 1; + } + return 0; +} + +static const struct ins_forms add_forms = { + 0x1a00, /* ar */ + 0xb9080000, /* agr */ + 0xb9f80000, /* ark */ + 0xb9e80000, /* agrk */ + 0x5a000000, /* a */ + 0xe3000000005a, /* ay */ + 0xe30000000008, /* ag */ +}; + +static const struct ins_forms logical_add_forms = { + 0x1e00, /* alr */ + 0xb90a0000, /* algr */ + 0xb9fa0000, /* alrk */ + 0xb9ea0000, /* algrk */ + 0x5e000000, /* al */ + 0xe3000000005e, /* aly */ + 0xe3000000000a, /* alg */ +}; + +static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW; + int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); + const struct ins_forms *forms; + sljit_ins ins; + + if (src2 & SLJIT_IMM) { + if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { + if (sets_overflow) + ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + else + ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + return emit_siy(compiler, ins, dst, dstw, src2w); + } + + if (is_s16(src2w)) { + if (sets_overflow) + ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + else + ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w)); + goto done; + } + + if (!sets_overflow) { + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + if (is_u32(-src2w)) { + FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A)); + goto done; + } + } + else if ((op & SLJIT_32) || is_s32(src2w)) { + ins = (op & SLJIT_32) ? 
0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + } + + forms = sets_overflow ? &add_forms : &logical_add_forms; + FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w)); + +done: + if (sets_zero_overflow) + FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0)); + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms sub_forms = { + 0x1b00, /* sr */ + 0xb9090000, /* sgr */ + 0xb9f90000, /* srk */ + 0xb9e90000, /* sgrk */ + 0x5b000000, /* s */ + 0xe3000000005b, /* sy */ + 0xe30000000009, /* sg */ +}; + +static const struct ins_forms logical_sub_forms = { + 0x1f00, /* slr */ + 0xb90b0000, /* slgr */ + 0xb9fb0000, /* slrk */ + 0xb9eb0000, /* slgrk */ + 0x5f000000, /* sl */ + 0xe3000000005f, /* sly */ + 0xe3000000000b, /* slg */ +}; + +static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flag_type = GET_FLAG_TYPE(op); + int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW); + int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); + const struct ins_forms *forms; + sljit_ins ins; + + if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) { + int compare_signed = flag_type >= SLJIT_SIG_LESS; + + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; + + if (src2 & SLJIT_IMM) { + if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) + { + if ((op & SLJIT_32) || is_s32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; + return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); + } + } + else { + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; + return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); + } + if (is_s16(src2w)) + return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w); + } + } + else if (src2 & SLJIT_MEM) { + if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) { + ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */; + return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A); + } + + if (compare_signed) + ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */; + else + ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */; + return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A); + } + + if (compare_signed) + ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */; + else + ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */; + return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w); + } + + if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) { + ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */; + FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w)); + goto done; + } + + if (src2 & SLJIT_IMM) { + sljit_sw neg_src2w = -src2w; + + if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) { + if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { + if (sets_signed) + ins = (op & SLJIT_32) ? 
0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + else + ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + return emit_siy(compiler, ins, dst, dstw, neg_src2w); + } + + if (is_s16(neg_src2w)) { + if (sets_signed) + ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + else + ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w)); + goto done; + } + } + + if (!sets_signed) { + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + if (is_u32(neg_src2w)) { + FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A)); + goto done; + } + } + else if ((op & SLJIT_32) || is_s32(neg_src2w)) { + ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A)); + goto done; + } + } + + forms = sets_signed ? &sub_forms : &logical_sub_forms; + FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w)); + +done: + if (sets_signed) { + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + + if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) { + /* In case of overflow, the sign bit of the two source operands must be different, and + - the first operand is greater if the sign bit of the result is set + - the first operand is less if the sign bit of the result is not set + Negating the result sets the correct sign, because the result cannot be zero. + The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */ + FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2))); + FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(update_zero_overflow(compiler, op, dst_r)); + } + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms multiply_forms = { + 0xb2520000, /* msr */ + 0xb90c0000, /* msgr */ + 0xb9fd0000, /* msrkc */ + 0xb9ed0000, /* msgrkc */ + 0x71000000, /* ms */ + 0xe30000000051, /* msy */ + 0xe3000000000c, /* msg */ +}; + +static const struct ins_forms multiply_overflow_forms = { + 0, + 0, + 0xb9fd0000, /* msrkc */ + 0xb9ed0000, /* msgrkc */ + 0, + 0xe30000000053, /* msc */ + 0xe30000000083, /* msgc */ +}; + +static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins ins; + + if (HAS_FLAGS(op)) { + /* if have_misc2() fails, this operation should be emulated. 32-bit emulation: + FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r))); + FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r))); + if (dst_r != tmp0) { + FAIL_IF(push_inst(compiler, lr(dst_r, tmp0))); + } + FAIL_IF(push_inst(compiler, aih(tmp0, 1))); + FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U))); + FAIL_IF(push_inst(compiler, ipm(tmp1))); + FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */ + + return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w); + } + + if (src2 & SLJIT_IMM) { + if (is_s16(src2w)) { + ins = (op & SLJIT_32) ? 
0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; + return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A); + } + + if (is_s32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */; + return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A); + } + } + + return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w); +} + +static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_uw imm, sljit_s32 count16) +{ + sljit_s32 mode = compiler->mode; + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + + if (IS_GPR_REG(dst)) { + dst_r = gpr(dst & REG_MASK); + if (dst == src1) + needs_move = 0; + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (type == SLJIT_AND) { + if (!(mode & SLJIT_32)) + FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32))); + return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff)); + } + else if (type == SLJIT_OR) { + if (count16 >= 3) { + FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32))); + return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff)); + } + + if (count16 >= 2) { + if ((imm & 0x00000000ffffffffull) == 0) + return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)); + if ((imm & 0xffffffff00000000ull) == 0) + return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff)); + } + + if ((imm & 0xffff000000000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48))); + if ((imm & 0x0000ffff00000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff))); + if ((imm & 0x00000000ffff0000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff))); + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff)); + return SLJIT_SUCCESS; + } + + if ((imm & 0xffffffff00000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32))); + if ((imm & 0x00000000ffffffffull) != 0 || imm == 0) + return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff)); + return SLJIT_SUCCESS; +} + +static const struct ins_forms bitwise_and_forms = { + 0x1400, /* nr */ + 0xb9800000, /* ngr */ + 0xb9f40000, /* nrk */ + 0xb9e40000, /* ngrk */ + 0x54000000, /* n */ + 0xe30000000054, /* ny */ + 0xe30000000080, /* ng */ +}; + +static const struct ins_forms bitwise_or_forms = { + 0x1600, /* or */ + 0xb9810000, /* ogr */ + 0xb9f60000, /* ork */ + 0xb9e60000, /* ogrk */ + 0x56000000, /* o */ + 0xe30000000056, /* oy */ + 0xe30000000081, /* og */ +}; + +static const struct ins_forms bitwise_xor_forms = { + 0x1700, /* xr */ + 0xb9820000, /* xgr */ + 0xb9f70000, /* xrk */ + 0xb9e70000, /* xgrk */ + 0x57000000, /* x */ + 0xe30000000057, /* xy */ + 0xe30000000082, /* xg */ +}; + +static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 type = GET_OPCODE(op); + const struct ins_forms *forms; + + if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) { + sljit_s32 count16 = 0; + sljit_uw imm = (sljit_uw)src2w; + + if (op & 
SLJIT_32) + imm &= 0xffffffffull; + + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + count16++; + if ((imm & 0x00000000ffff0000ull) != 0) + count16++; + if ((imm & 0x0000ffff00000000ull) != 0) + count16++; + if ((imm & 0xffff000000000000ull) != 0) + count16++; + + if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) { + sljit_gpr src_r = tmp0; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm); + if ((imm & 0x00000000ffff0000ull) != 0) + return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16)); + if ((imm & 0x0000ffff00000000ull) != 0) + return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32)); + return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48)); + } + + if (!(op & SLJIT_SET_Z)) + return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16); + } + + if (type == SLJIT_AND) + forms = &bitwise_and_forms; + else if (type == SLJIT_OR) + forms = &bitwise_or_forms; + else + forms = &bitwise_xor_forms; + + return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w); +} + +static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 type = GET_OPCODE(op); + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr src_r = tmp0; + sljit_gpr base_r = tmp0; + sljit_ins imm = 0; + sljit_ins ins; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (!(src2 & SLJIT_IMM)) { + if (FAST_IS_REG(src2)) + base_r = gpr(src2); + else { + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + base_r = tmp1; + } + + if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) { + if (base_r != tmp1) { + FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16))); + base_r = tmp1; + } else + FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f)); + } + } else + imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f)); + + if ((op & SLJIT_32) && dst_r == src_r) { + if (type == SLJIT_SHL || type == SLJIT_MSHL) + ins = 0x89000000 /* sll */; + else if (type == SLJIT_LSHR || type == SLJIT_MLSHR) + ins = 0x88000000 /* srl */; + else + ins = 0x8a000000 /* sra */; + + FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm)); + } else { + if (type == SLJIT_SHL || type == SLJIT_MSHL) + ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */; + else if (type == SLJIT_LSHR || type == SLJIT_MLSHR) + ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */; + else + ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */; + + FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16))); + } + + if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR) + return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)); + + return SLJIT_SUCCESS; +} + +static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0; + sljit_gpr src_r = tmp0; + sljit_gpr base_r = tmp0; + sljit_ins imm = 0; + sljit_ins ins; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (!(src2 & SLJIT_IMM)) { + if (FAST_IS_REG(src2)) + base_r = gpr(src2); + else { + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + base_r = tmp1; + } + } + + if (GET_OPCODE(op) == SLJIT_ROTR) { + if (!(src2 & SLJIT_IMM)) { + ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */; + FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r))); + base_r = tmp1; + } else + src2w = -src2w; + } + + if (src2 & SLJIT_IMM) + imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f)); + + ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */; + return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)); +} + +static const struct ins_forms addc_forms = { + 0xb9980000, /* alcr */ + 0xb9880000, /* alcgr */ + 0, + 0, + 0, + 0xe30000000098, /* alc */ + 0xe30000000088, /* alcg */ +}; + +static const struct ins_forms subc_forms = { + 0xb9990000, /* slbr */ + 0xb9890000, /* slbgr */ + 0, + 0, + 0, + 0xe30000000099, /* slb */ + 0xe30000000089, /* slbg */ +}; + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->mode = op & SLJIT_32; + compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); + + if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) { + src1 ^= src2; + src2 ^= src1; + src1 ^= src2; + + src1w ^= src2w; + src2w ^= src1w; + src1w ^= src2w; + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; + return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ADDC: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; + FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w)); + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); + return SLJIT_SUCCESS; + case SLJIT_SUB: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; + return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUBC: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; + FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w)); + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); + return SLJIT_SUCCESS; + case SLJIT_MUL: + FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w)); + break; + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w)); + break; + case SLJIT_SHL: + case SLJIT_MSHL: + case SLJIT_LSHR: + case SLJIT_MLSHR: + case SLJIT_ASHR: + case SLJIT_MASHR: + FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w)); + break; + case SLJIT_ROTL: + case SLJIT_ROTR: + FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w)); + break; + } + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE 
sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 is_right; + sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64; + sljit_gpr src_dst_r = gpr(src_dst); + sljit_gpr src1_r = tmp0; + sljit_gpr src2_r = tmp1; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w)); + + is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR); + + if (src_dst == src1) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w); + } + + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (src1 & SLJIT_MEM) + FAIL_IF(load_word(compiler, tmp0, src1, src1w, op & SLJIT_32)); + else if (src1 & SLJIT_IMM) + FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w)); + else + src1_r = gpr(src1); + + if (src2 & SLJIT_IMM) { + src2w &= bit_length - 1; + + if (src2w == 0) + return SLJIT_SUCCESS; + } else if (!(src2 & SLJIT_MEM)) + src2_r = gpr(src2); + else + FAIL_IF(load_word(compiler, tmp1, src2, src2w, op & SLJIT_32)); + + if (src2 & SLJIT_IMM) { + if (op & SLJIT_32) { + ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */; + FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | (sljit_ins)src2w)); + } else { + ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */; + FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | ((sljit_ins)src2w << 16))); + } + + ins = 0xec0000000055 /* risbg */; + + if (is_right) { + src2w = bit_length - src2w; + ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src2w) << 16) | ((sljit_ins)src2w << 8); + } else + ins |= ((sljit_ins)(64 - src2w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)src2w << 8); + + return push_inst(compiler, ins | R36A(src_dst_r) | R32A(src1_r)); + } + + if (op & SLJIT_32) { + if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) { + if (src2_r != tmp1) { + FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src2_r) | (59 << 24) | (1 << 23) | (63 << 16))); + src2_r = tmp1; + } else + FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f)); + } + + ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */; + FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | R12A(src2_r))); + + if (src2_r != tmp1) { + FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f)); + FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src2_r))); + } else + FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f)); + + if (src1_r == tmp0) { + ins = is_right ? 0x89000000 /* sll */ : 0x88000000 /* srl */; + FAIL_IF(push_inst(compiler, ins | R20A(tmp0) | R12A(tmp1) | 0x1)); + } else { + ins = is_right ? 
0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */; + FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1) | (0x1 << 16))); + } + + return push_inst(compiler, 0x1600 /* or */ | R4A(src_dst_r) | R0A(tmp0)); + } + + ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */; + FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | R28A(src2_r))); + + ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */; + + if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { + if (src2_r != tmp1) + FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f)); + + FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | (0x1 << 16))); + src1_r = tmp0; + + if (src2_r != tmp1) + FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src2_r))); + else + FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f)); + } else + FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src2_r))); + + FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1))); + return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(src_dst_r) | R0A(tmp0)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src( + struct sljit_compiler *compiler, + sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + sljit_gpr src_r; + struct addr addr; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + src_r = FAST_IS_REG(src) ? gpr(src) : tmp1; + if (src & SLJIT_MEM) + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0)); + + return push_inst(compiler, br(src_r)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1)); + return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); + default: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return (sljit_s32)gpr(reg); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return (sljit_s32)fgpr(reg); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size); + return push_inst(compiler, ins); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FLOAT_LOAD 0 +#define FLOAT_STORE 1 + +static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + struct addr addr; + sljit_ins ins; + + SLJIT_ASSERT(mem & SLJIT_MEM); + + if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) { + FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1)); + + if (op & FLOAT_STORE) + ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */; + else + ins = (op & SLJIT_32) ? 
0x78000000 /* le */ : 0x68000000 /* ld */; + + return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset); + } + + FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1)); + + if (op & FLOAT_STORE) + ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */; + else + ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */; + + return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); +} + +static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins, + sljit_s32 reg, + sljit_s32 src, sljit_sw srcw) +{ + struct addr addr; + + if (!(src & SLJIT_MEM)) + return push_inst(compiler, ins_r | F4(reg) | F0(src)); + + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); + return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + sljit_ins ins; + + if (src & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + /* M3 is set to 5 */ + if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) + ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */; + else + ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */; + + FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src))); + + if (dst & SLJIT_MEM) + return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_ins ins; + + if (src & SLJIT_IMM) { + FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw)); + src = (sljit_s32)tmp0; + } + else if (src & SLJIT_MEM) { + FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32)); + src = (sljit_s32)tmp0; + } + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) + ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */; + else + ins = (op & SLJIT_32) ? 
0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */; + + FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src))); + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins ins_r, ins; + + if (src1 & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (op & SLJIT_32) { + ins_r = 0xb3090000 /* cebr */; + ins = 0xed0000000009 /* ceb */; + } else { + ins_r = 0xb3190000 /* cdbr */; + ins = 0xed0000000019 /* cdb */; + } + + return emit_float(compiler, ins_r, ins, src1, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + sljit_ins ins; + + CHECK_ERROR(); + + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (op == SLJIT_CONV_F64_FROM_F32) + FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw)); + else { + if (src & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (FAST_IS_REG(dst)) { + if (dst == src) + return SLJIT_SUCCESS; + + ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */; + break; + } + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw); + case SLJIT_CONV_F64_FROM_F32: + /* Only SLJIT_CONV_F32_FROM_F64. */ + ins = 0xb3440000 /* ledbr */; + break; + case SLJIT_NEG_F64: + ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */; + break; + default: + SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64); + ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */; + break; + } + + FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src))); + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + SLJIT_ASSERT(dst_r == TMP_FREG1); + + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); +} + +#define FLOAT_MOV(op, dst_r, src_r) \ + (((op & SLJIT_32) ? 
0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r)) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r = TMP_FREG1; + sljit_ins ins_r, ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + do { + if (FAST_IS_REG(dst)) { + dst_r = dst; + + if (dst == src1) + break; + + if (dst == src2) { + if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) { + src2 = src1; + src2w = src1w; + src1 = dst; + break; + } + + FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2))); + src2 = TMP_FREG1; + } + } + + if (src1 & SLJIT_MEM) + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w)); + else + FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1))); + } while (0); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */; + ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */; + break; + case SLJIT_SUB_F64: + ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */; + ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */; + break; + case SLJIT_MUL_F64: + ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */; + ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */; + break; + default: + SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64); + ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */; + ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */; + break; + } + + FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w)); + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + + SLJIT_ASSERT(dst_r != TMP_FREG1); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (FAST_IS_REG(dst)) + return push_inst(compiler, lgr(gpr(dst), link_r)); + + /* memory */ + return store_word(compiler, link_r, dst, dstw, 0); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? 
get_cc(compiler, type & 0xff) : 0xf; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + /* record jump */ + struct sljit_jump *jump = (struct sljit_jump *) + ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + jump->addr = compiler->size; + + /* emit jump instruction */ + type &= 0xff; + if (type >= SLJIT_FAST_CALL) + PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0))); + else + PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0))); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, r14)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + + if (src & SLJIT_IMM) { + SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */ + FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); + } + else if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */)); + } + + /* emit jump instruction */ + if (type >= SLJIT_FAST_CALL) + return push_inst(compiler, basr(link_r, src_r)); + + return push_inst(compiler, br(src_r)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + SLJIT_ASSERT(gpr(TMP_REG2) == tmp1); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */)); + src = TMP_REG2; + srcw = 0; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src)))); + src = TMP_REG2; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, r14)); + type = SLJIT_JUMP; + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_u8 mask = get_cc(compiler, type); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr loc_r = tmp1; + switch (GET_OPCODE(op)) { + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + compiler->status_flags_state = op & SLJIT_SET_Z; + + /* dst is also source operand */ + if (dst & SLJIT_MEM) + FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32)); + + break; + case SLJIT_MOV32: + op |= SLJIT_32; + /* fallthrough */ + case SLJIT_MOV: + /* can write straight into destination */ + loc_r = dst_r; + break; + default: + SLJIT_UNREACHABLE(); + } + + /* TODO(mundaym): fold into cmov helper function? 
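+   Editor's note, an illustrative sketch (assumed assembler mnemonics, not
+   upstream text) of what the have_lscond2() branch below emits:
+
+       lghi   loc_r, 0          # push_load_imm_inst(): loc_r = 0
+       lochi  loc_r, 1, mask    # if CC matches mask: loc_r = 1 (locghi in 64-bit mode)
+
+   LOCHI/LOCGHI ("load halfword immediate on condition") belong to the
+   load/store-on-condition facility 2 (z13 and later), which is what
+   have_lscond2() tests; older cores fall through to the abort() below.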
*/ + #define LEVAL(i) i(loc_r, 1, mask) + if (have_lscond2()) { + FAIL_IF(push_load_imm_inst(compiler, loc_r, 0)); + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_32, lochi, locghi))); + } else { + /* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */ + abort(); + } + #undef LEVAL + + /* apply bitwise op and set condition codes */ + switch (GET_OPCODE(op)) { + #define LEVAL(i) i(dst_r, loc_r) + case SLJIT_AND: + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_32, nr, ngr))); + break; + case SLJIT_OR: + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_32, or, ogr))); + break; + case SLJIT_XOR: + FAIL_IF(push_inst(compiler, + WHEN2(op & SLJIT_32, xr, xgr))); + break; + #undef LEVAL + } + + /* store result to memory if required */ + if (dst & SLJIT_MEM) + return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32)); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins mask = get_cc(compiler, type & ~SLJIT_32); + sljit_gpr src_r; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + if (type & SLJIT_32) + srcw = (sljit_s32)srcw; + + if (have_lscond2() && (src & SLJIT_IMM) && is_s16(srcw)) { + ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */; + return push_inst(compiler, ins | R36A(gpr(dst_reg)) | (mask << 32) | (sljit_ins)(srcw & 0xffff) << 16); + } + + if (src & SLJIT_IMM) { + FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw)); + src_r = tmp0; + } else + src_r = gpr(src); + + if (have_lscond1()) { + ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */; + return push_inst(compiler, ins | (mask << 12) | R4A(gpr(dst_reg)) | R0A(src_r)); + } + + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_ins ins, reg1, reg2, base, offs = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + ADJUST_LOCAL_OFFSET(mem, memw); + + base = gpr(mem & REG_MASK); + reg1 = gpr(REG_PAIR_FIRST(reg)); + reg2 = gpr(REG_PAIR_SECOND(reg)); + + if (mem & OFFS_REG_MASK) { + memw &= 0x3; + offs = gpr(OFFS_REG(mem)); + + if (memw != 0) { + FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16))); + offs = tmp1; + } else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) { + FAIL_IF(push_inst(compiler, 0xb9f80000 | R12A(tmp1) | R4A(base) | R0A(offs))); + base = tmp1; + offs = 0; + } + + memw = 0; + } else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, memw)); + + if (base == 0) + base = tmp1; + else + offs = tmp1; + + memw = 0; + } + + if (offs == 0 && reg2 == (reg1 + 1)) { + ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */; + return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw)); + } + + ins = ((type & SLJIT_MEM_STORE) ? 
0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base); + + if (!(type & SLJIT_MEM_STORE) && base == reg1) { + FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)))); + return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)); + } + + FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw))); + return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))); +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +/* On s390x we build a literal pool to hold constants. This has two main + advantages: + + 1. we only need one instruction in the instruction stream (LGRL) + 2. we can store 64 bit addresses and use 32 bit offsets + + To retrofit the extra information needed to build the literal pool we + add a new sljit_s390x_const struct that contains the initial value but + can still be cast to a sljit_const. */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_s390x_const *const_; + sljit_gpr dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + + const_ = (struct sljit_s390x_const*)ensure_abuf(compiler, + sizeof(struct sljit_s390x_const)); + PTR_FAIL_IF(!const_); + set_const((struct sljit_const*)const_, compiler); + const_->init_value = init_value; + + dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + if (have_genext()) + PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0))); + else { + PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0))); + PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1))); + } + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */)); + + return (struct sljit_const*)const_; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + /* Update the constant pool. */ + sljit_uw *ptr = (sljit_uw *)addr; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0); + *ptr = new_target; + SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1); + SLJIT_CACHE_FLUSH(ptr, ptr + 1); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label( + struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_gpr dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0; + + if (have_genext()) + PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0))); + else { + PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0))); + PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1))); + } + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0)); + + return put_label; +} + +/* TODO(carenas): EVAL probably should move up or be refactored */ +#undef WHEN2 +#undef EVAL + +#undef tmp1 +#undef tmp0 + +/* TODO(carenas): undef other macros that spill like is_u12? */ diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeX86_32.c b/contrib/libs/pcre2/src/sljit/sljitNativeX86_32.c new file mode 100644 index 0000000000..08da03026d --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeX86_32.c @@ -0,0 +1,1298 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* x86 32-bit arch dependent functions. */ + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm) +{ + sljit_u8 *inst; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw)); + FAIL_IF(!inst); + INC_SIZE(1 + sizeof(sljit_sw)); + *inst++ = opcode; + sljit_unaligned_store_sw(inst, imm); + return SLJIT_SUCCESS; +} + +/* Size contains the flags as well. */ +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size, + /* The register or immediate operand. */ + sljit_s32 a, sljit_sw imma, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 *buf_ptr; + sljit_u8 reg_map_b; + sljit_uw flags = size; + sljit_uw inst_size; + + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. */ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. 
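+   Editor's note (inferred from the code below, not upstream commentary): the
+   `size` argument packs the opcode byte count in its low four bits and the
+   EX86_* flag bits above them, hence `flags = size` above and `size &= 0xf`
+   a few lines down; these assertions reject flag combinations that have no
+   single-instruction encoding.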
*/ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); + /* SSE2 and immediate is not possible. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); + SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) + && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) + && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + + size &= 0xf; + inst_size = size; + + if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) + inst_size++; + if (flags & EX86_PREF_66) + inst_size++; + + /* Calculate size of b. */ + inst_size += 1; /* mod r/m byte. */ + if (b & SLJIT_MEM) { + if (!(b & REG_MASK)) + inst_size += sizeof(sljit_sw); + else { + if (immb != 0 && !(b & OFFS_REG_MASK)) { + /* Immediate operand. */ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_s8); + else + inst_size += sizeof(sljit_sw); + } + else if (reg_map[b & REG_MASK] == 5) { + /* Swap registers if possible. */ + if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5) + b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK); + else + inst_size += sizeof(sljit_s8); + } + + if (reg_map[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); + + if (b & OFFS_REG_MASK) + inst_size += 1; /* SIB byte. */ + } + } + + /* Calculate size of a. */ + if (a & SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } + else if (flags & EX86_SHIFT_INS) { + SLJIT_ASSERT(imma <= 0x1f); + if (imma != 1) { + inst_size++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_sw); + } + else + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); + + /* Encoding the byte. */ + INC_SIZE(inst_size); + if (flags & EX86_PREF_F2) + *inst++ = 0xf2; + if (flags & EX86_PREF_F3) + *inst++ = 0xf3; + if (flags & EX86_PREF_66) + *inst++ = 0x66; + + buf_ptr = inst + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; + + if (a & SLJIT_IMM) + *buf_ptr = 0; + else if (!(flags & EX86_SSE2_OP1)) + *buf_ptr = U8(reg_map[a] << 3); + else + *buf_ptr = U8(a << 3); + } + else { + if (a & SLJIT_IMM) { + if (imma == 1) + *inst = GROUP_SHIFT_1; + else + *inst = GROUP_SHIFT_N; + } else + *inst = GROUP_SHIFT_CL; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) { + *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b)); + buf_ptr++; + } else if (b & REG_MASK) { + reg_map_b = reg_map[b & REG_MASK]; + + if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { + if (immb != 0 || reg_map_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if (!(b & OFFS_REG_MASK)) + *buf_ptr++ |= reg_map_b; + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3)); + } + + if (immb != 0 || reg_map_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = U8(immb); /* 8 bit displacement. */ + else { + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. 
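+   Editor's note on the mod/rm encoding used in this block (standard x86
+   encoding rules, not upstream commentary): 0x40 selects mod=01 with an
+   8-bit displacement and 0x80 selects mod=10 with a 32-bit one; base
+   register number 5 (EBP) has no mod=00 form, which is why `reg_map_b == 5`
+   forces a displacement even when immb is 0. For example, `mov eax, [ebp+8]`
+   encodes as 8B 45 08: opcode 8B, mod/rm 45 (mod=01, reg=eax, rm=ebp),
+   disp8 08.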
*/ + buf_ptr += sizeof(sljit_sw); + } + } + } + else { + if (reg_map_b == 5) + *buf_ptr |= 0x40; + + *buf_ptr++ |= 0x04; + *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6)); + + if (reg_map_b == 5) + *buf_ptr++ = 0; + } + } + else { + *buf_ptr++ |= 0x05; + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_sw); + } + + if (a & SLJIT_IMM) { + if (flags & EX86_BYTE_ARG) + *buf_ptr = U8(imma); + else if (flags & EX86_HALF_ARG) + sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma); + else if (!(flags & EX86_SHIFT_INS)) + sljit_unaligned_store_sw(buf_ptr, imma); + } + + return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); +} + +/* --------------------------------------------------------------------- */ +/* Enter / return */ +/* --------------------------------------------------------------------- */ + +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset) +{ + sljit_uw type = jump->flags >> TYPE_SHIFT; + + if (type == SLJIT_JUMP) { + *code_ptr++ = JMP_i32; + jump->addr++; + } + else if (type >= SLJIT_FAST_CALL) { + *code_ptr++ = CALL_i32; + jump->addr++; + } + else { + *code_ptr++ = GROUP_0F; + *code_ptr++ = get_jump_code(type); + jump->addr += 2; + } + + if (jump->flags & JUMP_LABEL) + jump->flags |= PATCH_MW; + else + sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset)); + code_ptr += 4; + + return code_ptr; +} + +#define ENTER_TMP_TO_R4 0x00001 +#define ENTER_TMP_TO_S 0x00002 + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 word_arg_count, saved_arg_count, float_arg_count; + sljit_s32 size, args_size, types, status; + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(options); + sljit_u8 *inst; +#ifdef _WIN32 + sljit_s32 r2_offset = -1; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* Emit ENDBR32 at function entry if needed. 
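+   Editor's note: ENDBR32 (F3 0F 1E FB) marks the entry point as a valid
+   indirect-branch target under Intel CET. In builds without CET support,
+   emit_endbranch() is expected to emit nothing, so the call below should be
+   a no-op on ordinary targets (an assumption about the sljit configuration,
+   not something stated in this diff).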
*/ + FAIL_IF(emit_endbranch(compiler)); + + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + arg_types >>= SLJIT_ARG_SHIFT; + word_arg_count = 0; + status = 0; + + if (options & SLJIT_ENTER_REG_ARG) { + args_size = 3 * SSIZE_OF(sw); + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + word_arg_count++; + if (word_arg_count >= 4) + status |= ENTER_TMP_TO_R4; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = 0; + } else { + types = arg_types; + saved_arg_count = 0; + float_arg_count = 0; + args_size = SSIZE_OF(sw); + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); + args_size += SSIZE_OF(f32); + break; + default: + word_arg_count++; + + if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG)) + saved_arg_count++; + + if (word_arg_count == 4) { + if (types & SLJIT_ARG_TYPE_SCRATCH_REG) { + status |= ENTER_TMP_TO_R4; + arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT); + } else if (saved_arg_count == 4) { + status |= ENTER_TMP_TO_S; + arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT); + } + } + + args_size += SSIZE_OF(sw); + break; + } + types >>= SLJIT_ARG_SHIFT; + } + + args_size -= SSIZE_OF(sw); + compiler->args_size = args_size; + } + + size = (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - kept_saveds_count; + if (!(options & SLJIT_ENTER_REG_ARG)) + size++; + + if (size != 0) { + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1)); + FAIL_IF(!inst); + + INC_SIZE((sljit_uw)size); + + if (!(options & SLJIT_ENTER_REG_ARG)) + PUSH_REG(reg_map[TMP_REG1]); + + if ((saveds > 2 && kept_saveds_count <= 2) || scratches > 9) + PUSH_REG(reg_map[SLJIT_S2]); + if ((saveds > 1 && kept_saveds_count <= 1) || scratches > 10) + PUSH_REG(reg_map[SLJIT_S1]); + if ((saveds > 0 && kept_saveds_count == 0) || scratches > 11) + PUSH_REG(reg_map[SLJIT_S0]); + + size *= SSIZE_OF(sw); + } + + if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size); + + size += SSIZE_OF(sw); + + local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + size + 0xf) & ~0xf) - size; + compiler->local_size = local_size; + + word_arg_count = 0; + saved_arg_count = 0; + args_size = size; + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + break; + default: + word_arg_count++; + SLJIT_ASSERT(word_arg_count <= 3 || (word_arg_count == 4 && !(status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)))); + + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { +#ifdef _WIN32 + if (word_arg_count == 3 && local_size > 4 * 4096) + r2_offset = local_size + args_size; + else +#endif + EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + + } else { + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + saved_arg_count++; + } + + args_size += SSIZE_OF(sw); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); + +#ifdef _WIN32 + SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096); + + if (local_size > 4096) { + if (local_size <= 4 * 4096) { + BINARY_IMM32(OR, 0, 
SLJIT_MEM1(SLJIT_SP), -4096); + + if (local_size > 2 * 4096) + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + if (local_size > 3 * 4096) + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); + } + else { + if (options & SLJIT_ENTER_REG_ARG) { + SLJIT_ASSERT(r2_offset == -1); + + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 1)); + FAIL_IF(!inst); + INC_SIZE(1); + PUSH_REG(reg_map[SLJIT_R2]); + + local_size -= SSIZE_OF(sw); + r2_offset = local_size; + } + + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12); + + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096); + BINARY_IMM32(SUB, 4096, SLJIT_SP, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + + INC_SIZE(2); + inst[0] = LOOP_i8; + inst[1] = (sljit_u8)-16; + local_size &= 0xfff; + } + } + + if (local_size > 0) { + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + } + + if (r2_offset != -1) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset); + +#else /* !_WIN32 */ + + SLJIT_ASSERT(local_size > 0); + + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + +#endif /* _WIN32 */ + + size = SLJIT_LOCALS_OFFSET_BASE - SSIZE_OF(sw); + kept_saveds_count = SLJIT_R3 - kept_saveds_count; + + while (saved_arg_count > 3) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, kept_saveds_count, 0); + kept_saveds_count++; + size -= SSIZE_OF(sw); + saved_arg_count--; + } + + if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) { + if (status & ENTER_TMP_TO_R4) + size = 2 * SSIZE_OF(sw); + + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 args_size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + arg_types >>= SLJIT_ARG_SHIFT; + args_size = 0; + + if (!(options & SLJIT_ENTER_REG_ARG)) { + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + break; + default: + args_size += SSIZE_OF(sw); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + } + + compiler->args_size = args_size; + + /* [esp+0] for saving temporaries and for function calls. */ + + saveds = (1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw); + + /* Saving ebp. */ + if (!(options & SLJIT_ENTER_REG_ARG)) + saveds += SSIZE_OF(sw); + + compiler->local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + saveds + 0xf) & ~0xf) - saveds; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to) +{ + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + sljit_s32 local_size, saveds; + sljit_uw size; + sljit_u8 *inst; + + size = (sljit_uw)((compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? 
compiler->saveds : 3) - kept_saveds_count); + + local_size = compiler->local_size; + + if (!(compiler->options & SLJIT_ENTER_REG_ARG)) + size++; + else if (is_return_to && size == 0) { + local_size += SSIZE_OF(sw); + is_return_to = 0; + } + + if (local_size > 0) + BINARY_IMM32(ADD, local_size, SLJIT_SP, 0); + + if (size == 0) + return SLJIT_SUCCESS; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + + saveds = compiler->saveds; + + if ((saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) + POP_REG(reg_map[SLJIT_S0]); + if ((saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) + POP_REG(reg_map[SLJIT_S1]); + if ((saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9) + POP_REG(reg_map[SLJIT_S2]); + + if (!(compiler->options & SLJIT_ENTER_REG_ARG)) + POP_REG(reg_map[TMP_REG1]); + + if (is_return_to) + BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + SLJIT_ASSERT(compiler->args_size >= 0); + SLJIT_ASSERT(compiler->local_size > 0); + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + RET(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 src_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) { + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + + src_r = (compiler->options & SLJIT_ENTER_REG_ARG) ? 
TMP_REG1 : SLJIT_R1; + + EMIT_MOV(compiler, src_r, 0, src, srcw); + src = src_r; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Call / return instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_s32 call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) +{ + sljit_sw stack_size = 0; + sljit_s32 word_arg_count = 0; + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + stack_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + stack_size += SSIZE_OF(f32); + break; + default: + word_arg_count++; + stack_size += SSIZE_OF(sw); + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (word_arg_count_ptr) + *word_arg_count_ptr = word_arg_count; + + if (stack_size <= 4 * SSIZE_OF(sw)) + return 0; + + return ((stack_size - (4 * SSIZE_OF(sw)) + 0xf) & ~0xf); +} + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, + sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 keep_tmp1) +{ + sljit_s32 float_arg_count = 0, arg4_reg = 0, arg_offset; + sljit_u8 *inst; + + if (word_arg_count >= 4) { + arg4_reg = SLJIT_R0; + + if (!keep_tmp1) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw)); + arg4_reg = TMP_REG1; + } + } + + if (stack_size > 0) + BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0); + + arg_offset = 0; + word_arg_count = 0; + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count)); + arg_offset += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count)); + arg_offset += SSIZE_OF(f32); + break; + default: + word_arg_count++; + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), arg_offset, (word_arg_count >= 4) ? arg4_reg : word_arg_count, 0); + + if (word_arg_count == 1 && arg4_reg == SLJIT_R0) + EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw) + stack_size); + + arg_offset += SSIZE_OF(sw); + break; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, + sljit_s32 arg_types, sljit_s32 stack_size) +{ + sljit_u8 *inst; + sljit_s32 single; + + if (stack_size > 0) + BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0); + + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) + return SLJIT_SUCCESS; + + single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + inst[0] = single ? 
FSTPS : FSTPD; + inst[1] = (0x03 << 3) | 0x04; + inst[2] = (0x04 << 3) | reg_map[SLJIT_SP]; + + return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0); +} + +static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, + sljit_s32 *extra_space, sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_sw args_size, saved_regs_size; + sljit_sw types, word_arg_count, float_arg_count; + sljit_sw stack_size, prev_stack_size, min_size, offset; + sljit_sw word_arg4_offset; + sljit_u8 r2_offset = 0; + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + sljit_u8* inst; + + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + + saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count) * SSIZE_OF(sw); + + word_arg_count = 0; + float_arg_count = 0; + arg_types >>= SLJIT_ARG_SHIFT; + types = 0; + args_size = 0; + + while (arg_types != 0) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + float_arg_count++; + break; + default: + word_arg_count++; + args_size += SSIZE_OF(sw); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (args_size <= compiler->args_size) { + *extra_space = 0; + stack_size = args_size + SSIZE_OF(sw) + saved_regs_size; + + offset = stack_size + compiler->local_size; + + if (!(src & SLJIT_IMM) && src != SLJIT_R0) { + if (word_arg_count >= 1) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); + r2_offset = sizeof(sljit_sw); + } + EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw); + } + + while (types != 0) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + offset -= SSIZE_OF(f64); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + offset -= SSIZE_OF(f32); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + default: + switch (word_arg_count) { + case 1: + offset -= SSIZE_OF(sw); + if (r2_offset != 0) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } else + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0); + break; + case 2: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0); + break; + case 3: + offset -= SSIZE_OF(sw); + break; + case 4: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw)); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + break; + } + word_arg_count--; + break; + } + types >>= SLJIT_ARG_SHIFT; + } + + return emit_stack_frame_release(compiler, 0); + } + + stack_size = args_size + SSIZE_OF(sw); + + if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) { + r2_offset = SSIZE_OF(sw); + stack_size += SSIZE_OF(sw); + } + + if (word_arg_count >= 3) + stack_size += SSIZE_OF(sw); + + prev_stack_size = SSIZE_OF(sw) + saved_regs_size; + min_size = prev_stack_size + compiler->local_size; + + word_arg4_offset = 2 * SSIZE_OF(sw); + + if (stack_size > min_size) { + BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0); + if (src == SLJIT_MEM1(SLJIT_SP)) + srcw += stack_size - min_size; + word_arg4_offset += 
stack_size - min_size; + } + else + stack_size = min_size; + + if (word_arg_count >= 3) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0); + + if (word_arg_count >= 4) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset); + } + + if (!(src & SLJIT_IMM) && src != SLJIT_R0) { + if (word_arg_count >= 1) { + SLJIT_ASSERT(r2_offset == sizeof(sljit_sw)); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); + } + EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw); + } + + /* Restore saved registers. */ + offset = stack_size - 2 * SSIZE_OF(sw); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset); + + if (compiler->saveds > 2 || compiler->scratches > 9) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + + /* Copy fourth argument and return address. */ + offset = stack_size - SSIZE_OF(sw); + *extra_space = args_size; + + if (word_arg_count >= 4) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } + + while (types != 0) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + offset -= SSIZE_OF(f64); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + offset -= SSIZE_OF(f32); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + default: + switch (word_arg_count) { + case 1: + offset -= SSIZE_OF(sw); + if (r2_offset != 0) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } else + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0); + break; + case 2: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0); + break; + case 3: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + break; + } + word_arg_count--; + break; + } + types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT(offset >= 0); + + if (offset == 0) + return SLJIT_SUCCESS; + + BINARY_IMM32(ADD, offset, SLJIT_SP, 0); + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space) +{ + /* Called when stack consumption cannot be reduced to 0. 
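+   Editor's note: on this path the "tail call" was emitted as a real near
+   call (see sljit_emit_call further down), so control comes back to the
+   instructions emitted below: the ADD releases the argument area that could
+   not be reused, and the final RET then returns to the original caller.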
*/ + sljit_u8 *inst; + + BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + RET(); + + return SLJIT_SUCCESS; +} + +static sljit_s32 tail_call_reg_arg_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_s32 word_arg_count = 0; + sljit_s32 kept_saveds_count, offset; + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) + word_arg_count++; + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (word_arg_count < 4) + return SLJIT_SUCCESS; + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw)); + + kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + offset = compiler->local_size + 3 * SSIZE_OF(sw); + + if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) + offset += SSIZE_OF(sw); + if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) + offset += SSIZE_OF(sw); + if ((compiler->saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9) + offset += SSIZE_OF(sw); + + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + struct sljit_jump *jump; + sljit_sw stack_size = 0; + sljit_s32 word_arg_count; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + if (type & SLJIT_CALL_RETURN) { + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + PTR_FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types)); + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP)); + } + + stack_size = type; + PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0)); + + SLJIT_SKIP_CHECKS(compiler); + + if (stack_size == 0) + return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP)); + + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size)); + return jump; + } + + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); + } + + stack_size = call_get_stack_size(arg_types, &word_arg_count); + PTR_FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, 0)); + + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size)); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_sw stack_size = 0; + sljit_s32 word_arg_count; + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (type & SLJIT_CALL_RETURN) { + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types)); + + if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) { + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); + 
} + + stack_size = type; + FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw)); + + if (!(src & SLJIT_IMM)) { + src = SLJIT_R0; + srcw = 0; + } + + SLJIT_SKIP_CHECKS(compiler); + + if (stack_size == 0) + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); + + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + return emit_tail_call_end(compiler, stack_size); + } + + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); + } + + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (src & SLJIT_MEM) { + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + srcw = 0; + } + + stack_size = call_get_stack_size(arg_types, &word_arg_count); + FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, src == TMP_REG1)); + + if (stack_size > 0 && src == SLJIT_MEM1(SLJIT_SP)) + srcw += stack_size; + + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + return post_call_with_args(compiler, arg_types, stack_size); +} + +static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + + if (compiler->options & SLJIT_ENTER_REG_ARG) { + if (src == SLJIT_FR0) + return SLJIT_SUCCESS; + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw); + } + + if (FAST_IS_REG(src)) { + FAIL_IF(emit_sse2_store(compiler, op & SLJIT_32, SLJIT_MEM1(SLJIT_SP), 0, src)); + + src = SLJIT_MEM1(SLJIT_SP); + srcw = 0; + } else { + ADJUST_LOCAL_OFFSET(src, srcw); + } + + inst = emit_x86_instruction(compiler, 1 | EX86_SSE2_OP1, 0, 0, src, srcw); + *inst = (op & SLJIT_32) ? FLDS : FLDL; + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + if (FAST_IS_REG(dst)) { + /* Unused dest is possible here. */ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + + INC_SIZE(1); + POP_REG(reg_map[dst]); + return SLJIT_SUCCESS; + } + + /* Memory. 
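+   Editor's note: "fast enter" materializes the return address pushed by the
+   matching fast call, so the register case above is a single POP; here the
+   address is popped straight into the memory operand (POP_rm is the 0x8F /0
+   "pop r/m32" form, an observation about the opcode, not upstream text).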
*/ + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = POP_rm; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst; + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (FAST_IS_REG(src)) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!inst); + + INC_SIZE(1 + 1); + PUSH_REG(reg_map[src]); + } + else { + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= PUSH_rm; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + } + + RET(); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Other operations */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u8* inst; + sljit_s32 i, next, reg_idx, offset; + sljit_u8 regs[2]; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + ADJUST_LOCAL_OFFSET(mem, memw); + + regs[0] = U8(REG_PAIR_FIRST(reg)); + regs[1] = U8(REG_PAIR_SECOND(reg)); + + next = SSIZE_OF(sw); + + if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) { + if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) { + /* None of them are virtual register so TMP_REG1 will not be used. */ + EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0); + + if (regs[1] == OFFS_REG(mem)) + next = -SSIZE_OF(sw); + + mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); + } else { + next = -SSIZE_OF(sw); + + if (!(mem & OFFS_REG_MASK)) + memw += SSIZE_OF(sw); + } + } + + for (i = 0; i < 2; i++) { + reg_idx = next > 0 ? i : (i ^ 0x1); + reg = regs[reg_idx]; + + offset = -1; + + if (reg >= SLJIT_R3 && reg <= SLJIT_S3) { + offset = (2 * SSIZE_OF(sw)) + ((reg) - SLJIT_R3) * SSIZE_OF(sw); + reg = TMP_REG1; + + if (type & SLJIT_MEM_STORE) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + + if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) { + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 4)); + FAIL_IF(!inst); + + INC_SIZE(4); + + inst[0] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm; + inst[1] = 0x44 | U8(reg_map[reg] << 3); + inst[2] = U8(memw << 6) | U8(reg_map[OFFS_REG(mem)] << 3) | reg_map[mem & REG_MASK]; + inst[3] = sizeof(sljit_sw); + } else if (type & SLJIT_MEM_STORE) { + EMIT_MOV(compiler, mem, memw, reg, 0); + } else { + EMIT_MOV(compiler, reg, 0, mem, memw); + } + + if (!(mem & OFFS_REG_MASK)) + memw += next; + + if (!(type & SLJIT_MEM_STORE) && offset != -1) + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) +{ + sljit_sw size; + + /* Don't adjust shadow stack if it isn't enabled. */ + if (!cpu_has_shadow_stack()) + return SLJIT_SUCCESS; + + SLJIT_ASSERT(compiler->args_size >= 0); + SLJIT_ASSERT(compiler->local_size > 0); + + size = compiler->local_size; + size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? 
compiler->saveds : 3)) * SSIZE_OF(sw); + + return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeX86_64.c b/contrib/libs/pcre2/src/sljit/sljitNativeX86_64.c new file mode 100644 index 0000000000..4e938ffcf3 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeX86_64.c @@ -0,0 +1,1092 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* x86 64-bit arch dependent functions. */ + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) +{ + sljit_u8 *inst; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw)); + FAIL_IF(!inst); + INC_SIZE(2 + sizeof(sljit_sw)); + *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); + *inst++ = U8(MOV_r_i32 | (reg_map[reg] & 0x7)); + sljit_unaligned_store_sw(inst, imm); + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm) +{ + sljit_u8 *inst; + sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + length); + FAIL_IF(!inst); + INC_SIZE(length); + if (rex) + *inst++ = rex; + *inst++ = opcode; + sljit_unaligned_store_s32(inst, (sljit_s32)imm); + return SLJIT_SUCCESS; +} + +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size, + /* The register or immediate operand. */ + sljit_s32 a, sljit_sw imma, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 *buf_ptr; + sljit_u8 rex = 0; + sljit_u8 reg_lmap_b; + sljit_uw flags = size; + sljit_uw inst_size; + + /* The immediate operand must be 32 bit. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. 
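+   Editor's note (inferred from the code below): unlike the 32-bit encoder,
+   this variant also builds a REX prefix. REX_W is added by default unless
+   compiler->mode32 or EX86_NO_REXW is set; registers with reg_map[] >= 8
+   contribute REX_B/REX_X/REX_R; and reg_lmap[] supplies the low three bits
+   placed in the mod/rm and SIB bytes. For example, `mov r15, rcx` encodes as
+   4C 8B F9: REX.W|REX.R, opcode 8B, mod/rm F9.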
*/ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); + /* SSE2 and immediate is not possible. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); + SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) + && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) + && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + + size &= 0xf; + inst_size = size; + + if (!compiler->mode32 && !(flags & EX86_NO_REXW)) + rex |= REX_W; + else if (flags & EX86_REX) + rex |= REX; + + if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) + inst_size++; + if (flags & EX86_PREF_66) + inst_size++; + + /* Calculate size of b. */ + inst_size += 1; /* mod r/m byte. */ + if (b & SLJIT_MEM) { + if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) { + PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb)); + immb = 0; + if (b & REG_MASK) + b |= TO_OFFS_REG(TMP_REG2); + else + b |= TMP_REG2; + } + + if (!(b & REG_MASK)) + inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */ + else { + if (immb != 0 && !(b & OFFS_REG_MASK)) { + /* Immediate operand. */ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_s8); + else + inst_size += sizeof(sljit_s32); + } + else if (reg_lmap[b & REG_MASK] == 5) { + /* Swap registers if possible. */ + if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5) + b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK); + else + inst_size += sizeof(sljit_s8); + } + + if (reg_map[b & REG_MASK] >= 8) + rex |= REX_B; + + if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); + + if (b & OFFS_REG_MASK) { + inst_size += 1; /* SIB byte. */ + if (reg_map[OFFS_REG(b)] >= 8) + rex |= REX_X; + } + } + } + else if (!(flags & EX86_SSE2_OP2)) { + if (reg_map[b] >= 8) + rex |= REX_B; + } + else if (freg_map[b] >= 8) + rex |= REX_B; + + if (a & SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } + else if (flags & EX86_SHIFT_INS) { + SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f)); + if (imma != 1) { + inst_size++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_s32); + } + else { + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ + if (!(flags & EX86_SSE2_OP1)) { + if (reg_map[a] >= 8) + rex |= REX_R; + } + else if (freg_map[a] >= 8) + rex |= REX_R; + } + + if (rex) + inst_size++; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); + + /* Encoding the byte. */ + INC_SIZE(inst_size); + if (flags & EX86_PREF_F2) + *inst++ = 0xf2; + if (flags & EX86_PREF_F3) + *inst++ = 0xf3; + if (flags & EX86_PREF_66) + *inst++ = 0x66; + if (rex) + *inst++ = rex; + buf_ptr = inst + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + *inst = (flags & EX86_BYTE_ARG) ? 
GROUP_BINARY_83 : GROUP_BINARY_81; + + if (a & SLJIT_IMM) + *buf_ptr = 0; + else if (!(flags & EX86_SSE2_OP1)) + *buf_ptr = U8(reg_lmap[a] << 3); + else + *buf_ptr = U8(freg_lmap[a] << 3); + } + else { + if (a & SLJIT_IMM) { + if (imma == 1) + *inst = GROUP_SHIFT_1; + else + *inst = GROUP_SHIFT_N; + } else + *inst = GROUP_SHIFT_CL; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) { + *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b])); + buf_ptr++; + } else if (b & REG_MASK) { + reg_lmap_b = reg_lmap[b & REG_MASK]; + + if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { + if (immb != 0 || reg_lmap_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if (!(b & OFFS_REG_MASK)) + *buf_ptr++ |= reg_lmap_b; + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3)); + } + + if (immb != 0 || reg_lmap_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = U8(immb); /* 8 bit displacement. */ + else { + sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_s32); + } + } + } + else { + if (reg_lmap_b == 5) + *buf_ptr |= 0x40; + + *buf_ptr++ |= 0x04; + *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6)); + + if (reg_lmap_b == 5) + *buf_ptr++ = 0; + } + } + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = 0x25; + sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_s32); + } + + if (a & SLJIT_IMM) { + if (flags & EX86_BYTE_ARG) + *buf_ptr = U8(imma); + else if (flags & EX86_HALF_ARG) + sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma); + else if (!(flags & EX86_SHIFT_INS)) + sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma); + } + + return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); +} + +/* --------------------------------------------------------------------- */ +/* Enter / return */ +/* --------------------------------------------------------------------- */ + +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr) +{ + sljit_uw type = jump->flags >> TYPE_SHIFT; + + int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff); + + /* The relative jump below specialized for this case. */ + SLJIT_ASSERT(reg_map[TMP_REG2] >= 8); + + if (type < SLJIT_JUMP) { + /* Invert type. */ + *code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10); + *code_ptr++ = short_addr ? (6 + 3) : (10 + 3); + } + + *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B); + *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2]; + jump->addr = (sljit_uw)code_ptr; + + if (jump->flags & JUMP_LABEL) + jump->flags |= PATCH_MD; + else if (short_addr) + sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target); + else + sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target); + + code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw); + + *code_ptr++ = REX_B; + *code_ptr++ = GROUP_FF; + *code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]); + + return code_ptr; +} + +static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label) +{ + if (max_label > HALFWORD_MAX) { + put_label->addr -= put_label->flags; + put_label->flags = PATCH_MD; + return code_ptr; + } + + if (put_label->flags == 0) { + /* Destination is register. 
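+   Editor's note: max_label fits in 32 bits on this path, so the previously
+   emitted `REX.W mov r64, imm64` is shrunk in place to a zero-extending
+   `mov r32, imm32`: the 0x08 (REX.W) bit is cleared when REX.B must be kept
+   (r8-r15 destinations), otherwise the REX byte is dropped and the opcode
+   moved down over it. For example, `48 B8 imm64` (mov rax, imm64) becomes
+   `B8 imm32`, and `49 BA imm64` (mov r10, imm64) becomes `41 BA imm32`.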
*/ + code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw); + + SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32); + + if ((code_ptr[0] & 0x07) != 0) { + code_ptr[0] = U8(code_ptr[0] & ~0x08); + code_ptr += 2 + sizeof(sljit_s32); + } + else { + code_ptr[0] = code_ptr[1]; + code_ptr += 1 + sizeof(sljit_s32); + } + + put_label->addr = (sljit_uw)code_ptr; + return code_ptr; + } + + code_ptr -= put_label->flags + (2 + sizeof(sljit_uw)); + SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags); + + SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + + if ((code_ptr[1] & 0xf8) == MOV_r_i32) { + code_ptr += 2 + sizeof(sljit_uw); + SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + } + + SLJIT_ASSERT(code_ptr[1] == MOV_rm_r); + + code_ptr[0] = U8(code_ptr[0] & ~0x4); + code_ptr[1] = MOV_rm_i32; + code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3)); + + code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32)); + put_label->addr = (sljit_uw)code_ptr; + put_label->flags = 0; + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_uw size; + sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + sljit_s32 saved_regs_size, tmp, i; +#ifdef _WIN64 + sljit_s32 saved_float_regs_size; + sljit_s32 saved_float_regs_offset = 0; + sljit_s32 float_arg_count = 0; +#endif /* _WIN64 */ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + if (options & SLJIT_ENTER_REG_ARG) + arg_types = 0; + + /* Emit ENDBR64 at function entry if needed. */ + FAIL_IF(emit_endbranch(compiler)); + + compiler->mode32 = 0; + + /* Including the return address saved by the call instruction. */ + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + PUSH_REG(reg_lmap[i]); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + size = reg_map[i] >= 8 ? 
2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + PUSH_REG(reg_lmap[i]); + } + +#ifdef _WIN64 + local_size += SLJIT_LOCALS_OFFSET; + saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); + + if (saved_float_regs_size > 0) { + saved_float_regs_offset = ((local_size + 0xf) & ~0xf); + local_size = saved_float_regs_offset + saved_float_regs_size; + } +#else /* !_WIN64 */ + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0); +#endif /* _WIN64 */ + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + tmp = 0; +#ifndef _WIN64 + switch (word_arg_count) { + case 0: + tmp = SLJIT_R2; + break; + case 1: + tmp = SLJIT_R1; + break; + case 2: + tmp = TMP_REG1; + break; + default: + tmp = SLJIT_R3; + break; + } +#else /* !_WIN64 */ + switch (word_arg_count + float_arg_count) { + case 0: + tmp = SLJIT_R3; + break; + case 1: + tmp = SLJIT_R1; + break; + case 2: + tmp = SLJIT_R2; + break; + default: + tmp = TMP_REG1; + break; + } +#endif /* _WIN64 */ + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + if (tmp != SLJIT_R0 + word_arg_count) + EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0); + } else { + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0); + saved_arg_count++; + } + word_arg_count++; + } else { +#ifdef _WIN64 + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + float_arg_count++; + if (float_arg_count != float_arg_count + word_arg_count) + FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32, + float_arg_count, float_arg_count + word_arg_count, 0)); +#endif /* _WIN64 */ + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size; + compiler->local_size = local_size; + +#ifdef _WIN64 + if (local_size > 0) { + if (local_size <= 4 * 4096) { + if (local_size > 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096); + if (local_size > 2 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + if (local_size > 3 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12); + + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096); + BINARY_IMM32(SUB, 4096, SLJIT_SP, 0); + BINARY_IMM32(SUB, 1, TMP_REG1, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + + INC_SIZE(2); + inst[0] = JNE_i8; + inst[1] = (sljit_u8)-21; + local_size &= 0xfff; + } + + if (local_size > 0) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + } +#endif /* _WIN64 */ + + if (local_size > 0) + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + +#ifdef _WIN64 + if (saved_float_regs_size > 0) { + compiler->mode32 = 1; + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_xm_x; + saved_float_regs_offset += 16; + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_xm_x; + saved_float_regs_offset += 16; + } + } +#endif /* _WIN64 */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + 
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 saved_regs_size; +#ifdef _WIN64 + sljit_s32 saved_float_regs_size; +#endif /* _WIN64 */ + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + +#ifdef _WIN64 + local_size += SLJIT_LOCALS_OFFSET; + saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); + + if (saved_float_regs_size > 0) + local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size; +#else /* !_WIN64 */ + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0); +#endif /* _WIN64 */ + + /* Including the return address saved by the call instruction. */ + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); + compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to) +{ + sljit_uw size; + sljit_s32 local_size, i, tmp; + sljit_u8 *inst; +#ifdef _WIN64 + sljit_s32 saved_float_regs_offset; + sljit_s32 fscratches = compiler->fscratches; + sljit_s32 fsaveds = compiler->fsaveds; +#endif /* _WIN64 */ + +#ifdef _WIN64 + saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); + + if (saved_float_regs_offset > 0) { + compiler->mode32 = 1; + saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf; + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_x_xm; + saved_float_regs_offset += 16; + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_x_xm; + saved_float_regs_offset += 16; + } + + compiler->mode32 = 0; + } +#endif /* _WIN64 */ + + local_size = compiler->local_size; + + if (is_return_to && compiler->scratches < SLJIT_FIRST_SAVED_REG && (compiler->saveds == SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + local_size += SSIZE_OF(sw); + is_return_to = 0; + } + + if (local_size > 0) + BINARY_IMM32(ADD, local_size, SLJIT_SP, 0); + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); + } + + tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) { + size = reg_map[i] >= 8 ? 
2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); + } + + if (is_return_to) + BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + compiler->mode32 = 0; + + FAIL_IF(emit_stack_frame_release(compiler, 0)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + RET(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + compiler->mode32 = 0; + + if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) { + ADJUST_LOCAL_OFFSET(src, srcw); + + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + src = TMP_REG2; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + +/* --------------------------------------------------------------------- */ +/* Call / return instructions */ +/* --------------------------------------------------------------------- */ + +#ifndef _WIN64 + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr) +{ + sljit_s32 src = src_ptr ? (*src_ptr) : 0; + sljit_s32 word_arg_count = 0; + + SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2); + SLJIT_ASSERT(!(src & SLJIT_MEM)); + + /* Remove return value. */ + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) + word_arg_count++; + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (word_arg_count == 0) + return SLJIT_SUCCESS; + + if (word_arg_count >= 3) { + if (src == SLJIT_R2) + *src_ptr = TMP_REG1; + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0); + } + + return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0); +} + +#else + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr) +{ + sljit_s32 src = src_ptr ? 
(*src_ptr) : 0;
+	sljit_s32 arg_count = 0;
+	sljit_s32 word_arg_count = 0;
+	sljit_s32 float_arg_count = 0;
+	sljit_s32 types = 0;
+	sljit_s32 data_transfer = 0;
+	static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };
+
+	SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
+	SLJIT_ASSERT(!(src & SLJIT_MEM));
+
+	arg_types >>= SLJIT_ARG_SHIFT;
+
+	while (arg_types) {
+		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
+
+		switch (arg_types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
+		case SLJIT_ARG_TYPE_F32:
+			arg_count++;
+			float_arg_count++;
+
+			if (arg_count != float_arg_count)
+				data_transfer = 1;
+			break;
+		default:
+			arg_count++;
+			word_arg_count++;
+
+			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
+				data_transfer = 1;
+
+				if (src == word_arg_regs[arg_count]) {
+					EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
+					*src_ptr = TMP_REG2;
+				}
+			}
+			break;
+		}
+
+		arg_types >>= SLJIT_ARG_SHIFT;
+	}
+
+	if (!data_transfer)
+		return SLJIT_SUCCESS;
+
+	while (types) {
+		switch (types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
+			if (arg_count != float_arg_count)
+				FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
+			arg_count--;
+			float_arg_count--;
+			break;
+		case SLJIT_ARG_TYPE_F32:
+			if (arg_count != float_arg_count)
+				FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
+			arg_count--;
+			float_arg_count--;
+			break;
+		default:
+			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
+				EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
+			arg_count--;
+			word_arg_count--;
+			break;
+		}
+
+		types >>= SLJIT_ARG_SHIFT;
+	}
+
+	return SLJIT_SUCCESS;
+}
+
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+	sljit_s32 arg_types)
+{
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+	compiler->mode32 = 0;
+
+	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
+		PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
+
+	if (type & SLJIT_CALL_RETURN) {
+		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
+		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
+	}
+
+	SLJIT_SKIP_CHECKS(compiler);
+	return sljit_emit_jump(compiler, type);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+	sljit_s32 arg_types,
+	sljit_s32 src, sljit_sw srcw)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+
+	compiler->mode32 = 0;
+
+	if (src & SLJIT_MEM) {
+		ADJUST_LOCAL_OFFSET(src, srcw);
+		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
+		src = TMP_REG2;
+	}
+
+	if (type & SLJIT_CALL_RETURN) {
+		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
+			EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
+			src = TMP_REG2;
+		}
+
+		FAIL_IF(emit_stack_frame_release(compiler, 0));
+	}
+
+	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
+		FAIL_IF(call_with_args(compiler, arg_types, &src));
+
+	if (type & SLJIT_CALL_RETURN)
+		type = SLJIT_JUMP;
+
+	SLJIT_SKIP_CHECKS(compiler);
+	return sljit_emit_ijump(compiler, type, src, srcw);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	sljit_u8 *inst;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	if
(FAST_IS_REG(dst)) { + if (reg_map[dst] < 8) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + POP_REG(reg_lmap[dst]); + return SLJIT_SUCCESS; + } + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = REX_B; + POP_REG(reg_lmap[dst]); + return SLJIT_SUCCESS; + } + + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = POP_rm; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst; + + if (FAST_IS_REG(src)) { + if (reg_map[src] < 8) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!inst); + + INC_SIZE(1 + 1); + PUSH_REG(reg_lmap[src]); + } + else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1); + FAIL_IF(!inst); + + INC_SIZE(2 + 1); + *inst++ = REX_B; + PUSH_REG(reg_lmap[src]); + } + } + else { + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= PUSH_rm; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + } + + RET(); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Other operations */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u8* inst; + sljit_s32 i, next, reg_idx; + sljit_u8 regs[2]; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + ADJUST_LOCAL_OFFSET(mem, memw); + + compiler->mode32 = 0; + + if ((mem & REG_MASK) == 0) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw); + + mem = SLJIT_MEM1(TMP_REG1); + memw = 0; + } else if (!(mem & OFFS_REG_MASK) && ((memw < HALFWORD_MIN) || (memw > HALFWORD_MAX - SSIZE_OF(sw)))) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw); + + mem = SLJIT_MEM2(mem & REG_MASK, TMP_REG1); + memw = 0; + } + + regs[0] = U8(REG_PAIR_FIRST(reg)); + regs[1] = U8(REG_PAIR_SECOND(reg)); + + next = SSIZE_OF(sw); + + if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) { + if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) { + /* Base and offset cannot be TMP_REG1. */ + EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0); + + if (regs[1] == OFFS_REG(mem)) + next = -SSIZE_OF(sw); + + mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1); + } else { + next = -SSIZE_OF(sw); + + if (!(mem & OFFS_REG_MASK)) + memw += SSIZE_OF(sw); + } + } + + for (i = 0; i < 2; i++) { + reg_idx = next > 0 ? i : (i ^ 0x1); + reg = regs[reg_idx]; + + if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) { + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 5)); + FAIL_IF(!inst); + + INC_SIZE(5); + + inst[0] = U8(REX_W | ((reg_map[reg] >= 8) ? REX_R : 0) | ((reg_map[mem & REG_MASK] >= 8) ? REX_B : 0) | ((reg_map[OFFS_REG(mem)] >= 8) ? REX_X : 0)); + inst[1] = (type & SLJIT_MEM_STORE) ? 
MOV_rm_r : MOV_r_rm; + inst[2] = 0x44 | U8(reg_lmap[reg] << 3); + inst[3] = U8(memw << 6) | U8(reg_lmap[OFFS_REG(mem)] << 3) | reg_lmap[mem & REG_MASK]; + inst[4] = sizeof(sljit_sw); + } else if (type & SLJIT_MEM_STORE) { + EMIT_MOV(compiler, mem, memw, reg, 0); + } else { + EMIT_MOV(compiler, reg, 0, mem, memw); + } + + if (!(mem & OFFS_REG_MASK)) + memw += next; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; + + compiler->mode32 = 0; + + if (src & SLJIT_IMM) { + if (FAST_IS_REG(dst)) { + if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; + } + return emit_load_imm64(compiler, dst, srcw); + } + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + compiler->mode32 = 0; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) + dst_r = src; + else { + if (sign) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = MOVSXD_r_rm; + } else { + compiler->mode32 = 1; + FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw)); + compiler->mode32 = 0; + } + } + + if (dst & SLJIT_MEM) { + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + compiler->mode32 = 0; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) +{ + sljit_s32 tmp, size; + + /* Don't adjust shadow stack if it isn't enabled. */ + if (!cpu_has_shadow_stack()) + return SLJIT_SUCCESS; + + size = compiler->local_size; + tmp = compiler->scratches; + if (tmp >= SLJIT_FIRST_SAVED_REG) + size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw); + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + if (SLJIT_S0 >= tmp) + size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw); + + return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitNativeX86_common.c b/contrib/libs/pcre2/src/sljit/sljitNativeX86_common.c new file mode 100644 index 0000000000..651942be80 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitNativeX86_common.c @@ -0,0 +1,3422 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
+{
+	return "x86" SLJIT_CPUINFO;
+}
+
+/*
+   32b register indexes:
+   0 - EAX
+   1 - ECX
+   2 - EDX
+   3 - EBX
+   4 - ESP
+   5 - EBP
+   6 - ESI
+   7 - EDI
+*/
+
+/*
+   64b register indexes:
+   0 - RAX
+   1 - RCX
+   2 - RDX
+   3 - RBX
+   4 - RSP
+   5 - RBP
+   6 - RSI
+   7 - RDI
+   8 - R8 - From now on REX prefix is required
+   9 - R9
+   10 - R10
+   11 - R11
+   12 - R12
+   13 - R13
+   14 - R14
+   15 - R15
+*/
+
+#define TMP_FREG (0)
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+
+/* Last register + 1. */
+#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
+
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
+	0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
+};
+
+#define CHECK_EXTRA_REGS(p, w, do) \
+	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
+		w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
+		p = SLJIT_MEM1(SLJIT_SP); \
+		do; \
+	}
+
+#else /* SLJIT_CONFIG_X86_32 */
+
+/* Last register + 1. */
+#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
+
+/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
+   Note: avoid using r12 and r13 for memory addressing;
+   therefore r12 is better used as a higher saved register. */
+#ifndef _WIN64
+/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+	0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
+};
+/* low-map. reg_map & 0x7. */
+static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+	0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
+};
+#else
+/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+	0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
+};
+/* low-map. reg_map & 0x7. */
+static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+	0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
+};
+#endif
+
+/* Args: xmm0-xmm3 */
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
+	4, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+};
+/* low-map. freg_map & 0x7. */
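The map/lmap split mirrors how x86-64 numbers its sixteen registers: only the low three bits of a register number fit in the ModRM/SIB fields, and the fourth bit has to travel in the REX prefix (as REX.R, REX.X or REX.B depending on the field). A sketch of that split (split_reg is illustrative, not part of sljit):

static void split_reg(sljit_u8 hw_reg, sljit_u8 *modrm_bits, sljit_u8 *rex_bit)
{
	*modrm_bits = hw_reg & 0x7;       /* what the lmap tables precompute */
	*rex_bit = (hw_reg >= 8) ? 1 : 0; /* forces a REX prefix when set */
}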
+static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
+	4, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
+};
+
+#define REX_W 0x48
+#define REX_R 0x44
+#define REX_X 0x42
+#define REX_B 0x41
+#define REX 0x40
+
+#ifndef _WIN64
+#define HALFWORD_MAX 0x7fffffffl
+#define HALFWORD_MIN -0x80000000l
+#else
+#define HALFWORD_MAX 0x7fffffffll
+#define HALFWORD_MIN -0x80000000ll
+#endif
+
+#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
+#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
+
+#define CHECK_EXTRA_REGS(p, w, do)
+
+#endif /* SLJIT_CONFIG_X86_32 */
+
+#define U8(v) ((sljit_u8)(v))
+
+
+/* Size flags for emit_x86_instruction: */
+#define EX86_BIN_INS 0x0010
+#define EX86_SHIFT_INS 0x0020
+#define EX86_REX 0x0040
+#define EX86_NO_REXW 0x0080
+#define EX86_BYTE_ARG 0x0100
+#define EX86_HALF_ARG 0x0200
+#define EX86_PREF_66 0x0400
+#define EX86_PREF_F2 0x0800
+#define EX86_PREF_F3 0x1000
+#define EX86_SSE2_OP1 0x2000
+#define EX86_SSE2_OP2 0x4000
+#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
+
+/* --------------------------------------------------------------------- */
+/* Instruction forms */
+/* --------------------------------------------------------------------- */
+
+#define ADD (/* BINARY */ 0 << 3)
+#define ADD_EAX_i32 0x05
+#define ADD_r_rm 0x03
+#define ADD_rm_r 0x01
+#define ADDSD_x_xm 0x58
+#define ADC (/* BINARY */ 2 << 3)
+#define ADC_EAX_i32 0x15
+#define ADC_r_rm 0x13
+#define ADC_rm_r 0x11
+#define AND (/* BINARY */ 4 << 3)
+#define AND_EAX_i32 0x25
+#define AND_r_rm 0x23
+#define AND_rm_r 0x21
+#define ANDPD_x_xm 0x54
+#define BSR_r_rm (/* GROUP_0F */ 0xbd)
+#define BSF_r_rm (/* GROUP_0F */ 0xbc)
+#define CALL_i32 0xe8
+#define CALL_rm (/* GROUP_FF */ 2 << 3)
+#define CDQ 0x99
+#define CMOVE_r_rm (/* GROUP_0F */ 0x44)
+#define CMP (/* BINARY */ 7 << 3)
+#define CMP_EAX_i32 0x3d
+#define CMP_r_rm 0x3b
+#define CMP_rm_r 0x39
+#define CVTPD2PS_x_xm 0x5a
+#define CVTSI2SD_x_rm 0x2a
+#define CVTTSD2SI_r_xm 0x2c
+#define DIV (/* GROUP_F7 */ 6 << 3)
+#define DIVSD_x_xm 0x5e
+#define FLDS 0xd9
+#define FLDL 0xdd
+#define FSTPS 0xd9
+#define FSTPD 0xdd
+#define INT3 0xcc
+#define IDIV (/* GROUP_F7 */ 7 << 3)
+#define IMUL (/* GROUP_F7 */ 5 << 3)
+#define IMUL_r_rm (/* GROUP_0F */ 0xaf)
+#define IMUL_r_rm_i8 0x6b
+#define IMUL_r_rm_i32 0x69
+#define JE_i8 0x74
+#define JNE_i8 0x75
+#define JMP_i8 0xeb
+#define JMP_i32 0xe9
+#define JMP_rm (/* GROUP_FF */ 4 << 3)
+#define LEA_r_m 0x8d
+#define LOOP_i8 0xe2
+#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
+#define MOV_r_rm 0x8b
+#define MOV_r_i32 0xb8
+#define MOV_rm_r 0x89
+#define MOV_rm_i32 0xc7
+#define MOV_rm8_i8 0xc6
+#define MOV_rm8_r8 0x88
+#define MOVAPS_x_xm 0x28
+#define MOVAPS_xm_x 0x29
+#define MOVSD_x_xm 0x10
+#define MOVSD_xm_x 0x11
+#define MOVSXD_r_rm 0x63
+#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
+#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
+#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
+#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
+#define MUL (/* GROUP_F7 */ 4 << 3)
+#define MULSD_x_xm 0x59
+#define NEG_rm (/* GROUP_F7 */ 3 << 3)
+#define NOP 0x90
+#define NOT_rm (/* GROUP_F7 */ 2 << 3)
+#define OR (/* BINARY */ 1 << 3)
+#define OR_r_rm 0x0b
+#define OR_EAX_i32 0x0d
+#define OR_rm_r 0x09
+#define OR_rm8_r8 0x08
+#define POP_r 0x58
+#define POP_rm 0x8f
+#define POPF 0x9d
+#define PREFETCH 0x18
+#define PUSH_i32 0x68
+#define PUSH_r 0x50
+#define PUSH_rm (/* GROUP_FF */ 6 << 3)
+#define PUSHF 0x9c
+#define ROL (/* SHIFT */ 0 << 3)
+#define ROR (/* SHIFT */ 1 << 3)
+#define RET_near 0xc3
+#define RET_i16 0xc2
+#define SBB (/* BINARY */ 3 << 3)
+#define SBB_EAX_i32 0x1d
+#define SBB_r_rm 0x1b
+#define SBB_rm_r 0x19
+#define SAR (/* SHIFT */ 7 << 3)
+#define SHL (/* SHIFT */ 4 << 3)
+#define SHLD (/* GROUP_0F */ 0xa5)
+#define SHRD (/* GROUP_0F */ 0xad)
+#define SHR (/* SHIFT */ 5 << 3)
+#define SUB (/* BINARY */ 5 << 3)
+#define SUB_EAX_i32 0x2d
+#define SUB_r_rm 0x2b
+#define SUB_rm_r 0x29
+#define SUBSD_x_xm 0x5c
+#define TEST_EAX_i32 0xa9
+#define TEST_rm_r 0x85
+#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
+#define UCOMISD_x_xm 0x2e
+#define UNPCKLPD_x_xm 0x14
+#define XCHG_EAX_r 0x90
+#define XCHG_r_rm 0x87
+#define XOR (/* BINARY */ 6 << 3)
+#define XOR_EAX_i32 0x35
+#define XOR_r_rm 0x33
+#define XOR_rm_r 0x31
+#define XORPD_x_xm 0x57
+
+#define GROUP_0F 0x0f
+#define GROUP_F3 0xf3
+#define GROUP_F7 0xf7
+#define GROUP_FF 0xff
+#define GROUP_BINARY_81 0x81
+#define GROUP_BINARY_83 0x83
+#define GROUP_SHIFT_1 0xd1
+#define GROUP_SHIFT_N 0xc1
+#define GROUP_SHIFT_CL 0xd3
+
+#define MOD_REG 0xc0
+#define MOD_DISP8 0x40
+
+#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s))
+
+#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r)))
+#define POP_REG(r) (*inst++ = U8(POP_r + (r)))
+#define RET() (*inst++ = RET_near)
+#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
+
+/* Multithreading does not affect these static variables, since they store
+   built-in CPU features. Therefore they can safely be overwritten by
+   different threads if they detect the CPU features at the same time. */
+#define CPU_FEATURE_DETECTED 0x001
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#define CPU_FEATURE_SSE2 0x002
+#endif
+#define CPU_FEATURE_LZCNT 0x004
+#define CPU_FEATURE_TZCNT 0x008
+#define CPU_FEATURE_CMOV 0x010
+
+static sljit_u32 cpu_feature_list = 0;
+
+#ifdef _WIN32_WCE
+#include <cmnintrin.h>
+#elif defined(_MSC_VER) && _MSC_VER >= 1400
+#include <intrin.h>
+#endif
+
+/******************************************************/
+/* Unaligned-store functions */
+/******************************************************/
+
+static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
+{
+	SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
+{
+	SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
+{
+	SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+/******************************************************/
+/* Utility functions */
+/******************************************************/
+
+static void get_cpu_features(void)
+{
+	sljit_u32 feature_list = CPU_FEATURE_DETECTED;
+	sljit_u32 value;
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+
+	int CPUInfo[4];
+
+	__cpuid(CPUInfo, 0);
+	if (CPUInfo[0] >= 7) {
+		__cpuidex(CPUInfo, 7, 0);
+		if (CPUInfo[1] & 0x8)
+			feature_list |= CPU_FEATURE_TZCNT;
+	}
+
+	__cpuid(CPUInfo, (int)0x80000001);
+	if (CPUInfo[2] & 0x20)
+		feature_list |= CPU_FEATURE_LZCNT;
+
+	__cpuid(CPUInfo, 1);
+	value = (sljit_u32)CPUInfo[3];
+
+#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
+
+	/* AT&T syntax.
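On GCC and Clang the same bits could also be read through <cpuid.h> instead of hand-written assembly; the sketch below only documents where each flag lives and is not the code sljit uses (detect_features is a hypothetical helper). TZCNT is advertised as BMI1 (leaf 7, EBX bit 3), LZCNT as ABM (leaf 0x80000001, ECX bit 5), and SSE2/CMOV in leaf 1, EDX bits 26 and 15:

#include <cpuid.h>

static unsigned int detect_features(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int features = CPU_FEATURE_DETECTED;

	if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) && (ebx & 0x8))
		features |= CPU_FEATURE_TZCNT;
	if (__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx) && (ecx & 0x20))
		features |= CPU_FEATURE_LZCNT;
	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
		if (edx & 0x4000000)
			features |= 0x002; /* CPU_FEATURE_SSE2, when SLJIT_DETECT_SSE2 is set */
		if (edx & 0x8000)
			features |= CPU_FEATURE_CMOV;
	}
	return features;
}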
*/ + __asm__ ( + "movl $0x0, %%eax\n" + "lzcnt %%eax, %%eax\n" + "setnz %%al\n" + "movl %%eax, %0\n" + : "=g" (value) + : +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + : "eax" +#else + : "rax" +#endif + ); + + if (value & 0x1) + feature_list |= CPU_FEATURE_LZCNT; + + __asm__ ( + "movl $0x0, %%eax\n" + "tzcnt %%eax, %%eax\n" + "setnz %%al\n" + "movl %%eax, %0\n" + : "=g" (value) + : +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + : "eax" +#else + : "rax" +#endif + ); + + if (value & 0x1) + feature_list |= CPU_FEATURE_TZCNT; + + __asm__ ( + "movl $0x1, %%eax\n" +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + /* On x86-32, there is no red zone, so this + should work (no need for a local variable). */ + "push %%ebx\n" +#endif + "cpuid\n" +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + "pop %%ebx\n" +#endif + "movl %%edx, %0\n" + : "=g" (value) + : +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + : "%eax", "%ecx", "%edx" +#else + : "%rax", "%rbx", "%rcx", "%rdx" +#endif + ); + +#else /* _MSC_VER && _MSC_VER >= 1400 */ + + /* Intel syntax. */ + __asm { + mov eax, 0 + lzcnt eax, eax + setnz al + mov value, eax + } + + if (value & 0x1) + feature_list |= CPU_FEATURE_LZCNT; + + __asm { + mov eax, 0 + tzcnt eax, eax + setnz al + mov value, eax + } + + if (value & 0x1) + feature_list |= CPU_FEATURE_TZCNT; + + __asm { + mov eax, 1 + cpuid + mov value, edx + } + +#endif /* _MSC_VER && _MSC_VER >= 1400 */ + +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (value & 0x4000000) + feature_list |= CPU_FEATURE_SSE2; +#endif + if (value & 0x8000) + feature_list |= CPU_FEATURE_CMOV; + + cpu_feature_list = feature_list; +} + +static sljit_u8 get_jump_code(sljit_uw type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_F_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_EQUAL: /* Not supported. */ + return 0x84 /* je */; + + case SLJIT_NOT_EQUAL: + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not supported. 
*/ + return 0x85 /* jne */; + + case SLJIT_LESS: + case SLJIT_CARRY: + case SLJIT_F_LESS: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED_OR_GREATER: + return 0x82 /* jc */; + + case SLJIT_GREATER_EQUAL: + case SLJIT_NOT_CARRY: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + return 0x83 /* jae */; + + case SLJIT_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_LESS: + case SLJIT_ORDERED_GREATER: + return 0x87 /* jnbe */; + + case SLJIT_LESS_EQUAL: + case SLJIT_F_LESS_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return 0x86 /* jbe */; + + case SLJIT_SIG_LESS: + return 0x8c /* jl */; + + case SLJIT_SIG_GREATER_EQUAL: + return 0x8d /* jnl */; + + case SLJIT_SIG_GREATER: + return 0x8f /* jnle */; + + case SLJIT_SIG_LESS_EQUAL: + return 0x8e /* jle */; + + case SLJIT_OVERFLOW: + return 0x80 /* jo */; + + case SLJIT_NOT_OVERFLOW: + return 0x81 /* jno */; + + case SLJIT_UNORDERED: + return 0x8a /* jp */; + + case SLJIT_ORDERED: + return 0x8b /* jpo */; + } + return 0; +} + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset); +#else +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr); +static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label); +#endif + +static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) +{ + sljit_uw type = jump->flags >> TYPE_SHIFT; + sljit_s32 short_jump; + sljit_uw label_addr; + + if (jump->flags & JUMP_LABEL) + label_addr = (sljit_uw)(code + jump->u.label->size); + else + label_addr = jump->u.target - (sljit_uw)executable_offset; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN) + return generate_far_jump_code(jump, code_ptr); +#endif + + short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; + + if (type == SLJIT_JUMP) { + if (short_jump) + *code_ptr++ = JMP_i8; + else + *code_ptr++ = JMP_i32; + jump->addr++; + } + else if (type >= SLJIT_FAST_CALL) { + short_jump = 0; + *code_ptr++ = CALL_i32; + jump->addr++; + } + else if (short_jump) { + *code_ptr++ = U8(get_jump_code(type) - 0x10); + jump->addr++; + } + else { + *code_ptr++ = GROUP_0F; + *code_ptr++ = get_jump_code(type); + jump->addr += 2; + } + + if (short_jump) { + jump->flags |= PATCH_MB; + code_ptr += sizeof(sljit_s8); + } else { + jump->flags |= PATCH_MW; + code_ptr += sizeof(sljit_s32); + } + + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_u8 *code; + sljit_u8 *code_ptr; + sljit_u8 *buf_ptr; + sljit_u8 *buf_end; + sljit_u8 len; + sljit_sw executable_offset; + sljit_uw jump_addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + /* Second code generation pass. 
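Note the division of labor: this pass only copies the recorded buffers and patches addresses, because generate_near_jump_code above already chose each jump's encoding, taking the 2-byte short form whenever the signed 8-bit displacement fits. The check, restated as a self-contained helper (fits_short_jump is illustrative, not sljit API):

#include <stdint.h>

static int fits_short_jump(uintptr_t opcode_addr, uintptr_t label_addr)
{
	/* rel8 is measured from the end of the 2-byte short encoding. */
	intptr_t rel = (intptr_t)(label_addr - (opcode_addr + 2));
	return rel >= -128 && rel <= 127;
}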
*/ + code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size, compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + executable_offset = SLJIT_EXEC_OFFSET(code); + + do { + buf_ptr = buf->memory; + buf_end = buf_ptr + buf->used_size; + do { + len = *buf_ptr++; + if (len > 0) { + /* The code is already generated. */ + SLJIT_MEMCPY(code_ptr, buf_ptr, len); + code_ptr += len; + buf_ptr += len; + } + else { + switch (*buf_ptr) { + case 0: + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + break; + case 1: + jump->addr = (sljit_uw)code_ptr; + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) + code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset); + else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset); +#else + code_ptr = generate_far_jump_code(jump, code_ptr); +#endif + } + jump = jump->next; + break; + case 2: + const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); + const_ = const_->next; + break; + default: + SLJIT_ASSERT(*buf_ptr == 3); + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size); +#endif + put_label = put_label->next; + break; + } + buf_ptr++; + } + } while (buf_ptr < buf_end); + SLJIT_ASSERT(buf_ptr == buf_end); + buf = buf->next; + } while (buf); + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr <= code + compiler->size); + + jump = compiler->jumps; + while (jump) { + if (jump->flags & (PATCH_MB | PATCH_MW)) { + if (jump->flags & JUMP_LABEL) + jump_addr = jump->u.label->addr; + else + jump_addr = jump->u.target; + + jump_addr -= jump->addr + (sljit_uw)executable_offset; + + if (jump->flags & PATCH_MB) { + jump_addr -= sizeof(sljit_s8); + SLJIT_ASSERT((sljit_sw)jump_addr >= -128 && (sljit_sw)jump_addr <= 127); + *(sljit_u8*)jump->addr = U8(jump_addr); + } else { + jump_addr -= sizeof(sljit_s32); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump_addr); +#else + SLJIT_ASSERT((sljit_sw)jump_addr >= HALFWORD_MIN && (sljit_sw)jump_addr <= HALFWORD_MAX); + sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)jump_addr); +#endif + } + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + else if (jump->flags & PATCH_MD) { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr); + } +#endif + + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr); +#else + if (put_label->flags & PATCH_MD) { + SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX); + sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr); + } + else { + SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX); + sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), 
(sljit_s32)put_label->label->addr); + } +#endif + + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code); + + code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + + SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1); + return (void*)code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (cpu_feature_list == 0) + get_cpu_features(); + return (cpu_feature_list & CPU_FEATURE_SSE2) != 0; +#else /* SLJIT_DETECT_SSE2 */ + return 1; +#endif /* SLJIT_DETECT_SSE2 */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + case SLJIT_HAS_VIRTUAL_REGISTERS: + return 1; +#endif /* SLJIT_CONFIG_X86_32 */ + + case SLJIT_HAS_CLZ: + if (cpu_feature_list == 0) + get_cpu_features(); + + return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2; + + case SLJIT_HAS_CTZ: + if (cpu_feature_list == 0) + get_cpu_features(); + + return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2; + + case SLJIT_HAS_CMOV: + if (cpu_feature_list == 0) + get_cpu_features(); + return (cpu_feature_list & CPU_FEATURE_CMOV) != 0; + + case SLJIT_HAS_ROT: + case SLJIT_HAS_PREFETCH: + return 1; + + case SLJIT_HAS_SSE2: +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (cpu_feature_list == 0) + get_cpu_features(); + return (cpu_feature_list & CPU_FEATURE_SSE2) != 0; +#else /* !SLJIT_DETECT_SSE2 */ + return 1; +#endif /* SLJIT_DETECT_SSE2 */ + + default: + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL) + return 0; + + switch (type) { + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + return 0; + } + + return 1; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode)) + +#define BINARY_IMM32(op_imm, immw, arg, argw) \ + do { \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); \ + } while (0) + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + do { \ + if (IS_HALFWORD(immw) || compiler->mode32) { \ + BINARY_IMM32(op_imm, immw, arg, argw); \ + } \ + else { \ + FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ + inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ + FAIL_IF(!inst); \ + *inst = (op_mr); \ + } \ + } while (0) + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? 
REX_W : 0, (op_eax_imm), immw)) + +#else /* !SLJIT_CONFIG_X86_64 */ + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + BINARY_IMM32(op_imm, immw, arg, argw) + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) + +#endif /* SLJIT_CONFIG_X86_64 */ + +static sljit_s32 emit_mov(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw); + +#define EMIT_MOV(compiler, dst, dstw, src, srcw) \ + FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); + +static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src); + +static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw); + +static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler) +{ +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) + /* Emit endbr32/endbr64 when CET is enabled. */ + sljit_u8 *inst; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *inst++ = 0xf3; + *inst++ = 0x0f; + *inst++ = 0x1e; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + *inst = 0xfb; +#else + *inst = 0xfa; +#endif +#else /* !SLJIT_CONFIG_X86_CET */ + SLJIT_UNUSED_ARG(compiler); +#endif /* SLJIT_CONFIG_X86_CET */ + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) + +static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg) +{ + sljit_u8 *inst; + sljit_s32 size; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + size = 5; +#else + size = 4; +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + *inst++ = 0xf3; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B); +#endif + *inst++ = 0x0f; + *inst++ = 0x1e; + *inst = (0x3 << 6) | (0x1 << 3) | (reg_map[reg] & 0x7); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg) +{ + sljit_u8 *inst; + sljit_s32 size; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + size = 5; +#else + size = 4; +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + *inst++ = 0xf3; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B); +#endif + *inst++ = 0x0f; + *inst++ = 0xae; + *inst = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7); + return SLJIT_SUCCESS; +} + +#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ + +static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void) +{ +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) + return _get_ssp() != 0; +#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */ + return 0; +#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ +} + +static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) + sljit_u8 *inst, *jz_after_cmp_inst; + sljit_uw size_jz_after_cmp_inst; + + sljit_uw size_before_rdssp_inst = compiler->size; + + /* Generate "RDSSP TMP_REG1". 
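The code below emits a small machine-code loop; its shape, sketched in C with hypothetical read_ssp/inc_ssp stand-ins for the RDSSP and INCSSP instructions (not real functions), is:

extern sljit_uw read_ssp(void);      /* hypothetical stand-in for RDSSP */
extern void inc_ssp(sljit_uw count); /* hypothetical stand-in for INCSSP */

static void adjust_sketch(sljit_uw return_addr)
{
	/* Pop shadow stack entries until its top matches the normal stack. */
	while (*(sljit_uw *)read_ssp() != return_addr)
		inc_ssp(1);
}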
*/
+	FAIL_IF(emit_rdssp(compiler, TMP_REG1));
+
+	/* Load return address on shadow stack into TMP_REG1. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+	SLJIT_ASSERT(reg_map[TMP_REG1] == 5);
+
+	/* Hand code unsupported "mov 0x0(%ebp),%ebp". */
+	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
+	FAIL_IF(!inst);
+	INC_SIZE(3);
+	*inst++ = 0x8b;
+	*inst++ = 0x6d;
+	*inst = 0;
+#else /* !SLJIT_CONFIG_X86_32 */
+	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);
+#endif /* SLJIT_CONFIG_X86_32 */
+
+	/* Compare return address against TMP_REG1. */
+	FAIL_IF(emit_cmp_binary(compiler, TMP_REG1, 0, src, srcw));
+
+	/* Generate JZ to skip shadow stack adjustment when shadow
+	   stack matches normal stack. */
+	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+	FAIL_IF(!inst);
+	INC_SIZE(2);
+	*inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;
+	size_jz_after_cmp_inst = compiler->size;
+	jz_after_cmp_inst = inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+	/* REX_W is not necessary. */
+	compiler->mode32 = 1;
+#endif
+	/* Load 1 into TMP_REG1. */
+	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
+
+	/* Generate "INCSSP TMP_REG1". */
+	FAIL_IF(emit_incssp(compiler, TMP_REG1));
+
+	/* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */
+	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+	FAIL_IF(!inst);
+	INC_SIZE(2);
+	*inst++ = JMP_i8;
+	*inst = size_before_rdssp_inst - compiler->size;
+
+	*jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
+#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
+	SLJIT_UNUSED_ARG(compiler);
+	SLJIT_UNUSED_ARG(src);
+	SLJIT_UNUSED_ARG(srcw);
+#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
+	return SLJIT_SUCCESS;
+}
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#include "sljitNativeX86_32.c"
+#else
+#include "sljitNativeX86_64.c"
+#endif
+
+static sljit_s32 emit_mov(struct sljit_compiler *compiler,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src, sljit_sw srcw)
+{
+	sljit_u8* inst;
+
+	if (FAST_IS_REG(src)) {
+		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
+		FAIL_IF(!inst);
+		*inst = MOV_rm_r;
+		return SLJIT_SUCCESS;
+	}
+	if (src & SLJIT_IMM) {
+		if (FAST_IS_REG(dst)) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+			return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
+#else
+			if (!compiler->mode32) {
+				if (NOT_HALFWORD(srcw))
+					return emit_load_imm64(compiler, dst, srcw);
+			}
+			else
+				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
+#endif
+		}
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
+			/* Immediate to memory move. Only SLJIT_MOV operation copies
+			   an immediate directly into memory so TMP_REG1 can be used. */
+			FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
+			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
+			FAIL_IF(!inst);
+			*inst = MOV_rm_r;
+			return SLJIT_SUCCESS;
+		}
+#endif
+		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
+		FAIL_IF(!inst);
+		*inst = MOV_rm_i32;
+		return SLJIT_SUCCESS;
+	}
+	if (FAST_IS_REG(dst)) {
+		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
+		FAIL_IF(!inst);
+		*inst = MOV_r_rm;
+		return SLJIT_SUCCESS;
+	}
+
+	/* Memory to memory move. Only SLJIT_MOV operation copies
+	   data from memory to memory so TMP_REG1 can be used.
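x86 has no mov encoding with two memory operands, so a word-sized memory-to-memory SLJIT_MOV necessarily becomes the load/store pair emitted below; the reference semantics in plain C:

static void mov_mem_to_mem(sljit_sw *dst, const sljit_sw *src)
{
	sljit_sw tmp = *src; /* mov tmp, [src] -- MOV_r_rm */
	*dst = tmp;          /* mov [dst], tmp -- MOV_rm_r */
}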
*/ + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ + sljit_u8 *inst; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_uw size; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + switch (GET_OPCODE(op)) { + case SLJIT_BREAKPOINT: + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = INT3; + break; + case SLJIT_NOP: + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = NOP; + break; + case SLJIT_LMUL_UW: + case SLJIT_LMUL_SW: + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#ifdef _WIN64 + SLJIT_ASSERT( + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] == 2 + && reg_map[TMP_REG1] > 7); +#else + SLJIT_ASSERT( + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] < 7 + && reg_map[TMP_REG1] == 2); +#endif + compiler->mode32 = op & SLJIT_32; +#endif + SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); + + op = GET_OPCODE(op); + if ((op | 0x2) == SLJIT_DIV_UW) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); +#else + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); +#endif + FAIL_IF(!inst); + *inst = XOR_r_rm; + } + + if ((op | 0x2) == SLJIT_DIV_SW) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = CDQ; +#else + if (compiler->mode32) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = CDQ; + } else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = REX_W; + *inst = CDQ; + } +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = GROUP_F7; + *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); +#else +#ifdef _WIN64 + size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2; +#else + size = (!compiler->mode32) ? 3 : 2; +#endif + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); +#ifdef _WIN64 + if (!compiler->mode32) + *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0); + else if (op >= SLJIT_DIVMOD_UW) + *inst++ = REX_B; + *inst++ = GROUP_F7; + *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? 
reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); +#else + if (!compiler->mode32) + *inst++ = REX_W; + *inst++ = GROUP_F7; + *inst = MOD_REG | reg_map[SLJIT_R1]; +#endif +#endif + switch (op) { + case SLJIT_LMUL_UW: + *inst |= MUL; + break; + case SLJIT_LMUL_SW: + *inst |= IMUL; + break; + case SLJIT_DIVMOD_UW: + case SLJIT_DIV_UW: + *inst |= DIV; + break; + case SLJIT_DIVMOD_SW: + case SLJIT_DIV_SW: + *inst |= IDIV; + break; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) + if (op <= SLJIT_DIVMOD_SW) + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); +#else + if (op >= SLJIT_DIV_UW) + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); +#endif + break; + case SLJIT_ENDBR: + return emit_endbranch(compiler); + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return skip_frames_before_return(compiler); + } + + return SLJIT_SUCCESS; +} + +#define ENCODE_PREFIX(prefix) \ + do { \ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \ + FAIL_IF(!inst); \ + INC_SIZE(1); \ + *inst = U8(prefix); \ + } while (0) + +static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 work_r; +#endif + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (src & SLJIT_IMM) { + if (FAST_IS_REG(dst)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; +#endif + } + inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_i8; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (reg_map[src] >= 4) { + SLJIT_ASSERT(dst_r == TMP_REG1); + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + } else + dst_r = src; +#else + dst_r = src; +#endif + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else if (FAST_IS_REG(src) && reg_map[src] >= 4) { + /* src, dst are registers. */ + SLJIT_ASSERT(FAST_IS_REG(dst)); + if (reg_map[dst] < 4) { + if (dst != src) + EMIT_MOV(compiler, dst, 0, src, 0); + inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; + } + else { + if (dst != src) + EMIT_MOV(compiler, dst, 0, src, 0); + if (sign) { + /* shl reg, 24 */ + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!inst); + *inst |= SHL; + /* sar reg, 24 */ + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!inst); + *inst |= SAR; + } + else { + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); + FAIL_IF(!inst); + *(inst + 1) |= AND; + } + } + return SLJIT_SUCCESS; + } +#endif + else { + /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */ + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; + } + + if (dst & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst_r == TMP_REG1) { + /* Find a non-used register, whose reg_map[src] < 4. 
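The constraint behind this x86-32-only shuffle: without a REX prefix, a byte-register field in ModRM can only name AL, CL, DL or BL (hardware registers 0 to 3); encodings 4 to 7 select AH, CH, DH and BH rather than the low bytes of ESP to EDI. As a one-line predicate (illustrative only):

static sljit_s32 byte_addressable_without_rex(sljit_u8 hw_reg)
{
	return hw_reg < 4; /* AL, CL, DL, BL; 4..7 would mean AH..BH */
}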
*/ + if ((dst & REG_MASK) == SLJIT_R0) { + if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1)) + work_r = SLJIT_R2; + else + work_r = SLJIT_R1; + } + else { + if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) + work_r = SLJIT_R0; + else if ((dst & REG_MASK) == SLJIT_R1) + work_r = SLJIT_R2; + else + work_r = SLJIT_R1; + } + + if (work_r == SLJIT_R0) { + ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]); + } + else { + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + } + + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; + + if (work_r == SLJIT_R0) { + ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]); + } + else { + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + } + } + else { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; + } +#else + inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; +#endif + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst++ = PREFETCH; + + if (op == SLJIT_PREFETCH_L1) + *inst |= (1 << 3); + else if (op == SLJIT_PREFETCH_L2) + *inst |= (2 << 3); + else if (op == SLJIT_PREFETCH_L3) + *inst |= (3 << 3); + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (src & SLJIT_IMM) { + if (FAST_IS_REG(dst)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; +#endif + } + inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) + dst_r = src; + else { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? 
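
In emit_prefetch above, the locality hint travels in the reg field of the ModRM byte of the 0F 18 /r opcode: /0 is prefetchnta, /1 prefetcht0, /2 prefetcht1 and /3 prefetcht2, which is exactly what the (1 << 3), (2 << 3) and (3 << 3) adjustments select. A usage sketch through the public API, assuming SLJIT_R0 already holds a pointer:

    /* Prefetch [R0] into every cache level (prefetcht0). */
    sljit_emit_op_src(compiler, SLJIT_PREFETCH_L1, SLJIT_MEM1(SLJIT_R0), 0);
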
MOVSX_r_rm16 : MOVZX_r_rm16; + } + + if (dst & SLJIT_MEM) { + inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + + if (dst == src && dstw == srcw) { + /* Same input and output */ + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; + return SLJIT_SUCCESS; + } + + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; + return SLJIT_SUCCESS; + } + + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +static const sljit_sw emit_clz_arg = 32 + 31; +static const sljit_sw emit_ctz_arg = 32; +#endif + +static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + sljit_s32 dst_r; + sljit_sw max; + + if (cpu_feature_list == 0) + get_cpu_features(); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) { + /* Group prefix added separately. */ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst++ = GROUP_F3; + + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = is_clz ? LZCNT_r_rm : TZCNT_r_rm; + + if (dst & SLJIT_MEM) + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = is_clz ? BSR_r_rm : BSF_r_rm; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + max = is_clz ? (32 + 31) : 32; + + if (cpu_feature_list & CPU_FEATURE_CMOV) { + if (dst_r != TMP_REG1) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0); + } + else + inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? 
(sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg); + + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CMOVE_r_rm; + } + else + FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); + + if (is_clz) { + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); + FAIL_IF(!inst); + *(inst + 1) |= XOR; + } +#else + if (is_clz) + max = compiler->mode32 ? (32 + 31) : (64 + 63); + else + max = compiler->mode32 ? 32 : 64; + + if (cpu_feature_list & CPU_FEATURE_CMOV) { + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max); + + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CMOVE_r_rm; + } + else + FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); + + if (is_clz) { + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0); + FAIL_IF(!inst); + *(inst + 1) |= XOR; + } +#endif + + if (dst & SLJIT_MEM) + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 op_flags = GET_ALL_FLAGS(op); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 dst_is_ereg = 0; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); + CHECK_EXTRA_REGS(src, srcw, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op_flags & SLJIT_32; +#endif + + op = GET_OPCODE(op); + + if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (FAST_IS_REG(src) && src == dst) { + if (!TYPE_CAST_NEEDED(op)) + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op_flags & SLJIT_32) { + if (src & SLJIT_MEM) { + if (op == SLJIT_MOV_S32) + op = SLJIT_MOV_U32; + } + else if (src & SLJIT_IMM) { + if (op == SLJIT_MOV_U32) + op = SLJIT_MOV_S32; + } + } +#endif + + if (src & SLJIT_IMM) { + switch (op) { + case SLJIT_MOV_U8: + srcw = (sljit_u8)srcw; + break; + case SLJIT_MOV_S8: + srcw = (sljit_s8)srcw; + break; + case SLJIT_MOV_U16: + srcw = (sljit_u16)srcw; + break; + case SLJIT_MOV_S16: + srcw = (sljit_s16)srcw; + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case SLJIT_MOV_U32: + srcw = (sljit_u32)srcw; + break; + case SLJIT_MOV_S32: + srcw = (sljit_s32)srcw; + break; +#endif + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg)) + return emit_mov(compiler, dst, dstw, src, srcw); +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { + SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); + dst = TMP_REG1; + } +#endif + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: +#endif + EMIT_MOV(compiler, dst, dstw, src, srcw); + break; + case SLJIT_MOV_U8: + FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_S8: + FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); + break; + case 
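
A reference model of the BSR path in emit_clz_ctz above may help: BSR leaves its destination undefined when the input is zero, so the code first CMOVs in 32 + 31, and the final XOR with 31 both converts the bit index into a leading-zero count and turns that sentinel into 32. A sketch of the 32-bit semantics, illustration only:

    static sljit_u32 clz32_ref(sljit_u32 v)
    {
        sljit_s32 i = 32 + 31;      /* CMOVE'd in when v == 0 (ZF set) */
        sljit_u32 b;
        for (b = 0; b < 32; b++)    /* BSR: index of the highest set bit */
            if ((v >> b) & 1)
                i = (sljit_s32)b;
        return (sljit_u32)i ^ 31;   /* 31 - i for 0..31; 32 for v == 0 */
    }
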
SLJIT_MOV_U16: + FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_S16: + FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw)); + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case SLJIT_MOV_U32: + FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_S32: + FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); + break; + case SLJIT_MOV32: + compiler->mode32 = 1; + EMIT_MOV(compiler, dst, dstw, src, srcw); + compiler->mode32 = 0; + break; +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); +#endif + return SLJIT_SUCCESS; + } + + switch (op) { + case SLJIT_NOT: + if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z)) + return emit_not_with_flags(compiler, dst, dstw, src, srcw); + return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); + + case SLJIT_CLZ: + case SLJIT_CTZ: + return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, + sljit_u32 op_types, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_u8 op_eax_imm = U8(op_types >> 24); + sljit_u8 op_rm = U8((op_types >> 16) & 0xff); + sljit_u8 op_mr = U8((op_types >> 8) & 0xff); + sljit_u8 op_imm = U8(op_types & 0xff); + + if (dst == src1 && dstw == src1w) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src2w); + } + else { + BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src2)) { + /* Special exception for sljit_emit_op_flags. */ + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* Only for cumulative operations. */ + if (dst == src2 && dstw == src2w) { + if (src1 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { +#else + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src1w); + } + else { + BINARY_IMM(op_imm, op_mr, src1w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src1)) { + inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* General version. 
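
The dst == SLJIT_R0 special cases in the binary emitters exist because x86 has dedicated accumulator forms for ALU immediates (for example, add eax, imm32 is 05 id with no ModRM byte, versus 81 /0 id), while byte-range immediates always use the sign-extended 83 /n ib form. A sketch of the resulting instruction lengths, assuming 32-bit register-direct operands:

    static int alu_imm_length(int is_eax, sljit_sw imm)
    {
        if (imm >= -128 && imm <= 127)
            return 3;              /* 83 /n ib: opcode + ModRM + imm8 */
        return is_eax ? 5 : 6;     /* 05 id (no ModRM) vs. 81 /n id */
    }
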
*/ + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, dst, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + } + else { + /* This version requires less memory writing. */ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, + sljit_u32 op_types, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_u8 op_eax_imm = U8(op_types >> 24); + sljit_u8 op_rm = U8((op_types >> 16) & 0xff); + sljit_u8 op_mr = U8((op_types >> 8) & 0xff); + sljit_u8 op_imm = U8(op_types & 0xff); + + if (dst == src1 && dstw == src1w) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src2w); + } + else { + BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src2)) { + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* General version. */ + if (FAST_IS_REG(dst) && dst != src2) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, dst, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + } + else { + /* This version requires less memory writing. */ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_mul(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + /* Register destination. 
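
emit_mul below chooses between the three IMUL encodings: the two-operand form 0F AF /r, the sign-extended byte-immediate form 6B /r ib, and the full-immediate form 69 /r id; 64-bit constants that fit none of these are first loaded into TMP_REG2. A usage sketch with an illustrative constant:

    /* R0 = R1 * 10; small constant, so the 6B /r ib form applies. */
    sljit_emit_op2(compiler, SLJIT_MUL, SLJIT_R0, 0,
        SLJIT_R1, 0, SLJIT_IMM, 10);
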
*/ + if (dst_r == src1 && !(src2 & SLJIT_IMM)) { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } + else if (dst_r == src2 && !(src1 & SLJIT_IMM)) { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } + else if (src1 & SLJIT_IMM) { + if (src2 & SLJIT_IMM) { + EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w); + src2 = dst_r; + src2w = 0; + } + + if (src1w <= 127 && src1w >= -128) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = U8(src1w); + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_sw(inst, src1w); + } +#else + else if (IS_HALFWORD(src1w)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_s32(inst, (sljit_s32)src1w); + } + else { + if (dst_r != src2) + EMIT_MOV(compiler, dst_r, 0, src2, src2w); + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } +#endif + } + else if (src2 & SLJIT_IMM) { + /* Note: src1 is NOT immediate. */ + + if (src2w <= 127 && src2w >= -128) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = U8(src2w); + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_sw(inst, src2w); + } +#else + else if (IS_HALFWORD(src2w)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + sljit_unaligned_store_s32(inst, (sljit_s32)src2w); + } + else { + if (dst_r != src1) + EMIT_MOV(compiler, dst_r, 0, src1, src1w); + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } +#endif + } + else { + /* Neither argument is immediate. */ + if (ADDRESSING_DEPENDS_ON(src2, dst_r)) + dst_r = TMP_REG1; + EMIT_MOV(compiler, dst_r, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } + + if (dst & SLJIT_MEM) + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + sljit_s32 dst_r, done = 0; + + /* These cases better be left to handled by normal way. 
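
emit_lea_binary below is the flag-free addition path: lea computes reg + reg or reg + imm in the addressing unit without touching EFLAGS, which is why sljit_emit_op2 tries it first for SLJIT_ADD whenever no flags were requested. A sketch of a call that can take this path, with illustrative registers:

    /* May assemble to: lea r2, [r0 + r1], leaving EFLAGS intact. */
    sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0,
        SLJIT_R0, 0, SLJIT_R1, 0);
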
*/ + if (dst == src1 && dstw == src1w) + return SLJIT_ERR_UNSUPPORTED; + if (dst == src2 && dstw == src2w) + return SLJIT_ERR_UNSUPPORTED; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (FAST_IS_REG(src1)) { + if (FAST_IS_REG(src2)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w); +#else + if (src2 & SLJIT_IMM) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); +#endif + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } + } + else if (FAST_IS_REG(src2)) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w); +#else + if (src1 & SLJIT_IMM) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); +#endif + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } + } + + if (done) { + if (dst_r == TMP_REG1) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + return SLJIT_ERR_UNSUPPORTED; +} + +static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(CMP_EAX_i32, src2w); + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src1)) { + if (src2 & SLJIT_IMM) { + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; + } + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) { + inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = CMP_rm_r; + return SLJIT_SUCCESS; + } + + if (src2 & SLJIT_IMM) { + if (src1 & SLJIT_IMM) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + src1 = TMP_REG1; + src1w = 0; + } + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w); + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; + } + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_u8* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(TEST_EAX_i32, src2w); + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { +#else + if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { +#endif + BINARY_EAX_IMM(TEST_EAX_i32, src1w); + return SLJIT_SUCCESS; + } + + if (!(src1 
& SLJIT_IMM)) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src2w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); + FAIL_IF(!inst); + *inst = GROUP_F7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + return SLJIT_SUCCESS; + } + else if (FAST_IS_REG(src1)) { + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + return SLJIT_SUCCESS; + } + } + + if (!(src2 & SLJIT_IMM)) { + if (src1 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src1w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w); + FAIL_IF(!inst); + *inst = GROUP_F7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + return SLJIT_SUCCESS; + } + else if (FAST_IS_REG(src2)) { + inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + return SLJIT_SUCCESS; + } + } + + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src2w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_shift(struct sljit_compiler *compiler, + sljit_u8 mode, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 mode32; +#endif + sljit_u8* inst; + + if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { + if (dst == src1 && dstw == src1w) { + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst |= mode; + return SLJIT_SUCCESS; + } + if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); + FAIL_IF(!inst); + *inst |= mode; + return SLJIT_SUCCESS; + } + + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst 
|= mode; + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + + if (dst == SLJIT_PREF_SHIFT_REG) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + } + + if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) { + if (src1 != dst) + EMIT_MOV(compiler, dst, 0, src1, src1w); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + mode32 = compiler->mode32; + compiler->mode32 = 0; +#endif + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = mode32; +#endif + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); + FAIL_IF(!inst); + *inst |= mode; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = mode32; +#endif + return SLJIT_SUCCESS; + } + + /* This case is complex since ecx itself may be used for + addressing, and this case must be supported as well. */ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0); +#else /* !SLJIT_CONFIG_X86_32 */ + mode32 = compiler->mode32; + compiler->mode32 = 0; + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); + compiler->mode32 = mode32; +#endif /* SLJIT_CONFIG_X86_32 */ + + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0); +#else + compiler->mode32 = 0; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); + compiler->mode32 = mode32; +#endif /* SLJIT_CONFIG_X86_32 */ + + if (dst != TMP_REG1) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, + sljit_u8 mode, sljit_s32 set_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* The CPU does not set flags if the shift count is 0. */ + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + src2w &= compiler->mode32 ? 
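
The masking just below mirrors the hardware: x86 reduces the shift count modulo 32 (or 64 under REX.W) before shifting, and a reduced count of zero leaves EFLAGS unchanged, hence the fallback to a plain move or to an OR/CMP that materializes the flags. Reference semantics, a sketch rather than emitter code:

    static sljit_uw shl_ref(sljit_uw value, unsigned int count, int mode32)
    {
        count &= mode32 ? 0x1f : 0x3f;  /* hardware count masking */
        return count ? (value << count) : value;  /* 0: value, flags kept */
    }
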
0x1f : 0x3f; +#else /* !SLJIT_CONFIG_X86_64 */ + src2w &= 0x1f; +#endif /* SLJIT_CONFIG_X86_64 */ + if (src2w != 0) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); + + if (!set_flags) + return emit_mov(compiler, dst, dstw, src1, src1w); + /* OR dst, src, 0 */ + return emit_cum_binary(compiler, BINARY_OPCODE(OR), + dst, dstw, src1, src1w, SLJIT_IMM, 0); + } + + if (!set_flags) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); + + if (!FAST_IS_REG(dst)) + FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); + + FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w)); + + if (FAST_IS_REG(dst)) + return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif + + SLJIT_ASSERT(dst != TMP_REG1 || HAS_FLAGS(op)); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + if (!HAS_FLAGS(op)) { + if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) + return compiler->error; + } + return emit_cum_binary(compiler, BINARY_OPCODE(ADD), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ADDC: + return emit_cum_binary(compiler, BINARY_OPCODE(ADC), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUB: + if (src1 == SLJIT_IMM && src1w == 0) + return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w); + + if (!HAS_FLAGS(op)) { + if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) + return compiler->error; + if (FAST_IS_REG(dst) && src2 == dst) { + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w)); + return emit_unary(compiler, NEG_rm, dst, 0, dst, 0); + } + } + + return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUBC: + return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_MUL: + return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_AND: + return emit_cum_binary(compiler, BINARY_OPCODE(AND), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_OR: + return emit_cum_binary(compiler, BINARY_OPCODE(OR), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_XOR: + return emit_cum_binary(compiler, BINARY_OPCODE(XOR), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SHL: + case SLJIT_MSHL: + return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_LSHR: + case SLJIT_MLSHR: + return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ASHR: + case SLJIT_MASHR: + return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ROTL: + return emit_shift_with_flags(compiler, ROL, 0, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ROTR: + return 
emit_shift_with_flags(compiler, ROR, 0, + dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 opcode = GET_OPCODE(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + if (opcode != SLJIT_SUB && opcode != SLJIT_AND) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); + } + + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif + + if (opcode == SLJIT_SUB) { + return emit_cmp_binary(compiler, src1, src1w, src2, src2w); + } + return emit_test_binary(compiler, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 restore_ecx = 0; + sljit_s32 is_rotate, is_left; + sljit_u8* inst; + sljit_sw dstw = 0; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_s32 tmp2 = SLJIT_MEM1(SLJIT_SP); +#else /* !SLJIT_CONFIG_X86_32 */ + sljit_s32 tmp2 = TMP_REG2; +#endif /* SLJIT_CONFIG_X86_32 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif + + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + src2w &= 0x1f; +#else /* !SLJIT_CONFIG_X86_32 */ + src2w &= (op & SLJIT_32) ? 0x1f : 0x3f; +#endif /* SLJIT_CONFIG_X86_32 */ + + if (src2w == 0) + return SLJIT_SUCCESS; + } + + is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); + + is_rotate = (src_dst == src1); + CHECK_EXTRA_REGS(src_dst, dstw, (void)0); + + if (is_rotate) + return emit_shift(compiler, is_left ? 
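
sljit_emit_shift_into here maps onto the x86 double-precision shifts SHLD/SHRD (0F A4/AC with an immediate count, 0F A5/AD with the count in CL): src_dst is shifted and the vacated bits are filled from src1. Reference semantics for a masked count 0 < n < w, a sketch only:

    static sljit_uw shld_ref(sljit_uw dst, sljit_uw fill,
        unsigned int n, unsigned int w)
    {
        return (dst << n) | (fill >> (w - n));  /* SHRD is the mirror image */
    }
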
ROL : ROR, src_dst, dstw, src1, src1w, src2, src2w);
+
+ if ((src2 & SLJIT_IMM) || src2 == SLJIT_PREF_SHIFT_REG) {
+ if (!FAST_IS_REG(src1)) {
+ EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+ src1 = TMP_REG1;
+ }
+ } else if (FAST_IS_REG(src1)) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 0;
+#endif
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = op & SLJIT_32;
+#endif
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+
+ if (src1 == SLJIT_PREF_SHIFT_REG)
+ src1 = TMP_REG1;
+
+ if (src_dst == SLJIT_PREF_SHIFT_REG)
+ src_dst = TMP_REG1;
+
+ restore_ecx = 1;
+ } else {
+ EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 0;
+#endif
+ EMIT_MOV(compiler, tmp2, 0, SLJIT_PREF_SHIFT_REG, 0);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = op & SLJIT_32;
+#endif
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+
+ src1 = TMP_REG1;
+
+ if (src_dst == SLJIT_PREF_SHIFT_REG) {
+ src_dst = tmp2;
+ SLJIT_ASSERT(dstw == 0);
+ }
+
+ restore_ecx = 2;
+ }
+
+ inst = emit_x86_instruction(compiler, 2, src1, 0, src_dst, dstw);
+ FAIL_IF(!inst);
+ inst[0] = GROUP_0F;
+
+ if (src2 & SLJIT_IMM) {
+ inst[1] = U8((is_left ? SHLD : SHRD) - 1);
+
+ /* Immediate argument is added separately. */
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
+ FAIL_IF(!inst);
+ INC_SIZE(1);
+ *inst = U8(src2w);
+ } else
+ inst[1] = U8(is_left ? SHLD : SHRD);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 0;
+#endif
+
+ if (restore_ecx == 1)
+ return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+ if (restore_ecx == 2)
+ return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, tmp2, 0);
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+ switch (op) {
+ case SLJIT_FAST_RETURN:
+ return emit_fast_return(compiler, src, srcw);
+ case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
+ /* Don't adjust shadow stack if it isn't enabled.
*/ + if (!cpu_has_shadow_stack ()) + return SLJIT_SUCCESS; + return adjust_shadow_stack(compiler, src, srcw); + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + return emit_prefetch(compiler, op, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (reg >= SLJIT_R3 && reg <= SLJIT_R8) + return -1; +#endif + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return reg; +#else + return freg_map[reg]; +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + SLJIT_MEMCPY(inst, instruction, size); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +/* Alignment(3) + 4 * 16 bytes. */ +static sljit_u32 sse2_data[3 + (4 * 4)]; +static sljit_u32 *sse2_buffer; + +static void init_compiler(void) +{ + /* Align to 16 bytes. */ + sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf); + + /* Single precision constants (each constant is 16 byte long). */ + sse2_buffer[0] = 0x80000000; + sse2_buffer[4] = 0x7fffffff; + /* Double precision constants (each constant is 16 byte long). */ + sse2_buffer[8] = 0; + sse2_buffer[9] = 0x80000000; + sse2_buffer[12] = 0xffffffff; + sse2_buffer[13] = 0x7fffffff; +} + +static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode, + sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) +{ + sljit_u8 *inst; + + inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = opcode; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode, + sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) +{ + sljit_u8 *inst; + + inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = opcode; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw) +{ + return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); +} + +static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src) +{ + return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + sljit_u8 *inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) + compiler->mode32 = 0; +#endif + + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CVTTSD2SI_r_xm; + + if (dst & SLJIT_MEM) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + sljit_u8 *inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) + compiler->mode32 = 0; +#endif + + if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; +#endif + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CVTSI2SD_x_rm; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + switch (GET_FLAG_TYPE(op)) { + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED_LESS_EQUAL: + if (!FAST_IS_REG(src2)) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w)); + src2 = TMP_FREG; + } + + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src2, src1, src1w); + } + + if (!FAST_IS_REG(src1)) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); + src1 = TMP_FREG; + } + + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src1, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + CHECK_ERROR(); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_MOV_F64) { + if (FAST_IS_REG(dst)) + return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw); + if (FAST_IS_REG(src)) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + } + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) { + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + if (FAST_IS_REG(src)) { + /* We overwrite the high bits of source. From SLJIT point of view, + this is not an issue. + Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. 
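
The sse2_buffer constants prepared in init_compiler above reduce SLJIT_NEG_F64/SLJIT_ABS_F64 to bit operations: XORPD with a lone sign bit flips the sign and ANDPD with the complementary mask clears it (for doubles the mask lives in the high 32 bits of each 64-bit lane). A scalar reference of the single-precision case, a standalone sketch:

    #include <stdint.h>
    #include <string.h>

    static float neg_f32_ref(float x)
    {
        uint32_t bits;
        memcpy(&bits, &x, sizeof bits);
        bits ^= 0x80000000u;          /* what XORPD does in each lane */
        memcpy(&x, &bits, sizeof x);
        return x;
    }
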
*/ + FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_32, src, src, 0)); + } + else { + FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw)); + src = TMP_FREG; + } + + FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_32, dst_r, src, 0)); + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(dst)) { + dst_r = dst; + if (dst != src) + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw)); + } + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw)); + } + + switch (GET_OPCODE(op)) { + case SLJIT_NEG_F64: + FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer : sse2_buffer + 8))); + break; + + case SLJIT_ABS_F64: + FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer + 4 : sse2_buffer + 12))); + break; + } + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + if (FAST_IS_REG(dst)) { + dst_r = dst; + if (dst == src1) + ; /* Do nothing here. */ + else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) { + /* Swap arguments. 
*/ + src2 = src1; + src2w = src1w; + } + else if (dst != src2) + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w)); + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); + } + } + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); + break; + + case SLJIT_SUB_F64: + FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); + break; + + case SLJIT_MUL_F64: + FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); + break; + + case SLJIT_DIV_F64: + FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); + break; + } + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + sljit_u8 *inst; + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + + inst = (sljit_u8*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); + + *inst++ = 0; + *inst++ = 0; + + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) +{ + sljit_u8 *inst; + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF_NULL(jump); + set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT))); + type &= 0xff; + + /* Worst case size. */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->size += (type >= SLJIT_JUMP) ? 5 : 6; +#else + compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3); +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 2); + PTR_FAIL_IF_NULL(inst); + + *inst++ = 0; + *inst++ = 1; + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + sljit_u8 *inst; + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (src == SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF_NULL(jump); + set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT))); + jump->u.target = (sljit_uw)srcw; + + /* Worst case size. */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->size += 5; +#else + compiler->size += 10 + 3; +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 2); + FAIL_IF_NULL(inst); + + *inst++ = 0; + *inst++ = 1; + } + else { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* REX_W is not necessary (src is not immediate). 
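
The register/memory branch below emits the FF group: FF /2 is call r/m and FF /4 is jmp r/m, with CALL_rm or JMP_rm OR-ed into the reg field of the ModRM byte; near branches default to a 64-bit operand in long mode, so REX.W can indeed be omitted. Usage sketch, assuming SLJIT_R0 holds the target address:

    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_R0, 0);
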
*/ + compiler->mode32 = 1; +#endif + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst = U8(*inst | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm)); + } + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_u8 *inst; + sljit_u8 cond_set = 0; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 reg; +#endif + /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */ + sljit_s32 dst_save = dst; + sljit_sw dstw_save = dstw; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + /* setcc = jcc + 0x10. */ + cond_set = U8(get_jump_code((sljit_uw)type) + 0x10); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3); + FAIL_IF(!inst); + INC_SIZE(4 + 3); + /* Set low register to conditional flag. */ + *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_lmap[TMP_REG1]; + *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B)); + *inst++ = OR_rm8_r8; + *inst++ = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]); + return SLJIT_SUCCESS; + } + + reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4); + FAIL_IF(!inst); + INC_SIZE(4 + 4); + /* Set low register to conditional flag. */ + *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_lmap[reg]; + *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); + /* The movzx instruction does not affect flags. */ + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]); + + if (reg != TMP_REG1) + return SLJIT_SUCCESS; + + if (GET_OPCODE(op) < SLJIT_ADD) { + compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); + +#else + /* The SLJIT_CONFIG_X86_32 code path starts here. */ + if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) { + if (reg_map[dst] <= 4) { + /* Low byte is accessible. */ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3); + FAIL_IF(!inst); + INC_SIZE(3 + 3); + /* Set low byte to conditional flag. */ + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = U8(MOD_REG | reg_map[dst]); + + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]); + return SLJIT_SUCCESS; + } + + /* Low byte is not accessible. */ + if (cpu_feature_list == 0) + get_cpu_features(); + + if (cpu_feature_list & CPU_FEATURE_CMOV) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1); + /* a xor reg, reg operation would overwrite the flags. */ + EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + + *inst++ = GROUP_0F; + /* cmovcc = setcc - 0x50. 
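
The opcode arithmetic works because the conditional families keep the condition code in the low nibble: 0F 4x is cmovcc, 0F 8x is jcc rel32 and 0F 9x is setcc, so setcc = jcc + 0x10 and cmovcc = setcc - 0x50 hold for every condition. For example, with condition code 4 ("equal"):

    /* 0F 44 = cmove,  0F 84 = je rel32,  0F 94 = sete */
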
*/ + *inst++ = U8(cond_set - 0x50); + *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]); + return SLJIT_SUCCESS; + } + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 3 + 1); + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); + /* Set al to conditional flag. */ + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; + + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | 0 /* eax */); + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); + return SLJIT_SUCCESS; + } + + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) { + SLJIT_ASSERT(reg_map[SLJIT_R0] == 0); + + if (dst != SLJIT_R0) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 2 + 1); + /* Set low register to conditional flag. */ + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; + *inst++ = OR_rm8_r8; + *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst]; + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); + } + else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); + FAIL_IF(!inst); + INC_SIZE(2 + 3 + 2 + 2); + /* Set low register to conditional flag. */ + *inst++ = XCHG_r_rm; + *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]); + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 1 /* ecx */; + *inst++ = OR_rm8_r8; + *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */; + *inst++ = XCHG_r_rm; + *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]); + } + return SLJIT_SUCCESS; + } + + /* Set TMP_REG1 to the bit. */ + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 3 + 1); + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); + /* Set al to conditional flag. */ + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; + + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */; + + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); + + if (GET_OPCODE(op) < SLJIT_ADD) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); +#endif /* SLJIT_CONFIG_X86_64 */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + type &= ~SLJIT_32; + + if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3)) + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); +#else + if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); +#endif + + /* ADJUST_LOCAL_OFFSET is not needed. 
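
A minimal conditional select through this entry point, a sketch with illustrative register roles: compare two values, then overwrite the destination only when the condition holds.

    /* R3 = (R0 == R1) ? R2 : R3 */
    sljit_emit_op2u(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_R1, 0);
    sljit_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R3, SLJIT_R2, 0);
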
*/ + CHECK_EXTRA_REGS(src, srcw, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = type & SLJIT_32; + type &= ~SLJIT_32; +#endif + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = U8(get_jump_code((sljit_uw)type) - 0x40); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + CHECK_ERROR(); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (NOT_HALFWORD(offset)) { + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset)); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); + return compiler->error; +#else + return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); +#endif + } +#endif + + if (offset != 0) + return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + sljit_u8 *inst; + struct sljit_const *const_; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 reg; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; + reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (emit_load_imm64(compiler, reg, init_value)) + return NULL; +#else + if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value)) + return NULL; +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); + + *inst++ = 0; + *inst++ = 2; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (dst & SLJIT_MEM) + if (emit_mov(compiler, dst, dstw, TMP_REG1, 0)) + return NULL; +#endif + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_u8 *inst; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_s32 reg; + sljit_uw start_size; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; + reg = FAST_IS_REG(dst) ? 
dst : TMP_REG1; + + if (emit_load_imm64(compiler, reg, 0)) + return NULL; +#else + if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0)) + return NULL; +#endif + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (dst & SLJIT_MEM) { + start_size = compiler->size; + if (emit_mov(compiler, dst, dstw, TMP_REG1, 0)) + return NULL; + put_label->flags = compiler->size - start_size; + } +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); + + *inst++ = 0; + *inst++ = 3; + + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) +{ + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset)); +#else + sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target); +#endif + SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 0); + sljit_unaligned_store_sw((void*)addr, new_constant); + SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 1); +} diff --git a/contrib/libs/pcre2/src/sljit/sljitProtExecAllocator.c b/contrib/libs/pcre2/src/sljit/sljitProtExecAllocator.c new file mode 100644 index 0000000000..915411fbed --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitProtExecAllocator.c @@ -0,0 +1,474 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + This file contains a simple executable memory allocator + + It is assumed, that executable code blocks are usually medium (or sometimes + large) memory blocks, and the allocator is not too frequently called (less + optimized than other allocators). Thus, using it as a generic allocator is + not suggested. 
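Two size-rounding idioms recur in the allocator below: block sizes are rounded up to an 8-byte boundary (after adding the header size) by ALIGN_SIZE(), and chunk sizes to a CHUNK_SIZE multiple via CHUNK_MASK, both using the usual power-of-two masking. A quick standalone check of that arithmetic, with names local to the example:

#include <assert.h>

int main(void)
{
    unsigned long CHUNK_SIZE = 0x10000; /* 64 KByte, as defined below */
    unsigned long CHUNK_MASK = ~(CHUNK_SIZE - 1);

    /* Round 100000 bytes up to a whole number of chunks. */
    unsigned long rounded = (100000 + CHUNK_SIZE - 1) & CHUNK_MASK;
    assert(rounded == 0x20000); /* two 64KB chunks */

    /* The same trick at 8-byte granularity, as in ALIGN_SIZE(). */
    unsigned long aligned = (61 + 7ul) & ~7ul;
    assert(aligned == 64);
    return 0;
}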
+ + How does it work: + Memory is allocated in contiguous memory areas called chunks by alloc_chunk() + Chunk format: + [ block ][ block ] ... [ block ][ block terminator ] + + Every block and the block terminator starts with a block_header. The block + header contains the size of the previous block and of the block itself. These sizes + can also contain special values. + Block size: + 0 - The block is a free_block, with a different size member. + 1 - The block is a block terminator. + n - The block is used at the moment, and the value contains its size. + Previous block size: + 0 - This is the first block of the memory chunk. + n - The size of the previous block. + + Using these size values we can go forward or backward on the block chain. + The unused blocks are stored in a linked list pointed to by free_blocks. This + list is used to find a suitable memory area when the allocator + is called. + + When a block is freed, the new free block is merged with its adjacent free + blocks if possible. For example, when the "used block" in + + [ free block ][ used block ][ free block ] + + is freed, the three blocks are connected together: + [ one big free block ] +*/ + +/* --------------------------------------------------------------------- */ +/* System (OS) functions */ +/* --------------------------------------------------------------------- */ + +/* 64 KByte. */ +#define CHUNK_SIZE (sljit_uw)0x10000 + +struct chunk_header { + void *executable; +}; + +/* + alloc_chunk / free_chunk : + * allocate executable system memory chunks + * the size is always divisible by CHUNK_SIZE + SLJIT_ALLOCATOR_LOCK / SLJIT_ALLOCATOR_UNLOCK : + * provided as part of sljitUtils + * only the allocator requires this lock; sljit is fully thread safe + as it only uses local variables +*/ + +#ifndef __NetBSD__ +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> + +#ifndef O_NOATIME +#define O_NOATIME 0 +#endif + +/* this is a Linux extension available since kernel 3.11 */ +#ifndef O_TMPFILE +#define O_TMPFILE 020200000 +#endif + +#ifndef _GNU_SOURCE +char *secure_getenv(const char *name); +int mkostemp(char *template, int flags); +#endif + +static SLJIT_INLINE int create_tempfile(void) +{ + int fd; + char tmp_name[256]; + size_t tmp_name_len = 0; + char *dir; + struct stat st; +#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED + mode_t mode; +#endif + +#ifdef HAVE_MEMFD_CREATE + /* this is a GNU extension, make sure to use -D_GNU_SOURCE */ + fd = memfd_create("sljit", MFD_CLOEXEC); + if (fd != -1) { + fchmod(fd, 0); + return fd; + } +#endif + + dir = secure_getenv("TMPDIR"); + + if (dir) { + tmp_name_len = strlen(dir); + if (tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name)) { + if ((stat(dir, &st) == 0) && S_ISDIR(st.st_mode)) + strcpy(tmp_name, dir); + } + } + +#ifdef P_tmpdir + if (!tmp_name_len) { + tmp_name_len = strlen(P_tmpdir); + if (tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name)) + strcpy(tmp_name, P_tmpdir); + } +#endif + if (!tmp_name_len) { + strcpy(tmp_name, "/tmp"); + tmp_name_len = 4; + } + + SLJIT_ASSERT(tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name)); + + if (tmp_name[tmp_name_len - 1] == '/') + tmp_name[--tmp_name_len] = '\0'; + +#ifdef __linux__ + /* + * the previous trimming might have left an empty string if TMPDIR="/" + * so work around the problem below + */ + fd = open(tmp_name_len ?
tmp_name : "/", + O_TMPFILE | O_EXCL | O_RDWR | O_NOATIME | O_CLOEXEC, 0); + if (fd != -1) + return fd; +#endif + + if (tmp_name_len + 7 >= sizeof(tmp_name)) + return -1; + + strcpy(tmp_name + tmp_name_len, "/XXXXXX"); +#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED + mode = umask(0777); +#endif + fd = mkostemp(tmp_name, O_CLOEXEC | O_NOATIME); +#if defined(SLJIT_SINGLE_THREADED) && SLJIT_SINGLE_THREADED + umask(mode); +#else + fchmod(fd, 0); +#endif + + if (fd == -1) + return -1; + + if (unlink(tmp_name)) { + close(fd); + return -1; + } + + return fd; +} + +static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size) +{ + struct chunk_header *retval; + int fd; + + fd = create_tempfile(); + if (fd == -1) + return NULL; + + if (ftruncate(fd, (off_t)size)) { + close(fd); + return NULL; + } + + retval = (struct chunk_header *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (retval == MAP_FAILED) { + close(fd); + return NULL; + } + + retval->executable = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); + + if (retval->executable == MAP_FAILED) { + munmap((void *)retval, size); + close(fd); + return NULL; + } + + close(fd); + return retval; +} +#else +/* + * MAP_REMAPDUP is a NetBSD extension available sinde 8.0, make sure to + * adjust your feature macros (ex: -D_NETBSD_SOURCE) as needed + */ +static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size) +{ + struct chunk_header *retval; + + retval = (struct chunk_header *)mmap(NULL, size, + PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC), + MAP_ANON | MAP_SHARED, -1, 0); + + if (retval == MAP_FAILED) + return NULL; + + retval->executable = mremap(retval, size, NULL, size, MAP_REMAPDUP); + if (retval->executable == MAP_FAILED) { + munmap((void *)retval, size); + return NULL; + } + + if (mprotect(retval->executable, size, PROT_READ | PROT_EXEC) == -1) { + munmap(retval->executable, size); + munmap((void *)retval, size); + return NULL; + } + + return retval; +} +#endif /* NetBSD */ + +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) +{ + struct chunk_header *header = ((struct chunk_header *)chunk) - 1; + + munmap(header->executable, size); + munmap((void *)header, size); +} + +/* --------------------------------------------------------------------- */ +/* Common functions */ +/* --------------------------------------------------------------------- */ + +#define CHUNK_MASK (~(CHUNK_SIZE - 1)) + +struct block_header { + sljit_uw size; + sljit_uw prev_size; + sljit_sw executable_offset; +}; + +struct free_block { + struct block_header header; + struct free_block *next; + struct free_block *prev; + sljit_uw size; +}; + +#define AS_BLOCK_HEADER(base, offset) \ + ((struct block_header*)(((sljit_u8*)base) + offset)) +#define AS_FREE_BLOCK(base, offset) \ + ((struct free_block*)(((sljit_u8*)base) + offset)) +#define MEM_START(base) ((void*)((base) + 1)) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7) + +static struct free_block* free_blocks; +static sljit_uw allocated_size; +static sljit_uw total_size; + +static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size) +{ + free_block->header.size = 0; + free_block->size = size; + + free_block->next = free_blocks; + free_block->prev = NULL; + if (free_blocks) + free_blocks->prev = free_block; + free_blocks = free_block; +} + +static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block) +{ + if (free_block->next) + free_block->next->prev = 
free_block->prev; + + if (free_block->prev) + free_block->prev->next = free_block->next; + else { + SLJIT_ASSERT(free_blocks == free_block); + free_blocks = free_block->next; + } +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) +{ + struct chunk_header *chunk_header; + struct block_header *header; + struct block_header *next_header; + struct free_block *free_block; + sljit_uw chunk_size; + sljit_sw executable_offset; + + SLJIT_ALLOCATOR_LOCK(); + if (size < (64 - sizeof(struct block_header))) + size = (64 - sizeof(struct block_header)); + size = ALIGN_SIZE(size); + + free_block = free_blocks; + while (free_block) { + if (free_block->size >= size) { + chunk_size = free_block->size; + if (chunk_size > size + 64) { + /* We just cut a block from the end of the free block. */ + chunk_size -= size; + free_block->size = chunk_size; + header = AS_BLOCK_HEADER(free_block, chunk_size); + header->prev_size = chunk_size; + header->executable_offset = free_block->header.executable_offset; + AS_BLOCK_HEADER(header, size)->prev_size = size; + } + else { + sljit_remove_free_block(free_block); + header = (struct block_header*)free_block; + size = chunk_size; + } + allocated_size += size; + header->size = size; + SLJIT_ALLOCATOR_UNLOCK(); + return MEM_START(header); + } + free_block = free_block->next; + } + + chunk_size = sizeof(struct chunk_header) + sizeof(struct block_header); + chunk_size = (chunk_size + size + CHUNK_SIZE - 1) & CHUNK_MASK; + + chunk_header = alloc_chunk(chunk_size); + if (!chunk_header) { + SLJIT_ALLOCATOR_UNLOCK(); + return NULL; + } + + executable_offset = (sljit_sw)((sljit_u8*)chunk_header->executable - (sljit_u8*)chunk_header); + + chunk_size -= sizeof(struct chunk_header) + sizeof(struct block_header); + total_size += chunk_size; + + header = (struct block_header *)(chunk_header + 1); + + header->prev_size = 0; + header->executable_offset = executable_offset; + if (chunk_size > size + 64) { + /* Cut the allocated space into a free and a used block. */ + allocated_size += size; + header->size = size; + chunk_size -= size; + + free_block = AS_FREE_BLOCK(header, size); + free_block->header.prev_size = size; + free_block->header.executable_offset = executable_offset; + sljit_insert_free_block(free_block, chunk_size); + next_header = AS_BLOCK_HEADER(free_block, chunk_size); + } + else { + /* All space belongs to this allocation. */ + allocated_size += chunk_size; + header->size = chunk_size; + next_header = AS_BLOCK_HEADER(header, chunk_size); + } + next_header->size = 1; + next_header->prev_size = chunk_size; + next_header->executable_offset = executable_offset; + SLJIT_ALLOCATOR_UNLOCK(); + return MEM_START(header); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +{ + struct block_header *header; + struct free_block* free_block; + + SLJIT_ALLOCATOR_LOCK(); + header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header)); + header = AS_BLOCK_HEADER(header, -header->executable_offset); + allocated_size -= header->size; + + /* Connecting free blocks together if possible. */ + + /* If header->prev_size == 0, free_block will equal to header. + In this case, free_block->header.size will be > 0. 
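The size / prev_size navigation used by the merge logic can be sketched in isolation. The following toy model is not the allocator itself; a fixed array stands in for a chunk and the struct is a stripped-down block_header:

#include <assert.h>
#include <stdint.h>
#include <string.h>

struct hdr { uintptr_t size; uintptr_t prev_size; };

int main(void)
{
    _Alignas(struct hdr) unsigned char chunk[256];
    /* Chunk layout from the format comment above:
       [ block ][ block ][ block terminator ], terminator size == 1. */
    struct hdr a = { 64, 0 }, b = { 96, 64 }, end = { 1, 96 };
    memcpy(chunk, &a, sizeof a);
    memcpy(chunk + 64, &b, sizeof b);
    memcpy(chunk + 64 + 96, &end, sizeof end);

    /* Forward: the next header lives at (this + this->size)... */
    struct hdr *h = (struct hdr *)chunk;
    struct hdr *next = (struct hdr *)((unsigned char *)h + h->size);
    assert(next->size == 96 && next->prev_size == h->size);

    /* ...backward: the previous header at (this - this->prev_size). */
    struct hdr *prev = (struct hdr *)((unsigned char *)next - next->prev_size);
    assert(prev == h);
    return 0;
}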
*/ + free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size); + if (SLJIT_UNLIKELY(!free_block->header.size)) { + free_block->size += header->size; + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + else { + free_block = (struct free_block*)header; + sljit_insert_free_block(free_block, header->size); + } + + header = AS_BLOCK_HEADER(free_block, free_block->size); + if (SLJIT_UNLIKELY(!header->size)) { + free_block->size += ((struct free_block*)header)->size; + sljit_remove_free_block((struct free_block*)header); + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + + /* The whole chunk is free. */ + if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) { + /* If this block is freed, we still have (allocated_size / 2) free space. */ + if (total_size - free_block->size > (allocated_size * 3 / 2)) { + total_size -= free_block->size; + sljit_remove_free_block(free_block); + free_chunk(free_block, free_block->size + + sizeof(struct chunk_header) + + sizeof(struct block_header)); + } + } + + SLJIT_ALLOCATOR_UNLOCK(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) +{ + struct free_block* free_block; + struct free_block* next_free_block; + + SLJIT_ALLOCATOR_LOCK(); + + free_block = free_blocks; + while (free_block) { + next_free_block = free_block->next; + if (!free_block->header.prev_size && + AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) { + total_size -= free_block->size; + sljit_remove_free_block(free_block); + free_chunk(free_block, free_block->size + + sizeof(struct chunk_header) + + sizeof(struct block_header)); + } + free_block = next_free_block; + } + + SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks)); + SLJIT_ALLOCATOR_UNLOCK(); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr) +{ + return ((struct block_header *)(ptr))[-1].executable_offset; +} diff --git a/contrib/libs/pcre2/src/sljit/sljitUtils.c b/contrib/libs/pcre2/src/sljit/sljitUtils.c new file mode 100644 index 0000000000..967593b157 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitUtils.c @@ -0,0 +1,344 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ------------------------------------------------------------------------ */ +/* Locks */ +/* ------------------------------------------------------------------------ */ + +/* Executable Allocator */ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) \ + && !(defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR) +#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) +#define SLJIT_ALLOCATOR_LOCK() +#define SLJIT_ALLOCATOR_UNLOCK() +#elif !(defined _WIN32) +#include <pthread.h> + +static pthread_mutex_t allocator_lock = PTHREAD_MUTEX_INITIALIZER; + +#define SLJIT_ALLOCATOR_LOCK() pthread_mutex_lock(&allocator_lock) +#define SLJIT_ALLOCATOR_UNLOCK() pthread_mutex_unlock(&allocator_lock) +#else /* windows */ +static HANDLE allocator_lock; + +static SLJIT_INLINE void allocator_grab_lock(void) +{ + HANDLE lock; + if (SLJIT_UNLIKELY(!InterlockedCompareExchangePointer(&allocator_lock, NULL, NULL))) { + lock = CreateMutex(NULL, FALSE, NULL); + if (InterlockedCompareExchangePointer(&allocator_lock, lock, NULL)) + CloseHandle(lock); + } + WaitForSingleObject(allocator_lock, INFINITE); +} + +#define SLJIT_ALLOCATOR_LOCK() allocator_grab_lock() +#define SLJIT_ALLOCATOR_UNLOCK() ReleaseMutex(allocator_lock) +#endif /* thread implementation */ +#endif /* SLJIT_EXECUTABLE_ALLOCATOR && !SLJIT_WX_EXECUTABLE_ALLOCATOR */ + +/* ------------------------------------------------------------------------ */ +/* Stack */ +/* ------------------------------------------------------------------------ */ + +#if ((defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) \ + && !(defined SLJIT_UTIL_SIMPLE_STACK_ALLOCATION && SLJIT_UTIL_SIMPLE_STACK_ALLOCATION)) \ + || ((defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) \ + && !((defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) \ + || (defined SLJIT_WX_EXECUTABLE_ALLOCATOR && SLJIT_WX_EXECUTABLE_ALLOCATOR))) + +#ifndef _WIN32 +/* Provides mmap function. */ +#include <sys/types.h> +#include <sys/mman.h> + +#ifndef MAP_ANON +#ifdef MAP_ANONYMOUS +#define MAP_ANON MAP_ANONYMOUS +#endif /* MAP_ANONYMOUS */ +#endif /* !MAP_ANON */ + +#ifndef MAP_ANON + +#include <fcntl.h> + +#ifdef O_CLOEXEC +#define SLJIT_CLOEXEC O_CLOEXEC +#else /* !O_CLOEXEC */ +#define SLJIT_CLOEXEC 0 +#endif /* O_CLOEXEC */ + +/* Some old systems do not have MAP_ANON. 
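The fallback prepared here is the classic pre-MAP_ANON idiom: a MAP_PRIVATE mapping of /dev/zero yields zero-filled copy-on-write pages, which behaves like an anonymous mapping. A standalone sketch of the pattern, independent of the sljit wrappers:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    size_t size = 4096;
    /* Without MAP_ANON, map /dev/zero: reads return zeroes and, with
       MAP_PRIVATE, writes go to anonymous copy-on-write pages. */
    int fd = open("/dev/zero", O_RDWR);
    if (fd < 0)
        return 1;

    void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    close(fd); /* the mapping stays valid after close */
    if (p == MAP_FAILED)
        return 1;

    ((char *)p)[0] = 42; /* private write, backed by anonymous memory */
    printf("%d\n", ((char *)p)[0]);
    return munmap(p, size);
}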
*/ +static int dev_zero = -1; + +#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) + +static SLJIT_INLINE int open_dev_zero(void) +{ + dev_zero = open("/dev/zero", O_RDWR | SLJIT_CLOEXEC); + + return dev_zero < 0; +} + +#else /* !SLJIT_SINGLE_THREADED */ + +#include <pthread.h> + +static pthread_mutex_t dev_zero_mutex = PTHREAD_MUTEX_INITIALIZER; + +static SLJIT_INLINE int open_dev_zero(void) +{ + pthread_mutex_lock(&dev_zero_mutex); + if (SLJIT_UNLIKELY(dev_zero < 0)) + dev_zero = open("/dev/zero", O_RDWR | SLJIT_CLOEXEC); + + pthread_mutex_unlock(&dev_zero_mutex); + return dev_zero < 0; +} + +#endif /* SLJIT_SINGLE_THREADED */ +#undef SLJIT_CLOEXEC +#endif /* !MAP_ANON */ +#endif /* !_WIN32 */ +#endif /* open_dev_zero */ + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) \ + || (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +#ifdef _WIN32 + +static SLJIT_INLINE sljit_uw get_page_alignment(void) { + SYSTEM_INFO si; + static sljit_uw sljit_page_align = 0; + if (!sljit_page_align) { + GetSystemInfo(&si); + sljit_page_align = (sljit_uw)si.dwPageSize - 1; + } + return sljit_page_align; +} + +#else + +#include <unistd.h> + +static SLJIT_INLINE sljit_uw get_page_alignment(void) { + static sljit_uw sljit_page_align = 0; + + sljit_sw align; + + if (!sljit_page_align) { +#ifdef _SC_PAGESIZE + align = sysconf(_SC_PAGESIZE); +#else + align = getpagesize(); +#endif + /* Should never happen. */ + if (align < 0) + align = 4096; + sljit_page_align = (sljit_uw)align - 1; + } + return sljit_page_align; +} + +#endif /* _WIN32 */ + +#endif /* get_page_alignment() */ + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) + +#if (defined SLJIT_UTIL_SIMPLE_STACK_ALLOCATION && SLJIT_UTIL_SIMPLE_STACK_ALLOCATION) + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data) +{ + struct sljit_stack *stack; + void *ptr; + + SLJIT_UNUSED_ARG(allocator_data); + + if (start_size > max_size || start_size < 1) + return NULL; + + stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data); + if (stack == NULL) + return NULL; + + ptr = SLJIT_MALLOC(max_size, allocator_data); + if (ptr == NULL) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + + stack->min_start = (sljit_u8 *)ptr; + stack->end = stack->min_start + max_size; + stack->start = stack->end - start_size; + stack->top = stack->end; + return stack; +} + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) +{ + SLJIT_UNUSED_ARG(allocator_data); + SLJIT_FREE((void*)stack->min_start, allocator_data); + SLJIT_FREE(stack, allocator_data); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start) +{ + if ((new_start < stack->min_start) || (new_start >= stack->end)) + return NULL; + stack->start = new_start; + return new_start; +} + +#else /* !SLJIT_UTIL_SIMPLE_STACK_ALLOCATION */ + +#ifdef _WIN32 + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) +{ + SLJIT_UNUSED_ARG(allocator_data); + VirtualFree((void*)stack->min_start, 0, MEM_RELEASE); + SLJIT_FREE(stack, allocator_data); +} + +#else /* !_WIN32 */ + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) +{ + SLJIT_UNUSED_ARG(allocator_data); + munmap((void*)stack->min_start, (size_t)(stack->end - stack->min_start)); + SLJIT_FREE(stack, 
allocator_data); +} + +#endif /* _WIN32 */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data) +{ + struct sljit_stack *stack; + void *ptr; + sljit_uw page_align; + + SLJIT_UNUSED_ARG(allocator_data); + + if (start_size > max_size || start_size < 1) + return NULL; + + stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data); + if (stack == NULL) + return NULL; + + /* Align max_size. */ + page_align = get_page_alignment(); + max_size = (max_size + page_align) & ~page_align; + +#ifdef _WIN32 + ptr = VirtualAlloc(NULL, max_size, MEM_RESERVE, PAGE_READWRITE); + if (!ptr) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + + stack->min_start = (sljit_u8 *)ptr; + stack->end = stack->min_start + max_size; + stack->start = stack->end; + + if (sljit_stack_resize(stack, stack->end - start_size) == NULL) { + sljit_free_stack(stack, allocator_data); + return NULL; + } +#else /* !_WIN32 */ +#ifdef MAP_ANON + ptr = mmap(NULL, max_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); +#else /* !MAP_ANON */ + if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero())) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + ptr = mmap(NULL, max_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0); +#endif /* MAP_ANON */ + if (ptr == MAP_FAILED) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + stack->min_start = (sljit_u8 *)ptr; + stack->end = stack->min_start + max_size; + stack->start = stack->end - start_size; +#endif /* _WIN32 */ + + stack->top = stack->end; + return stack; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start) +{ +#if defined _WIN32 || defined(POSIX_MADV_DONTNEED) + sljit_uw aligned_old_start; + sljit_uw aligned_new_start; + sljit_uw page_align; +#endif + + if ((new_start < stack->min_start) || (new_start >= stack->end)) + return NULL; + +#ifdef _WIN32 + page_align = get_page_alignment(); + + aligned_new_start = (sljit_uw)new_start & ~page_align; + aligned_old_start = ((sljit_uw)stack->start) & ~page_align; + if (aligned_new_start != aligned_old_start) { + if (aligned_new_start < aligned_old_start) { + if (!VirtualAlloc((void*)aligned_new_start, aligned_old_start - aligned_new_start, MEM_COMMIT, PAGE_READWRITE)) + return NULL; + } + else { + if (!VirtualFree((void*)aligned_old_start, aligned_new_start - aligned_old_start, MEM_DECOMMIT)) + return NULL; + } + } +#elif defined(POSIX_MADV_DONTNEED) + if (stack->start < new_start) { + page_align = get_page_alignment(); + + aligned_new_start = (sljit_uw)new_start & ~page_align; + aligned_old_start = ((sljit_uw)stack->start) & ~page_align; + + if (aligned_new_start > aligned_old_start) { + posix_madvise((void*)aligned_old_start, aligned_new_start - aligned_old_start, POSIX_MADV_DONTNEED); +#ifdef MADV_FREE + madvise((void*)aligned_old_start, aligned_new_start - aligned_old_start, MADV_FREE); +#endif /* MADV_FREE */ + } + } +#endif /* _WIN32 */ + + stack->start = new_start; + return new_start; +} + +#endif /* SLJIT_UTIL_SIMPLE_STACK_ALLOCATION */ + +#endif /* SLJIT_UTIL_STACK */ diff --git a/contrib/libs/pcre2/src/sljit/sljitWXExecAllocator.c b/contrib/libs/pcre2/src/sljit/sljitWXExecAllocator.c new file mode 100644 index 0000000000..6893813155 --- /dev/null +++ b/contrib/libs/pcre2/src/sljit/sljitWXExecAllocator.c @@ -0,0 +1,204 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + This file contains a simple W^X executable memory allocator for POSIX-like + systems and Windows. + + On *NIX systems, MAP_ANON is required (that is considered a feature), so make + sure to set the right availability macros for your system, or the code + will fail to build. + + If your system doesn't support mapping of anonymous pages (e.g. IRIX), it + is also likely that it doesn't need this allocator and should be using + the standard one instead. + + It allocates a separate map for each code block and may waste a lot of + memory, because whatever is requested is rounded up to the page size + (minimum 4KB, but could be even bigger). + + It changes the page permissions (RW <-> RX) as needed; therefore, if you + will be updating the code after it has been generated, you need to make + sure to block any concurrent execution, or a SIGBUS could result, and it + could even manifest itself at a different address than the one that was + being modified. + + Use this allocator only if you are unable to use the regular one because + of security restrictions and adding exceptions to your application or the + system is not possible.
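From the caller's point of view the W^X dance is: write while the pages are RW, flip to RX before executing, flip back to RW before patching. A hedged standalone sketch of that flow (POSIX mprotect with an x86-64 payload; not the allocator's own code):

#include <stdint.h>
#include <string.h>
#include <sys/mman.h>

/* x86-64 machine code for: mov eax, 42; ret */
static const unsigned char payload[] = { 0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3 };

int main(void)
{
    size_t size = 4096;
    /* Start RW: the page is never writable and executable at once. */
    unsigned char *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANON, -1, 0);
    if (p == MAP_FAILED)
        return 1;
    memcpy(p, payload, sizeof(payload));

    /* Flip RW -> RX before the first call... */
    if (mprotect(p, size, PROT_READ | PROT_EXEC))
        return 1;
    int result = ((int (*)(void))p)();

    /* ...and RX -> RW again before any later patching. */
    if (mprotect(p, size, PROT_READ | PROT_WRITE))
        return 1;
    return result == 42 ? 0 : 1;
}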
+*/ + +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \ + sljit_update_wx_flags((from), (to), (enable_exec)) + +#ifndef _WIN32 +#include <sys/types.h> +#include <sys/mman.h> + +#ifdef __NetBSD__ +#define SLJIT_PROT_WX PROT_MPROTECT(PROT_EXEC) +#define check_se_protected(ptr, size) (0) +#else /* POSIX */ +#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) +#include <pthread.h> +#define SLJIT_SE_LOCK() pthread_mutex_lock(&se_lock) +#define SLJIT_SE_UNLOCK() pthread_mutex_unlock(&se_lock) +#endif /* !SLJIT_SINGLE_THREADED */ + +#define check_se_protected(ptr, size) generic_se_protected(ptr, size) + +static SLJIT_INLINE int generic_se_protected(void *ptr, sljit_uw size) +{ + if (SLJIT_LIKELY(!mprotect(ptr, size, PROT_EXEC))) + return mprotect(ptr, size, PROT_READ | PROT_WRITE); + + return -1; +} +#endif /* NetBSD */ + +#ifndef SLJIT_SE_LOCK +#define SLJIT_SE_LOCK() +#endif +#ifndef SLJIT_SE_UNLOCK +#define SLJIT_SE_UNLOCK() +#endif +#ifndef SLJIT_PROT_WX +#define SLJIT_PROT_WX 0 +#endif + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) +{ +#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) \ + && !defined(__NetBSD__) + static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER; +#endif + static int se_protected = !SLJIT_PROT_WX; + int prot = PROT_READ | PROT_WRITE | SLJIT_PROT_WX; + sljit_uw* ptr; + + if (SLJIT_UNLIKELY(se_protected < 0)) + return NULL; + +#ifdef PROT_MAX + prot |= PROT_MAX(PROT_READ | PROT_WRITE | PROT_EXEC); +#endif + + size += sizeof(sljit_uw); + ptr = (sljit_uw*)mmap(NULL, size, prot, MAP_PRIVATE | MAP_ANON, -1, 0); + + if (ptr == MAP_FAILED) + return NULL; + + if (SLJIT_UNLIKELY(se_protected > 0)) { + SLJIT_SE_LOCK(); + se_protected = check_se_protected(ptr, size); + SLJIT_SE_UNLOCK(); + if (SLJIT_UNLIKELY(se_protected < 0)) { + munmap((void *)ptr, size); + return NULL; + } + } + + *ptr++ = size; + return ptr; +} + +#undef SLJIT_PROT_WX +#undef SLJIT_SE_UNLOCK +#undef SLJIT_SE_LOCK + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +{ + sljit_uw *start_ptr = ((sljit_uw*)ptr) - 1; + munmap((void*)start_ptr, *start_ptr); +} + +static void sljit_update_wx_flags(void *from, void *to, sljit_s32 enable_exec) +{ + sljit_uw page_mask = (sljit_uw)get_page_alignment(); + sljit_uw start = (sljit_uw)from; + sljit_uw end = (sljit_uw)to; + int prot = PROT_READ | (enable_exec ? PROT_EXEC : PROT_WRITE); + + SLJIT_ASSERT(start < end); + + start &= ~page_mask; + end = (end + page_mask) & ~page_mask; + + mprotect((void*)start, end - start, prot); +} + +#else /* windows */ + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) +{ + sljit_uw *ptr; + + size += sizeof(sljit_uw); + ptr = (sljit_uw*)VirtualAlloc(NULL, size, + MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + + if (!ptr) + return NULL; + + *ptr++ = size; + + return ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +{ + sljit_uw start = (sljit_uw)ptr - sizeof(sljit_uw); +#if defined(SLJIT_DEBUG) && SLJIT_DEBUG + sljit_uw page_mask = (sljit_uw)get_page_alignment(); + + SLJIT_ASSERT(!(start & page_mask)); +#endif + VirtualFree((void*)start, 0, MEM_RELEASE); +} + +static void sljit_update_wx_flags(void *from, void *to, sljit_s32 enable_exec) +{ + DWORD oldprot; + sljit_uw page_mask = (sljit_uw)get_page_alignment(); + sljit_uw start = (sljit_uw)from; + sljit_uw end = (sljit_uw)to; + DWORD prot = enable_exec ? 
PAGE_EXECUTE : PAGE_READWRITE; + + SLJIT_ASSERT(start < end); + + start &= ~page_mask; + end = (end + page_mask) & ~page_mask; + + VirtualProtect((void*)start, end - start, prot, &oldprot); +} + +#endif /* !windows */ + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) +{ + /* This allocator does not keep unused memory for future allocations. */ +} |
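Tying the three allocators together: sljit_malloc_exec() returns a writable pointer, and callers execute at that pointer plus the executable offset, which is non-zero only for the dual-mapping allocator in sljitProtExecAllocator.c. A usage sketch under that assumption (SLJIT_PROT_EXECUTABLE_ALLOCATOR selected, sljitLir.h on the include path, x86-64 payload for illustration):

#include <string.h>
#include "sljitLir.h"

/* x86-64 machine code for: mov eax, 7; ret (illustration only) */
static const unsigned char payload[] = { 0xb8, 0x07, 0x00, 0x00, 0x00, 0xc3 };

int run_payload(void)
{
    /* Allocate through the JIT allocator; the returned pointer is the
       writable (RW) view of the block. */
    void *rw = sljit_malloc_exec(sizeof(payload));
    if (!rw)
        return -1;

    memcpy(rw, payload, sizeof(payload));

    /* Execute through the RX view: the same memory, but possibly at a
       different address when the chunk is dual-mapped. */
    sljit_sw offset = sljit_exec_offset(rw);
    int (*fn)(void) = (int (*)(void))((unsigned char *)rw + offset);
    int result = fn();

    sljit_free_exec(rw);
    return result; /* 7 on success */
}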