diff options
author | Timo Rothenpieler <timo@rothenpieler.org> | 2022-09-03 19:49:53 +0200 |
---|---|---|
committer | Timo Rothenpieler <timo@rothenpieler.org> | 2022-09-03 20:27:34 +0200 |
commit | 416923346a6d31563801784963d2893a8d1da1c8 (patch) | |
tree | 330563edbcea9367c81872ec2432cebfb8c5e6ba | |
parent | 73fada029c527fda6c248785a948c61249fd4b2d (diff) | |
download | ffmpeg-416923346a6d31563801784963d2893a8d1da1c8.tar.gz |
compat/cuda: switch from powf to __powf intrinsic
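The powf() compat wrapper is built on clang's __builtin_powf, which crashes
older clang versions, so drop it and implement the (faster) __powf intrinsic
by hand instead.
The code this generates is identical to what nvcc emits for __powf.
Note that __powf(a, b) is computed as ex2(lg2(a) * b), so, unlike powf, it is
only meaningful for a non-negative base a; the approximate lg2 of a negative
value is NaN.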
-rw-r--r-- | compat/cuda/cuda_runtime.h | 2 | ||||
-rw-r--r-- | libavfilter/vf_bilateral_cuda.cu | 8 |
2 files changed, 5 insertions, 5 deletions
diff --git a/compat/cuda/cuda_runtime.h b/compat/cuda/cuda_runtime.h index 082e4a8ba3..699c4b6c75 100644 --- a/compat/cuda/cuda_runtime.h +++ b/compat/cuda/cuda_runtime.h @@ -182,11 +182,11 @@ static inline __device__ float fabsf(float a) { return __builtin_fabsf(a); } static inline __device__ float fabs(float a) { return __builtin_fabsf(a); } static inline __device__ double fabs(double a) { return __builtin_fabs(a); } static inline __device__ float sqrtf(float a) { return __builtin_sqrtf(a); } -static inline __device__ float powf(float a, float y) { return __builtin_powf(a,y); } static inline __device__ float __saturatef(float a) { return __nvvm_saturate_f(a); } static inline __device__ float __sinf(float a) { return __nvvm_sin_approx_f(a); } static inline __device__ float __cosf(float a) { return __nvvm_cos_approx_f(a); } static inline __device__ float __expf(float a) { return __nvvm_ex2_approx_f(a * (float)__builtin_log2(__builtin_exp(1))); } +static inline __device__ float __powf(float a, float b) { return __nvvm_ex2_approx_f(__nvvm_lg2_approx_f(a) * b); } #endif /* COMPAT_CUDA_CUDA_RUNTIME_H */ diff --git a/libavfilter/vf_bilateral_cuda.cu b/libavfilter/vf_bilateral_cuda.cu index 8aba3a079f..bbcfc81db5 100644 --- a/libavfilter/vf_bilateral_cuda.cu +++ b/libavfilter/vf_bilateral_cuda.cu @@ -34,9 +34,9 @@ extern "C" __device__ static inline float norm_squared(float4 first_yuv, float4 second_yuv) { float ans = 0; - ans += powf(first_yuv.x - second_yuv.x, 2); - ans += powf(first_yuv.y - second_yuv.y, 2); - ans += powf(first_yuv.z - second_yuv.z, 2); + ans += __powf(first_yuv.x - second_yuv.x, 2); + ans += __powf(first_yuv.y - second_yuv.y, 2); + ans += __powf(first_yuv.z - second_yuv.z, 2); return ans; } @@ -52,7 +52,7 @@ __device__ static inline float calculate_w(int x, int y, int r, int c, float sigma_space, float sigma_color) { float first_term, second_term; - first_term = (powf(x - r, 2) + powf(y - c, 2)) / (2 * sigma_space * sigma_space); + first_term = 
(__powf(x - r, 2) + __powf(y - c, 2)) / (2 * sigma_space * sigma_space); second_term = norm_squared(pixel_value, neighbor_value) / (2 * sigma_color * sigma_color); return __expf(-first_term - second_term); } |