contrib/libs/cxxsupp/builtins/fp_trunc.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158

//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Set the source and destination precision settings.
//
//===----------------------------------------------------------------------===//

#ifndef FP_TRUNC_HEADER
#define FP_TRUNC_HEADER

#include "int_lib.h"

// Source format selection. The including file defines one of SRC_SINGLE,
// SRC_DOUBLE, or SRC_QUAD; the first matching branch below supplies:
//   src_t          - the floating-point type being converted from,
//   src_rep_t      - the unsigned integer type that holds its bit pattern,
//   SRC_REP_C      - turns an integer literal into a constant usable as
//                    src_rep_t,
//   srcBits        - total width of the format in bits,
//   srcSigFracBits - width of the stored significand fraction in bits,
//   srcExpBits     - width of the exponent field in bits.
// In every branch srcExpBits == srcBits - srcSigFracBits - 1, where the -1
// accounts for the sign bit.
#if defined SRC_SINGLE
typedef float src_t;
typedef uint32_t src_rep_t;
#define SRC_REP_C UINT32_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 23;
// srcBits - srcSigFracBits - 1 (sign bit)
static const int srcExpBits = 8;

#elif defined SRC_DOUBLE
typedef double src_t;
typedef uint64_t src_rep_t;
#define SRC_REP_C UINT64_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 52;
// srcBits - srcSigFracBits - 1 (sign bit)
static const int srcExpBits = 11;

#elif defined SRC_QUAD
typedef tf_float src_t;
typedef __uint128_t src_rep_t;
// There is no literal suffix for 128-bit integers, so the "constant" macro is
// a cast: SRC_REP_C(1) expands to (__uint128_t)(1).
#define SRC_REP_C (__uint128_t)
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 112;
// srcBits - srcSigFracBits - 1 (sign bit)
static const int srcExpBits = 15;

#else
// The message lists every selector accepted by the chain above.
#error Source precision is not set: define SRC_SINGLE, SRC_DOUBLE, or SRC_QUAD.
#endif // end source precision

// Destination format selection. The including file defines one of DST_DOUBLE,
// DST_80, DST_SINGLE, DST_HALF, or DST_BFLOAT; the first matching branch below
// supplies:
//   dst_t          - the floating-point type being converted to,
//   dst_rep_t      - the unsigned integer type that holds its bit pattern,
//   DST_REP_C      - turns an integer literal into a constant usable as
//                    dst_rep_t,
//   dstBits        - total width of the format in bits,
//   dstSigFracBits - width of the stored significand fraction in bits,
//   dstExpBits     - width of the exponent field in bits.
// Except for DST_80, dstExpBits == dstBits - dstSigFracBits - 1, where the -1
// accounts for the sign bit.
#if defined DST_DOUBLE
typedef double dst_t;
typedef uint64_t dst_rep_t;
#define DST_REP_C UINT64_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 52;
// dstBits - dstSigFracBits - 1 (sign bit)
static const int dstExpBits = 11;

#elif defined DST_80
typedef xf_float dst_t;
typedef __uint128_t dst_rep_t;
// There is no literal suffix for 128-bit integers, so the "constant" macro is
// a cast: DST_REP_C(1) expands to (__uint128_t)(1).
#define DST_REP_C (__uint128_t)
// The width is given literally rather than derived from sizeof(dst_t).
static const int dstBits = 80;
static const int dstSigFracBits = 63;
// dstBits - dstSigFracBits - 1 (sign bit) - 1 (explicitly stored integer bit)
static const int dstExpBits = 15;

#elif defined DST_SINGLE
typedef float dst_t;
typedef uint32_t dst_rep_t;
#define DST_REP_C UINT32_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 23;
// dstBits - dstSigFracBits - 1 (sign bit)
static const int dstExpBits = 8;

#elif defined DST_HALF
// Without _Float16 support the result is carried as a raw 16-bit pattern.
#ifdef COMPILER_RT_HAS_FLOAT16
typedef _Float16 dst_t;
#else
typedef uint16_t dst_t;
#endif
typedef uint16_t dst_rep_t;
#define DST_REP_C UINT16_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 10;
// dstBits - dstSigFracBits - 1 (sign bit)
static const int dstExpBits = 5;

#elif defined DST_BFLOAT
typedef __bf16 dst_t;
typedef uint16_t dst_rep_t;
#define DST_REP_C UINT16_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 7;
// dstBits - dstSigFracBits - 1 (sign bit)
static const int dstExpBits = 8;

#else
// The message lists every selector accepted by the chain above.
#error Destination precision is not set: define DST_DOUBLE, DST_80, DST_SINGLE, DST_HALF, or DST_BFLOAT.
#endif // end destination precision

// TODO: These helper routines should be placed into fp_lib.h
// Currently they depend on macros/constants defined above.

// Returns the sign of a source bit pattern as 0 or 1: bit (srcBits - 1) of x
// moved down to bit 0.
static inline src_rep_t extract_sign_from_src(src_rep_t x) {
  const int signPos = srcBits - 1;
  return (x >> signPos) & SRC_REP_C(1);
}

// Returns the raw exponent field of a source bit pattern: the srcExpBits bits
// that sit directly below the sign bit, moved down so the field starts at
// bit 0. No bias adjustment is applied.
static inline src_rep_t extract_exp_from_src(src_rep_t x) {
  // Everything below the exponent field, i.e. the number of bits to discard.
  const int expShift = srcBits - 1 - srcExpBits;
  const src_rep_t expFieldMask = (SRC_REP_C(1) << srcExpBits) - 1;
  return (x >> expShift) & expFieldMask;
}

// Returns the stored significand fraction of a source bit pattern: the low
// srcSigFracBits bits of x, with every higher bit cleared.
static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
  // One past the largest fraction value; subtracting 1 gives the low-bit mask.
  const src_rep_t fracLimit = SRC_REP_C(1) << srcSigFracBits;
  return x & (fracLimit - 1);
}

// Assembles a destination bit pattern from its three fields.
//   sign    - placed at bit (dstBits - 1),
//   exp     - placed so its lowest bit lands at (dstBits - 1 - dstExpBits),
//   sigFrac - OR-ed in unshifted.
// The fields are combined as given; none of them is masked here.
static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
  const int signShift = dstBits - 1;
  const int expShift = signShift - dstExpBits;

  dst_rep_t packed = sigFrac;
  packed |= sign << signShift;
  packed |= exp << expShift;

  // The 80-bit format stores its integer bit explicitly; set it whenever the
  // exponent field is non-zero.
  if (dstBits == 80 && exp != 0) {
    packed |= DST_REP_C(1) << dstSigFracBits;
  }
  return packed;
}

// End of specialization parameters.  Two helper routines for conversion to and
// from the representation of floating-point data as integer values follow.

// Reinterprets a source floating-point value as its integer bit pattern by
// storing it through one union member and reading it back through the other.
static inline src_rep_t srcToRep(src_t x) {
  typedef union {
    src_t value;
    src_rep_t bits;
  } src_pun_t;
  const src_pun_t pun = {.value = x};
  return pun.bits;
}

// Reinterprets an integer bit pattern as a destination floating-point value by
// storing it through one union member and reading it back through the other.
static inline dst_t dstFromRep(dst_rep_t x) {
  typedef union {
    dst_t value;
    dst_rep_t bits;
  } dst_pun_t;
  const dst_pun_t pun = {.bits = x};
  return pun.value;
}

#endif // FP_TRUNC_HEADER