aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-routines.c
blob: cd73025a19cc1aca00568f39a15ff9b33be0e1b1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include <stdlib.h>

// WARNING: When building the scalar versions of these functions you need to
// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
// from recognising a loop idiom and planting calls to memcpy!

// Copies n bytes from src to dest one byte at a time, walking from the lowest
// address towards the highest. Returns dest.
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  const unsigned char *in = (const unsigned char *)src;
  unsigned char *out = (unsigned char *)dest;

  // Count down the bytes still to copy while both cursors move upwards.
  for (size_t remaining = n; remaining != 0; --remaining)
    *out++ = *in++;

  return dest;
}

// memcpy leaves overlapping dest/src undefined, which is why both pointers may
// carry the restrict qualifier here; this also mirrors the libc memcpy
// prototype given in the man page. The copy itself is delegated to the
// forward byte-copy helper, whose return value (dest) is passed back.
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
                      size_t n) __arm_streaming_compatible {
  void *const copied_to = __arm_sc_memcpy_fwd(dest, src, n);
  return copied_to;
}

// Fills the first n bytes at dest with the value of c converted to
// unsigned char. Returns dest.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  const unsigned char fill = (unsigned char)c;
  unsigned char *cursor = (unsigned char *)dest;

  // Count down the bytes still to write while the cursor moves upwards.
  for (size_t remaining = n; remaining != 0; --remaining)
    *cursor++ = fill;

  return dest;
}

// Copies n bytes from src to dest one byte at a time, starting at the highest
// address and working down to the lowest. Returns dest.
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  const unsigned char *const in = (const unsigned char *)src;
  unsigned char *const out = (unsigned char *)dest;

  // TODO: Improve performance by copying larger chunks in reverse, or by
  // using SVE.
  // The index is tested before being decremented, so the body sees
  // n-1, n-2, ..., 0 and is never entered when n is zero.
  for (size_t idx = n; idx-- > 0;)
    out[idx] = in[idx];

  return dest;
}

// A memmove behaves as though:
//   1. The whole of src is first copied into a temporary array that overlaps
//      neither src nor dest.
//   2. The temporary array is then copied into dest.
// Returns dest.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  const unsigned char *const from = (const unsigned char *)src;
  unsigned char *const to = (unsigned char *)dest;

  // True when one buffer starts strictly beyond the end of the other; the
  // buffers then share no bytes and a plain forward copy is sufficient.
  const int separated = (from > (to + n)) || (to > (from + n));

  // A forward copy is also safe for this overlapping layout:
  //     src: Low     |   ->   |     High
  //    dest: Low  |   ->   |        High
  // src is always ahead of dest at a higher address, so every chunk is read
  // from src before the write to dest can reach it; future reads of src are
  // never corrupted.
  if (separated || (from > to))
    return __arm_sc_memcpy_fwd(dest, src, n);

  // Remaining layout:
  //     src: Low  |   ->   |        High
  //    dest: Low     |   ->   |     High
  // Within the overlap region a forward write to dest would corrupt bytes of
  // src that have not been read yet. Copying in reverse, starting from the
  // highest address, avoids that.
  return __arm_sc_memcpy_rev(dest, src, n);
}

// Scans the first n bytes at src for the value of c converted to
// unsigned char. Returns a pointer to the first matching byte, or NULL when
// no byte in that range matches.
const void *__arm_sc_memchr(const void *src, int c,
                            size_t n) __arm_streaming_compatible {
  const unsigned char wanted = (unsigned char)c;
  const unsigned char *cursor = (const unsigned char *)src;

  // Count down the bytes still to inspect while the cursor moves upwards.
  for (size_t remaining = n; remaining != 0; --remaining, ++cursor) {
    if (*cursor == wanted)
      return cursor;
  }

  return NULL;
}