aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/llvm12/include/llvm/IR/IntrinsicsARM.td
blob: e2283bfd60101871d66aeb7837c47a5b748d1d3d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
//===- IntrinsicsARM.td - Defines ARM intrinsics -----------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the ARM-specific intrinsics.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// Miscellaneous and 16-bit multiplication intrinsics

let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".

// int_arm_space is a space-consuming pseudo "instruction" intended mainly for
// testing ARMConstantIslands.
//   operand 0      : number of bytes to occupy (must be an immediate).
//   operand 1, ret : essentially chains, used to force ordering during ISel.
def int_arm_space
    : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>]>;

// 16-bit multiplications.
// Every definition in this group has the signature i32 (i32, i32), is marked
// IntrNoMem, and is bound to the GCC builtin named in its GCCBuiltin<> parent.
def int_arm_smulbb
    : GCCBuiltin<"__builtin_arm_smulbb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_smulbt
    : GCCBuiltin<"__builtin_arm_smulbt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_smultb
    : GCCBuiltin<"__builtin_arm_smultb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_smultt
    : GCCBuiltin<"__builtin_arm_smultt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_smulwb
    : GCCBuiltin<"__builtin_arm_smulwb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_smulwt
    : GCCBuiltin<"__builtin_arm_smulwt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

//===----------------------------------------------------------------------===//
// Saturating Arithmetic

// All four definitions are i32 (i32, i32) and bound to a GCC builtin.
// int_arm_qadd additionally carries the Commutative property.
def int_arm_qadd
    : GCCBuiltin<"__builtin_arm_qadd">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
                [Commutative, IntrNoMem]>;
def int_arm_qsub
    : GCCBuiltin<"__builtin_arm_qsub">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_ssat
    : GCCBuiltin<"__builtin_arm_ssat">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_usat
    : GCCBuiltin<"__builtin_arm_usat">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

// Accumulating multiplications.
// Each definition is i32 (i32, i32, i32), marked IntrNoMem, and bound to the
// GCC builtin named in its GCCBuiltin<> parent.
def int_arm_smlabb
    : GCCBuiltin<"__builtin_arm_smlabb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
def int_arm_smlabt
    : GCCBuiltin<"__builtin_arm_smlabt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
def int_arm_smlatb
    : GCCBuiltin<"__builtin_arm_smlatb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
def int_arm_smlatt
    : GCCBuiltin<"__builtin_arm_smlatt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
def int_arm_smlawb
    : GCCBuiltin<"__builtin_arm_smlawb">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
def int_arm_smlawt
    : GCCBuiltin<"__builtin_arm_smlawt">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;

// Parallel 16-bit saturation.
// Both definitions are i32 (i32, i32) and marked IntrNoMem.
def int_arm_ssat16
    : GCCBuiltin<"__builtin_arm_ssat16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_usat16
    : GCCBuiltin<"__builtin_arm_usat16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

// Packing and unpacking.
// The "ab16" forms take two i32 operands; the "b16" forms take one.  All
// return i32 and are marked IntrNoMem.
def int_arm_sxtab16
    : GCCBuiltin<"__builtin_arm_sxtab16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_sxtb16
    : GCCBuiltin<"__builtin_arm_sxtb16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_arm_uxtab16
    : GCCBuiltin<"__builtin_arm_uxtab16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_uxtb16
    : GCCBuiltin<"__builtin_arm_uxtb16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;

// Parallel selection.  It reads the GE flags, and is declared IntrReadMem
// rather than IntrNoMem.
def int_arm_sel
    : GCCBuiltin<"__builtin_arm_sel">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;

// Parallel 8-bit addition and subtraction.
// All definitions are i32 (i32, i32).  Those annotated "Writes to the GE bits."
// are given an empty property list; the others are marked IntrNoMem.
def int_arm_qadd8
    : GCCBuiltin<"__builtin_arm_qadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_qsub8
    : GCCBuiltin<"__builtin_arm_qsub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
def int_arm_sadd8
    : GCCBuiltin<"__builtin_arm_sadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
def int_arm_shadd8
    : GCCBuiltin<"__builtin_arm_shadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_shsub8
    : GCCBuiltin<"__builtin_arm_shsub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
def int_arm_ssub8
    : GCCBuiltin<"__builtin_arm_ssub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
def int_arm_uadd8
    : GCCBuiltin<"__builtin_arm_uadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
def int_arm_uhadd8
    : GCCBuiltin<"__builtin_arm_uhadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_uhsub8
    : GCCBuiltin<"__builtin_arm_uhsub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_uqadd8
    : GCCBuiltin<"__builtin_arm_uqadd8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_uqsub8
    : GCCBuiltin<"__builtin_arm_uqsub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
def int_arm_usub8
    : GCCBuiltin<"__builtin_arm_usub8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;

// Sum of 8-bit absolute differences.
// usad8 is i32 (i32, i32); usada8 takes a third i32 operand.  Both are marked
// IntrNoMem.
def int_arm_usad8
    : GCCBuiltin<"__builtin_arm_usad8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_usada8
    : GCCBuiltin<"__builtin_arm_usada8">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;

// Parallel 16-bit addition and subtraction.
// All definitions are i32 (i32, i32).  Those annotated "Writes to the GE bits."
// are given an empty property list; the others are marked IntrNoMem.
def int_arm_qadd16
    : GCCBuiltin<"__builtin_arm_qadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_qasx
    : GCCBuiltin<"__builtin_arm_qasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_qsax
    : GCCBuiltin<"__builtin_arm_qsax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_qsub16
    : GCCBuiltin<"__builtin_arm_qsub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
def int_arm_sadd16
    : GCCBuiltin<"__builtin_arm_sadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
def int_arm_sasx
    : GCCBuiltin<"__builtin_arm_sasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
def int_arm_shadd16
    : GCCBuiltin<"__builtin_arm_shadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_shasx
    : GCCBuiltin<"__builtin_arm_shasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_shsax
    : GCCBuiltin<"__builtin_arm_shsax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_shsub16
    : GCCBuiltin<"__builtin_arm_shsub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
def int_arm_ssax
    : GCCBuiltin<"__builtin_arm_ssax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
def int_arm_ssub16
    : GCCBuiltin<"__builtin_arm_ssub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
def int_arm_uadd16
    : GCCBuiltin<"__builtin_arm_uadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
def int_arm_uasx
    : GCCBuiltin<"__builtin_arm_uasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
def int_arm_uhadd16
    : GCCBuiltin<"__builtin_arm_uhadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_uhasx
    : GCCBuiltin<"__builtin_arm_uhasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_uhsax
    : GCCBuiltin<"__builtin_arm_uhsax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_uhsub16
    : GCCBuiltin<"__builtin_arm_uhsub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_uqadd16
    : GCCBuiltin<"__builtin_arm_uqadd16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_uqasx
    : GCCBuiltin<"__builtin_arm_uqasx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_uqsax
    : GCCBuiltin<"__builtin_arm_uqsax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_uqsub16
    : GCCBuiltin<"__builtin_arm_uqsub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
def int_arm_usax
    : GCCBuiltin<"__builtin_arm_usax">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
def int_arm_usub16
    : GCCBuiltin<"__builtin_arm_usub16">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;

// Parallel 16-bit multiplication.
// Three signature shapes appear below, all marked IntrNoMem:
//   i32 (i32, i32, i32) : smlad, smladx, smlsd, smlsdx
//   i64 (i32, i32, i64) : smlald, smlaldx, smlsld, smlsldx
//   i32 (i32, i32)      : smuad, smuadx, smusd, smusdx
def int_arm_smlad
    : GCCBuiltin<"__builtin_arm_smlad">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
def int_arm_smladx
    : GCCBuiltin<"__builtin_arm_smladx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
def int_arm_smlald
    : GCCBuiltin<"__builtin_arm_smlald">,
      Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
                [IntrNoMem]>;
def int_arm_smlaldx
    : GCCBuiltin<"__builtin_arm_smlaldx">,
      Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
                [IntrNoMem]>;
def int_arm_smlsd
    : GCCBuiltin<"__builtin_arm_smlsd">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
def int_arm_smlsdx
    : GCCBuiltin<"__builtin_arm_smlsdx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
def int_arm_smlsld
    : GCCBuiltin<"__builtin_arm_smlsld">,
      Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
                [IntrNoMem]>;
def int_arm_smlsldx
    : GCCBuiltin<"__builtin_arm_smlsldx">,
      Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
                [IntrNoMem]>;
def int_arm_smuad
    : GCCBuiltin<"__builtin_arm_smuad">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_smuadx
    : GCCBuiltin<"__builtin_arm_smuadx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_smusd
    : GCCBuiltin<"__builtin_arm_smusd">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_smusdx
    : GCCBuiltin<"__builtin_arm_smusdx">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;


//===----------------------------------------------------------------------===//
// Load, Store and Clear exclusive

// No intrinsic properties are specified for any definition in this section.
//
// ldrex/strex: the pointer operand is llvm_anyptr_ty, so these intrinsics are
// overloaded on the pointer type.  The load returns an i32; the store takes an
// i32 value plus the pointer and returns an i32.
def int_arm_ldrex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>;
def int_arm_strex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>;

// ldaex/stlex: same signatures as ldrex/strex above.
def int_arm_ldaex : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty]>;
def int_arm_stlex : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyptr_ty]>;

// clrex: no operands and no result.
def int_arm_clrex : Intrinsic<[]>;

// strexd/ldrexd: the data is carried as two i32 values (two operands for the
// store, two results for the load) and the pointer is a plain llvm_ptr_ty, so
// these are not overloaded.
def int_arm_strexd : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
    llvm_ptr_ty]>;
def int_arm_ldrexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;

// stlexd/ldaexd: same signatures as strexd/ldrexd above.
def int_arm_stlexd : Intrinsic<[llvm_i32_ty],
                               [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>;
def int_arm_ldaexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;

//===----------------------------------------------------------------------===//
// Data barrier instructions
// Each barrier takes a single i32 operand, returns nothing, specifies no
// intrinsic properties, and is bound to both a GCC builtin and an MS builtin.
def int_arm_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">,
                  Intrinsic<[], [llvm_i32_ty]>;
def int_arm_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">,
                  Intrinsic<[], [llvm_i32_ty]>;
def int_arm_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
                  Intrinsic<[], [llvm_i32_ty]>;

//===----------------------------------------------------------------------===//
// VFP

// FPSCR accessors: get returns an i32 and takes no operands; set takes an i32
// and returns nothing.  Both have an explicitly empty property list.
def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">,
                       Intrinsic<[llvm_i32_ty], [], []>;
def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">,
                       Intrinsic<[], [llvm_i32_ty], []>;
// vcvtr/vcvtru: overloaded on the floating-point operand type
// (llvm_anyfloat_ty); the result type is fixed to llvm_float_ty.  IntrNoMem.
def int_arm_vcvtr     : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
                                  [IntrNoMem]>;
def int_arm_vcvtru    : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
                                  [IntrNoMem]>;

//===----------------------------------------------------------------------===//
// Coprocessor

// Coprocessor load (ldc*) and store (stc*) intrinsics.
// Each one returns nothing and takes (i32, i32, ptr); the two i32 operands
// must be immediates.
def int_arm_ldc
    : GCCBuiltin<"__builtin_arm_ldc">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
def int_arm_ldcl
    : GCCBuiltin<"__builtin_arm_ldcl">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
def int_arm_ldc2
    : GCCBuiltin<"__builtin_arm_ldc2">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
def int_arm_ldc2l
    : GCCBuiltin<"__builtin_arm_ldc2l">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;

def int_arm_stc
    : GCCBuiltin<"__builtin_arm_stc">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
def int_arm_stcl
    : GCCBuiltin<"__builtin_arm_stcl">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
def int_arm_stc2
    : GCCBuiltin<"__builtin_arm_stc2">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
def int_arm_stc2l
    : GCCBuiltin<"__builtin_arm_stc2l">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;

// Move to coprocessor.
// Six i32 operands, no result.  Every operand except operand 2 must be an
// immediate.
def int_arm_mcr
    : GCCBuiltin<"__builtin_arm_mcr">,
      Intrinsic<[],
                [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                 llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>,
                 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
def int_arm_mcr2
    : GCCBuiltin<"__builtin_arm_mcr2">,
      Intrinsic<[],
                [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                 llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>,
                 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;

// Move from coprocessor.
// Five i32 operands, all of which must be immediates; one i32 result.  Bound
// to both a GCC builtin and an MS builtin.
def int_arm_mrc
    : GCCBuiltin<"__builtin_arm_mrc">,
      MSBuiltin<"_MoveFromCoprocessor">,
      Intrinsic<[llvm_i32_ty],
                [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                 llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
                 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
def int_arm_mrc2
    : GCCBuiltin<"__builtin_arm_mrc2">,
      MSBuiltin<"_MoveFromCoprocessor2">,
      Intrinsic<[llvm_i32_ty],
                [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                 llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
                 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;

// Coprocessor data processing.
// Six i32 operands, all of which must be immediates; no result.
def int_arm_cdp
    : GCCBuiltin<"__builtin_arm_cdp">,
      Intrinsic<[],
                [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                 llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
                 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
                 ImmArg<ArgIndex<5>>]>;
def int_arm_cdp2
    : GCCBuiltin<"__builtin_arm_cdp2">,
      Intrinsic<[],
                [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                 llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
                 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
                 ImmArg<ArgIndex<5>>]>;

// Move from two registers to coprocessor.
// Five i32 operands, no result.  Operands 0, 1 and 4 must be immediates;
// operands 2 and 3 are not constrained to immediates.
def int_arm_mcrr
    : Intrinsic<[],
                [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                 llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
                 ImmArg<ArgIndex<4>>]>;
def int_arm_mcrr2
    : Intrinsic<[],
                [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                 llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
                 ImmArg<ArgIndex<4>>]>;

// mrrc/mrrc2 yield two i32 results from three i32 operands, all of which must
// be immediates.
def int_arm_mrrc
    : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
                [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
                 ImmArg<ArgIndex<2>>]>;
def int_arm_mrrc2
    : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
                [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
                 ImmArg<ArgIndex<2>>]>;

//===----------------------------------------------------------------------===//
// CRC32

// Every CRC32 intrinsic is i32 (i32, i32) and marked IntrNoMem.  None of them
// is bound to a GCC builtin in this file.
def int_arm_crc32b
    : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_crc32cb
    : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_crc32h
    : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_crc32ch
    : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_crc32w
    : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_crc32cw
    : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

//===----------------------------------------------------------------------===//
// CMSE

// Each CMSE intrinsic takes a single pointer operand, returns an i32, and is
// marked IntrNoMem.  Note that the GCC builtin names use upper-case suffixes
// (TT, TTT, TTA, TTAT) while the intrinsic names are lower-case.
def int_arm_cmse_tt : GCCBuiltin<"__builtin_arm_cmse_TT">,
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
def int_arm_cmse_ttt : GCCBuiltin<"__builtin_arm_cmse_TTT">,
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
def int_arm_cmse_tta : GCCBuiltin<"__builtin_arm_cmse_TTA">,
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
def int_arm_cmse_ttat : GCCBuiltin<"__builtin_arm_cmse_TTAT">,
    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;

//===----------------------------------------------------------------------===//
// HINT

// Both take a single i32 operand, return nothing, and specify no intrinsic
// properties.
def int_arm_hint : Intrinsic<[], [llvm_i32_ty]>;
def int_arm_dbg : Intrinsic<[], [llvm_i32_ty]>;

//===----------------------------------------------------------------------===//
// UND (reserved undefined sequence)

// Takes a single i32 operand, returns nothing, and specifies no intrinsic
// properties.
def int_arm_undefined : Intrinsic<[], [llvm_i32_ty]>;

//===----------------------------------------------------------------------===//
// Advanced SIMD (NEON)

// The following classes do not correspond directly to GCC builtins.
//
// In every class below the result is llvm_anyvector_ty, so intrinsics derived
// from them are overloaded on the result vector type (overloaded type 0).  The
// operand types are expressed relative to that type, and all classes are
// marked IntrNoMem.

// One operand with the same type as the result.
class Neon_1Arg_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
// One operand of type LLVMExtendedType<0>, i.e. the result is the narrower
// type.
class Neon_1Arg_Narrow_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
// Two operands, both with the same type as the result.
class Neon_2Arg_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
              [IntrNoMem]>;
// Two operands of type LLVMExtendedType<0>, i.e. the result is the narrower
// type.
class Neon_2Arg_Narrow_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>, LLVMExtendedType<0>],
              [IntrNoMem]>;
// Two operands of type LLVMTruncatedType<0>, i.e. the result is the wider
// type.
class Neon_2Arg_Long_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>, LLVMTruncatedType<0>],
              [IntrNoMem]>;
// Three operands, all with the same type as the result.
class Neon_3Arg_Intrinsic
  : Intrinsic<[llvm_anyvector_ty],
              [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
              [IntrNoMem]>;
// Three operands: the first has the result type, the other two are of type
// LLVMTruncatedType<0>.
class Neon_3Arg_Long_Intrinsic
  : Intrinsic<[llvm_anyvector_ty],
              [LLVMMatchType<0>, LLVMTruncatedType<0>, LLVMTruncatedType<0>],
              [IntrNoMem]>;

// One operand with the same type as the result; overloaded on a
// floating-point type (llvm_anyfloat_ty) rather than on a vector type.
class Neon_1FloatArg_Intrinsic
  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;

// Conversion classes.  Result and first operand are overloaded independently
// of each other.  The two fixed-point classes take an additional i32 operand.
class Neon_CvtFxToFP_Intrinsic
  : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
class Neon_CvtFPToFx_Intrinsic
  : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
class Neon_CvtFPtoInt_1Arg_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Two operands sharing one overloaded vector type (overloaded type 1); the
// result vector type is overloaded separately (overloaded type 0).
class Neon_Compare_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
              [IntrNoMem]>;

// The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors.
// Besides the table, VTBL has one other v8i8 argument and VTBX has two.
// Overall, the classes range from 2 to 6 v8i8 arguments.
// These classes are not overloaded: the result and every operand are fixed to
// llvm_v8i8_ty, and the number in each class name is its operand count.
class Neon_Tbl2Arg_Intrinsic
  : Intrinsic<[llvm_v8i8_ty],
              [llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
class Neon_Tbl3Arg_Intrinsic
  : Intrinsic<[llvm_v8i8_ty],
              [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;
class Neon_Tbl4Arg_Intrinsic
  : Intrinsic<[llvm_v8i8_ty],
              [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty],
              [IntrNoMem]>;
class Neon_Tbl5Arg_Intrinsic
  : Intrinsic<[llvm_v8i8_ty],
              [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
               llvm_v8i8_ty], [IntrNoMem]>;
class Neon_Tbl6Arg_Intrinsic
  : Intrinsic<[llvm_v8i8_ty],
              [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty,
               llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>;

// Arithmetic ops

// Every definition inside this scope has its IntrProperties set to
// [IntrNoMem, Commutative], overriding the [IntrNoMem] supplied by the Neon_*
// class it derives from.
let IntrProperties = [IntrNoMem, Commutative] in {

  // Vector Add.
  // (Halving, rounding-halving and narrowing variants; result/operand type
  // relationships come from the class each def derives from.)
  def int_arm_neon_vhadds : Neon_2Arg_Intrinsic;
  def int_arm_neon_vhaddu : Neon_2Arg_Intrinsic;
  def int_arm_neon_vrhadds : Neon_2Arg_Intrinsic;
  def int_arm_neon_vrhaddu : Neon_2Arg_Intrinsic;
  def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic;

  // Vector Multiply.
  // The Neon_2Arg_Long_Intrinsic defs produce a result wider than their
  // operands.
  def int_arm_neon_vmulp : Neon_2Arg_Intrinsic;
  def int_arm_neon_vqdmulh : Neon_2Arg_Intrinsic;
  def int_arm_neon_vqrdmulh : Neon_2Arg_Intrinsic;
  def int_arm_neon_vmulls : Neon_2Arg_Long_Intrinsic;
  def int_arm_neon_vmullu : Neon_2Arg_Long_Intrinsic;
  def int_arm_neon_vmullp : Neon_2Arg_Long_Intrinsic;
  def int_arm_neon_vqdmull : Neon_2Arg_Long_Intrinsic;

  // Vector Maximum.
  def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic;
  def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic;
  def int_arm_neon_vmaxnm : Neon_2Arg_Intrinsic;

  // Vector Minimum.
  def int_arm_neon_vmins : Neon_2Arg_Intrinsic;
  def int_arm_neon_vminu : Neon_2Arg_Intrinsic;
  def int_arm_neon_vminnm : Neon_2Arg_Intrinsic;

  // Vector Reciprocal Step.
  def int_arm_neon_vrecps : Neon_2Arg_Intrinsic;

  // Vector Reciprocal Square Root Step.
  def int_arm_neon_vrsqrts : Neon_2Arg_Intrinsic;
}

// The definitions below sit outside the Commutative scope, so they keep the
// [IntrNoMem] property list of their class.

// Vector Subtract.
def int_arm_neon_vhsubs : Neon_2Arg_Intrinsic;
def int_arm_neon_vhsubu : Neon_2Arg_Intrinsic;
def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic;

// Vector Absolute Compare.
// Result and operand vector types are overloaded separately (see
// Neon_Compare_Intrinsic).
def int_arm_neon_vacge : Neon_Compare_Intrinsic;
def int_arm_neon_vacgt : Neon_Compare_Intrinsic;

// Vector Absolute Differences.
def int_arm_neon_vabds : Neon_2Arg_Intrinsic;
def int_arm_neon_vabdu : Neon_2Arg_Intrinsic;

// Vector Pairwise Add.
def int_arm_neon_vpadd : Neon_2Arg_Intrinsic;

// Vector Pairwise Add Long.
// Note: This is different than the other "long" NEON intrinsics because
// the result vector has half as many elements as the source vector.
// The source and destination vector types must be specified separately.
// Hence both the result and the operand use llvm_anyvector_ty, giving two
// independently overloaded types.
def int_arm_neon_vpaddls : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
                                     [IntrNoMem]>;
def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
                                     [IntrNoMem]>;

// Vector Pairwise Add and Accumulate Long.
// Note: This is similar to vpaddl but the destination vector also appears
// as the first argument.
// The first operand is tied to the result type via LLVMMatchType<0>; the
// second operand is a separately overloaded vector type.
def int_arm_neon_vpadals : Intrinsic<[llvm_anyvector_ty],
                                     [LLVMMatchType<0>, llvm_anyvector_ty],
                                     [IntrNoMem]>;
def int_arm_neon_vpadalu : Intrinsic<[llvm_anyvector_ty],
                                     [LLVMMatchType<0>, llvm_anyvector_ty],
                                     [IntrNoMem]>;

// Vector Pairwise Maximum and Minimum.
// Two operands of the result type each; "s"/"u" suffixes distinguish the
// signed and unsigned versions.
def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic;
def int_arm_neon_vpmaxu : Neon_2Arg_Intrinsic;
def int_arm_neon_vpmins : Neon_2Arg_Intrinsic;
def int_arm_neon_vpminu : Neon_2Arg_Intrinsic;

// Vector Shifts:
//
// The various saturating and rounding vector shift operations need to be
// represented by intrinsics in LLVM, and even the basic VSHL variable shift
// operation cannot be safely translated to LLVM's shift operators.  VSHL can
// be used for both left and right shifts, or even combinations of the two,
// depending on the signs of the shift amounts.  It also has well-defined
// behavior for shift amounts that LLVM leaves undefined.  Only basic shifts
// by constants can be represented with LLVM's shift operators.
//
// The shift counts for these intrinsics are always vectors, even for constant
// shifts, where the constant is replicated.  For consistency with VSHL (and
// other variable shift instructions), left shifts have positive shift counts
// and right shifts have negative shift counts.  This convention is also used
// for constant right shift intrinsics, and to help preserve sanity, the
// intrinsic names use "shift" instead of either "shl" or "shr".  Where
// applicable, signed and unsigned versions of the intrinsics are
// distinguished with "s" and "u" suffixes.  A few NEON shift instructions,
// such as VQSHLU, take signed operands but produce unsigned results; these
// use a "su" suffix.

// Vector Shift.
// Both operands have the result type (Neon_2Arg_Intrinsic), so the shift
// counts are passed as a vector.
def int_arm_neon_vshifts : Neon_2Arg_Intrinsic;
def int_arm_neon_vshiftu : Neon_2Arg_Intrinsic;

// Vector Rounding Shift.
// vrshiftn derives from the narrowing class, so its result is narrower than
// its operands.
def int_arm_neon_vrshifts : Neon_2Arg_Intrinsic;
def int_arm_neon_vrshiftu : Neon_2Arg_Intrinsic;
def int_arm_neon_vrshiftn : Neon_2Arg_Narrow_Intrinsic;

// Vector Saturating Shift.
// The "n" variants derive from the narrowing class.
def int_arm_neon_vqshifts : Neon_2Arg_Intrinsic;
def int_arm_neon_vqshiftu : Neon_2Arg_Intrinsic;
def int_arm_neon_vqshiftsu : Neon_2Arg_Intrinsic;
def int_arm_neon_vqshiftns : Neon_2Arg_Narrow_Intrinsic;
def int_arm_neon_vqshiftnu : Neon_2Arg_Narrow_Intrinsic;
def int_arm_neon_vqshiftnsu : Neon_2Arg_Narrow_Intrinsic;

// Vector Saturating Rounding Shift.
// The "n" variants derive from the narrowing class.
def int_arm_neon_vqrshifts : Neon_2Arg_Intrinsic;
def int_arm_neon_vqrshiftu : Neon_2Arg_Intrinsic;
def int_arm_neon_vqrshiftns : Neon_2Arg_Narrow_Intrinsic;
def int_arm_neon_vqrshiftnu : Neon_2Arg_Narrow_Intrinsic;
def int_arm_neon_vqrshiftnsu : Neon_2Arg_Narrow_Intrinsic;

// Vector Shift and Insert.
// Three operands, all of the result type.
def int_arm_neon_vshiftins : Neon_3Arg_Intrinsic;

// Vector Absolute Value and Saturating Absolute Value.
def int_arm_neon_vabs : Neon_1Arg_Intrinsic;
def int_arm_neon_vqabs : Neon_1Arg_Intrinsic;

// Vector Saturating Negate.
def int_arm_neon_vqneg : Neon_1Arg_Intrinsic;

// Vector Count Leading Sign Bits.
// Only the sign-bit count is declared under this heading; no count-leading-
// zeros intrinsic is defined here.
def int_arm_neon_vcls : Neon_1Arg_Intrinsic;

// Vector Reciprocal Estimate.
def int_arm_neon_vrecpe : Neon_1Arg_Intrinsic;

// Vector Reciprocal Square Root Estimate.
def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic;

// Vector Conversions Between Floating-point and Integer
// One unsigned ("u") and one signed ("s") intrinsic for each of the a/n/p/m
// variants (presumably the rounding mode of the conversion -- confirm against
// the instruction descriptions).
def int_arm_neon_vcvtau : Neon_CvtFPtoInt_1Arg_Intrinsic;
def int_arm_neon_vcvtas : Neon_CvtFPtoInt_1Arg_Intrinsic;
def int_arm_neon_vcvtnu : Neon_CvtFPtoInt_1Arg_Intrinsic;
def int_arm_neon_vcvtns : Neon_CvtFPtoInt_1Arg_Intrinsic;
def int_arm_neon_vcvtpu : Neon_CvtFPtoInt_1Arg_Intrinsic;
def int_arm_neon_vcvtps : Neon_CvtFPtoInt_1Arg_Intrinsic;
def int_arm_neon_vcvtmu : Neon_CvtFPtoInt_1Arg_Intrinsic;
def int_arm_neon_vcvtms : Neon_CvtFPtoInt_1Arg_Intrinsic;

// Vector Conversions Between Floating-point and Fixed-point.
// fp2fx* and fx*2fp use different signature classes, one per direction; the
// trailing "s"/"u" selects the signed or unsigned fixed-point form.
def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic;
def int_arm_neon_vcvtfp2fxu : Neon_CvtFPToFx_Intrinsic;
def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic;
def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic;

// Vector Conversions Between Half-Precision and Single-Precision.
// Both directions use fixed (non-overloaded) types: the half-precision side
// is carried as a <4 x i16> vector, the single-precision side as <4 x float>.
def int_arm_neon_vcvtfp2hf : Intrinsic<[llvm_v4i16_ty],
                                       [llvm_v4f32_ty],
                                       [IntrNoMem]>;
def int_arm_neon_vcvthf2fp : Intrinsic<[llvm_v4f32_ty],
                                       [llvm_v4i16_ty],
                                       [IntrNoMem]>;

// Narrowing Saturating Vector Moves.
// Signed ("s"), unsigned ("u") and signed-to-unsigned ("su") forms, all with
// the one-operand narrowing signature class.
def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic;
def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic;
def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;

// Vector Table Lookup.
// The first 1-4 arguments are the table.
// vtblN is declared with the (N+1)-operand table class: N table operands plus
// one further operand (presumably the index vector -- confirm).
def int_arm_neon_vtbl1 : Neon_Tbl2Arg_Intrinsic;
def int_arm_neon_vtbl2 : Neon_Tbl3Arg_Intrinsic;
def int_arm_neon_vtbl3 : Neon_Tbl4Arg_Intrinsic;
def int_arm_neon_vtbl4 : Neon_Tbl5Arg_Intrinsic;

// Vector Table Extension.
// Some elements of the destination vector may not be updated, so the original
// value of that vector is passed as the first argument.  The next 1-4
// arguments after that are the table.
// vtbxN therefore uses the (N+2)-operand table class.
def int_arm_neon_vtbx1 : Neon_Tbl3Arg_Intrinsic;
def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic;
def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;

// Vector and Scalar Rounding.
// vrintn is the only one declared with Neon_1FloatArg_Intrinsic; the others
// use Neon_1Arg_Intrinsic.  NOTE(review): presumably that class is what admits
// the scalar form mentioned in the heading -- confirm against its definition.
def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
def int_arm_neon_vrintx : Neon_1Arg_Intrinsic;
def int_arm_neon_vrinta : Neon_1Arg_Intrinsic;
def int_arm_neon_vrintz : Neon_1Arg_Intrinsic;
def int_arm_neon_vrintm : Neon_1Arg_Intrinsic;
def int_arm_neon_vrintp : Neon_1Arg_Intrinsic;

// De-interleaving vector loads from N-element structures.
// Source operands are the address (an overloaded pointer) and the alignment.
// vldN returns N vectors that all share one overloaded vector type.  Each
// load is marked as reading memory, and only through its pointer argument.
def int_arm_neon_vld1
    : Intrinsic<[llvm_anyvector_ty],
                [llvm_anyptr_ty, llvm_i32_ty /* alignment */],
                [IntrReadMem, IntrArgMemOnly]>;
def int_arm_neon_vld2
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                [llvm_anyptr_ty, llvm_i32_ty /* alignment */],
                [IntrReadMem, IntrArgMemOnly]>;
def int_arm_neon_vld3
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
                [llvm_anyptr_ty, llvm_i32_ty /* alignment */],
                [IntrReadMem, IntrArgMemOnly]>;
def int_arm_neon_vld4
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
                 LLVMMatchType<0>],
                [llvm_anyptr_ty, llvm_i32_ty /* alignment */],
                [IntrReadMem, IntrArgMemOnly]>;

// Multi-vector vld1 forms returning 2, 3 or 4 vectors of one overloaded type.
// The single operand is a pointer (overloaded on address space) whose pointee
// type is the result vector type; there is no alignment operand.
def int_arm_neon_vld1x2
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                [LLVMAnyPointerType<LLVMMatchType<0>>],
                [IntrReadMem, IntrArgMemOnly]>;
def int_arm_neon_vld1x3
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
                [LLVMAnyPointerType<LLVMMatchType<0>>],
                [IntrReadMem, IntrArgMemOnly]>;
def int_arm_neon_vld1x4
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
                 LLVMMatchType<0>],
                [LLVMAnyPointerType<LLVMMatchType<0>>],
                [IntrReadMem, IntrArgMemOnly]>;

// Vector load N-element structure to one lane.
// Source operands, in order: the address, the N input vectors (they are
// needed because only one lane of each result is assigned), the lane number,
// and the alignment.  The N results have the same type as the N inputs.
def int_arm_neon_vld2lane
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                [llvm_anyptr_ty,
                 LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* lane */, llvm_i32_ty /* alignment */],
                [IntrReadMem, IntrArgMemOnly]>;
def int_arm_neon_vld3lane
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
                [llvm_anyptr_ty,
                 LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* lane */, llvm_i32_ty /* alignment */],
                [IntrReadMem, IntrArgMemOnly]>;
def int_arm_neon_vld4lane
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
                 LLVMMatchType<0>],
                [llvm_anyptr_ty,
                 LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                 LLVMMatchType<0>,
                 llvm_i32_ty /* lane */, llvm_i32_ty /* alignment */],
                [IntrReadMem, IntrArgMemOnly]>;

// Vector load N-element structure to all lanes.
// Source operands are the address and the alignment; vldNdup returns N
// vectors of one overloaded type.
def int_arm_neon_vld2dup
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                [llvm_anyptr_ty, llvm_i32_ty /* alignment */],
                [IntrReadMem, IntrArgMemOnly]>;
def int_arm_neon_vld3dup
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
                [llvm_anyptr_ty, llvm_i32_ty /* alignment */],
                [IntrReadMem, IntrArgMemOnly]>;
def int_arm_neon_vld4dup
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
                 LLVMMatchType<0>],
                [llvm_anyptr_ty, llvm_i32_ty /* alignment */],
                [IntrReadMem, IntrArgMemOnly]>;

// Interleaving vector stores from N-element structures.
// Source operands, in order: the address, the N vectors, and the alignment.
// There are no results, so the pointer is overloaded type 0 and the first
// vector is overloaded type 1; the remaining vectors repeat it through
// LLVMMatchType<1>.
def int_arm_neon_vst1
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty,
                 llvm_i32_ty /* alignment */],
                [IntrArgMemOnly]>;
def int_arm_neon_vst2
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty, LLVMMatchType<1>,
                 llvm_i32_ty /* alignment */],
                [IntrArgMemOnly]>;
def int_arm_neon_vst3
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
                 llvm_i32_ty /* alignment */],
                [IntrArgMemOnly]>;
def int_arm_neon_vst4
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
                 LLVMMatchType<1>,
                 llvm_i32_ty /* alignment */],
                [IntrArgMemOnly]>;

// Multi-vector vst1 forms storing 2, 3 or 4 vectors of one overloaded type.
// Operands are the address followed by the vectors; there is no alignment
// operand.  The pointer operand (argument 0) is additionally marked
// NoCapture.
def int_arm_neon_vst1x2
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty, LLVMMatchType<1>],
                [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
def int_arm_neon_vst1x3
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>],
                [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
def int_arm_neon_vst1x4
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
                 LLVMMatchType<1>],
                [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;

// Vector store N-element structure from one lane.
// Source operands, in order: the address, the N vectors, the lane number,
// and the alignment.
def int_arm_neon_vst2lane
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty, LLVMMatchType<1>,
                 llvm_i32_ty /* lane */, llvm_i32_ty /* alignment */],
                [IntrArgMemOnly]>;
def int_arm_neon_vst3lane
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
                 llvm_i32_ty /* lane */, llvm_i32_ty /* alignment */],
                [IntrArgMemOnly]>;
def int_arm_neon_vst4lane
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
                 LLVMMatchType<1>,
                 llvm_i32_ty /* lane */, llvm_i32_ty /* alignment */],
                [IntrArgMemOnly]>;

// Vector bitwise select.
// Three operands and a result, all of one overloaded vector type.
def int_arm_neon_vbsl
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                [IntrNoMem]>;


// Crypto instructions
//
// Signature classes.  Every crypto intrinsic uses fixed (non-overloaded)
// types and is declared IntrNoMem.

// AES: one or two <16 x i8> operands, <16 x i8> result.
class AES_1Arg_Intrinsic
    : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
class AES_2Arg_Intrinsic
    : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;

// SHA: a scalar i32 -> i32 form, and <4 x i32> forms with two or three
// operands.  The two three-operand classes differ only in the middle operand
// (scalar i32 versus <4 x i32>).
class SHA_1Arg_Intrinsic
    : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
class SHA_2Arg_Intrinsic
    : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
class SHA_3Arg_i32_Intrinsic
    : Intrinsic<[llvm_v4i32_ty],
                [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
                [IntrNoMem]>;
class SHA_3Arg_v4i32_Intrinsic
    : Intrinsic<[llvm_v4i32_ty],
                [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
                [IntrNoMem]>;

// AES intrinsics.
def int_arm_neon_aesd : AES_2Arg_Intrinsic;
def int_arm_neon_aese : AES_2Arg_Intrinsic;
def int_arm_neon_aesimc : AES_1Arg_Intrinsic;
def int_arm_neon_aesmc : AES_1Arg_Intrinsic;

// SHA1 / SHA256 intrinsics, grouped by signature class.
def int_arm_neon_sha1h : SHA_1Arg_Intrinsic;
def int_arm_neon_sha1su1 : SHA_2Arg_Intrinsic;
def int_arm_neon_sha256su0 : SHA_2Arg_Intrinsic;
def int_arm_neon_sha1c : SHA_3Arg_i32_Intrinsic;
def int_arm_neon_sha1m : SHA_3Arg_i32_Intrinsic;
def int_arm_neon_sha1p : SHA_3Arg_i32_Intrinsic;
def int_arm_neon_sha1su0 : SHA_3Arg_v4i32_Intrinsic;
def int_arm_neon_sha256h : SHA_3Arg_v4i32_Intrinsic;
def int_arm_neon_sha256h2 : SHA_3Arg_v4i32_Intrinsic;
def int_arm_neon_sha256su1 : SHA_3Arg_v4i32_Intrinsic;

// Armv8.2-A dot product instructions
// The result and the first operand share one overloaded vector type; the
// second and third operands share a second overloaded vector type.
class Neon_Dot_Intrinsic
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
                [IntrNoMem]>;
def int_arm_neon_udot : Neon_Dot_Intrinsic;
def int_arm_neon_sdot : Neon_Dot_Intrinsic;

// v8.6-A Matrix Multiply Intrinsics
// Same operand shape as Neon_Dot_Intrinsic: the result and the first operand
// share one overloaded vector type, and the two remaining operands share
// another.
class Neon_MatMul_Intrinsic
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
                [IntrNoMem]>;
def int_arm_neon_ummla  : Neon_MatMul_Intrinsic;
def int_arm_neon_smmla  : Neon_MatMul_Intrinsic;
def int_arm_neon_usmmla : Neon_MatMul_Intrinsic;
// usdot is declared in this group but uses the dot-product signature class.
def int_arm_neon_usdot  : Neon_Dot_Intrinsic;

// v8.6-A Bfloat Intrinsics

// Conversions to bfloat: a vector form (<4 x float> to an overloaded vector
// result) and a scalar form (float to bfloat).
def int_arm_neon_vcvtfp2bf : Intrinsic<[llvm_anyvector_ty],
                                       [llvm_v4f32_ty],
                                       [IntrNoMem]>;
def int_arm_neon_vcvtbfp2bf : Intrinsic<[llvm_bfloat_ty],
                                        [llvm_float_ty],
                                        [IntrNoMem]>;

def int_arm_neon_bfdot : Neon_Dot_Intrinsic;

// <4 x float> result from a <4 x float> operand and two <8 x bfloat>
// operands.
def int_arm_neon_bfmmla : Intrinsic<[llvm_v4f32_ty],
                                    [llvm_v4f32_ty,
                                     llvm_v8bf16_ty, llvm_v8bf16_ty],
                                    [IntrNoMem]>;

// Shared signature of bfmlalb/bfmlalt; same types as bfmmla above.
class Neon_BF16FML_Intrinsic : Intrinsic<[llvm_v4f32_ty],
                                         [llvm_v4f32_ty,
                                          llvm_v8bf16_ty, llvm_v8bf16_ty],
                                         [IntrNoMem]>;
def int_arm_neon_bfmlalb : Neon_BF16FML_Intrinsic;
def int_arm_neon_bfmlalt : Neon_BF16FML_Intrinsic;

// Scalar cls intrinsics: an i32 -> i32 form and an i64 -> i32 form (the
// result is i32 in both cases).
def int_arm_cls
    : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_arm_cls64
    : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;

// MVE vctp: each takes a single i32 and returns a vector of i1; the result
// has 16, 8 and 4 elements for vctp8, vctp16 and vctp32 respectively.
def int_arm_mve_vctp8  : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_arm_mve_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_arm_mve_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>;
// vctp64 returns v4i1, to work around v2i1 not being a legal MVE type
def int_arm_mve_vctp64 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>;

// v8.3-A Floating-point complex add
// One intrinsic per supported rotation (90 and 270 degrees); both use the
// two-operand NEON signature class.
def int_arm_neon_vcadd_rot90  : Neon_2Arg_Intrinsic;
def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic;

// GNU eabi mcount
// Declared with no results, no operands and an empty property list.
def int_arm_gnu_eabi_mcount
    : Intrinsic<[], [], []>;

// Conversions between an i32 and an overloaded vector type, in both
// directions (i2v: i32 to vector, v2i: vector to i32).
def int_arm_mve_pred_i2v
    : Intrinsic<[llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_arm_mve_pred_v2i
    : Intrinsic<[llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Result and operand are two independently overloaded vector types.
def int_arm_mve_vreinterpretq
    : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Predicated min / max / absolute difference.
// Operands: two inputs of the result type, an i32 'unsigned' flag, a
// separately overloaded vector, and a final operand of the result type.
// NOTE(review): the last two are presumably the predicate and the value for
// inactive lanes, as annotated on int_arm_mve_vcvt_fp_int_predicated --
// confirm.
def int_arm_mve_min_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_max_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_abd_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
// Predicated add / and / bic / eor / orn / orr / sub / mul.
// All eight share one signature: two inputs of the result type, a separately
// overloaded vector, and a final operand of the result type.
// NOTE(review): the last two are presumably the predicate and the value for
// inactive lanes -- confirm.
def int_arm_mve_add_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_and_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_bic_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_eor_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_orn_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_orr_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_sub_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_mul_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
// Predicated high-half multiplies.
// mulh and rmulh carry an i32 'unsigned' flag after the two inputs; qdmulh
// and qrdmulh do not.  The trailing two operands are a separately overloaded
// vector and an operand of the result type (presumably predicate and
// inactive-lane value -- confirm).
def int_arm_mve_mulh_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_qdmulh_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_rmulh_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_qrdmulh_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
// Predicated mull variants.
// The two inputs share an overloaded vector type (type 1) that is distinct
// from the result type (type 0).  After the i32 flag operand(s) come a third
// overloaded vector and a final operand of the result type (presumably
// predicate and inactive-lane value -- confirm).
def int_arm_mve_mull_int_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [llvm_anyvector_ty, LLVMMatchType<1>,
                 llvm_i32_ty /* unsigned */, llvm_i32_ty /* top */,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_mull_poly_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [llvm_anyvector_ty, LLVMMatchType<1>,
                 llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
// Predicated qadd / hadd / rhadd / qsub / hsub.
// All five share one signature: two inputs of the result type, an i32
// 'unsigned' flag, a separately overloaded vector, and a final operand of the
// result type (the last two presumably being predicate and inactive-lane
// value -- confirm).
def int_arm_mve_qadd_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_hadd_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_rhadd_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_qsub_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_hsub_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
// Predicated vmina / vmaxa / vminnma / vmaxnma.
// Two inputs of the result type followed by a separately overloaded vector
// (presumably the predicate -- confirm).  Unlike most other *_predicated
// intrinsics in this file there is no trailing operand of the result type.
def int_arm_mve_vmina_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
                [IntrNoMem]>;
def int_arm_mve_vmaxa_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
                [IntrNoMem]>;
def int_arm_mve_vminnma_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
                [IntrNoMem]>;
def int_arm_mve_vmaxnma_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
                [IntrNoMem]>;

// Defines an intrinsic pair from a single signature:
//   NAME             - results `rets`, operands `params`;
//   NAME_predicated  - the same, with one extra trailing operand of type
//                      `pred` (an overloaded vector type by default).
// Both records get the property list `props` ([IntrNoMem] by default).
multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,
                         LLVMType pred = llvm_anyvector_ty,
                         list<IntrinsicProperty> props = [IntrNoMem]> {
  def ""
      : Intrinsic<rets, params, props>;
  def _predicated
      : Intrinsic<rets, params # [pred], props>;
}
// Like MVEPredicated, but NAME_predicated receives two extra trailing
// operands: `pred`, then one operand typed like the first result.  When the
// first result is llvm_anyvector_ty that operand is spelled
// LLVMMatchType<0>, so that it refers back to the result's overloaded type
// instead of introducing a new one; otherwise rets[0] is reused directly.
// NOTE(review): the extra operand is presumably the value for inactive lanes
// -- confirm.
multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params,
                          LLVMType pred = llvm_anyvector_ty,
                          list<IntrinsicProperty> props = [IntrNoMem]> {
  def ""
      : Intrinsic<rets, params, props>;
  def _predicated
      : Intrinsic<rets,
                  params # [pred,
                            !if(!eq(rets[0], llvm_anyvector_ty),
                                LLVMMatchType<0>, rets[0])],
                  props>;
}

// Four intrinsic pairs (plain and _predicated) per instantiation, named by
// appending v, av, nmv and nmav to the defm name:
//   v        - i32 result from (i32, vector, i32 'unsigned' flag);
//   av       - i32 result from (i32, vector);
//   nmv/nmav - overloaded float result from (same float type, vector).
multiclass MVE_minmaxv {
  defm v
      : MVEPredicated<[llvm_i32_ty],
                      [llvm_i32_ty, llvm_anyvector_ty,
                       llvm_i32_ty /* unsigned */]>;
  defm av
      : MVEPredicated<[llvm_i32_ty],
                      [llvm_i32_ty, llvm_anyvector_ty]>;
  defm nmv
      : MVEPredicated<[llvm_anyfloat_ty],
                      [LLVMMatchType<0>, llvm_anyvector_ty]>;
  defm nmav
      : MVEPredicated<[llvm_anyfloat_ty],
                      [LLVMMatchType<0>, llvm_anyvector_ty]>;
}
defm int_arm_mve_min : MVE_minmaxv;
defm int_arm_mve_max : MVE_minmaxv;

// addv and addlv take the same operands (an overloaded vector and an i32
// 'unsigned' flag); addv returns i32 and addlv returns i64.  Each also gets a
// _predicated form through MVEPredicated.
defm int_arm_mve_addv
    : MVEPredicated<[llvm_i32_ty],
                    [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;
defm int_arm_mve_addlv
    : MVEPredicated<[llvm_i64_ty],
                    [llvm_anyvector_ty, llvm_i32_ty /* unsigned */]>;

// Defines an intrinsic in a non-predicated and a predicated flavour.
//   NAME             - operands are `flags` followed by `params`.
//   NAME_predicated  - operands are `flags`, then `inactive` (the value for
//                      inactive lanes, can be undef), then `params`, then
//                      `predicate`.
// Both flavours return `rets` and carry `props` ([IntrNoMem] by default).
multiclass MVEMXPredicated<list<LLVMType> rets, list<LLVMType> flags,
                           list<LLVMType> params, LLVMType inactive,
                           LLVMType predicate,
                           list<IntrinsicProperty> props = [IntrNoMem]> {
  def ""
      : Intrinsic<rets, flags # params, props>;
  def _predicated
      : Intrinsic<rets,
                  flags # [inactive] # params # [predicate],
                  props>;
}

// Conversions between <8 x half> and <4 x float>, with fixed types and a
// <4 x i1> predicate in the _predicated forms.  The narrowing form takes the
// <8 x half> value as its first operand; the widening form's predicated
// variant takes a <4 x float> inactive-lanes operand instead.
defm int_arm_mve_vcvt_narrow
    : MVEPredicated<[llvm_v8f16_ty],
                    [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty],
                    llvm_v4i1_ty>;
defm int_arm_mve_vcvt_widen
    : MVEMXPredicated<[llvm_v4f32_ty], [],
                      [llvm_v8f16_ty, llvm_i32_ty],
                      llvm_v4f32_ty, llvm_v4i1_ty>;

// Gather loads / scatter stores with a vector operand and an i32 operand
// (presumably a vector of base addresses and an immediate offset -- confirm).
// The gathers are IntrReadMem and the scatters IntrWriteMem.  The _wb forms
// return one more vector than the plain forms, of the same type as their
// first operand.
defm int_arm_mve_vldr_gather_base
    : MVEPredicated<[llvm_anyvector_ty],
                    [llvm_anyvector_ty, llvm_i32_ty],
                    llvm_anyvector_ty, [IntrReadMem]>;
defm int_arm_mve_vldr_gather_base_wb
    : MVEPredicated<[llvm_anyvector_ty, llvm_anyvector_ty],
                    [LLVMMatchType<1>, llvm_i32_ty],
                    llvm_anyvector_ty, [IntrReadMem]>;
defm int_arm_mve_vstr_scatter_base
    : MVEPredicated<[],
                    [llvm_anyvector_ty, llvm_i32_ty, llvm_anyvector_ty],
                    llvm_anyvector_ty, [IntrWriteMem]>;
defm int_arm_mve_vstr_scatter_base_wb
    : MVEPredicated<[llvm_anyvector_ty],
                    [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
                    llvm_anyvector_ty, [IntrWriteMem]>;

// gather_offset takes three i32 parameters. The first is the size of
// memory element loaded, in bits. The second is a left bit shift to
// apply to each offset in the vector parameter (must be either 0, or
// correspond to the element size of the destination vector type). The
// last is 1 to indicate zero extension (if the load is widening), or
// 0 for sign extension.
//
// scatter_offset has the first two of those parameters, but since it
// narrows rather than widening, it doesn't have the last one.
defm int_arm_mve_vldr_gather_offset
    : MVEPredicated<[llvm_anyvector_ty],
                    [llvm_anyptr_ty, llvm_anyvector_ty,
                     llvm_i32_ty /* memory element size in bits */,
                     llvm_i32_ty /* left shift applied to each offset */,
                     llvm_i32_ty /* 1 = zero-extend, 0 = sign-extend */],
                    llvm_anyvector_ty, [IntrReadMem]>;
defm int_arm_mve_vstr_scatter_offset
    : MVEPredicated<[],
                    [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
                     llvm_i32_ty /* memory element size in bits */,
                     llvm_i32_ty /* left shift applied to each offset */],
                    llvm_anyvector_ty, [IntrWriteMem]>;

// Predicated shifts by an immediate.  Operands: the input vector, the i32
// operand(s), a separately overloaded vector, and a final operand of the
// result type (the last two presumably being predicate and inactive-lane
// value -- confirm).
def int_arm_mve_shl_imm_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;
// Compared with shl_imm_predicated, the extra i32 is the unsigned flag.
def int_arm_mve_shr_imm_predicated
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
                [IntrNoMem]>;

// Immediate shifts, each defined with a _predicated form through
// MVEPredicatedM.  vqshlu_imm has no 'unsigned' flag, and vshll_imm takes an
// input vector type that is overloaded separately from the result type.
defm int_arm_mve_vqshl_imm
    : MVEPredicatedM<[llvm_anyvector_ty],
                     [LLVMMatchType<0>,
                      llvm_i32_ty /*shiftcount*/,
                      llvm_i32_ty /*unsigned*/]>;
defm int_arm_mve_vrshr_imm
    : MVEPredicatedM<[llvm_anyvector_ty],
                     [LLVMMatchType<0>,
                      llvm_i32_ty /*shiftcount*/,
                      llvm_i32_ty /*unsigned*/]>;
defm int_arm_mve_vqshlu_imm
    : MVEPredicatedM<[llvm_anyvector_ty],
                     [LLVMMatchType<0>,
                      llvm_i32_ty /*shiftcount*/]>;
defm int_arm_mve_vshll_imm
    : MVEPredicatedM<[llvm_anyvector_ty],
                     [llvm_anyvector_ty,
                      llvm_i32_ty /*shiftcount*/,
                      llvm_i32_ty /*unsigned*/,
                      llvm_i32_ty /*top-half*/]>;

// vsli / vsri: two vectors of the result type followed by an i32 operand
// (presumably the shift count -- confirm); _predicated forms via
// MVEPredicated.
defm int_arm_mve_vsli
    : MVEPredicated<[llvm_anyvector_ty],
                    [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;
defm int_arm_mve_vsri
    : MVEPredicated<[llvm_anyvector_ty],
                    [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty]>;

// vshrn: one intrinsic (plus its _predicated form) parameterised by i32
// operands.  The first operand has the result type; the second is a
// separately overloaded vector.
defm int_arm_mve_vshrn
    : MVEPredicated<[llvm_anyvector_ty],
                    [LLVMMatchType<0>, llvm_anyvector_ty,
                     llvm_i32_ty /*shiftcount*/,
                     llvm_i32_ty /*saturate*/,
                     llvm_i32_ty /*round*/,
                     llvm_i32_ty /*unsigned-out*/,
                     llvm_i32_ty /*unsigned-in*/,
                     llvm_i32_ty /*top-half*/]>;

// Variable shifts.  The scalar form takes one i32 shift count; the vector
// form takes a separately overloaded vector of shift counts.  They also use
// different multiclasses: MVEPredicated for the scalar form, MVEPredicatedM
// for the vector form.
defm int_arm_mve_vshl_scalar
    : MVEPredicated<[llvm_anyvector_ty],
                    [LLVMMatchType<0>,
                     llvm_i32_ty /*shiftcount*/,
                     llvm_i32_ty /*saturate*/,
                     llvm_i32_ty /*round*/,
                     llvm_i32_ty /*unsigned*/]>;
defm int_arm_mve_vshl_vector
    : MVEPredicatedM<[llvm_anyvector_ty],
                     [LLVMMatchType<0>,
                      llvm_anyvector_ty /*shiftcounts*/,
                      llvm_i32_ty /*saturate*/,
                      llvm_i32_ty /*round*/,
                      llvm_i32_ty /*unsigned*/]>;

// MVE scalar shifts.
//
// ARM_MVE_qrshift_single declares one shift intrinsic.  `value` lists the
// types that make up the shifted value and is used both as the result list
// and as the leading operands.  It is followed by one i32 operand (presumably
// the shift amount -- confirm) and then by whatever `saturate` contains
// (nothing by default).
class ARM_MVE_qrshift_single<list<LLVMType> value,
                             list<LLVMType> saturate = []>
    : Intrinsic<value,
                value # [llvm_i32_ty] # saturate,
                [IntrNoMem]>;

// Declares a pair of shift intrinsics: NAME operating on a single i32, and
// NAMEl operating on a pair of i32s.
multiclass ARM_MVE_qrshift<list<LLVMType> saturate = []> {
  // Most of these shifts come in 32- and 64-bit versions. But only
  // the 64-bit ones have the extra saturation argument (if any).
  def ""
      : ARM_MVE_qrshift_single<[llvm_i32_ty]>;
  def l
      : ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty], saturate>;
}

defm int_arm_mve_urshr : ARM_MVE_qrshift;
defm int_arm_mve_uqshl : ARM_MVE_qrshift;
defm int_arm_mve_srshr : ARM_MVE_qrshift;
defm int_arm_mve_sqshl : ARM_MVE_qrshift;
// These two pass an extra i32 operand to their 64-bit forms.
defm int_arm_mve_uqrshl : ARM_MVE_qrshift<[llvm_i32_ty]>;
defm int_arm_mve_sqrshr : ARM_MVE_qrshift<[llvm_i32_ty]>;

// LSLL and ASRL only have 64-bit versions, not 32.
def int_arm_mve_lsll
    : ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
def int_arm_mve_asrl
    : ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;

// Unpredicated vabd: two vectors of the result type plus an i32 'unsigned'
// flag.
def int_arm_mve_vabd
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */],
                [IntrNoMem]>;
// vadc / vsbc return a vector and an i32, and take an i32 operand after
// their vector inputs (presumably carry out and carry in -- confirm).
// The plain forms take two vectors of the result type.
def int_arm_mve_vadc
    : Intrinsic<[llvm_anyvector_ty, llvm_i32_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
                [IntrNoMem]>;
def int_arm_mve_vsbc
    : Intrinsic<[llvm_anyvector_ty, llvm_i32_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
                [IntrNoMem]>;
// The predicated forms take three vectors of the result type, the i32
// operand, and a trailing separately overloaded vector (presumably the
// predicate -- confirm).
def int_arm_mve_vadc_predicated
    : Intrinsic<[llvm_anyvector_ty, llvm_i32_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty, llvm_anyvector_ty],
                [IntrNoMem]>;
def int_arm_mve_vsbc_predicated
    : Intrinsic<[llvm_anyvector_ty, llvm_i32_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty, llvm_anyvector_ty],
                [IntrNoMem]>;
// vshlc returns the bits shifted out (i32) ahead of the vector result, so
// the vector is still overloaded type 0 and the first operand matches it.
// The predicated form appends one separately overloaded vector operand.
def int_arm_mve_vshlc
    : Intrinsic<[llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
                [LLVMMatchType<0>,
                 llvm_i32_ty /* bits shifted in */,
                 llvm_i32_ty /* shift count */],
                [IntrNoMem]>;
def int_arm_mve_vshlc_predicated
    : Intrinsic<[llvm_i32_ty /* bits shifted out */, llvm_anyvector_ty],
                [LLVMMatchType<0>,
                 llvm_i32_ty /* bits shifted in */,
                 llvm_i32_ty /* shift count */,
                 llvm_anyvector_ty],
                [IntrNoMem]>;
// Unpredicated two-operand arithmetic on vectors of the result type.
// vqdmulh and vqrdmulh take only the two vectors; the others take an
// additional i32 'unsigned' flag.
def int_arm_mve_vmulh
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */],
                [IntrNoMem]>;
def int_arm_mve_vqdmulh
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>],
                [IntrNoMem]>;
def int_arm_mve_vhadd
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */],
                [IntrNoMem]>;
def int_arm_mve_vrhadd
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */],
                [IntrNoMem]>;
def int_arm_mve_vhsub
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */],
                [IntrNoMem]>;
def int_arm_mve_vrmulh
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>,
                 llvm_i32_ty /* unsigned */],
                [IntrNoMem]>;
def int_arm_mve_vqrdmulh
    : Intrinsic<[llvm_anyvector_ty],
                [LLVMMatchType<0>, LLVMMatchType<0>],
                [IntrNoMem]>;
// Unpredicated mull variants.  The two inputs share an overloaded vector
// type (type 1) that is distinct from the result type (type 0).
def int_arm_mve_vmull
    : Intrinsic<[llvm_anyvector_ty],
                [llvm_anyvector_ty, LLVMMatchType<1>,
                 llvm_i32_ty /* unsigned */, llvm_i32_ty /* top */],
                [IntrNoMem]>;
def int_arm_mve_vmull_poly
    : Intrinsic<[llvm_anyvector_ty],
                [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty],
                [IntrNoMem]>;

// The first two parameters are compile-time constants:
// * Halving: 0 means halving (vhcaddq), 1 means non-halving (vcaddq)
//            instruction. Note: the flag is inverted to match the corresponding
//            bit in the instruction encoding
// * Rotation angle: 0 means 90 deg, 1 means 270 deg
defm int_arm_mve_vcaddq
    : MVEMXPredicated<[llvm_anyvector_ty],
                      [llvm_i32_ty, llvm_i32_ty],
                      [LLVMMatchType<0>, LLVMMatchType<0>],
                      LLVMMatchType<0>, llvm_anyvector_ty>;

// The first operand of the following two intrinsics is the rotation angle
// (must be a compile-time constant):
// 0 - 0 deg
// 1 - 90 deg
// 2 - 180 deg
// 3 - 270 deg
defm int_arm_mve_vcmulq
    : MVEMXPredicated<[llvm_anyvector_ty],
                      [llvm_i32_ty],
                      [LLVMMatchType<0>, LLVMMatchType<0>],
                      LLVMMatchType<0>, llvm_anyvector_ty>;

// vcmlaq is declared through MVEPredicated: an i32 operand followed by three
// vectors of the result type, with the predicated form appending one
// separately overloaded vector.
defm int_arm_mve_vcmlaq
    : MVEPredicated<[llvm_anyvector_ty],
                    [llvm_i32_ty,
                     LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                    llvm_anyvector_ty>;

// MVE two- and four-vector loads.  The only operand is the address; the
// results are 2 or 4 vectors of one overloaded type.
def int_arm_mve_vld2q
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                [llvm_anyptr_ty],
                [IntrReadMem, IntrArgMemOnly]>;
def int_arm_mve_vld4q
    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
                 LLVMMatchType<0>],
                [llvm_anyptr_ty],
                [IntrReadMem, IntrArgMemOnly]>;

// MVE two- and four-vector stores.  Operands: the address, the 2 or 4
// vectors, and a trailing i32.
// NOTE(review): the meaning of the trailing i32 is not described here
// (presumably it selects which store of the sequence is emitted) -- confirm
// and document.
def int_arm_mve_vst2q
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty, LLVMMatchType<1>,
                 llvm_i32_ty],
                [IntrWriteMem, IntrArgMemOnly]>;
def int_arm_mve_vst4q
    : Intrinsic<[],
                [llvm_anyptr_ty,
                 llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
                 LLVMMatchType<1>,
                 llvm_i32_ty],
                [IntrWriteMem, IntrArgMemOnly]>;

// MVE vector absolute difference and accumulate across vector
// The first operand is an 'unsigned' flag. The remaining operands are:
// * accumulator
// * first vector operand
// * second vector operand
// * mask (only in predicated versions)
defm int_arm_mve_vabav
    : MVEPredicated<[llvm_i32_ty],
                    [llvm_i32_ty /* unsigned */,
                     llvm_i32_ty /* accumulator */,
                     llvm_anyvector_ty, LLVMMatchType<0>],
                    llvm_anyvector_ty>;

// The following 3 intrinsics are MVE vector reductions with two vector
// operands.
// The first 3 operands are boolean flags (must be compile-time constants):
// * unsigned - the instruction operates on vectors of unsigned values and
//              unsigned scalars
// * subtract - the instruction performs subtraction after multiplication of
//              lane pairs (e.g., vmlsdav vs vmladav)
// * exchange - the instruction exchanges successive even and odd lanes of
//              the first operands before multiplication of lane pairs
//              (e.g., vmladavx vs vmladav)
// The remaining operands are:
// * accumulator
// * first vector operand
// * second vector operand
// * mask (only in predicated versions)

// Version with 32-bit result, vml{a,s}dav[a][x]
// One i32 result and, after the three flags, one i32 accumulator operand.
defm int_arm_mve_vmldava
    : MVEPredicated<[llvm_i32_ty],
                    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                     llvm_i32_ty,
                     llvm_anyvector_ty, LLVMMatchType<0>],
                    llvm_anyvector_ty>;

// Version with 64-bit result, vml{a,s}ldav[a][x]
// Two i32 results and, after the three flags, two i32 accumulator operands.
defm int_arm_mve_vmlldava
    : MVEPredicated<[llvm_i32_ty, llvm_i32_ty],
                    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                     llvm_i32_ty, llvm_i32_ty,
                     llvm_anyvector_ty, LLVMMatchType<0>],
                    llvm_anyvector_ty>;

// Version with 72-bit rounded result, vrml{a,s}ldavh[a][x]
// Same signature as the 64-bit version above.
defm int_arm_mve_vrmlldavha
    : MVEPredicated<[llvm_i32_ty, llvm_i32_ty],
                    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                     llvm_i32_ty, llvm_i32_ty,
                     llvm_anyvector_ty, LLVMMatchType<0>],
                    llvm_anyvector_ty>;

// vidup / vddup / viwdup / vdwdup.  Each returns the generated vector and the
// written-back base.  The "w" forms take an extra 'limit' operand between
// base and step.  There are no flag operands; the predicated forms take an
// inactive-lanes value of the result type and an overloaded predicate.
defm int_arm_mve_vidup
    : MVEMXPredicated<[llvm_anyvector_ty /* output */,
                       llvm_i32_ty /* written-back base */],
                      [],
                      [llvm_i32_ty /* base */, llvm_i32_ty /* step */],
                      LLVMMatchType<0>, llvm_anyvector_ty>;
defm int_arm_mve_vddup
    : MVEMXPredicated<[llvm_anyvector_ty /* output */,
                       llvm_i32_ty /* written-back base */],
                      [],
                      [llvm_i32_ty /* base */, llvm_i32_ty /* step */],
                      LLVMMatchType<0>, llvm_anyvector_ty>;
defm int_arm_mve_viwdup
    : MVEMXPredicated<[llvm_anyvector_ty /* output */,
                       llvm_i32_ty /* written-back base */],
                      [],
                      [llvm_i32_ty /* base */, llvm_i32_ty /* limit */,
                       llvm_i32_ty /* step */],
                      LLVMMatchType<0>, llvm_anyvector_ty>;
defm int_arm_mve_vdwdup
    : MVEMXPredicated<[llvm_anyvector_ty /* output */,
                       llvm_i32_ty /* written-back base */],
                      [],
                      [llvm_i32_ty /* base */, llvm_i32_ty /* limit */,
                       llvm_i32_ty /* step */],
                      LLVMMatchType<0>, llvm_anyvector_ty>;

// Flags:
// * unsigned
// The output and input vectors are overloaded independently of each other;
// the non-flag operands are the input vector followed by an i32 scale.
defm int_arm_mve_vcvt_fix: MVEMXPredicated<
  [llvm_anyvector_ty /* output */], [llvm_i32_ty],
  [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */],
  LLVMMatchType<0>, llvm_anyvector_ty>;

// Result, input and predicate are three independently overloaded vector
// types. Operands, in order: input vector, i32 'unsigned' flag, predicate,
// and an 'inactive' vector of the same type as the result.
def int_arm_mve_vcvt_fp_int_predicated: Intrinsic<
  [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */,
   llvm_anyvector_ty /* predicate */, LLVMMatchType<0> /* inactive */],
  [IntrNoMem]>;

// Instantiates MVEMXPredicated once per suffix, producing the
// int_arm_mve_vcvta, int_arm_mve_vcvtn, int_arm_mve_vcvtp and
// int_arm_mve_vcvtm families. All four share one signature: an i32
// 'unsigned' flag and one input vector, with the output vector overloaded
// independently of the input.
foreach suffix = ["a","n","p","m"] in {
  defm "int_arm_mve_vcvt"#suffix: MVEMXPredicated<
    [llvm_anyvector_ty /* output */], [llvm_i32_ty /* unsigned */],
    [llvm_anyvector_ty /* input */], LLVMMatchType<0>, llvm_anyvector_ty>;
}

// Unary intrinsics with no predicate operand: one vector operand, and a
// result of the same overloaded vector type.
def int_arm_mve_vrintn: Intrinsic<
  [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_arm_mve_vcls: Intrinsic<
  [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;

// One overloaded vector result; the operands are a vector of the result type
// and an i32. The second template list is empty.
// NOTE(review): the meaning of the i32 operand is not annotated here --
// confirm against the instruction definition.
defm int_arm_mve_vbrsr: MVEMXPredicated<
  [llvm_anyvector_ty], [],
  [LLVMMatchType<0>, llvm_i32_ty], LLVMMatchType<0>, llvm_anyvector_ty>;

// The result vector type (overload 0) is independent of the input vector type
// (overload 1); both vector inputs share the input type, followed by an i32.
// NOTE(review): the meaning of the i32 operand is not annotated here --
// confirm against the code that selects this intrinsic.
def int_arm_mve_vqdmull: Intrinsic<
  [llvm_anyvector_ty],
  [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty],
  [IntrNoMem]>;
// Predicated form: same leading operands, then an independently overloaded
// vector and a final vector of the result type.
// NOTE(review): presumably (predicate, inactive) -- confirm against callers.
def int_arm_mve_vqdmull_predicated: Intrinsic<
  [llvm_anyvector_ty],
  [llvm_anyvector_ty, LLVMMatchType<1>, llvm_i32_ty, llvm_anyvector_ty,
   LLVMMatchType<0>],
  [IntrNoMem]>;

// Common shape for the predicated unary intrinsics declared below: a single
// overloaded vector result and three operands -- a vector of the result type,
// an independently overloaded vector, and another vector of the result type.
// NOTE(review): presumably (input, predicate, inactive) in that order --
// confirm against the users of these intrinsics.
class MVESimpleUnaryPredicated
  : Intrinsic<[llvm_anyvector_ty],
              [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
              [IntrNoMem]>;

// Emits one int_arm_mve_<op>_predicated record per listed operation.
foreach op = ["mvn", "abs", "neg", "qabs", "qneg", "clz", "cls",
              "vrintz", "vrintm", "vrintp", "vrinta", "vrintx",
              "vrintn"] in {
  def "int_arm_mve_"#op#"_predicated": MVESimpleUnaryPredicated;
}

// Operands, in order: a vector of the result type, the i32 size to reverse,
// an independently overloaded vector, and a final vector of the result type.
// NOTE(review): the last two are presumably (predicate, inactive) -- confirm.
def int_arm_mve_vrev_predicated: Intrinsic<[llvm_anyvector_ty],
   [LLVMMatchType<0>, llvm_i32_ty /* size to reverse */,
    llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;

// The input vector is overloaded independently of the result; it is followed
// by the 'unsigned' and 'top half' i32 flags, the predicate, and a final
// vector of the result type.
// NOTE(review): the final operand is presumably the 'inactive' value --
// confirm.
def int_arm_mve_vmovl_predicated: Intrinsic<[llvm_anyvector_ty],
   [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, llvm_i32_ty /* top half */,
    llvm_anyvector_ty /* predicate */, LLVMMatchType<0>], [IntrNoMem]>;
// The first operand has the result type and the second is an independently
// overloaded vector; they are followed by the 'top half' flag and the
// predicate. There is no separate trailing result-typed operand here.
def int_arm_mve_vmovn_predicated: Intrinsic<[llvm_anyvector_ty],
   [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i32_ty /* top half */,
    llvm_anyvector_ty /* predicate */], [IntrNoMem]>;

// The first operand has the result type and the second is an independently
// overloaded vector. Three i32 flags follow: unsigned output, unsigned input,
// and top half.
def int_arm_mve_vqmovn: Intrinsic<[llvm_anyvector_ty],
   [LLVMMatchType<0>, llvm_anyvector_ty,
    llvm_i32_ty /* unsigned output */, llvm_i32_ty /* unsigned input */,
    llvm_i32_ty /* top half */], [IntrNoMem]>;
// Predicated form: identical operands plus a trailing, independently
// overloaded predicate vector.
def int_arm_mve_vqmovn_predicated: Intrinsic<[llvm_anyvector_ty],
   [LLVMMatchType<0>, llvm_anyvector_ty,
    llvm_i32_ty /* unsigned output */, llvm_i32_ty /* unsigned input */,
    llvm_i32_ty /* top half */, llvm_anyvector_ty /* pred */], [IntrNoMem]>;

// Predicated multiply-accumulate intrinsics. In all three, the vector
// operands have the result type and the predicate is an independently
// overloaded vector passed last. They differ in operand order and in which
// operand (if any) is an i32 scalar.

// All-vector form: (mult op #1, mult op #2, addend, pred).
def int_arm_mve_fma_predicated: Intrinsic<[llvm_anyvector_ty],
   [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */,
    LLVMMatchType<0> /* addend */, llvm_anyvector_ty /* pred */], [IntrNoMem]>;
// Scalar multiplier form: (mult op #1, addend, scalar mult op #2, pred).
def int_arm_mve_vmla_n_predicated: Intrinsic<[llvm_anyvector_ty],
   [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* addend */,
    llvm_i32_ty /* mult op #2 (scalar) */, llvm_anyvector_ty /* pred */],
   [IntrNoMem]>;
// Scalar addend form: (mult op #1, mult op #2, scalar addend, pred).
def int_arm_mve_vmlas_n_predicated: Intrinsic<[llvm_anyvector_ty],
   [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */,
    llvm_i32_ty /* addend (scalar) */, llvm_anyvector_ty /* pred */],
   [IntrNoMem]>;

// Four families sharing one shape: an overloaded vector result, two vector
// operands of the result type, and one i32 scalar. Only two template
// arguments are passed to MVEPredicated; any further ones take its defaults.
//
// In vqdmlah / vqrdmlah the scalar is the second multiplicand:
//   (mult op #1, addend, scalar mult op #2)
defm int_arm_mve_vqdmlah: MVEPredicated<[llvm_anyvector_ty],
  [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* addend */,
   llvm_i32_ty /* mult op #2 (scalar) */]>;
defm int_arm_mve_vqrdmlah: MVEPredicated<[llvm_anyvector_ty],
  [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* addend */,
   llvm_i32_ty /* mult op #2 (scalar) */]>;
// In vqdmlash / vqrdmlash the scalar is the addend:
//   (mult op #1, mult op #2, scalar addend)
defm int_arm_mve_vqdmlash: MVEPredicated<[llvm_anyvector_ty],
  [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */,
   llvm_i32_ty /* addend (scalar) */]>;
defm int_arm_mve_vqrdmlash: MVEPredicated<[llvm_anyvector_ty],
  [LLVMMatchType<0> /* mult op #1 */, LLVMMatchType<0> /* mult op #2 */,
   llvm_i32_ty /* addend (scalar) */]>;

// Three vector operands of the result type, followed by three i32 flags:
// exchange, round, and subtract. The flags select the variant, so one
// intrinsic family covers all combinations.
// NOTE(review): the roles of the three vector operands are not annotated
// here -- confirm against the code that lowers this intrinsic.
defm int_arm_mve_vqdmlad: MVEPredicated<[llvm_anyvector_ty],
  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
   llvm_i32_ty /* exchange */, llvm_i32_ty /* round */,
   llvm_i32_ty /* subtract */]>;

// CDE (Custom Datapath Extension)

// Defines four intrinsics operating on i32 values, named after the defm that
// instantiates this multiclass:
//   <name>   - one i32 result
//   <name>a  - one i32 result, with an extra i32 accumulator operand
//   <name>d  - two i32 results (lo, hi)
//   <name>da - two i32 results, with an extra accumulator pair
//              (acc_lo, acc_hi)
// Every variant takes the coprocessor number first and an immediate last,
// with the caller-supplied 'args' list in between (after any accumulator
// operands). Both the coprocessor number and the immediate are marked ImmArg.
// The immediate's operand index is !size(args) plus the number of operands
// placed before 'args' (1, 2, 1 and 3 respectively).
multiclass CDEGPRIntrinsics<list<LLVMType> args> {
  // Operands before 'args': coproc -> imm index is size(args) + 1.
  def "" : Intrinsic<
    [llvm_i32_ty],
    !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]),
    [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>;
  // Operands before 'args': coproc, acc -> imm index is size(args) + 2.
  def a : Intrinsic<
    [llvm_i32_ty],
    !listconcat([llvm_i32_ty /* coproc */, llvm_i32_ty /* acc */], args,
                [llvm_i32_ty /* imm */]),
    [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>;

  // Operands before 'args': coproc -> imm index is size(args) + 1.
  def d: Intrinsic<
    [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */],
    !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]),
    [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>;
  // Operands before 'args': coproc, acc_lo, acc_hi -> imm index is
  // size(args) + 3.
  def da: Intrinsic<
    [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */],
    !listconcat([llvm_i32_ty /* coproc */, llvm_i32_ty /* acc_lo */,
                 llvm_i32_ty /* acc_hi */], args, [llvm_i32_ty /* imm */]),
    [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 3)>>]>;
}

// Instantiations with zero, one and two extra i32 operands between the
// coprocessor number (and any accumulator) and the trailing immediate.
defm int_arm_cde_cx1: CDEGPRIntrinsics<[]>;
defm int_arm_cde_cx2: CDEGPRIntrinsics<[llvm_i32_ty]>;
defm int_arm_cde_cx3: CDEGPRIntrinsics<[llvm_i32_ty, llvm_i32_ty]>;

// Defines two intrinsics with an overloaded floating-point result, named
// after the defm that instantiates this multiclass:
//   <name>  - operands (coproc, args..., imm)
//   <name>a - operands (coproc, acc, args..., imm), where the accumulator has
//             the same type as the result
// The coprocessor number (operand 0) and the trailing immediate are marked
// ImmArg. The immediate's index is !size(args) plus the number of operands
// placed before 'args'.
multiclass CDEVCXIntrinsics<list<LLVMType> args> {
  // Operands before 'args': coproc -> imm index is size(args) + 1.
  def "" : Intrinsic<
    [llvm_anyfloat_ty],
    !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]),
    [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>;
  // Operands before 'args': coproc, acc -> imm index is size(args) + 2.
  def a : Intrinsic<
    [llvm_anyfloat_ty],
    !listconcat([llvm_i32_ty /* coproc */,  LLVMMatchType<0> /* acc */],
                args, [llvm_i32_ty /* imm */]),
    [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>;
}

// Instantiations with zero, one and two extra operands. Each extra operand
// is constrained to the same overloaded floating-point type as the result.
defm int_arm_cde_vcx1 : CDEVCXIntrinsics<[]>;
defm int_arm_cde_vcx2 : CDEVCXIntrinsics<[LLVMMatchType<0>]>;
defm int_arm_cde_vcx3 : CDEVCXIntrinsics<[LLVMMatchType<0>, LLVMMatchType<0>]>;

// Defines four vector intrinsics, named after the defm that instantiates this
// multiclass:
//   <name>             - v16i8 result; operands (coproc, args..., imm)
//   <name>a            - v16i8 result; operands (coproc, acc, args..., imm)
//   <name>_predicated  - overloaded vector result; operands
//                        (coproc, inactive, args..., imm, mask)
//   <name>a_predicated - overloaded vector result; operands
//                        (coproc, acc, args..., imm, mask)
// In the predicated variants the inactive/acc operand has the result type and
// the mask is an independently overloaded vector passed last.
// The coprocessor number (operand 0) and the immediate are marked ImmArg. The
// immediate's index is !size(args) plus the number of operands placed before
// 'args'; the trailing mask does not affect it because it comes after.
multiclass CDEVCXVecIntrinsics<list<LLVMType> args> {
  // Operands before 'args': coproc -> imm index is size(args) + 1.
  def "" : Intrinsic<
    [llvm_v16i8_ty],
    !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]),
    [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 1)>>]>;
  // Operands before 'args': coproc, acc -> imm index is size(args) + 2.
  def a : Intrinsic<
    [llvm_v16i8_ty],
    !listconcat([llvm_i32_ty /* coproc */, llvm_v16i8_ty /* acc */],
                args, [llvm_i32_ty /* imm */]),
    [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>;

  // Operands before 'args': coproc, inactive -> imm index is size(args) + 2.
  def _predicated : Intrinsic<
    [llvm_anyvector_ty],
    !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* inactive */],
                args, [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]),
    [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>;
  // Operands before 'args': coproc, acc -> imm index is size(args) + 2.
  def a_predicated : Intrinsic<
    [llvm_anyvector_ty],
    !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */],
                args, [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]),
    [IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<!add(!size(args), 2)>>]>;
}

// Instantiations with zero, one and two extra v16i8 operands. The extra
// operands are fixed at v16i8 in every variant, including the predicated
// ones whose result type is overloaded.
defm int_arm_cde_vcx1q : CDEVCXVecIntrinsics<[]>;
defm int_arm_cde_vcx2q : CDEVCXVecIntrinsics<[llvm_v16i8_ty]>;
defm int_arm_cde_vcx3q : CDEVCXVecIntrinsics<[llvm_v16i8_ty, llvm_v16i8_ty]>;

} // end TargetPrefix