author    heretic <heretic@yandex-team.ru>  2022-02-10 16:45:46 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:45:46 +0300
commit    81eddc8c0b55990194e112b02d127b87d54164a9 (patch)
tree      9142afc54d335ea52910662635b898e79e192e49 /contrib/libs/llvm12/lib/Target/PowerPC
parent    397cbe258b9e064f49c4ca575279f02f39fef76e (diff)
download  ydb-81eddc8c0b55990194e112b02d127b87d54164a9.tar.gz
Restoring authorship annotation for <heretic@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/PowerPC')
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt                 32
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt       14
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make                              18
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt    14
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make                           18
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt   606
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make                           24
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/README.txt                                   1204
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt                            676
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt                                1210
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt      14
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make                             18
-rw-r--r--  contrib/libs/llvm12/lib/Target/PowerPC/ya.make                                        16
13 files changed, 1932 insertions, 1932 deletions
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt
index 3a4cf0af9f..2f43d3f272 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt
@@ -1,16 +1,16 @@
-====================Apache-2.0 WITH LLVM-exception====================
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+====================Apache-2.0 WITH LLVM-exception====================
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt
index a4433625d4..c62d353021 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt
@@ -1,7 +1,7 @@
-====================Apache-2.0 WITH LLVM-exception====================
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+====================Apache-2.0 WITH LLVM-exception====================
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make
index 2388d58641..24183440dc 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make
@@ -2,15 +2,15 @@
LIBRARY()
-OWNER(
- orivej
- g:cpp-contrib
-)
-
-LICENSE(Apache-2.0 WITH LLVM-exception)
-
-LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-
+OWNER(
+ orivej
+ g:cpp-contrib
+)
+
+LICENSE(Apache-2.0 WITH LLVM-exception)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
PEERDIR(
contrib/libs/llvm12
contrib/libs/llvm12/include
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt
index a4433625d4..c62d353021 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt
@@ -1,7 +1,7 @@
-====================Apache-2.0 WITH LLVM-exception====================
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+====================Apache-2.0 WITH LLVM-exception====================
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make
index c43266cf40..a412740df2 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make
@@ -2,15 +2,15 @@
LIBRARY()
-OWNER(
- orivej
- g:cpp-contrib
-)
-
-LICENSE(Apache-2.0 WITH LLVM-exception)
-
-LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-
+OWNER(
+ orivej
+ g:cpp-contrib
+)
+
+LICENSE(Apache-2.0 WITH LLVM-exception)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
PEERDIR(
contrib/libs/llvm12
contrib/libs/llvm12/include
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt
index b0b34714ca..ad3879fc45 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt
@@ -1,303 +1,303 @@
-====================Apache-2.0 WITH LLVM-exception====================
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-
-====================File: LICENSE.TXT====================
-==============================================================================
-The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
-==============================================================================
-
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
-
----- LLVM Exceptions to the Apache 2.0 License ----
-
-As an exception, if, as a result of your compiling your source code, portions
-of this Software are embedded into an Object form of such source code, you
-may redistribute such embedded portions in such Object form without complying
-with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
-
-In addition, if you combine or link compiled forms of this Software with
-software that is licensed under the GPLv2 ("Combined Software") and if a
-court of competent jurisdiction determines that the patent provision (Section
-3), the indemnity provision (Section 9) or other Section of the License
-conflicts with the conditions of the GPLv2, you may retroactively and
-prospectively choose to deem waived or otherwise exclude such Section(s) of
-the License, but only in their entirety and only with respect to the Combined
-Software.
-
-==============================================================================
-Software from third parties included in the LLVM Project:
-==============================================================================
-The LLVM Project contains third party software which is under different license
-terms. All such code will be identified clearly using at least one of two
-mechanisms:
-1) It will be in a separate directory tree with its own `LICENSE.txt` or
- `LICENSE` file at the top containing the specific license and restrictions
- which apply to that software, or
-2) It will contain specific license and restriction terms at the top of every
- file.
-
-==============================================================================
-Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy):
-==============================================================================
-University of Illinois/NCSA
-Open Source License
-
-Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign.
-All rights reserved.
-
-Developed by:
-
- LLVM Team
-
- University of Illinois at Urbana-Champaign
-
- http://llvm.org
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal with
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimers.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimers in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the names of the LLVM Team, University of Illinois at
- Urbana-Champaign, nor the names of its contributors may be used to
- endorse or promote products derived from this Software without specific
- prior written permission.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-SOFTWARE.
-
-
-
-====================File: include/llvm/Support/LICENSE.TXT====================
-LLVM System Interface Library
--------------------------------------------------------------------------------
-The LLVM System Interface Library is licensed under the Illinois Open Source
-License and has the following additional copyright:
-
-Copyright (C) 2004 eXtensible Systems, Inc.
-
-
-====================NCSA====================
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+====================Apache-2.0 WITH LLVM-exception====================
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+
+====================File: LICENSE.TXT====================
+==============================================================================
+The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
+==============================================================================
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+
+---- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
+==============================================================================
+Software from third parties included in the LLVM Project:
+==============================================================================
+The LLVM Project contains third party software which is under different license
+terms. All such code will be identified clearly using at least one of two
+mechanisms:
+1) It will be in a separate directory tree with its own `LICENSE.txt` or
+ `LICENSE` file at the top containing the specific license and restrictions
+ which apply to that software, or
+2) It will contain specific license and restriction terms at the top of every
+ file.
+
+==============================================================================
+Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy):
+==============================================================================
+University of Illinois/NCSA
+Open Source License
+
+Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign.
+All rights reserved.
+
+Developed by:
+
+ LLVM Team
+
+ University of Illinois at Urbana-Champaign
+
+ http://llvm.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimers.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimers in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the names of the LLVM Team, University of Illinois at
+ Urbana-Champaign, nor the names of its contributors may be used to
+ endorse or promote products derived from this Software without specific
+ prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+
+
+
+====================File: include/llvm/Support/LICENSE.TXT====================
+LLVM System Interface Library
+-------------------------------------------------------------------------------
+The LLVM System Interface Library is licensed under the Illinois Open Source
+License and has the following additional copyright:
+
+Copyright (C) 2004 eXtensible Systems, Inc.
+
+
+====================NCSA====================
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make
index 0e037d61de..903dc6ec7f 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make
@@ -2,18 +2,18 @@
LIBRARY()
-OWNER(
- orivej
- g:cpp-contrib
-)
-
-LICENSE(
- Apache-2.0 WITH LLVM-exception AND
- NCSA
-)
-
-LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-
+OWNER(
+ orivej
+ g:cpp-contrib
+)
+
+LICENSE(
+ Apache-2.0 WITH LLVM-exception AND
+ NCSA
+)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
PEERDIR(
contrib/libs/llvm12
contrib/libs/llvm12/include
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/README.txt b/contrib/libs/llvm12/lib/Target/PowerPC/README.txt
index 0902298a4f..492eb22af2 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/README.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/README.txt
@@ -1,607 +1,607 @@
-//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
-
-TODO:
-* lmw/stmw pass a la arm load store optimizer for prolog/epilog
-
-===-------------------------------------------------------------------------===
-
-This code:
-
-unsigned add32carry(unsigned sum, unsigned x) {
- unsigned z = sum + x;
- if (sum + x < x)
- z++;
- return z;
-}
-
-Should compile to something like:
-
- addc r3,r3,r4
- addze r3,r3
-
-instead we get:
-
- add r3, r4, r3
- cmplw cr7, r3, r4
- mfcr r4 ; 1
- rlwinm r4, r4, 29, 31, 31
- add r3, r3, r4
-
-Ick.
-
-===-------------------------------------------------------------------------===
-
-We compile the hottest inner loop of viterbi to:
-
- li r6, 0
- b LBB1_84 ;bb432.i
-LBB1_83: ;bb420.i
- lbzx r8, r5, r7
- addi r6, r7, 1
- stbx r8, r4, r7
-LBB1_84: ;bb432.i
- mr r7, r6
- cmplwi cr0, r7, 143
- bne cr0, LBB1_83 ;bb420.i
-
-The CBE manages to produce:
-
- li r0, 143
- mtctr r0
-loop:
- lbzx r2, r2, r11
- stbx r0, r2, r9
- addi r2, r2, 1
- bdz later
- b loop
-
-This could be much better (bdnz instead of bdz) but it still beats us. If we
-produced this with bdnz, the loop would be a single dispatch group.
-
-===-------------------------------------------------------------------------===
-
-Lump the constant pool for each function into ONE pic object, and reference
-pieces of it as offsets from the start. For functions like this (contrived
-to have lots of constants obviously):
-
-double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; }
-
-We generate:
-
-_X:
- lis r2, ha16(.CPI_X_0)
- lfd f0, lo16(.CPI_X_0)(r2)
- lis r2, ha16(.CPI_X_1)
- lfd f2, lo16(.CPI_X_1)(r2)
- fmadd f0, f1, f0, f2
- lis r2, ha16(.CPI_X_2)
- lfd f1, lo16(.CPI_X_2)(r2)
- lis r2, ha16(.CPI_X_3)
- lfd f2, lo16(.CPI_X_3)(r2)
- fmadd f1, f0, f1, f2
+//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
+
+TODO:
+* lmw/stmw pass a la arm load store optimizer for prolog/epilog
+
+===-------------------------------------------------------------------------===
+
+This code:
+
+unsigned add32carry(unsigned sum, unsigned x) {
+ unsigned z = sum + x;
+ if (sum + x < x)
+ z++;
+ return z;
+}
+
+Should compile to something like:
+
+ addc r3,r3,r4
+ addze r3,r3
+
+instead we get:
+
+ add r3, r4, r3
+ cmplw cr7, r3, r4
+ mfcr r4 ; 1
+ rlwinm r4, r4, 29, 31, 31
+ add r3, r3, r4
+
+Ick.
+
+===-------------------------------------------------------------------------===
+
+We compile the hottest inner loop of viterbi to:
+
+ li r6, 0
+ b LBB1_84 ;bb432.i
+LBB1_83: ;bb420.i
+ lbzx r8, r5, r7
+ addi r6, r7, 1
+ stbx r8, r4, r7
+LBB1_84: ;bb432.i
+ mr r7, r6
+ cmplwi cr0, r7, 143
+ bne cr0, LBB1_83 ;bb420.i
+
+The CBE manages to produce:
+
+ li r0, 143
+ mtctr r0
+loop:
+ lbzx r2, r2, r11
+ stbx r0, r2, r9
+ addi r2, r2, 1
+ bdz later
+ b loop
+
+This could be much better (bdnz instead of bdz) but it still beats us. If we
+produced this with bdnz, the loop would be a single dispatch group.
+
+===-------------------------------------------------------------------------===
+
+Lump the constant pool for each function into ONE pic object, and reference
+pieces of it as offsets from the start. For functions like this (contrived
+to have lots of constants obviously):
+
+double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; }
+
+We generate:
+
+_X:
+ lis r2, ha16(.CPI_X_0)
+ lfd f0, lo16(.CPI_X_0)(r2)
+ lis r2, ha16(.CPI_X_1)
+ lfd f2, lo16(.CPI_X_1)(r2)
+ fmadd f0, f1, f0, f2
+ lis r2, ha16(.CPI_X_2)
+ lfd f1, lo16(.CPI_X_2)(r2)
+ lis r2, ha16(.CPI_X_3)
+ lfd f2, lo16(.CPI_X_3)(r2)
+ fmadd f1, f0, f1, f2
+ blr
+
+It would be better to materialize .CPI_X into a register, then use immediates
+off of the register to avoid the lis's. This is even more important in PIC
+mode.
+
+Note that this (and the static variable version) is discussed here for GCC:
+http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
+
+Here's another example (the sgn function):
+double testf(double a) {
+ return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0);
+}
+
+it produces a BB like this:
+LBB1_1: ; cond_true
+ lis r2, ha16(LCPI1_0)
+ lfs f0, lo16(LCPI1_0)(r2)
+ lis r2, ha16(LCPI1_1)
+ lis r3, ha16(LCPI1_2)
+ lfs f2, lo16(LCPI1_2)(r3)
+ lfs f3, lo16(LCPI1_1)(r2)
+ fsub f0, f0, f1
+ fsel f1, f0, f2, f3
blr
-
-It would be better to materialize .CPI_X into a register, then use immediates
-off of the register to avoid the lis's. This is even more important in PIC
-mode.
-
-Note that this (and the static variable version) is discussed here for GCC:
-http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
-
-Here's another example (the sgn function):
-double testf(double a) {
- return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0);
-}
-
-it produces a BB like this:
-LBB1_1: ; cond_true
- lis r2, ha16(LCPI1_0)
- lfs f0, lo16(LCPI1_0)(r2)
- lis r2, ha16(LCPI1_1)
- lis r3, ha16(LCPI1_2)
- lfs f2, lo16(LCPI1_2)(r3)
- lfs f3, lo16(LCPI1_1)(r2)
- fsub f0, f0, f1
- fsel f1, f0, f2, f3
- blr
-
-===-------------------------------------------------------------------------===
-
-PIC Code Gen IPO optimization:
-
-Squish small scalar globals together into a single global struct, allowing the
-address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size
-of the GOT on targets with one).
-
-Note that this is discussed here for GCC:
-http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
-
-===-------------------------------------------------------------------------===
-
-Fold add and sub with constant into non-extern, non-weak addresses so this:
-
-static int a;
-void bar(int b) { a = b; }
-void foo(unsigned char *c) {
- *c = a;
-}
-
-So that
-
-_foo:
- lis r2, ha16(_a)
- la r2, lo16(_a)(r2)
- lbz r2, 3(r2)
- stb r2, 0(r3)
- blr
-
-Becomes
-
-_foo:
- lis r2, ha16(_a+3)
- lbz r2, lo16(_a+3)(r2)
- stb r2, 0(r3)
- blr
-
-===-------------------------------------------------------------------------===
-
-We should compile these two functions to the same thing:
-
-#include <stdlib.h>
-void f(int a, int b, int *P) {
- *P = (a-b)>=0?(a-b):(b-a);
-}
-void g(int a, int b, int *P) {
- *P = abs(a-b);
-}
-
-Further, they should compile to something better than:
-
-_g:
- subf r2, r4, r3
- subfic r3, r2, 0
- cmpwi cr0, r2, -1
- bgt cr0, LBB2_2 ; entry
-LBB2_1: ; entry
- mr r2, r3
-LBB2_2: ; entry
- stw r2, 0(r5)
- blr
-
-GCC produces:
-
-_g:
- subf r4,r4,r3
- srawi r2,r4,31
- xor r0,r2,r4
- subf r0,r2,r0
- stw r0,0(r5)
- blr
-
-... which is much nicer.
-
-This theoretically may help improve twolf slightly (used in dimbox.c:142?).
-
-===-------------------------------------------------------------------------===
-
-PR5945: This:
-define i32 @clamp0g(i32 %a) {
-entry:
- %cmp = icmp slt i32 %a, 0
- %sel = select i1 %cmp, i32 0, i32 %a
- ret i32 %sel
-}
-
-Is compiled to this with the PowerPC (32-bit) backend:
-
-_clamp0g:
- cmpwi cr0, r3, 0
- li r2, 0
- blt cr0, LBB1_2
-; %bb.1: ; %entry
- mr r2, r3
-LBB1_2: ; %entry
- mr r3, r2
- blr
-
-This could be reduced to the much simpler:
-
-_clamp0g:
- srawi r2, r3, 31
- andc r3, r3, r2
- blr
-
-===-------------------------------------------------------------------------===
-
-int foo(int N, int ***W, int **TK, int X) {
- int t, i;
-
- for (t = 0; t < N; ++t)
- for (i = 0; i < 4; ++i)
- W[t / X][i][t % X] = TK[i][t];
-
- return 5;
-}
-
-We generate relatively atrocious code for this loop compared to gcc.
-
-We could also strength reduce the rem and the div:
-http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf
-
-===-------------------------------------------------------------------------===
-
-We generate ugly code for this:
-
-void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
- unsigned code = 0;
- if(dx < -dw) code |= 1;
- if(dx > dw) code |= 2;
- if(dy < -dw) code |= 4;
- if(dy > dw) code |= 8;
- if(dz < -dw) code |= 16;
- if(dz > dw) code |= 32;
- *ret = code;
-}
-
-===-------------------------------------------------------------------------===
-
-%struct.B = type { i8, [3 x i8] }
-
-define void @bar(%struct.B* %b) {
-entry:
- %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
- %tmp = load i32* %tmp ; <uint> [#uses=1]
- %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
- %tmp4 = load i32* %tmp3 ; <uint> [#uses=1]
- %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2]
- %tmp9 = load i32* %tmp8 ; <uint> [#uses=1]
- %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1]
- %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1]
- %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1]
- %tmp11 = or i32 %tmp1415, %tmp.masked ; <uint> [#uses=1]
- %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1]
- %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1]
- store i32 %tmp13, i32* %tmp8
- ret void
-}
-
-We emit:
-
-_foo:
- lwz r2, 0(r3)
- slwi r4, r2, 1
- or r4, r4, r2
- rlwimi r2, r4, 0, 0, 0
- stw r2, 0(r3)
- blr
-
-We could collapse a bunch of those ORs and ANDs and generate the following
-equivalent code:
-
-_foo:
- lwz r2, 0(r3)
- rlwinm r4, r2, 1, 0, 0
- or r2, r2, r4
- stw r2, 0(r3)
- blr
-
-===-------------------------------------------------------------------------===
-
-Consider a function like this:
-
-float foo(float X) { return X + 1234.4123f; }
-
-The FP constant ends up in the constant pool, so we need to get the LR register.
- This ends up producing code like this:
-
-_foo:
-.LBB_foo_0: ; entry
- mflr r11
-*** stw r11, 8(r1)
- bl "L00000$pb"
-"L00000$pb":
- mflr r2
- addis r2, r2, ha16(.CPI_foo_0-"L00000$pb")
- lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2)
- fadds f1, f1, f0
-*** lwz r11, 8(r1)
- mtlr r11
- blr
-
-This is functional, but there is no reason to spill the LR register all the way
-to the stack (the two marked instrs): spilling it to a GPR is quite enough.
-
-Implementing this will require some codegen improvements. Nate writes:
-
-"So basically what we need to support the "no stack frame save and restore" is a
-generalization of the LR optimization to "callee-save regs".
-
-Currently, we have LR marked as a callee-save reg. The register allocator sees
-that it's callee save, and spills it directly to the stack.
-
-Ideally, something like this would happen:
-
-LR would be in a separate register class from the GPRs. The class of LR would be
-marked "unspillable". When the register allocator came across an unspillable
-reg, it would ask "what is the best class to copy this into that I *can* spill"
-If it gets a class back, which it will in this case (the gprs), it grabs a free
-register of that class. If it is then later necessary to spill that reg, so be
-it.
-
-===-------------------------------------------------------------------------===
-
-We compile this:
-int test(_Bool X) {
- return X ? 524288 : 0;
-}
-
-to:
-_test:
- cmplwi cr0, r3, 0
- lis r2, 8
- li r3, 0
- beq cr0, LBB1_2 ;entry
-LBB1_1: ;entry
- mr r3, r2
-LBB1_2: ;entry
- blr
-
-instead of:
-_test:
- addic r2,r3,-1
- subfe r0,r2,r3
- slwi r3,r0,19
- blr
-
-This sort of thing occurs a lot due to globalopt.
-
-===-------------------------------------------------------------------------===
-
-We compile:
-
-define i32 @bar(i32 %x) nounwind readnone ssp {
-entry:
- %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
- %neg = sext i1 %0 to i32 ; <i32> [#uses=1]
- ret i32 %neg
-}
-
+
+===-------------------------------------------------------------------------===
+
+PIC Code Gen IPO optimization:
+
+Squish small scalar globals together into a single global struct, allowing the
+address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size
+of the GOT on targets with one).
+
+Note that this is discussed here for GCC:
+http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
+
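
As a sketch of the squishing idea above (not from the README; names invented):
before the transformation each global needs its own PIC address computation,
while afterwards one base address can be CSE'd and the fields reached at small
constant offsets from it.

    /* before: three separate globals, three PIC address computations */
    static int xpos, ypos, zpos;
    int sum_separate(void) { return xpos + ypos + zpos; }

    /* after: one global struct, one CSE'd base address */
    static struct { int x, y, z; } coords;
    int sum_squished(void) { return coords.x + coords.y + coords.z; }
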
+===-------------------------------------------------------------------------===
+
+Fold add and sub with constant into non-extern, non-weak addresses so this:
+
+static int a;
+void bar(int b) { a = b; }
+void foo(unsigned char *c) {
+ *c = a;
+}
+
+So that
+
+_foo:
+ lis r2, ha16(_a)
+ la r2, lo16(_a)(r2)
+ lbz r2, 3(r2)
+ stb r2, 0(r3)
+ blr
+
+Becomes
+
+_foo:
+ lis r2, ha16(_a+3)
+ lbz r2, lo16(_a+3)(r2)
+ stb r2, 0(r3)
+ blr
+
+===-------------------------------------------------------------------------===
+
+We should compile these two functions to the same thing:
+
+#include <stdlib.h>
+void f(int a, int b, int *P) {
+ *P = (a-b)>=0?(a-b):(b-a);
+}
+void g(int a, int b, int *P) {
+ *P = abs(a-b);
+}
+
+Further, they should compile to something better than:
+
+_g:
+ subf r2, r4, r3
+ subfic r3, r2, 0
+ cmpwi cr0, r2, -1
+ bgt cr0, LBB2_2 ; entry
+LBB2_1: ; entry
+ mr r2, r3
+LBB2_2: ; entry
+ stw r2, 0(r5)
+ blr
+
+GCC produces:
+
+_g:
+ subf r4,r4,r3
+ srawi r2,r4,31
+ xor r0,r2,r4
+ subf r0,r2,r0
+ stw r0,0(r5)
+ blr
+
+... which is much nicer.
+
+This theoretically may help improve twolf slightly (used in dimbox.c:142?).
+
+===-------------------------------------------------------------------------===
+
+PR5945: This:
+define i32 @clamp0g(i32 %a) {
+entry:
+ %cmp = icmp slt i32 %a, 0
+ %sel = select i1 %cmp, i32 0, i32 %a
+ ret i32 %sel
+}
+
+Is compiled to this with the PowerPC (32-bit) backend:
+
+_clamp0g:
+ cmpwi cr0, r3, 0
+ li r2, 0
+ blt cr0, LBB1_2
+; %bb.1: ; %entry
+ mr r2, r3
+LBB1_2: ; %entry
+ mr r3, r2
+ blr
+
+This could be reduced to the much simpler:
+
+_clamp0g:
+ srawi r2, r3, 31
+ andc r3, r3, r2
+ blr
+
+===-------------------------------------------------------------------------===
+
+int foo(int N, int ***W, int **TK, int X) {
+ int t, i;
+
+ for (t = 0; t < N; ++t)
+ for (i = 0; i < 4; ++i)
+ W[t / X][i][t % X] = TK[i][t];
+
+ return 5;
+}
+
+We generate relatively atrocious code for this loop compared to gcc.
+
+We could also strength reduce the rem and the div:
+http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf
+
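
A sketch of what that strength reduction could look like at the C level
(names q and r are invented; assumes X > 0): keep t / X and t % X in
induction variables instead of computing a div and a rem on every iteration.

    int foo_reduced(int N, int ***W, int **TK, int X) {
      int q = 0, r = 0;               /* invariant: q == t / X, r == t % X */
      for (int t = 0; t < N; ++t) {
        for (int i = 0; i < 4; ++i)
          W[q][i][r] = TK[i][t];
        if (++r == X) {               /* remainder wrapped: bump quotient */
          r = 0;
          ++q;
        }
      }
      return 5;
    }
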
+===-------------------------------------------------------------------------===
+
+We generate ugly code for this:
+
+void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
+ unsigned code = 0;
+ if(dx < -dw) code |= 1;
+ if(dx > dw) code |= 2;
+ if(dy < -dw) code |= 4;
+ if(dy > dw) code |= 8;
+ if(dz < -dw) code |= 16;
+ if(dz > dw) code |= 32;
+ *ret = code;
+}
+
+===-------------------------------------------------------------------------===
+
+%struct.B = type { i8, [3 x i8] }
+
+define void @bar(%struct.B* %b) {
+entry:
+ %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
+ %tmp = load i32* %tmp ; <uint> [#uses=1]
+ %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
+ %tmp4 = load i32* %tmp3 ; <uint> [#uses=1]
+ %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2]
+ %tmp9 = load i32* %tmp8 ; <uint> [#uses=1]
+ %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1]
+ %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1]
+ %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1]
+ %tmp11 = or i32 %tmp1415, %tmp.masked ; <uint> [#uses=1]
+ %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1]
+ %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1]
+ store i32 %tmp13, i32* %tmp8
+ ret void
+}
+
+We emit:
+
+_foo:
+ lwz r2, 0(r3)
+ slwi r4, r2, 1
+ or r4, r4, r2
+ rlwimi r2, r4, 0, 0, 0
+ stw r2, 0(r3)
+ blr
+
+We could collapse a bunch of those ORs and ANDs and generate the following
+equivalent code:
+
+_foo:
+ lwz r2, 0(r3)
+ rlwinm r4, r2, 1, 0, 0
+ or r2, r2, r4
+ stw r2, 0(r3)
+ blr
+
+===-------------------------------------------------------------------------===
+
+Consider a function like this:
+
+float foo(float X) { return X + 1234.4123f; }
+
+The FP constant ends up in the constant pool, so we need to get the LR register.
+ This ends up producing code like this:
+
+_foo:
+.LBB_foo_0: ; entry
+ mflr r11
+*** stw r11, 8(r1)
+ bl "L00000$pb"
+"L00000$pb":
+ mflr r2
+ addis r2, r2, ha16(.CPI_foo_0-"L00000$pb")
+ lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2)
+ fadds f1, f1, f0
+*** lwz r11, 8(r1)
+ mtlr r11
+ blr
+
+This is functional, but there is no reason to spill the LR register all the way
+to the stack (the two marked instrs): spilling it to a GPR is quite enough.
+
+Implementing this will require some codegen improvements. Nate writes:
+
+"So basically what we need to support the "no stack frame save and restore" is a
+generalization of the LR optimization to "callee-save regs".
+
+Currently, we have LR marked as a callee-save reg. The register allocator sees
+that it's callee save, and spills it directly to the stack.
+
+Ideally, something like this would happen:
+
+LR would be in a separate register class from the GPRs. The class of LR would be
+marked "unspillable". When the register allocator came across an unspillable
+reg, it would ask "what is the best class to copy this into that I *can* spill"
+If it gets a class back, which it will in this case (the gprs), it grabs a free
+register of that class. If it is then later necessary to spill that reg, so be
+it.
+
+===-------------------------------------------------------------------------===
+
+We compile this:
+int test(_Bool X) {
+ return X ? 524288 : 0;
+}
+
to:
-
-_bar:
- cntlzw r2, r3
- slwi r2, r2, 26
- srawi r3, r2, 31
- blr
-
-it would be better to produce:
-
-_bar:
- addic r3,r3,-1
- subfe r3,r3,r3
+_test:
+ cmplwi cr0, r3, 0
+ lis r2, 8
+ li r3, 0
+ beq cr0, LBB1_2 ;entry
+LBB1_1: ;entry
+ mr r3, r2
+LBB1_2: ;entry
blr
-
-===-------------------------------------------------------------------------===
-
-We generate horrible ppc code for this:
-
-#define N 2000000
-double a[N],c[N];
-void simpleloop() {
- int j;
- for (j=0; j<N; j++)
- c[j] = a[j];
-}
-
-LBB1_1: ;bb
- lfdx f0, r3, r4
- addi r5, r5, 1 ;; Extra IV for the exit value compare.
- stfdx f0, r2, r4
- addi r4, r4, 8
-
- xoris r6, r5, 30 ;; This is due to a large immediate.
- cmplwi cr0, r6, 33920
- bne cr0, LBB1_1
-
-//===---------------------------------------------------------------------===//
-
-This:
- #include <algorithm>
- inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
- { return std::make_pair(a + b, a + b < a); }
- bool no_overflow(unsigned a, unsigned b)
- { return !full_add(a, b).second; }
-
-Should compile to:
-
-__Z11no_overflowjj:
- add r4,r3,r4
- subfc r3,r3,r4
- li r3,0
- adde r3,r3,r3
+
+instead of:
+_test:
+ addic r2,r3,-1
+ subfe r0,r2,r3
+ slwi r3,r0,19
+ blr
+
+This sort of thing occurs a lot due to globalopt.
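+
+At the source level the branchless form is a single shift, since 524288 is
+1 << 19 and a _Bool is already 0 or 1 (a sketch of the equivalence, not
+compiler output):
+
+int test(_Bool X) {
+  return (int)X << 19;   /* what the addic/subfe/slwi sequence computes */
+}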
+
+===-------------------------------------------------------------------------===
+
+We compile:
+
+define i32 @bar(i32 %x) nounwind readnone ssp {
+entry:
+ %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
+ %neg = sext i1 %0 to i32 ; <i32> [#uses=1]
+ ret i32 %neg
+}
+
+to:
+
+_bar:
+ cntlzw r2, r3
+ slwi r2, r2, 26
+ srawi r3, r2, 31
+ blr
+
+it would be better to produce:
+
+_bar:
+ addic r3,r3,-1
+ subfe r3,r3,r3
+ blr
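+
+How the carry idiom works, annotated (this computes the sext(x == 0) that the
+IR above asks for, without a compare):
+
+_bar:
+        addic r3,r3,-1    ; CA = 1 iff x != 0 (x - 1 does not borrow)
+        subfe r3,r3,r3    ; r3 + ~r3 + CA = CA - 1: 0 if x != 0, -1 if x == 0
+        blr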
+
+===-------------------------------------------------------------------------===
+
+We generate horrible ppc code for this:
+
+#define N 2000000
+double a[N],c[N];
+void simpleloop() {
+ int j;
+ for (j=0; j<N; j++)
+ c[j] = a[j];
+}
+
+LBB1_1: ;bb
+ lfdx f0, r3, r4
+ addi r5, r5, 1 ;; Extra IV for the exit value compare.
+ stfdx f0, r2, r4
+ addi r4, r4, 8
+
+ xoris r6, r5, 30 ;; This is due to a large immediate.
+ cmplwi cr0, r6, 33920
+ bne cr0, LBB1_1
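+
+A plausible better shape, assuming the trip count is put in CTR; this removes
+both the extra induction variable and the large-immediate compare (a sketch,
+not current output):
+
+        lis r5, 30
+        ori r5, r5, 33920      ;; r5 = 2000000 (0x1E8480)
+        mtctr r5
+LBB1_1:
+        lfdx f0, r3, r4
+        stfdx f0, r2, r4
+        addi r4, r4, 8
+        bdnz LBB1_1            ;; decrement CTR, loop while nonzero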
+
+//===---------------------------------------------------------------------===//
+
+This:
+ #include <algorithm>
+ inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
+ { return std::make_pair(a + b, a + b < a); }
+ bool no_overflow(unsigned a, unsigned b)
+ { return !full_add(a, b).second; }
+
+Should compile to:
+
+__Z11no_overflowjj:
+ add r4,r3,r4
+ subfc r3,r3,r4
+ li r3,0
+ adde r3,r3,r3
+ blr
+
+(or better), not:
+
+__Z11no_overflowjj:
+ add r2, r4, r3
+ cmplw cr7, r2, r3
+ mfcr r2
+ rlwinm r2, r2, 29, 31, 31
+ xori r3, r2, 1
blr
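+
+Why the first sequence works, annotated: subfc computes (a+b) - a and records
+"no borrow" in CA, which is exactly "no unsigned overflow":
+
+__Z11no_overflowjj:
+        add r4,r3,r4      ; r4 = a + b
+        subfc r3,r3,r4    ; CA = 1 iff (a + b) >= a, i.e. the add did not wrap
+        li r3,0
+        adde r3,r3,r3     ; r3 = 0 + 0 + CA = CA
+        blr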
-
-(or better), not:
-
-__Z11no_overflowjj:
- add r2, r4, r3
- cmplw cr7, r2, r3
- mfcr r2
- rlwinm r2, r2, 29, 31, 31
- xori r3, r2, 1
- blr
-
-//===---------------------------------------------------------------------===//
-
-We compile some FP comparisons into an mfcr with two rlwinms and an or. For
-example:
-#include <math.h>
-int test(double x, double y) { return islessequal(x, y);}
-int test2(double x, double y) { return islessgreater(x, y);}
-int test3(double x, double y) { return !islessequal(x, y);}
-
-Compiles into (all three are similar, but the bits differ):
-
-_test:
- fcmpu cr7, f1, f2
- mfcr r2
- rlwinm r3, r2, 29, 31, 31
- rlwinm r2, r2, 31, 31, 31
- or r3, r2, r3
- blr
-
-GCC compiles this into:
-
- _test:
- fcmpu cr7,f1,f2
- cror 30,28,30
- mfcr r3
- rlwinm r3,r3,31,1
+
+//===---------------------------------------------------------------------===//
+
+We compile some FP comparisons into an mfcr with two rlwinms and an or. For
+example:
+#include <math.h>
+int test(double x, double y) { return islessequal(x, y);}
+int test2(double x, double y) { return islessgreater(x, y);}
+int test3(double x, double y) { return !islessequal(x, y);}
+
+Compiles into (all three are similar, but the bits differ):
+
+_test:
+ fcmpu cr7, f1, f2
+ mfcr r2
+ rlwinm r3, r2, 29, 31, 31
+ rlwinm r2, r2, 31, 31, 31
+ or r3, r2, r3
+ blr
+
+GCC compiles this into:
+
+ _test:
+ fcmpu cr7,f1,f2
+ cror 30,28,30
+ mfcr r3
+ rlwinm r3,r3,31,1
+ blr
+
+which is more efficient and can use mfocr. See PR642 for some more context.
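+
+Annotated, with the usual CR numbering assumed (cr7 occupies CR bits 28..31 as
+lt, gt, eq, un, so islessequal is lt|eq):
+
+_test:
+        fcmpu cr7,f1,f2
+        cror 30,28,30        ; cr7.eq |= cr7.lt, so CR bit 30 = (x <= y)
+        mfcr r3
+        rlwinm r3,r3,31,1    ; rotate CR bit 30 into the LSB, mask to one bit
+        blr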
+
+//===---------------------------------------------------------------------===//
+
+void foo(float *data, float d) {
+ long i;
+ for (i = 0; i < 8000; i++)
+ data[i] = d;
+}
+void foo2(float *data, float d) {
+ long i;
+ data--;
+ for (i = 0; i < 8000; i++) {
+ data[1] = d;
+ data++;
+ }
+}
+
+These compile to:
+
+_foo:
+ li r2, 0
+LBB1_1: ; bb
+ addi r4, r2, 4
+ stfsx f1, r3, r2
+ cmplwi cr0, r4, 32000
+ mr r2, r4
+ bne cr0, LBB1_1 ; bb
+ blr
+_foo2:
+ li r2, 0
+LBB2_1: ; bb
+ addi r4, r2, 4
+ stfsx f1, r3, r2
+ cmplwi cr0, r4, 32000
+ mr r2, r4
+ bne cr0, LBB2_1 ; bb
blr
-
-which is more efficient and can use mfocr. See PR642 for some more context.
-
-//===---------------------------------------------------------------------===//
-
-void foo(float *data, float d) {
- long i;
- for (i = 0; i < 8000; i++)
- data[i] = d;
-}
-void foo2(float *data, float d) {
- long i;
- data--;
- for (i = 0; i < 8000; i++) {
- data[1] = d;
- data++;
- }
-}
-
-These compile to:
-
-_foo:
- li r2, 0
-LBB1_1: ; bb
- addi r4, r2, 4
- stfsx f1, r3, r2
- cmplwi cr0, r4, 32000
- mr r2, r4
- bne cr0, LBB1_1 ; bb
- blr
-_foo2:
- li r2, 0
-LBB2_1: ; bb
- addi r4, r2, 4
- stfsx f1, r3, r2
- cmplwi cr0, r4, 32000
- mr r2, r4
- bne cr0, LBB2_1 ; bb
- blr
-
-The 'mr' could be eliminated by folding the add into the cmp.
-
-//===---------------------------------------------------------------------===//
-Codegen for the following (low-probability) case deteriorated considerably
-when the correctness fixes for unordered comparisons went in (PR 642, 58871).
-It should be possible to recover the code quality described in the comments.
-
-; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3
-; This should produce one 'or' or 'cror' instruction per function.
-
-; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3
-; PR2964
-
-define i32 @test(double %x, double %y) nounwind {
-entry:
- %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1]
- %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
- ret i32 %tmp345
-}
-
-define i32 @test2(double %x, double %y) nounwind {
-entry:
- %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1]
- %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
- ret i32 %tmp345
-}
-
-define i32 @test3(double %x, double %y) nounwind {
-entry:
- %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1]
- %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
- ret i32 %tmp34
-}
-
-//===---------------------------------------------------------------------===//
-for the following code:
-
-void foo (float *__restrict__ a, int *__restrict__ b, int n) {
- a[n] = b[n] * 2.321;
-}
-
-we load b[n] into a GPR, then move it to a VSX register and convert it to
-float. We should use VSX scalar integer load instructions to avoid the
-direct moves.
-
-//===----------------------------------------------------------------------===//
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg
-
-; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and
-; should not be generated except with -enable-finite-only-fp-math or the like).
-; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to
-; recognize a more elaborate tree than a simple SETxx.
-
-define double @test_FNEG_sel(double %A, double %B, double %C) {
- %D = fsub double -0.000000e+00, %A ; <double> [#uses=1]
- %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1]
- %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1]
- ret double %E
-}
-
-//===----------------------------------------------------------------------===//
-The save/restore sequence for CR in prolog/epilog is terrible:
-- Each CR subreg is saved individually, rather than doing one save as a unit.
-- On Darwin, the save is done after the decrement of SP, which means the offset
-from SP of the save slot can be too big for a store instruction, which means we
-need an additional register (currently hacked in 96015+96020; the solution there
-is correct, but poor).
-- On SVR4 the same thing can happen, and I don't think saving before the SP
-decrement is safe on that target, as there is no red zone. This is currently
-broken AFAIK, although it's not a target I can exercise.
-The following demonstrates the problem:
-extern void bar(char *p);
-void foo() {
- char x[100000];
- bar(x);
- __asm__("" ::: "cr2");
-}
-
-//===-------------------------------------------------------------------------===
-Naming convention for instruction formats is very haphazard.
-We have agreed on a naming scheme as follows:
-
-<INST_form>{_<OP_type><OP_len>}+
-
-Where:
-INST_form is the instruction format (X-form, etc.)
-OP_type is the operand type - one of OPC (opcode), RD (register destination),
- RS (register source),
- RDp (destination register pair),
- RSp (source register pair), IM (immediate),
- XO (extended opcode)
-OP_len is the length of the operand in bits
-
-VSX register operands would be of length 6 (split across two fields),
-condition register fields of length 3.
-We would not need to denote reserved fields in the names of instruction formats.
-
-//===----------------------------------------------------------------------===//
-
-Instruction fusion was introduced in ISA 2.06 and more opportunities added in
-ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities
-and force instruction pairs to be scheduled together.
-
------------------------------------------------------------------------------
-
-More general handling of any_extend and zero_extend:
-
-See https://reviews.llvm.org/D24924#555306
+
+The 'mr' could be eliminated by folding the add into the cmp.
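+
+A sketch of the folded form: advance the single induction variable first and
+compare the updated value, so the copy disappears (stores still cover offsets
+0..31996):
+
+_foo:
+        li r2, 0
+LBB1_1:
+        stfsx f1, r3, r2
+        addi r2, r2, 4
+        cmplwi cr0, r2, 32000
+        bne cr0, LBB1_1
+        blr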
+
+//===---------------------------------------------------------------------===//
+Codegen for the following (low-probability) case deteriorated considerably
+when the correctness fixes for unordered comparisons went in (PR 642, 58871).
+It should be possible to recover the code quality described in the comments.
+
+; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3
+; This should produce one 'or' or 'cror' instruction per function.
+
+; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3
+; PR2964
+
+define i32 @test(double %x, double %y) nounwind {
+entry:
+ %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1]
+ %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp345
+}
+
+define i32 @test2(double %x, double %y) nounwind {
+entry:
+ %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1]
+ %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp345
+}
+
+define i32 @test3(double %x, double %y) nounwind {
+entry:
+ %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1]
+ %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp34
+}
+
+//===---------------------------------------------------------------------===//
+for the following code:
+
+void foo (float *__restrict__ a, int *__restrict__ b, int n) {
+ a[n] = b[n] * 2.321;
+}
+
+we load b[n] into a GPR, then move it to a VSX register and convert it to
+float. We should use VSX scalar integer load instructions to avoid the
+direct moves.
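+
+A sketch of the preferred sequence (lxsiwax is the VSX scalar integer-word
+load from ISA 2.07; register assignments here are hypothetical, with r4
+assumed to hold the address of b[n]):
+
+        lxsiwax 0, 0, 4      ; load b[n] straight into VSR 0, sign-extended
+        xscvsxddp 0, 0       ; convert in the VSX register, no direct move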
+
+//===----------------------------------------------------------------------===//
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg
+
+; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and
+; should not be generated except with -enable-finite-only-fp-math or the like).
+; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to
+; recognize a more elaborate tree than a simple SETxx.
+
+define double @test_FNEG_sel(double %A, double %B, double %C) {
+ %D = fsub double -0.000000e+00, %A ; <double> [#uses=1]
+ %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1]
+ %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1]
+ ret double %E
+}
+
+//===----------------------------------------------------------------------===//
+The save/restore sequence for CR in prolog/epilog is terrible:
+- Each CR subreg is saved individually, rather than doing one save as a unit.
+- On Darwin, the save is done after the decrement of SP, which means the offset
+from SP of the save slot can be too big for a store instruction, which means we
+need an additional register (currently hacked in 96015+96020; the solution there
+is correct, but poor).
+- On SVR4 the same thing can happen, and I don't think saving before the SP
+decrement is safe on that target, as there is no red zone. This is currently
+broken AFAIK, although it's not a target I can exercise.
+The following demonstrates the problem:
+extern void bar(char *p);
+void foo() {
+ char x[100000];
+ bar(x);
+ __asm__("" ::: "cr2");
+}
+
+//===-------------------------------------------------------------------------===
+Naming convention for instruction formats is very haphazard.
+We have agreed on a naming scheme as follows:
+
+<INST_form>{_<OP_type><OP_len>}+
+
+Where:
+INST_form is the instruction format (X-form, etc.)
+OP_type is the operand type - one of OPC (opcode), RD (register destination),
+ RS (register source),
+ RDp (destination register pair),
+ RSp (source register pair), IM (immediate),
+ XO (extended opcode)
+OP_len is the length of the operand in bits
+
+VSX register operands would be of length 6 (split across two fields),
+condition register fields of length 3.
+We would not need to denote reserved fields in the names of instruction formats.
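+
+As a hypothetical instance of the scheme, an X-form instruction with a 6-bit
+opcode, 5-bit destination and source registers, and a 10-bit extended opcode
+would be named:
+
+X_OPC6_RD5_RS5_XO10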
+
+//===----------------------------------------------------------------------===//
+
+Instruction fusion was introduced in ISA 2.06 and more opportunities added in
+ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities
+and force instruction pairs to be scheduled together.
+
+-----------------------------------------------------------------------------
+
+More general handling of any_extend and zero_extend:
+
+See https://reviews.llvm.org/D24924#555306
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt b/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt
index 47d18ecfca..6d32e76ed8 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -1,338 +1,338 @@
-//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===//
-
-Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector
-registers, to generate better spill code.
-
-//===----------------------------------------------------------------------===//
-
-The first should be a single lvx from the constant pool, the second should be
-a xor/stvx:
-
-void foo(void) {
- int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 };
- bar (x);
-}
-
-#include <string.h>
-void foo(void) {
- int x[8] __attribute__((aligned(128)));
- memset (x, 0, sizeof (x));
- bar (x);
-}
-
-//===----------------------------------------------------------------------===//
-
-Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0:
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763
-
-When -ffast-math is on, we can use 0.0.
-
-//===----------------------------------------------------------------------===//
-
- Consider this:
- v4f32 Vector;
- v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X };
-
-Since we know that "Vector" is 16-byte aligned and we know the element offset
-of ".X", we should change the load into a lve*x instruction, instead of doing
-a load/store/lve*x sequence.
-
-//===----------------------------------------------------------------------===//
-
-Implement passing vectors by value into calls and receiving them as arguments.
-
-//===----------------------------------------------------------------------===//
-
-GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load
-of C1/C2/C3, then a load and vperm of Variable.
-
-//===----------------------------------------------------------------------===//
-
-We need a way to teach tblgen that some operands of an intrinsic are required to
-be constants. The verifier should enforce this constraint.
-
-//===----------------------------------------------------------------------===//
-
-We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte
-aligned stack slot, followed by a load/vperm. We should probably just store it
-to a scalar stack slot, then use lvsl/vperm to load it. If the value is already
-in memory this is a big win.
-
-//===----------------------------------------------------------------------===//
-
-extract_vector_elt of an arbitrary constant vector can be done with the
-following instructions:
-
-vTemp = vec_splat(v0,2); // 2 is the element the src is in.
-vec_ste(&destloc,0,vTemp);
-
-We can do an arbitrary non-constant value by using lvsr/perm/ste.
-
-//===----------------------------------------------------------------------===//
-
-If we want to tie instruction selection into the scheduler, we can do some
-constant formation with different instructions. For example, we can generate
-"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with
-"vsplti 0" or "vxor", each of which use different execution units, thus could
-help scheduling.
-
-This is probably only reasonable for a post-pass scheduler.
-
-//===----------------------------------------------------------------------===//
-
-For this function:
-
-void test(vector float *A, vector float *B) {
- vector float C = (vector float)vec_cmpeq(*A, *B);
- if (!vec_any_eq(*A, *B))
- *B = (vector float){0,0,0,0};
- *A = C;
-}
-
-we get the following basic block:
-
- ...
- lvx v2, 0, r4
- lvx v3, 0, r3
- vcmpeqfp v4, v3, v2
- vcmpeqfp. v2, v3, v2
- bne cr6, LBB1_2 ; cond_next
-
-The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the
-vcmpeqfp. result is used by a branch. This can be improved.
-
-//===----------------------------------------------------------------------===//
-
-The code generated for this is truly awful:
-
-vector float test(float a, float b) {
- return (vector float){ 0.0, a, 0.0, 0.0};
-}
-
-LCPI1_0: ; float
- .space 4
- .text
- .globl _test
- .align 4
-_test:
- mfspr r2, 256
- oris r3, r2, 4096
- mtspr 256, r3
- lis r3, ha16(LCPI1_0)
- addi r4, r1, -32
- stfs f1, -16(r1)
- addi r5, r1, -16
- lfs f0, lo16(LCPI1_0)(r3)
- stfs f0, -32(r1)
- lvx v2, 0, r4
- lvx v3, 0, r5
- vmrghw v3, v3, v2
- vspltw v2, v2, 0
- vmrghw v2, v2, v3
- mtspr 256, r2
- blr
-
-//===----------------------------------------------------------------------===//
-
-int foo(vector float *x, vector float *y) {
- if (vec_all_eq(*x,*y)) return 3245;
- else return 12;
-}
-
-A predicate compare being used in a select_cc should have the same peephole
-applied to it as a predicate compare used by a br_cc. There should be no
-mfcr here:
-
-_foo:
- mfspr r2, 256
- oris r5, r2, 12288
- mtspr 256, r5
- li r5, 12
- li r6, 3245
- lvx v2, 0, r4
- lvx v3, 0, r3
- vcmpeqfp. v2, v3, v2
- mfcr r3, 2
- rlwinm r3, r3, 25, 31, 31
- cmpwi cr0, r3, 0
- bne cr0, LBB1_2 ; entry
-LBB1_1: ; entry
- mr r6, r5
-LBB1_2: ; entry
- mr r3, r6
- mtspr 256, r2
- blr
-
-//===----------------------------------------------------------------------===//
-
-CodeGen/PowerPC/vec_constants.ll has an and operation that should be
-codegen'd to andc. The issue is that the 'all ones' build vector is
-SelectNodeTo'd to a VSPLTISB instruction node before the and/xor is selected,
-which prevents the vnot pattern from matching.
-
-
-//===----------------------------------------------------------------------===//
-
-An alternative to the store/store/load approach for illegal insert element
-lowering would be:
-
-1. store element to any ol' slot
-2. lvx the slot
-3. lvsl 0; splat index; vcmpeq to generate a select mask
-4. lvsl slot + x; vperm to rotate result into correct slot
-5. vsel result together.
-
-//===----------------------------------------------------------------------===//
-
-Should codegen branches on vec_any/vec_all to avoid mfcr. Two examples:
-
-#include <altivec.h>
- int f(vector float a, vector float b)
- {
- int aa = 0;
- if (vec_all_ge(a, b))
- aa |= 0x1;
- if (vec_any_ge(a,b))
- aa |= 0x2;
- return aa;
-}
-
-vector float f(vector float a, vector float b) {
- if (vec_any_eq(a, b))
- return a;
- else
- return b;
-}
-
-//===----------------------------------------------------------------------===//
-
-We should do a little better with eliminating dead stores.
-The stores to the stack are dead since %a and %b are not needed.
-
-; Function Attrs: nounwind
-define <16 x i8> @test_vpmsumb() #0 {
- entry:
- %a = alloca <16 x i8>, align 16
- %b = alloca <16 x i8>, align 16
- store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
- store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
- %0 = load <16 x i8>* %a, align 16
- %1 = load <16 x i8>* %b, align 16
- %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
- ret <16 x i8> %2
-}
-
-
-; Function Attrs: nounwind readnone
-declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
-
-
-Produces the following code with -mtriple=powerpc64-unknown-linux-gnu:
-# %bb.0: # %entry
- addis 3, 2, .LCPI0_0@toc@ha
- addis 4, 2, .LCPI0_1@toc@ha
- addi 3, 3, .LCPI0_0@toc@l
- addi 4, 4, .LCPI0_1@toc@l
- lxvw4x 0, 0, 3
- addi 3, 1, -16
- lxvw4x 35, 0, 4
- stxvw4x 0, 0, 3
- ori 2, 2, 0
- lxvw4x 34, 0, 3
- addi 3, 1, -32
- stxvw4x 35, 0, 3
- vpmsumb 2, 2, 3
- blr
- .long 0
- .quad 0
-
-The two stxvw4x instructions are not needed.
-With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes
-are present too.
-
-//===----------------------------------------------------------------------===//
-
-The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:
-
-define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
- %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
- %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
- %result = add <2 x i64> %x, %tmpvec2
-  ret <2 x i64> %result
-}
-
-This will generate the following instruction sequence:
- std 5, -8(1)
- std 5, -16(1)
- addi 3, 1, -16
- ori 2, 2, 0
- lxvd2x 35, 0, 3
- vaddudm 2, 2, 3
- blr
-
-This will almost certainly cause a load-hit-store hazard.
-Since val is a value parameter, it should not need to be saved onto
-the stack, unless it's being done to set up the vector register. Instead,
-it would be better to splat the value into a vector register, and then
-remove the (dead) stores to the stack.
-
-//===----------------------------------------------------------------------===//
-
-At the moment we always generate a lxsdx in preference to lfd, or stxsdx in
-preference to stfd. When we have a reg-immediate addressing mode, this is a
-poor choice, since we have to load the address into an index register. This
-should be fixed for P7/P8.
-
-//===----------------------------------------------------------------------===//
-
-Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE.
-However, we could actually support both kinds on either endianness, if we check
-for the appropriate shufflevector pattern for each case ... this would cause
-some additional shufflevectors to be recognized and implemented via the
-"swapped" form.
-
-//===----------------------------------------------------------------------===//
-
-There is a utility program called PerfectShuffle that generates a table of the
-shortest instruction sequence for implementing a shufflevector operation on
-PowerPC. However, this was designed for big-endian code generation. We could
-modify this program to create a little endian version of the table. The table
-is used in PPCISelLowering.cpp, PPCTargetLowering::LOWERVECTOR_SHUFFLE().
-
-//===----------------------------------------------------------------------===//
-
-Opportunities to use instructions from PPCInstrVSX.td during code gen
- - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07)
- - Scalar comparisons (xscmpodp and xscmpudp)
- - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp)
-
-Related to this: we currently do not generate the lxvw4x instruction for either
-v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires
-a single target type. This should probably be addressed in the PPCISelDAGToDAG logic.
-
-//===----------------------------------------------------------------------===//
-
-Currently EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT are type-legal only
-for v2f64 with VSX available. We should create custom lowering
-support for the other vector types. Without this support, we generate
-sequences with load-hit-store hazards.
-
-v4f32 can be supported with VSX by shifting the correct element into
-big-endian lane 0, using xscvspdpn to produce a double-precision
-representation of the single-precision value in big-endian
-double-precision lane 0, and reinterpreting lane 0 as an FPR or
-vector-scalar register.
-
-v2i64 can be supported with VSX and P8Vector in the same manner as
-v2f64, followed by a direct move to a GPR.
-
-v4i32 can be supported with VSX and P8Vector by shifting the correct
-element into big-endian lane 1, using a direct move to a GPR, and
-sign-extending the 32-bit result to 64 bits.
-
-v8i16 can be supported with VSX and P8Vector by shifting the correct
-element into big-endian lane 3, using a direct move to a GPR, and
-sign-extending the 16-bit result to 64 bits.
-
-v16i8 can be supported with VSX and P8Vector by shifting the correct
-element into big-endian lane 7, using a direct move to a GPR, and
-sign-extending the 8-bit result to 64 bits.
+//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===//
+
+Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector
+registers, to generate better spill code.
+
+//===----------------------------------------------------------------------===//
+
+The first should be a single lvx from the constant pool, the second should be
+a xor/stvx:
+
+void foo(void) {
+ int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 };
+ bar (x);
+}
+
+#include <string.h>
+void foo(void) {
+ int x[8] __attribute__((aligned(128)));
+ memset (x, 0, sizeof (x));
+ bar (x);
+}
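+
+A sketch of the expected output for the second function (address setup
+assumed; x is 32 bytes, so one zeroed register and two stvx stores cover it):
+
+        vxor v2, v2, v2      ; v2 = 0
+        stvx v2, 0, r3       ; bytes 0..15 of x
+        addi r4, r3, 16
+        stvx v2, 0, r4       ; bytes 16..31 of x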
+
+//===----------------------------------------------------------------------===//
+
+Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763
+
+When -ffast-math is on, we can use 0.0.
+
+//===----------------------------------------------------------------------===//
+
+ Consider this:
+ v4f32 Vector;
+ v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X };
+
+Since we know that "Vector" is 16-byte aligned and we know the element offset
+of ".X", we should change the load into a lve*x instruction, instead of doing
+a load/store/lve*x sequence.
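+
+A sketch, assuming .X is at byte offset 0 so it lands in element 0 (in
+general the lane index is the byte offset divided by 4):
+
+        lvewx v2, 0, r3      ; load just the element into its natural lane
+        vspltw v2, v2, 0     ; splat that lane across the vector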
+
+//===----------------------------------------------------------------------===//
+
+Implement passing vectors by value into calls and receiving them as arguments.
+
+//===----------------------------------------------------------------------===//
+
+GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load
+of C1/C2/C3, then a load and vperm of Variable.
+
+//===----------------------------------------------------------------------===//
+
+We need a way to teach tblgen that some operands of an intrinsic are required to
+be constants. The verifier should enforce this constraint.
+
+//===----------------------------------------------------------------------===//
+
+We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte
+aligned stack slot, followed by a load/vperm. We should probably just store it
+to a scalar stack slot, then use lvsl/vperm to load it. If the value is already
+in memory this is a big win.
+
+//===----------------------------------------------------------------------===//
+
+extract_vector_elt of an arbitrary constant vector can be done with the
+following instructions:
+
+vTemp = vec_splat(v0,2); // 2 is the element the src is in.
+vec_ste(&destloc,0,vTemp);
+
+We can do an arbitrary non-constant value by using lvsr/perm/ste.
+
+//===----------------------------------------------------------------------===//
+
+If we want to tie instruction selection into the scheduler, we can do some
+constant formation with different instructions. For example, we can generate
+"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with
+"vsplti 0" or "vxor", each of which use different execution units, thus could
+help scheduling.
+
+This is probably only reasonable for a post-pass scheduler.
+
+//===----------------------------------------------------------------------===//
+
+For this function:
+
+void test(vector float *A, vector float *B) {
+ vector float C = (vector float)vec_cmpeq(*A, *B);
+ if (!vec_any_eq(*A, *B))
+ *B = (vector float){0,0,0,0};
+ *A = C;
+}
+
+we get the following basic block:
+
+ ...
+ lvx v2, 0, r4
+ lvx v3, 0, r3
+ vcmpeqfp v4, v3, v2
+ vcmpeqfp. v2, v3, v2
+ bne cr6, LBB1_2 ; cond_next
+
+The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the
+vcmpeqfp. result is used by a branch. This can be improved.
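+
+The merged form would be, roughly (one dot-form compare produces both the
+mask and the cr6 summary):
+
+        lvx v2, 0, r4
+        lvx v3, 0, r3
+        vcmpeqfp. v4, v3, v2 ; v4 = mask for C, cr6 = any/all summary
+        bne cr6, LBB1_2      ; cond_next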
+
+//===----------------------------------------------------------------------===//
+
+The code generated for this is truly awful:
+
+vector float test(float a, float b) {
+ return (vector float){ 0.0, a, 0.0, 0.0};
+}
+
+LCPI1_0: ; float
+ .space 4
+ .text
+ .globl _test
+ .align 4
+_test:
+ mfspr r2, 256
+ oris r3, r2, 4096
+ mtspr 256, r3
+ lis r3, ha16(LCPI1_0)
+ addi r4, r1, -32
+ stfs f1, -16(r1)
+ addi r5, r1, -16
+ lfs f0, lo16(LCPI1_0)(r3)
+ stfs f0, -32(r1)
+ lvx v2, 0, r4
+ lvx v3, 0, r5
+ vmrghw v3, v3, v2
+ vspltw v2, v2, 0
+ vmrghw v2, v2, v3
+ mtspr 256, r2
+ blr
+
+//===----------------------------------------------------------------------===//
+
+int foo(vector float *x, vector float *y) {
+ if (vec_all_eq(*x,*y)) return 3245;
+ else return 12;
+}
+
+A predicate compare being used in a select_cc should have the same peephole
+applied to it as a predicate compare used by a br_cc. There should be no
+mfcr here:
+
+_foo:
+ mfspr r2, 256
+ oris r5, r2, 12288
+ mtspr 256, r5
+ li r5, 12
+ li r6, 3245
+ lvx v2, 0, r4
+ lvx v3, 0, r3
+ vcmpeqfp. v2, v3, v2
+ mfcr r3, 2
+ rlwinm r3, r3, 25, 31, 31
+ cmpwi cr0, r3, 0
+ bne cr0, LBB1_2 ; entry
+LBB1_1: ; entry
+ mr r6, r5
+LBB1_2: ; entry
+ mr r3, r6
+ mtspr 256, r2
+ blr
+
+//===----------------------------------------------------------------------===//
+
+CodeGen/PowerPC/vec_constants.ll has an and operation that should be
+codegen'd to andc. The issue is that the 'all ones' build vector is
+SelectNodeTo'd to a VSPLTISB instruction node before the and/xor is selected,
+which prevents the vnot pattern from matching.
+
+
+//===----------------------------------------------------------------------===//
+
+An alternative to the store/store/load approach for illegal insert element
+lowering would be:
+
+1. store element to any ol' slot
+2. lvx the slot
+3. lvsl 0; splat index; vcmpeq to generate a select mask
+4. lvsl slot + x; vperm to rotate result into correct slot
+5. vsel result together.
+
+//===----------------------------------------------------------------------===//
+
+Should codegen branches on vec_any/vec_all to avoid mfcr. Two examples:
+
+#include <altivec.h>
+ int f(vector float a, vector float b)
+ {
+ int aa = 0;
+ if (vec_all_ge(a, b))
+ aa |= 0x1;
+ if (vec_any_ge(a,b))
+ aa |= 0x2;
+ return aa;
+}
+
+vector float f(vector float a, vector float b) {
+ if (vec_any_eq(a, b))
+ return a;
+ else
+ return b;
+}
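+
+For the second function, branching straight on cr6 avoids the mfcr entirely
+(register choices hypothetical):
+
+_f:
+        vcmpeqfp. v4, v2, v3 ; dot form sets cr6
+        bnelr cr6            ; some lane equal: return a, already in v2
+        vor v2, v3, v3       ; otherwise return b
+        blr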
+
+//===----------------------------------------------------------------------===//
+
+We should do a little better with eliminating dead stores.
+The stores to the stack are dead since %a and %b are not needed.
+
+; Function Attrs: nounwind
+define <16 x i8> @test_vpmsumb() #0 {
+ entry:
+ %a = alloca <16 x i8>, align 16
+ %b = alloca <16 x i8>, align 16
+ store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
+ store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
+ %0 = load <16 x i8>* %a, align 16
+ %1 = load <16 x i8>* %b, align 16
+ %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
+ ret <16 x i8> %2
+}
+
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
+
+
+Produces the following code with -mtriple=powerpc64-unknown-linux-gnu:
+# %bb.0: # %entry
+ addis 3, 2, .LCPI0_0@toc@ha
+ addis 4, 2, .LCPI0_1@toc@ha
+ addi 3, 3, .LCPI0_0@toc@l
+ addi 4, 4, .LCPI0_1@toc@l
+ lxvw4x 0, 0, 3
+ addi 3, 1, -16
+ lxvw4x 35, 0, 4
+ stxvw4x 0, 0, 3
+ ori 2, 2, 0
+ lxvw4x 34, 0, 3
+ addi 3, 1, -32
+ stxvw4x 35, 0, 3
+ vpmsumb 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+
+The two stxvw4x instructions are not needed.
+With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes
+are present too.
+
+//===----------------------------------------------------------------------===//
+
+The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:
+
+define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
+ %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+ %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+ %result = add <2 x i64> %x, %tmpvec2
+  ret <2 x i64> %result
+}
+
+This will generate the following instruction sequence:
+ std 5, -8(1)
+ std 5, -16(1)
+ addi 3, 1, -16
+ ori 2, 2, 0
+ lxvd2x 35, 0, 3
+ vaddudm 2, 2, 3
+ blr
+
+This will almost certainly cause a load-hit-store hazard.
+Since val is a value parameter, it should not need to be saved onto
+the stack, unless it's being done to set up the vector register. Instead,
+it would be better to splat the value into a vector register, and then
+remove the (dead) stores to the stack.
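+
+A sketch with ISA 2.07 instructions (one direct move plus a splat replaces
+the two stores and the vector load):
+
+        mtvsrd 35, 5         ; val -> vs35 (v3), doubleword 0
+        xxspltd 35, 35, 0    ; splat doubleword 0 across vs35
+        vaddudm 2, 2, 3
+        blr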
+
+//===----------------------------------------------------------------------===//
+
+At the moment we always generate a lxsdx in preference to lfd, or stxsdx in
+preference to stfd. When we have a reg-immediate addressing mode, this is a
+poor choice, since we have to load the address into an index register. This
+should be fixed for P7/P8.
+
+//===----------------------------------------------------------------------===//
+
+Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE.
+However, we could actually support both kinds on either endianness, if we check
+for the appropriate shufflevector pattern for each case ... this would cause
+some additional shufflevectors to be recognized and implemented via the
+"swapped" form.
+
+//===----------------------------------------------------------------------===//
+
+There is a utility program called PerfectShuffle that generates a table of the
+shortest instruction sequence for implementing a shufflevector operation on
+PowerPC. However, this was designed for big-endian code generation. We could
+modify this program to create a little endian version of the table. The table
+is used in PPCISelLowering.cpp, PPCTargetLowering::LOWERVECTOR_SHUFFLE().
+
+//===----------------------------------------------------------------------===//
+
+Opportunities to use instructions from PPCInstrVSX.td during code gen
+ - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07)
+ - Scalar comparisons (xscmpodp and xscmpudp)
+ - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp)
+
+Related to this: we currently do not generate the lxvw4x instruction for either
+v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires
+a single target type. This should probably be addressed in the PPCISelDAGToDAG logic.
+
+//===----------------------------------------------------------------------===//
+
+Currently EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT are type-legal only
+for v2f64 with VSX available. We should create custom lowering
+support for the other vector types. Without this support, we generate
+sequences with load-hit-store hazards.
+
+v4f32 can be supported with VSX by shifting the correct element into
+big-endian lane 0, using xscvspdpn to produce a double-precision
+representation of the single-precision value in big-endian
+double-precision lane 0, and reinterpreting lane 0 as an FPR or
+vector-scalar register.
+
+v2i64 can be supported with VSX and P8Vector in the same manner as
+v2f64, followed by a direct move to a GPR.
+
+v4i32 can be supported with VSX and P8Vector by shifting the correct
+element into big-endian lane 1, using a direct move to a GPR, and
+sign-extending the 32-bit result to 64 bits.
+
+v8i16 can be supported with VSX and P8Vector by shifting the correct
+element into big-endian lane 3, using a direct move to a GPR, and
+sign-extending the 16-bit result to 64 bits.
+
+v16i8 can be supported with VSX and P8Vector by shifting the correct
+element into big-endian lane 7, using a direct move to a GPR, and
+sign-extending the 8-bit result to 64 bits.
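+
+A sketch of the v4i32 case just described (shift amount chosen per the wanted
+element index; mfvsrwz reads word element 1 of the VSR on P8):
+
+        vsldoi 2, 2, 2, 4    ; rotate the wanted element into BE lane 1
+        mfvsrwz 3, 34        ; move word element 1 of vs34 (= v2) to r3
+        extsw 3, 3           ; sign-extend the 32-bit result to 64 bits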
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt b/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt
index 79cb6cceca..c9984b7604 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt
@@ -1,605 +1,605 @@
-//===- README_P9.txt - Notes for improving Power9 code gen ----------------===//
-
-TODO: Instructions that need intrinsics implemented or mapped to LLVM IR
-
-Altivec:
-- Vector Compare Not Equal (Zero):
- vcmpneb(.) vcmpneh(.) vcmpnew(.)
- vcmpnezb(.) vcmpnezh(.) vcmpnezw(.)
- . Same as other VCMP*, use VCMP/VCMPo form (support intrinsic)
-
-- Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd
- . Don't use llvm extractelement because the semantics differ
- . Use intrinsics:
- (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM))
- (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM))
- (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM))
- (set v2i64:$vD, (int_ppc_altivec_vextractd v2i64:$vA, imm:$UIMM))
-
-- Vector Extract Unsigned Byte Left/Right-Indexed:
- vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx
- . Use intrinsics:
- // Left-Indexed
- (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB))
- (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB))
- (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB))
-
- // Right-Indexed
- (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB))
- (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB))
- (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB))
-
-- Vector Insert Element Instructions: vinsertb vinsertd vinserth vinsertw
- (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM))
- (set v8i16:$vD, (int_ppc_altivec_vinsertd v8i16:$vA, imm:$UIMM))
- (set v4i32:$vD, (int_ppc_altivec_vinserth v4i32:$vA, imm:$UIMM))
- (set v2i64:$vD, (int_ppc_altivec_vinsertw v2i64:$vA, imm:$UIMM))
-
-- Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]:
- vclzlsbb vctzlsbb
- . Use intrinsic:
- (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB))
- (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB))
-
-- Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd
- . Map to llvm cttz
- (set v16i8:$vD, (cttz v16i8:$vB)) // vctzb
- (set v8i16:$vD, (cttz v8i16:$vB)) // vctzh
- (set v4i32:$vD, (cttz v4i32:$vB)) // vctzw
- (set v2i64:$vD, (cttz v2i64:$vB)) // vctzd
-
-- Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d
- . vextsb2w:
- (set v4i32:$vD, (sext v4i8:$vB))
-
- // PowerISA_V3.0:
- do i = 0 to 3
- VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3])
- end
-
- . vextsh2w:
- (set v4i32:$vD, (sext v4i16:$vB))
-
- // PowerISA_V3.0:
- do i = 0 to 3
- VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1])
- end
-
- . vextsb2d
- (set v2i64:$vD, (sext v2i8:$vB))
-
- // PowerISA_V3.0:
- do i = 0 to 1
- VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7])
- end
-
- . vextsh2d
- (set v2i64:$vD, (sext v2i16:$vB))
-
- // PowerISA_V3.0:
- do i = 0 to 1
- VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3])
- end
-
- . vextsw2d
- (set v2i64:$vD, (sext v2i32:$vB))
-
- // PowerISA_V3.0:
- do i = 0 to 1
- VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1])
- end
-
-- Vector Integer Negate: vnegw vnegd
- . Map to llvm ineg
- (set v4i32:$rT, (ineg v4i32:$rA)) // vnegw
- (set v2i64:$rT, (ineg v2i64:$rA)) // vnegd
-
-- Vector Parity Byte: vprtybw vprtybd vprtybq
- . Use intrinsic:
- (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB))
- (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB))
- (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB))
-
-- Vector (Bit) Permute (Right-indexed):
- . vbpermd: Same as "vbpermq", use VX1_Int_Ty2:
- VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>;
-
- . vpermr: use VA1a_Int_Ty3
- VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>;
-
-- Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi
- . Use intrinsic:
- VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>;
- VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>;
- VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>;
- VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>;
-
-- Vector Shift Left/Right: vslv vsrv
- . Use intrinsic, don't map to llvm shl and lshr, because they have different
- semantics, e.g. vslv:
-
- do i = 0 to 15
- sh ← VR[VRB].byte[i].bit[5:7]
- VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7]
- end
-
- VR[VRT].byte[i] is composed of 2 bytes from src.byte[i:i+1]
-
- . VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>;
- VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>;
-
-- Vector Multiply-by-10 (& Write Carry) Unsigned Quadword:
- vmul10uq vmul10cuq
- . Use intrinsic:
- VX1_Int_Ty<513, "vmul10uq", int_ppc_altivec_vmul10uq, v1i128>;
- VX1_Int_Ty< 1, "vmul10cuq", int_ppc_altivec_vmul10cuq, v1i128>;
-
-- Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword:
- vmul10euq vmul10ecuq
- . Use intrinsic:
- VX1_Int_Ty<577, "vmul10euq", int_ppc_altivec_vmul10euq, v1i128>;
- VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>;
-
-- Decimal Convert From/to National/Zoned/Signed-QWord:
- bcdcfn. bcdcfz. bcdctn. bcdctz. bcdcfsq. bcdctsq.
- . Use intrinsics:
- (set v1i128:$vD, (int_ppc_altivec_bcdcfno v1i128:$vB, i1:$PS))
- (set v1i128:$vD, (int_ppc_altivec_bcdcfzo v1i128:$vB, i1:$PS))
- (set v1i128:$vD, (int_ppc_altivec_bcdctno v1i128:$vB))
- (set v1i128:$vD, (int_ppc_altivec_bcdctzo v1i128:$vB, i1:$PS))
- (set v1i128:$vD, (int_ppc_altivec_bcdcfsqo v1i128:$vB, i1:$PS))
- (set v1i128:$vD, (int_ppc_altivec_bcdctsqo v1i128:$vB))
-
-- Decimal Copy-Sign/Set-Sign: bcdcpsgn. bcdsetsgn.
- . Use intrinsics:
- (set v1i128:$vD, (int_ppc_altivec_bcdcpsgno v1i128:$vA, v1i128:$vB))
- (set v1i128:$vD, (int_ppc_altivec_bcdsetsgno v1i128:$vB, i1:$PS))
-
-- Decimal Shift/Unsigned-Shift/Shift-and-Round: bcds. bcdus. bcdsr.
- . Use intrinsics:
- (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS))
- (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB))
- (set v1i128:$vD, (int_ppc_altivec_bcdsro v1i128:$vA, v1i128:$vB, i1:$PS))
-
- . Note! Only 1 byte of VA is accessed, i.e. VA.byte[7]
-
-- Decimal (Unsigned) Truncate: bcdtrunc. bcdutrunc.
- . Use intrinsics:
- (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS))
- (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB))
-
- . Note! Only 2 bytes of VA are accessed, i.e. VA.hword[3] (VA.bit[48:63])
-
-VSX:
-- QP Copy Sign: xscpsgnqp
- . Similar to xscpsgndp
- . (set f128:$vT, (fcopysign f128:$vB, f128:$vA)
-
-- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp
- . Similar to xsabsdp/xsnabsdp/xsnegdp
- . (set f128:$vT, (fabs f128:$vB)) // xsabsqp
- (set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp
- (set f128:$vT, (fneg f128:$vB)) // xsnegqp
-
-- QP Add/Divide/Multiply/Subtract/Square-Root:
- xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp
- . Similar to xsadddp
- . isCommutable = 1
- (set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp
- (set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp
-
- . isCommutable = 0
- (set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp
- (set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp
-    (set f128:$vT, (fsqrt f128:$vB))           // xssqrtqp
-
-- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root:
- xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo
- . Similar to xsrsqrtedp??
- def XSRSQRTEDP : XX2Form<60, 74,
- (outs vsfrc:$XT), (ins vsfrc:$XB),
- "xsrsqrtedp $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
-
- . Define DAG Node in PPCInstrInfo.td:
- def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>;
- def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>;
- def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>;
- def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>;
- def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>;
-
- DAG patterns of each instruction (PPCInstrVSX.td):
- . isCommutable = 1
- (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo
- (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo
-
- . isCommutable = 0
- (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo
- (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo
- (set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo
-
-- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp
- . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp
-
- . isCommutable = 1
- // xsmaddqp
- [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
- AltVSXFMARel;
-
- // xsmsubqp
- [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
- AltVSXFMARel;
-
- // xsnmaddqp
- [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
- AltVSXFMARel;
-
- // xsnmsubqp
- [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
- AltVSXFMARel;
-
-- Round to Odd of QP (Negative) Multiply-{Add/Subtract}:
- xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo
- . Similar to xsrsqrtedp??
-
- . Define DAG Node in PPCInstrInfo.td:
- def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>;
-
- It looks like we only need to define "PPCfmarto" for these instructions,
- because according to PowerISA_V3.0, these instructions perform RTO on
- fma's result:
- xsmaddqp(o)
- v ← bfp_MULTIPLY_ADD(src1, src3, src2)
- rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
- result ← bfp_CONVERT_TO_BFP128(rnd)
-
- xsmsubqp(o)
- v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
- rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
- result ← bfp_CONVERT_TO_BFP128(rnd)
-
- xsnmaddqp(o)
- v ← bfp_MULTIPLY_ADD(src1,src3,src2)
- rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
- result ← bfp_CONVERT_TO_BFP128(rnd)
-
- xsnmsubqp(o)
- v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
- rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
- result ← bfp_CONVERT_TO_BFP128(rnd)
-
- DAG patterns of each instruction (PPCInstrVSX.td):
- . isCommutable = 1
- // xsmaddqpo
- [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
- AltVSXFMARel;
-
- // xsmsubqpo
- [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
- AltVSXFMARel;
-
- // xsnmaddqpo
- [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
- AltVSXFMARel;
-
- // xsnmsubqpo
- [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
- AltVSXFMARel;
-
-- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp
- . ref: XSCMPUDP
- def XSCMPUDP : XX3Form_1<60, 35,
- (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
- "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
-
- . No SDAG, intrinsic, builtin are required??
- Or llvm fcmp order/unorder compare??
-
-- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp
- . No SDAG, intrinsic, builtin are required?
-
-- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp
- . I checked the existing instruction "XSCMPUDP". They differ in the target
- register: "XSCMPUDP" writes to a CR field, xscmp*dp writes to a VSX register
-
- . Use instrinsic:
- (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB))
- (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB))
- (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB))
- (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB))
-
-- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp.
- . Similar to xvcmpeqdp:
- defm XVCMPEQDP : XX3Form_Rcr<60, 99,
- "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
- int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
-
- . So we should use "XX3Form_Rcr" to implement the intrinsic
-
-- Convert DP -> QP: xscvdpqp
- . Similar to XSCVDPSP:
- def XSCVDPSP : XX2Form<60, 265,
- (outs vsfrc:$XT), (ins vsfrc:$XB),
- "xscvdpsp $XT, $XB", IIC_VecFP, []>;
- . So, No SDAG, intrinsic, builtin are required??
-
-- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo
- . Similar to XSCVDPSP
- . No SDAG, intrinsic, builtin are required??
-
-- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero):
- xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz
- . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS",
- "XSCVDPUXDS", "XSCVDPUXWS"
-
- . DAG patterns:
- (set f128:$XT, (PPCfctidz f128:$XB)) // xscvqpsdz
- (set f128:$XT, (PPCfctiwz f128:$XB)) // xscvqpswz
- (set f128:$XT, (PPCfctiduz f128:$XB)) // xscvqpudz
- (set f128:$XT, (PPCfctiwuz f128:$XB)) // xscvqpuwz
-
-- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp
- . Similar to XSCVSXDSP
- . (set f128:$XT, (PPCfcfids f64:$XB)) // xscvsdqp
- (set f128:$XT, (PPCfcfidus f64:$XB)) // xscvudqp
-
-- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp
- . Similar to XSCVDPSP
- . No SDAG, intrinsic, builtin are required??
-
-- Vector HP -> SP: xvcvhpsp xvcvsphp
- . Similar to XVCVDPSP:
- def XVCVDPSP : XX2Form<60, 393,
- (outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
- . No SDAG, intrinsic, builtin are required??
-
-- Round to Quad-Precision Integer: xsrqpi xsrqpix
- . These are a combination of "XSRDPI", "XSRDPIC", "XSRDPIM", .., because you
- need to assign the rounding mode in the instruction
- . Provide builtin?
- (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB))
- (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB))
-
-- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp
- . Provide builtin?
- (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB))
-
-Fixed Point Facility:
-
-- Exploit cmprb and cmpeqb (perhaps for something like
- isalpha/isdigit/isupper/islower and isspace respectively). This can
- perhaps be done through a builtin.
-
-- Provide testing for cnttz[dw]
-- Insert Exponent DP/QP: xsiexpdp xsiexpqp
- . Use intrinsic?
- . xsiexpdp:
- // Note: rA and rB are the unsigned integer value.
- (set f128:$XT, (int_ppc_vsx_xsiexpdp i64:$rA, i64:$rB))
-
- . xsiexpqp:
- (set f128:$vT, (int_ppc_vsx_xsiexpqp f128:$vA, f64:$vB))
-
-- Extract Exponent/Significand DP/QP: xsxexpdp xsxsigdp xsxexpqp xsxsigqp
- . Use intrinsic?
- . (set i64:$rT, (int_ppc_vsx_xsxexpdp f64$XB)) // xsxexpdp
- (set i64:$rT, (int_ppc_vsx_xsxsigdp f64$XB)) // xsxsigdp
- (set f128:$vT, (int_ppc_vsx_xsxexpqp f128$vB)) // xsxexpqp
- (set f128:$vT, (int_ppc_vsx_xsxsigqp f128$vB)) // xsxsigqp
-
-- Vector Insert Word: xxinsertw
- - Useful for inserting f32/i32 elements into vectors (the element to be
- inserted needs to be prepared)
- . Note: llvm has insertelem in "Vector Operations"
- ; yields <n x <ty>>
- <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>
-
- But how to map to it??
- [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
-
- . Or use intrinsic?
- (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM))
-
-- Vector Extract Unsigned Word: xxextractuw
- - Not useful for extraction of f32 from v4f32 (the current pattern is better -
- shift->convert)
- - It is useful for (uint_to_fp (vector_extract v4i32, N))
- - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N))
- . Note: llvm has extractelement in "Vector Operations"
- ; yields <ty>
- <result> = extractelement <n x <ty>> <val>, <ty2> <idx>
-
- How to map to it??
- [(set f128:$XT, (extractelement v1f128:$XB, i4:$UIMM))]
-
- . Or use intrinsic?
- (set f128:$XT, (int_ppc_vsx_xxextractuw v1f128:$XB, i4:$UIMM))
-
-- Vector Insert Exponent DP/SP: xviexpdp xviexpsp
- . Use intrinsic
- (set v2f64:$XT, (int_ppc_vsx_xviexpdp v2f64:$XA, v2f64:$XB))
- (set v4f32:$XT, (int_ppc_vsx_xviexpsp v4f32:$XA, v4f32:$XB))
-
-- Vector Extract Exponent/Significand DP/SP: xvxexpdp xvxexpsp xvxsigdp xvxsigsp
- . Use intrinsic
- (set v2f64:$XT, (int_ppc_vsx_xvxexpdp v2f64:$XB))
- (set v4f32:$XT, (int_ppc_vsx_xvxexpsp v4f32:$XB))
- (set v2f64:$XT, (int_ppc_vsx_xvxsigdp v2f64:$XB))
- (set v4f32:$XT, (int_ppc_vsx_xvxsigsp v4f32:$XB))
-
-- Test Data Class SP/DP/QP: xststdcsp xststdcdp xststdcqp
- . No SDAG, intrinsic, builtin are required?
- Because it seems that we have no way to map the BF field?
-
- Instruction Form: [PO T XO B XO BX TX]
- Asm: xststd* BF,XB,DCMX
-
- BF is an index to CR register field.
-
-- Vector Test Data Class SP/DP: xvtstdcsp xvtstdcdp
- . Use intrinsic
- (set v4f32:$XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, i7:$DCMX))
- (set v2f64:$XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, i7:$DCMX))
-
-- Maximum/Minimum Type-C/Type-J DP: xsmaxcdp xsmaxjdp xsmincdp xsminjdp
- . PowerISA_V3.0:
- "xsmaxcdp can be used to implement the C/C++/Java conditional operation
- (x>y)?x:y for single-precision and double-precision arguments."
-
- Note! c type and j type have different behavior when:
- 1. Either input is NaN
- 2. Both inputs are +-Infinity or +-Zero
-
- . The c type maps to llvm fmaxnum/fminnum;
- the j type uses an intrinsic
-
- . xsmaxcdp xsmincdp
- (set f64:$XT, (fmaxnum f64:$XA, f64:$XB))
- (set f64:$XT, (fminnum f64:$XA, f64:$XB))
-
- . xsmaxjdp xsminjdp
- (set f64:$XT, (int_ppc_vsx_xsmaxjdp f64:$XA, f64:$XB))
- (set f64:$XT, (int_ppc_vsx_xsminjdp f64:$XA, f64:$XB))
-
-- Vector Byte-Reverse H/W/D/Q Word: xxbrh xxbrw xxbrd xxbrq
- . Use intrinsic
- (set v8i16:$XT, (int_ppc_vsx_xxbrh v8i16:$XB))
- (set v4i32:$XT, (int_ppc_vsx_xxbrw v4i32:$XB))
- (set v2i64:$XT, (int_ppc_vsx_xxbrd v2i64:$XB))
- (set v1i128:$XT, (int_ppc_vsx_xxbrq v1i128:$XB))
-
-- Vector Permute: xxperm xxpermr
- . I have checked "PPCxxswapd" in PPCInstrVSX.td, but they are different
- . Use intrinsic
- (set v16i8:$XT, (int_ppc_vsx_xxperm v16i8:$XA, v16i8:$XB))
- (set v16i8:$XT, (int_ppc_vsx_xxpermr v16i8:$XA, v16i8:$XB))
-
-- Vector Splat Immediate Byte: xxspltib
- . Similar to XXSPLTW:
- def XXSPLTW : XX2Form_2<60, 164,
- (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
- "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
-
- . No SDAG, intrinsic, builtin are required?
-
-- Load/Store Vector: lxv stxv
- . Has likely SDAG match:
- (set v?:$XT, (load ix16addr:$src))
- (set v?:$XT, (store ix16addr:$dst))
-
- . Need to define ix16addr in PPCInstrInfo.td
- ix16addr: 16-byte aligned, see "def memrix16" in PPCInstrInfo.td
-
-- Load/Store Vector Indexed: lxvx stxvx
- . Has likely SDAG match:
- (set v?:$XT, (load xoaddr:$src))
- (set v?:$XT, (store xoaddr:$dst))
-
-- Load/Store DWord: lxsd stxsd
- . Similar to lxsdx/stxsdx:
- def LXSDX : XX1Form<31, 588,
- (outs vsfrc:$XT), (ins memrr:$src),
- "lxsdx $XT, $src", IIC_LdStLFD,
- [(set f64:$XT, (load xoaddr:$src))]>;
-
- . (set f64:$XT, (load iaddrX4:$src))
- (set f64:$XT, (store iaddrX4:$dst))
-
-- Load/Store SP, with conversion from/to DP: lxssp stxssp
- . Similar to lxsspx/stxsspx:
- def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
- "lxsspx $XT, $src", IIC_LdStLFD,
- [(set f32:$XT, (load xoaddr:$src))]>;
-
- . (set f32:$XT, (load iaddrX4:$src))
- (set f32:$XT, (store iaddrX4:$dst))
-
-- Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx
- . Similar to lxsiwzx:
- def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
- "lxsiwzx $XT, $src", IIC_LdStLFD,
- [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
-
- . (set f64:$XT, (PPClfiwzx xoaddr:$src))
-
-- Store as Integer Byte/Halfword Indexed: stxsibx stxsihx
- . Similar to stxsiwx:
- def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
- "stxsiwx $XT, $dst", IIC_LdStSTFD,
- [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
-
- . (PPCstfiwx f64:$XT, xoaddr:$dst)
-
-- Load Vector Halfword*8/Byte*16 Indexed: lxvh8x lxvb16x
- . Similar to lxvd2x/lxvw4x:
- def LXVD2X : XX1Form<31, 844,
- (outs vsrc:$XT), (ins memrr:$src),
- "lxvd2x $XT, $src", IIC_LdStLFD,
- [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>;
-
- . (set v8i16:$XT, (int_ppc_vsx_lxvh8x xoaddr:$src))
- (set v16i8:$XT, (int_ppc_vsx_lxvb16x xoaddr:$src))
-
-- Store Vector Halfword*8/Byte*16 Indexed: stxvh8x stxvb16x
- . Similar to stxvd2x/stxvw4x:
- def STXVD2X : XX1Form<31, 972,
- (outs), (ins vsrc:$XT, memrr:$dst),
- "stxvd2x $XT, $dst", IIC_LdStSTFD,
- [(store v2f64:$XT, xoaddr:$dst)]>;
-
- . (store v8i16:$XT, xoaddr:$dst)
- (store v16i8:$XT, xoaddr:$dst)
-
-- Load/Store Vector (Left-justified) with Length: lxvl lxvll stxvl stxvll
- . Likely needs an intrinsic
- . (set v?:$XT, (int_ppc_vsx_lxvl xoaddr:$src))
- (set v?:$XT, (int_ppc_vsx_lxvll xoaddr:$src))
-
-  . (int_ppc_vsx_stxvl xoaddr:$dst)
-    (int_ppc_vsx_stxvll xoaddr:$dst)
-
-- Load Vector Word & Splat Indexed: lxvwsx
- . Likely needs an intrinsic
- . (set v?:$XT, (int_ppc_vsx_lxvwsx xoaddr:$src))
-
-Atomic operations (l[dw]at, st[dw]at):
-- Provide custom lowering for common atomic operations to use these
- instructions with the correct Function Code
-- Ensure the operands are in the correct registers (i.e. RT+1, RT+2)
-- Provide builtins, since not all FCs necessarily have an existing LLVM
- atomic operation
-
-Load Doubleword Monitored (ldmx):
-- Investigate whether there are any uses for this. It seems to be related to
-  garbage collection, so it is unlikely to be all that useful for most of the
-  languages we deal with.
-
-Move to CR from XER Extended (mcrxrx):
-- Is there a use for this in LLVM?
-
-Fixed Point Facility:
-
-- Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last
-  . Use intrinsics:
- (int_ppc_copy_first i32:$rA, i32:$rB)
- (int_ppc_copy i32:$rA, i32:$rB)
-
- (int_ppc_paste i32:$rA, i32:$rB)
- (int_ppc_paste_last i32:$rA, i32:$rB)
-
- (int_cp_abort)
-
-- Message Synchronize: msgsync
-- SLB*: slbieg slbsync
-- stop
-  . No intrinsics
+//===- README_P9.txt - Notes for improving Power9 code gen ----------------===//
+
+TODO: instructions that need intrinsics or a mapping to LLVM IR
+
+Altivec:
+- Vector Compare Not Equal (Zero):
+ vcmpneb(.) vcmpneh(.) vcmpnew(.)
+ vcmpnezb(.) vcmpnezh(.) vcmpnezw(.)
+  . Same as other VCMP*; use the VCMP/VCMPo form (with intrinsic support)
+
+- Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd
+  . Don't use llvm extractelement, because the semantics differ
+  . Use intrinsics:
+ (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM))
+ (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM))
+ (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM))
+ (set v2i64:$vD, (int_ppc_altivec_vextractd v2i64:$vA, imm:$UIMM))
+
+- Vector Extract Unsigned Byte Left/Right-Indexed:
+ vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx
+  . Use intrinsics:
+ // Left-Indexed
+ (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB))
+ (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB))
+ (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB))
+
+ // Right-Indexed
+ (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB))
+ (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB))
+ (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB))
+
+- Vector Insert Element Instructions: vinsertb vinserth vinsertw vinsertd
+  . Use intrinsics (note the element type must match the instruction width):
+    (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM))
+    (set v8i16:$vD, (int_ppc_altivec_vinserth v8i16:$vA, imm:$UIMM))
+    (set v4i32:$vD, (int_ppc_altivec_vinsertw v4i32:$vA, imm:$UIMM))
+    (set v2i64:$vD, (int_ppc_altivec_vinsertd v2i64:$vA, imm:$UIMM))
+
+- Vector Count Leading/Trailing Zero LSBs (result is placed into GPR[rD]):
+ vclzlsbb vctzlsbb
+ . Use intrinsic:
+ (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB))
+ (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB))
+
+- Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd
+ . Map to llvm cttz
+ (set v16i8:$vD, (cttz v16i8:$vB)) // vctzb
+ (set v8i16:$vD, (cttz v8i16:$vB)) // vctzh
+ (set v4i32:$vD, (cttz v4i32:$vB)) // vctzw
+ (set v2i64:$vD, (cttz v2i64:$vB)) // vctzd
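+
+  . For illustration, a C loop (a sketch, not from the ISA text) whose
+    vectorized form could select vctzw; the zero check mirrors cttz's
+    defined result of the element width for zero inputs:
+      // Trailing-zero count of each 32-bit element; 32 for a zero element.
+      void ctz_v4(unsigned r[4], const unsigned a[4]) {
+        for (int i = 0; i < 4; ++i)
+          r[i] = a[i] ? (unsigned)__builtin_ctz(a[i]) : 32u;
+      }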
+
+- Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d
+ . vextsb2w:
+ (set v4i32:$vD, (sext v4i8:$vB))
+
+ // PowerISA_V3.0:
+ do i = 0 to 3
+ VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3])
+ end
+
+ . vextsh2w:
+ (set v4i32:$vD, (sext v4i16:$vB))
+
+ // PowerISA_V3.0:
+ do i = 0 to 3
+ VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1])
+ end
+
+ . vextsb2d
+ (set v2i64:$vD, (sext v2i8:$vB))
+
+ // PowerISA_V3.0:
+ do i = 0 to 1
+ VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7])
+ end
+
+ . vextsh2d
+ (set v2i64:$vD, (sext v2i16:$vB))
+
+ // PowerISA_V3.0:
+ do i = 0 to 1
+ VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3])
+ end
+
+ . vextsw2d
+ (set v2i64:$vD, (sext v2i32:$vB))
+
+ // PowerISA_V3.0:
+ do i = 0 to 1
+ VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1])
+ end
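+
+  . For illustration, a C sketch of the vextsb2w pseudocode above
+    (sign-extend the least-significant byte of each word; a hypothetical
+    helper, not authoritative):
+      #include <stdint.h>
+      void extsb2w(int32_t vt[4], const int32_t vb[4]) {
+        for (int i = 0; i < 4; ++i)
+          vt[i] = (int8_t)(vb[i] & 0xff); // EXTS32(word[i].byte[3])
+      }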
+
+- Vector Integer Negate: vnegw vnegd
+ . Map to llvm ineg
+ (set v4i32:$rT, (ineg v4i32:$rA)) // vnegw
+ (set v2i64:$rT, (ineg v2i64:$rA)) // vnegd
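+
+  . For illustration, a C loop (a sketch) whose vectorized form is vnegw:
+      void neg_v4(int r[4], const int a[4]) {
+        for (int i = 0; i < 4; ++i)
+          r[i] = -a[i]; // ineg, element-wise
+      }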
+
+- Vector Parity Byte: vprtybw vprtybd vprtybq
+ . Use intrinsic:
+ (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB))
+ (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB))
+ (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB))
+
+- Vector (Bit) Permute (Right-indexed):
+ . vbpermd: Same as "vbpermq", use VX1_Int_Ty2:
+ VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>;
+
+ . vpermr: use VA1a_Int_Ty3
+ VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>;
+
+- Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi
+ . Use intrinsic:
+ VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>;
+ VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>;
+ VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>;
+ VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>;
+
+- Vector Shift Left/Right: vslv vsrv
+  . Use intrinsics; don't map to llvm shl and lshr, because the semantics
+    differ, e.g. vslv:
+
+ do i = 0 to 15
+ sh ← VR[VRB].byte[i].bit[5:7]
+ VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7]
+ end
+
+    i.e. VR[VRT].byte[i] is drawn from the 2 bytes src.byte[i:i+1]
+
+ . VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>;
+ VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>;
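+
+  . For illustration, a C sketch of the vslv pseudocode above; here src holds
+    the 16 source bytes followed by one zero byte, and bit numbering is
+    big-endian as in the ISA (a sketch, not authoritative):
+      #include <stdint.h>
+      void slv(uint8_t vt[16], const uint8_t src[17], const uint8_t vb[16]) {
+        for (int i = 0; i < 16; ++i) {
+          unsigned sh = vb[i] & 7;           // VR[VRB].byte[i].bit[5:7]
+          vt[i] = (uint8_t)((src[i] << sh) | (src[i + 1] >> (8 - sh)));
+        }
+      }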
+
+- Vector Multiply-by-10 (& Write Carry) Unsigned Quadword:
+ vmul10uq vmul10cuq
+ . Use intrinsic:
+ VX1_Int_Ty<513, "vmul10uq", int_ppc_altivec_vmul10uq, v1i128>;
+ VX1_Int_Ty< 1, "vmul10cuq", int_ppc_altivec_vmul10cuq, v1i128>;
+
+- Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword:
+ vmul10euq vmul10ecuq
+ . Use intrinsic:
+ VX1_Int_Ty<577, "vmul10euq", int_ppc_altivec_vmul10euq, v1i128>;
+ VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>;
+
+- Decimal Convert From/to National/Zoned/Signed-QWord:
+ bcdcfn. bcdcfz. bcdctn. bcdctz. bcdcfsq. bcdctsq.
+  . Use intrinsics:
+ (set v1i128:$vD, (int_ppc_altivec_bcdcfno v1i128:$vB, i1:$PS))
+ (set v1i128:$vD, (int_ppc_altivec_bcdcfzo v1i128:$vB, i1:$PS))
+ (set v1i128:$vD, (int_ppc_altivec_bcdctno v1i128:$vB))
+ (set v1i128:$vD, (int_ppc_altivec_bcdctzo v1i128:$vB, i1:$PS))
+ (set v1i128:$vD, (int_ppc_altivec_bcdcfsqo v1i128:$vB, i1:$PS))
+ (set v1i128:$vD, (int_ppc_altivec_bcdctsqo v1i128:$vB))
+
+- Decimal Copy-Sign/Set-Sign: bcdcpsgn. bcdsetsgn.
+  . Use intrinsics:
+ (set v1i128:$vD, (int_ppc_altivec_bcdcpsgno v1i128:$vA, v1i128:$vB))
+ (set v1i128:$vD, (int_ppc_altivec_bcdsetsgno v1i128:$vB, i1:$PS))
+
+- Decimal Shift/Unsigned-Shift/Shift-and-Round: bcds. bcdus. bcdsr.
+  . Use intrinsics:
+ (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS))
+ (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB))
+ (set v1i128:$vD, (int_ppc_altivec_bcdsro v1i128:$vA, v1i128:$vB, i1:$PS))
+
+  . Note: only 1 byte of VA is accessed, i.e. VA.byte[7]
+
+- Decimal (Unsigned) Truncate: bcdtrunc. bcdutrunc.
+  . Use intrinsics (note: the names below duplicate bcds./bcdus. above, so
+    dedicated truncate intrinsics are presumably needed):
+    (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS))
+    (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB))
+
+  . Note: only 2 bytes of VA are accessed, i.e. VA.hword[3] (VA.bit[48:63])
+
+VSX:
+- QP Copy Sign: xscpsgnqp
+ . Similar to xscpsgndp
+  . (set f128:$vT, (fcopysign f128:$vB, f128:$vA))
+
+- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp
+ . Similar to xsabsdp/xsnabsdp/xsnegdp
+ . (set f128:$vT, (fabs f128:$vB)) // xsabsqp
+ (set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp
+ (set f128:$vT, (fneg f128:$vB)) // xsnegqp
+
+- QP Add/Divide/Multiply/Subtract/Square-Root:
+ xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp
+ . Similar to xsadddp
+ . isCommutable = 1
+ (set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp
+ (set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp
+
+ . isCommutable = 0
+ (set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp
+ (set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp
+    (set f128:$vT, (fsqrt f128:$vB))                 // xssqrtqp
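+
+  . For illustration (hedged, assuming __float128 support, e.g. clang/gcc on
+    ppc64le with -mfloat128), the C code these patterns would cover:
+      __float128 qadd(__float128 a, __float128 b) { return a + b; } // xsaddqp
+      __float128 qmul(__float128 a, __float128 b) { return a * b; } // xsmulqp
+      __float128 qsub(__float128 a, __float128 b) { return a - b; } // xssubqp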
+
+- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root:
+ xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo
+ . Similar to xsrsqrtedp??
+ def XSRSQRTEDP : XX2Form<60, 74,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
+
+ . Define DAG Node in PPCInstrInfo.td:
+ def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>;
+ def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>;
+ def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>;
+ def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>;
+ def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>;
+
+ DAG patterns of each instruction (PPCInstrVSX.td):
+ . isCommutable = 1
+ (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo
+ (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo
+
+ . isCommutable = 0
+ (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo
+ (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo
+ (set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo
+
+- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp
+ . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp
+
+ . isCommutable = 1
+ // xsmaddqp
+ [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>,
+ RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+ AltVSXFMARel;
+
+ // xsmsubqp
+ [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
+ RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+ AltVSXFMARel;
+
+ // xsnmaddqp
+ [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>,
+ RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+ AltVSXFMARel;
+
+ // xsnmsubqp
+ [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
+ RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+ AltVSXFMARel;
+
+- Round to Odd of QP (Negative) Multiply-{Add/Subtract}:
+ xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo
+ . Similar to xsrsqrtedp??
+
+ . Define DAG Node in PPCInstrInfo.td:
+ def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>;
+
+  It looks like we only need to define "PPCfmarto" for these instructions,
+  because, according to PowerISA_V3.0, they perform round-to-odd (RTO) on
+  the fma result:
+ xsmaddqp(o)
+ v ← bfp_MULTIPLY_ADD(src1, src3, src2)
+ rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
+ result ← bfp_CONVERT_TO_BFP128(rnd)
+
+ xsmsubqp(o)
+ v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
+ rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
+ result ← bfp_CONVERT_TO_BFP128(rnd)
+
+ xsnmaddqp(o)
+ v ← bfp_MULTIPLY_ADD(src1,src3,src2)
+ rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
+ result ← bfp_CONVERT_TO_BFP128(rnd)
+
+ xsnmsubqp(o)
+ v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
+ rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
+ result ← bfp_CONVERT_TO_BFP128(rnd)
+
+ DAG patterns of each instruction (PPCInstrVSX.td):
+ . isCommutable = 1
+ // xsmaddqpo
+ [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>,
+ RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+ AltVSXFMARel;
+
+ // xsmsubqpo
+ [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
+ RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+ AltVSXFMARel;
+
+ // xsnmaddqpo
+ [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>,
+ RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+ AltVSXFMARel;
+
+ // xsnmsubqpo
+ [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
+ RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+ AltVSXFMARel;
+
+- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp
+ . ref: XSCMPUDP
+ def XSCMPUDP : XX3Form_1<60, 35,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+  . No SDAG node, intrinsic, or builtin is required??
+    Or map to llvm fcmp ordered/unordered compares??
+
+- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp
+  . No SDAG node, intrinsic, or builtin is required?
+
+- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp
+  . Checked the existing instruction "XSCMPUDP"; they differ in the target
+    register: "XSCMPUDP" writes to a CR field, xscmp*dp write to a VSX register
+
+  . Use intrinsics:
+ (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB))
+ (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB))
+ (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB))
+ (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB))
+
+- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp.
+ . Similar to xvcmpeqdp:
+ defm XVCMPEQDP : XX3Form_Rcr<60, 99,
+ "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
+ int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
+
+  . So we should use "XX3Form_Rcr" to implement the intrinsics
+
+- Convert DP -> QP: xscvdpqp
+ . Similar to XSCVDPSP:
+ def XSCVDPSP : XX2Form<60, 265,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsp $XT, $XB", IIC_VecFP, []>;
+  . So, no SDAG node, intrinsic, or builtin is required??
+
+- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo
+ . Similar to XSCVDPSP
+  . No SDAG node, intrinsic, or builtin is required??
+
+- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero):
+ xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz
+ . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS",
+ "XSCVDPUXDS", "XSCVDPUXWS"
+
+ . DAG patterns:
+ (set f128:$XT, (PPCfctidz f128:$XB)) // xscvqpsdz
+ (set f128:$XT, (PPCfctiwz f128:$XB)) // xscvqpswz
+ (set f128:$XT, (PPCfctiduz f128:$XB)) // xscvqpudz
+ (set f128:$XT, (PPCfctiwuz f128:$XB)) // xscvqpuwz
+
+- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp
+ . Similar to XSCVSXDSP
+ . (set f128:$XT, (PPCfcfids f64:$XB)) // xscvsdqp
+ (set f128:$XT, (PPCfcfidus f64:$XB)) // xscvudqp
+
+- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp
+ . Similar to XSCVDPSP
+  . No SDAG node, intrinsic, or builtin is required??
+
+- Vector HP -> SP: xvcvhpsp xvcvsphp
+ . Similar to XVCVDPSP:
+ def XVCVDPSP : XX2Form<60, 393,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
+  . No SDAG node, intrinsic, or builtin is required??
+
+- Round to Quad-Precision Integer: xsrqpi xsrqpix
+  . These are a combination of "XSRDPI", "XSRDPIC", "XSRDPIM", ..., because
+    the rounding mode needs to be encoded in the instruction
+ . Provide builtin?
+ (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB))
+ (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB))
+
+- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp
+ . Provide builtin?
+ (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB))
+
+Fixed Point Facility:
+
+- Exploit cmprb and cmpeqb (perhaps for something like
+  isalpha/isdigit/isupper/islower and isspace, respectively). This can
+  perhaps be done through a builtin; see the sketch below.
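+
+  A sketch of the character-class checks cmprb (Compare Ranged Byte) could
+  cover (hypothetical helpers; the builtin itself is the open question):
+      int my_isdigit(unsigned char c) { return c >= '0' && c <= '9'; }
+      int my_isupper(unsigned char c) { return c >= 'A' && c <= 'Z'; }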
+
+- Provide testing for cnttz[dw]
+- Insert Exponent DP/QP: xsiexpdp xsiexpqp
+ . Use intrinsic?
+ . xsiexpdp:
+    // Note: rA and rB are unsigned integer values.
+ (set f128:$XT, (int_ppc_vsx_xsiexpdp i64:$rA, i64:$rB))
+
+ . xsiexpqp:
+ (set f128:$vT, (int_ppc_vsx_xsiexpqp f128:$vA, f64:$vB))
+
+- Extract Exponent/Significand DP/QP: xsxexpdp xsxsigdp xsxexpqp xsxsigqp
+ . Use intrinsic?
+  . (set i64:$rT, (int_ppc_vsx_xsxexpdp f64:$XB))    // xsxexpdp
+    (set i64:$rT, (int_ppc_vsx_xsxsigdp f64:$XB))    // xsxsigdp
+    (set f128:$vT, (int_ppc_vsx_xsxexpqp f128:$vB))  // xsxexpqp
+    (set f128:$vT, (int_ppc_vsx_xsxsigqp f128:$vB))  // xsxsigqp
+
+- Vector Insert Word: xxinsertw
+ - Useful for inserting f32/i32 elements into vectors (the element to be
+ inserted needs to be prepared)
+ . Note: llvm has insertelem in "Vector Operations"
+ ; yields <n x <ty>>
+ <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>
+
+ But how to map to it??
+ [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+
+ . Or use intrinsic?
+ (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM))
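+
+  . For illustration (hedged, GCC/Clang vector extension), an insert the
+    instruction could serve once the element is prepared:
+      typedef float v4sf __attribute__((vector_size(16)));
+      v4sf ins2(v4sf v, float x) { v[2] = x; return v; }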
+
+- Vector Extract Unsigned Word: xxextractuw
+ - Not useful for extraction of f32 from v4f32 (the current pattern is better -
+ shift->convert)
+ - It is useful for (uint_to_fp (vector_extract v4i32, N))
+ - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N))
+ . Note: llvm has extractelement in "Vector Operations"
+ ; yields <ty>
+ <result> = extractelement <n x <ty>> <val>, <ty2> <idx>
+
+ How to map to it??
+ [(set f128:$XT, (extractelement v1f128:$XB, i4:$UIMM))]
+
+ . Or use intrinsic?
+ (set f128:$XT, (int_ppc_vsx_xxextractuw v1f128:$XB, i4:$UIMM))
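+
+  . For illustration (hedged, GCC/Clang vector extension), the conversion
+    pattern it is useful for:
+      typedef unsigned v4su __attribute__((vector_size(16)));
+      double u2d(v4su v) { return (double)v[1]; } // uint_to_fp(vector_extract)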
+
+- Vector Insert Exponent DP/SP: xviexpdp xviexpsp
+ . Use intrinsic
+ (set v2f64:$XT, (int_ppc_vsx_xviexpdp v2f64:$XA, v2f64:$XB))
+ (set v4f32:$XT, (int_ppc_vsx_xviexpsp v4f32:$XA, v4f32:$XB))
+
+- Vector Extract Exponent/Significand DP/SP: xvxexpdp xvxexpsp xvxsigdp xvxsigsp
+ . Use intrinsic
+ (set v2f64:$XT, (int_ppc_vsx_xvxexpdp v2f64:$XB))
+ (set v4f32:$XT, (int_ppc_vsx_xvxexpsp v4f32:$XB))
+ (set v2f64:$XT, (int_ppc_vsx_xvxsigdp v2f64:$XB))
+ (set v4f32:$XT, (int_ppc_vsx_xvxsigsp v4f32:$XB))
+
+- Test Data Class SP/DP/QP: xststdcsp xststdcdp xststdcqp
+  . No SDAG node, intrinsic, or builtin is required?
+    Because it seems we have no way to map the BF field?
+
+ Instruction Form: [PO T XO B XO BX TX]
+ Asm: xststd* BF,XB,DCMX
+
+    BF is an index into the CR register fields.
+
+- Vector Test Data Class SP/DP: xvtstdcsp xvtstdcdp
+ . Use intrinsic
+ (set v4f32:$XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, i7:$DCMX))
+ (set v2f64:$XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, i7:$DCMX))
+
+- Maximum/Minimum Type-C/Type-J DP: xsmaxcdp xsmaxjdp xsmincdp xsminjdp
+ . PowerISA_V3.0:
+ "xsmaxcdp can be used to implement the C/C++/Java conditional operation
+ (x>y)?x:y for single-precision and double-precision arguments."
+
+   Note: the c-type and j-type forms behave differently when:
+   1. either input is NaN
+   2. both inputs are +-Infinity or +-Zero
+
+  . c-type maps to llvm fmaxnum/fminnum;
+    j-type uses an intrinsic
+
+ . xsmaxcdp xsmincdp
+ (set f64:$XT, (fmaxnum f64:$XA, f64:$XB))
+ (set f64:$XT, (fminnum f64:$XA, f64:$XB))
+
+ . xsmaxjdp xsminjdp
+ (set f64:$XT, (int_ppc_vsx_xsmaxjdp f64:$XA, f64:$XB))
+ (set f64:$XT, (int_ppc_vsx_xsminjdp f64:$XA, f64:$XB))
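+
+  . For illustration, the c-type conditional from the ISA quote (a sketch;
+    xsmincdp presumably covers the analogous (x<y)?x:y, and the NaN and
+    signed-zero inputs are the cases called out in the Note above):
+      double maxc(double x, double y) { return (x > y) ? x : y; } // xsmaxcdp
+      double minc(double x, double y) { return (x < y) ? x : y; } // xsmincdp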
+
+- Vector Byte-Reverse H/W/D/Q Word: xxbrh xxbrw xxbrd xxbrq
+ . Use intrinsic
+ (set v8i16:$XT, (int_ppc_vsx_xxbrh v8i16:$XB))
+ (set v4i32:$XT, (int_ppc_vsx_xxbrw v4i32:$XB))
+ (set v2i64:$XT, (int_ppc_vsx_xxbrd v2i64:$XB))
+ (set v1i128:$XT, (int_ppc_vsx_xxbrq v1i128:$XB))
+
+- Vector Permute: xxperm xxpermr
+  . Checked "PPCxxswapd" in PPCInstrVSX.td, but it is different
+ . Use intrinsic
+ (set v16i8:$XT, (int_ppc_vsx_xxperm v16i8:$XA, v16i8:$XB))
+ (set v16i8:$XT, (int_ppc_vsx_xxpermr v16i8:$XA, v16i8:$XB))
+
+- Vector Splat Immediate Byte: xxspltib
+ . Similar to XXSPLTW:
+ def XXSPLTW : XX2Form_2<60, 164,
+ (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
+ "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
+
+  . No SDAG node, intrinsic, or builtin is required?
+
+- Load/Store Vector: lxv stxv
+ . Has likely SDAG match:
+ (set v?:$XT, (load ix16addr:$src))
+    (store v?:$XT, ix16addr:$dst)
+
+  . Need to define ix16addr in PPCInstrInfo.td
+ ix16addr: 16-byte aligned, see "def memrix16" in PPCInstrInfo.td
+
+- Load/Store Vector Indexed: lxvx stxvx
+ . Has likely SDAG match:
+ (set v?:$XT, (load xoaddr:$src))
+    (store v?:$XT, xoaddr:$dst)
+
+- Load/Store DWord: lxsd stxsd
+ . Similar to lxsdx/stxsdx:
+ def LXSDX : XX1Form<31, 588,
+ (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsdx $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (load xoaddr:$src))]>;
+
+ . (set f64:$XT, (load iaddrX4:$src))
+    (store f64:$XT, iaddrX4:$dst)
+
+- Load/Store SP, with conversion from/to DP: lxssp stxssp
+ . Similar to lxsspx/stxsspx:
+ def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
+ "lxsspx $XT, $src", IIC_LdStLFD,
+ [(set f32:$XT, (load xoaddr:$src))]>;
+
+ . (set f32:$XT, (load iaddrX4:$src))
+    (store f32:$XT, iaddrX4:$dst)
+
+- Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx
+ . Similar to lxsiwzx:
+ def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsiwzx $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+
+ . (set f64:$XT, (PPClfiwzx xoaddr:$src))
+
+- Store as Integer Byte/Halfword Indexed: stxsibx stxsihx
+ . Similar to stxsiwx:
+ def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
+ "stxsiwx $XT, $dst", IIC_LdStSTFD,
+ [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+
+ . (PPCstfiwx f64:$XT, xoaddr:$dst)
+
+- Load Vector Halfword*8/Byte*16 Indexed: lxvh8x lxvb16x
+ . Similar to lxvd2x/lxvw4x:
+ def LXVD2X : XX1Form<31, 844,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvd2x $XT, $src", IIC_LdStLFD,
+ [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>;
+
+ . (set v8i16:$XT, (int_ppc_vsx_lxvh8x xoaddr:$src))
+ (set v16i8:$XT, (int_ppc_vsx_lxvb16x xoaddr:$src))
+
+- Store Vector Halfword*8/Byte*16 Indexed: stxvh8x stxvb16x
+ . Similar to stxvd2x/stxvw4x:
+ def STXVD2X : XX1Form<31, 972,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvd2x $XT, $dst", IIC_LdStSTFD,
+ [(store v2f64:$XT, xoaddr:$dst)]>;
+
+ . (store v8i16:$XT, xoaddr:$dst)
+ (store v16i8:$XT, xoaddr:$dst)
+
+- Load/Store Vector (Left-justified) with Length: lxvl lxvll stxvl stxvll
+ . Likely needs an intrinsic
+ . (set v?:$XT, (int_ppc_vsx_lxvl xoaddr:$src))
+ (set v?:$XT, (int_ppc_vsx_lxvll xoaddr:$src))
+
+  . (int_ppc_vsx_stxvl xoaddr:$dst)
+    (int_ppc_vsx_stxvll xoaddr:$dst)
+
+- Load Vector Word & Splat Indexed: lxvwsx
+ . Likely needs an intrinsic
+ . (set v?:$XT, (int_ppc_vsx_lxvwsx xoaddr:$src))
+
+Atomic operations (l[dw]at, st[dw]at):
+- Provide custom lowering for common atomic operations to use these
+ instructions with the correct Function Code
+- Ensure the operands are in the correct registers (i.e. RT+1, RT+2)
+- Provide builtins, since not all FCs necessarily have an existing LLVM
+  atomic operation; see the sketch below
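+
+  . For illustration (hedged; which Function Codes map to which operations is
+    exactly the open design question), a common operation such lowering could
+    target:
+      #include <stdatomic.h>
+      long fetch_add(_Atomic long *p, long v) {
+        return atomic_fetch_add_explicit(p, v, memory_order_relaxed);
+      }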
+
+Load Doubleword Monitored (ldmx):
+- Investigate whether there are any uses for this. It seems to be related to
+  garbage collection, so it is unlikely to be all that useful for most of the
+  languages we deal with.
+
+Move to CR from XER Extended (mcrxrx):
+- Is there a use for this in LLVM?
+
+Fixed Point Facility:
+
+- Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last
+  . Use intrinsics:
+ (int_ppc_copy_first i32:$rA, i32:$rB)
+ (int_ppc_copy i32:$rA, i32:$rB)
+
+ (int_ppc_paste i32:$rA, i32:$rB)
+ (int_ppc_paste_last i32:$rA, i32:$rB)
+
+ (int_cp_abort)
+
+- Message Synchronize: msgsync
+- SLB*: slbieg slbsync
+- stop
+  . No intrinsics
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt
index a4433625d4..c62d353021 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt
@@ -1,7 +1,7 @@
-====================Apache-2.0 WITH LLVM-exception====================
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+====================Apache-2.0 WITH LLVM-exception====================
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make
index 9903560dcc..68badb4490 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make
@@ -2,15 +2,15 @@
LIBRARY()
-OWNER(
- orivej
- g:cpp-contrib
-)
-
-LICENSE(Apache-2.0 WITH LLVM-exception)
-
-LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-
+OWNER(
+ orivej
+ g:cpp-contrib
+)
+
+LICENSE(Apache-2.0 WITH LLVM-exception)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
PEERDIR(
contrib/libs/llvm12
contrib/libs/llvm12/lib/Support
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/ya.make
index a6812524a8..8c7039a575 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/ya.make
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/ya.make
@@ -2,15 +2,15 @@
LIBRARY()
-OWNER(
- orivej
- g:cpp-contrib
-)
+OWNER(
+ orivej
+ g:cpp-contrib
+)
+
+LICENSE(Apache-2.0 WITH LLVM-exception)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-LICENSE(Apache-2.0 WITH LLVM-exception)
-
-LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-
PEERDIR(
contrib/libs/llvm12
contrib/libs/llvm12/include