| author | heretic <heretic@yandex-team.ru> | 2022-02-10 16:45:46 +0300 |
| --- | --- | --- |
| committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:46 +0300 |
| commit | 81eddc8c0b55990194e112b02d127b87d54164a9 | (patch) |
| tree | 9142afc54d335ea52910662635b898e79e192e49 | /contrib/libs/llvm12/lib/Target/PowerPC |
| parent | 397cbe258b9e064f49c4ca575279f02f39fef76e | (diff) |
| download | ydb-81eddc8c0b55990194e112b02d127b87d54164a9.tar.gz | |
Restoring authorship annotation for <heretic@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/PowerPC')
13 files changed, 1932 insertions, 1932 deletions
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt
index 3a4cf0af9f..2f43d3f272 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt
@@ -1,16 +1,16 @@
-====================Apache-2.0 WITH LLVM-exception====================
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https)//llvm.org/LICENSE.txt for license information.
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// SPDX-License-Identifier) Apache-2.0 WITH LLVM-exception
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+====================Apache-2.0 WITH LLVM-exception====================
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https)//llvm.org/LICENSE.txt for license information.
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// SPDX-License-Identifier) Apache-2.0 WITH LLVM-exception
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt
index a4433625d4..c62d353021 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt
@@ -1,7 +1,7 @@
-====================Apache-2.0 WITH LLVM-exception====================
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+====================Apache-2.0 WITH LLVM-exception====================
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make
index 2388d58641..24183440dc 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make
@@ -2,15 +2,15 @@
 
 LIBRARY()
 
-OWNER(
-    orivej
-    g:cpp-contrib
-)
-
-LICENSE(Apache-2.0 WITH LLVM-exception)
-
-LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-
+OWNER(
+    orivej
+    g:cpp-contrib
+)
+
+LICENSE(Apache-2.0 WITH LLVM-exception)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
 PEERDIR(
     contrib/libs/llvm12
     contrib/libs/llvm12/include
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt
index a4433625d4..c62d353021 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt
@@ -1,7 +1,7 @@
-====================Apache-2.0 WITH LLVM-exception====================
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-
-
-====================Apache-2.0 WITH LLVM-exception====================
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+====================Apache-2.0 WITH LLVM-exception====================
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+
+
+====================Apache-2.0 WITH LLVM-exception====================
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make
index c43266cf40..a412740df2 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make
@@ -2,15 +2,15 @@
 
 LIBRARY()
 
-OWNER(
-    orivej
-    g:cpp-contrib
-)
-
-LICENSE(Apache-2.0 WITH LLVM-exception)
-
-LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-
+OWNER(
+    orivej
+    g:cpp-contrib
+)
+
+LICENSE(Apache-2.0 WITH LLVM-exception)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
 PEERDIR(
     contrib/libs/llvm12
     contrib/libs/llvm12/include
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt
index b0b34714ca..ad3879fc45 100644
--- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt
+++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt
@@ -1,303 +1,303 @@
-====================Apache-2.0 WITH LLVM-exception====================
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
- - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================File: LICENSE.TXT==================== -============================================================================== -The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: -============================================================================== - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ----- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - -============================================================================== -Software from third parties included in the LLVM Project: -============================================================================== -The LLVM Project contains third party software which is under different license -terms. All such code will be identified clearly using at least one of two -mechanisms: -1) It will be in a separate directory tree with its own `LICENSE.txt` or - `LICENSE` file at the top containing the specific license and restrictions - which apply to that software, or -2) It will contain specific license and restriction terms at the top of every - file. - -============================================================================== -Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. 
- - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - - - -====================File: include/llvm/Support/LICENSE.TXT==================== -LLVM System Interface Library -------------------------------------------------------------------------------- -The LLVM System Interface Library is licensed under the Illinois Open Source -License and has the following additional copyright: - -Copyright (C) 2004 eXtensible Systems, Inc. - - -====================NCSA==================== -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================File: LICENSE.TXT==================== +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+ +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + + + +====================File: include/llvm/Support/LICENSE.TXT==================== +LLVM System Interface Library +------------------------------------------------------------------------------- +The LLVM System Interface Library is licensed under the Illinois Open Source +License and has the following additional copyright: + +Copyright (C) 2004 eXtensible Systems, Inc. + + +====================NCSA==================== +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make index 0e037d61de..903dc6ec7f 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make @@ -2,18 +2,18 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE( - Apache-2.0 WITH LLVM-exception AND - NCSA -) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE( + Apache-2.0 WITH LLVM-exception AND + NCSA +) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/README.txt b/contrib/libs/llvm12/lib/Target/PowerPC/README.txt index 0902298a4f..492eb22af2 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/README.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/README.txt @@ -1,607 +1,607 @@ -//===- README.txt - Notes for improving PowerPC-specific code gen ---------===// - -TODO: -* lmw/stmw pass a la arm load store optimizer for prolog/epilog - -===-------------------------------------------------------------------------=== - -This code: - -unsigned add32carry(unsigned sum, unsigned x) { - unsigned z = sum + x; - if (sum + x < x) - z++; - return z; -} - -Should compile to something like: - - addc r3,r3,r4 - addze r3,r3 - -instead we get: - - add r3, r4, r3 - cmplw cr7, r3, r4 - mfcr r4 ; 1 - rlwinm r4, r4, 29, 31, 31 - add r3, r3, r4 - -Ick. - -===-------------------------------------------------------------------------=== - -We compile the hottest inner loop of viterbi to: - - li r6, 0 - b LBB1_84 ;bb432.i -LBB1_83: ;bb420.i - lbzx r8, r5, r7 - addi r6, r7, 1 - stbx r8, r4, r7 -LBB1_84: ;bb432.i - mr r7, r6 - cmplwi cr0, r7, 143 - bne cr0, LBB1_83 ;bb420.i - -The CBE manages to produce: - - li r0, 143 - mtctr r0 -loop: - lbzx r2, r2, r11 - stbx r0, r2, r9 - addi r2, r2, 1 - bdz later - b loop - -This could be much better (bdnz instead of bdz) but it still beats us. If we -produced this with bdnz, the loop would be a single dispatch group. - -===-------------------------------------------------------------------------=== - -Lump the constant pool for each function into ONE pic object, and reference -pieces of it as offsets from the start. For functions like this (contrived -to have lots of constants obviously): - -double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; } - -We generate: - -_X: - lis r2, ha16(.CPI_X_0) - lfd f0, lo16(.CPI_X_0)(r2) - lis r2, ha16(.CPI_X_1) - lfd f2, lo16(.CPI_X_1)(r2) - fmadd f0, f1, f0, f2 - lis r2, ha16(.CPI_X_2) - lfd f1, lo16(.CPI_X_2)(r2) - lis r2, ha16(.CPI_X_3) - lfd f2, lo16(.CPI_X_3)(r2) - fmadd f1, f0, f1, f2 +//===- README.txt - Notes for improving PowerPC-specific code gen ---------===// + +TODO: +* lmw/stmw pass a la arm load store optimizer for prolog/epilog + +===-------------------------------------------------------------------------=== + +This code: + +unsigned add32carry(unsigned sum, unsigned x) { + unsigned z = sum + x; + if (sum + x < x) + z++; + return z; +} + +Should compile to something like: + + addc r3,r3,r4 + addze r3,r3 + +instead we get: + + add r3, r4, r3 + cmplw cr7, r3, r4 + mfcr r4 ; 1 + rlwinm r4, r4, 29, 31, 31 + add r3, r3, r4 + +Ick. 
+ +===-------------------------------------------------------------------------=== + +We compile the hottest inner loop of viterbi to: + + li r6, 0 + b LBB1_84 ;bb432.i +LBB1_83: ;bb420.i + lbzx r8, r5, r7 + addi r6, r7, 1 + stbx r8, r4, r7 +LBB1_84: ;bb432.i + mr r7, r6 + cmplwi cr0, r7, 143 + bne cr0, LBB1_83 ;bb420.i + +The CBE manages to produce: + + li r0, 143 + mtctr r0 +loop: + lbzx r2, r2, r11 + stbx r0, r2, r9 + addi r2, r2, 1 + bdz later + b loop + +This could be much better (bdnz instead of bdz) but it still beats us. If we +produced this with bdnz, the loop would be a single dispatch group. + +===-------------------------------------------------------------------------=== + +Lump the constant pool for each function into ONE pic object, and reference +pieces of it as offsets from the start. For functions like this (contrived +to have lots of constants obviously): + +double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; } + +We generate: + +_X: + lis r2, ha16(.CPI_X_0) + lfd f0, lo16(.CPI_X_0)(r2) + lis r2, ha16(.CPI_X_1) + lfd f2, lo16(.CPI_X_1)(r2) + fmadd f0, f1, f0, f2 + lis r2, ha16(.CPI_X_2) + lfd f1, lo16(.CPI_X_2)(r2) + lis r2, ha16(.CPI_X_3) + lfd f2, lo16(.CPI_X_3)(r2) + fmadd f1, f0, f1, f2 + blr + +It would be better to materialize .CPI_X into a register, then use immediates +off of the register to avoid the lis's. This is even more important in PIC +mode. + +Note that this (and the static variable version) is discussed here for GCC: +http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html + +Here's another example (the sgn function): +double testf(double a) { + return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0); +} + +it produces a BB like this: +LBB1_1: ; cond_true + lis r2, ha16(LCPI1_0) + lfs f0, lo16(LCPI1_0)(r2) + lis r2, ha16(LCPI1_1) + lis r3, ha16(LCPI1_2) + lfs f2, lo16(LCPI1_2)(r3) + lfs f3, lo16(LCPI1_1)(r2) + fsub f0, f0, f1 + fsel f1, f0, f2, f3 blr - -It would be better to materialize .CPI_X into a register, then use immediates -off of the register to avoid the lis's. This is even more important in PIC -mode. - -Note that this (and the static variable version) is discussed here for GCC: -http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html - -Here's another example (the sgn function): -double testf(double a) { - return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0); -} - -it produces a BB like this: -LBB1_1: ; cond_true - lis r2, ha16(LCPI1_0) - lfs f0, lo16(LCPI1_0)(r2) - lis r2, ha16(LCPI1_1) - lis r3, ha16(LCPI1_2) - lfs f2, lo16(LCPI1_2)(r3) - lfs f3, lo16(LCPI1_1)(r2) - fsub f0, f0, f1 - fsel f1, f0, f2, f3 - blr - -===-------------------------------------------------------------------------=== - -PIC Code Gen IPO optimization: - -Squish small scalar globals together into a single global struct, allowing the -address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size -of the GOT on targets with one). 
- -Note that this is discussed here for GCC: -http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html - -===-------------------------------------------------------------------------=== - -Fold add and sub with constant into non-extern, non-weak addresses so this: - -static int a; -void bar(int b) { a = b; } -void foo(unsigned char *c) { - *c = a; -} - -So that - -_foo: - lis r2, ha16(_a) - la r2, lo16(_a)(r2) - lbz r2, 3(r2) - stb r2, 0(r3) - blr - -Becomes - -_foo: - lis r2, ha16(_a+3) - lbz r2, lo16(_a+3)(r2) - stb r2, 0(r3) - blr - -===-------------------------------------------------------------------------=== - -We should compile these two functions to the same thing: - -#include <stdlib.h> -void f(int a, int b, int *P) { - *P = (a-b)>=0?(a-b):(b-a); -} -void g(int a, int b, int *P) { - *P = abs(a-b); -} - -Further, they should compile to something better than: - -_g: - subf r2, r4, r3 - subfic r3, r2, 0 - cmpwi cr0, r2, -1 - bgt cr0, LBB2_2 ; entry -LBB2_1: ; entry - mr r2, r3 -LBB2_2: ; entry - stw r2, 0(r5) - blr - -GCC produces: - -_g: - subf r4,r4,r3 - srawi r2,r4,31 - xor r0,r2,r4 - subf r0,r2,r0 - stw r0,0(r5) - blr - -... which is much nicer. - -This theoretically may help improve twolf slightly (used in dimbox.c:142?). - -===-------------------------------------------------------------------------=== - -PR5945: This: -define i32 @clamp0g(i32 %a) { -entry: - %cmp = icmp slt i32 %a, 0 - %sel = select i1 %cmp, i32 0, i32 %a - ret i32 %sel -} - -Is compile to this with the PowerPC (32-bit) backend: - -_clamp0g: - cmpwi cr0, r3, 0 - li r2, 0 - blt cr0, LBB1_2 -; %bb.1: ; %entry - mr r2, r3 -LBB1_2: ; %entry - mr r3, r2 - blr - -This could be reduced to the much simpler: - -_clamp0g: - srawi r2, r3, 31 - andc r3, r3, r2 - blr - -===-------------------------------------------------------------------------=== - -int foo(int N, int ***W, int **TK, int X) { - int t, i; - - for (t = 0; t < N; ++t) - for (i = 0; i < 4; ++i) - W[t / X][i][t % X] = TK[i][t]; - - return 5; -} - -We generate relatively atrocious code for this loop compared to gcc. 
- -We could also strength reduce the rem and the div: -http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf - -===-------------------------------------------------------------------------=== - -We generate ugly code for this: - -void func(unsigned int *ret, float dx, float dy, float dz, float dw) { - unsigned code = 0; - if(dx < -dw) code |= 1; - if(dx > dw) code |= 2; - if(dy < -dw) code |= 4; - if(dy > dw) code |= 8; - if(dz < -dw) code |= 16; - if(dz > dw) code |= 32; - *ret = code; -} - -===-------------------------------------------------------------------------=== - -%struct.B = type { i8, [3 x i8] } - -define void @bar(%struct.B* %b) { -entry: - %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] - %tmp = load i32* %tmp ; <uint> [#uses=1] - %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] - %tmp4 = load i32* %tmp3 ; <uint> [#uses=1] - %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2] - %tmp9 = load i32* %tmp8 ; <uint> [#uses=1] - %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1] - %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1] - %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1] - %tmp11 = or i32 %tmp1415, %tmp.masked ; <uint> [#uses=1] - %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1] - %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1] - store i32 %tmp13, i32* %tmp8 - ret void -} - -We emit: - -_foo: - lwz r2, 0(r3) - slwi r4, r2, 1 - or r4, r4, r2 - rlwimi r2, r4, 0, 0, 0 - stw r2, 0(r3) - blr - -We could collapse a bunch of those ORs and ANDs and generate the following -equivalent code: - -_foo: - lwz r2, 0(r3) - rlwinm r4, r2, 1, 0, 0 - or r2, r2, r4 - stw r2, 0(r3) - blr - -===-------------------------------------------------------------------------=== - -Consider a function like this: - -float foo(float X) { return X + 1234.4123f; } - -The FP constant ends up in the constant pool, so we need to get the LR register. - This ends up producing code like this: - -_foo: -.LBB_foo_0: ; entry - mflr r11 -*** stw r11, 8(r1) - bl "L00000$pb" -"L00000$pb": - mflr r2 - addis r2, r2, ha16(.CPI_foo_0-"L00000$pb") - lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2) - fadds f1, f1, f0 -*** lwz r11, 8(r1) - mtlr r11 - blr - -This is functional, but there is no reason to spill the LR register all the way -to the stack (the two marked instrs): spilling it to a GPR is quite enough. - -Implementing this will require some codegen improvements. Nate writes: - -"So basically what we need to support the "no stack frame save and restore" is a -generalization of the LR optimization to "callee-save regs". - -Currently, we have LR marked as a callee-save reg. The register allocator sees -that it's callee save, and spills it directly to the stack. - -Ideally, something like this would happen: - -LR would be in a separate register class from the GPRs. The class of LR would be -marked "unspillable". When the register allocator came across an unspillable -reg, it would ask "what is the best class to copy this into that I *can* spill" -If it gets a class back, which it will in this case (the gprs), it grabs a free -register of that class. If it is then later necessary to spill that reg, so be -it. - -===-------------------------------------------------------------------------=== - -We compile this: -int test(_Bool X) { - return X ? 
524288 : 0; -} - -to: -_test: - cmplwi cr0, r3, 0 - lis r2, 8 - li r3, 0 - beq cr0, LBB1_2 ;entry -LBB1_1: ;entry - mr r3, r2 -LBB1_2: ;entry - blr - -instead of: -_test: - addic r2,r3,-1 - subfe r0,r2,r3 - slwi r3,r0,19 - blr - -This sort of thing occurs a lot due to globalopt. - -===-------------------------------------------------------------------------=== - -We compile: - -define i32 @bar(i32 %x) nounwind readnone ssp { -entry: - %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] - %neg = sext i1 %0 to i32 ; <i32> [#uses=1] - ret i32 %neg -} - + +===-------------------------------------------------------------------------=== + +PIC Code Gen IPO optimization: + +Squish small scalar globals together into a single global struct, allowing the +address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size +of the GOT on targets with one). + +Note that this is discussed here for GCC: +http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html + +===-------------------------------------------------------------------------=== + +Fold add and sub with constant into non-extern, non-weak addresses so this: + +static int a; +void bar(int b) { a = b; } +void foo(unsigned char *c) { + *c = a; +} + +So that + +_foo: + lis r2, ha16(_a) + la r2, lo16(_a)(r2) + lbz r2, 3(r2) + stb r2, 0(r3) + blr + +Becomes + +_foo: + lis r2, ha16(_a+3) + lbz r2, lo16(_a+3)(r2) + stb r2, 0(r3) + blr + +===-------------------------------------------------------------------------=== + +We should compile these two functions to the same thing: + +#include <stdlib.h> +void f(int a, int b, int *P) { + *P = (a-b)>=0?(a-b):(b-a); +} +void g(int a, int b, int *P) { + *P = abs(a-b); +} + +Further, they should compile to something better than: + +_g: + subf r2, r4, r3 + subfic r3, r2, 0 + cmpwi cr0, r2, -1 + bgt cr0, LBB2_2 ; entry +LBB2_1: ; entry + mr r2, r3 +LBB2_2: ; entry + stw r2, 0(r5) + blr + +GCC produces: + +_g: + subf r4,r4,r3 + srawi r2,r4,31 + xor r0,r2,r4 + subf r0,r2,r0 + stw r0,0(r5) + blr + +... which is much nicer. + +This theoretically may help improve twolf slightly (used in dimbox.c:142?). + +===-------------------------------------------------------------------------=== + +PR5945: This: +define i32 @clamp0g(i32 %a) { +entry: + %cmp = icmp slt i32 %a, 0 + %sel = select i1 %cmp, i32 0, i32 %a + ret i32 %sel +} + +Is compile to this with the PowerPC (32-bit) backend: + +_clamp0g: + cmpwi cr0, r3, 0 + li r2, 0 + blt cr0, LBB1_2 +; %bb.1: ; %entry + mr r2, r3 +LBB1_2: ; %entry + mr r3, r2 + blr + +This could be reduced to the much simpler: + +_clamp0g: + srawi r2, r3, 31 + andc r3, r3, r2 + blr + +===-------------------------------------------------------------------------=== + +int foo(int N, int ***W, int **TK, int X) { + int t, i; + + for (t = 0; t < N; ++t) + for (i = 0; i < 4; ++i) + W[t / X][i][t % X] = TK[i][t]; + + return 5; +} + +We generate relatively atrocious code for this loop compared to gcc. 
+ +We could also strength reduce the rem and the div: +http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf + +===-------------------------------------------------------------------------=== + +We generate ugly code for this: + +void func(unsigned int *ret, float dx, float dy, float dz, float dw) { + unsigned code = 0; + if(dx < -dw) code |= 1; + if(dx > dw) code |= 2; + if(dy < -dw) code |= 4; + if(dy > dw) code |= 8; + if(dz < -dw) code |= 16; + if(dz > dw) code |= 32; + *ret = code; +} + +===-------------------------------------------------------------------------=== + +%struct.B = type { i8, [3 x i8] } + +define void @bar(%struct.B* %b) { +entry: + %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] + %tmp = load i32* %tmp ; <uint> [#uses=1] + %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] + %tmp4 = load i32* %tmp3 ; <uint> [#uses=1] + %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2] + %tmp9 = load i32* %tmp8 ; <uint> [#uses=1] + %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1] + %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1] + %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1] + %tmp11 = or i32 %tmp1415, %tmp.masked ; <uint> [#uses=1] + %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1] + %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1] + store i32 %tmp13, i32* %tmp8 + ret void +} + +We emit: + +_foo: + lwz r2, 0(r3) + slwi r4, r2, 1 + or r4, r4, r2 + rlwimi r2, r4, 0, 0, 0 + stw r2, 0(r3) + blr + +We could collapse a bunch of those ORs and ANDs and generate the following +equivalent code: + +_foo: + lwz r2, 0(r3) + rlwinm r4, r2, 1, 0, 0 + or r2, r2, r4 + stw r2, 0(r3) + blr + +===-------------------------------------------------------------------------=== + +Consider a function like this: + +float foo(float X) { return X + 1234.4123f; } + +The FP constant ends up in the constant pool, so we need to get the LR register. + This ends up producing code like this: + +_foo: +.LBB_foo_0: ; entry + mflr r11 +*** stw r11, 8(r1) + bl "L00000$pb" +"L00000$pb": + mflr r2 + addis r2, r2, ha16(.CPI_foo_0-"L00000$pb") + lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2) + fadds f1, f1, f0 +*** lwz r11, 8(r1) + mtlr r11 + blr + +This is functional, but there is no reason to spill the LR register all the way +to the stack (the two marked instrs): spilling it to a GPR is quite enough. + +Implementing this will require some codegen improvements. Nate writes: + +"So basically what we need to support the "no stack frame save and restore" is a +generalization of the LR optimization to "callee-save regs". + +Currently, we have LR marked as a callee-save reg. The register allocator sees +that it's callee save, and spills it directly to the stack. + +Ideally, something like this would happen: + +LR would be in a separate register class from the GPRs. The class of LR would be +marked "unspillable". When the register allocator came across an unspillable +reg, it would ask "what is the best class to copy this into that I *can* spill" +If it gets a class back, which it will in this case (the gprs), it grabs a free +register of that class. If it is then later necessary to spill that reg, so be +it. + +===-------------------------------------------------------------------------=== + +We compile this: +int test(_Bool X) { + return X ? 
524288 : 0; +} + to: - -_bar: - cntlzw r2, r3 - slwi r2, r2, 26 - srawi r3, r2, 31 - blr - -it would be better to produce: - -_bar: - addic r3,r3,-1 - subfe r3,r3,r3 +_test: + cmplwi cr0, r3, 0 + lis r2, 8 + li r3, 0 + beq cr0, LBB1_2 ;entry +LBB1_1: ;entry + mr r3, r2 +LBB1_2: ;entry blr - -===-------------------------------------------------------------------------=== - -We generate horrible ppc code for this: - -#define N 2000000 -double a[N],c[N]; -void simpleloop() { - int j; - for (j=0; j<N; j++) - c[j] = a[j]; -} - -LBB1_1: ;bb - lfdx f0, r3, r4 - addi r5, r5, 1 ;; Extra IV for the exit value compare. - stfdx f0, r2, r4 - addi r4, r4, 8 - - xoris r6, r5, 30 ;; This is due to a large immediate. - cmplwi cr0, r6, 33920 - bne cr0, LBB1_1 - -//===---------------------------------------------------------------------===// - -This: - #include <algorithm> - inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b) - { return std::make_pair(a + b, a + b < a); } - bool no_overflow(unsigned a, unsigned b) - { return !full_add(a, b).second; } - -Should compile to: - -__Z11no_overflowjj: - add r4,r3,r4 - subfc r3,r3,r4 - li r3,0 - adde r3,r3,r3 + +instead of: +_test: + addic r2,r3,-1 + subfe r0,r2,r3 + slwi r3,r0,19 + blr + +This sort of thing occurs a lot due to globalopt. + +===-------------------------------------------------------------------------=== + +We compile: + +define i32 @bar(i32 %x) nounwind readnone ssp { +entry: + %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] + %neg = sext i1 %0 to i32 ; <i32> [#uses=1] + ret i32 %neg +} + +to: + +_bar: + cntlzw r2, r3 + slwi r2, r2, 26 + srawi r3, r2, 31 + blr + +it would be better to produce: + +_bar: + addic r3,r3,-1 + subfe r3,r3,r3 + blr + +===-------------------------------------------------------------------------=== + +We generate horrible ppc code for this: + +#define N 2000000 +double a[N],c[N]; +void simpleloop() { + int j; + for (j=0; j<N; j++) + c[j] = a[j]; +} + +LBB1_1: ;bb + lfdx f0, r3, r4 + addi r5, r5, 1 ;; Extra IV for the exit value compare. + stfdx f0, r2, r4 + addi r4, r4, 8 + + xoris r6, r5, 30 ;; This is due to a large immediate. + cmplwi cr0, r6, 33920 + bne cr0, LBB1_1 + +//===---------------------------------------------------------------------===// + +This: + #include <algorithm> + inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b) + { return std::make_pair(a + b, a + b < a); } + bool no_overflow(unsigned a, unsigned b) + { return !full_add(a, b).second; } + +Should compile to: + +__Z11no_overflowjj: + add r4,r3,r4 + subfc r3,r3,r4 + li r3,0 + adde r3,r3,r3 + blr + +(or better) not: + +__Z11no_overflowjj: + add r2, r4, r3 + cmplw cr7, r2, r3 + mfcr r2 + rlwinm r2, r2, 29, 31, 31 + xori r3, r2, 1 blr - -(or better) not: - -__Z11no_overflowjj: - add r2, r4, r3 - cmplw cr7, r2, r3 - mfcr r2 - rlwinm r2, r2, 29, 31, 31 - xori r3, r2, 1 - blr - -//===---------------------------------------------------------------------===// - -We compile some FP comparisons into an mfcr with two rlwinms and an or. 
For -example: -#include <math.h> -int test(double x, double y) { return islessequal(x, y);} -int test2(double x, double y) { return islessgreater(x, y);} -int test3(double x, double y) { return !islessequal(x, y);} - -Compiles into (all three are similar, but the bits differ): - -_test: - fcmpu cr7, f1, f2 - mfcr r2 - rlwinm r3, r2, 29, 31, 31 - rlwinm r2, r2, 31, 31, 31 - or r3, r2, r3 - blr - -GCC compiles this into: - - _test: - fcmpu cr7,f1,f2 - cror 30,28,30 - mfcr r3 - rlwinm r3,r3,31,1 + +//===---------------------------------------------------------------------===// + +We compile some FP comparisons into an mfcr with two rlwinms and an or. For +example: +#include <math.h> +int test(double x, double y) { return islessequal(x, y);} +int test2(double x, double y) { return islessgreater(x, y);} +int test3(double x, double y) { return !islessequal(x, y);} + +Compiles into (all three are similar, but the bits differ): + +_test: + fcmpu cr7, f1, f2 + mfcr r2 + rlwinm r3, r2, 29, 31, 31 + rlwinm r2, r2, 31, 31, 31 + or r3, r2, r3 + blr + +GCC compiles this into: + + _test: + fcmpu cr7,f1,f2 + cror 30,28,30 + mfcr r3 + rlwinm r3,r3,31,1 + blr + +which is more efficient and can use mfocr. See PR642 for some more context. + +//===---------------------------------------------------------------------===// + +void foo(float *data, float d) { + long i; + for (i = 0; i < 8000; i++) + data[i] = d; +} +void foo2(float *data, float d) { + long i; + data--; + for (i = 0; i < 8000; i++) { + data[1] = d; + data++; + } +} + +These compile to: + +_foo: + li r2, 0 +LBB1_1: ; bb + addi r4, r2, 4 + stfsx f1, r3, r2 + cmplwi cr0, r4, 32000 + mr r2, r4 + bne cr0, LBB1_1 ; bb + blr +_foo2: + li r2, 0 +LBB2_1: ; bb + addi r4, r2, 4 + stfsx f1, r3, r2 + cmplwi cr0, r4, 32000 + mr r2, r4 + bne cr0, LBB2_1 ; bb blr - -which is more efficient and can use mfocr. See PR642 for some more context. - -//===---------------------------------------------------------------------===// - -void foo(float *data, float d) { - long i; - for (i = 0; i < 8000; i++) - data[i] = d; -} -void foo2(float *data, float d) { - long i; - data--; - for (i = 0; i < 8000; i++) { - data[1] = d; - data++; - } -} - -These compile to: - -_foo: - li r2, 0 -LBB1_1: ; bb - addi r4, r2, 4 - stfsx f1, r3, r2 - cmplwi cr0, r4, 32000 - mr r2, r4 - bne cr0, LBB1_1 ; bb - blr -_foo2: - li r2, 0 -LBB2_1: ; bb - addi r4, r2, 4 - stfsx f1, r3, r2 - cmplwi cr0, r4, 32000 - mr r2, r4 - bne cr0, LBB2_1 ; bb - blr - -The 'mr' could be eliminated to folding the add into the cmp better. - -//===---------------------------------------------------------------------===// -Codegen for the following (low-probability) case deteriorated considerably -when the correctness fixes for unordered comparisons went in (PR 642, 58871). -It should be possible to recover the code quality described in the comments. - -; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3 -; This should produce one 'or' or 'cror' instruction per function. 
- -; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3 -; PR2964 - -define i32 @test(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1] - %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp345 -} - -define i32 @test2(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1] - %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp345 -} - -define i32 @test3(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1] - %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp34 -} - -//===---------------------------------------------------------------------===// -for the following code: - -void foo (float *__restrict__ a, int *__restrict__ b, int n) { - a[n] = b[n] * 2.321; -} - -we load b[n] to GPR, then move it VSX register and convert it float. We should -use vsx scalar integer load instructions to avoid direct moves - -//===----------------------------------------------------------------------===// -; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg - -; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and -; should not be generated except with -enable-finite-only-fp-math or the like). -; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to -; recognize a more elaborate tree than a simple SETxx. - -define double @test_FNEG_sel(double %A, double %B, double %C) { - %D = fsub double -0.000000e+00, %A ; <double> [#uses=1] - %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1] - %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1] - ret double %E -} - -//===----------------------------------------------------------------------===// -The save/restore sequence for CR in prolog/epilog is terrible: -- Each CR subreg is saved individually, rather than doing one save as a unit. -- On Darwin, the save is done after the decrement of SP, which means the offset -from SP of the save slot can be too big for a store instruction, which means we -need an additional register (currently hacked in 96015+96020; the solution there -is correct, but poor). -- On SVR4 the same thing can happen, and I don't think saving before the SP -decrement is safe on that target, as there is no red zone. This is currently -broken AFAIK, although it's not a target I can exercise. -The following demonstrates the problem: -extern void bar(char *p); -void foo() { - char x[100000]; - bar(x); - __asm__("" ::: "cr2"); -} - -//===-------------------------------------------------------------------------=== -Naming convention for instruction formats is very haphazard. -We have agreed on a naming scheme as follows: - -<INST_form>{_<OP_type><OP_len>}+ - -Where: -INST_form is the instruction format (X-form, etc.) -OP_type is the operand type - one of OPC (opcode), RD (register destination), - RS (register source), - RDp (destination register pair), - RSp (source register pair), IM (immediate), - XO (extended opcode) -OP_len is the length of the operand in bits - -VSX register operands would be of length 6 (split across two fields), -condition register fields of length 3. -We would not need denote reserved fields in names of instruction formats. - -//===----------------------------------------------------------------------===// - -Instruction fusion was introduced in ISA 2.06 and more opportunities added in -ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities -and force instruction pairs to be scheduled together. 
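As a concrete illustration (hypothetical function and global name; whether a
given pair actually fuses is core-specific), a C-level pattern that produces
the kind of dependent pair a fusion-aware scheduler would want to keep
adjacent:

extern long counter;            /* made-up global, for illustration only */

long read_counter(void) {
  /* With the 64-bit ELF medium code model this typically selects an
     addis (counter@toc@ha) immediately followed by a dependent
     ld (counter@toc@l), a classic fusion candidate pair. */
  return counter;
}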
- ------------------------------------------------------------------------------ - -More general handling of any_extend and zero_extend: - -See https://reviews.llvm.org/D24924#555306 + +The 'mr' could be eliminated to folding the add into the cmp better. + +//===---------------------------------------------------------------------===// +Codegen for the following (low-probability) case deteriorated considerably +when the correctness fixes for unordered comparisons went in (PR 642, 58871). +It should be possible to recover the code quality described in the comments. + +; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3 +; This should produce one 'or' or 'cror' instruction per function. + +; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3 +; PR2964 + +define i32 @test(double %x, double %y) nounwind { +entry: + %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1] + %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] + ret i32 %tmp345 +} + +define i32 @test2(double %x, double %y) nounwind { +entry: + %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1] + %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] + ret i32 %tmp345 +} + +define i32 @test3(double %x, double %y) nounwind { +entry: + %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1] + %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] + ret i32 %tmp34 +} + +//===---------------------------------------------------------------------===// +for the following code: + +void foo (float *__restrict__ a, int *__restrict__ b, int n) { + a[n] = b[n] * 2.321; +} + +we load b[n] to GPR, then move it VSX register and convert it float. We should +use vsx scalar integer load instructions to avoid direct moves + +//===----------------------------------------------------------------------===// +; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg + +; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and +; should not be generated except with -enable-finite-only-fp-math or the like). +; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to +; recognize a more elaborate tree than a simple SETxx. + +define double @test_FNEG_sel(double %A, double %B, double %C) { + %D = fsub double -0.000000e+00, %A ; <double> [#uses=1] + %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1] + %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1] + ret double %E +} + +//===----------------------------------------------------------------------===// +The save/restore sequence for CR in prolog/epilog is terrible: +- Each CR subreg is saved individually, rather than doing one save as a unit. +- On Darwin, the save is done after the decrement of SP, which means the offset +from SP of the save slot can be too big for a store instruction, which means we +need an additional register (currently hacked in 96015+96020; the solution there +is correct, but poor). +- On SVR4 the same thing can happen, and I don't think saving before the SP +decrement is safe on that target, as there is no red zone. This is currently +broken AFAIK, although it's not a target I can exercise. +The following demonstrates the problem: +extern void bar(char *p); +void foo() { + char x[100000]; + bar(x); + __asm__("" ::: "cr2"); +} + +//===-------------------------------------------------------------------------=== +Naming convention for instruction formats is very haphazard. +We have agreed on a naming scheme as follows: + +<INST_form>{_<OP_type><OP_len>}+ + +Where: +INST_form is the instruction format (X-form, etc.) 
+OP_type is the operand type - one of OPC (opcode), RD (register destination), + RS (register source), + RDp (destination register pair), + RSp (source register pair), IM (immediate), + XO (extended opcode) +OP_len is the length of the operand in bits + +VSX register operands would be of length 6 (split across two fields), +condition register fields of length 3. +We would not need denote reserved fields in names of instruction formats. + +//===----------------------------------------------------------------------===// + +Instruction fusion was introduced in ISA 2.06 and more opportunities added in +ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities +and force instruction pairs to be scheduled together. + +----------------------------------------------------------------------------- + +More general handling of any_extend and zero_extend: + +See https://reviews.llvm.org/D24924#555306 diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt b/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt index 47d18ecfca..6d32e76ed8 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt @@ -1,338 +1,338 @@ -//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===// - -Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector -registers, to generate better spill code. - -//===----------------------------------------------------------------------===// - -The first should be a single lvx from the constant pool, the second should be -a xor/stvx: - -void foo(void) { - int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 }; - bar (x); -} - -#include <string.h> -void foo(void) { - int x[8] __attribute__((aligned(128))); - memset (x, 0, sizeof (x)); - bar (x); -} - -//===----------------------------------------------------------------------===// - -Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763 - -When -ffast-math is on, we can use 0.0. - -//===----------------------------------------------------------------------===// - - Consider this: - v4f32 Vector; - v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X }; - -Since we know that "Vector" is 16-byte aligned and we know the element offset -of ".X", we should change the load into a lve*x instruction, instead of doing -a load/store/lve*x sequence. - -//===----------------------------------------------------------------------===// - -Implement passing vectors by value into calls and receiving them as arguments. - -//===----------------------------------------------------------------------===// - -GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load -of C1/C2/C3, then a load and vperm of Variable. - -//===----------------------------------------------------------------------===// - -We need a way to teach tblgen that some operands of an intrinsic are required to -be constants. The verifier should enforce this constraint. - -//===----------------------------------------------------------------------===// - -We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte -aligned stack slot, followed by a load/vperm. We should probably just store it -to a scalar stack slot, then use lvsl/vperm to load it. If the value is already -in memory this is a big win. 
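A minimal C-level sketch (assuming the AltiVec language extension; the
function name is made up) of the kind of build_vector that ends up in this
scalar-to-vector path:

vector float splat4(float x) {
  /* The scalar arrives in an FPR; forming the vector from it is the kind
     of case that hits the 16-byte aligned stack slot plus load/vperm
     lowering described above. */
  return (vector float){ x, x, x, x };
}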
- -//===----------------------------------------------------------------------===// - -extract_vector_elt of an arbitrary constant vector can be done with the -following instructions: - -vTemp = vec_splat(v0,2); // 2 is the element the src is in. -vec_ste(&destloc,0,vTemp); - -We can do an arbitrary non-constant value by using lvsr/perm/ste. - -//===----------------------------------------------------------------------===// - -If we want to tie instruction selection into the scheduler, we can do some -constant formation with different instructions. For example, we can generate -"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with -"vsplti 0" or "vxor", each of which use different execution units, thus could -help scheduling. - -This is probably only reasonable for a post-pass scheduler. - -//===----------------------------------------------------------------------===// - -For this function: - -void test(vector float *A, vector float *B) { - vector float C = (vector float)vec_cmpeq(*A, *B); - if (!vec_any_eq(*A, *B)) - *B = (vector float){0,0,0,0}; - *A = C; -} - -we get the following basic block: - - ... - lvx v2, 0, r4 - lvx v3, 0, r3 - vcmpeqfp v4, v3, v2 - vcmpeqfp. v2, v3, v2 - bne cr6, LBB1_2 ; cond_next - -The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the -vcmpeqfp. result is used by a branch. This can be improved. - -//===----------------------------------------------------------------------===// - -The code generated for this is truly aweful: - -vector float test(float a, float b) { - return (vector float){ 0.0, a, 0.0, 0.0}; -} - -LCPI1_0: ; float - .space 4 - .text - .globl _test - .align 4 -_test: - mfspr r2, 256 - oris r3, r2, 4096 - mtspr 256, r3 - lis r3, ha16(LCPI1_0) - addi r4, r1, -32 - stfs f1, -16(r1) - addi r5, r1, -16 - lfs f0, lo16(LCPI1_0)(r3) - stfs f0, -32(r1) - lvx v2, 0, r4 - lvx v3, 0, r5 - vmrghw v3, v3, v2 - vspltw v2, v2, 0 - vmrghw v2, v2, v3 - mtspr 256, r2 - blr - -//===----------------------------------------------------------------------===// - -int foo(vector float *x, vector float *y) { - if (vec_all_eq(*x,*y)) return 3245; - else return 12; -} - -A predicate compare being used in a select_cc should have the same peephole -applied to it as a predicate compare used by a br_cc. There should be no -mfcr here: - -_foo: - mfspr r2, 256 - oris r5, r2, 12288 - mtspr 256, r5 - li r5, 12 - li r6, 3245 - lvx v2, 0, r4 - lvx v3, 0, r3 - vcmpeqfp. v2, v3, v2 - mfcr r3, 2 - rlwinm r3, r3, 25, 31, 31 - cmpwi cr0, r3, 0 - bne cr0, LBB1_2 ; entry -LBB1_1: ; entry - mr r6, r5 -LBB1_2: ; entry - mr r3, r6 - mtspr 256, r2 - blr - -//===----------------------------------------------------------------------===// - -CodeGen/PowerPC/vec_constants.ll has an and operation that should be -codegen'd to andc. The issue is that the 'all ones' build vector is -SelectNodeTo'd a VSPLTISB instruction node before the and/xor is selected -which prevents the vnot pattern from matching. - - -//===----------------------------------------------------------------------===// - -An alternative to the store/store/load approach for illegal insert element -lowering would be: - -1. store element to any ol' slot -2. lvx the slot -3. lvsl 0; splat index; vcmpeq to generate a select mask -4. lvsl slot + x; vperm to rotate result into correct slot -5. vsel result together. - -//===----------------------------------------------------------------------===// - -Should codegen branches on vec_any/vec_all to avoid mfcr. 
Two examples: - -#include <altivec.h> - int f(vector float a, vector float b) - { - int aa = 0; - if (vec_all_ge(a, b)) - aa |= 0x1; - if (vec_any_ge(a,b)) - aa |= 0x2; - return aa; -} - -vector float f(vector float a, vector float b) { - if (vec_any_eq(a, b)) - return a; - else - return b; -} - -//===----------------------------------------------------------------------===// - -We should do a little better with eliminating dead stores. -The stores to the stack are dead since %a and %b are not needed - -; Function Attrs: nounwind -define <16 x i8> @test_vpmsumb() #0 { - entry: - %a = alloca <16 x i8>, align 16 - %b = alloca <16 x i8>, align 16 - store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16 - store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16 - %0 = load <16 x i8>* %a, align 16 - %1 = load <16 x i8>* %b, align 16 - %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1) - ret <16 x i8> %2 -} - - -; Function Attrs: nounwind readnone -declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 - - -Produces the following code with -mtriple=powerpc64-unknown-linux-gnu: -# %bb.0: # %entry - addis 3, 2, .LCPI0_0@toc@ha - addis 4, 2, .LCPI0_1@toc@ha - addi 3, 3, .LCPI0_0@toc@l - addi 4, 4, .LCPI0_1@toc@l - lxvw4x 0, 0, 3 - addi 3, 1, -16 - lxvw4x 35, 0, 4 - stxvw4x 0, 0, 3 - ori 2, 2, 0 - lxvw4x 34, 0, 3 - addi 3, 1, -32 - stxvw4x 35, 0, 3 - vpmsumb 2, 2, 3 - blr - .long 0 - .quad 0 - -The two stxvw4x instructions are not needed. -With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes -are present too. - -//===----------------------------------------------------------------------===// - -The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll: - -define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind { - %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0 - %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1 - %result = add <2 x i64> %x, %tmpvec2 - ret <2 x i64> %result - -This will generate the following instruction sequence: - std 5, -8(1) - std 5, -16(1) - addi 3, 1, -16 - ori 2, 2, 0 - lxvd2x 35, 0, 3 - vaddudm 2, 2, 3 - blr - -This will almost certainly cause a load-hit-store hazard. -Since val is a value parameter, it should not need to be saved onto -the stack, unless it's being done set up the vector register. Instead, -it would be better to splat the value into a vector register, and then -remove the (dead) stores to the stack. - -//===----------------------------------------------------------------------===// - -At the moment we always generate a lxsdx in preference to lfd, or stxsdx in -preference to stfd. When we have a reg-immediate addressing mode, this is a -poor choice, since we have to load the address into an index register. This -should be fixed for P7/P8. - -//===----------------------------------------------------------------------===// - -Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE. -However, we could actually support both kinds on either endianness, if we check -for the appropriate shufflevector pattern for each case ... this would cause -some additional shufflevectors to be recognized and implemented via the -"swapped" form. 
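As a (hypothetical) concrete case, written with the generic vector extension
and Clang's __builtin_shufflevector; the only point is that one source-level
shuffle corresponds to different merge patterns depending on endianness:

typedef unsigned int v4su __attribute__((vector_size(16)));

/* Interleave the first two elements of each input.  With big-endian
   element numbering this is the vmrghw pattern; with little-endian
   numbering the same source-level shuffle has to be matched through the
   "swapped" form mentioned above. */
v4su interleave_first(v4su a, v4su b) {
  return __builtin_shufflevector(a, b, 0, 4, 1, 5);
}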
- -//===----------------------------------------------------------------------===// - -There is a utility program called PerfectShuffle that generates a table of the -shortest instruction sequence for implementing a shufflevector operation on -PowerPC. However, this was designed for big-endian code generation. We could -modify this program to create a little endian version of the table. The table -is used in PPCISelLowering.cpp, PPCTargetLowering::LOWERVECTOR_SHUFFLE(). - -//===----------------------------------------------------------------------===// - -Opportunies to use instructions from PPCInstrVSX.td during code gen - - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07) - - Scalar comparisons (xscmpodp and xscmpudp) - - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp) - -Related to this: we currently do not generate the lxvw4x instruction for either -v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires -a single target type. This should probably be addressed in the PPCISelDAGToDAG logic. - -//===----------------------------------------------------------------------===// - -Currently EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT are type-legal only -for v2f64 with VSX available. We should create custom lowering -support for the other vector types. Without this support, we generate -sequences with load-hit-store hazards. - -v4f32 can be supported with VSX by shifting the correct element into -big-endian lane 0, using xscvspdpn to produce a double-precision -representation of the single-precision value in big-endian -double-precision lane 0, and reinterpreting lane 0 as an FPR or -vector-scalar register. - -v2i64 can be supported with VSX and P8Vector in the same manner as -v2f64, followed by a direct move to a GPR. - -v4i32 can be supported with VSX and P8Vector by shifting the correct -element into big-endian lane 1, using a direct move to a GPR, and -sign-extending the 32-bit result to 64 bits. - -v8i16 can be supported with VSX and P8Vector by shifting the correct -element into big-endian lane 3, using a direct move to a GPR, and -sign-extending the 16-bit result to 64 bits. - -v16i8 can be supported with VSX and P8Vector by shifting the correct -element into big-endian lane 7, using a direct move to a GPR, and -sign-extending the 8-bit result to 64 bits. +//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===// + +Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector +registers, to generate better spill code. + +//===----------------------------------------------------------------------===// + +The first should be a single lvx from the constant pool, the second should be +a xor/stvx: + +void foo(void) { + int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 }; + bar (x); +} + +#include <string.h> +void foo(void) { + int x[8] __attribute__((aligned(128))); + memset (x, 0, sizeof (x)); + bar (x); +} + +//===----------------------------------------------------------------------===// + +Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763 + +When -ffast-math is on, we can use 0.0. 
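A minimal host-side C sketch (nothing PPC-specific assumed) of why the
additive identity used when expanding a multiply as a multiply-add has to be
-0.0 rather than +0.0:

#include <stdio.h>

int main(void) {
  float a = -3.0f, b = 0.0f;
  float mul  = a * b;           /* exact product is -0.0f            */
  float bad  = a * b + 0.0f;    /* adding +0.0 turns -0.0 into +0.0  */
  float good = a * b + -0.0f;   /* adding -0.0 preserves the sign    */
  printf("%g %g %g\n", mul, bad, good);   /* prints: -0 0 -0 */
  return 0;
}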
+ +//===----------------------------------------------------------------------===// + + Consider this: + v4f32 Vector; + v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X }; + +Since we know that "Vector" is 16-byte aligned and we know the element offset +of ".X", we should change the load into a lve*x instruction, instead of doing +a load/store/lve*x sequence. + +//===----------------------------------------------------------------------===// + +Implement passing vectors by value into calls and receiving them as arguments. + +//===----------------------------------------------------------------------===// + +GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load +of C1/C2/C3, then a load and vperm of Variable. + +//===----------------------------------------------------------------------===// + +We need a way to teach tblgen that some operands of an intrinsic are required to +be constants. The verifier should enforce this constraint. + +//===----------------------------------------------------------------------===// + +We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte +aligned stack slot, followed by a load/vperm. We should probably just store it +to a scalar stack slot, then use lvsl/vperm to load it. If the value is already +in memory this is a big win. + +//===----------------------------------------------------------------------===// + +extract_vector_elt of an arbitrary constant vector can be done with the +following instructions: + +vTemp = vec_splat(v0,2); // 2 is the element the src is in. +vec_ste(&destloc,0,vTemp); + +We can do an arbitrary non-constant value by using lvsr/perm/ste. + +//===----------------------------------------------------------------------===// + +If we want to tie instruction selection into the scheduler, we can do some +constant formation with different instructions. For example, we can generate +"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with +"vsplti 0" or "vxor", each of which use different execution units, thus could +help scheduling. + +This is probably only reasonable for a post-pass scheduler. + +//===----------------------------------------------------------------------===// + +For this function: + +void test(vector float *A, vector float *B) { + vector float C = (vector float)vec_cmpeq(*A, *B); + if (!vec_any_eq(*A, *B)) + *B = (vector float){0,0,0,0}; + *A = C; +} + +we get the following basic block: + + ... + lvx v2, 0, r4 + lvx v3, 0, r3 + vcmpeqfp v4, v3, v2 + vcmpeqfp. v2, v3, v2 + bne cr6, LBB1_2 ; cond_next + +The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the +vcmpeqfp. result is used by a branch. This can be improved. 
+ +//===----------------------------------------------------------------------===// + +The code generated for this is truly aweful: + +vector float test(float a, float b) { + return (vector float){ 0.0, a, 0.0, 0.0}; +} + +LCPI1_0: ; float + .space 4 + .text + .globl _test + .align 4 +_test: + mfspr r2, 256 + oris r3, r2, 4096 + mtspr 256, r3 + lis r3, ha16(LCPI1_0) + addi r4, r1, -32 + stfs f1, -16(r1) + addi r5, r1, -16 + lfs f0, lo16(LCPI1_0)(r3) + stfs f0, -32(r1) + lvx v2, 0, r4 + lvx v3, 0, r5 + vmrghw v3, v3, v2 + vspltw v2, v2, 0 + vmrghw v2, v2, v3 + mtspr 256, r2 + blr + +//===----------------------------------------------------------------------===// + +int foo(vector float *x, vector float *y) { + if (vec_all_eq(*x,*y)) return 3245; + else return 12; +} + +A predicate compare being used in a select_cc should have the same peephole +applied to it as a predicate compare used by a br_cc. There should be no +mfcr here: + +_foo: + mfspr r2, 256 + oris r5, r2, 12288 + mtspr 256, r5 + li r5, 12 + li r6, 3245 + lvx v2, 0, r4 + lvx v3, 0, r3 + vcmpeqfp. v2, v3, v2 + mfcr r3, 2 + rlwinm r3, r3, 25, 31, 31 + cmpwi cr0, r3, 0 + bne cr0, LBB1_2 ; entry +LBB1_1: ; entry + mr r6, r5 +LBB1_2: ; entry + mr r3, r6 + mtspr 256, r2 + blr + +//===----------------------------------------------------------------------===// + +CodeGen/PowerPC/vec_constants.ll has an and operation that should be +codegen'd to andc. The issue is that the 'all ones' build vector is +SelectNodeTo'd a VSPLTISB instruction node before the and/xor is selected +which prevents the vnot pattern from matching. + + +//===----------------------------------------------------------------------===// + +An alternative to the store/store/load approach for illegal insert element +lowering would be: + +1. store element to any ol' slot +2. lvx the slot +3. lvsl 0; splat index; vcmpeq to generate a select mask +4. lvsl slot + x; vperm to rotate result into correct slot +5. vsel result together. + +//===----------------------------------------------------------------------===// + +Should codegen branches on vec_any/vec_all to avoid mfcr. Two examples: + +#include <altivec.h> + int f(vector float a, vector float b) + { + int aa = 0; + if (vec_all_ge(a, b)) + aa |= 0x1; + if (vec_any_ge(a,b)) + aa |= 0x2; + return aa; +} + +vector float f(vector float a, vector float b) { + if (vec_any_eq(a, b)) + return a; + else + return b; +} + +//===----------------------------------------------------------------------===// + +We should do a little better with eliminating dead stores. 
+The stores to the stack are dead since %a and %b are not needed + +; Function Attrs: nounwind +define <16 x i8> @test_vpmsumb() #0 { + entry: + %a = alloca <16 x i8>, align 16 + %b = alloca <16 x i8>, align 16 + store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16 + store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16 + %0 = load <16 x i8>* %a, align 16 + %1 = load <16 x i8>* %b, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1) + ret <16 x i8> %2 +} + + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 + + +Produces the following code with -mtriple=powerpc64-unknown-linux-gnu: +# %bb.0: # %entry + addis 3, 2, .LCPI0_0@toc@ha + addis 4, 2, .LCPI0_1@toc@ha + addi 3, 3, .LCPI0_0@toc@l + addi 4, 4, .LCPI0_1@toc@l + lxvw4x 0, 0, 3 + addi 3, 1, -16 + lxvw4x 35, 0, 4 + stxvw4x 0, 0, 3 + ori 2, 2, 0 + lxvw4x 34, 0, 3 + addi 3, 1, -32 + stxvw4x 35, 0, 3 + vpmsumb 2, 2, 3 + blr + .long 0 + .quad 0 + +The two stxvw4x instructions are not needed. +With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes +are present too. + +//===----------------------------------------------------------------------===// + +The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll: + +define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind { + %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0 + %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1 + %result = add <2 x i64> %x, %tmpvec2 + ret <2 x i64> %result + +This will generate the following instruction sequence: + std 5, -8(1) + std 5, -16(1) + addi 3, 1, -16 + ori 2, 2, 0 + lxvd2x 35, 0, 3 + vaddudm 2, 2, 3 + blr + +This will almost certainly cause a load-hit-store hazard. +Since val is a value parameter, it should not need to be saved onto +the stack, unless it's being done set up the vector register. Instead, +it would be better to splat the value into a vector register, and then +remove the (dead) stores to the stack. + +//===----------------------------------------------------------------------===// + +At the moment we always generate a lxsdx in preference to lfd, or stxsdx in +preference to stfd. When we have a reg-immediate addressing mode, this is a +poor choice, since we have to load the address into an index register. This +should be fixed for P7/P8. + +//===----------------------------------------------------------------------===// + +Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE. +However, we could actually support both kinds on either endianness, if we check +for the appropriate shufflevector pattern for each case ... this would cause +some additional shufflevectors to be recognized and implemented via the +"swapped" form. + +//===----------------------------------------------------------------------===// + +There is a utility program called PerfectShuffle that generates a table of the +shortest instruction sequence for implementing a shufflevector operation on +PowerPC. However, this was designed for big-endian code generation. We could +modify this program to create a little endian version of the table. The table +is used in PPCISelLowering.cpp, PPCTargetLowering::LOWERVECTOR_SHUFFLE(). 
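For reference, a (hypothetical) example of the kind of fixed permutation the
table covers, written with the generic vector extension and Clang's
__builtin_shufflevector:

typedef unsigned char v16qu __attribute__((vector_size(16)));

/* Take element (i+4) mod 16 into element i, i.e. rotate the byte
   elements by four positions.  In big-endian element numbering this
   whole permutation can be a single vsldoi; a regenerated little-endian
   table would record a different entry for the same source-level
   shuffle. */
v16qu rotate4(v16qu v) {
  return __builtin_shufflevector(v, v,
                                 4, 5, 6, 7, 8, 9, 10, 11,
                                 12, 13, 14, 15, 0, 1, 2, 3);
}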
+ +//===----------------------------------------------------------------------===// + +Opportunies to use instructions from PPCInstrVSX.td during code gen + - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07) + - Scalar comparisons (xscmpodp and xscmpudp) + - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp) + +Related to this: we currently do not generate the lxvw4x instruction for either +v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires +a single target type. This should probably be addressed in the PPCISelDAGToDAG logic. + +//===----------------------------------------------------------------------===// + +Currently EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT are type-legal only +for v2f64 with VSX available. We should create custom lowering +support for the other vector types. Without this support, we generate +sequences with load-hit-store hazards. + +v4f32 can be supported with VSX by shifting the correct element into +big-endian lane 0, using xscvspdpn to produce a double-precision +representation of the single-precision value in big-endian +double-precision lane 0, and reinterpreting lane 0 as an FPR or +vector-scalar register. + +v2i64 can be supported with VSX and P8Vector in the same manner as +v2f64, followed by a direct move to a GPR. + +v4i32 can be supported with VSX and P8Vector by shifting the correct +element into big-endian lane 1, using a direct move to a GPR, and +sign-extending the 32-bit result to 64 bits. + +v8i16 can be supported with VSX and P8Vector by shifting the correct +element into big-endian lane 3, using a direct move to a GPR, and +sign-extending the 16-bit result to 64 bits. + +v16i8 can be supported with VSX and P8Vector by shifting the correct +element into big-endian lane 7, using a direct move to a GPR, and +sign-extending the 8-bit result to 64 bits. diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt b/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt index 79cb6cceca..c9984b7604 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt @@ -1,605 +1,605 @@ -//===- README_P9.txt - Notes for improving Power9 code gen ----------------===// - -TODO: Instructions Need Implement Instrinstics or Map to LLVM IR - -Altivec: -- Vector Compare Not Equal (Zero): - vcmpneb(.) vcmpneh(.) vcmpnew(.) - vcmpnezb(.) vcmpnezh(.) vcmpnezw(.) - . Same as other VCMP*, use VCMP/VCMPo form (support intrinsic) - -- Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd - . Don't use llvm extractelement because they have different semantics - . Use instrinstics: - (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM)) - (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM)) - (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM)) - (set v2i64:$vD, (int_ppc_altivec_vextractd v2i64:$vA, imm:$UIMM)) - -- Vector Extract Unsigned Byte Left/Right-Indexed: - vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx - . 
Use instrinstics: - // Left-Indexed - (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB)) - (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB)) - (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB)) - - // Right-Indexed - (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB)) - (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB)) - (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB)) - -- Vector Insert Element Instructions: vinsertb vinsertd vinserth vinsertw - (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM)) - (set v8i16:$vD, (int_ppc_altivec_vinsertd v8i16:$vA, imm:$UIMM)) - (set v4i32:$vD, (int_ppc_altivec_vinserth v4i32:$vA, imm:$UIMM)) - (set v2i64:$vD, (int_ppc_altivec_vinsertw v2i64:$vA, imm:$UIMM)) - -- Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]: - vclzlsbb vctzlsbb - . Use intrinsic: - (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB)) - (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB)) - -- Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd - . Map to llvm cttz - (set v16i8:$vD, (cttz v16i8:$vB)) // vctzb - (set v8i16:$vD, (cttz v8i16:$vB)) // vctzh - (set v4i32:$vD, (cttz v4i32:$vB)) // vctzw - (set v2i64:$vD, (cttz v2i64:$vB)) // vctzd - -- Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d - . vextsb2w: - (set v4i32:$vD, (sext v4i8:$vB)) - - // PowerISA_V3.0: - do i = 0 to 3 - VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3]) - end - - . vextsh2w: - (set v4i32:$vD, (sext v4i16:$vB)) - - // PowerISA_V3.0: - do i = 0 to 3 - VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1]) - end - - . vextsb2d - (set v2i64:$vD, (sext v2i8:$vB)) - - // PowerISA_V3.0: - do i = 0 to 1 - VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7]) - end - - . vextsh2d - (set v2i64:$vD, (sext v2i16:$vB)) - - // PowerISA_V3.0: - do i = 0 to 1 - VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3]) - end - - . vextsw2d - (set v2i64:$vD, (sext v2i32:$vB)) - - // PowerISA_V3.0: - do i = 0 to 1 - VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1]) - end - -- Vector Integer Negate: vnegw vnegd - . Map to llvm ineg - (set v4i32:$rT, (ineg v4i32:$rA)) // vnegw - (set v2i64:$rT, (ineg v2i64:$rA)) // vnegd - -- Vector Parity Byte: vprtybw vprtybd vprtybq - . Use intrinsic: - (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB)) - (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB)) - (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB)) - -- Vector (Bit) Permute (Right-indexed): - . vbpermd: Same as "vbpermq", use VX1_Int_Ty2: - VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>; - - . vpermr: use VA1a_Int_Ty3 - VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>; - -- Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi - . Use intrinsic: - VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>; - VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>; - VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>; - VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>; - -- Vector Shift Left/Right: vslv vsrv - . Use intrinsic, don't map to llvm shl and lshr, because they have different - semantics, e.g. vslv: - - do i = 0 to 15 - sh ← VR[VRB].byte[i].bit[5:7] - VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7] - end - - VR[VRT].byte[i] is composed of 2 bytes from src.byte[i:i+1] - - . 
VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>; - VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>; - -- Vector Multiply-by-10 (& Write Carry) Unsigned Quadword: - vmul10uq vmul10cuq - . Use intrinsic: - VX1_Int_Ty<513, "vmul10uq", int_ppc_altivec_vmul10uq, v1i128>; - VX1_Int_Ty< 1, "vmul10cuq", int_ppc_altivec_vmul10cuq, v1i128>; - -- Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword: - vmul10euq vmul10ecuq - . Use intrinsic: - VX1_Int_Ty<577, "vmul10euq", int_ppc_altivec_vmul10euq, v1i128>; - VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>; - -- Decimal Convert From/to National/Zoned/Signed-QWord: - bcdcfn. bcdcfz. bcdctn. bcdctz. bcdcfsq. bcdctsq. - . Use instrinstics: - (set v1i128:$vD, (int_ppc_altivec_bcdcfno v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcdcfzo v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcdctno v1i128:$vB)) - (set v1i128:$vD, (int_ppc_altivec_bcdctzo v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcdcfsqo v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcdctsqo v1i128:$vB)) - -- Decimal Copy-Sign/Set-Sign: bcdcpsgn. bcdsetsgn. - . Use instrinstics: - (set v1i128:$vD, (int_ppc_altivec_bcdcpsgno v1i128:$vA, v1i128:$vB)) - (set v1i128:$vD, (int_ppc_altivec_bcdsetsgno v1i128:$vB, i1:$PS)) - -- Decimal Shift/Unsigned-Shift/Shift-and-Round: bcds. bcdus. bcdsr. - . Use instrinstics: - (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB)) - (set v1i128:$vD, (int_ppc_altivec_bcdsro v1i128:$vA, v1i128:$vB, i1:$PS)) - - . Note! Their VA is accessed only 1 byte, i.e. VA.byte[7] - -- Decimal (Unsigned) Truncate: bcdtrunc. bcdutrunc. - . Use instrinstics: - (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB)) - - . Note! Their VA is accessed only 2 byte, i.e. VA.hword[3] (VA.bit[48:63]) - -VSX: -- QP Copy Sign: xscpsgnqp - . Similar to xscpsgndp - . (set f128:$vT, (fcopysign f128:$vB, f128:$vA) - -- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp - . Similar to xsabsdp/xsnabsdp/xsnegdp - . (set f128:$vT, (fabs f128:$vB)) // xsabsqp - (set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp - (set f128:$vT, (fneg f128:$vB)) // xsnegqp - -- QP Add/Divide/Multiply/Subtract/Square-Root: - xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp - . Similar to xsadddp - . isCommutable = 1 - (set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp - (set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp - - . isCommutable = 0 - (set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp - (set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp - (set f128:$vT, (fsqrt f128:$vB))) // xssqrtqp - -- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root: - xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo - . Similar to xsrsqrtedp?? - def XSRSQRTEDP : XX2Form<60, 74, - (outs vsfrc:$XT), (ins vsfrc:$XB), - "xsrsqrtedp $XT, $XB", IIC_VecFP, - [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; - - . Define DAG Node in PPCInstrInfo.td: - def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>; - def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>; - def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>; - def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>; - def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>; - - DAG patterns of each instruction (PPCInstrVSX.td): - . 
isCommutable = 1 - (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo - (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo - - . isCommutable = 0 - (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo - (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo - (set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo - -- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp - . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp - - . isCommutable = 1 - // xsmaddqp - [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsmsubqp - [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsnmaddqp - [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsnmsubqp - [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - -- Round to Odd of QP (Negative) Multiply-{Add/Subtract}: - xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo - . Similar to xsrsqrtedp?? - - . Define DAG Node in PPCInstrInfo.td: - def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>; - - It looks like we only need to define "PPCfmarto" for these instructions, - because according to PowerISA_V3.0, these instructions perform RTO on - fma's result: - xsmaddqp(o) - v ← bfp_MULTIPLY_ADD(src1, src3, src2) - rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) - result ← bfp_CONVERT_TO_BFP128(rnd) - - xsmsubqp(o) - v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) - rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) - result ← bfp_CONVERT_TO_BFP128(rnd) - - xsnmaddqp(o) - v ← bfp_MULTIPLY_ADD(src1,src3,src2) - rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) - result ← bfp_CONVERT_TO_BFP128(rnd) - - xsnmsubqp(o) - v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) - rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) - result ← bfp_CONVERT_TO_BFP128(rnd) - - DAG patterns of each instruction (PPCInstrVSX.td): - . isCommutable = 1 - // xsmaddqpo - [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsmsubqpo - [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsnmaddqpo - [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsnmsubqpo - [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - -- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp - . ref: XSCMPUDP - def XSCMPUDP : XX3Form_1<60, 35, - (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), - "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; - - . No SDAG, intrinsic, builtin are required?? - Or llvm fcmp order/unorder compare?? - -- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp - . No SDAG, intrinsic, builtin are required? - -- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp - . I checked existing instruction "XSCMPUDP". They are different in target - register. "XSCMPUDP" write to CR field, xscmp*dp write to VSX register - - . 
Use instrinsic: - (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB)) - (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB)) - (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB)) - (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB)) - -- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp. - . Similar to xvcmpeqdp: - defm XVCMPEQDP : XX3Form_Rcr<60, 99, - "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, - int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; - - . So we should use "XX3Form_Rcr" to implement instrinsic - -- Convert DP -> QP: xscvdpqp - . Similar to XSCVDPSP: - def XSCVDPSP : XX2Form<60, 265, - (outs vsfrc:$XT), (ins vsfrc:$XB), - "xscvdpsp $XT, $XB", IIC_VecFP, []>; - . So, No SDAG, intrinsic, builtin are required?? - -- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo - . Similar to XSCVDPSP - . No SDAG, intrinsic, builtin are required?? - -- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero): - xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz - . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS", - "XSCVDPUXDS", "XSCVDPUXWS" - - . DAG patterns: - (set f128:$XT, (PPCfctidz f128:$XB)) // xscvqpsdz - (set f128:$XT, (PPCfctiwz f128:$XB)) // xscvqpswz - (set f128:$XT, (PPCfctiduz f128:$XB)) // xscvqpudz - (set f128:$XT, (PPCfctiwuz f128:$XB)) // xscvqpuwz - -- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp - . Similar to XSCVSXDSP - . (set f128:$XT, (PPCfcfids f64:$XB)) // xscvsdqp - (set f128:$XT, (PPCfcfidus f64:$XB)) // xscvudqp - -- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp - . Similar to XSCVDPSP - . No SDAG, intrinsic, builtin are required?? - -- Vector HP -> SP: xvcvhpsp xvcvsphp - . Similar to XVCVDPSP: - def XVCVDPSP : XX2Form<60, 393, - (outs vsrc:$XT), (ins vsrc:$XB), - "xvcvdpsp $XT, $XB", IIC_VecFP, []>; - . No SDAG, intrinsic, builtin are required?? - -- Round to Quad-Precision Integer: xsrqpi xsrqpix - . These are combination of "XSRDPI", "XSRDPIC", "XSRDPIM", .., because you - need to assign rounding mode in instruction - . Provide builtin? - (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB)) - (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB)) - -- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp - . Provide builtin? - (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB)) - -Fixed Point Facility: - -- Exploit cmprb and cmpeqb (perhaps for something like - isalpha/isdigit/isupper/islower and isspace respectivelly). This can - perhaps be done through a builtin. - -- Provide testing for cnttz[dw] -- Insert Exponent DP/QP: xsiexpdp xsiexpqp - . Use intrinsic? - . xsiexpdp: - // Note: rA and rB are the unsigned integer value. - (set f128:$XT, (int_ppc_vsx_xsiexpdp i64:$rA, i64:$rB)) - - . xsiexpqp: - (set f128:$vT, (int_ppc_vsx_xsiexpqp f128:$vA, f64:$vB)) - -- Extract Exponent/Significand DP/QP: xsxexpdp xsxsigdp xsxexpqp xsxsigqp - . Use intrinsic? - . (set i64:$rT, (int_ppc_vsx_xsxexpdp f64$XB)) // xsxexpdp - (set i64:$rT, (int_ppc_vsx_xsxsigdp f64$XB)) // xsxsigdp - (set f128:$vT, (int_ppc_vsx_xsxexpqp f128$vB)) // xsxexpqp - (set f128:$vT, (int_ppc_vsx_xsxsigqp f128$vB)) // xsxsigqp - -- Vector Insert Word: xxinsertw - - Useful for inserting f32/i32 elements into vectors (the element to be - inserted needs to be prepared) - . Note: llvm has insertelem in "Vector Operations" - ; yields <n x <ty>> - <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx> - - But how to map to it?? 
- [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, - - . Or use intrinsic? - (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM)) - -- Vector Extract Unsigned Word: xxextractuw - - Not useful for extraction of f32 from v4f32 (the current pattern is better - - shift->convert) - - It is useful for (uint_to_fp (vector_extract v4i32, N)) - - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N)) - . Note: llvm has extractelement in "Vector Operations" - ; yields <ty> - <result> = extractelement <n x <ty>> <val>, <ty2> <idx> - - How to map to it?? - [(set f128:$XT, (extractelement v1f128:$XB, i4:$UIMM))] - - . Or use intrinsic? - (set f128:$XT, (int_ppc_vsx_xxextractuw v1f128:$XB, i4:$UIMM)) - -- Vector Insert Exponent DP/SP: xviexpdp xviexpsp - . Use intrinsic - (set v2f64:$XT, (int_ppc_vsx_xviexpdp v2f64:$XA, v2f64:$XB)) - (set v4f32:$XT, (int_ppc_vsx_xviexpsp v4f32:$XA, v4f32:$XB)) - -- Vector Extract Exponent/Significand DP/SP: xvxexpdp xvxexpsp xvxsigdp xvxsigsp - . Use intrinsic - (set v2f64:$XT, (int_ppc_vsx_xvxexpdp v2f64:$XB)) - (set v4f32:$XT, (int_ppc_vsx_xvxexpsp v4f32:$XB)) - (set v2f64:$XT, (int_ppc_vsx_xvxsigdp v2f64:$XB)) - (set v4f32:$XT, (int_ppc_vsx_xvxsigsp v4f32:$XB)) - -- Test Data Class SP/DP/QP: xststdcsp xststdcdp xststdcqp - . No SDAG, intrinsic, builtin are required? - Because it seems that we have no way to map BF field? - - Instruction Form: [PO T XO B XO BX TX] - Asm: xststd* BF,XB,DCMX - - BF is an index to CR register field. - -- Vector Test Data Class SP/DP: xvtstdcsp xvtstdcdp - . Use intrinsic - (set v4f32:$XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, i7:$DCMX)) - (set v2f64:$XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, i7:$DCMX)) - -- Maximum/Minimum Type-C/Type-J DP: xsmaxcdp xsmaxjdp xsmincdp xsminjdp - . PowerISA_V3.0: - "xsmaxcdp can be used to implement the C/C++/Java conditional operation - (x>y)?x:y for single-precision and double-precision arguments." - - Note! c type and j type have different behavior when: - 1. Either input is NaN - 2. Both input are +-Infinity, +-Zero - - . dtype map to llvm fmaxnum/fminnum - jtype use intrinsic - - . xsmaxcdp xsmincdp - (set f64:$XT, (fmaxnum f64:$XA, f64:$XB)) - (set f64:$XT, (fminnum f64:$XA, f64:$XB)) - - . xsmaxjdp xsminjdp - (set f64:$XT, (int_ppc_vsx_xsmaxjdp f64:$XA, f64:$XB)) - (set f64:$XT, (int_ppc_vsx_xsminjdp f64:$XA, f64:$XB)) - -- Vector Byte-Reverse H/W/D/Q Word: xxbrh xxbrw xxbrd xxbrq - . Use intrinsic - (set v8i16:$XT, (int_ppc_vsx_xxbrh v8i16:$XB)) - (set v4i32:$XT, (int_ppc_vsx_xxbrw v4i32:$XB)) - (set v2i64:$XT, (int_ppc_vsx_xxbrd v2i64:$XB)) - (set v1i128:$XT, (int_ppc_vsx_xxbrq v1i128:$XB)) - -- Vector Permute: xxperm xxpermr - . I have checked "PPCxxswapd" in PPCInstrVSX.td, but they are different - . Use intrinsic - (set v16i8:$XT, (int_ppc_vsx_xxperm v16i8:$XA, v16i8:$XB)) - (set v16i8:$XT, (int_ppc_vsx_xxpermr v16i8:$XA, v16i8:$XB)) - -- Vector Splat Immediate Byte: xxspltib - . Similar to XXSPLTW: - def XXSPLTW : XX2Form_2<60, 164, - (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), - "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; - - . No SDAG, intrinsic, builtin are required? - -- Load/Store Vector: lxv stxv - . Has likely SDAG match: - (set v?:$XT, (load ix16addr:$src)) - (set v?:$XT, (store ix16addr:$dst)) - - . Need define ix16addr in PPCInstrInfo.td - ix16addr: 16-byte aligned, see "def memrix16" in PPCInstrInfo.td - -- Load/Store Vector Indexed: lxvx stxvx - . 
Has likely SDAG match: - (set v?:$XT, (load xoaddr:$src)) - (set v?:$XT, (store xoaddr:$dst)) - -- Load/Store DWord: lxsd stxsd - . Similar to lxsdx/stxsdx: - def LXSDX : XX1Form<31, 588, - (outs vsfrc:$XT), (ins memrr:$src), - "lxsdx $XT, $src", IIC_LdStLFD, - [(set f64:$XT, (load xoaddr:$src))]>; - - . (set f64:$XT, (load iaddrX4:$src)) - (set f64:$XT, (store iaddrX4:$dst)) - -- Load/Store SP, with conversion from/to DP: lxssp stxssp - . Similar to lxsspx/stxsspx: - def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), - "lxsspx $XT, $src", IIC_LdStLFD, - [(set f32:$XT, (load xoaddr:$src))]>; - - . (set f32:$XT, (load iaddrX4:$src)) - (set f32:$XT, (store iaddrX4:$dst)) - -- Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx - . Similar to lxsiwzx: - def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src), - "lxsiwzx $XT, $src", IIC_LdStLFD, - [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; - - . (set f64:$XT, (PPClfiwzx xoaddr:$src)) - -- Store as Integer Byte/Halfword Indexed: stxsibx stxsihx - . Similar to stxsiwx: - def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), - "stxsiwx $XT, $dst", IIC_LdStSTFD, - [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; - - . (PPCstfiwx f64:$XT, xoaddr:$dst) - -- Load Vector Halfword*8/Byte*16 Indexed: lxvh8x lxvb16x - . Similar to lxvd2x/lxvw4x: - def LXVD2X : XX1Form<31, 844, - (outs vsrc:$XT), (ins memrr:$src), - "lxvd2x $XT, $src", IIC_LdStLFD, - [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>; - - . (set v8i16:$XT, (int_ppc_vsx_lxvh8x xoaddr:$src)) - (set v16i8:$XT, (int_ppc_vsx_lxvb16x xoaddr:$src)) - -- Store Vector Halfword*8/Byte*16 Indexed: stxvh8x stxvb16x - . Similar to stxvd2x/stxvw4x: - def STXVD2X : XX1Form<31, 972, - (outs), (ins vsrc:$XT, memrr:$dst), - "stxvd2x $XT, $dst", IIC_LdStSTFD, - [(store v2f64:$XT, xoaddr:$dst)]>; - - . (store v8i16:$XT, xoaddr:$dst) - (store v16i8:$XT, xoaddr:$dst) - -- Load/Store Vector (Left-justified) with Length: lxvl lxvll stxvl stxvll - . Likely needs an intrinsic - . (set v?:$XT, (int_ppc_vsx_lxvl xoaddr:$src)) - (set v?:$XT, (int_ppc_vsx_lxvll xoaddr:$src)) - - . (int_ppc_vsx_stxvl xoaddr:$dst)) - (int_ppc_vsx_stxvll xoaddr:$dst)) - -- Load Vector Word & Splat Indexed: lxvwsx - . Likely needs an intrinsic - . (set v?:$XT, (int_ppc_vsx_lxvwsx xoaddr:$src)) - -Atomic operations (l[dw]at, st[dw]at): -- Provide custom lowering for common atomic operations to use these - instructions with the correct Function Code -- Ensure the operands are in the correct register (i.e. RT+1, RT+2) -- Provide builtins since not all FC's necessarily have an existing LLVM - atomic operation - -Load Doubleword Monitored (ldmx): -- Investigate whether there are any uses for this. It seems to be related to - Garbage Collection so it isn't likely to be all that useful for most - languages we deal with. - -Move to CR from XER Extended (mcrxrx): -- Is there a use for this in LLVM? - -Fixed Point Facility: - -- Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last - . Use instrinstics: - (int_ppc_copy_first i32:$rA, i32:$rB) - (int_ppc_copy i32:$rA, i32:$rB) - - (int_ppc_paste i32:$rA, i32:$rB) - (int_ppc_paste_last i32:$rA, i32:$rB) - - (int_cp_abort) - -- Message Synchronize: msgsync -- SLB*: slbieg slbsync -- stop - . No instrinstics +//===- README_P9.txt - Notes for improving Power9 code gen ----------------===// + +TODO: Instructions Need Implement Instrinstics or Map to LLVM IR + +Altivec: +- Vector Compare Not Equal (Zero): + vcmpneb(.) vcmpneh(.) vcmpnew(.) 
+ vcmpnezb(.) vcmpnezh(.) vcmpnezw(.) + . Same as other VCMP*, use VCMP/VCMPo form (support intrinsic) + +- Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd + . Don't use llvm extractelement because they have different semantics + . Use instrinstics: + (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractd v2i64:$vA, imm:$UIMM)) + +- Vector Extract Unsigned Byte Left/Right-Indexed: + vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx + . Use instrinstics: + // Left-Indexed + (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB)) + + // Right-Indexed + (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB)) + +- Vector Insert Element Instructions: vinsertb vinsertd vinserth vinsertw + (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM)) + (set v8i16:$vD, (int_ppc_altivec_vinsertd v8i16:$vA, imm:$UIMM)) + (set v4i32:$vD, (int_ppc_altivec_vinserth v4i32:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vinsertw v2i64:$vA, imm:$UIMM)) + +- Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]: + vclzlsbb vctzlsbb + . Use intrinsic: + (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB)) + +- Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd + . Map to llvm cttz + (set v16i8:$vD, (cttz v16i8:$vB)) // vctzb + (set v8i16:$vD, (cttz v8i16:$vB)) // vctzh + (set v4i32:$vD, (cttz v4i32:$vB)) // vctzw + (set v2i64:$vD, (cttz v2i64:$vB)) // vctzd + +- Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d + . vextsb2w: + (set v4i32:$vD, (sext v4i8:$vB)) + + // PowerISA_V3.0: + do i = 0 to 3 + VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3]) + end + + . vextsh2w: + (set v4i32:$vD, (sext v4i16:$vB)) + + // PowerISA_V3.0: + do i = 0 to 3 + VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1]) + end + + . vextsb2d + (set v2i64:$vD, (sext v2i8:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7]) + end + + . vextsh2d + (set v2i64:$vD, (sext v2i16:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3]) + end + + . vextsw2d + (set v2i64:$vD, (sext v2i32:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1]) + end + +- Vector Integer Negate: vnegw vnegd + . Map to llvm ineg + (set v4i32:$rT, (ineg v4i32:$rA)) // vnegw + (set v2i64:$rT, (ineg v2i64:$rA)) // vnegd + +- Vector Parity Byte: vprtybw vprtybd vprtybq + . Use intrinsic: + (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB)) + (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB)) + (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB)) + +- Vector (Bit) Permute (Right-indexed): + . vbpermd: Same as "vbpermq", use VX1_Int_Ty2: + VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>; + + . vpermr: use VA1a_Int_Ty3 + VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>; + +- Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi + . 
Use intrinsic: + VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>; + VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>; + VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>; + VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>; + +- Vector Shift Left/Right: vslv vsrv + . Use intrinsic, don't map to llvm shl and lshr, because they have different + semantics, e.g. vslv: + + do i = 0 to 15 + sh ← VR[VRB].byte[i].bit[5:7] + VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7] + end + + VR[VRT].byte[i] is composed of 2 bytes from src.byte[i:i+1] + + . VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>; + VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>; + +- Vector Multiply-by-10 (& Write Carry) Unsigned Quadword: + vmul10uq vmul10cuq + . Use intrinsic: + VX1_Int_Ty<513, "vmul10uq", int_ppc_altivec_vmul10uq, v1i128>; + VX1_Int_Ty< 1, "vmul10cuq", int_ppc_altivec_vmul10cuq, v1i128>; + +- Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword: + vmul10euq vmul10ecuq + . Use intrinsic: + VX1_Int_Ty<577, "vmul10euq", int_ppc_altivec_vmul10euq, v1i128>; + VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>; + +- Decimal Convert From/to National/Zoned/Signed-QWord: + bcdcfn. bcdcfz. bcdctn. bcdctz. bcdcfsq. bcdctsq. + . Use instrinstics: + (set v1i128:$vD, (int_ppc_altivec_bcdcfno v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdcfzo v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdctno v1i128:$vB)) + (set v1i128:$vD, (int_ppc_altivec_bcdctzo v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdcfsqo v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdctsqo v1i128:$vB)) + +- Decimal Copy-Sign/Set-Sign: bcdcpsgn. bcdsetsgn. + . Use instrinstics: + (set v1i128:$vD, (int_ppc_altivec_bcdcpsgno v1i128:$vA, v1i128:$vB)) + (set v1i128:$vD, (int_ppc_altivec_bcdsetsgno v1i128:$vB, i1:$PS)) + +- Decimal Shift/Unsigned-Shift/Shift-and-Round: bcds. bcdus. bcdsr. + . Use instrinstics: + (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB)) + (set v1i128:$vD, (int_ppc_altivec_bcdsro v1i128:$vA, v1i128:$vB, i1:$PS)) + + . Note! Their VA is accessed only 1 byte, i.e. VA.byte[7] + +- Decimal (Unsigned) Truncate: bcdtrunc. bcdutrunc. + . Use instrinstics: + (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB)) + + . Note! Their VA is accessed only 2 byte, i.e. VA.hword[3] (VA.bit[48:63]) + +VSX: +- QP Copy Sign: xscpsgnqp + . Similar to xscpsgndp + . (set f128:$vT, (fcopysign f128:$vB, f128:$vA) + +- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp + . Similar to xsabsdp/xsnabsdp/xsnegdp + . (set f128:$vT, (fabs f128:$vB)) // xsabsqp + (set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp + (set f128:$vT, (fneg f128:$vB)) // xsnegqp + +- QP Add/Divide/Multiply/Subtract/Square-Root: + xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp + . Similar to xsadddp + . isCommutable = 1 + (set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp + (set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp + + . isCommutable = 0 + (set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp + (set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp + (set f128:$vT, (fsqrt f128:$vB))) // xssqrtqp + +- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root: + xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo + . Similar to xsrsqrtedp?? 
+ def XSRSQRTEDP : XX2Form<60, 74, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrsqrtedp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; + + . Define DAG Node in PPCInstrInfo.td: + def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>; + def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>; + def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>; + def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>; + def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>; + + DAG patterns of each instruction (PPCInstrVSX.td): + . isCommutable = 1 + (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo + (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo + + . isCommutable = 0 + (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo + (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo + (set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo + +- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp + . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp + + . isCommutable = 1 + // xsmaddqp + [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsmsubqp + [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmaddqp + [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmsubqp + [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + +- Round to Odd of QP (Negative) Multiply-{Add/Subtract}: + xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo + . Similar to xsrsqrtedp?? + + . Define DAG Node in PPCInstrInfo.td: + def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>; + + It looks like we only need to define "PPCfmarto" for these instructions, + because according to PowerISA_V3.0, these instructions perform RTO on + fma's result: + xsmaddqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, src2) + rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsmsubqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) + rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsnmaddqp(o) + v ← bfp_MULTIPLY_ADD(src1,src3,src2) + rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsnmsubqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) + rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) + result ← bfp_CONVERT_TO_BFP128(rnd) + + DAG patterns of each instruction (PPCInstrVSX.td): + . isCommutable = 1 + // xsmaddqpo + [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsmsubqpo + [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmaddqpo + [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmsubqpo + [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + +- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp + . ref: XSCMPUDP + def XSCMPUDP : XX3Form_1<60, 35, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; + + . 
No SDAG, intrinsic, builtin are required?? + Or llvm fcmp order/unorder compare?? + +- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp + . No SDAG, intrinsic, builtin are required? + +- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp + . I checked existing instruction "XSCMPUDP". They are different in target + register. "XSCMPUDP" write to CR field, xscmp*dp write to VSX register + + . Use instrinsic: + (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB)) + +- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp. + . Similar to xvcmpeqdp: + defm XVCMPEQDP : XX3Form_Rcr<60, 99, + "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, + int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; + + . So we should use "XX3Form_Rcr" to implement instrinsic + +- Convert DP -> QP: xscvdpqp + . Similar to XSCVDPSP: + def XSCVDPSP : XX2Form<60, 265, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsp $XT, $XB", IIC_VecFP, []>; + . So, No SDAG, intrinsic, builtin are required?? + +- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo + . Similar to XSCVDPSP + . No SDAG, intrinsic, builtin are required?? + +- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero): + xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz + . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS", + "XSCVDPUXDS", "XSCVDPUXWS" + + . DAG patterns: + (set f128:$XT, (PPCfctidz f128:$XB)) // xscvqpsdz + (set f128:$XT, (PPCfctiwz f128:$XB)) // xscvqpswz + (set f128:$XT, (PPCfctiduz f128:$XB)) // xscvqpudz + (set f128:$XT, (PPCfctiwuz f128:$XB)) // xscvqpuwz + +- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp + . Similar to XSCVSXDSP + . (set f128:$XT, (PPCfcfids f64:$XB)) // xscvsdqp + (set f128:$XT, (PPCfcfidus f64:$XB)) // xscvudqp + +- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp + . Similar to XSCVDPSP + . No SDAG, intrinsic, builtin are required?? + +- Vector HP -> SP: xvcvhpsp xvcvsphp + . Similar to XVCVDPSP: + def XVCVDPSP : XX2Form<60, 393, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsp $XT, $XB", IIC_VecFP, []>; + . No SDAG, intrinsic, builtin are required?? + +- Round to Quad-Precision Integer: xsrqpi xsrqpix + . These are combination of "XSRDPI", "XSRDPIC", "XSRDPIM", .., because you + need to assign rounding mode in instruction + . Provide builtin? + (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB)) + (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB)) + +- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp + . Provide builtin? + (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB)) + +Fixed Point Facility: + +- Exploit cmprb and cmpeqb (perhaps for something like + isalpha/isdigit/isupper/islower and isspace respectivelly). This can + perhaps be done through a builtin. + +- Provide testing for cnttz[dw] +- Insert Exponent DP/QP: xsiexpdp xsiexpqp + . Use intrinsic? + . xsiexpdp: + // Note: rA and rB are the unsigned integer value. + (set f128:$XT, (int_ppc_vsx_xsiexpdp i64:$rA, i64:$rB)) + + . xsiexpqp: + (set f128:$vT, (int_ppc_vsx_xsiexpqp f128:$vA, f64:$vB)) + +- Extract Exponent/Significand DP/QP: xsxexpdp xsxsigdp xsxexpqp xsxsigqp + . Use intrinsic? + . 
(set i64:$rT, (int_ppc_vsx_xsxexpdp f64$XB)) // xsxexpdp + (set i64:$rT, (int_ppc_vsx_xsxsigdp f64$XB)) // xsxsigdp + (set f128:$vT, (int_ppc_vsx_xsxexpqp f128$vB)) // xsxexpqp + (set f128:$vT, (int_ppc_vsx_xsxsigqp f128$vB)) // xsxsigqp + +- Vector Insert Word: xxinsertw + - Useful for inserting f32/i32 elements into vectors (the element to be + inserted needs to be prepared) + . Note: llvm has insertelem in "Vector Operations" + ; yields <n x <ty>> + <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx> + + But how to map to it?? + [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + + . Or use intrinsic? + (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM)) + +- Vector Extract Unsigned Word: xxextractuw + - Not useful for extraction of f32 from v4f32 (the current pattern is better - + shift->convert) + - It is useful for (uint_to_fp (vector_extract v4i32, N)) + - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N)) + . Note: llvm has extractelement in "Vector Operations" + ; yields <ty> + <result> = extractelement <n x <ty>> <val>, <ty2> <idx> + + How to map to it?? + [(set f128:$XT, (extractelement v1f128:$XB, i4:$UIMM))] + + . Or use intrinsic? + (set f128:$XT, (int_ppc_vsx_xxextractuw v1f128:$XB, i4:$UIMM)) + +- Vector Insert Exponent DP/SP: xviexpdp xviexpsp + . Use intrinsic + (set v2f64:$XT, (int_ppc_vsx_xviexpdp v2f64:$XA, v2f64:$XB)) + (set v4f32:$XT, (int_ppc_vsx_xviexpsp v4f32:$XA, v4f32:$XB)) + +- Vector Extract Exponent/Significand DP/SP: xvxexpdp xvxexpsp xvxsigdp xvxsigsp + . Use intrinsic + (set v2f64:$XT, (int_ppc_vsx_xvxexpdp v2f64:$XB)) + (set v4f32:$XT, (int_ppc_vsx_xvxexpsp v4f32:$XB)) + (set v2f64:$XT, (int_ppc_vsx_xvxsigdp v2f64:$XB)) + (set v4f32:$XT, (int_ppc_vsx_xvxsigsp v4f32:$XB)) + +- Test Data Class SP/DP/QP: xststdcsp xststdcdp xststdcqp + . No SDAG, intrinsic, builtin are required? + Because it seems that we have no way to map BF field? + + Instruction Form: [PO T XO B XO BX TX] + Asm: xststd* BF,XB,DCMX + + BF is an index to CR register field. + +- Vector Test Data Class SP/DP: xvtstdcsp xvtstdcdp + . Use intrinsic + (set v4f32:$XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, i7:$DCMX)) + (set v2f64:$XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, i7:$DCMX)) + +- Maximum/Minimum Type-C/Type-J DP: xsmaxcdp xsmaxjdp xsmincdp xsminjdp + . PowerISA_V3.0: + "xsmaxcdp can be used to implement the C/C++/Java conditional operation + (x>y)?x:y for single-precision and double-precision arguments." + + Note! c type and j type have different behavior when: + 1. Either input is NaN + 2. Both input are +-Infinity, +-Zero + + . dtype map to llvm fmaxnum/fminnum + jtype use intrinsic + + . xsmaxcdp xsmincdp + (set f64:$XT, (fmaxnum f64:$XA, f64:$XB)) + (set f64:$XT, (fminnum f64:$XA, f64:$XB)) + + . xsmaxjdp xsminjdp + (set f64:$XT, (int_ppc_vsx_xsmaxjdp f64:$XA, f64:$XB)) + (set f64:$XT, (int_ppc_vsx_xsminjdp f64:$XA, f64:$XB)) + +- Vector Byte-Reverse H/W/D/Q Word: xxbrh xxbrw xxbrd xxbrq + . Use intrinsic + (set v8i16:$XT, (int_ppc_vsx_xxbrh v8i16:$XB)) + (set v4i32:$XT, (int_ppc_vsx_xxbrw v4i32:$XB)) + (set v2i64:$XT, (int_ppc_vsx_xxbrd v2i64:$XB)) + (set v1i128:$XT, (int_ppc_vsx_xxbrq v1i128:$XB)) + +- Vector Permute: xxperm xxpermr + . I have checked "PPCxxswapd" in PPCInstrVSX.td, but they are different + . 
Use intrinsic + (set v16i8:$XT, (int_ppc_vsx_xxperm v16i8:$XA, v16i8:$XB)) + (set v16i8:$XT, (int_ppc_vsx_xxpermr v16i8:$XA, v16i8:$XB)) + +- Vector Splat Immediate Byte: xxspltib + . Similar to XXSPLTW: + def XXSPLTW : XX2Form_2<60, 164, + (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), + "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; + + . No SDAG, intrinsic, builtin are required? + +- Load/Store Vector: lxv stxv + . Has likely SDAG match: + (set v?:$XT, (load ix16addr:$src)) + (set v?:$XT, (store ix16addr:$dst)) + + . Need define ix16addr in PPCInstrInfo.td + ix16addr: 16-byte aligned, see "def memrix16" in PPCInstrInfo.td + +- Load/Store Vector Indexed: lxvx stxvx + . Has likely SDAG match: + (set v?:$XT, (load xoaddr:$src)) + (set v?:$XT, (store xoaddr:$dst)) + +- Load/Store DWord: lxsd stxsd + . Similar to lxsdx/stxsdx: + def LXSDX : XX1Form<31, 588, + (outs vsfrc:$XT), (ins memrr:$src), + "lxsdx $XT, $src", IIC_LdStLFD, + [(set f64:$XT, (load xoaddr:$src))]>; + + . (set f64:$XT, (load iaddrX4:$src)) + (set f64:$XT, (store iaddrX4:$dst)) + +- Load/Store SP, with conversion from/to DP: lxssp stxssp + . Similar to lxsspx/stxsspx: + def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), + "lxsspx $XT, $src", IIC_LdStLFD, + [(set f32:$XT, (load xoaddr:$src))]>; + + . (set f32:$XT, (load iaddrX4:$src)) + (set f32:$XT, (store iaddrX4:$dst)) + +- Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx + . Similar to lxsiwzx: + def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src), + "lxsiwzx $XT, $src", IIC_LdStLFD, + [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; + + . (set f64:$XT, (PPClfiwzx xoaddr:$src)) + +- Store as Integer Byte/Halfword Indexed: stxsibx stxsihx + . Similar to stxsiwx: + def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), + "stxsiwx $XT, $dst", IIC_LdStSTFD, + [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; + + . (PPCstfiwx f64:$XT, xoaddr:$dst) + +- Load Vector Halfword*8/Byte*16 Indexed: lxvh8x lxvb16x + . Similar to lxvd2x/lxvw4x: + def LXVD2X : XX1Form<31, 844, + (outs vsrc:$XT), (ins memrr:$src), + "lxvd2x $XT, $src", IIC_LdStLFD, + [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>; + + . (set v8i16:$XT, (int_ppc_vsx_lxvh8x xoaddr:$src)) + (set v16i8:$XT, (int_ppc_vsx_lxvb16x xoaddr:$src)) + +- Store Vector Halfword*8/Byte*16 Indexed: stxvh8x stxvb16x + . Similar to stxvd2x/stxvw4x: + def STXVD2X : XX1Form<31, 972, + (outs), (ins vsrc:$XT, memrr:$dst), + "stxvd2x $XT, $dst", IIC_LdStSTFD, + [(store v2f64:$XT, xoaddr:$dst)]>; + + . (store v8i16:$XT, xoaddr:$dst) + (store v16i8:$XT, xoaddr:$dst) + +- Load/Store Vector (Left-justified) with Length: lxvl lxvll stxvl stxvll + . Likely needs an intrinsic + . (set v?:$XT, (int_ppc_vsx_lxvl xoaddr:$src)) + (set v?:$XT, (int_ppc_vsx_lxvll xoaddr:$src)) + + . (int_ppc_vsx_stxvl xoaddr:$dst)) + (int_ppc_vsx_stxvll xoaddr:$dst)) + +- Load Vector Word & Splat Indexed: lxvwsx + . Likely needs an intrinsic + . (set v?:$XT, (int_ppc_vsx_lxvwsx xoaddr:$src)) + +Atomic operations (l[dw]at, st[dw]at): +- Provide custom lowering for common atomic operations to use these + instructions with the correct Function Code +- Ensure the operands are in the correct register (i.e. RT+1, RT+2) +- Provide builtins since not all FC's necessarily have an existing LLVM + atomic operation + +Load Doubleword Monitored (ldmx): +- Investigate whether there are any uses for this. It seems to be related to + Garbage Collection so it isn't likely to be all that useful for most + languages we deal with. 
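Several of the entries above propose mapping instructions directly onto IR that LLVM already has (cttz for vctz*, sext for vexts*2*, fneg/fabs/fma on f128 for the QP scalar ops). Purely as an illustration of those generic forms (the function names below are invented for the example; nothing in this sketch comes from the diff itself):

    ; per-element count-trailing-zeros, the generic form the vctzd note maps to
    declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
    define <2 x i64> @cttz_v2i64(<2 x i64> %v) {
      %r = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %v, i1 false)
      ret <2 x i64> %r
    }

    ; f128 abs/negate/fma, the forms the xsabsqp/xsnegqp/xsmaddqp notes map to
    declare fp128 @llvm.fabs.f128(fp128)
    declare fp128 @llvm.fma.f128(fp128, fp128, fp128)
    define fp128 @qp_neg_abs_fma(fp128 %a, fp128 %b, fp128 %c) {
      %abs = call fp128 @llvm.fabs.f128(fp128 %a)
      %neg = fneg fp128 %abs
      %r = call fp128 @llvm.fma.f128(fp128 %neg, fp128 %b, fp128 %c)
      ret fp128 %r
    }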
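The xxextractuw entry above notes that the instruction is useful for (uint_to_fp (vector_extract v4i32, N)). A minimal sketch of that IR shape, again with an invented function name, for reference:

    ; extract word element 2 and convert unsigned -> double
    define double @extract_uitofp(<4 x i32> %v) {
      %e = extractelement <4 x i32> %v, i32 2
      %f = uitofp i32 %e to double
      ret double %f
    }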
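For the atomic-operations entry above (l[dw]at, st[dw]at), any custom lowering would start from ordinary LLVM atomic IR such as the following; this is only a sketch of the input shape, and whether selecting ldat/stdat for it is profitable is exactly the open question in the note:

    ; 64-bit fetch-and-add; a Power9 backend could in principle lower this
    ; to ldat/stdat with the matching Function Code
    define i64 @fetch_add(i64* %p, i64 %v) {
      %old = atomicrmw add i64* %p, i64 %v seq_cst
      ret i64 %old
    }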
+ +Move to CR from XER Extended (mcrxrx): +- Is there a use for this in LLVM? + +Fixed Point Facility: + +- Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last + . Use instrinstics: + (int_ppc_copy_first i32:$rA, i32:$rB) + (int_ppc_copy i32:$rA, i32:$rB) + + (int_ppc_paste i32:$rA, i32:$rB) + (int_ppc_paste_last i32:$rA, i32:$rB) + + (int_cp_abort) + +- Message Synchronize: msgsync +- SLB*: slbieg slbsync +- stop + . No instrinstics diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make index 9903560dcc..68badb4490 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Support diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/ya.make index a6812524a8..8c7039a575 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include |