diff options
author | heretic <heretic@yandex-team.ru> | 2022-02-10 16:45:46 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:46 +0300 |
commit | 81eddc8c0b55990194e112b02d127b87d54164a9 (patch) | |
tree | 9142afc54d335ea52910662635b898e79e192e49 /contrib/libs/llvm12/lib | |
parent | 397cbe258b9e064f49c4ca575279f02f39fef76e (diff) | |
download | ydb-81eddc8c0b55990194e112b02d127b87d54164a9.tar.gz |
Restoring authorship annotation for <heretic@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib')
170 files changed, 12711 insertions, 12711 deletions
diff --git a/contrib/libs/llvm12/lib/Analysis/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Analysis/.yandex_meta/licenses.list.txt index 33ca2e033a..1da7eb6ff9 100644 --- a/contrib/libs/llvm12/lib/Analysis/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Analysis/.yandex_meta/licenses.list.txt @@ -1,346 +1,346 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================COPYRIGHT==================== - // into sext(%x) + sext(c). We'll sext the Offset ourselves: - unsigned OldWidth = Offset.getBitWidth(); - Offset = Offset.trunc(SmallWidth).sext(NewWidth).zextOrSelf(OldWidth); - - -====================COPYRIGHT==================== - // sext(%x) + sext(c) - Scale = 1; - Offset = 0; - - -====================COPYRIGHT==================== - // |X| < |C| --> X > -abs(C) and X < abs(C) - Constant *PosDivisorC = ConstantInt::get(Ty, C->abs()); - Constant *NegDivisorC = ConstantInt::get(Ty, -C->abs()); - - -====================COPYRIGHT==================== - // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a}, - // then after merging (a) and (b) together, we need to put (a,b) back in - // the worklist so that (c) can get merged in as well resulting in - // {(a,b,c) -> d} - // We also need to remove the old target (b), from the worklist. 
We first - - -====================COPYRIGHT==================== - if (match(BO.getOperand(0), m_APInt(C))) { - if (IIQ.hasNoUnsignedWrap(&BO)) { - // 'shl nuw C, x' produces [C, C << CLZ(C)] - Lower = *C; - Upper = Lower.shl(Lower.countLeadingZeros()) + 1; - - -====================COPYRIGHT==================== - if (match(X, m_APInt(C)) && !C->isMinSignedValue()) { - // Is the variable divisor magnitude always greater than the constant - // dividend magnitude? - // |Y| > |C| --> Y < -abs(C) or Y > abs(C) - Constant *PosDividendC = ConstantInt::get(Ty, C->abs()); - Constant *NegDividendC = ConstantInt::get(Ty, -C->abs()); - - -====================File: LICENSE.TXT==================== -============================================================================== -The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: -============================================================================== - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ----- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - -============================================================================== -Software from third parties included in the LLVM Project: -============================================================================== -The LLVM Project contains third party software which is under different license -terms. 
All such code will be identified clearly using at least one of two -mechanisms: -1) It will be in a separate directory tree with its own `LICENSE.txt` or - `LICENSE` file at the top containing the specific license and restrictions - which apply to that software, or -2) It will contain specific license and restriction terms at the top of every - file. - -============================================================================== -Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - - - -====================File: include/llvm/Support/LICENSE.TXT==================== -LLVM System Interface Library -------------------------------------------------------------------------------- -The LLVM System Interface Library is licensed under the Illinois Open Source -License and has the following additional copyright: - -Copyright (C) 2004 eXtensible Systems, Inc. - - -====================NCSA==================== -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================COPYRIGHT==================== + // into sext(%x) + sext(c). 
We'll sext the Offset ourselves: + unsigned OldWidth = Offset.getBitWidth(); + Offset = Offset.trunc(SmallWidth).sext(NewWidth).zextOrSelf(OldWidth); + + +====================COPYRIGHT==================== + // sext(%x) + sext(c) + Scale = 1; + Offset = 0; + + +====================COPYRIGHT==================== + // |X| < |C| --> X > -abs(C) and X < abs(C) + Constant *PosDivisorC = ConstantInt::get(Ty, C->abs()); + Constant *NegDivisorC = ConstantInt::get(Ty, -C->abs()); + + +====================COPYRIGHT==================== + // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a}, + // then after merging (a) and (b) together, we need to put (a,b) back in + // the worklist so that (c) can get merged in as well resulting in + // {(a,b,c) -> d} + // We also need to remove the old target (b), from the worklist. We first + + +====================COPYRIGHT==================== + if (match(BO.getOperand(0), m_APInt(C))) { + if (IIQ.hasNoUnsignedWrap(&BO)) { + // 'shl nuw C, x' produces [C, C << CLZ(C)] + Lower = *C; + Upper = Lower.shl(Lower.countLeadingZeros()) + 1; + + +====================COPYRIGHT==================== + if (match(X, m_APInt(C)) && !C->isMinSignedValue()) { + // Is the variable divisor magnitude always greater than the constant + // dividend magnitude? + // |Y| > |C| --> Y < -abs(C) or Y > abs(C) + Constant *PosDividendC = ConstantInt::get(Ty, C->abs()); + Constant *NegDividendC = ConstantInt::get(Ty, -C->abs()); + + +====================File: LICENSE.TXT==================== +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
+ +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. 
+ +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + + + +====================File: include/llvm/Support/LICENSE.TXT==================== +LLVM System Interface Library +------------------------------------------------------------------------------- +The LLVM System Interface Library is licensed under the Illinois Open Source +License and has the following additional copyright: + +Copyright (C) 2004 eXtensible Systems, Inc. 
+ + +====================NCSA==================== +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. diff --git a/contrib/libs/llvm12/lib/Analysis/MLInlineAdvisor.cpp b/contrib/libs/llvm12/lib/Analysis/MLInlineAdvisor.cpp index ba8a5bd922..89f4ff427d 100644 --- a/contrib/libs/llvm12/lib/Analysis/MLInlineAdvisor.cpp +++ b/contrib/libs/llvm12/lib/Analysis/MLInlineAdvisor.cpp @@ -1,254 +1,254 @@ -//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the interface between the inliner and a learned model. -// It delegates model evaluation to either the AOT compiled model (the -// 'release' mode) or a runtime-loaded model (the 'development' case). -// -//===----------------------------------------------------------------------===// +//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the interface between the inliner and a learned model. +// It delegates model evaluation to either the AOT compiled model (the +// 'release' mode) or a runtime-loaded model (the 'development' case). 
+// +//===----------------------------------------------------------------------===// #include "llvm/Config/config.h" #if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) -#include <limits> -#include <unordered_map> -#include <unordered_set> - -#include "llvm/ADT/SCCIterator.h" -#include "llvm/Analysis/CallGraph.h" +#include <limits> +#include <unordered_map> +#include <unordered_set> + +#include "llvm/ADT/SCCIterator.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" -#include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/MLInlineAdvisor.h" -#include "llvm/Analysis/MLModelRunner.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Path.h" - -using namespace llvm; - -#define DEBUG_TYPE "inline-ml" - -static cl::opt<float> SizeIncreaseThreshold( - "ml-advisor-size-increase-threshold", cl::Hidden, - cl::desc("Maximum factor by which expected native size may increase before " - "blocking any further inlining."), - cl::init(2.0)); - -const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{ -#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME, - INLINE_FEATURE_ITERATOR(POPULATE_NAMES) -#undef POPULATE_NAMES -}; - -const char *const llvm::DecisionName = "inlining_decision"; -const char *const llvm::DefaultDecisionName = "inlining_default"; -const char *const llvm::RewardName = "delta_size"; - -CallBase *getInlinableCS(Instruction &I) { - if (auto *CS = dyn_cast<CallBase>(&I)) - if (Function *Callee = CS->getCalledFunction()) { - if (!Callee->isDeclaration()) { - return CS; - } - } - return nullptr; -} - -MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM, - std::unique_ptr<MLModelRunner> Runner) - : InlineAdvisor( 
+#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/MLInlineAdvisor.h" +#include "llvm/Analysis/MLModelRunner.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" + +using namespace llvm; + +#define DEBUG_TYPE "inline-ml" + +static cl::opt<float> SizeIncreaseThreshold( + "ml-advisor-size-increase-threshold", cl::Hidden, + cl::desc("Maximum factor by which expected native size may increase before " + "blocking any further inlining."), + cl::init(2.0)); + +const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{ +#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME, + INLINE_FEATURE_ITERATOR(POPULATE_NAMES) +#undef POPULATE_NAMES +}; + +const char *const llvm::DecisionName = "inlining_decision"; +const char *const llvm::DefaultDecisionName = "inlining_default"; +const char *const llvm::RewardName = "delta_size"; + +CallBase *getInlinableCS(Instruction &I) { + if (auto *CS = dyn_cast<CallBase>(&I)) + if (Function *Callee = CS->getCalledFunction()) { + if (!Callee->isDeclaration()) { + return CS; + } + } + return nullptr; +} + +MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM, + std::unique_ptr<MLModelRunner> Runner) + : InlineAdvisor( M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), ModelRunner(std::move(Runner)), CG(new CallGraph(M)), - InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) { - assert(ModelRunner); - - // Extract the 'call site height' feature - the position of a call site - // relative to the farthest statically reachable SCC node. We don't mutate - // this value while inlining happens. Empirically, this feature proved - // critical in behavioral cloning - i.e. 
training a model to mimic the manual - // heuristic's decisions - and, thus, equally important for training for - // improvement. - for (auto I = scc_begin(CG.get()); !I.isAtEnd(); ++I) { - const std::vector<CallGraphNode *> &CGNodes = *I; - unsigned Level = 0; - for (auto *CGNode : CGNodes) { - Function *F = CGNode->getFunction(); - if (!F || F->isDeclaration()) - continue; - for (auto &I : instructions(F)) { - if (auto *CS = getInlinableCS(I)) { - auto *Called = CS->getCalledFunction(); - auto Pos = FunctionLevels.find(Called); - // In bottom up traversal, an inlinable callee is either in the - // same SCC, or to a function in a visited SCC. So not finding its - // level means we haven't visited it yet, meaning it's in this SCC. - if (Pos == FunctionLevels.end()) - continue; - Level = std::max(Level, Pos->second + 1); - } - } - } - for (auto *CGNode : CGNodes) { - Function *F = CGNode->getFunction(); - if (F && !F->isDeclaration()) - FunctionLevels[F] = Level; - } - } -} - -void MLInlineAdvisor::onPassEntry() { - // Function passes executed between InlinerPass runs may have changed the - // module-wide features. - NodeCount = 0; - EdgeCount = 0; - for (auto &F : M) - if (!F.isDeclaration()) { - ++NodeCount; - EdgeCount += getLocalCalls(F); - } -} - -int64_t MLInlineAdvisor::getLocalCalls(Function &F) { + InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) { + assert(ModelRunner); + + // Extract the 'call site height' feature - the position of a call site + // relative to the farthest statically reachable SCC node. We don't mutate + // this value while inlining happens. Empirically, this feature proved + // critical in behavioral cloning - i.e. training a model to mimic the manual + // heuristic's decisions - and, thus, equally important for training for + // improvement. 
+ for (auto I = scc_begin(CG.get()); !I.isAtEnd(); ++I) { + const std::vector<CallGraphNode *> &CGNodes = *I; + unsigned Level = 0; + for (auto *CGNode : CGNodes) { + Function *F = CGNode->getFunction(); + if (!F || F->isDeclaration()) + continue; + for (auto &I : instructions(F)) { + if (auto *CS = getInlinableCS(I)) { + auto *Called = CS->getCalledFunction(); + auto Pos = FunctionLevels.find(Called); + // In bottom up traversal, an inlinable callee is either in the + // same SCC, or to a function in a visited SCC. So not finding its + // level means we haven't visited it yet, meaning it's in this SCC. + if (Pos == FunctionLevels.end()) + continue; + Level = std::max(Level, Pos->second + 1); + } + } + } + for (auto *CGNode : CGNodes) { + Function *F = CGNode->getFunction(); + if (F && !F->isDeclaration()) + FunctionLevels[F] = Level; + } + } +} + +void MLInlineAdvisor::onPassEntry() { + // Function passes executed between InlinerPass runs may have changed the + // module-wide features. + NodeCount = 0; + EdgeCount = 0; + for (auto &F : M) + if (!F.isDeclaration()) { + ++NodeCount; + EdgeCount += getLocalCalls(F); + } +} + +int64_t MLInlineAdvisor::getLocalCalls(Function &F) { return FAM.getResult<FunctionPropertiesAnalysis>(F) .DirectCallsToDefinedFunctions; -} - -// Update the internal state of the advisor, and force invalidate feature -// analysis. Currently, we maintain minimal (and very simple) global state - the -// number of functions and the number of static calls. We also keep track of the -// total IR size in this module, to stop misbehaving policies at a certain bloat -// factor (SizeIncreaseThreshold) -void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, - bool CalleeWasDeleted) { - assert(!ForceStop); - Function *Caller = Advice.getCaller(); - Function *Callee = Advice.getCallee(); - - // The caller features aren't valid anymore. +} + +// Update the internal state of the advisor, and force invalidate feature +// analysis. 
Currently, we maintain minimal (and very simple) global state - the +// number of functions and the number of static calls. We also keep track of the +// total IR size in this module, to stop misbehaving policies at a certain bloat +// factor (SizeIncreaseThreshold) +void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice, + bool CalleeWasDeleted) { + assert(!ForceStop); + Function *Caller = Advice.getCaller(); + Function *Callee = Advice.getCallee(); + + // The caller features aren't valid anymore. FAM.invalidate<FunctionPropertiesAnalysis>(*Caller); - int64_t IRSizeAfter = - getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize); - CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize); - if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize) - ForceStop = true; - - // We can delta-update module-wide features. We know the inlining only changed - // the caller, and maybe the callee (by deleting the latter). - // Nodes are simple to update. - // For edges, we 'forget' the edges that the caller and callee used to have - // before inlining, and add back what they currently have together. - int64_t NewCallerAndCalleeEdges = + int64_t IRSizeAfter = + getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize); + CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize); + if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize) + ForceStop = true; + + // We can delta-update module-wide features. We know the inlining only changed + // the caller, and maybe the callee (by deleting the latter). + // Nodes are simple to update. + // For edges, we 'forget' the edges that the caller and callee used to have + // before inlining, and add back what they currently have together. 
+ int64_t NewCallerAndCalleeEdges = FAM.getResult<FunctionPropertiesAnalysis>(*Caller) - .DirectCallsToDefinedFunctions; - - if (CalleeWasDeleted) - --NodeCount; - else + .DirectCallsToDefinedFunctions; + + if (CalleeWasDeleted) + --NodeCount; + else NewCallerAndCalleeEdges += FAM.getResult<FunctionPropertiesAnalysis>(*Callee) .DirectCallsToDefinedFunctions; - EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges); - assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0); -} - -int64_t MLInlineAdvisor::getModuleIRSize() const { - int64_t Ret = 0; - for (auto &F : CG->getModule()) - if (!F.isDeclaration()) - Ret += getIRSize(F); - return Ret; -} - + EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges); + assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0); +} + +int64_t MLInlineAdvisor::getModuleIRSize() const { + int64_t Ret = 0; + for (auto &F : CG->getModule()) + if (!F.isDeclaration()) + Ret += getIRSize(F); + return Ret; +} + std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) { - auto &Caller = *CB.getCaller(); - auto &Callee = *CB.getCalledFunction(); - - auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { - return FAM.getResult<AssumptionAnalysis>(F); - }; - auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee); - auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller); - + auto &Caller = *CB.getCaller(); + auto &Callee = *CB.getCalledFunction(); + + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { + return FAM.getResult<AssumptionAnalysis>(F); + }; + auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee); + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller); + auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE); - // If this is a "never inline" case, there won't be any changes to internal - // state we need to track, so we can just return the base InlineAdvice, which - // will do nothing interesting. 
- // Same thing if this is a recursive case. + // If this is a "never inline" case, there won't be any changes to internal + // state we need to track, so we can just return the base InlineAdvice, which + // will do nothing interesting. + // Same thing if this is a recursive case. if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never || - &Caller == &Callee) + &Caller == &Callee) return getMandatoryAdvice(CB, false); - + bool Mandatory = MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always; - - // If we need to stop, we won't want to track anymore any state changes, so - // we just return the base InlineAdvice, which acts as a noop. - if (ForceStop) { - ORE.emit([&] { - return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB) - << "Won't attempt inlining because module size grew too much."; - }); - return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory); - } - - int CostEstimate = 0; - if (!Mandatory) { - auto IsCallSiteInlinable = - llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache); - if (!IsCallSiteInlinable) { - // We can't inline this for correctness reasons, so return the base - // InlineAdvice, as we don't care about tracking any state changes (which - // won't happen). - return std::make_unique<InlineAdvice>(this, CB, ORE, false); - } - CostEstimate = *IsCallSiteInlinable; - } - - if (Mandatory) + + // If we need to stop, we won't want to track anymore any state changes, so + // we just return the base InlineAdvice, which acts as a noop. 
+ if (ForceStop) { + ORE.emit([&] { + return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB) + << "Won't attempt inlining because module size grew too much."; + }); + return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory); + } + + int CostEstimate = 0; + if (!Mandatory) { + auto IsCallSiteInlinable = + llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache); + if (!IsCallSiteInlinable) { + // We can't inline this for correctness reasons, so return the base + // InlineAdvice, as we don't care about tracking any state changes (which + // won't happen). + return std::make_unique<InlineAdvice>(this, CB, ORE, false); + } + CostEstimate = *IsCallSiteInlinable; + } + + if (Mandatory) return getMandatoryAdvice(CB, true); - - auto NrCtantParams = 0; - for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { - NrCtantParams += (isa<Constant>(*I)); - } - + + auto NrCtantParams = 0; + for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { + NrCtantParams += (isa<Constant>(*I)); + } + auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller); auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee); - - ModelRunner->setFeature(FeatureIndex::CalleeBasicBlockCount, - CalleeBefore.BasicBlockCount); - ModelRunner->setFeature(FeatureIndex::CallSiteHeight, - FunctionLevels[&Caller]); - ModelRunner->setFeature(FeatureIndex::NodeCount, NodeCount); - ModelRunner->setFeature(FeatureIndex::NrCtantParams, NrCtantParams); - ModelRunner->setFeature(FeatureIndex::CostEstimate, CostEstimate); - ModelRunner->setFeature(FeatureIndex::EdgeCount, EdgeCount); - ModelRunner->setFeature(FeatureIndex::CallerUsers, CallerBefore.Uses); - ModelRunner->setFeature(FeatureIndex::CallerConditionallyExecutedBlocks, - CallerBefore.BlocksReachedFromConditionalInstruction); - ModelRunner->setFeature(FeatureIndex::CallerBasicBlockCount, - CallerBefore.BasicBlockCount); - ModelRunner->setFeature(FeatureIndex::CalleeConditionallyExecutedBlocks, - 
CalleeBefore.BlocksReachedFromConditionalInstruction); - ModelRunner->setFeature(FeatureIndex::CalleeUsers, CalleeBefore.Uses); - return getAdviceFromModel(CB, ORE); -} - -std::unique_ptr<MLInlineAdvice> -MLInlineAdvisor::getAdviceFromModel(CallBase &CB, - OptimizationRemarkEmitter &ORE) { - return std::make_unique<MLInlineAdvice>(this, CB, ORE, ModelRunner->run()); -} - + + ModelRunner->setFeature(FeatureIndex::CalleeBasicBlockCount, + CalleeBefore.BasicBlockCount); + ModelRunner->setFeature(FeatureIndex::CallSiteHeight, + FunctionLevels[&Caller]); + ModelRunner->setFeature(FeatureIndex::NodeCount, NodeCount); + ModelRunner->setFeature(FeatureIndex::NrCtantParams, NrCtantParams); + ModelRunner->setFeature(FeatureIndex::CostEstimate, CostEstimate); + ModelRunner->setFeature(FeatureIndex::EdgeCount, EdgeCount); + ModelRunner->setFeature(FeatureIndex::CallerUsers, CallerBefore.Uses); + ModelRunner->setFeature(FeatureIndex::CallerConditionallyExecutedBlocks, + CallerBefore.BlocksReachedFromConditionalInstruction); + ModelRunner->setFeature(FeatureIndex::CallerBasicBlockCount, + CallerBefore.BasicBlockCount); + ModelRunner->setFeature(FeatureIndex::CalleeConditionallyExecutedBlocks, + CalleeBefore.BlocksReachedFromConditionalInstruction); + ModelRunner->setFeature(FeatureIndex::CalleeUsers, CalleeBefore.Uses); + return getAdviceFromModel(CB, ORE); +} + +std::unique_ptr<MLInlineAdvice> +MLInlineAdvisor::getAdviceFromModel(CallBase &CB, + OptimizationRemarkEmitter &ORE) { + return std::make_unique<MLInlineAdvice>(this, CB, ORE, ModelRunner->run()); +} + std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB, bool Advice) { // Make sure we track inlinings in all cases - mandatory or not. 
@@ -262,53 +262,53 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB, return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice); } -std::unique_ptr<MLInlineAdvice> +std::unique_ptr<MLInlineAdvice> MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) { return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true); -} - -void MLInlineAdvice::reportContextForRemark( - DiagnosticInfoOptimizationBase &OR) { - using namespace ore; - OR << NV("Callee", Callee->getName()); - for (size_t I = 0; I < NumberOfFeatures; ++I) - OR << NV(FeatureNameMap[I], getAdvisor()->getModelRunner().getFeature(I)); - OR << NV("ShouldInline", isInliningRecommended()); -} - -void MLInlineAdvice::recordInliningImpl() { - ORE.emit([&]() { - OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block); - reportContextForRemark(R); - return R; - }); - getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false); -} - -void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() { - ORE.emit([&]() { - OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc, - Block); - reportContextForRemark(R); - return R; - }); - getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true); -} - -void MLInlineAdvice::recordUnsuccessfulInliningImpl( - const InlineResult &Result) { - ORE.emit([&]() { - OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful", - DLoc, Block); - reportContextForRemark(R); - return R; - }); -} -void MLInlineAdvice::recordUnattemptedInliningImpl() { - ORE.emit([&]() { - OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block); - reportContextForRemark(R); - return R; - }); +} + +void MLInlineAdvice::reportContextForRemark( + DiagnosticInfoOptimizationBase &OR) { + using namespace ore; + OR << NV("Callee", Callee->getName()); + for (size_t I = 0; I < NumberOfFeatures; ++I) + OR << NV(FeatureNameMap[I], getAdvisor()->getModelRunner().getFeature(I)); + OR << 
NV("ShouldInline", isInliningRecommended()); +} + +void MLInlineAdvice::recordInliningImpl() { + ORE.emit([&]() { + OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block); + reportContextForRemark(R); + return R; + }); + getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false); +} + +void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() { + ORE.emit([&]() { + OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc, + Block); + reportContextForRemark(R); + return R; + }); + getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true); +} + +void MLInlineAdvice::recordUnsuccessfulInliningImpl( + const InlineResult &Result) { + ORE.emit([&]() { + OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful", + DLoc, Block); + reportContextForRemark(R); + return R; + }); +} +void MLInlineAdvice::recordUnattemptedInliningImpl() { + ORE.emit([&]() { + OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block); + reportContextForRemark(R); + return R; + }); } #endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) diff --git a/contrib/libs/llvm12/lib/Analysis/README.txt b/contrib/libs/llvm12/lib/Analysis/README.txt index 6d6b30e0b1..0e96e4c950 100644 --- a/contrib/libs/llvm12/lib/Analysis/README.txt +++ b/contrib/libs/llvm12/lib/Analysis/README.txt @@ -1,30 +1,30 @@ -Analysis Opportunities: - -//===---------------------------------------------------------------------===// - -In test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll, the -ScalarEvolution expression for %r is this: - - {1,+,3,+,2}<loop> - -Outside the loop, this could be evaluated simply as (%n * %n), however -ScalarEvolution currently evaluates it as - - (-2 + (2 * (trunc i65 (((zext i64 (-2 + %n) to i65) * (zext i64 (-1 + %n) to i65)) /u 2) to i64)) + (3 * %n)) - -In addition to being much more complicated, it involves i65 arithmetic, -which is very inefficient when expanded into code. 
- -//===---------------------------------------------------------------------===// - -In formatValue in test/CodeGen/X86/lsr-delayed-fold.ll, - -ScalarEvolution is forming this expression: - -((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32))) - -This could be folded to - -(-1 * (trunc i64 undef to i32)) - -//===---------------------------------------------------------------------===// +Analysis Opportunities: + +//===---------------------------------------------------------------------===// + +In test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll, the +ScalarEvolution expression for %r is this: + + {1,+,3,+,2}<loop> + +Outside the loop, this could be evaluated simply as (%n * %n), however +ScalarEvolution currently evaluates it as + + (-2 + (2 * (trunc i65 (((zext i64 (-2 + %n) to i65) * (zext i64 (-1 + %n) to i65)) /u 2) to i64)) + (3 * %n)) + +In addition to being much more complicated, it involves i65 arithmetic, +which is very inefficient when expanded into code. + +//===---------------------------------------------------------------------===// + +In formatValue in test/CodeGen/X86/lsr-delayed-fold.ll, + +ScalarEvolution is forming this expression: + +((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32))) + +This could be folded to + +(-1 * (trunc i64 undef to i32)) + +//===---------------------------------------------------------------------===// diff --git a/contrib/libs/llvm12/lib/Analysis/TFUtils.cpp b/contrib/libs/llvm12/lib/Analysis/TFUtils.cpp index 1351db7d30..3f26bdfdc0 100644 --- a/contrib/libs/llvm12/lib/Analysis/TFUtils.cpp +++ b/contrib/libs/llvm12/lib/Analysis/TFUtils.cpp @@ -1,69 +1,69 @@ -//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file implements utilities for interfacing with tensorflow C APIs. -// -//===----------------------------------------------------------------------===// +//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for interfacing with tensorflow C APIs. +// +//===----------------------------------------------------------------------===// #include "llvm/Config/config.h" #if defined(LLVM_HAVE_TF_API) - + #include "llvm/ADT/Twine.h" -#include "llvm/Analysis/Utils/TFUtils.h" -#include "llvm/Support/Debug.h" +#include "llvm/Analysis/Utils/TFUtils.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/JSON.h" -#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" - +#include "llvm/Support/raw_ostream.h" + #error #include "tensorflow/c/c_api.h" #error #include "tensorflow/c/c_api_experimental.h" - -#include <cassert> + +#include <cassert> #include <numeric> - -using namespace llvm; - -namespace { - -using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>; -using TFSessionOptionsPtr = - std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>; -using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>; - -struct TFInitializer { - TFInitializer() { - assert(!IsInitialized && "TFInitialized should be called only once"); - int Argc = 1; - const char *Name = ""; - const char **NamePtr = &Name; - TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr)); - IsInitialized = true; - } - bool 
IsInitialized = false; -}; - -llvm::ManagedStatic<TFInitializer> TFLibInitializer; - -bool ensureInitTF() { return TFLibInitializer->IsInitialized; } - -TFGraphPtr createTFGraph() { - return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph); -} - -TFStatusPtr createTFStatus() { - return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus); -} - -TFSessionOptionsPtr createTFSessionOptions() { - return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions); -} + +using namespace llvm; + +namespace { + +using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>; +using TFSessionOptionsPtr = + std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>; +using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>; + +struct TFInitializer { + TFInitializer() { + assert(!IsInitialized && "TFInitialized should be called only once"); + int Argc = 1; + const char *Name = ""; + const char **NamePtr = &Name; + TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr)); + IsInitialized = true; + } + bool IsInitialized = false; +}; + +llvm::ManagedStatic<TFInitializer> TFLibInitializer; + +bool ensureInitTF() { return TFLibInitializer->IsInitialized; } + +TFGraphPtr createTFGraph() { + return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph); +} + +TFStatusPtr createTFStatus() { + return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus); +} + +TFSessionOptionsPtr createTFSessionOptions() { + return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions); +} /// Write the values of one tensor as a list. 
template <typename T> @@ -147,29 +147,29 @@ void writeRawTensorsAsFeatureLists(raw_ostream &OutFile, OutFile << " }\n"; OutFile << " }\n"; } -} // namespace - -namespace llvm { -class EvaluationResultImpl { -public: - EvaluationResultImpl(size_t OutputSize) - : OutputSize(OutputSize), Output(OutputSize){}; - - ~EvaluationResultImpl() { - for (auto *P : Output) - if (P) - TF_DeleteTensor(P); - } - - EvaluationResultImpl(const EvaluationResultImpl &) = delete; - EvaluationResultImpl(EvaluationResultImpl &&Other) = delete; - std::vector<TF_Tensor *> &getOutput() { return Output; } - -private: - const size_t OutputSize; - std::vector<TF_Tensor *> Output; -}; - +} // namespace + +namespace llvm { +class EvaluationResultImpl { +public: + EvaluationResultImpl(size_t OutputSize) + : OutputSize(OutputSize), Output(OutputSize){}; + + ~EvaluationResultImpl() { + for (auto *P : Output) + if (P) + TF_DeleteTensor(P); + } + + EvaluationResultImpl(const EvaluationResultImpl &) = delete; + EvaluationResultImpl(EvaluationResultImpl &&Other) = delete; + std::vector<TF_Tensor *> &getOutput() { return Output; } + +private: + const size_t OutputSize; + std::vector<TF_Tensor *> Output; +}; + size_t TensorSpec::getElementByteSize() const { return TF_DataTypeSize(static_cast<TF_DataType>(TypeIndex)); } @@ -280,110 +280,110 @@ loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName, return Ret; } -class TFModelEvaluatorImpl { -public: - TFModelEvaluatorImpl(StringRef SavedModelPath, +class TFModelEvaluatorImpl { +public: + TFModelEvaluatorImpl(StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize, const char *Tags); - - bool isValid() const { return IsValid; } - size_t OutputSize() const { return OutputFeed.size(); } - - void evaluate(TF_Tensor **Output, TF_Status *Status) { - TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), - Input.size(), OutputFeed.data(), Output, 
OutputFeed.size(), - nullptr, 0, nullptr, Status); - } - - void initInput(size_t Index, TF_DataType Type, - const std::vector<int64_t> &Dimensions); - const std::vector<TF_Tensor *> &getInput() const { return Input; } - - ~TFModelEvaluatorImpl(); - -private: - /// The objects necessary for carrying out an evaluation of the SavedModel. - /// They are expensive to set up, and we maintain them accross all the - /// evaluations of the model. - TF_Session *Session = nullptr; - TFGraphPtr Graph; - TFSessionOptionsPtr Options; - - /// The specification of the input nodes. - std::vector<TF_Output> InputFeed; - - /// The input tensors. They must match by index of the corresponding InputFeed - /// value. We set up the tensors once and just mutate theirs scalars before - /// each evaluation. The input tensors keep their value after an evaluation. - std::vector<TF_Tensor *> Input; - - /// The specification of the output nodes. When evaluating, the tensors in the - /// output tensor vector must match by index the corresponding element in the - /// OutputFeed. - std::vector<TF_Output> OutputFeed; - - void invalidate() { IsValid = false; } - - bool IsValid = true; - - /// Reusable utility for ensuring we can bind the requested Name to a node in - /// the SavedModel Graph. + + bool isValid() const { return IsValid; } + size_t OutputSize() const { return OutputFeed.size(); } + + void evaluate(TF_Tensor **Output, TF_Status *Status) { + TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), + Input.size(), OutputFeed.data(), Output, OutputFeed.size(), + nullptr, 0, nullptr, Status); + } + + void initInput(size_t Index, TF_DataType Type, + const std::vector<int64_t> &Dimensions); + const std::vector<TF_Tensor *> &getInput() const { return Input; } + + ~TFModelEvaluatorImpl(); + +private: + /// The objects necessary for carrying out an evaluation of the SavedModel. + /// They are expensive to set up, and we maintain them accross all the + /// evaluations of the model. 
+ TF_Session *Session = nullptr; + TFGraphPtr Graph; + TFSessionOptionsPtr Options; + + /// The specification of the input nodes. + std::vector<TF_Output> InputFeed; + + /// The input tensors. They must match by index of the corresponding InputFeed + /// value. We set up the tensors once and just mutate theirs scalars before + /// each evaluation. The input tensors keep their value after an evaluation. + std::vector<TF_Tensor *> Input; + + /// The specification of the output nodes. When evaluating, the tensors in the + /// output tensor vector must match by index the corresponding element in the + /// OutputFeed. + std::vector<TF_Output> OutputFeed; + + void invalidate() { IsValid = false; } + + bool IsValid = true; + + /// Reusable utility for ensuring we can bind the requested Name to a node in + /// the SavedModel Graph. bool checkReportAndInvalidate(const TF_Output &Output, const TensorSpec &OutputSpec); -}; -} // namespace llvm - -TFModelEvaluatorImpl::TFModelEvaluatorImpl( +}; +} // namespace llvm + +TFModelEvaluatorImpl::TFModelEvaluatorImpl( StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize, const char *Tags = "serve") - : Graph(createTFGraph()), Options(createTFSessionOptions()), + : Graph(createTFGraph()), Options(createTFSessionOptions()), InputFeed(InputSpecs.size()), Input(InputSpecs.size()), OutputFeed(OutputSpecsSize) { - if (!ensureInitTF()) { - errs() << "Tensorflow should have been initialized"; - return; - } - auto Status = createTFStatus(); - - Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr, - SavedModelPath.str().c_str(), &Tags, 1, - Graph.get(), nullptr, Status.get()); - if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { - errs() << TF_Message(Status.get()); - invalidate(); - } + if (!ensureInitTF()) { + errs() << "Tensorflow should have been initialized"; + return; + } + auto Status = createTFStatus(); + + Session = 
TF_LoadSessionFromSavedModel(Options.get(), nullptr, + SavedModelPath.str().c_str(), &Tags, 1, + Graph.get(), nullptr, Status.get()); + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { + errs() << TF_Message(Status.get()); + invalidate(); + } for (size_t I = 0; I < InputSpecs.size(); ++I) { auto &InputSpec = InputSpecs[I]; - InputFeed[I] = { + InputFeed[I] = { TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()), InputSpec.port()}; if (!checkReportAndInvalidate(InputFeed[I], InputSpec)) - return; + return; initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()), InputSpec.shape()); - } + } for (size_t I = 0; I < OutputSpecsSize; ++I) { auto OutputSpec = GetOutputSpecs(I); - OutputFeed[I] = { + OutputFeed[I] = { TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()), OutputSpec.port()}; if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec)) - return; - } -} - + return; + } +} + TFModelEvaluator::TFModelEvaluator( StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize, const char *Tags) : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs, OutputSpecsSize, Tags)) { - if (!Impl->isValid()) - Impl.reset(); -} - + if (!Impl->isValid()) + Impl.reset(); +} + TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs, const std::vector<TensorSpec> &OutputSpecs, @@ -392,90 +392,90 @@ TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath, SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; }, OutputSpecs.size(), Tags) {} -TFModelEvaluatorImpl::~TFModelEvaluatorImpl() { - for (auto *T : Input) { - TF_DeleteTensor(T); - } - if (Session == nullptr) - return; - auto Status = createTFStatus(); - TF_DeleteSession(Session, Status.get()); - Session = nullptr; - if (TF_GetCode(Status.get()) != TF_Code::TF_OK) - errs() << "Could not delete TF session"; -} - 
+TFModelEvaluatorImpl::~TFModelEvaluatorImpl() { + for (auto *T : Input) { + TF_DeleteTensor(T); + } + if (Session == nullptr) + return; + auto Status = createTFStatus(); + TF_DeleteSession(Session, Status.get()); + Session = nullptr; + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) + errs() << "Could not delete TF session"; +} + bool TFModelEvaluatorImpl::checkReportAndInvalidate( const TF_Output &Output, const TensorSpec &OutputSpec) { - if (Output.oper) - return true; + if (Output.oper) + return true; errs() << "Could not find TF_Output named: " + OutputSpec.name(); - IsValid = false; - return IsValid; -} - -Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() { - if (!isValid()) - return None; - std::unique_ptr<EvaluationResultImpl> Ret = - std::make_unique<EvaluationResultImpl>(Impl->OutputSize()); - auto Status = createTFStatus(); - Impl->evaluate(Ret->getOutput().data(), Status.get()); - if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { - errs() << TF_Message(Status.get()); - Impl.reset(); - return None; - } - return EvaluationResult(std::move(Ret)); -} - -void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type, - const std::vector<int64_t> &Dimensions) { - int64_t TotalSize = TF_DataTypeSize(Type); - for (auto &D : Dimensions) - TotalSize *= D; - - Input[Index] = - TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize); - std::memset(TF_TensorData(Input[Index]), 0, TotalSize); -} - -void *TFModelEvaluator::getUntypedInput(size_t Index) { - return TF_TensorData(Impl->getInput()[Index]); -} - -TFModelEvaluator::EvaluationResult::EvaluationResult( - std::unique_ptr<EvaluationResultImpl> Impl) - : Impl(std::move(Impl)) {} - -TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other) - : Impl(std::move(Other.Impl)) {} - + IsValid = false; + return IsValid; +} + +Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() { + if (!isValid()) + return None; + 
std::unique_ptr<EvaluationResultImpl> Ret = + std::make_unique<EvaluationResultImpl>(Impl->OutputSize()); + auto Status = createTFStatus(); + Impl->evaluate(Ret->getOutput().data(), Status.get()); + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { + errs() << TF_Message(Status.get()); + Impl.reset(); + return None; + } + return EvaluationResult(std::move(Ret)); +} + +void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type, + const std::vector<int64_t> &Dimensions) { + int64_t TotalSize = TF_DataTypeSize(Type); + for (auto &D : Dimensions) + TotalSize *= D; + + Input[Index] = + TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize); + std::memset(TF_TensorData(Input[Index]), 0, TotalSize); +} + +void *TFModelEvaluator::getUntypedInput(size_t Index) { + return TF_TensorData(Impl->getInput()[Index]); +} + +TFModelEvaluator::EvaluationResult::EvaluationResult( + std::unique_ptr<EvaluationResultImpl> Impl) + : Impl(std::move(Impl)) {} + +TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other) + : Impl(std::move(Other.Impl)) {} + TFModelEvaluator::EvaluationResult & TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) { Impl = std::move(Other.Impl); return *this; } -void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) { - return TF_TensorData(Impl->getOutput()[Index]); -} - +void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) { + return TF_TensorData(Impl->getOutput()[Index]); +} + const void * TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const { return TF_TensorData(Impl->getOutput()[Index]); -} - +} + #define TFUTILS_GETDATATYPE_IMPL(T, E) \ template <> int TensorSpec::getDataType<T>() { return E; } - + TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL) - + #undef TFUTILS_GETDATATYPE_IMPL - + TFModelEvaluator::EvaluationResult::~EvaluationResult() {} TFModelEvaluator::~TFModelEvaluator() {} - + void 
Logger::print(raw_ostream &OS) { if (RawLogData.empty()) return; @@ -489,17 +489,17 @@ void Logger::print(raw_ostream &OS) { size_t RewardSize = RewardSpec.getElementCount() * RewardSpec.getElementByteSize(); size_t NumberOfRewards = RawLogData.back().size() / RewardSize; - + OS << "feature_lists: {\n"; for (size_t I = 0; I < FeatureSpecs.size(); ++I) writeRawTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(), NumberOfRecords); - + if (IncludeReward) writeRawTensorsAsFeatureLists(OS, {RewardSpec, None}, RawLogData.back().data(), NumberOfRecords, NumberOfRewards == 1); - + OS << "}\n"; -} +} #endif // defined(LLVM_HAVE_TF_API) diff --git a/contrib/libs/llvm12/lib/Analysis/ya.make b/contrib/libs/llvm12/lib/Analysis/ya.make index a14158e86e..6e09c89fe4 100644 --- a/contrib/libs/llvm12/lib/Analysis/ya.make +++ b/contrib/libs/llvm12/lib/Analysis/ya.make @@ -2,18 +2,18 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE( + Apache-2.0 WITH LLVM-exception AND + NCSA +) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE( - Apache-2.0 WITH LLVM-exception AND - NCSA -) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -24,9 +24,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/Analysis -) +ADDINCL( + contrib/libs/llvm12/lib/Analysis +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/AsmParser/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/AsmParser/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/AsmParser/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/AsmParser/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
- - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/AsmParser/ya.make b/contrib/libs/llvm12/lib/AsmParser/ya.make index f8db95fce1..a84b5563eb 100644 --- a/contrib/libs/llvm12/lib/AsmParser/ya.make +++ b/contrib/libs/llvm12/lib/AsmParser/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -19,9 +19,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/AsmParser -) +ADDINCL( + contrib/libs/llvm12/lib/AsmParser +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/BinaryFormat/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/BinaryFormat/.yandex_meta/licenses.list.txt index b0b34714ca..ad3879fc45 100644 --- a/contrib/libs/llvm12/lib/BinaryFormat/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/BinaryFormat/.yandex_meta/licenses.list.txt @@ -1,303 +1,303 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
- - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================File: LICENSE.TXT==================== -============================================================================== -The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: -============================================================================== - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ----- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - -============================================================================== -Software from third parties included in the LLVM Project: -============================================================================== -The LLVM Project contains third party software which is under different license -terms. 
All such code will be identified clearly using at least one of two -mechanisms: -1) It will be in a separate directory tree with its own `LICENSE.txt` or - `LICENSE` file at the top containing the specific license and restrictions - which apply to that software, or -2) It will contain specific license and restriction terms at the top of every - file. - -============================================================================== -Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - - - -====================File: include/llvm/Support/LICENSE.TXT==================== -LLVM System Interface Library -------------------------------------------------------------------------------- -The LLVM System Interface Library is licensed under the Illinois Open Source -License and has the following additional copyright: - -Copyright (C) 2004 eXtensible Systems, Inc. - - -====================NCSA==================== -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================File: LICENSE.TXT==================== +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
+ +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. 
+ +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + + + +====================File: include/llvm/Support/LICENSE.TXT==================== +LLVM System Interface Library +------------------------------------------------------------------------------- +The LLVM System Interface Library is licensed under the Illinois Open Source +License and has the following additional copyright: + +Copyright (C) 2004 eXtensible Systems, Inc. 
+ + +====================NCSA==================== +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. diff --git a/contrib/libs/llvm12/lib/BinaryFormat/ya.make b/contrib/libs/llvm12/lib/BinaryFormat/ya.make index 29882be12d..c1d64fa5a6 100644 --- a/contrib/libs/llvm12/lib/BinaryFormat/ya.make +++ b/contrib/libs/llvm12/lib/BinaryFormat/ya.make @@ -2,26 +2,26 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE( - Apache-2.0 WITH LLVM-exception AND - NCSA -) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE( + Apache-2.0 WITH LLVM-exception AND + NCSA +) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/BinaryFormat -) +ADDINCL( + contrib/libs/llvm12/lib/BinaryFormat +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Bitcode/Reader/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Bitcode/Reader/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Bitcode/Reader/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Bitcode/Reader/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Bitcode/Reader/ya.make b/contrib/libs/llvm12/lib/Bitcode/Reader/ya.make index cc929740c5..c648d2fc66 100644 --- a/contrib/libs/llvm12/lib/Bitcode/Reader/ya.make +++ b/contrib/libs/llvm12/lib/Bitcode/Reader/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -19,9 +19,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/Bitcode/Reader -) +ADDINCL( + contrib/libs/llvm12/lib/Bitcode/Reader +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Bitcode/Writer/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Bitcode/Writer/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Bitcode/Writer/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Bitcode/Writer/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Bitcode/Writer/ya.make b/contrib/libs/llvm12/lib/Bitcode/Writer/ya.make index 0074cada72..624b4f8e7e 100644 --- a/contrib/libs/llvm12/lib/Bitcode/Writer/ya.make +++ b/contrib/libs/llvm12/lib/Bitcode/Writer/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -21,9 +21,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/Bitcode/Writer -) +ADDINCL( + contrib/libs/llvm12/lib/Bitcode/Writer +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Bitstream/Reader/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Bitstream/Reader/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Bitstream/Reader/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Bitstream/Reader/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Bitstream/Reader/ya.make b/contrib/libs/llvm12/lib/Bitstream/Reader/ya.make index e7f5730799..09acec1b47 100644 --- a/contrib/libs/llvm12/lib/Bitstream/Reader/ya.make +++ b/contrib/libs/llvm12/lib/Bitstream/Reader/ya.make @@ -2,23 +2,23 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/Bitstream/Reader -) +ADDINCL( + contrib/libs/llvm12/lib/Bitstream/Reader +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/CodeGen/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/CodeGen/.yandex_meta/licenses.list.txt index 2bfdc7cfd7..a55f77852a 100644 --- a/contrib/libs/llvm12/lib/CodeGen/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/CodeGen/.yandex_meta/licenses.list.txt @@ -1,324 +1,324 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================Apache-2.0 WITH LLVM-exception==================== -/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -/// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================COPYRIGHT==================== - // Fix (C). 
- if (Restore && (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { - if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { - - -====================COPYRIGHT==================== - IntegerType *WordType = DL.getIntPtrType(C); - PointerType *InitPtrType = InitValue ? - PointerType::getUnqual(InitValue->getType()) : VoidPtrType; - - -====================File: LICENSE.TXT==================== -============================================================================== -The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: -============================================================================== - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ----- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - -============================================================================== -Software from third parties included in the LLVM Project: -============================================================================== -The LLVM Project contains third party software which is under different license -terms. 
All such code will be identified clearly using at least one of two -mechanisms: -1) It will be in a separate directory tree with its own `LICENSE.txt` or - `LICENSE` file at the top containing the specific license and restrictions - which apply to that software, or -2) It will contain specific license and restriction terms at the top of every - file. - -============================================================================== -Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - - - -====================File: include/llvm/Support/LICENSE.TXT==================== -LLVM System Interface Library -------------------------------------------------------------------------------- -The LLVM System Interface Library is licensed under the Illinois Open Source -License and has the following additional copyright: - -Copyright (C) 2004 eXtensible Systems, Inc. - - -====================NCSA==================== -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================Apache-2.0 WITH LLVM-exception==================== +/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +/// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================COPYRIGHT==================== + // Fix (C). 
+ if (Restore && (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { + if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { + + +====================COPYRIGHT==================== + IntegerType *WordType = DL.getIntPtrType(C); + PointerType *InitPtrType = InitValue ? + PointerType::getUnqual(InitValue->getType()) : VoidPtrType; + + +====================File: LICENSE.TXT==================== +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. 
All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + + + +====================File: include/llvm/Support/LICENSE.TXT==================== +LLVM System Interface Library +------------------------------------------------------------------------------- +The LLVM System Interface Library is licensed under the Illinois Open Source +License and has the following additional copyright: + +Copyright (C) 2004 eXtensible Systems, Inc. + + +====================NCSA==================== +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. diff --git a/contrib/libs/llvm12/lib/CodeGen/AsmPrinter/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/CodeGen/AsmPrinter/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/CodeGen/AsmPrinter/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/CodeGen/AsmPrinter/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/CodeGen/AsmPrinter/ya.make b/contrib/libs/llvm12/lib/CodeGen/AsmPrinter/ya.make index 6798ca503e..47d6139b3d 100644 --- a/contrib/libs/llvm12/lib/CodeGen/AsmPrinter/ya.make +++ b/contrib/libs/llvm12/lib/CodeGen/AsmPrinter/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -34,9 +34,9 @@ IF (SANITIZER_TYPE == "undefined") ) ENDIF() -ADDINCL( - contrib/libs/llvm12/lib/CodeGen/AsmPrinter -) +ADDINCL( + contrib/libs/llvm12/lib/CodeGen/AsmPrinter +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make index 92fea6604a..e6de0fe8d9 100644 --- a/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make +++ b/contrib/libs/llvm12/lib/CodeGen/GlobalISel/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -24,9 +24,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Utils ) -ADDINCL( - contrib/libs/llvm12/lib/CodeGen/GlobalISel -) +ADDINCL( + contrib/libs/llvm12/lib/CodeGen/GlobalISel +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/CodeGen/MIRParser/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/CodeGen/MIRParser/.yandex_meta/licenses.list.txt index 04e522abdd..de1a495b4f 100644 --- a/contrib/libs/llvm12/lib/CodeGen/MIRParser/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/CodeGen/MIRParser/.yandex_meta/licenses.list.txt @@ -1,12 +1,12 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================COPYRIGHT==================== - return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || - C == '$'; +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================COPYRIGHT==================== + return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || + C == '$'; diff --git a/contrib/libs/llvm12/lib/CodeGen/MIRParser/ya.make b/contrib/libs/llvm12/lib/CodeGen/MIRParser/ya.make index 4b52305f88..aa8e6d8693 100644 --- a/contrib/libs/llvm12/lib/CodeGen/MIRParser/ya.make +++ b/contrib/libs/llvm12/lib/CodeGen/MIRParser/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -23,9 +23,9 @@ PEERDIR( contrib/libs/llvm12/lib/Target ) -ADDINCL( - contrib/libs/llvm12/lib/CodeGen/MIRParser -) +ADDINCL( + contrib/libs/llvm12/lib/CodeGen/MIRParser +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/CodeGen/README.txt b/contrib/libs/llvm12/lib/CodeGen/README.txt index 87deb97526..d8958715c6 100644 --- a/contrib/libs/llvm12/lib/CodeGen/README.txt +++ b/contrib/libs/llvm12/lib/CodeGen/README.txt @@ -1,199 +1,199 @@ -//===---------------------------------------------------------------------===// - -Common register allocation / spilling problem: - - mul lr, r4, lr - str lr, [sp, #+52] - ldr lr, [r1, #+32] - sxth r3, r3 - ldr r4, [sp, #+52] - mla r4, r3, lr, r4 - -can be: - - mul lr, r4, lr - mov r4, lr - str lr, [sp, #+52] - ldr lr, [r1, #+32] - sxth r3, r3 - mla r4, r3, lr, r4 - -and then "merge" mul and mov: - - mul r4, r4, lr - str r4, [sp, #+52] - ldr lr, [r1, #+32] - sxth r3, r3 - mla r4, r3, lr, r4 - -It also increase the likelihood the store may become dead. 
- -//===---------------------------------------------------------------------===// - -bb27 ... - ... - %reg1037 = ADDri %reg1039, 1 - %reg1038 = ADDrs %reg1032, %reg1039, %noreg, 10 - Successors according to CFG: 0x8b03bf0 (#5) - -bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5): - Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4) - %reg1039 = PHI %reg1070, mbb<bb76.outer,0x8b0c5f0>, %reg1037, mbb<bb27,0x8b0a7c0> - -Note ADDri is not a two-address instruction. However, its result %reg1037 is an -operand of the PHI node in bb76 and its operand %reg1039 is the result of the -PHI node. We should treat it as a two-address code and make sure the ADDri is -scheduled after any node that reads %reg1039. - -//===---------------------------------------------------------------------===// - -Use local info (i.e. register scavenger) to assign it a free register to allow -reuse: - ldr r3, [sp, #+4] - add r3, r3, #3 - ldr r2, [sp, #+8] - add r2, r2, #2 - ldr r1, [sp, #+4] <== - add r1, r1, #1 - ldr r0, [sp, #+4] - add r0, r0, #2 - -//===---------------------------------------------------------------------===// - -LLVM aggressively lift CSE out of loop. Sometimes this can be negative side- -effects: - -R1 = X + 4 -R2 = X + 7 -R3 = X + 15 - -loop: -load [i + R1] -... -load [i + R2] -... -load [i + R3] - -Suppose there is high register pressure, R1, R2, R3, can be spilled. We need -to implement proper re-materialization to handle this: - -R1 = X + 4 -R2 = X + 7 -R3 = X + 15 - -loop: -R1 = X + 4 @ re-materialized -load [i + R1] -... -R2 = X + 7 @ re-materialized -load [i + R2] -... -R3 = X + 15 @ re-materialized -load [i + R3] - -Furthermore, with re-association, we can enable sharing: - -R1 = X + 4 -R2 = X + 7 -R3 = X + 15 - -loop: -T = i + X -load [T + 4] -... -load [T + 7] -... -load [T + 15] -//===---------------------------------------------------------------------===// - -It's not always a good idea to choose rematerialization over spilling. 
If all -the load / store instructions would be folded then spilling is cheaper because -it won't require new live intervals / registers. See 2003-05-31-LongShifts for -an example. - -//===---------------------------------------------------------------------===// - -With a copying garbage collector, derived pointers must not be retained across -collector safe points; the collector could move the objects and invalidate the -derived pointer. This is bad enough in the first place, but safe points can -crop up unpredictably. Consider: - - %array = load { i32, [0 x %obj] }** %array_addr - %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n - %old = load %obj** %nth_el - %z = div i64 %x, %y - store %obj* %new, %obj** %nth_el - -If the i64 division is lowered to a libcall, then a safe point will (must) -appear for the call site. If a collection occurs, %array and %nth_el no longer -point into the correct object. - -The fix for this is to copy address calculations so that dependent pointers -are never live across safe point boundaries. But the loads cannot be copied -like this if there was an intervening store, so may be hard to get right. - -Only a concurrent mutator can trigger a collection at the libcall safe point. -So single-threaded programs do not have this requirement, even with a copying -collector. Still, LLVM optimizations would probably undo a front-end's careful -work. - -//===---------------------------------------------------------------------===// - -The ocaml frametable structure supports liveness information. It would be good -to support it. - -//===---------------------------------------------------------------------===// - -The FIXME in ComputeCommonTailLength in BranchFolding.cpp needs to be -revisited. The check is there to work around a misuse of directives in inline -assembly. 
- -//===---------------------------------------------------------------------===// - -It would be good to detect collector/target compatibility instead of silently -doing the wrong thing. - -//===---------------------------------------------------------------------===// - -It would be really nice to be able to write patterns in .td files for copies, -which would eliminate a bunch of explicit predicates on them (e.g. no side -effects). Once this is in place, it would be even better to have tblgen -synthesize the various copy insertion/inspection methods in TargetInstrInfo. - -//===---------------------------------------------------------------------===// - -Stack coloring improvements: - -1. Do proper LiveStacks analysis on all stack objects including those which are - not spill slots. -2. Reorder objects to fill in gaps between objects. - e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4 - -//===---------------------------------------------------------------------===// - -The scheduler should be able to sort nearby instructions by their address. For -example, in an expanded memset sequence it's not uncommon to see code like this: - - movl $0, 4(%rdi) - movl $0, 8(%rdi) - movl $0, 12(%rdi) - movl $0, 0(%rdi) - -Each of the stores is independent, and the scheduler is currently making an -arbitrary decision about the order. - -//===---------------------------------------------------------------------===// - -Another opportunitiy in this code is that the $0 could be moved to a register: - - movl $0, 4(%rdi) - movl $0, 8(%rdi) - movl $0, 12(%rdi) - movl $0, 0(%rdi) - -This would save substantial code size, especially for longer sequences like -this. It would be easy to have a rule telling isel to avoid matching MOV32mi -if the immediate has more than some fixed number of uses. It's more involved -to teach the register allocator how to do late folding to recover from -excessive register pressure. 
- +//===---------------------------------------------------------------------===// + +Common register allocation / spilling problem: + + mul lr, r4, lr + str lr, [sp, #+52] + ldr lr, [r1, #+32] + sxth r3, r3 + ldr r4, [sp, #+52] + mla r4, r3, lr, r4 + +can be: + + mul lr, r4, lr + mov r4, lr + str lr, [sp, #+52] + ldr lr, [r1, #+32] + sxth r3, r3 + mla r4, r3, lr, r4 + +and then "merge" mul and mov: + + mul r4, r4, lr + str r4, [sp, #+52] + ldr lr, [r1, #+32] + sxth r3, r3 + mla r4, r3, lr, r4 + +It also increase the likelihood the store may become dead. + +//===---------------------------------------------------------------------===// + +bb27 ... + ... + %reg1037 = ADDri %reg1039, 1 + %reg1038 = ADDrs %reg1032, %reg1039, %noreg, 10 + Successors according to CFG: 0x8b03bf0 (#5) + +bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5): + Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4) + %reg1039 = PHI %reg1070, mbb<bb76.outer,0x8b0c5f0>, %reg1037, mbb<bb27,0x8b0a7c0> + +Note ADDri is not a two-address instruction. However, its result %reg1037 is an +operand of the PHI node in bb76 and its operand %reg1039 is the result of the +PHI node. We should treat it as a two-address code and make sure the ADDri is +scheduled after any node that reads %reg1039. + +//===---------------------------------------------------------------------===// + +Use local info (i.e. register scavenger) to assign it a free register to allow +reuse: + ldr r3, [sp, #+4] + add r3, r3, #3 + ldr r2, [sp, #+8] + add r2, r2, #2 + ldr r1, [sp, #+4] <== + add r1, r1, #1 + ldr r0, [sp, #+4] + add r0, r0, #2 + +//===---------------------------------------------------------------------===// + +LLVM aggressively lift CSE out of loop. Sometimes this can be negative side- +effects: + +R1 = X + 4 +R2 = X + 7 +R3 = X + 15 + +loop: +load [i + R1] +... +load [i + R2] +... +load [i + R3] + +Suppose there is high register pressure, R1, R2, R3, can be spilled. 
We need +to implement proper re-materialization to handle this: + +R1 = X + 4 +R2 = X + 7 +R3 = X + 15 + +loop: +R1 = X + 4 @ re-materialized +load [i + R1] +... +R2 = X + 7 @ re-materialized +load [i + R2] +... +R3 = X + 15 @ re-materialized +load [i + R3] + +Furthermore, with re-association, we can enable sharing: + +R1 = X + 4 +R2 = X + 7 +R3 = X + 15 + +loop: +T = i + X +load [T + 4] +... +load [T + 7] +... +load [T + 15] +//===---------------------------------------------------------------------===// + +It's not always a good idea to choose rematerialization over spilling. If all +the load / store instructions would be folded then spilling is cheaper because +it won't require new live intervals / registers. See 2003-05-31-LongShifts for +an example. + +//===---------------------------------------------------------------------===// + +With a copying garbage collector, derived pointers must not be retained across +collector safe points; the collector could move the objects and invalidate the +derived pointer. This is bad enough in the first place, but safe points can +crop up unpredictably. Consider: + + %array = load { i32, [0 x %obj] }** %array_addr + %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n + %old = load %obj** %nth_el + %z = div i64 %x, %y + store %obj* %new, %obj** %nth_el + +If the i64 division is lowered to a libcall, then a safe point will (must) +appear for the call site. If a collection occurs, %array and %nth_el no longer +point into the correct object. + +The fix for this is to copy address calculations so that dependent pointers +are never live across safe point boundaries. But the loads cannot be copied +like this if there was an intervening store, so may be hard to get right. + +Only a concurrent mutator can trigger a collection at the libcall safe point. +So single-threaded programs do not have this requirement, even with a copying +collector. Still, LLVM optimizations would probably undo a front-end's careful +work. 
+ +//===---------------------------------------------------------------------===// + +The ocaml frametable structure supports liveness information. It would be good +to support it. + +//===---------------------------------------------------------------------===// + +The FIXME in ComputeCommonTailLength in BranchFolding.cpp needs to be +revisited. The check is there to work around a misuse of directives in inline +assembly. + +//===---------------------------------------------------------------------===// + +It would be good to detect collector/target compatibility instead of silently +doing the wrong thing. + +//===---------------------------------------------------------------------===// + +It would be really nice to be able to write patterns in .td files for copies, +which would eliminate a bunch of explicit predicates on them (e.g. no side +effects). Once this is in place, it would be even better to have tblgen +synthesize the various copy insertion/inspection methods in TargetInstrInfo. + +//===---------------------------------------------------------------------===// + +Stack coloring improvements: + +1. Do proper LiveStacks analysis on all stack objects including those which are + not spill slots. +2. Reorder objects to fill in gaps between objects. + e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4 + +//===---------------------------------------------------------------------===// + +The scheduler should be able to sort nearby instructions by their address. For +example, in an expanded memset sequence it's not uncommon to see code like this: + + movl $0, 4(%rdi) + movl $0, 8(%rdi) + movl $0, 12(%rdi) + movl $0, 0(%rdi) + +Each of the stores is independent, and the scheduler is currently making an +arbitrary decision about the order. 
+ +//===---------------------------------------------------------------------===// + +Another opportunitiy in this code is that the $0 could be moved to a register: + + movl $0, 4(%rdi) + movl $0, 8(%rdi) + movl $0, 12(%rdi) + movl $0, 0(%rdi) + +This would save substantial code size, especially for longer sequences like +this. It would be easy to have a rule telling isel to avoid matching MOV32mi +if the immediate has more than some fixed number of uses. It's more involved +to teach the register allocator how to do late folding to recover from +excessive register pressure. + diff --git a/contrib/libs/llvm12/lib/CodeGen/SelectionDAG/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/CodeGen/SelectionDAG/.yandex_meta/licenses.list.txt index 1f5e5fea58..c106a10d3f 100644 --- a/contrib/libs/llvm12/lib/CodeGen/SelectionDAG/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/CodeGen/SelectionDAG/.yandex_meta/licenses.list.txt @@ -1,12 +1,12 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================COPYRIGHT==================== - Type *Ty = MemOps[0].getTypeForEVT(C); - Align NewAlign = DL.getABITypeAlign(Ty); +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================COPYRIGHT==================== + Type *Ty = MemOps[0].getTypeForEVT(C); + Align NewAlign = DL.getABITypeAlign(Ty); diff --git a/contrib/libs/llvm12/lib/CodeGen/SelectionDAG/ya.make b/contrib/libs/llvm12/lib/CodeGen/SelectionDAG/ya.make index b08e1e9283..9fa4f90d2f 100644 --- a/contrib/libs/llvm12/lib/CodeGen/SelectionDAG/ya.make +++ b/contrib/libs/llvm12/lib/CodeGen/SelectionDAG/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -23,9 +23,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Utils ) -ADDINCL( - contrib/libs/llvm12/lib/CodeGen/SelectionDAG -) +ADDINCL( + contrib/libs/llvm12/lib/CodeGen/SelectionDAG +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/CodeGen/ya.make b/contrib/libs/llvm12/lib/CodeGen/ya.make index 736dcdaf0b..ed4f607e74 100644 --- a/contrib/libs/llvm12/lib/CodeGen/ya.make +++ b/contrib/libs/llvm12/lib/CodeGen/ya.make @@ -2,18 +2,18 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE( + Apache-2.0 WITH LLVM-exception AND + NCSA +) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE( - Apache-2.0 WITH LLVM-exception AND - NCSA -) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -29,9 +29,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Utils ) -ADDINCL( - contrib/libs/llvm12/lib/CodeGen -) +ADDINCL( + contrib/libs/llvm12/lib/CodeGen +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/DebugInfo/CodeView/.yandex_meta/licenses.list.txt 
b/contrib/libs/llvm12/lib/DebugInfo/CodeView/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/DebugInfo/CodeView/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/DebugInfo/CodeView/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/DebugInfo/CodeView/ya.make b/contrib/libs/llvm12/lib/DebugInfo/CodeView/ya.make index 7b71304599..8829a8f09f 100644 --- a/contrib/libs/llvm12/lib/DebugInfo/CodeView/ya.make +++ b/contrib/libs/llvm12/lib/DebugInfo/CodeView/ya.make @@ -2,24 +2,24 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/DebugInfo/MSF contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/DebugInfo/CodeView -) +ADDINCL( + contrib/libs/llvm12/lib/DebugInfo/CodeView +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/DebugInfo/DWARF/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/DebugInfo/DWARF/.yandex_meta/licenses.list.txt index b9304115fb..722dfab603 100644 --- 
a/contrib/libs/llvm12/lib/DebugInfo/DWARF/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/DebugInfo/DWARF/.yandex_meta/licenses.list.txt @@ -1,53 +1,53 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================COPYRIGHT==================== - uint64_t BlockLength = Data.getULEB128(C); - StringRef Expression = Data.getBytes(C, BlockLength); - DataExtractor Extractor(Expression, Data.isLittleEndian(), - - -====================COPYRIGHT==================== - : DebugLineData(Data), Context(C) { - LineToUnit = buildLineToUnitMap(Units); - if (!DebugLineData.isValidOffset(Offset)) - - -====================COPYRIGHT==================== - Value0 = Data.getULEB128(C); - Value1 = Data.getULEB128(C); - break; - case dwarf::DW_RLE_startx_length: { - Value0 = Data.getULEB128(C); - Value1 = Data.getULEB128(C); - break; - - -====================COPYRIGHT==================== - Version = AS.getU16(C); - AS.skip(C, 2); // padding - CompUnitCount = AS.getU32(C); - LocalTypeUnitCount = AS.getU32(C); - ForeignTypeUnitCount = AS.getU32(C); - BucketCount = AS.getU32(C); - NameCount = AS.getU32(C); - AbbrevTableSize = AS.getU32(C); - AugmentationStringSize = alignTo(AS.getU32(C), 4); - - -====================COPYRIGHT==================== - if (C) - Desc = StrOffsetsContributionDescriptor(C->Offset, C->Length, 4, - Header.getFormat()); - - -====================COPYRIGHT==================== - if (C) - Offset = C->Offset; - if (getVersion() >= 5) { +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================COPYRIGHT==================== + uint64_t BlockLength = Data.getULEB128(C); + StringRef Expression = Data.getBytes(C, BlockLength); + DataExtractor Extractor(Expression, Data.isLittleEndian(), + + +====================COPYRIGHT==================== + : DebugLineData(Data), Context(C) { + LineToUnit = buildLineToUnitMap(Units); + if (!DebugLineData.isValidOffset(Offset)) + + +====================COPYRIGHT==================== + Value0 = Data.getULEB128(C); + Value1 = Data.getULEB128(C); + break; + case dwarf::DW_RLE_startx_length: { + Value0 = Data.getULEB128(C); + Value1 = Data.getULEB128(C); + break; + + +====================COPYRIGHT==================== + Version = AS.getU16(C); + AS.skip(C, 2); // padding + CompUnitCount = AS.getU32(C); + LocalTypeUnitCount = AS.getU32(C); + ForeignTypeUnitCount = AS.getU32(C); + BucketCount = AS.getU32(C); + NameCount = AS.getU32(C); + AbbrevTableSize = AS.getU32(C); + AugmentationStringSize = alignTo(AS.getU32(C), 4); + + +====================COPYRIGHT==================== + if (C) + Desc = StrOffsetsContributionDescriptor(C->Offset, C->Length, 4, + Header.getFormat()); + + +====================COPYRIGHT==================== + if (C) + Offset = C->Offset; + if (getVersion() >= 5) { diff --git a/contrib/libs/llvm12/lib/DebugInfo/DWARF/ya.make b/contrib/libs/llvm12/lib/DebugInfo/DWARF/ya.make index 546bb7b340..5cb9397828 100644 --- a/contrib/libs/llvm12/lib/DebugInfo/DWARF/ya.make +++ b/contrib/libs/llvm12/lib/DebugInfo/DWARF/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 
contrib/libs/llvm12/lib/BinaryFormat @@ -19,9 +19,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/DebugInfo/DWARF -) +ADDINCL( + contrib/libs/llvm12/lib/DebugInfo/DWARF +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/DebugInfo/MSF/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/DebugInfo/MSF/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/DebugInfo/MSF/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/DebugInfo/MSF/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/DebugInfo/MSF/ya.make b/contrib/libs/llvm12/lib/DebugInfo/MSF/ya.make index 0281d8e95b..a9daa7d1aa 100644 --- a/contrib/libs/llvm12/lib/DebugInfo/MSF/ya.make +++ b/contrib/libs/llvm12/lib/DebugInfo/MSF/ya.make @@ -2,23 +2,23 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/DebugInfo/MSF -) +ADDINCL( + contrib/libs/llvm12/lib/DebugInfo/MSF +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/DebugInfo/PDB/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/DebugInfo/PDB/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/DebugInfo/PDB/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/DebugInfo/PDB/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/DebugInfo/PDB/ya.make b/contrib/libs/llvm12/lib/DebugInfo/PDB/ya.make index 2e433954f6..daa2f8ed3a 100644 --- a/contrib/libs/llvm12/lib/DebugInfo/PDB/ya.make +++ b/contrib/libs/llvm12/lib/DebugInfo/PDB/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/BinaryFormat @@ -20,9 +20,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/DebugInfo/PDB -) +ADDINCL( + contrib/libs/llvm12/lib/DebugInfo/PDB +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/DebugInfo/Symbolize/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/DebugInfo/Symbolize/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/DebugInfo/Symbolize/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/DebugInfo/Symbolize/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/DebugInfo/Symbolize/ya.make b/contrib/libs/llvm12/lib/DebugInfo/Symbolize/ya.make index de375af94c..b627533f82 100644 --- a/contrib/libs/llvm12/lib/DebugInfo/Symbolize/ya.make +++ b/contrib/libs/llvm12/lib/DebugInfo/Symbolize/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/DebugInfo/DWARF @@ -20,9 +20,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/DebugInfo/Symbolize -) +ADDINCL( + contrib/libs/llvm12/lib/DebugInfo/Symbolize +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Demangle/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Demangle/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Demangle/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Demangle/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Demangle/ya.make b/contrib/libs/llvm12/lib/Demangle/ya.make index dda2e10d05..16c0cf64f7 100644 --- a/contrib/libs/llvm12/lib/Demangle/ya.make +++ b/contrib/libs/llvm12/lib/Demangle/ya.make @@ -2,22 +2,22 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 ) -ADDINCL( - contrib/libs/llvm12/lib/Demangle -) +ADDINCL( + contrib/libs/llvm12/lib/Demangle +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/ExecutionEngine/MCJIT/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/ExecutionEngine/MCJIT/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/ExecutionEngine/MCJIT/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/ExecutionEngine/MCJIT/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/ExecutionEngine/MCJIT/ya.make b/contrib/libs/llvm12/lib/ExecutionEngine/MCJIT/ya.make index 05572da2aa..71377b654c 100644 --- a/contrib/libs/llvm12/lib/ExecutionEngine/MCJIT/ya.make +++ b/contrib/libs/llvm12/lib/ExecutionEngine/MCJIT/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -22,9 +22,9 @@ PEERDIR( contrib/libs/llvm12/lib/Target ) -ADDINCL( - contrib/libs/llvm12/lib/ExecutionEngine/MCJIT -) +ADDINCL( + contrib/libs/llvm12/lib/ExecutionEngine/MCJIT +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/ExecutionEngine/PerfJITEvents/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/ExecutionEngine/PerfJITEvents/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/ExecutionEngine/PerfJITEvents/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/ExecutionEngine/PerfJITEvents/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/ExecutionEngine/PerfJITEvents/ya.make b/contrib/libs/llvm12/lib/ExecutionEngine/PerfJITEvents/ya.make index 5510aeaee8..dc91e6b801 100644 --- a/contrib/libs/llvm12/lib/ExecutionEngine/PerfJITEvents/ya.make +++ b/contrib/libs/llvm12/lib/ExecutionEngine/PerfJITEvents/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/CodeGen @@ -22,9 +22,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/ExecutionEngine/PerfJITEvents -) +ADDINCL( + contrib/libs/llvm12/lib/ExecutionEngine/PerfJITEvents +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/ExecutionEngine/RuntimeDyld/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/ExecutionEngine/RuntimeDyld/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/ExecutionEngine/RuntimeDyld/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/ExecutionEngine/RuntimeDyld/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/ExecutionEngine/RuntimeDyld/ya.make b/contrib/libs/llvm12/lib/ExecutionEngine/RuntimeDyld/ya.make index dfbbbd4609..44d7dea118 100644 --- a/contrib/libs/llvm12/lib/ExecutionEngine/RuntimeDyld/ya.make +++ b/contrib/libs/llvm12/lib/ExecutionEngine/RuntimeDyld/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -20,9 +20,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/ExecutionEngine/RuntimeDyld -) +ADDINCL( + contrib/libs/llvm12/lib/ExecutionEngine/RuntimeDyld +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/ExecutionEngine/ya.make b/contrib/libs/llvm12/lib/ExecutionEngine/ya.make index 13c74b4a17..38849572cb 100644 --- a/contrib/libs/llvm12/lib/ExecutionEngine/ya.make +++ b/contrib/libs/llvm12/lib/ExecutionEngine/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -22,9 +22,9 @@ PEERDIR( contrib/libs/llvm12/lib/Target ) -ADDINCL( - contrib/libs/llvm12/lib/ExecutionEngine -) +ADDINCL( + contrib/libs/llvm12/lib/ExecutionEngine +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Frontend/OpenMP/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Frontend/OpenMP/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- 
a/contrib/libs/llvm12/lib/Frontend/OpenMP/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Frontend/OpenMP/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Frontend/OpenMP/ya.make b/contrib/libs/llvm12/lib/Frontend/OpenMP/ya.make index 45aec34c4d..3a64ea8b69 100644 --- a/contrib/libs/llvm12/lib/Frontend/OpenMP/ya.make +++ b/contrib/libs/llvm12/lib/Frontend/OpenMP/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -19,9 +19,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Utils ) -ADDINCL( - contrib/libs/llvm12/lib/Frontend/OpenMP -) +ADDINCL( + contrib/libs/llvm12/lib/Frontend/OpenMP +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/IR/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/IR/.yandex_meta/licenses.list.txt index 7a45473cdd..dde681e762 100644 --- a/contrib/libs/llvm12/lib/IR/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/IR/.yandex_meta/licenses.list.txt @@ -1,19 +1,19 @@ 
-====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================COPYRIGHT==================== - Type *I32Ty = Type::getInt32Ty(C); - Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), - ConstantInt::get(I32Ty, 0)); - - -====================COPYRIGHT==================== - LLVMContext &Context = *unwrap(C); - SmallVector<Metadata *, 8> MDs; - for (auto *OV : makeArrayRef(Vals, Count)) { +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================COPYRIGHT==================== + Type *I32Ty = Type::getInt32Ty(C); + Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), + ConstantInt::get(I32Ty, 0)); + + +====================COPYRIGHT==================== + LLVMContext &Context = *unwrap(C); + SmallVector<Metadata *, 8> MDs; + for (auto *OV : makeArrayRef(Vals, Count)) { diff --git a/contrib/libs/llvm12/lib/IR/ya.make b/contrib/libs/llvm12/lib/IR/ya.make index 12717aade7..5c27c5b009 100644 --- a/contrib/libs/llvm12/lib/IR/ya.make +++ b/contrib/libs/llvm12/lib/IR/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -19,9 +19,9 @@ 
PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/IR -) +ADDINCL( + contrib/libs/llvm12/lib/IR +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/IRReader/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/IRReader/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/IRReader/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/IRReader/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/IRReader/ya.make b/contrib/libs/llvm12/lib/IRReader/ya.make index 10d45a14f0..6ccecf0b7c 100644 --- a/contrib/libs/llvm12/lib/IRReader/ya.make +++ b/contrib/libs/llvm12/lib/IRReader/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -20,9 +20,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/IRReader -) +ADDINCL( + contrib/libs/llvm12/lib/IRReader +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Linker/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Linker/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Linker/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Linker/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Linker/ya.make b/contrib/libs/llvm12/lib/Linker/ya.make index 62043c593c..7dc901efba 100644 --- a/contrib/libs/llvm12/lib/Linker/ya.make +++ b/contrib/libs/llvm12/lib/Linker/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -20,9 +20,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Utils ) -ADDINCL( - contrib/libs/llvm12/lib/Linker -) +ADDINCL( + contrib/libs/llvm12/lib/Linker +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/MC/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/MC/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/MC/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/MC/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/MC/MCDisassembler/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/MC/MCDisassembler/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/MC/MCDisassembler/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/MC/MCDisassembler/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/MC/MCDisassembler/ya.make b/contrib/libs/llvm12/lib/MC/MCDisassembler/ya.make index 882f3f7476..160728d237 100644 --- a/contrib/libs/llvm12/lib/MC/MCDisassembler/ya.make +++ b/contrib/libs/llvm12/lib/MC/MCDisassembler/ya.make @@ -2,24 +2,24 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/MC contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/MC/MCDisassembler -) +ADDINCL( + contrib/libs/llvm12/lib/MC/MCDisassembler +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/MC/MCParser/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/MC/MCParser/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/MC/MCParser/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/MC/MCParser/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/MC/MCParser/ya.make b/contrib/libs/llvm12/lib/MC/MCParser/ya.make index 29b587dd45..304ad7d65a 100644 --- a/contrib/libs/llvm12/lib/MC/MCParser/ya.make +++ b/contrib/libs/llvm12/lib/MC/MCParser/ya.make @@ -2,24 +2,24 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/MC contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/MC/MCParser -) +ADDINCL( + contrib/libs/llvm12/lib/MC/MCParser +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/MC/ya.make b/contrib/libs/llvm12/lib/MC/ya.make index 92f60a8aa3..3d136d7426 100644 --- a/contrib/libs/llvm12/lib/MC/ya.make +++ b/contrib/libs/llvm12/lib/MC/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/BinaryFormat @@ -18,9 +18,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/MC -) +ADDINCL( + contrib/libs/llvm12/lib/MC +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Object/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Object/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Object/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Object/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Object/ya.make b/contrib/libs/llvm12/lib/Object/ya.make index 21dc018f1f..3d746aaa36 100644 --- a/contrib/libs/llvm12/lib/Object/ya.make +++ b/contrib/libs/llvm12/lib/Object/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -23,9 +23,9 @@ PEERDIR( contrib/libs/llvm12/lib/TextAPI/MachO ) -ADDINCL( - contrib/libs/llvm12/lib/Object -) +ADDINCL( + contrib/libs/llvm12/lib/Object +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/ProfileData/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/ProfileData/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/ProfileData/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/ProfileData/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
- - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/ProfileData/Coverage/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/ProfileData/Coverage/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/ProfileData/Coverage/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/ProfileData/Coverage/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/ProfileData/Coverage/ya.make b/contrib/libs/llvm12/lib/ProfileData/Coverage/ya.make index 4a83ac3fdd..7485f519d1 100644 --- a/contrib/libs/llvm12/lib/ProfileData/Coverage/ya.make +++ b/contrib/libs/llvm12/lib/ProfileData/Coverage/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/IR @@ -19,9 +19,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/ProfileData/Coverage -) +ADDINCL( + contrib/libs/llvm12/lib/ProfileData/Coverage +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/ProfileData/ya.make b/contrib/libs/llvm12/lib/ProfileData/ya.make index 07c714dcd9..1c3b40612f 100644 --- a/contrib/libs/llvm12/lib/ProfileData/ya.make +++ b/contrib/libs/llvm12/lib/ProfileData/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -19,9 +19,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/ProfileData -) +ADDINCL( + contrib/libs/llvm12/lib/ProfileData +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Remarks/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Remarks/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Remarks/.yandex_meta/licenses.list.txt +++ 
b/contrib/libs/llvm12/lib/Remarks/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Remarks/ya.make b/contrib/libs/llvm12/lib/Remarks/ya.make index 5696d1ff2c..4308033577 100644 --- a/contrib/libs/llvm12/lib/Remarks/ya.make +++ b/contrib/libs/llvm12/lib/Remarks/ya.make @@ -2,24 +2,24 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Bitstream/Reader contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/Remarks -) +ADDINCL( + contrib/libs/llvm12/lib/Remarks +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Support/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Support/.yandex_meta/licenses.list.txt index 140ccaa15c..c854c46cf1 100644 --- a/contrib/libs/llvm12/lib/Support/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Support/.yandex_meta/licenses.list.txt @@ -1,499 +1,499 @@ -====================Apache-2.0 WITH LLVM-exception==================== - * Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================BSD-2-Clause==================== -* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - -====================BSD-2-Clause==================== -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions are -* met: -* -* * Redistributions of source code must retain the above copyright -* notice, this list of conditions and the following disclaimer. -* * Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following disclaimer -* in the documentation and/or other materials provided with the -* distribution. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -* A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -====================BSD-3-Clause==================== - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - - -====================COPYRIGHT==================== - : Code(C) { - ErrMsg = "Stream Error: "; - switch (C) { - case stream_error_code::unspecified: - ErrMsg += "An unspecified error has occurred."; - - -====================COPYRIGHT==================== - Copyright (C) 2012-2016, Yann Collet. - - -====================COPYRIGHT==================== - * Copyright (c) 1992 Henry Spencer. - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - - -====================COPYRIGHT==================== - * Copyright (c) 1992, 1993, 1994 Henry Spencer. - * Copyright (c) 1992, 1993, 1994 - * The Regents of the University of California. All rights reserved. - - -====================COPYRIGHT==================== - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - - -====================COPYRIGHT==================== - * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> - - -====================COPYRIGHT==================== - * Copyright 2001-2004 Unicode, Inc. - - -====================COPYRIGHT==================== - * This software was written by Alexander Peslyak in 2001. No copyright is - * claimed, and the software is hereby placed in the public domain. 
- * In case this attempt to disclaim copyright and place the software in the - * public domain is deemed null and void, then the software is - * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the - * general public under the following terms: - - -====================COPYRIGHT==================== -Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. - - -====================File: LICENSE.TXT==================== -============================================================================== -The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: -============================================================================== - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ----- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - -============================================================================== -Software from third parties included in the LLVM Project: -============================================================================== -The LLVM Project contains third party software which is under different license -terms. 
All such code will be identified clearly using at least one of two -mechanisms: -1) It will be in a separate directory tree with its own `LICENSE.txt` or - `LICENSE` file at the top containing the specific license and restrictions - which apply to that software, or -2) It will contain specific license and restriction terms at the top of every - file. - -============================================================================== -Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - - - -====================File: include/llvm/Support/LICENSE.TXT==================== -LLVM System Interface Library -------------------------------------------------------------------------------- -The LLVM System Interface Library is licensed under the Illinois Open Source -License and has the following additional copyright: - -Copyright (C) 2004 eXtensible Systems, Inc. - - -====================ISC==================== - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - -====================NCSA==================== -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
- - -====================Public-Domain==================== - * Homepage: - * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 - * - * Author: - * Alexander Peslyak, better known as Solar Designer <solar at openwall.com> - * - * This software was written by Alexander Peslyak in 2001. No copyright is - * claimed, and the software is hereby placed in the public domain. - * In case this attempt to disclaim copyright and place the software in the - * public domain is deemed null and void, then the software is - * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the - * general public under the following terms: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted. - * - * There's ABSOLUTELY NO WARRANTY, express or implied. - * - * (This is a heavily cut-down "BSD license".) - - -====================Public-Domain==================== -// This code is taken from public domain - - -====================Spencer-94==================== -This software is not subject to any license of the American Telephone -and Telegraph Company or of the Regents of the University of California. - -Permission is granted to anyone to use this software for any purpose on -any computer system, and to alter it and redistribute it, subject -to the following restrictions: - -1. The author is not responsible for the consequences of use of this - software, no matter how awful, even if they arise from flaws in it. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. Since few users ever read sources, - credits must appear in the documentation. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. Since few users - ever read sources, credits must appear in the documentation. - -4. This notice may not be removed or altered. 
- - -====================Unicode-Mappings==================== - * Disclaimer - * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. - * - * Limitations on Rights to Redistribute This Code - * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. +====================Apache-2.0 WITH LLVM-exception==================== + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================BSD-2-Clause==================== +* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + +====================BSD-2-Clause==================== +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following disclaimer +* in the documentation and/or other materials provided with the +* distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +====================BSD-3-Clause==================== + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + + +====================COPYRIGHT==================== + : Code(C) { + ErrMsg = "Stream Error: "; + switch (C) { + case stream_error_code::unspecified: + ErrMsg += "An unspecified error has occurred."; + + +====================COPYRIGHT==================== + Copyright (C) 2012-2016, Yann Collet. + + +====================COPYRIGHT==================== + * Copyright (c) 1992 Henry Spencer. + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + + +====================COPYRIGHT==================== + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. 
+ + +====================COPYRIGHT==================== + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + + +====================COPYRIGHT==================== + * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com> + + +====================COPYRIGHT==================== + * Copyright 2001-2004 Unicode, Inc. + + +====================COPYRIGHT==================== + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. + * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + + +====================COPYRIGHT==================== +Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. + + +====================File: LICENSE.TXT==================== +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. 
All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + + + +====================File: include/llvm/Support/LICENSE.TXT==================== +LLVM System Interface Library +------------------------------------------------------------------------------- +The LLVM System Interface Library is licensed under the Illinois Open Source +License and has the following additional copyright: + +Copyright (C) 2004 eXtensible Systems, Inc. + + +====================ISC==================== + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +====================NCSA==================== +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+ + +====================Public-Domain==================== + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + * + * Author: + * Alexander Peslyak, better known as Solar Designer <solar at openwall.com> + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. + * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * (This is a heavily cut-down "BSD license".) + + +====================Public-Domain==================== +// This code is taken from public domain + + +====================Spencer-94==================== +This software is not subject to any license of the American Telephone +and Telegraph Company or of the Regents of the University of California. + +Permission is granted to anyone to use this software for any purpose on +any computer system, and to alter it and redistribute it, subject +to the following restrictions: + +1. The author is not responsible for the consequences of use of this + software, no matter how awful, even if they arise from flaws in it. + +2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. Since few users ever read sources, + credits must appear in the documentation. + +3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. Since few users + ever read sources, credits must appear in the documentation. + +4. This notice may not be removed or altered. 
+ + +====================Unicode-Mappings==================== + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. diff --git a/contrib/libs/llvm12/lib/Support/COPYRIGHT.regex b/contrib/libs/llvm12/lib/Support/COPYRIGHT.regex index 38fe93e345..a6392fd37c 100644 --- a/contrib/libs/llvm12/lib/Support/COPYRIGHT.regex +++ b/contrib/libs/llvm12/lib/Support/COPYRIGHT.regex @@ -1,54 +1,54 @@ -$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $ - -Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. -This software is not subject to any license of the American Telephone -and Telegraph Company or of the Regents of the University of California. - -Permission is granted to anyone to use this software for any purpose on -any computer system, and to alter it and redistribute it, subject -to the following restrictions: - -1. The author is not responsible for the consequences of use of this - software, no matter how awful, even if they arise from flaws in it. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. Since few users ever read sources, - credits must appear in the documentation. - -3. 
Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. Since few users - ever read sources, credits must appear in the documentation. - -4. This notice may not be removed or altered. - -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= -/*- - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94 - */ +$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $ + +Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. +This software is not subject to any license of the American Telephone +and Telegraph Company or of the Regents of the University of California. + +Permission is granted to anyone to use this software for any purpose on +any computer system, and to alter it and redistribute it, subject +to the following restrictions: + +1. The author is not responsible for the consequences of use of this + software, no matter how awful, even if they arise from flaws in it. + +2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. Since few users ever read sources, + credits must appear in the documentation. + +3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. Since few users + ever read sources, credits must appear in the documentation. + +4. This notice may not be removed or altered. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +/*- + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94 + */ diff --git a/contrib/libs/llvm12/lib/Support/Unix/README.txt b/contrib/libs/llvm12/lib/Support/Unix/README.txt index a30043116e..3d547c2990 100644 --- a/contrib/libs/llvm12/lib/Support/Unix/README.txt +++ b/contrib/libs/llvm12/lib/Support/Unix/README.txt @@ -1,16 +1,16 @@ -llvm/lib/Support/Unix README -=========================== - -This directory provides implementations of the lib/System classes that -are common to two or more variants of UNIX. For example, the directory -structure underneath this directory could look like this: - -Unix - only code that is truly generic to all UNIX platforms - Posix - code that is specific to Posix variants of UNIX - SUS - code that is specific to the Single Unix Specification - SysV - code that is specific to System V variants of UNIX - -As a rule, only those directories actually needing to be created should be -created. Also, further subdirectories could be created to reflect versions of -the various standards. For example, under SUS there could be v1, v2, and v3 -subdirectories to reflect the three major versions of SUS. 
+llvm/lib/Support/Unix README +=========================== + +This directory provides implementations of the lib/System classes that +are common to two or more variants of UNIX. For example, the directory +structure underneath this directory could look like this: + +Unix - only code that is truly generic to all UNIX platforms + Posix - code that is specific to Posix variants of UNIX + SUS - code that is specific to the Single Unix Specification + SysV - code that is specific to System V variants of UNIX + +As a rule, only those directories actually needing to be created should be +created. Also, further subdirectories could be created to reflect versions of +the various standards. For example, under SUS there could be v1, v2, and v3 +subdirectories to reflect the three major versions of SUS. diff --git a/contrib/libs/llvm12/lib/Support/VirtualFileSystem.cpp b/contrib/libs/llvm12/lib/Support/VirtualFileSystem.cpp index c05089f22a..c0ccadfce0 100644 --- a/contrib/libs/llvm12/lib/Support/VirtualFileSystem.cpp +++ b/contrib/libs/llvm12/lib/Support/VirtualFileSystem.cpp @@ -520,111 +520,111 @@ directory_iterator OverlayFileSystem::dir_begin(const Twine &Dir, void ProxyFileSystem::anchor() {} -//===-----------------------------------------------------------------------===/ -// CaseInsensitiveFileSystem implementation -//===-----------------------------------------------------------------------===/ - -bool CaseInsensitiveFileSystem::exclude(StringRef Dir, StringRef File) { - if (!Maps.count(Dir)) { - // We have no map for this Dir, but see if we can exclude the file by - // excluding Dir from its parent. 
- StringRef Parent = llvm::sys::path::parent_path(Dir); - if (!Parent.empty() && - exclude(Parent, llvm::sys::path::filename(Dir))) { - return true; - } - - return false; - } - - return !Maps[Dir].count(File.lower()); -} - -std::error_code CaseInsensitiveFileSystem::findCaseInsensitivePath( - StringRef Path, SmallVectorImpl<char> &FoundPath) { - StringRef FileName = llvm::sys::path::filename(Path); - StringRef Dir = llvm::sys::path::parent_path(Path); - - if (Dir.empty()) - Dir = "."; - - if (exclude(Dir, FileName)) - return llvm::errc::no_such_file_or_directory; - - if (Maps.count(Dir)) { - // If we have a map for this Dir and File wasn't excluded above, it must - // exist. - llvm::sys::path::append(FoundPath, Dir, Maps[Dir][FileName.lower()]); - return std::error_code(); - } - - std::error_code EC; - directory_iterator I = Base->dir_begin(Dir, EC); - if (EC == errc::no_such_file_or_directory) { - // If the dir doesn't exist, try to find it and try again. - SmallVector<char, 512> NewDir; - if (llvm::sys::path::parent_path(Dir).empty() || - (EC = findCaseInsensitivePath(Dir, NewDir))) { - // Insert a dummy map value to mark the dir as non-existent. - Maps.lookup(Dir); - return EC; - } - llvm::sys::path::append(NewDir, FileName); - return findCaseInsensitivePath(StringRef(NewDir.data(), NewDir.size()), - FoundPath); - } - - // These special entries always exist, but won't show up in the listing below. - Maps[Dir]["."] = "."; - Maps[Dir][".."] = ".."; - - directory_iterator E; - for (; I != E; I.increment(EC)) { - StringRef DirEntry = llvm::sys::path::filename(I->path()); - Maps[Dir][DirEntry.lower()] = DirEntry.str(); - } - if (EC) { - // If there were problems, scrap the whole map as it may not be complete. 
- Maps.erase(Dir); - return EC; - } - - auto MI = Maps[Dir].find(FileName.lower()); - if (MI != Maps[Dir].end()) { - llvm::sys::path::append(FoundPath, Dir, MI->second); - return std::error_code(); - } - - return llvm::errc::no_such_file_or_directory; -} - -llvm::ErrorOr<Status> CaseInsensitiveFileSystem::status(const Twine &Path) { - SmallVector<char, 512> NewPath; - if (std::error_code EC = findCaseInsensitivePath(Path.str(), NewPath)) - return EC; - - return Base->status(NewPath); -} - -llvm::ErrorOr<std::unique_ptr<File>> -CaseInsensitiveFileSystem::openFileForRead(const Twine &Path) { - SmallVector<char, 512> NewPath; - if (std::error_code EC = findCaseInsensitivePath(Path.str(), NewPath)) - return EC; - - return Base->openFileForRead(NewPath); -} - -directory_iterator CaseInsensitiveFileSystem::dir_begin(const Twine &Path, - std::error_code &EC) { - SmallVector<char, 512> NewPath; - if ((EC = findCaseInsensitivePath(Path.str(), NewPath))) - return directory_iterator(); - - return Base->dir_begin(NewPath, EC); -} - - +//===-----------------------------------------------------------------------===/ +// CaseInsensitiveFileSystem implementation +//===-----------------------------------------------------------------------===/ + +bool CaseInsensitiveFileSystem::exclude(StringRef Dir, StringRef File) { + if (!Maps.count(Dir)) { + // We have no map for this Dir, but see if we can exclude the file by + // excluding Dir from its parent. 
+ StringRef Parent = llvm::sys::path::parent_path(Dir); + if (!Parent.empty() && + exclude(Parent, llvm::sys::path::filename(Dir))) { + return true; + } + + return false; + } + + return !Maps[Dir].count(File.lower()); +} + +std::error_code CaseInsensitiveFileSystem::findCaseInsensitivePath( + StringRef Path, SmallVectorImpl<char> &FoundPath) { + StringRef FileName = llvm::sys::path::filename(Path); + StringRef Dir = llvm::sys::path::parent_path(Path); + + if (Dir.empty()) + Dir = "."; + + if (exclude(Dir, FileName)) + return llvm::errc::no_such_file_or_directory; + + if (Maps.count(Dir)) { + // If we have a map for this Dir and File wasn't excluded above, it must + // exist. + llvm::sys::path::append(FoundPath, Dir, Maps[Dir][FileName.lower()]); + return std::error_code(); + } + + std::error_code EC; + directory_iterator I = Base->dir_begin(Dir, EC); + if (EC == errc::no_such_file_or_directory) { + // If the dir doesn't exist, try to find it and try again. + SmallVector<char, 512> NewDir; + if (llvm::sys::path::parent_path(Dir).empty() || + (EC = findCaseInsensitivePath(Dir, NewDir))) { + // Insert a dummy map value to mark the dir as non-existent. + Maps.lookup(Dir); + return EC; + } + llvm::sys::path::append(NewDir, FileName); + return findCaseInsensitivePath(StringRef(NewDir.data(), NewDir.size()), + FoundPath); + } + + // These special entries always exist, but won't show up in the listing below. + Maps[Dir]["."] = "."; + Maps[Dir][".."] = ".."; + + directory_iterator E; + for (; I != E; I.increment(EC)) { + StringRef DirEntry = llvm::sys::path::filename(I->path()); + Maps[Dir][DirEntry.lower()] = DirEntry.str(); + } + if (EC) { + // If there were problems, scrap the whole map as it may not be complete. 
+ Maps.erase(Dir); + return EC; + } + + auto MI = Maps[Dir].find(FileName.lower()); + if (MI != Maps[Dir].end()) { + llvm::sys::path::append(FoundPath, Dir, MI->second); + return std::error_code(); + } + + return llvm::errc::no_such_file_or_directory; +} + +llvm::ErrorOr<Status> CaseInsensitiveFileSystem::status(const Twine &Path) { + SmallVector<char, 512> NewPath; + if (std::error_code EC = findCaseInsensitivePath(Path.str(), NewPath)) + return EC; + + return Base->status(NewPath); +} + +llvm::ErrorOr<std::unique_ptr<File>> +CaseInsensitiveFileSystem::openFileForRead(const Twine &Path) { + SmallVector<char, 512> NewPath; + if (std::error_code EC = findCaseInsensitivePath(Path.str(), NewPath)) + return EC; + + return Base->openFileForRead(NewPath); +} + +directory_iterator CaseInsensitiveFileSystem::dir_begin(const Twine &Path, + std::error_code &EC) { + SmallVector<char, 512> NewPath; + if ((EC = findCaseInsensitivePath(Path.str(), NewPath))) + return directory_iterator(); + + return Base->dir_begin(NewPath, EC); +} + + namespace llvm { namespace vfs { diff --git a/contrib/libs/llvm12/lib/Support/ya.make b/contrib/libs/llvm12/lib/Support/ya.make index 7815d67130..b3f49d0f2a 100644 --- a/contrib/libs/llvm12/lib/Support/ya.make +++ b/contrib/libs/llvm12/lib/Support/ya.make @@ -2,33 +2,33 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE( + Apache-2.0 WITH LLVM-exception AND + BSD-2-Clause AND + BSD-3-Clause AND + ISC AND + NCSA AND + Public-Domain AND + Spencer-94 AND + Unicode-Mappings +) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE( - Apache-2.0 WITH LLVM-exception AND - BSD-2-Clause AND - BSD-3-Clause AND - ISC AND - NCSA AND - Public-Domain AND - Spencer-94 AND - Unicode-Mappings -) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Demangle contrib/libs/zlib ) -ADDINCL( - contrib/libs/llvm12/lib/Support -) +ADDINCL( + contrib/libs/llvm12/lib/Support 
+) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/TableGen/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/TableGen/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/TableGen/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/TableGen/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/TableGen/ya.make b/contrib/libs/llvm12/lib/TableGen/ya.make index c657d0b4e5..a5ef0f22ef 100644 --- a/contrib/libs/llvm12/lib/TableGen/ya.make +++ b/contrib/libs/llvm12/lib/TableGen/ya.make @@ -2,23 +2,23 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/TableGen -) +ADDINCL( + contrib/libs/llvm12/lib/TableGen +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/AArch64/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/AArch64/.yandex_meta/licenses.list.txt index b0b34714ca..ad3879fc45 100644 --- 
a/contrib/libs/llvm12/lib/Target/AArch64/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/AArch64/.yandex_meta/licenses.list.txt @@ -1,303 +1,303 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================File: LICENSE.TXT==================== -============================================================================== -The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: -============================================================================== - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ----- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - -============================================================================== -Software from third parties included in the LLVM Project: -============================================================================== -The LLVM Project contains third party software which is under different license -terms. 
All such code will be identified clearly using at least one of two -mechanisms: -1) It will be in a separate directory tree with its own `LICENSE.txt` or - `LICENSE` file at the top containing the specific license and restrictions - which apply to that software, or -2) It will contain specific license and restriction terms at the top of every - file. - -============================================================================== -Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - - - -====================File: include/llvm/Support/LICENSE.TXT==================== -LLVM System Interface Library -------------------------------------------------------------------------------- -The LLVM System Interface Library is licensed under the Illinois Open Source -License and has the following additional copyright: - -Copyright (C) 2004 eXtensible Systems, Inc. - - -====================NCSA==================== -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================File: LICENSE.TXT==================== +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
+ +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. 
+ +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + + + +====================File: include/llvm/Support/LICENSE.TXT==================== +LLVM System Interface Library +------------------------------------------------------------------------------- +The LLVM System Interface Library is licensed under the Illinois Open Source +License and has the following additional copyright: + +Copyright (C) 2004 eXtensible Systems, Inc. 
+ + +====================NCSA==================== +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make index 0434db6bfa..512f510d85 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make index 5c499b5c3e..096b55cd68 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make index fee67a8c71..18b5c7460f 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make index 9d0c885590..bb7d4a2c89 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Support diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Utils/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/AArch64/Utils/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/Utils/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/AArch64/Utils/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make index a0b39d5c95..3668c2a650 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/AArch64/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/ya.make index e5ef1b3dcb..244cbc7f34 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/ya.make @@ -2,18 +2,18 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE( + Apache-2.0 WITH LLVM-exception AND + NCSA +) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE( - Apache-2.0 WITH LLVM-exception AND - NCSA -) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/ARM/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/ARM/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/ARM/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
- - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make index f5c567afbc..572d301570 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make index 5e4e1b3e6a..f8ce0c24d9 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make index 2a6b0715e7..b92b47d057 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/ARM/README-Thumb.txt b/contrib/libs/llvm12/lib/Target/ARM/README-Thumb.txt index 041f5508d7..d9cc086da8 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/README-Thumb.txt +++ b/contrib/libs/llvm12/lib/Target/ARM/README-Thumb.txt @@ -1,261 +1,261 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the ARM backend (Thumb specific). -//===---------------------------------------------------------------------===// - -* Add support for compiling functions in both ARM and Thumb mode, then taking - the smallest. - -* Add support for compiling individual basic blocks in thumb mode, when in a - larger ARM function. This can be used for presumed cold code, like paths - to abort (failure path of asserts), EH handling code, etc. - -* Thumb doesn't have normal pre/post increment addressing modes, but you can - load/store 32-bit integers with pre/postinc by using load/store multiple - instrs with a single register. - -* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add - and cmp instructions can use high registers. Also, we can use them as - temporaries to spill values into. 
- -* In thumb mode, short, byte, and bool preferred alignments are currently set - to 4 to accommodate ISA restriction (i.e. add sp, #imm, imm must be multiple - of 4). - -//===---------------------------------------------------------------------===// - -Potential jumptable improvements: - -* If we know function size is less than (1 << 16) * 2 bytes, we can use 16-bit - jumptable entries (e.g. (L1 - L2) >> 1). Or even smaller entries if the - function is even smaller. This also applies to ARM. - -* Thumb jumptable codegen can improve given some help from the assembler. This - is what we generate right now: - - .set PCRELV0, (LJTI1_0_0-(LPCRELL0+4)) -LPCRELL0: - mov r1, #PCRELV0 - add r1, pc - ldr r0, [r0, r1] - mov pc, r0 - .align 2 -LJTI1_0_0: - .long LBB1_3 - ... - -Note there is another pc relative add that we can take advantage of. - add r1, pc, #imm_8 * 4 - -We should be able to generate: - -LPCRELL0: - add r1, LJTI1_0_0 - ldr r0, [r0, r1] - mov pc, r0 - .align 2 -LJTI1_0_0: - .long LBB1_3 - -if the assembler can translate the add to: - add r1, pc, #((LJTI1_0_0-(LPCRELL0+4))&0xfffffffc) - -Note the assembler also does something similar to constpool load: -LPCRELL0: - ldr r0, LCPI1_0 -=> - ldr r0, pc, #((LCPI1_0-(LPCRELL0+4))&0xfffffffc) - - -//===---------------------------------------------------------------------===// - -We compile the following: - -define i16 @func_entry_2E_ce(i32 %i) { - switch i32 %i, label %bb12.exitStub [ - i32 0, label %bb4.exitStub - i32 1, label %bb9.exitStub - i32 2, label %bb4.exitStub - i32 3, label %bb4.exitStub - i32 7, label %bb9.exitStub - i32 8, label %bb.exitStub - i32 9, label %bb9.exitStub - ] - -bb12.exitStub: - ret i16 0 - -bb4.exitStub: - ret i16 1 - -bb9.exitStub: - ret i16 2 - -bb.exitStub: - ret i16 3 -} - -into: - -_func_entry_2E_ce: - mov r2, #1 - lsl r2, r0 - cmp r0, #9 - bhi LBB1_4 @bb12.exitStub -LBB1_1: @newFuncRoot - mov r1, #13 - tst r2, r1 - bne LBB1_5 @bb4.exitStub -LBB1_2: @newFuncRoot - ldr r1, LCPI1_0 - 
tst r2, r1 - bne LBB1_6 @bb9.exitStub -LBB1_3: @newFuncRoot - mov r1, #1 - lsl r1, r1, #8 - tst r2, r1 - bne LBB1_7 @bb.exitStub -LBB1_4: @bb12.exitStub - mov r0, #0 - bx lr -LBB1_5: @bb4.exitStub - mov r0, #1 - bx lr -LBB1_6: @bb9.exitStub - mov r0, #2 - bx lr -LBB1_7: @bb.exitStub - mov r0, #3 - bx lr -LBB1_8: - .align 2 -LCPI1_0: - .long 642 - - -gcc compiles to: - - cmp r0, #9 - @ lr needed for prologue - bhi L2 - ldr r3, L11 - mov r2, #1 - mov r1, r2, asl r0 - ands r0, r3, r2, asl r0 - movne r0, #2 - bxne lr - tst r1, #13 - beq L9 -L3: - mov r0, r2 - bx lr -L9: - tst r1, #256 - movne r0, #3 - bxne lr -L2: - mov r0, #0 - bx lr -L12: - .align 2 -L11: - .long 642 - - -GCC is doing a couple of clever things here: - 1. It is predicating one of the returns. This isn't a clear win though: in - cases where that return isn't taken, it is replacing one condbranch with - two 'ne' predicated instructions. - 2. It is sinking the shift of "1 << i" into the tst, and using ands instead of - tst. This will probably require whole function isel. - 3. GCC emits: - tst r1, #256 - we emit: - mov r1, #1 - lsl r1, r1, #8 - tst r2, r1 - -//===---------------------------------------------------------------------===// - -When spilling in thumb mode and the sp offset is too large to fit in the ldr / -str offset field, we load the offset from a constpool entry and add it to sp: - -ldr r2, LCPI -add r2, sp -ldr r2, [r2] - -These instructions preserve the condition code which is important if the spill -is between a cmp and a bcc instruction. However, we can use the (potentially) -cheaper sequence if we know it's ok to clobber the condition register. - -add r2, sp, #255 * 4 -add r2, #132 -ldr r2, [r2, #7 * 4] - -This is especially bad when dynamic alloca is used. The all fixed size stack -objects are referenced off the frame pointer with negative offsets. See -oggenc for an example. 
- -//===---------------------------------------------------------------------===// - -Poor codegen test/CodeGen/ARM/select.ll f7: - - ldr r5, LCPI1_0 -LPC0: - add r5, pc - ldr r6, LCPI1_1 - ldr r2, LCPI1_2 - mov r3, r6 - mov lr, pc - bx r5 - -//===---------------------------------------------------------------------===// - -Make register allocator / spiller smarter so we can re-materialize "mov r, imm", -etc. Almost all Thumb instructions clobber condition code. - -//===---------------------------------------------------------------------===// - -Thumb load / store address mode offsets are scaled. The values kept in the -instruction operands are pre-scale values. This probably ought to be changed -to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions. - -//===---------------------------------------------------------------------===// - -We need to make (some of the) Thumb1 instructions predicable. That will allow -shrinking of predicated Thumb2 instructions. To allow this, we need to be able -to toggle the 's' bit since they do not set CPSR when they are inside IT blocks. - -//===---------------------------------------------------------------------===// - -Make use of hi register variants of cmp: tCMPhir / tCMPZhir. - -//===---------------------------------------------------------------------===// - -Thumb1 immediate field sometimes keep pre-scaled values. See -ThumbRegisterInfo::eliminateFrameIndex. This is inconsistent from ARM and -Thumb2. - -//===---------------------------------------------------------------------===// - -Rather than having tBR_JTr print a ".align 2" and constant island pass pad it, -add a target specific ALIGN instruction instead. That way, getInstSizeInBytes -won't have to over-estimate. It can also be used for loop alignment pass. - -//===---------------------------------------------------------------------===// - -We generate conditional code for icmp when we don't need to. 
This code: - - int foo(int s) { - return s == 1; - } - -produces: - -foo: - cmp r0, #1 - mov.w r0, #0 - it eq - moveq r0, #1 - bx lr - -when it could use subs + adcs. This is GCC PR46975. +//===---------------------------------------------------------------------===// +// Random ideas for the ARM backend (Thumb specific). +//===---------------------------------------------------------------------===// + +* Add support for compiling functions in both ARM and Thumb mode, then taking + the smallest. + +* Add support for compiling individual basic blocks in thumb mode, when in a + larger ARM function. This can be used for presumed cold code, like paths + to abort (failure path of asserts), EH handling code, etc. + +* Thumb doesn't have normal pre/post increment addressing modes, but you can + load/store 32-bit integers with pre/postinc by using load/store multiple + instrs with a single register. + +* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add + and cmp instructions can use high registers. Also, we can use them as + temporaries to spill values into. + +* In thumb mode, short, byte, and bool preferred alignments are currently set + to 4 to accommodate ISA restriction (i.e. add sp, #imm, imm must be multiple + of 4). + +//===---------------------------------------------------------------------===// + +Potential jumptable improvements: + +* If we know function size is less than (1 << 16) * 2 bytes, we can use 16-bit + jumptable entries (e.g. (L1 - L2) >> 1). Or even smaller entries if the + function is even smaller. This also applies to ARM. + +* Thumb jumptable codegen can improve given some help from the assembler. This + is what we generate right now: + + .set PCRELV0, (LJTI1_0_0-(LPCRELL0+4)) +LPCRELL0: + mov r1, #PCRELV0 + add r1, pc + ldr r0, [r0, r1] + mov pc, r0 + .align 2 +LJTI1_0_0: + .long LBB1_3 + ... + +Note there is another pc relative add that we can take advantage of. 
+ add r1, pc, #imm_8 * 4 + +We should be able to generate: + +LPCRELL0: + add r1, LJTI1_0_0 + ldr r0, [r0, r1] + mov pc, r0 + .align 2 +LJTI1_0_0: + .long LBB1_3 + +if the assembler can translate the add to: + add r1, pc, #((LJTI1_0_0-(LPCRELL0+4))&0xfffffffc) + +Note the assembler also does something similar to constpool load: +LPCRELL0: + ldr r0, LCPI1_0 +=> + ldr r0, pc, #((LCPI1_0-(LPCRELL0+4))&0xfffffffc) + + +//===---------------------------------------------------------------------===// + +We compile the following: + +define i16 @func_entry_2E_ce(i32 %i) { + switch i32 %i, label %bb12.exitStub [ + i32 0, label %bb4.exitStub + i32 1, label %bb9.exitStub + i32 2, label %bb4.exitStub + i32 3, label %bb4.exitStub + i32 7, label %bb9.exitStub + i32 8, label %bb.exitStub + i32 9, label %bb9.exitStub + ] + +bb12.exitStub: + ret i16 0 + +bb4.exitStub: + ret i16 1 + +bb9.exitStub: + ret i16 2 + +bb.exitStub: + ret i16 3 +} + +into: + +_func_entry_2E_ce: + mov r2, #1 + lsl r2, r0 + cmp r0, #9 + bhi LBB1_4 @bb12.exitStub +LBB1_1: @newFuncRoot + mov r1, #13 + tst r2, r1 + bne LBB1_5 @bb4.exitStub +LBB1_2: @newFuncRoot + ldr r1, LCPI1_0 + tst r2, r1 + bne LBB1_6 @bb9.exitStub +LBB1_3: @newFuncRoot + mov r1, #1 + lsl r1, r1, #8 + tst r2, r1 + bne LBB1_7 @bb.exitStub +LBB1_4: @bb12.exitStub + mov r0, #0 + bx lr +LBB1_5: @bb4.exitStub + mov r0, #1 + bx lr +LBB1_6: @bb9.exitStub + mov r0, #2 + bx lr +LBB1_7: @bb.exitStub + mov r0, #3 + bx lr +LBB1_8: + .align 2 +LCPI1_0: + .long 642 + + +gcc compiles to: + + cmp r0, #9 + @ lr needed for prologue + bhi L2 + ldr r3, L11 + mov r2, #1 + mov r1, r2, asl r0 + ands r0, r3, r2, asl r0 + movne r0, #2 + bxne lr + tst r1, #13 + beq L9 +L3: + mov r0, r2 + bx lr +L9: + tst r1, #256 + movne r0, #3 + bxne lr +L2: + mov r0, #0 + bx lr +L12: + .align 2 +L11: + .long 642 + + +GCC is doing a couple of clever things here: + 1. It is predicating one of the returns. 
This isn't a clear win though: in + cases where that return isn't taken, it is replacing one condbranch with + two 'ne' predicated instructions. + 2. It is sinking the shift of "1 << i" into the tst, and using ands instead of + tst. This will probably require whole function isel. + 3. GCC emits: + tst r1, #256 + we emit: + mov r1, #1 + lsl r1, r1, #8 + tst r2, r1 + +//===---------------------------------------------------------------------===// + +When spilling in thumb mode and the sp offset is too large to fit in the ldr / +str offset field, we load the offset from a constpool entry and add it to sp: + +ldr r2, LCPI +add r2, sp +ldr r2, [r2] + +These instructions preserve the condition code which is important if the spill +is between a cmp and a bcc instruction. However, we can use the (potentially) +cheaper sequence if we know it's ok to clobber the condition register. + +add r2, sp, #255 * 4 +add r2, #132 +ldr r2, [r2, #7 * 4] + +This is especially bad when dynamic alloca is used. The all fixed size stack +objects are referenced off the frame pointer with negative offsets. See +oggenc for an example. + +//===---------------------------------------------------------------------===// + +Poor codegen test/CodeGen/ARM/select.ll f7: + + ldr r5, LCPI1_0 +LPC0: + add r5, pc + ldr r6, LCPI1_1 + ldr r2, LCPI1_2 + mov r3, r6 + mov lr, pc + bx r5 + +//===---------------------------------------------------------------------===// + +Make register allocator / spiller smarter so we can re-materialize "mov r, imm", +etc. Almost all Thumb instructions clobber condition code. + +//===---------------------------------------------------------------------===// + +Thumb load / store address mode offsets are scaled. The values kept in the +instruction operands are pre-scale values. This probably ought to be changed +to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions. 
+ +//===---------------------------------------------------------------------===// + +We need to make (some of the) Thumb1 instructions predicable. That will allow +shrinking of predicated Thumb2 instructions. To allow this, we need to be able +to toggle the 's' bit since they do not set CPSR when they are inside IT blocks. + +//===---------------------------------------------------------------------===// + +Make use of hi register variants of cmp: tCMPhir / tCMPZhir. + +//===---------------------------------------------------------------------===// + +Thumb1 immediate fields sometimes keep pre-scaled values. See +ThumbRegisterInfo::eliminateFrameIndex. This is inconsistent with ARM and +Thumb2. + +//===---------------------------------------------------------------------===// + +Rather than having tBR_JTr print a ".align 2" and constant island pass pad it, +add a target specific ALIGN instruction instead. That way, getInstSizeInBytes +won't have to over-estimate. It can also be used for loop alignment pass. + +//===---------------------------------------------------------------------===// + +We generate conditional code for icmp when we don't need to. This code: + + int foo(int s) { + return s == 1; + } + +produces: + +foo: + cmp r0, #1 + mov.w r0, #0 + it eq + moveq r0, #1 + bx lr + +when it could use subs + adcs. This is GCC PR46975. diff --git a/contrib/libs/llvm12/lib/Target/ARM/README-Thumb2.txt b/contrib/libs/llvm12/lib/Target/ARM/README-Thumb2.txt index 227746ec13..e7c2552d9e 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/README-Thumb2.txt +++ b/contrib/libs/llvm12/lib/Target/ARM/README-Thumb2.txt @@ -1,6 +1,6 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the ARM backend (Thumb2 specific). -//===---------------------------------------------------------------------===// - -Make sure jumptable destinations are below the jumptable in order to make use -of tbb / tbh. 
+//===---------------------------------------------------------------------===// +// Random ideas for the ARM backend (Thumb2 specific). +//===---------------------------------------------------------------------===// + +Make sure jumptable destinations are below the jumptable in order to make use +of tbb / tbh. diff --git a/contrib/libs/llvm12/lib/Target/ARM/README.txt b/contrib/libs/llvm12/lib/Target/ARM/README.txt index 1a93bc7bb7..def67cfae7 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/README.txt +++ b/contrib/libs/llvm12/lib/Target/ARM/README.txt @@ -1,732 +1,732 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the ARM backend. -//===---------------------------------------------------------------------===// - -Reimplement 'select' in terms of 'SEL'. - -* We would really like to support UXTAB16, but we need to prove that the - add doesn't need to overflow between the two 16-bit chunks. - -* Implement pre/post increment support. (e.g. PR935) -* Implement smarter constant generation for binops with large immediates. - -A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX - -Interesting optimization for PIC codegen on arm-linux: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43129 - -//===---------------------------------------------------------------------===// - -Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the -time regalloc happens, these values are now in a 32-bit register, usually with -the top-bits known to be sign or zero extended. If spilled, we should be able -to spill these to a 8-bit or 16-bit stack slot, zero or sign extending as part -of the reload. - -Doing this reduces the size of the stack frame (important for thumb etc), and -also increases the likelihood that we will be able to reload multiple values -from the stack with a single load. 
- -//===---------------------------------------------------------------------===// - -The constant island pass is in good shape. Some cleanups might be desirable, -but there is unlikely to be much improvement in the generated code. - -1. There may be some advantage to trying to be smarter about the initial -placement, rather than putting everything at the end. - -2. There might be some compile-time efficiency to be had by representing -consecutive islands as a single block rather than multiple blocks. - -3. Use a priority queue to sort constant pool users in inverse order of - position so we always process the one closed to the end of functions - first. This may simply CreateNewWater. - -//===---------------------------------------------------------------------===// - -Eliminate copysign custom expansion. We are still generating crappy code with -default expansion + if-conversion. - -//===---------------------------------------------------------------------===// - -Eliminate one instruction from: - -define i32 @_Z6slow4bii(i32 %x, i32 %y) { - %tmp = icmp sgt i32 %x, %y - %retval = select i1 %tmp, i32 %x, i32 %y - ret i32 %retval -} - -__Z6slow4bii: - cmp r0, r1 - movgt r1, r0 - mov r0, r1 - bx lr -=> - -__Z6slow4bii: - cmp r0, r1 - movle r0, r1 - bx lr - -//===---------------------------------------------------------------------===// - -Implement long long "X-3" with instructions that fold the immediate in. These -were disabled due to badness with the ARM carry flag on subtracts. - -//===---------------------------------------------------------------------===// - -More load / store optimizations: -1) Better representation for block transfer? This is from Olden/power: - - fldd d0, [r4] - fstd d0, [r4, #+32] - fldd d0, [r4, #+8] - fstd d0, [r4, #+40] - fldd d0, [r4, #+16] - fstd d0, [r4, #+48] - fldd d0, [r4, #+24] - fstd d0, [r4, #+56] - -If we can spare the registers, it would be better to use fldm and fstm here. -Need major register allocator enhancement though. 
- -2) Can we recognize the relative position of constantpool entries? i.e. Treat - - ldr r0, LCPI17_3 - ldr r1, LCPI17_4 - ldr r2, LCPI17_5 - - as - ldr r0, LCPI17 - ldr r1, LCPI17+4 - ldr r2, LCPI17+8 - - Then the ldr's can be combined into a single ldm. See Olden/power. - -Note for ARM v4 gcc uses ldmia to load a pair of 32-bit values to represent a -double 64-bit FP constant: - - adr r0, L6 - ldmia r0, {r0-r1} - - .align 2 -L6: - .long -858993459 - .long 1074318540 - -3) struct copies appear to be done field by field -instead of by words, at least sometimes: - -struct foo { int x; short s; char c1; char c2; }; -void cpy(struct foo*a, struct foo*b) { *a = *b; } - -llvm code (-O2) - ldrb r3, [r1, #+6] - ldr r2, [r1] - ldrb r12, [r1, #+7] - ldrh r1, [r1, #+4] - str r2, [r0] - strh r1, [r0, #+4] - strb r3, [r0, #+6] - strb r12, [r0, #+7] -gcc code (-O2) - ldmia r1, {r1-r2} - stmia r0, {r1-r2} - -In this benchmark poor handling of aggregate copies has shown up as -having a large effect on size, and possibly speed as well (we don't have -a good way to measure on ARM). - -//===---------------------------------------------------------------------===// - -* Consider this silly example: - -double bar(double x) { - double r = foo(3.1); - return x+r; -} - -_bar: - stmfd sp!, {r4, r5, r7, lr} - add r7, sp, #8 - mov r4, r0 - mov r5, r1 - fldd d0, LCPI1_0 - fmrrd r0, r1, d0 - bl _foo - fmdrr d0, r4, r5 - fmsr s2, r0 - fsitod d1, s2 - faddd d0, d1, d0 - fmrrd r0, r1, d0 - ldmfd sp!, {r4, r5, r7, pc} - -Ignore the prologue and epilogue stuff for a second. Note - mov r4, r0 - mov r5, r1 -the copys to callee-save registers and the fact they are only being used by the -fmdrr instruction. It would have been better had the fmdrr been scheduled -before the call and place the result in a callee-save DPR register. The two -mov ops would not have been necessary. 
- -//===---------------------------------------------------------------------===// - -Calling convention related stuff: - -* gcc's parameter passing implementation is terrible and we suffer as a result: - -e.g. -struct s { - double d1; - int s1; -}; - -void foo(struct s S) { - printf("%g, %d\n", S.d1, S.s1); -} - -'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and -then reload them to r1, r2, and r3 before issuing the call (r0 contains the -address of the format string): - - stmfd sp!, {r7, lr} - add r7, sp, #0 - sub sp, sp, #12 - stmia sp, {r0, r1, r2} - ldmia sp, {r1-r2} - ldr r0, L5 - ldr r3, [sp, #8] -L2: - add r0, pc, r0 - bl L_printf$stub - -Instead of a stmia, ldmia, and a ldr, wouldn't it be better to do three moves? - -* Return an aggregate type is even worse: - -e.g. -struct s foo(void) { - struct s S = {1.1, 2}; - return S; -} - - mov ip, r0 - ldr r0, L5 - sub sp, sp, #12 -L2: - add r0, pc, r0 - @ lr needed for prologue - ldmia r0, {r0, r1, r2} - stmia sp, {r0, r1, r2} - stmia ip, {r0, r1, r2} - mov r0, ip - add sp, sp, #12 - bx lr - -r0 (and later ip) is the hidden parameter from caller to store the value in. The -first ldmia loads the constants into r0, r1, r2. The last stmia stores r0, r1, -r2 into the address passed in. However, there is one additional stmia that -stores r0, r1, and r2 to some stack location. The store is dead. 
- -The llvm-gcc generated code looks like this: - -csretcc void %foo(%struct.s* %agg.result) { -entry: - %S = alloca %struct.s, align 4 ; <%struct.s*> [#uses=1] - %memtmp = alloca %struct.s ; <%struct.s*> [#uses=1] - cast %struct.s* %S to sbyte* ; <sbyte*>:0 [#uses=2] - call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 ) - cast %struct.s* %agg.result to sbyte* ; <sbyte*>:1 [#uses=2] - call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 ) - cast %struct.s* %memtmp to sbyte* ; <sbyte*>:2 [#uses=1] - call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 ) - ret void -} - -llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from -constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated -into a number of load and stores, or 2) custom lower memcpy (of small size) to -be ldmia / stmia. I think option 2 is better but the current register -allocator cannot allocate a chunk of registers at a time. - -A feasible temporary solution is to use specific physical registers at the -lowering time for small (<= 4 words?) transfer size. - -* ARM CSRet calling convention requires the hidden argument to be returned by -the callee. - -//===---------------------------------------------------------------------===// - -We can definitely do a better job on BB placements to eliminate some branches. -It's very common to see llvm generated assembly code that looks like this: - -LBB3: - ... -LBB4: -... - beq LBB3 - b LBB2 - -If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can -then eliminate beq and turn the unconditional branch to LBB2 to a bne. - -See McCat/18-imp/ComputeBoundingBoxes for an example. - -//===---------------------------------------------------------------------===// - -Pre-/post- indexed load / stores: - -1) We should not make the pre/post- indexed load/store transform if the base ptr -is guaranteed to be live beyond the load/store. 
This can happen if the base -ptr is live out of the block we are performing the optimization. e.g. - -mov r1, r2 -ldr r3, [r1], #4 -... - -vs. - -ldr r3, [r2] -add r1, r2, #4 -... - -In most cases, this is just a wasted optimization. However, sometimes it can -negatively impact the performance because two-address code is more restrictive -when it comes to scheduling. - -Unfortunately, liveout information is currently unavailable during DAG combine -time. - -2) Consider spliting a indexed load / store into a pair of add/sub + load/store - to solve #1 (in TwoAddressInstructionPass.cpp). - -3) Enhance LSR to generate more opportunities for indexed ops. - -4) Once we added support for multiple result patterns, write indexed loads - patterns instead of C++ instruction selection code. - -5) Use VLDM / VSTM to emulate indexed FP load / store. - -//===---------------------------------------------------------------------===// - -Implement support for some more tricky ways to materialize immediates. For -example, to get 0xffff8000, we can use: - -mov r9, #&3f8000 -sub r9, r9, #&400000 - -//===---------------------------------------------------------------------===// - -We sometimes generate multiple add / sub instructions to update sp in prologue -and epilogue if the inc / dec value is too large to fit in a single immediate -operand. In some cases, perhaps it might be better to load the value from a -constantpool instead. - -//===---------------------------------------------------------------------===// - -GCC generates significantly better code for this function. - -int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) { - int i = 0; - - if (StackPtr != 0) { - while (StackPtr != 0 && i < (((LineLen) < (32768))? 
(LineLen) : (32768))) - Line[i++] = Stack[--StackPtr]; - if (LineLen > 32768) - { - while (StackPtr != 0 && i < LineLen) - { - i++; - --StackPtr; - } - } - } - return StackPtr; -} - -//===---------------------------------------------------------------------===// - -This should compile to the mlas instruction: -int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; } - -//===---------------------------------------------------------------------===// - -At some point, we should triage these to see if they still apply to us: - -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016 - -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982 - -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702 -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663 - -http://www.inf.u-szeged.hu/gcc-arm/ -http://citeseer.ist.psu.edu/debus04linktime.html - -//===---------------------------------------------------------------------===// - -gcc generates smaller code for this function at -O2 or -Os: - -void foo(signed char* p) { - if (*p == 3) - bar(); - else if (*p == 4) - baz(); - else if (*p == 5) - quux(); -} - -llvm decides it's a good idea to turn the repeated if...else into a -binary tree, as if it were a switch; the resulting code requires -1 -compare-and-branches when *p<=2 or *p==5, the same number if *p==4 -or *p>6, and +1 if *p==3. 
So it should be a speed win -(on balance). However, the revised code is larger, with 4 conditional -branches instead of 3. - -More seriously, there is a byte->word extend before -each comparison, where there should be only one, and the condition codes -are not remembered when the same two values are compared twice. - -//===---------------------------------------------------------------------===// - -More LSR enhancements possible: - -1. Teach LSR about pre- and post- indexed ops to allow iv increment be merged - in a load / store. -2. Allow iv reuse even when a type conversion is required. For example, i8 - and i32 load / store addressing modes are identical. - - -//===---------------------------------------------------------------------===// - -This: - -int foo(int a, int b, int c, int d) { - long long acc = (long long)a * (long long)b; - acc += (long long)c * (long long)d; - return (int)(acc >> 32); -} - -Should compile to use SMLAL (Signed Multiply Accumulate Long) which multiplies -two signed 32-bit values to produce a 64-bit value, and accumulates this with -a 64-bit value. 
- -We currently get this with both v4 and v6: - -_foo: - smull r1, r0, r1, r0 - smull r3, r2, r3, r2 - adds r3, r3, r1 - adc r0, r2, r0 - bx lr - -//===---------------------------------------------------------------------===// - -This: - #include <algorithm> - std::pair<unsigned, bool> full_add(unsigned a, unsigned b) - { return std::make_pair(a + b, a + b < a); } - bool no_overflow(unsigned a, unsigned b) - { return !full_add(a, b).second; } - -Should compile to: - -_Z8full_addjj: - adds r2, r1, r2 - movcc r1, #0 - movcs r1, #1 - str r2, [r0, #0] - strb r1, [r0, #4] - mov pc, lr - -_Z11no_overflowjj: - cmn r0, r1 - movcs r0, #0 - movcc r0, #1 - mov pc, lr - -not: - -__Z8full_addjj: - add r3, r2, r1 - str r3, [r0] - mov r2, #1 - mov r12, #0 - cmp r3, r1 - movlo r12, r2 - str r12, [r0, #+4] - bx lr -__Z11no_overflowjj: - add r3, r1, r0 - mov r2, #1 - mov r1, #0 - cmp r3, r0 - movhs r1, r2 - mov r0, r1 - bx lr - -//===---------------------------------------------------------------------===// - -Some of the NEON intrinsics may be appropriate for more general use, either -as target-independent intrinsics or perhaps elsewhere in the ARM backend. -Some of them may also be lowered to target-independent SDNodes, and perhaps -some new SDNodes could be added. - -For example, maximum, minimum, and absolute value operations are well-defined -and standard operations, both for vector and scalar types. - -The current NEON-specific intrinsics for count leading zeros and count one -bits could perhaps be replaced by the target-independent ctlz and ctpop -intrinsics. It may also make sense to add a target-independent "ctls" -intrinsic for "count leading sign bits". Likewise, the backend could use -the target-independent SDNodes for these operations. - -ARMv6 has scalar saturating and halving adds and subtracts. The same -intrinsics could possibly be used for both NEON's vector implementations of -those operations and the ARMv6 scalar versions. 
- -//===---------------------------------------------------------------------===// - -Split out LDR (literal) from normal ARM LDR instruction. Also consider spliting -LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g. -ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg) -while ARMConstantIslandPass only need to worry about LDR (literal). - -//===---------------------------------------------------------------------===// - -Constant island pass should make use of full range SoImm values for LEApcrel. -Be careful though as the last attempt caused infinite looping on lencod. - -//===---------------------------------------------------------------------===// - -Predication issue. This function: - -extern unsigned array[ 128 ]; -int foo( int x ) { - int y; - y = array[ x & 127 ]; - if ( x & 128 ) - y = 123456789 & ( y >> 2 ); - else - y = 123456789 & y; - return y; -} - -compiles to: - -_foo: - and r1, r0, #127 - ldr r2, LCPI1_0 - ldr r2, [r2] - ldr r1, [r2, +r1, lsl #2] - mov r2, r1, lsr #2 - tst r0, #128 - moveq r2, r1 - ldr r0, LCPI1_1 - and r0, r2, r0 - bx lr - -It would be better to do something like this, to fold the shift into the -conditional move: - - and r1, r0, #127 - ldr r2, LCPI1_0 - ldr r2, [r2] - ldr r1, [r2, +r1, lsl #2] - tst r0, #128 - movne r1, r1, lsr #2 - ldr r0, LCPI1_1 - and r0, r1, r0 - bx lr - -it saves an instruction and a register. - -//===---------------------------------------------------------------------===// - -It might be profitable to cse MOVi16 if there are lots of 32-bit immediates -with the same bottom half. - -//===---------------------------------------------------------------------===// - -Robert Muth started working on an alternate jump table implementation that -does not put the tables in-line in the text. This is more like the llvm -default jump table implementation. This might be useful sometime. 
Several -revisions of patches are on the mailing list, beginning at: -http://lists.llvm.org/pipermail/llvm-dev/2009-June/022763.html - -//===---------------------------------------------------------------------===// - -Make use of the "rbit" instruction. - -//===---------------------------------------------------------------------===// - -Take a look at test/CodeGen/Thumb2/machine-licm.ll. ARM should be taught how -to licm and cse the unnecessary load from cp#1. - -//===---------------------------------------------------------------------===// - -The CMN instruction sets the flags like an ADD instruction, while CMP sets -them like a subtract. Therefore to be able to use CMN for comparisons other -than the Z bit, we'll need additional logic to reverse the conditionals -associated with the comparison. Perhaps a pseudo-instruction for the comparison, -with a post-codegen pass to clean up and handle the condition codes? -See PR5694 for testcase. - -//===---------------------------------------------------------------------===// - -Given the following on armv5: -int test1(int A, int B) { - return (A&-8388481)|(B&8388480); -} - -We currently generate: - ldr r2, .LCPI0_0 - and r0, r0, r2 - ldr r2, .LCPI0_1 - and r1, r1, r2 - orr r0, r1, r0 - bx lr - -We should be able to replace the second ldr+and with a bic (i.e. reuse the -constant which was already loaded). Not sure what's necessary to do that. 
- -//===---------------------------------------------------------------------===// - -The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal: - -int a(int x) { return __builtin_bswap32(x); } - -a: - mov r1, #255, 24 - mov r2, #255, 16 - and r1, r1, r0, lsr #8 - and r2, r2, r0, lsl #8 - orr r1, r1, r0, lsr #24 - orr r0, r2, r0, lsl #24 - orr r0, r0, r1 - bx lr - -Something like the following would be better (fewer instructions/registers): - eor r1, r0, r0, ror #16 - bic r1, r1, #0xff0000 - mov r1, r1, lsr #8 - eor r0, r1, r0, ror #8 - bx lr - -A custom Thumb version would also be a slight improvement over the generic -version. - -//===---------------------------------------------------------------------===// - -Consider the following simple C code: - -void foo(unsigned char *a, unsigned char *b, int *c) { - if ((*a | *b) == 0) *c = 0; -} - -currently llvm-gcc generates something like this (nice branchless code I'd say): - - ldrb r0, [r0] - ldrb r1, [r1] - orr r0, r1, r0 - tst r0, #255 - moveq r0, #0 - streq r0, [r2] - bx lr - -Note that both "tst" and "moveq" are redundant. - -//===---------------------------------------------------------------------===// - -When loading immediate constants with movt/movw, if there are multiple -constants needed with the same low 16 bits, and those values are not live at -the same time, it would be possible to use a single movw instruction, followed -by multiple movt instructions to rewrite the high bits to different values. 
-For example: - - volatile store i32 -1, i32* inttoptr (i32 1342210076 to i32*), align 4, - !tbaa -!0 - volatile store i32 -1, i32* inttoptr (i32 1342341148 to i32*), align 4, - !tbaa -!0 - -is compiled and optimized to: - - movw r0, #32796 - mov.w r1, #-1 - movt r0, #20480 - str r1, [r0] - movw r0, #32796 @ <= this MOVW is not needed, value is there already - movt r0, #20482 - str r1, [r0] - -//===---------------------------------------------------------------------===// - -Improve codegen for select's: -if (x != 0) x = 1 -if (x == 1) x = 1 - -ARM codegen used to look like this: - mov r1, r0 - cmp r1, #1 - mov r0, #0 - moveq r0, #1 - -The naive lowering select between two different values. It should recognize the -test is equality test so it's more a conditional move rather than a select: - cmp r0, #1 - movne r0, #0 - -Currently this is a ARM specific dag combine. We probably should make it into a -target-neutral one. - -//===---------------------------------------------------------------------===// - -Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins -are specified to be undefined at zero, so portable code must check for zero -and handle it as a special case. That is unnecessary on ARM where those -operations are implemented in a way that is well-defined for zero. For -example: - -int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; } - -should just be implemented with a CLZ instruction. Since there are other -targets, e.g., PPC, that share this behavior, it would be best to implement -this in a target-independent way: we should probably fold that (when using -"undefined at zero" semantics) to set the "defined at zero" bit and have -the code generator expand out the right code. - -//===---------------------------------------------------------------------===// - -Clean up the test/MC/ARM files to have more robust register choices. 
- -R0 should not be used as a register operand in the assembler tests as it's then -not possible to distinguish between a correct encoding and a missing operand -encoding, as zero is the default value for the binary encoder. -e.g., - add r0, r0 // bad - add r3, r5 // good - -Register operands should be distinct. That is, when the encoding does not -require two syntactical operands to refer to the same register, two different -registers should be used in the test so as to catch errors where the -operands are swapped in the encoding. -e.g., - subs.w r1, r1, r1 // bad - subs.w r1, r2, r3 // good - +//===---------------------------------------------------------------------===// +// Random ideas for the ARM backend. +//===---------------------------------------------------------------------===// + +Reimplement 'select' in terms of 'SEL'. + +* We would really like to support UXTAB16, but we need to prove that the + add doesn't need to overflow between the two 16-bit chunks. + +* Implement pre/post increment support. (e.g. PR935) +* Implement smarter constant generation for binops with large immediates. + +A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX + +Interesting optimization for PIC codegen on arm-linux: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43129 + +//===---------------------------------------------------------------------===// + +Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the +time regalloc happens, these values are now in a 32-bit register, usually with +the top-bits known to be sign or zero extended. If spilled, we should be able +to spill these to a 8-bit or 16-bit stack slot, zero or sign extending as part +of the reload. + +Doing this reduces the size of the stack frame (important for thumb etc), and +also increases the likelihood that we will be able to reload multiple values +from the stack with a single load. 
+ +//===---------------------------------------------------------------------===// + +The constant island pass is in good shape. Some cleanups might be desirable, +but there is unlikely to be much improvement in the generated code. + +1. There may be some advantage to trying to be smarter about the initial +placement, rather than putting everything at the end. + +2. There might be some compile-time efficiency to be had by representing +consecutive islands as a single block rather than multiple blocks. + +3. Use a priority queue to sort constant pool users in inverse order of + position so we always process the one closest to the end of functions + first. This may simplify CreateNewWater. + +//===---------------------------------------------------------------------===// + +Eliminate copysign custom expansion. We are still generating crappy code with +default expansion + if-conversion. + +//===---------------------------------------------------------------------===// + +Eliminate one instruction from: + +define i32 @_Z6slow4bii(i32 %x, i32 %y) { + %tmp = icmp sgt i32 %x, %y + %retval = select i1 %tmp, i32 %x, i32 %y + ret i32 %retval +} + +__Z6slow4bii: + cmp r0, r1 + movgt r1, r0 + mov r0, r1 + bx lr +=> + +__Z6slow4bii: + cmp r0, r1 + movle r0, r1 + bx lr + +//===---------------------------------------------------------------------===// + +Implement long long "X-3" with instructions that fold the immediate in. These +were disabled due to badness with the ARM carry flag on subtracts. + +//===---------------------------------------------------------------------===// + +More load / store optimizations: +1) Better representation for block transfer? This is from Olden/power: + + fldd d0, [r4] + fstd d0, [r4, #+32] + fldd d0, [r4, #+8] + fstd d0, [r4, #+40] + fldd d0, [r4, #+16] + fstd d0, [r4, #+48] + fldd d0, [r4, #+24] + fstd d0, [r4, #+56] + +If we can spare the registers, it would be better to use fldm and fstm here. +Need major register allocator enhancement though. 
+ +2) Can we recognize the relative position of constantpool entries? i.e. Treat + + ldr r0, LCPI17_3 + ldr r1, LCPI17_4 + ldr r2, LCPI17_5 + + as + ldr r0, LCPI17 + ldr r1, LCPI17+4 + ldr r2, LCPI17+8 + + Then the ldr's can be combined into a single ldm. See Olden/power. + +Note for ARM v4 gcc uses ldmia to load a pair of 32-bit values to represent a +double 64-bit FP constant: + + adr r0, L6 + ldmia r0, {r0-r1} + + .align 2 +L6: + .long -858993459 + .long 1074318540 + +3) struct copies appear to be done field by field +instead of by words, at least sometimes: + +struct foo { int x; short s; char c1; char c2; }; +void cpy(struct foo*a, struct foo*b) { *a = *b; } + +llvm code (-O2) + ldrb r3, [r1, #+6] + ldr r2, [r1] + ldrb r12, [r1, #+7] + ldrh r1, [r1, #+4] + str r2, [r0] + strh r1, [r0, #+4] + strb r3, [r0, #+6] + strb r12, [r0, #+7] +gcc code (-O2) + ldmia r1, {r1-r2} + stmia r0, {r1-r2} + +In this benchmark poor handling of aggregate copies has shown up as +having a large effect on size, and possibly speed as well (we don't have +a good way to measure on ARM). + +//===---------------------------------------------------------------------===// + +* Consider this silly example: + +double bar(double x) { + double r = foo(3.1); + return x+r; +} + +_bar: + stmfd sp!, {r4, r5, r7, lr} + add r7, sp, #8 + mov r4, r0 + mov r5, r1 + fldd d0, LCPI1_0 + fmrrd r0, r1, d0 + bl _foo + fmdrr d0, r4, r5 + fmsr s2, r0 + fsitod d1, s2 + faddd d0, d1, d0 + fmrrd r0, r1, d0 + ldmfd sp!, {r4, r5, r7, pc} + +Ignore the prologue and epilogue stuff for a second. Note + mov r4, r0 + mov r5, r1 +the copys to callee-save registers and the fact they are only being used by the +fmdrr instruction. It would have been better had the fmdrr been scheduled +before the call and place the result in a callee-save DPR register. The two +mov ops would not have been necessary. 
+ +//===---------------------------------------------------------------------===// + +Calling convention related stuff: + +* gcc's parameter passing implementation is terrible and we suffer as a result: + +e.g. +struct s { + double d1; + int s1; +}; + +void foo(struct s S) { + printf("%g, %d\n", S.d1, S.s1); +} + +'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and +then reloads them to r1, r2, and r3 before issuing the call (r0 contains the +address of the format string): + + stmfd sp!, {r7, lr} + add r7, sp, #0 + sub sp, sp, #12 + stmia sp, {r0, r1, r2} + ldmia sp, {r1-r2} + ldr r0, L5 + ldr r3, [sp, #8] +L2: + add r0, pc, r0 + bl L_printf$stub + +Instead of a stmia, ldmia, and a ldr, wouldn't it be better to do three moves? + +* Returning an aggregate type is even worse: + +e.g. +struct s foo(void) { + struct s S = {1.1, 2}; + return S; +} + + mov ip, r0 + ldr r0, L5 + sub sp, sp, #12 +L2: + add r0, pc, r0 + @ lr needed for prologue + ldmia r0, {r0, r1, r2} + stmia sp, {r0, r1, r2} + stmia ip, {r0, r1, r2} + mov r0, ip + add sp, sp, #12 + bx lr + +r0 (and later ip) is the hidden parameter from caller to store the value in. The +first ldmia loads the constants into r0, r1, r2. The last stmia stores r0, r1, +r2 into the address passed in. However, there is one additional stmia that +stores r0, r1, and r2 to some stack location. The store is dead. 
+ +The llvm-gcc generated code looks like this: + +csretcc void %foo(%struct.s* %agg.result) { +entry: + %S = alloca %struct.s, align 4 ; <%struct.s*> [#uses=1] + %memtmp = alloca %struct.s ; <%struct.s*> [#uses=1] + cast %struct.s* %S to sbyte* ; <sbyte*>:0 [#uses=2] + call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 ) + cast %struct.s* %agg.result to sbyte* ; <sbyte*>:1 [#uses=2] + call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 ) + cast %struct.s* %memtmp to sbyte* ; <sbyte*>:2 [#uses=1] + call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 ) + ret void +} + +llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from +constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated +into a number of load and stores, or 2) custom lower memcpy (of small size) to +be ldmia / stmia. I think option 2 is better but the current register +allocator cannot allocate a chunk of registers at a time. + +A feasible temporary solution is to use specific physical registers at the +lowering time for small (<= 4 words?) transfer size. + +* ARM CSRet calling convention requires the hidden argument to be returned by +the callee. + +//===---------------------------------------------------------------------===// + +We can definitely do a better job on BB placements to eliminate some branches. +It's very common to see llvm generated assembly code that looks like this: + +LBB3: + ... +LBB4: +... + beq LBB3 + b LBB2 + +If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can +then eliminate beq and turn the unconditional branch to LBB2 to a bne. + +See McCat/18-imp/ComputeBoundingBoxes for an example. + +//===---------------------------------------------------------------------===// + +Pre-/post- indexed load / stores: + +1) We should not make the pre/post- indexed load/store transform if the base ptr +is guaranteed to be live beyond the load/store. 
This can happen if the base +ptr is live out of the block in which we are performing the optimization. e.g. + +mov r1, r2 +ldr r3, [r1], #4 +... + +vs. + +ldr r3, [r2] +add r1, r2, #4 +... + +In most cases, this is just a wasted optimization. However, sometimes it can +negatively impact the performance because two-address code is more restrictive +when it comes to scheduling. + +Unfortunately, liveout information is currently unavailable during DAG combine +time. + +2) Consider splitting an indexed load / store into a pair of add/sub + load/store + to solve #1 (in TwoAddressInstructionPass.cpp). + +3) Enhance LSR to generate more opportunities for indexed ops. + +4) Once we have added support for multiple result patterns, write indexed load + patterns instead of C++ instruction selection code. + +5) Use VLDM / VSTM to emulate indexed FP load / store. + +//===---------------------------------------------------------------------===// + +Implement support for some more tricky ways to materialize immediates. For +example, to get 0xffff8000, we can use: + +mov r9, #&3f8000 +sub r9, r9, #&400000 + +//===---------------------------------------------------------------------===// + +We sometimes generate multiple add / sub instructions to update sp in prologue +and epilogue if the inc / dec value is too large to fit in a single immediate +operand. In some cases, perhaps it might be better to load the value from a +constantpool instead. + +//===---------------------------------------------------------------------===// + +GCC generates significantly better code for this function. + +int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) { + int i = 0; + + if (StackPtr != 0) { + while (StackPtr != 0 && i < (((LineLen) < (32768))? 
(LineLen) : (32768))) + Line[i++] = Stack[--StackPtr]; + if (LineLen > 32768) + { + while (StackPtr != 0 && i < LineLen) + { + i++; + --StackPtr; + } + } + } + return StackPtr; +} + +//===---------------------------------------------------------------------===// + +This should compile to the mlas instruction: +int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; } + +//===---------------------------------------------------------------------===// + +At some point, we should triage these to see if they still apply to us: + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016 + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982 + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663 + +http://www.inf.u-szeged.hu/gcc-arm/ +http://citeseer.ist.psu.edu/debus04linktime.html + +//===---------------------------------------------------------------------===// + +gcc generates smaller code for this function at -O2 or -Os: + +void foo(signed char* p) { + if (*p == 3) + bar(); + else if (*p == 4) + baz(); + else if (*p == 5) + quux(); +} + +llvm decides it's a good idea to turn the repeated if...else into a +binary tree, as if it were a switch; the resulting code requires -1 +compare-and-branches when *p<=2 or *p==5, the same number if *p==4 +or *p>6, and +1 if *p==3. 
So it should be a speed win +(on balance). However, the revised code is larger, with 4 conditional +branches instead of 3. + +More seriously, there is a byte->word extend before +each comparison, where there should be only one, and the condition codes +are not remembered when the same two values are compared twice. + +//===---------------------------------------------------------------------===// + +More LSR enhancements possible: + +1. Teach LSR about pre- and post- indexed ops to allow iv increment be merged + in a load / store. +2. Allow iv reuse even when a type conversion is required. For example, i8 + and i32 load / store addressing modes are identical. + + +//===---------------------------------------------------------------------===// + +This: + +int foo(int a, int b, int c, int d) { + long long acc = (long long)a * (long long)b; + acc += (long long)c * (long long)d; + return (int)(acc >> 32); +} + +Should compile to use SMLAL (Signed Multiply Accumulate Long) which multiplies +two signed 32-bit values to produce a 64-bit value, and accumulates this with +a 64-bit value. 
+ +We currently get this with both v4 and v6: + +_foo: + smull r1, r0, r1, r0 + smull r3, r2, r3, r2 + adds r3, r3, r1 + adc r0, r2, r0 + bx lr + +//===---------------------------------------------------------------------===// + +This: + #include <algorithm> + std::pair<unsigned, bool> full_add(unsigned a, unsigned b) + { return std::make_pair(a + b, a + b < a); } + bool no_overflow(unsigned a, unsigned b) + { return !full_add(a, b).second; } + +Should compile to: + +_Z8full_addjj: + adds r2, r1, r2 + movcc r1, #0 + movcs r1, #1 + str r2, [r0, #0] + strb r1, [r0, #4] + mov pc, lr + +_Z11no_overflowjj: + cmn r0, r1 + movcs r0, #0 + movcc r0, #1 + mov pc, lr + +not: + +__Z8full_addjj: + add r3, r2, r1 + str r3, [r0] + mov r2, #1 + mov r12, #0 + cmp r3, r1 + movlo r12, r2 + str r12, [r0, #+4] + bx lr +__Z11no_overflowjj: + add r3, r1, r0 + mov r2, #1 + mov r1, #0 + cmp r3, r0 + movhs r1, r2 + mov r0, r1 + bx lr + +//===---------------------------------------------------------------------===// + +Some of the NEON intrinsics may be appropriate for more general use, either +as target-independent intrinsics or perhaps elsewhere in the ARM backend. +Some of them may also be lowered to target-independent SDNodes, and perhaps +some new SDNodes could be added. + +For example, maximum, minimum, and absolute value operations are well-defined +and standard operations, both for vector and scalar types. + +The current NEON-specific intrinsics for count leading zeros and count one +bits could perhaps be replaced by the target-independent ctlz and ctpop +intrinsics. It may also make sense to add a target-independent "ctls" +intrinsic for "count leading sign bits". Likewise, the backend could use +the target-independent SDNodes for these operations. + +ARMv6 has scalar saturating and halving adds and subtracts. The same +intrinsics could possibly be used for both NEON's vector implementations of +those operations and the ARMv6 scalar versions. 
+ +//===---------------------------------------------------------------------===// + +Split out LDR (literal) from normal ARM LDR instruction. Also consider splitting +LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g. +ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg) +while ARMConstantIslandPass only needs to worry about LDR (literal). + +//===---------------------------------------------------------------------===// + +Constant island pass should make use of full range SoImm values for LEApcrel. +Be careful though as the last attempt caused infinite looping on lencod. + +//===---------------------------------------------------------------------===// + +Predication issue. This function: + +extern unsigned array[ 128 ]; +int foo( int x ) { + int y; + y = array[ x & 127 ]; + if ( x & 128 ) + y = 123456789 & ( y >> 2 ); + else + y = 123456789 & y; + return y; +} + +compiles to: + +_foo: + and r1, r0, #127 + ldr r2, LCPI1_0 + ldr r2, [r2] + ldr r1, [r2, +r1, lsl #2] + mov r2, r1, lsr #2 + tst r0, #128 + moveq r2, r1 + ldr r0, LCPI1_1 + and r0, r2, r0 + bx lr + +It would be better to do something like this, to fold the shift into the +conditional move: + + and r1, r0, #127 + ldr r2, LCPI1_0 + ldr r2, [r2] + ldr r1, [r2, +r1, lsl #2] + tst r0, #128 + movne r1, r1, lsr #2 + ldr r0, LCPI1_1 + and r0, r1, r0 + bx lr + +It saves an instruction and a register. + +//===---------------------------------------------------------------------===// + +It might be profitable to cse MOVi16 if there are lots of 32-bit immediates +with the same bottom half. + +//===---------------------------------------------------------------------===// + +Robert Muth started working on an alternate jump table implementation that +does not put the tables in-line in the text. This is more like the llvm +default jump table implementation. This might be useful sometime. 
Several +revisions of patches are on the mailing list, beginning at: +http://lists.llvm.org/pipermail/llvm-dev/2009-June/022763.html + +//===---------------------------------------------------------------------===// + +Make use of the "rbit" instruction. + +//===---------------------------------------------------------------------===// + +Take a look at test/CodeGen/Thumb2/machine-licm.ll. ARM should be taught how +to licm and cse the unnecessary load from cp#1. + +//===---------------------------------------------------------------------===// + +The CMN instruction sets the flags like an ADD instruction, while CMP sets +them like a subtract. Therefore to be able to use CMN for comparisons other +than the Z bit, we'll need additional logic to reverse the conditionals +associated with the comparison. Perhaps a pseudo-instruction for the comparison, +with a post-codegen pass to clean up and handle the condition codes? +See PR5694 for testcase. + +//===---------------------------------------------------------------------===// + +Given the following on armv5: +int test1(int A, int B) { + return (A&-8388481)|(B&8388480); +} + +We currently generate: + ldr r2, .LCPI0_0 + and r0, r0, r2 + ldr r2, .LCPI0_1 + and r1, r1, r2 + orr r0, r1, r0 + bx lr + +We should be able to replace the second ldr+and with a bic (i.e. reuse the +constant which was already loaded). Not sure what's necessary to do that. 
+ +//===---------------------------------------------------------------------===// + +The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal: + +int a(int x) { return __builtin_bswap32(x); } + +a: + mov r1, #255, 24 + mov r2, #255, 16 + and r1, r1, r0, lsr #8 + and r2, r2, r0, lsl #8 + orr r1, r1, r0, lsr #24 + orr r0, r2, r0, lsl #24 + orr r0, r0, r1 + bx lr + +Something like the following would be better (fewer instructions/registers): + eor r1, r0, r0, ror #16 + bic r1, r1, #0xff0000 + mov r1, r1, lsr #8 + eor r0, r1, r0, ror #8 + bx lr + +A custom Thumb version would also be a slight improvement over the generic +version. + +//===---------------------------------------------------------------------===// + +Consider the following simple C code: + +void foo(unsigned char *a, unsigned char *b, int *c) { + if ((*a | *b) == 0) *c = 0; +} + +currently llvm-gcc generates something like this (nice branchless code I'd say): + + ldrb r0, [r0] + ldrb r1, [r1] + orr r0, r1, r0 + tst r0, #255 + moveq r0, #0 + streq r0, [r2] + bx lr + +Note that both "tst" and "moveq" are redundant. + +//===---------------------------------------------------------------------===// + +When loading immediate constants with movt/movw, if there are multiple +constants needed with the same low 16 bits, and those values are not live at +the same time, it would be possible to use a single movw instruction, followed +by multiple movt instructions to rewrite the high bits to different values. 
+For example: + + volatile store i32 -1, i32* inttoptr (i32 1342210076 to i32*), align 4, + !tbaa +!0 + volatile store i32 -1, i32* inttoptr (i32 1342341148 to i32*), align 4, + !tbaa +!0 + +is compiled and optimized to: + + movw r0, #32796 + mov.w r1, #-1 + movt r0, #20480 + str r1, [r0] + movw r0, #32796 @ <= this MOVW is not needed, value is there already + movt r0, #20482 + str r1, [r0] + +//===---------------------------------------------------------------------===// + +Improve codegen for select's: +if (x != 0) x = 1 +if (x == 1) x = 1 + +ARM codegen used to look like this: + mov r1, r0 + cmp r1, #1 + mov r0, #0 + moveq r0, #1 + +The naive lowering select between two different values. It should recognize the +test is equality test so it's more a conditional move rather than a select: + cmp r0, #1 + movne r0, #0 + +Currently this is a ARM specific dag combine. We probably should make it into a +target-neutral one. + +//===---------------------------------------------------------------------===// + +Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins +are specified to be undefined at zero, so portable code must check for zero +and handle it as a special case. That is unnecessary on ARM where those +operations are implemented in a way that is well-defined for zero. For +example: + +int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; } + +should just be implemented with a CLZ instruction. Since there are other +targets, e.g., PPC, that share this behavior, it would be best to implement +this in a target-independent way: we should probably fold that (when using +"undefined at zero" semantics) to set the "defined at zero" bit and have +the code generator expand out the right code. + +//===---------------------------------------------------------------------===// + +Clean up the test/MC/ARM files to have more robust register choices. 
+ +R0 should not be used as a register operand in the assembler tests as it's then +not possible to distinguish between a correct encoding and a missing operand +encoding, as zero is the default value for the binary encoder. +e.g., + add r0, r0 // bad + add r3, r5 // good + +Register operands should be distinct. That is, when the encoding does not +require two syntactical operands to refer to the same register, two different +registers should be used in the test so as to catch errors where the +operands are swapped in the encoding. +e.g., + subs.w r1, r1, r1 // bad + subs.w r1, r2, r3 // good + diff --git a/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make index 260ad44190..089e7bf206 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Support diff --git a/contrib/libs/llvm12/lib/Target/ARM/Utils/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/ARM/Utils/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/Utils/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/ARM/Utils/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make b/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make index 216fd023f6..7a980b708c 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/ARM/ya.make b/contrib/libs/llvm12/lib/Target/ARM/ya.make index 1fe4babbea..9551f9f11b 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/BPF/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/BPF/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/BPF/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/BPF/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
- - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/BPF/AsmParser/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/BPF/AsmParser/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/BPF/AsmParser/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/BPF/AsmParser/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/BPF/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/BPF/AsmParser/ya.make index adaba48b95..b61ac06cdd 100644 --- a/contrib/libs/llvm12/lib/Target/BPF/AsmParser/ya.make +++ b/contrib/libs/llvm12/lib/Target/BPF/AsmParser/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/BPF/Disassembler/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/BPF/Disassembler/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/BPF/Disassembler/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/BPF/Disassembler/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/BPF/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/BPF/Disassembler/ya.make index f31d0e8200..cb7872eeee 100644 --- a/contrib/libs/llvm12/lib/Target/BPF/Disassembler/ya.make +++ b/contrib/libs/llvm12/lib/Target/BPF/Disassembler/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/BPF/MCTargetDesc/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/BPF/MCTargetDesc/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/BPF/MCTargetDesc/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/BPF/MCTargetDesc/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/BPF/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/BPF/MCTargetDesc/ya.make index b44f9daa1a..6522c7ef00 100644 --- a/contrib/libs/llvm12/lib/Target/BPF/MCTargetDesc/ya.make +++ b/contrib/libs/llvm12/lib/Target/BPF/MCTargetDesc/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/BPF/TargetInfo/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/BPF/TargetInfo/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/BPF/TargetInfo/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/BPF/TargetInfo/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/BPF/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/BPF/TargetInfo/ya.make index 6a1c950d09..3a882dad3e 100644 --- a/contrib/libs/llvm12/lib/Target/BPF/TargetInfo/ya.make +++ b/contrib/libs/llvm12/lib/Target/BPF/TargetInfo/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Support diff --git a/contrib/libs/llvm12/lib/Target/BPF/ya.make b/contrib/libs/llvm12/lib/Target/BPF/ya.make index 0a3900df45..0f122e4afe 100644 --- a/contrib/libs/llvm12/lib/Target/BPF/ya.make +++ b/contrib/libs/llvm12/lib/Target/BPF/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/NVPTX/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/NVPTX/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
- - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make index 51150a2c8e..81ad30663e 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make +++ b/contrib/libs/llvm12/lib/Target/NVPTX/MCTargetDesc/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make index c49c23bb18..52ef1e5f5b 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make +++ b/contrib/libs/llvm12/lib/Target/NVPTX/TargetInfo/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Support diff --git a/contrib/libs/llvm12/lib/Target/NVPTX/ya.make b/contrib/libs/llvm12/lib/Target/NVPTX/ya.make index 7701b9ded4..4f7542eb65 100644 --- a/contrib/libs/llvm12/lib/Target/NVPTX/ya.make +++ b/contrib/libs/llvm12/lib/Target/NVPTX/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt index 3a4cf0af9f..2f43d3f272 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/.yandex_meta/licenses.list.txt @@ -1,16 +1,16 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https)//llvm.org/LICENSE.txt for license information. 
- - -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier) Apache-2.0 WITH LLVM-exception - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https)//llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier) Apache-2.0 WITH LLVM-exception + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
- - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make index 2388d58641..24183440dc 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/AsmParser/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
- - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make index c43266cf40..a412740df2 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/Disassembler/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt index b0b34714ca..ad3879fc45 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/.yandex_meta/licenses.list.txt @@ -1,303 +1,303 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
- - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================File: LICENSE.TXT==================== -============================================================================== -The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: -============================================================================== - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ----- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - -============================================================================== -Software from third parties included in the LLVM Project: -============================================================================== -The LLVM Project contains third party software which is under different license -terms. 
All such code will be identified clearly using at least one of two -mechanisms: -1) It will be in a separate directory tree with its own `LICENSE.txt` or - `LICENSE` file at the top containing the specific license and restrictions - which apply to that software, or -2) It will contain specific license and restriction terms at the top of every - file. - -============================================================================== -Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - - - -====================File: include/llvm/Support/LICENSE.TXT==================== -LLVM System Interface Library -------------------------------------------------------------------------------- -The LLVM System Interface Library is licensed under the Illinois Open Source -License and has the following additional copyright: - -Copyright (C) 2004 eXtensible Systems, Inc. - - -====================NCSA==================== -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================File: LICENSE.TXT==================== +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
+ +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. 
+ +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + + + +====================File: include/llvm/Support/LICENSE.TXT==================== +LLVM System Interface Library +------------------------------------------------------------------------------- +The LLVM System Interface Library is licensed under the Illinois Open Source +License and has the following additional copyright: + +Copyright (C) 2004 eXtensible Systems, Inc. 
+ + +====================NCSA==================== +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make index 0e037d61de..903dc6ec7f 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/MCTargetDesc/ya.make @@ -2,18 +2,18 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE( - Apache-2.0 WITH LLVM-exception AND - NCSA -) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE( + Apache-2.0 WITH LLVM-exception AND + NCSA +) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/README.txt b/contrib/libs/llvm12/lib/Target/PowerPC/README.txt index 0902298a4f..492eb22af2 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/README.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/README.txt @@ -1,607 +1,607 @@ -//===- README.txt - Notes for improving PowerPC-specific code gen ---------===// - -TODO: -* lmw/stmw pass a la arm load store optimizer for prolog/epilog - -===-------------------------------------------------------------------------=== - -This code: - -unsigned add32carry(unsigned sum, unsigned x) { - unsigned z = sum + x; - if (sum + x < x) - z++; - return z; -} - -Should compile to something like: - - addc r3,r3,r4 - addze r3,r3 - -instead we get: - - add r3, r4, r3 - cmplw cr7, r3, r4 - mfcr r4 ; 1 - rlwinm r4, r4, 29, 31, 31 - add r3, r3, r4 - -Ick. 
- -===-------------------------------------------------------------------------=== - -We compile the hottest inner loop of viterbi to: - - li r6, 0 - b LBB1_84 ;bb432.i -LBB1_83: ;bb420.i - lbzx r8, r5, r7 - addi r6, r7, 1 - stbx r8, r4, r7 -LBB1_84: ;bb432.i - mr r7, r6 - cmplwi cr0, r7, 143 - bne cr0, LBB1_83 ;bb420.i - -The CBE manages to produce: - - li r0, 143 - mtctr r0 -loop: - lbzx r2, r2, r11 - stbx r0, r2, r9 - addi r2, r2, 1 - bdz later - b loop - -This could be much better (bdnz instead of bdz) but it still beats us. If we -produced this with bdnz, the loop would be a single dispatch group. - -===-------------------------------------------------------------------------=== - -Lump the constant pool for each function into ONE pic object, and reference -pieces of it as offsets from the start. For functions like this (contrived -to have lots of constants obviously): - -double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; } - -We generate: - -_X: - lis r2, ha16(.CPI_X_0) - lfd f0, lo16(.CPI_X_0)(r2) - lis r2, ha16(.CPI_X_1) - lfd f2, lo16(.CPI_X_1)(r2) - fmadd f0, f1, f0, f2 - lis r2, ha16(.CPI_X_2) - lfd f1, lo16(.CPI_X_2)(r2) - lis r2, ha16(.CPI_X_3) - lfd f2, lo16(.CPI_X_3)(r2) - fmadd f1, f0, f1, f2 +//===- README.txt - Notes for improving PowerPC-specific code gen ---------===// + +TODO: +* lmw/stmw pass a la arm load store optimizer for prolog/epilog + +===-------------------------------------------------------------------------=== + +This code: + +unsigned add32carry(unsigned sum, unsigned x) { + unsigned z = sum + x; + if (sum + x < x) + z++; + return z; +} + +Should compile to something like: + + addc r3,r3,r4 + addze r3,r3 + +instead we get: + + add r3, r4, r3 + cmplw cr7, r3, r4 + mfcr r4 ; 1 + rlwinm r4, r4, 29, 31, 31 + add r3, r3, r4 + +Ick. 
+ +===-------------------------------------------------------------------------=== + +We compile the hottest inner loop of viterbi to: + + li r6, 0 + b LBB1_84 ;bb432.i +LBB1_83: ;bb420.i + lbzx r8, r5, r7 + addi r6, r7, 1 + stbx r8, r4, r7 +LBB1_84: ;bb432.i + mr r7, r6 + cmplwi cr0, r7, 143 + bne cr0, LBB1_83 ;bb420.i + +The CBE manages to produce: + + li r0, 143 + mtctr r0 +loop: + lbzx r2, r2, r11 + stbx r0, r2, r9 + addi r2, r2, 1 + bdz later + b loop + +This could be much better (bdnz instead of bdz) but it still beats us. If we +produced this with bdnz, the loop would be a single dispatch group. + +===-------------------------------------------------------------------------=== + +Lump the constant pool for each function into ONE pic object, and reference +pieces of it as offsets from the start. For functions like this (contrived +to have lots of constants obviously): + +double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; } + +We generate: + +_X: + lis r2, ha16(.CPI_X_0) + lfd f0, lo16(.CPI_X_0)(r2) + lis r2, ha16(.CPI_X_1) + lfd f2, lo16(.CPI_X_1)(r2) + fmadd f0, f1, f0, f2 + lis r2, ha16(.CPI_X_2) + lfd f1, lo16(.CPI_X_2)(r2) + lis r2, ha16(.CPI_X_3) + lfd f2, lo16(.CPI_X_3)(r2) + fmadd f1, f0, f1, f2 + blr + +It would be better to materialize .CPI_X into a register, then use immediates +off of the register to avoid the lis's. This is even more important in PIC +mode. + +Note that this (and the static variable version) is discussed here for GCC: +http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html + +Here's another example (the sgn function): +double testf(double a) { + return a == 0.0 ? 0.0 : (a > 0.0 ? 
1.0 : -1.0); +} + +it produces a BB like this: +LBB1_1: ; cond_true + lis r2, ha16(LCPI1_0) + lfs f0, lo16(LCPI1_0)(r2) + lis r2, ha16(LCPI1_1) + lis r3, ha16(LCPI1_2) + lfs f2, lo16(LCPI1_2)(r3) + lfs f3, lo16(LCPI1_1)(r2) + fsub f0, f0, f1 + fsel f1, f0, f2, f3 blr - -It would be better to materialize .CPI_X into a register, then use immediates -off of the register to avoid the lis's. This is even more important in PIC -mode. - -Note that this (and the static variable version) is discussed here for GCC: -http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html - -Here's another example (the sgn function): -double testf(double a) { - return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0); -} - -it produces a BB like this: -LBB1_1: ; cond_true - lis r2, ha16(LCPI1_0) - lfs f0, lo16(LCPI1_0)(r2) - lis r2, ha16(LCPI1_1) - lis r3, ha16(LCPI1_2) - lfs f2, lo16(LCPI1_2)(r3) - lfs f3, lo16(LCPI1_1)(r2) - fsub f0, f0, f1 - fsel f1, f0, f2, f3 - blr - -===-------------------------------------------------------------------------=== - -PIC Code Gen IPO optimization: - -Squish small scalar globals together into a single global struct, allowing the -address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size -of the GOT on targets with one). 
- -Note that this is discussed here for GCC: -http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html - -===-------------------------------------------------------------------------=== - -Fold add and sub with constant into non-extern, non-weak addresses so this: - -static int a; -void bar(int b) { a = b; } -void foo(unsigned char *c) { - *c = a; -} - -So that - -_foo: - lis r2, ha16(_a) - la r2, lo16(_a)(r2) - lbz r2, 3(r2) - stb r2, 0(r3) - blr - -Becomes - -_foo: - lis r2, ha16(_a+3) - lbz r2, lo16(_a+3)(r2) - stb r2, 0(r3) - blr - -===-------------------------------------------------------------------------=== - -We should compile these two functions to the same thing: - -#include <stdlib.h> -void f(int a, int b, int *P) { - *P = (a-b)>=0?(a-b):(b-a); -} -void g(int a, int b, int *P) { - *P = abs(a-b); -} - -Further, they should compile to something better than: - -_g: - subf r2, r4, r3 - subfic r3, r2, 0 - cmpwi cr0, r2, -1 - bgt cr0, LBB2_2 ; entry -LBB2_1: ; entry - mr r2, r3 -LBB2_2: ; entry - stw r2, 0(r5) - blr - -GCC produces: - -_g: - subf r4,r4,r3 - srawi r2,r4,31 - xor r0,r2,r4 - subf r0,r2,r0 - stw r0,0(r5) - blr - -... which is much nicer. - -This theoretically may help improve twolf slightly (used in dimbox.c:142?). 
- -===-------------------------------------------------------------------------=== - -PR5945: This: -define i32 @clamp0g(i32 %a) { -entry: - %cmp = icmp slt i32 %a, 0 - %sel = select i1 %cmp, i32 0, i32 %a - ret i32 %sel -} - -Is compile to this with the PowerPC (32-bit) backend: - -_clamp0g: - cmpwi cr0, r3, 0 - li r2, 0 - blt cr0, LBB1_2 -; %bb.1: ; %entry - mr r2, r3 -LBB1_2: ; %entry - mr r3, r2 - blr - -This could be reduced to the much simpler: - -_clamp0g: - srawi r2, r3, 31 - andc r3, r3, r2 - blr - -===-------------------------------------------------------------------------=== - -int foo(int N, int ***W, int **TK, int X) { - int t, i; - - for (t = 0; t < N; ++t) - for (i = 0; i < 4; ++i) - W[t / X][i][t % X] = TK[i][t]; - - return 5; -} - -We generate relatively atrocious code for this loop compared to gcc. - -We could also strength reduce the rem and the div: -http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf - -===-------------------------------------------------------------------------=== - -We generate ugly code for this: - -void func(unsigned int *ret, float dx, float dy, float dz, float dw) { - unsigned code = 0; - if(dx < -dw) code |= 1; - if(dx > dw) code |= 2; - if(dy < -dw) code |= 4; - if(dy > dw) code |= 8; - if(dz < -dw) code |= 16; - if(dz > dw) code |= 32; - *ret = code; -} - -===-------------------------------------------------------------------------=== - -%struct.B = type { i8, [3 x i8] } - -define void @bar(%struct.B* %b) { -entry: - %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] - %tmp = load i32* %tmp ; <uint> [#uses=1] - %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] - %tmp4 = load i32* %tmp3 ; <uint> [#uses=1] - %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2] - %tmp9 = load i32* %tmp8 ; <uint> [#uses=1] - %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1] - %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1] - %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1] - %tmp11 = or i32 
%tmp1415, %tmp.masked ; <uint> [#uses=1] - %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1] - %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1] - store i32 %tmp13, i32* %tmp8 - ret void -} - -We emit: - -_foo: - lwz r2, 0(r3) - slwi r4, r2, 1 - or r4, r4, r2 - rlwimi r2, r4, 0, 0, 0 - stw r2, 0(r3) - blr - -We could collapse a bunch of those ORs and ANDs and generate the following -equivalent code: - -_foo: - lwz r2, 0(r3) - rlwinm r4, r2, 1, 0, 0 - or r2, r2, r4 - stw r2, 0(r3) - blr - -===-------------------------------------------------------------------------=== - -Consider a function like this: - -float foo(float X) { return X + 1234.4123f; } - -The FP constant ends up in the constant pool, so we need to get the LR register. - This ends up producing code like this: - -_foo: -.LBB_foo_0: ; entry - mflr r11 -*** stw r11, 8(r1) - bl "L00000$pb" -"L00000$pb": - mflr r2 - addis r2, r2, ha16(.CPI_foo_0-"L00000$pb") - lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2) - fadds f1, f1, f0 -*** lwz r11, 8(r1) - mtlr r11 - blr - -This is functional, but there is no reason to spill the LR register all the way -to the stack (the two marked instrs): spilling it to a GPR is quite enough. - -Implementing this will require some codegen improvements. Nate writes: - -"So basically what we need to support the "no stack frame save and restore" is a -generalization of the LR optimization to "callee-save regs". - -Currently, we have LR marked as a callee-save reg. The register allocator sees -that it's callee save, and spills it directly to the stack. - -Ideally, something like this would happen: - -LR would be in a separate register class from the GPRs. The class of LR would be -marked "unspillable". When the register allocator came across an unspillable -reg, it would ask "what is the best class to copy this into that I *can* spill" -If it gets a class back, which it will in this case (the gprs), it grabs a free -register of that class. 
If it is then later necessary to spill that reg, so be -it. - -===-------------------------------------------------------------------------=== - -We compile this: -int test(_Bool X) { - return X ? 524288 : 0; -} - -to: -_test: - cmplwi cr0, r3, 0 - lis r2, 8 - li r3, 0 - beq cr0, LBB1_2 ;entry -LBB1_1: ;entry - mr r3, r2 -LBB1_2: ;entry - blr - -instead of: -_test: - addic r2,r3,-1 - subfe r0,r2,r3 - slwi r3,r0,19 - blr - -This sort of thing occurs a lot due to globalopt. - -===-------------------------------------------------------------------------=== - -We compile: - -define i32 @bar(i32 %x) nounwind readnone ssp { -entry: - %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] - %neg = sext i1 %0 to i32 ; <i32> [#uses=1] - ret i32 %neg -} - + +===-------------------------------------------------------------------------=== + +PIC Code Gen IPO optimization: + +Squish small scalar globals together into a single global struct, allowing the +address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size +of the GOT on targets with one). 
+ +Note that this is discussed here for GCC: +http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html + +===-------------------------------------------------------------------------=== + +Fold add and sub with constant into non-extern, non-weak addresses so this: + +static int a; +void bar(int b) { a = b; } +void foo(unsigned char *c) { + *c = a; +} + +So that + +_foo: + lis r2, ha16(_a) + la r2, lo16(_a)(r2) + lbz r2, 3(r2) + stb r2, 0(r3) + blr + +Becomes + +_foo: + lis r2, ha16(_a+3) + lbz r2, lo16(_a+3)(r2) + stb r2, 0(r3) + blr + +===-------------------------------------------------------------------------=== + +We should compile these two functions to the same thing: + +#include <stdlib.h> +void f(int a, int b, int *P) { + *P = (a-b)>=0?(a-b):(b-a); +} +void g(int a, int b, int *P) { + *P = abs(a-b); +} + +Further, they should compile to something better than: + +_g: + subf r2, r4, r3 + subfic r3, r2, 0 + cmpwi cr0, r2, -1 + bgt cr0, LBB2_2 ; entry +LBB2_1: ; entry + mr r2, r3 +LBB2_2: ; entry + stw r2, 0(r5) + blr + +GCC produces: + +_g: + subf r4,r4,r3 + srawi r2,r4,31 + xor r0,r2,r4 + subf r0,r2,r0 + stw r0,0(r5) + blr + +... which is much nicer. + +This theoretically may help improve twolf slightly (used in dimbox.c:142?). 
+ +===-------------------------------------------------------------------------=== + +PR5945: This: +define i32 @clamp0g(i32 %a) { +entry: + %cmp = icmp slt i32 %a, 0 + %sel = select i1 %cmp, i32 0, i32 %a + ret i32 %sel +} + +Is compile to this with the PowerPC (32-bit) backend: + +_clamp0g: + cmpwi cr0, r3, 0 + li r2, 0 + blt cr0, LBB1_2 +; %bb.1: ; %entry + mr r2, r3 +LBB1_2: ; %entry + mr r3, r2 + blr + +This could be reduced to the much simpler: + +_clamp0g: + srawi r2, r3, 31 + andc r3, r3, r2 + blr + +===-------------------------------------------------------------------------=== + +int foo(int N, int ***W, int **TK, int X) { + int t, i; + + for (t = 0; t < N; ++t) + for (i = 0; i < 4; ++i) + W[t / X][i][t % X] = TK[i][t]; + + return 5; +} + +We generate relatively atrocious code for this loop compared to gcc. + +We could also strength reduce the rem and the div: +http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf + +===-------------------------------------------------------------------------=== + +We generate ugly code for this: + +void func(unsigned int *ret, float dx, float dy, float dz, float dw) { + unsigned code = 0; + if(dx < -dw) code |= 1; + if(dx > dw) code |= 2; + if(dy < -dw) code |= 4; + if(dy > dw) code |= 8; + if(dz < -dw) code |= 16; + if(dz > dw) code |= 32; + *ret = code; +} + +===-------------------------------------------------------------------------=== + +%struct.B = type { i8, [3 x i8] } + +define void @bar(%struct.B* %b) { +entry: + %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] + %tmp = load i32* %tmp ; <uint> [#uses=1] + %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] + %tmp4 = load i32* %tmp3 ; <uint> [#uses=1] + %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2] + %tmp9 = load i32* %tmp8 ; <uint> [#uses=1] + %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1] + %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1] + %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1] + %tmp11 = or i32 
%tmp1415, %tmp.masked ; <uint> [#uses=1] + %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1] + %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1] + store i32 %tmp13, i32* %tmp8 + ret void +} + +We emit: + +_foo: + lwz r2, 0(r3) + slwi r4, r2, 1 + or r4, r4, r2 + rlwimi r2, r4, 0, 0, 0 + stw r2, 0(r3) + blr + +We could collapse a bunch of those ORs and ANDs and generate the following +equivalent code: + +_foo: + lwz r2, 0(r3) + rlwinm r4, r2, 1, 0, 0 + or r2, r2, r4 + stw r2, 0(r3) + blr + +===-------------------------------------------------------------------------=== + +Consider a function like this: + +float foo(float X) { return X + 1234.4123f; } + +The FP constant ends up in the constant pool, so we need to get the LR register. + This ends up producing code like this: + +_foo: +.LBB_foo_0: ; entry + mflr r11 +*** stw r11, 8(r1) + bl "L00000$pb" +"L00000$pb": + mflr r2 + addis r2, r2, ha16(.CPI_foo_0-"L00000$pb") + lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2) + fadds f1, f1, f0 +*** lwz r11, 8(r1) + mtlr r11 + blr + +This is functional, but there is no reason to spill the LR register all the way +to the stack (the two marked instrs): spilling it to a GPR is quite enough. + +Implementing this will require some codegen improvements. Nate writes: + +"So basically what we need to support the "no stack frame save and restore" is a +generalization of the LR optimization to "callee-save regs". + +Currently, we have LR marked as a callee-save reg. The register allocator sees +that it's callee save, and spills it directly to the stack. + +Ideally, something like this would happen: + +LR would be in a separate register class from the GPRs. The class of LR would be +marked "unspillable". When the register allocator came across an unspillable +reg, it would ask "what is the best class to copy this into that I *can* spill" +If it gets a class back, which it will in this case (the gprs), it grabs a free +register of that class. 
If it is then later necessary to spill that reg, so be +it. + +===-------------------------------------------------------------------------=== + +We compile this: +int test(_Bool X) { + return X ? 524288 : 0; +} + to: - -_bar: - cntlzw r2, r3 - slwi r2, r2, 26 - srawi r3, r2, 31 - blr - -it would be better to produce: - -_bar: - addic r3,r3,-1 - subfe r3,r3,r3 +_test: + cmplwi cr0, r3, 0 + lis r2, 8 + li r3, 0 + beq cr0, LBB1_2 ;entry +LBB1_1: ;entry + mr r3, r2 +LBB1_2: ;entry blr - -===-------------------------------------------------------------------------=== - -We generate horrible ppc code for this: - -#define N 2000000 -double a[N],c[N]; -void simpleloop() { - int j; - for (j=0; j<N; j++) - c[j] = a[j]; -} - -LBB1_1: ;bb - lfdx f0, r3, r4 - addi r5, r5, 1 ;; Extra IV for the exit value compare. - stfdx f0, r2, r4 - addi r4, r4, 8 - - xoris r6, r5, 30 ;; This is due to a large immediate. - cmplwi cr0, r6, 33920 - bne cr0, LBB1_1 - -//===---------------------------------------------------------------------===// - -This: - #include <algorithm> - inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b) - { return std::make_pair(a + b, a + b < a); } - bool no_overflow(unsigned a, unsigned b) - { return !full_add(a, b).second; } - -Should compile to: - -__Z11no_overflowjj: - add r4,r3,r4 - subfc r3,r3,r4 - li r3,0 - adde r3,r3,r3 + +instead of: +_test: + addic r2,r3,-1 + subfe r0,r2,r3 + slwi r3,r0,19 + blr + +This sort of thing occurs a lot due to globalopt. 
+ +===-------------------------------------------------------------------------=== + +We compile: + +define i32 @bar(i32 %x) nounwind readnone ssp { +entry: + %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] + %neg = sext i1 %0 to i32 ; <i32> [#uses=1] + ret i32 %neg +} + +to: + +_bar: + cntlzw r2, r3 + slwi r2, r2, 26 + srawi r3, r2, 31 + blr + +it would be better to produce: + +_bar: + addic r3,r3,-1 + subfe r3,r3,r3 + blr + +===-------------------------------------------------------------------------=== + +We generate horrible ppc code for this: + +#define N 2000000 +double a[N],c[N]; +void simpleloop() { + int j; + for (j=0; j<N; j++) + c[j] = a[j]; +} + +LBB1_1: ;bb + lfdx f0, r3, r4 + addi r5, r5, 1 ;; Extra IV for the exit value compare. + stfdx f0, r2, r4 + addi r4, r4, 8 + + xoris r6, r5, 30 ;; This is due to a large immediate. + cmplwi cr0, r6, 33920 + bne cr0, LBB1_1 + +//===---------------------------------------------------------------------===// + +This: + #include <algorithm> + inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b) + { return std::make_pair(a + b, a + b < a); } + bool no_overflow(unsigned a, unsigned b) + { return !full_add(a, b).second; } + +Should compile to: + +__Z11no_overflowjj: + add r4,r3,r4 + subfc r3,r3,r4 + li r3,0 + adde r3,r3,r3 + blr + +(or better) not: + +__Z11no_overflowjj: + add r2, r4, r3 + cmplw cr7, r2, r3 + mfcr r2 + rlwinm r2, r2, 29, 31, 31 + xori r3, r2, 1 blr - -(or better) not: - -__Z11no_overflowjj: - add r2, r4, r3 - cmplw cr7, r2, r3 - mfcr r2 - rlwinm r2, r2, 29, 31, 31 - xori r3, r2, 1 - blr - -//===---------------------------------------------------------------------===// - -We compile some FP comparisons into an mfcr with two rlwinms and an or. 
For -example: -#include <math.h> -int test(double x, double y) { return islessequal(x, y);} -int test2(double x, double y) { return islessgreater(x, y);} -int test3(double x, double y) { return !islessequal(x, y);} - -Compiles into (all three are similar, but the bits differ): - -_test: - fcmpu cr7, f1, f2 - mfcr r2 - rlwinm r3, r2, 29, 31, 31 - rlwinm r2, r2, 31, 31, 31 - or r3, r2, r3 - blr - -GCC compiles this into: - - _test: - fcmpu cr7,f1,f2 - cror 30,28,30 - mfcr r3 - rlwinm r3,r3,31,1 + +//===---------------------------------------------------------------------===// + +We compile some FP comparisons into an mfcr with two rlwinms and an or. For +example: +#include <math.h> +int test(double x, double y) { return islessequal(x, y);} +int test2(double x, double y) { return islessgreater(x, y);} +int test3(double x, double y) { return !islessequal(x, y);} + +Compiles into (all three are similar, but the bits differ): + +_test: + fcmpu cr7, f1, f2 + mfcr r2 + rlwinm r3, r2, 29, 31, 31 + rlwinm r2, r2, 31, 31, 31 + or r3, r2, r3 + blr + +GCC compiles this into: + + _test: + fcmpu cr7,f1,f2 + cror 30,28,30 + mfcr r3 + rlwinm r3,r3,31,1 + blr + +which is more efficient and can use mfocr. See PR642 for some more context. + +//===---------------------------------------------------------------------===// + +void foo(float *data, float d) { + long i; + for (i = 0; i < 8000; i++) + data[i] = d; +} +void foo2(float *data, float d) { + long i; + data--; + for (i = 0; i < 8000; i++) { + data[1] = d; + data++; + } +} + +These compile to: + +_foo: + li r2, 0 +LBB1_1: ; bb + addi r4, r2, 4 + stfsx f1, r3, r2 + cmplwi cr0, r4, 32000 + mr r2, r4 + bne cr0, LBB1_1 ; bb + blr +_foo2: + li r2, 0 +LBB2_1: ; bb + addi r4, r2, 4 + stfsx f1, r3, r2 + cmplwi cr0, r4, 32000 + mr r2, r4 + bne cr0, LBB2_1 ; bb blr - -which is more efficient and can use mfocr. See PR642 for some more context. 
- -//===---------------------------------------------------------------------===// - -void foo(float *data, float d) { - long i; - for (i = 0; i < 8000; i++) - data[i] = d; -} -void foo2(float *data, float d) { - long i; - data--; - for (i = 0; i < 8000; i++) { - data[1] = d; - data++; - } -} - -These compile to: - -_foo: - li r2, 0 -LBB1_1: ; bb - addi r4, r2, 4 - stfsx f1, r3, r2 - cmplwi cr0, r4, 32000 - mr r2, r4 - bne cr0, LBB1_1 ; bb - blr -_foo2: - li r2, 0 -LBB2_1: ; bb - addi r4, r2, 4 - stfsx f1, r3, r2 - cmplwi cr0, r4, 32000 - mr r2, r4 - bne cr0, LBB2_1 ; bb - blr - -The 'mr' could be eliminated to folding the add into the cmp better. - -//===---------------------------------------------------------------------===// -Codegen for the following (low-probability) case deteriorated considerably -when the correctness fixes for unordered comparisons went in (PR 642, 58871). -It should be possible to recover the code quality described in the comments. - -; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3 -; This should produce one 'or' or 'cror' instruction per function. - -; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3 -; PR2964 - -define i32 @test(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1] - %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp345 -} - -define i32 @test2(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1] - %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp345 -} - -define i32 @test3(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1] - %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp34 -} - -//===---------------------------------------------------------------------===// -for the following code: - -void foo (float *__restrict__ a, int *__restrict__ b, int n) { - a[n] = b[n] * 2.321; -} - -we load b[n] to GPR, then move it VSX register and convert it float. 
We should -use vsx scalar integer load instructions to avoid direct moves - -//===----------------------------------------------------------------------===// -; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg - -; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and -; should not be generated except with -enable-finite-only-fp-math or the like). -; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to -; recognize a more elaborate tree than a simple SETxx. - -define double @test_FNEG_sel(double %A, double %B, double %C) { - %D = fsub double -0.000000e+00, %A ; <double> [#uses=1] - %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1] - %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1] - ret double %E -} - -//===----------------------------------------------------------------------===// -The save/restore sequence for CR in prolog/epilog is terrible: -- Each CR subreg is saved individually, rather than doing one save as a unit. -- On Darwin, the save is done after the decrement of SP, which means the offset -from SP of the save slot can be too big for a store instruction, which means we -need an additional register (currently hacked in 96015+96020; the solution there -is correct, but poor). -- On SVR4 the same thing can happen, and I don't think saving before the SP -decrement is safe on that target, as there is no red zone. This is currently -broken AFAIK, although it's not a target I can exercise. -The following demonstrates the problem: -extern void bar(char *p); -void foo() { - char x[100000]; - bar(x); - __asm__("" ::: "cr2"); -} - -//===-------------------------------------------------------------------------=== -Naming convention for instruction formats is very haphazard. -We have agreed on a naming scheme as follows: - -<INST_form>{_<OP_type><OP_len>}+ - -Where: -INST_form is the instruction format (X-form, etc.) 
-OP_type is the operand type - one of OPC (opcode), RD (register destination), - RS (register source), - RDp (destination register pair), - RSp (source register pair), IM (immediate), - XO (extended opcode) -OP_len is the length of the operand in bits - -VSX register operands would be of length 6 (split across two fields), -condition register fields of length 3. -We would not need denote reserved fields in names of instruction formats. - -//===----------------------------------------------------------------------===// - -Instruction fusion was introduced in ISA 2.06 and more opportunities added in -ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities -and force instruction pairs to be scheduled together. - ------------------------------------------------------------------------------ - -More general handling of any_extend and zero_extend: - -See https://reviews.llvm.org/D24924#555306 + +The 'mr' could be eliminated to folding the add into the cmp better. + +//===---------------------------------------------------------------------===// +Codegen for the following (low-probability) case deteriorated considerably +when the correctness fixes for unordered comparisons went in (PR 642, 58871). +It should be possible to recover the code quality described in the comments. + +; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3 +; This should produce one 'or' or 'cror' instruction per function. 
+ +; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3 +; PR2964 + +define i32 @test(double %x, double %y) nounwind { +entry: + %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1] + %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] + ret i32 %tmp345 +} + +define i32 @test2(double %x, double %y) nounwind { +entry: + %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1] + %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] + ret i32 %tmp345 +} + +define i32 @test3(double %x, double %y) nounwind { +entry: + %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1] + %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] + ret i32 %tmp34 +} + +//===---------------------------------------------------------------------===// +for the following code: + +void foo (float *__restrict__ a, int *__restrict__ b, int n) { + a[n] = b[n] * 2.321; +} + +we load b[n] to GPR, then move it VSX register and convert it float. We should +use vsx scalar integer load instructions to avoid direct moves + +//===----------------------------------------------------------------------===// +; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg + +; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and +; should not be generated except with -enable-finite-only-fp-math or the like). +; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to +; recognize a more elaborate tree than a simple SETxx. + +define double @test_FNEG_sel(double %A, double %B, double %C) { + %D = fsub double -0.000000e+00, %A ; <double> [#uses=1] + %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1] + %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1] + ret double %E +} + +//===----------------------------------------------------------------------===// +The save/restore sequence for CR in prolog/epilog is terrible: +- Each CR subreg is saved individually, rather than doing one save as a unit. 
+- On Darwin, the save is done after the decrement of SP, which means the offset +from SP of the save slot can be too big for a store instruction, which means we +need an additional register (currently hacked in 96015+96020; the solution there +is correct, but poor). +- On SVR4 the same thing can happen, and I don't think saving before the SP +decrement is safe on that target, as there is no red zone. This is currently +broken AFAIK, although it's not a target I can exercise. +The following demonstrates the problem: +extern void bar(char *p); +void foo() { + char x[100000]; + bar(x); + __asm__("" ::: "cr2"); +} + +//===-------------------------------------------------------------------------=== +Naming convention for instruction formats is very haphazard. +We have agreed on a naming scheme as follows: + +<INST_form>{_<OP_type><OP_len>}+ + +Where: +INST_form is the instruction format (X-form, etc.) +OP_type is the operand type - one of OPC (opcode), RD (register destination), + RS (register source), + RDp (destination register pair), + RSp (source register pair), IM (immediate), + XO (extended opcode) +OP_len is the length of the operand in bits + +VSX register operands would be of length 6 (split across two fields), +condition register fields of length 3. +We would not need denote reserved fields in names of instruction formats. + +//===----------------------------------------------------------------------===// + +Instruction fusion was introduced in ISA 2.06 and more opportunities added in +ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities +and force instruction pairs to be scheduled together. 
+ +----------------------------------------------------------------------------- + +More general handling of any_extend and zero_extend: + +See https://reviews.llvm.org/D24924#555306 diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt b/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt index 47d18ecfca..6d32e76ed8 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/README_ALTIVEC.txt @@ -1,338 +1,338 @@ -//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===// - -Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector -registers, to generate better spill code. - -//===----------------------------------------------------------------------===// - -The first should be a single lvx from the constant pool, the second should be -a xor/stvx: - -void foo(void) { - int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 }; - bar (x); -} - -#include <string.h> -void foo(void) { - int x[8] __attribute__((aligned(128))); - memset (x, 0, sizeof (x)); - bar (x); -} - -//===----------------------------------------------------------------------===// - -Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763 - -When -ffast-math is on, we can use 0.0. - -//===----------------------------------------------------------------------===// - - Consider this: - v4f32 Vector; - v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X }; - -Since we know that "Vector" is 16-byte aligned and we know the element offset -of ".X", we should change the load into a lve*x instruction, instead of doing -a load/store/lve*x sequence. - -//===----------------------------------------------------------------------===// - -Implement passing vectors by value into calls and receiving them as arguments. 
- -//===----------------------------------------------------------------------===// - -GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load -of C1/C2/C3, then a load and vperm of Variable. - -//===----------------------------------------------------------------------===// - -We need a way to teach tblgen that some operands of an intrinsic are required to -be constants. The verifier should enforce this constraint. - -//===----------------------------------------------------------------------===// - -We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte -aligned stack slot, followed by a load/vperm. We should probably just store it -to a scalar stack slot, then use lvsl/vperm to load it. If the value is already -in memory this is a big win. - -//===----------------------------------------------------------------------===// - -extract_vector_elt of an arbitrary constant vector can be done with the -following instructions: - -vTemp = vec_splat(v0,2); // 2 is the element the src is in. -vec_ste(&destloc,0,vTemp); - -We can do an arbitrary non-constant value by using lvsr/perm/ste. - -//===----------------------------------------------------------------------===// - -If we want to tie instruction selection into the scheduler, we can do some -constant formation with different instructions. For example, we can generate -"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with -"vsplti 0" or "vxor", each of which use different execution units, thus could -help scheduling. - -This is probably only reasonable for a post-pass scheduler. - -//===----------------------------------------------------------------------===// - -For this function: - -void test(vector float *A, vector float *B) { - vector float C = (vector float)vec_cmpeq(*A, *B); - if (!vec_any_eq(*A, *B)) - *B = (vector float){0,0,0,0}; - *A = C; -} - -we get the following basic block: - - ... 
- lvx v2, 0, r4 - lvx v3, 0, r3 - vcmpeqfp v4, v3, v2 - vcmpeqfp. v2, v3, v2 - bne cr6, LBB1_2 ; cond_next - -The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the -vcmpeqfp. result is used by a branch. This can be improved. - -//===----------------------------------------------------------------------===// - -The code generated for this is truly aweful: - -vector float test(float a, float b) { - return (vector float){ 0.0, a, 0.0, 0.0}; -} - -LCPI1_0: ; float - .space 4 - .text - .globl _test - .align 4 -_test: - mfspr r2, 256 - oris r3, r2, 4096 - mtspr 256, r3 - lis r3, ha16(LCPI1_0) - addi r4, r1, -32 - stfs f1, -16(r1) - addi r5, r1, -16 - lfs f0, lo16(LCPI1_0)(r3) - stfs f0, -32(r1) - lvx v2, 0, r4 - lvx v3, 0, r5 - vmrghw v3, v3, v2 - vspltw v2, v2, 0 - vmrghw v2, v2, v3 - mtspr 256, r2 - blr - -//===----------------------------------------------------------------------===// - -int foo(vector float *x, vector float *y) { - if (vec_all_eq(*x,*y)) return 3245; - else return 12; -} - -A predicate compare being used in a select_cc should have the same peephole -applied to it as a predicate compare used by a br_cc. There should be no -mfcr here: - -_foo: - mfspr r2, 256 - oris r5, r2, 12288 - mtspr 256, r5 - li r5, 12 - li r6, 3245 - lvx v2, 0, r4 - lvx v3, 0, r3 - vcmpeqfp. v2, v3, v2 - mfcr r3, 2 - rlwinm r3, r3, 25, 31, 31 - cmpwi cr0, r3, 0 - bne cr0, LBB1_2 ; entry -LBB1_1: ; entry - mr r6, r5 -LBB1_2: ; entry - mr r3, r6 - mtspr 256, r2 - blr - -//===----------------------------------------------------------------------===// - -CodeGen/PowerPC/vec_constants.ll has an and operation that should be -codegen'd to andc. The issue is that the 'all ones' build vector is -SelectNodeTo'd a VSPLTISB instruction node before the and/xor is selected -which prevents the vnot pattern from matching. 
- - -//===----------------------------------------------------------------------===// - -An alternative to the store/store/load approach for illegal insert element -lowering would be: - -1. store element to any ol' slot -2. lvx the slot -3. lvsl 0; splat index; vcmpeq to generate a select mask -4. lvsl slot + x; vperm to rotate result into correct slot -5. vsel result together. - -//===----------------------------------------------------------------------===// - -Should codegen branches on vec_any/vec_all to avoid mfcr. Two examples: - -#include <altivec.h> - int f(vector float a, vector float b) - { - int aa = 0; - if (vec_all_ge(a, b)) - aa |= 0x1; - if (vec_any_ge(a,b)) - aa |= 0x2; - return aa; -} - -vector float f(vector float a, vector float b) { - if (vec_any_eq(a, b)) - return a; - else - return b; -} - -//===----------------------------------------------------------------------===// - -We should do a little better with eliminating dead stores. -The stores to the stack are dead since %a and %b are not needed - -; Function Attrs: nounwind -define <16 x i8> @test_vpmsumb() #0 { - entry: - %a = alloca <16 x i8>, align 16 - %b = alloca <16 x i8>, align 16 - store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16 - store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16 - %0 = load <16 x i8>* %a, align 16 - %1 = load <16 x i8>* %b, align 16 - %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1) - ret <16 x i8> %2 -} - - -; Function Attrs: nounwind readnone -declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 - - -Produces the following code with -mtriple=powerpc64-unknown-linux-gnu: -# %bb.0: # %entry - addis 3, 2, .LCPI0_0@toc@ha - addis 4, 2, .LCPI0_1@toc@ha - addi 3, 3, .LCPI0_0@toc@l - addi 4, 4, 
.LCPI0_1@toc@l - lxvw4x 0, 0, 3 - addi 3, 1, -16 - lxvw4x 35, 0, 4 - stxvw4x 0, 0, 3 - ori 2, 2, 0 - lxvw4x 34, 0, 3 - addi 3, 1, -32 - stxvw4x 35, 0, 3 - vpmsumb 2, 2, 3 - blr - .long 0 - .quad 0 - -The two stxvw4x instructions are not needed. -With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes -are present too. - -//===----------------------------------------------------------------------===// - -The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll: - -define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind { - %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0 - %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1 - %result = add <2 x i64> %x, %tmpvec2 - ret <2 x i64> %result - -This will generate the following instruction sequence: - std 5, -8(1) - std 5, -16(1) - addi 3, 1, -16 - ori 2, 2, 0 - lxvd2x 35, 0, 3 - vaddudm 2, 2, 3 - blr - -This will almost certainly cause a load-hit-store hazard. -Since val is a value parameter, it should not need to be saved onto -the stack, unless it's being done set up the vector register. Instead, -it would be better to splat the value into a vector register, and then -remove the (dead) stores to the stack. - -//===----------------------------------------------------------------------===// - -At the moment we always generate a lxsdx in preference to lfd, or stxsdx in -preference to stfd. When we have a reg-immediate addressing mode, this is a -poor choice, since we have to load the address into an index register. This -should be fixed for P7/P8. - -//===----------------------------------------------------------------------===// - -Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE. -However, we could actually support both kinds on either endianness, if we check -for the appropriate shufflevector pattern for each case ... 
this would cause -some additional shufflevectors to be recognized and implemented via the -"swapped" form. - -//===----------------------------------------------------------------------===// - -There is a utility program called PerfectShuffle that generates a table of the -shortest instruction sequence for implementing a shufflevector operation on -PowerPC. However, this was designed for big-endian code generation. We could -modify this program to create a little endian version of the table. The table -is used in PPCISelLowering.cpp, PPCTargetLowering::LOWERVECTOR_SHUFFLE(). - -//===----------------------------------------------------------------------===// - -Opportunies to use instructions from PPCInstrVSX.td during code gen - - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07) - - Scalar comparisons (xscmpodp and xscmpudp) - - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp) - -Related to this: we currently do not generate the lxvw4x instruction for either -v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires -a single target type. This should probably be addressed in the PPCISelDAGToDAG logic. - -//===----------------------------------------------------------------------===// - -Currently EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT are type-legal only -for v2f64 with VSX available. We should create custom lowering -support for the other vector types. Without this support, we generate -sequences with load-hit-store hazards. - -v4f32 can be supported with VSX by shifting the correct element into -big-endian lane 0, using xscvspdpn to produce a double-precision -representation of the single-precision value in big-endian -double-precision lane 0, and reinterpreting lane 0 as an FPR or -vector-scalar register. - -v2i64 can be supported with VSX and P8Vector in the same manner as -v2f64, followed by a direct move to a GPR. 
- -v4i32 can be supported with VSX and P8Vector by shifting the correct -element into big-endian lane 1, using a direct move to a GPR, and -sign-extending the 32-bit result to 64 bits. - -v8i16 can be supported with VSX and P8Vector by shifting the correct -element into big-endian lane 3, using a direct move to a GPR, and -sign-extending the 16-bit result to 64 bits. - -v16i8 can be supported with VSX and P8Vector by shifting the correct -element into big-endian lane 7, using a direct move to a GPR, and -sign-extending the 8-bit result to 64 bits. +//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===// + +Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector +registers, to generate better spill code. + +//===----------------------------------------------------------------------===// + +The first should be a single lvx from the constant pool, the second should be +a xor/stvx: + +void foo(void) { + int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 }; + bar (x); +} + +#include <string.h> +void foo(void) { + int x[8] __attribute__((aligned(128))); + memset (x, 0, sizeof (x)); + bar (x); +} + +//===----------------------------------------------------------------------===// + +Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763 + +When -ffast-math is on, we can use 0.0. + +//===----------------------------------------------------------------------===// + + Consider this: + v4f32 Vector; + v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X }; + +Since we know that "Vector" is 16-byte aligned and we know the element offset +of ".X", we should change the load into a lve*x instruction, instead of doing +a load/store/lve*x sequence. + +//===----------------------------------------------------------------------===// + +Implement passing vectors by value into calls and receiving them as arguments. 
+ +//===----------------------------------------------------------------------===// + +GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load +of C1/C2/C3, then a load and vperm of Variable. + +//===----------------------------------------------------------------------===// + +We need a way to teach tblgen that some operands of an intrinsic are required to +be constants. The verifier should enforce this constraint. + +//===----------------------------------------------------------------------===// + +We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte +aligned stack slot, followed by a load/vperm. We should probably just store it +to a scalar stack slot, then use lvsl/vperm to load it. If the value is already +in memory this is a big win. + +//===----------------------------------------------------------------------===// + +extract_vector_elt of an arbitrary constant vector can be done with the +following instructions: + +vTemp = vec_splat(v0,2); // 2 is the element the src is in. +vec_ste(&destloc,0,vTemp); + +We can do an arbitrary non-constant value by using lvsr/perm/ste. + +//===----------------------------------------------------------------------===// + +If we want to tie instruction selection into the scheduler, we can do some +constant formation with different instructions. For example, we can generate +"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with +"vsplti 0" or "vxor", each of which use different execution units, thus could +help scheduling. + +This is probably only reasonable for a post-pass scheduler. + +//===----------------------------------------------------------------------===// + +For this function: + +void test(vector float *A, vector float *B) { + vector float C = (vector float)vec_cmpeq(*A, *B); + if (!vec_any_eq(*A, *B)) + *B = (vector float){0,0,0,0}; + *A = C; +} + +we get the following basic block: + + ... 
+ lvx v2, 0, r4 + lvx v3, 0, r3 + vcmpeqfp v4, v3, v2 + vcmpeqfp. v2, v3, v2 + bne cr6, LBB1_2 ; cond_next + +The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the +vcmpeqfp. result is used by a branch. This can be improved. + +//===----------------------------------------------------------------------===// + +The code generated for this is truly aweful: + +vector float test(float a, float b) { + return (vector float){ 0.0, a, 0.0, 0.0}; +} + +LCPI1_0: ; float + .space 4 + .text + .globl _test + .align 4 +_test: + mfspr r2, 256 + oris r3, r2, 4096 + mtspr 256, r3 + lis r3, ha16(LCPI1_0) + addi r4, r1, -32 + stfs f1, -16(r1) + addi r5, r1, -16 + lfs f0, lo16(LCPI1_0)(r3) + stfs f0, -32(r1) + lvx v2, 0, r4 + lvx v3, 0, r5 + vmrghw v3, v3, v2 + vspltw v2, v2, 0 + vmrghw v2, v2, v3 + mtspr 256, r2 + blr + +//===----------------------------------------------------------------------===// + +int foo(vector float *x, vector float *y) { + if (vec_all_eq(*x,*y)) return 3245; + else return 12; +} + +A predicate compare being used in a select_cc should have the same peephole +applied to it as a predicate compare used by a br_cc. There should be no +mfcr here: + +_foo: + mfspr r2, 256 + oris r5, r2, 12288 + mtspr 256, r5 + li r5, 12 + li r6, 3245 + lvx v2, 0, r4 + lvx v3, 0, r3 + vcmpeqfp. v2, v3, v2 + mfcr r3, 2 + rlwinm r3, r3, 25, 31, 31 + cmpwi cr0, r3, 0 + bne cr0, LBB1_2 ; entry +LBB1_1: ; entry + mr r6, r5 +LBB1_2: ; entry + mr r3, r6 + mtspr 256, r2 + blr + +//===----------------------------------------------------------------------===// + +CodeGen/PowerPC/vec_constants.ll has an and operation that should be +codegen'd to andc. The issue is that the 'all ones' build vector is +SelectNodeTo'd a VSPLTISB instruction node before the and/xor is selected +which prevents the vnot pattern from matching. 
+ + +//===----------------------------------------------------------------------===// + +An alternative to the store/store/load approach for illegal insert element +lowering would be: + +1. store element to any ol' slot +2. lvx the slot +3. lvsl 0; splat index; vcmpeq to generate a select mask +4. lvsl slot + x; vperm to rotate result into correct slot +5. vsel result together. + +//===----------------------------------------------------------------------===// + +Should codegen branches on vec_any/vec_all to avoid mfcr. Two examples: + +#include <altivec.h> + int f(vector float a, vector float b) + { + int aa = 0; + if (vec_all_ge(a, b)) + aa |= 0x1; + if (vec_any_ge(a,b)) + aa |= 0x2; + return aa; +} + +vector float f(vector float a, vector float b) { + if (vec_any_eq(a, b)) + return a; + else + return b; +} + +//===----------------------------------------------------------------------===// + +We should do a little better with eliminating dead stores. +The stores to the stack are dead since %a and %b are not needed + +; Function Attrs: nounwind +define <16 x i8> @test_vpmsumb() #0 { + entry: + %a = alloca <16 x i8>, align 16 + %b = alloca <16 x i8>, align 16 + store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16 + store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16 + %0 = load <16 x i8>* %a, align 16 + %1 = load <16 x i8>* %b, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1) + ret <16 x i8> %2 +} + + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 + + +Produces the following code with -mtriple=powerpc64-unknown-linux-gnu: +# %bb.0: # %entry + addis 3, 2, .LCPI0_0@toc@ha + addis 4, 2, .LCPI0_1@toc@ha + addi 3, 3, .LCPI0_0@toc@l + addi 4, 4, 
.LCPI0_1@toc@l + lxvw4x 0, 0, 3 + addi 3, 1, -16 + lxvw4x 35, 0, 4 + stxvw4x 0, 0, 3 + ori 2, 2, 0 + lxvw4x 34, 0, 3 + addi 3, 1, -32 + stxvw4x 35, 0, 3 + vpmsumb 2, 2, 3 + blr + .long 0 + .quad 0 + +The two stxvw4x instructions are not needed. +With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes +are present too. + +//===----------------------------------------------------------------------===// + +The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll: + +define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind { + %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0 + %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1 + %result = add <2 x i64> %x, %tmpvec2 + ret <2 x i64> %result + +This will generate the following instruction sequence: + std 5, -8(1) + std 5, -16(1) + addi 3, 1, -16 + ori 2, 2, 0 + lxvd2x 35, 0, 3 + vaddudm 2, 2, 3 + blr + +This will almost certainly cause a load-hit-store hazard. +Since val is a value parameter, it should not need to be saved onto +the stack, unless it's being done set up the vector register. Instead, +it would be better to splat the value into a vector register, and then +remove the (dead) stores to the stack. + +//===----------------------------------------------------------------------===// + +At the moment we always generate a lxsdx in preference to lfd, or stxsdx in +preference to stfd. When we have a reg-immediate addressing mode, this is a +poor choice, since we have to load the address into an index register. This +should be fixed for P7/P8. + +//===----------------------------------------------------------------------===// + +Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE. +However, we could actually support both kinds on either endianness, if we check +for the appropriate shufflevector pattern for each case ... 
this would cause +some additional shufflevectors to be recognized and implemented via the +"swapped" form. + +//===----------------------------------------------------------------------===// + +There is a utility program called PerfectShuffle that generates a table of the +shortest instruction sequence for implementing a shufflevector operation on +PowerPC. However, this was designed for big-endian code generation. We could +modify this program to create a little endian version of the table. The table +is used in PPCISelLowering.cpp, PPCTargetLowering::LOWERVECTOR_SHUFFLE(). + +//===----------------------------------------------------------------------===// + +Opportunies to use instructions from PPCInstrVSX.td during code gen + - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07) + - Scalar comparisons (xscmpodp and xscmpudp) + - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp) + +Related to this: we currently do not generate the lxvw4x instruction for either +v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires +a single target type. This should probably be addressed in the PPCISelDAGToDAG logic. + +//===----------------------------------------------------------------------===// + +Currently EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT are type-legal only +for v2f64 with VSX available. We should create custom lowering +support for the other vector types. Without this support, we generate +sequences with load-hit-store hazards. + +v4f32 can be supported with VSX by shifting the correct element into +big-endian lane 0, using xscvspdpn to produce a double-precision +representation of the single-precision value in big-endian +double-precision lane 0, and reinterpreting lane 0 as an FPR or +vector-scalar register. + +v2i64 can be supported with VSX and P8Vector in the same manner as +v2f64, followed by a direct move to a GPR. 
+ +v4i32 can be supported with VSX and P8Vector by shifting the correct +element into big-endian lane 1, using a direct move to a GPR, and +sign-extending the 32-bit result to 64 bits. + +v8i16 can be supported with VSX and P8Vector by shifting the correct +element into big-endian lane 3, using a direct move to a GPR, and +sign-extending the 16-bit result to 64 bits. + +v16i8 can be supported with VSX and P8Vector by shifting the correct +element into big-endian lane 7, using a direct move to a GPR, and +sign-extending the 8-bit result to 64 bits. diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt b/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt index 79cb6cceca..c9984b7604 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/README_P9.txt @@ -1,605 +1,605 @@ -//===- README_P9.txt - Notes for improving Power9 code gen ----------------===// - -TODO: Instructions Need Implement Instrinstics or Map to LLVM IR - -Altivec: -- Vector Compare Not Equal (Zero): - vcmpneb(.) vcmpneh(.) vcmpnew(.) - vcmpnezb(.) vcmpnezh(.) vcmpnezw(.) - . Same as other VCMP*, use VCMP/VCMPo form (support intrinsic) - -- Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd - . Don't use llvm extractelement because they have different semantics - . Use instrinstics: - (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM)) - (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM)) - (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM)) - (set v2i64:$vD, (int_ppc_altivec_vextractd v2i64:$vA, imm:$UIMM)) - -- Vector Extract Unsigned Byte Left/Right-Indexed: - vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx - . 
Use instrinstics: - // Left-Indexed - (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB)) - (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB)) - (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB)) - - // Right-Indexed - (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB)) - (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB)) - (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB)) - -- Vector Insert Element Instructions: vinsertb vinsertd vinserth vinsertw - (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM)) - (set v8i16:$vD, (int_ppc_altivec_vinsertd v8i16:$vA, imm:$UIMM)) - (set v4i32:$vD, (int_ppc_altivec_vinserth v4i32:$vA, imm:$UIMM)) - (set v2i64:$vD, (int_ppc_altivec_vinsertw v2i64:$vA, imm:$UIMM)) - -- Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]: - vclzlsbb vctzlsbb - . Use intrinsic: - (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB)) - (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB)) - -- Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd - . Map to llvm cttz - (set v16i8:$vD, (cttz v16i8:$vB)) // vctzb - (set v8i16:$vD, (cttz v8i16:$vB)) // vctzh - (set v4i32:$vD, (cttz v4i32:$vB)) // vctzw - (set v2i64:$vD, (cttz v2i64:$vB)) // vctzd - -- Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d - . vextsb2w: - (set v4i32:$vD, (sext v4i8:$vB)) - - // PowerISA_V3.0: - do i = 0 to 3 - VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3]) - end - - . vextsh2w: - (set v4i32:$vD, (sext v4i16:$vB)) - - // PowerISA_V3.0: - do i = 0 to 3 - VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1]) - end - - . vextsb2d - (set v2i64:$vD, (sext v2i8:$vB)) - - // PowerISA_V3.0: - do i = 0 to 1 - VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7]) - end - - . vextsh2d - (set v2i64:$vD, (sext v2i16:$vB)) - - // PowerISA_V3.0: - do i = 0 to 1 - VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3]) - end - - . 
vextsw2d - (set v2i64:$vD, (sext v2i32:$vB)) - - // PowerISA_V3.0: - do i = 0 to 1 - VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1]) - end - -- Vector Integer Negate: vnegw vnegd - . Map to llvm ineg - (set v4i32:$rT, (ineg v4i32:$rA)) // vnegw - (set v2i64:$rT, (ineg v2i64:$rA)) // vnegd - -- Vector Parity Byte: vprtybw vprtybd vprtybq - . Use intrinsic: - (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB)) - (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB)) - (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB)) - -- Vector (Bit) Permute (Right-indexed): - . vbpermd: Same as "vbpermq", use VX1_Int_Ty2: - VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>; - - . vpermr: use VA1a_Int_Ty3 - VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>; - -- Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi - . Use intrinsic: - VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>; - VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>; - VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>; - VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>; - -- Vector Shift Left/Right: vslv vsrv - . Use intrinsic, don't map to llvm shl and lshr, because they have different - semantics, e.g. vslv: - - do i = 0 to 15 - sh ← VR[VRB].byte[i].bit[5:7] - VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7] - end - - VR[VRT].byte[i] is composed of 2 bytes from src.byte[i:i+1] - - . VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>; - VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>; - -- Vector Multiply-by-10 (& Write Carry) Unsigned Quadword: - vmul10uq vmul10cuq - . Use intrinsic: - VX1_Int_Ty<513, "vmul10uq", int_ppc_altivec_vmul10uq, v1i128>; - VX1_Int_Ty< 1, "vmul10cuq", int_ppc_altivec_vmul10cuq, v1i128>; - -- Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword: - vmul10euq vmul10ecuq - . 
Use intrinsic: - VX1_Int_Ty<577, "vmul10euq", int_ppc_altivec_vmul10euq, v1i128>; - VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>; - -- Decimal Convert From/to National/Zoned/Signed-QWord: - bcdcfn. bcdcfz. bcdctn. bcdctz. bcdcfsq. bcdctsq. - . Use instrinstics: - (set v1i128:$vD, (int_ppc_altivec_bcdcfno v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcdcfzo v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcdctno v1i128:$vB)) - (set v1i128:$vD, (int_ppc_altivec_bcdctzo v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcdcfsqo v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcdctsqo v1i128:$vB)) - -- Decimal Copy-Sign/Set-Sign: bcdcpsgn. bcdsetsgn. - . Use instrinstics: - (set v1i128:$vD, (int_ppc_altivec_bcdcpsgno v1i128:$vA, v1i128:$vB)) - (set v1i128:$vD, (int_ppc_altivec_bcdsetsgno v1i128:$vB, i1:$PS)) - -- Decimal Shift/Unsigned-Shift/Shift-and-Round: bcds. bcdus. bcdsr. - . Use instrinstics: - (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB)) - (set v1i128:$vD, (int_ppc_altivec_bcdsro v1i128:$vA, v1i128:$vB, i1:$PS)) - - . Note! Their VA is accessed only 1 byte, i.e. VA.byte[7] - -- Decimal (Unsigned) Truncate: bcdtrunc. bcdutrunc. - . Use instrinstics: - (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS)) - (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB)) - - . Note! Their VA is accessed only 2 byte, i.e. VA.hword[3] (VA.bit[48:63]) - -VSX: -- QP Copy Sign: xscpsgnqp - . Similar to xscpsgndp - . (set f128:$vT, (fcopysign f128:$vB, f128:$vA) - -- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp - . Similar to xsabsdp/xsnabsdp/xsnegdp - . 
(set f128:$vT, (fabs f128:$vB)) // xsabsqp - (set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp - (set f128:$vT, (fneg f128:$vB)) // xsnegqp - -- QP Add/Divide/Multiply/Subtract/Square-Root: - xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp - . Similar to xsadddp - . isCommutable = 1 - (set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp - (set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp - - . isCommutable = 0 - (set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp - (set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp - (set f128:$vT, (fsqrt f128:$vB))) // xssqrtqp - -- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root: - xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo - . Similar to xsrsqrtedp?? - def XSRSQRTEDP : XX2Form<60, 74, - (outs vsfrc:$XT), (ins vsfrc:$XB), - "xsrsqrtedp $XT, $XB", IIC_VecFP, - [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; - - . Define DAG Node in PPCInstrInfo.td: - def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>; - def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>; - def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>; - def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>; - def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>; - - DAG patterns of each instruction (PPCInstrVSX.td): - . isCommutable = 1 - (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo - (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo - - . isCommutable = 0 - (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo - (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo - (set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo - -- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp - . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp - - . 
isCommutable = 1 - // xsmaddqp - [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsmsubqp - [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsnmaddqp - [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsnmsubqp - [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - -- Round to Odd of QP (Negative) Multiply-{Add/Subtract}: - xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo - . Similar to xsrsqrtedp?? - - . Define DAG Node in PPCInstrInfo.td: - def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>; - - It looks like we only need to define "PPCfmarto" for these instructions, - because according to PowerISA_V3.0, these instructions perform RTO on - fma's result: - xsmaddqp(o) - v ← bfp_MULTIPLY_ADD(src1, src3, src2) - rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) - result ← bfp_CONVERT_TO_BFP128(rnd) - - xsmsubqp(o) - v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) - rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) - result ← bfp_CONVERT_TO_BFP128(rnd) - - xsnmaddqp(o) - v ← bfp_MULTIPLY_ADD(src1,src3,src2) - rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) - result ← bfp_CONVERT_TO_BFP128(rnd) - - xsnmsubqp(o) - v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) - rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) - result ← bfp_CONVERT_TO_BFP128(rnd) - - DAG patterns of each instruction (PPCInstrVSX.td): - . 
isCommutable = 1 - // xsmaddqpo - [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsmsubqpo - [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsnmaddqpo - [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - - // xsnmsubqpo - [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, - AltVSXFMARel; - -- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp - . ref: XSCMPUDP - def XSCMPUDP : XX3Form_1<60, 35, - (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), - "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; - - . No SDAG, intrinsic, builtin are required?? - Or llvm fcmp order/unorder compare?? - -- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp - . No SDAG, intrinsic, builtin are required? - -- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp - . I checked existing instruction "XSCMPUDP". They are different in target - register. "XSCMPUDP" write to CR field, xscmp*dp write to VSX register - - . Use instrinsic: - (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB)) - (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB)) - (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB)) - (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB)) - -- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp. - . Similar to xvcmpeqdp: - defm XVCMPEQDP : XX3Form_Rcr<60, 99, - "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, - int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; - - . So we should use "XX3Form_Rcr" to implement instrinsic - -- Convert DP -> QP: xscvdpqp - . Similar to XSCVDPSP: - def XSCVDPSP : XX2Form<60, 265, - (outs vsfrc:$XT), (ins vsfrc:$XB), - "xscvdpsp $XT, $XB", IIC_VecFP, []>; - . 
So, No SDAG, intrinsic, builtin are required?? - -- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo - . Similar to XSCVDPSP - . No SDAG, intrinsic, builtin are required?? - -- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero): - xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz - . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS", - "XSCVDPUXDS", "XSCVDPUXWS" - - . DAG patterns: - (set f128:$XT, (PPCfctidz f128:$XB)) // xscvqpsdz - (set f128:$XT, (PPCfctiwz f128:$XB)) // xscvqpswz - (set f128:$XT, (PPCfctiduz f128:$XB)) // xscvqpudz - (set f128:$XT, (PPCfctiwuz f128:$XB)) // xscvqpuwz - -- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp - . Similar to XSCVSXDSP - . (set f128:$XT, (PPCfcfids f64:$XB)) // xscvsdqp - (set f128:$XT, (PPCfcfidus f64:$XB)) // xscvudqp - -- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp - . Similar to XSCVDPSP - . No SDAG, intrinsic, builtin are required?? - -- Vector HP -> SP: xvcvhpsp xvcvsphp - . Similar to XVCVDPSP: - def XVCVDPSP : XX2Form<60, 393, - (outs vsrc:$XT), (ins vsrc:$XB), - "xvcvdpsp $XT, $XB", IIC_VecFP, []>; - . No SDAG, intrinsic, builtin are required?? - -- Round to Quad-Precision Integer: xsrqpi xsrqpix - . These are combination of "XSRDPI", "XSRDPIC", "XSRDPIM", .., because you - need to assign rounding mode in instruction - . Provide builtin? - (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB)) - (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB)) - -- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp - . Provide builtin? - (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB)) - -Fixed Point Facility: - -- Exploit cmprb and cmpeqb (perhaps for something like - isalpha/isdigit/isupper/islower and isspace respectivelly). This can - perhaps be done through a builtin. - -- Provide testing for cnttz[dw] -- Insert Exponent DP/QP: xsiexpdp xsiexpqp - . Use intrinsic? - . xsiexpdp: - // Note: rA and rB are the unsigned integer value. 
- (set f128:$XT, (int_ppc_vsx_xsiexpdp i64:$rA, i64:$rB)) - - . xsiexpqp: - (set f128:$vT, (int_ppc_vsx_xsiexpqp f128:$vA, f64:$vB)) - -- Extract Exponent/Significand DP/QP: xsxexpdp xsxsigdp xsxexpqp xsxsigqp - . Use intrinsic? - . (set i64:$rT, (int_ppc_vsx_xsxexpdp f64$XB)) // xsxexpdp - (set i64:$rT, (int_ppc_vsx_xsxsigdp f64$XB)) // xsxsigdp - (set f128:$vT, (int_ppc_vsx_xsxexpqp f128$vB)) // xsxexpqp - (set f128:$vT, (int_ppc_vsx_xsxsigqp f128$vB)) // xsxsigqp - -- Vector Insert Word: xxinsertw - - Useful for inserting f32/i32 elements into vectors (the element to be - inserted needs to be prepared) - . Note: llvm has insertelem in "Vector Operations" - ; yields <n x <ty>> - <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx> - - But how to map to it?? - [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, - - . Or use intrinsic? - (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM)) - -- Vector Extract Unsigned Word: xxextractuw - - Not useful for extraction of f32 from v4f32 (the current pattern is better - - shift->convert) - - It is useful for (uint_to_fp (vector_extract v4i32, N)) - - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N)) - . Note: llvm has extractelement in "Vector Operations" - ; yields <ty> - <result> = extractelement <n x <ty>> <val>, <ty2> <idx> - - How to map to it?? - [(set f128:$XT, (extractelement v1f128:$XB, i4:$UIMM))] - - . Or use intrinsic? - (set f128:$XT, (int_ppc_vsx_xxextractuw v1f128:$XB, i4:$UIMM)) - -- Vector Insert Exponent DP/SP: xviexpdp xviexpsp - . Use intrinsic - (set v2f64:$XT, (int_ppc_vsx_xviexpdp v2f64:$XA, v2f64:$XB)) - (set v4f32:$XT, (int_ppc_vsx_xviexpsp v4f32:$XA, v4f32:$XB)) - -- Vector Extract Exponent/Significand DP/SP: xvxexpdp xvxexpsp xvxsigdp xvxsigsp - . 
Use intrinsic - (set v2f64:$XT, (int_ppc_vsx_xvxexpdp v2f64:$XB)) - (set v4f32:$XT, (int_ppc_vsx_xvxexpsp v4f32:$XB)) - (set v2f64:$XT, (int_ppc_vsx_xvxsigdp v2f64:$XB)) - (set v4f32:$XT, (int_ppc_vsx_xvxsigsp v4f32:$XB)) - -- Test Data Class SP/DP/QP: xststdcsp xststdcdp xststdcqp - . No SDAG, intrinsic, builtin are required? - Because it seems that we have no way to map BF field? - - Instruction Form: [PO T XO B XO BX TX] - Asm: xststd* BF,XB,DCMX - - BF is an index to CR register field. - -- Vector Test Data Class SP/DP: xvtstdcsp xvtstdcdp - . Use intrinsic - (set v4f32:$XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, i7:$DCMX)) - (set v2f64:$XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, i7:$DCMX)) - -- Maximum/Minimum Type-C/Type-J DP: xsmaxcdp xsmaxjdp xsmincdp xsminjdp - . PowerISA_V3.0: - "xsmaxcdp can be used to implement the C/C++/Java conditional operation - (x>y)?x:y for single-precision and double-precision arguments." - - Note! c type and j type have different behavior when: - 1. Either input is NaN - 2. Both input are +-Infinity, +-Zero - - . dtype map to llvm fmaxnum/fminnum - jtype use intrinsic - - . xsmaxcdp xsmincdp - (set f64:$XT, (fmaxnum f64:$XA, f64:$XB)) - (set f64:$XT, (fminnum f64:$XA, f64:$XB)) - - . xsmaxjdp xsminjdp - (set f64:$XT, (int_ppc_vsx_xsmaxjdp f64:$XA, f64:$XB)) - (set f64:$XT, (int_ppc_vsx_xsminjdp f64:$XA, f64:$XB)) - -- Vector Byte-Reverse H/W/D/Q Word: xxbrh xxbrw xxbrd xxbrq - . Use intrinsic - (set v8i16:$XT, (int_ppc_vsx_xxbrh v8i16:$XB)) - (set v4i32:$XT, (int_ppc_vsx_xxbrw v4i32:$XB)) - (set v2i64:$XT, (int_ppc_vsx_xxbrd v2i64:$XB)) - (set v1i128:$XT, (int_ppc_vsx_xxbrq v1i128:$XB)) - -- Vector Permute: xxperm xxpermr - . I have checked "PPCxxswapd" in PPCInstrVSX.td, but they are different - . Use intrinsic - (set v16i8:$XT, (int_ppc_vsx_xxperm v16i8:$XA, v16i8:$XB)) - (set v16i8:$XT, (int_ppc_vsx_xxpermr v16i8:$XA, v16i8:$XB)) - -- Vector Splat Immediate Byte: xxspltib - . 
Similar to XXSPLTW: - def XXSPLTW : XX2Form_2<60, 164, - (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), - "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; - - . No SDAG, intrinsic, builtin are required? - -- Load/Store Vector: lxv stxv - . Has likely SDAG match: - (set v?:$XT, (load ix16addr:$src)) - (set v?:$XT, (store ix16addr:$dst)) - - . Need define ix16addr in PPCInstrInfo.td - ix16addr: 16-byte aligned, see "def memrix16" in PPCInstrInfo.td - -- Load/Store Vector Indexed: lxvx stxvx - . Has likely SDAG match: - (set v?:$XT, (load xoaddr:$src)) - (set v?:$XT, (store xoaddr:$dst)) - -- Load/Store DWord: lxsd stxsd - . Similar to lxsdx/stxsdx: - def LXSDX : XX1Form<31, 588, - (outs vsfrc:$XT), (ins memrr:$src), - "lxsdx $XT, $src", IIC_LdStLFD, - [(set f64:$XT, (load xoaddr:$src))]>; - - . (set f64:$XT, (load iaddrX4:$src)) - (set f64:$XT, (store iaddrX4:$dst)) - -- Load/Store SP, with conversion from/to DP: lxssp stxssp - . Similar to lxsspx/stxsspx: - def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), - "lxsspx $XT, $src", IIC_LdStLFD, - [(set f32:$XT, (load xoaddr:$src))]>; - - . (set f32:$XT, (load iaddrX4:$src)) - (set f32:$XT, (store iaddrX4:$dst)) - -- Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx - . Similar to lxsiwzx: - def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src), - "lxsiwzx $XT, $src", IIC_LdStLFD, - [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; - - . (set f64:$XT, (PPClfiwzx xoaddr:$src)) - -- Store as Integer Byte/Halfword Indexed: stxsibx stxsihx - . Similar to stxsiwx: - def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), - "stxsiwx $XT, $dst", IIC_LdStSTFD, - [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; - - . (PPCstfiwx f64:$XT, xoaddr:$dst) - -- Load Vector Halfword*8/Byte*16 Indexed: lxvh8x lxvb16x - . Similar to lxvd2x/lxvw4x: - def LXVD2X : XX1Form<31, 844, - (outs vsrc:$XT), (ins memrr:$src), - "lxvd2x $XT, $src", IIC_LdStLFD, - [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>; - - . 
(set v8i16:$XT, (int_ppc_vsx_lxvh8x xoaddr:$src)) - (set v16i8:$XT, (int_ppc_vsx_lxvb16x xoaddr:$src)) - -- Store Vector Halfword*8/Byte*16 Indexed: stxvh8x stxvb16x - . Similar to stxvd2x/stxvw4x: - def STXVD2X : XX1Form<31, 972, - (outs), (ins vsrc:$XT, memrr:$dst), - "stxvd2x $XT, $dst", IIC_LdStSTFD, - [(store v2f64:$XT, xoaddr:$dst)]>; - - . (store v8i16:$XT, xoaddr:$dst) - (store v16i8:$XT, xoaddr:$dst) - -- Load/Store Vector (Left-justified) with Length: lxvl lxvll stxvl stxvll - . Likely needs an intrinsic - . (set v?:$XT, (int_ppc_vsx_lxvl xoaddr:$src)) - (set v?:$XT, (int_ppc_vsx_lxvll xoaddr:$src)) - - . (int_ppc_vsx_stxvl xoaddr:$dst)) - (int_ppc_vsx_stxvll xoaddr:$dst)) - -- Load Vector Word & Splat Indexed: lxvwsx - . Likely needs an intrinsic - . (set v?:$XT, (int_ppc_vsx_lxvwsx xoaddr:$src)) - -Atomic operations (l[dw]at, st[dw]at): -- Provide custom lowering for common atomic operations to use these - instructions with the correct Function Code -- Ensure the operands are in the correct register (i.e. RT+1, RT+2) -- Provide builtins since not all FC's necessarily have an existing LLVM - atomic operation - -Load Doubleword Monitored (ldmx): -- Investigate whether there are any uses for this. It seems to be related to - Garbage Collection so it isn't likely to be all that useful for most - languages we deal with. - -Move to CR from XER Extended (mcrxrx): -- Is there a use for this in LLVM? - -Fixed Point Facility: - -- Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last - . Use instrinstics: - (int_ppc_copy_first i32:$rA, i32:$rB) - (int_ppc_copy i32:$rA, i32:$rB) - - (int_ppc_paste i32:$rA, i32:$rB) - (int_ppc_paste_last i32:$rA, i32:$rB) - - (int_cp_abort) - -- Message Synchronize: msgsync -- SLB*: slbieg slbsync -- stop - . 
No instrinstics +//===- README_P9.txt - Notes for improving Power9 code gen ----------------===// + +TODO: Instructions That Need Intrinsics Implemented or Mapped to LLVM IR + +Altivec: +- Vector Compare Not Equal (Zero): + vcmpneb(.) vcmpneh(.) vcmpnew(.) + vcmpnezb(.) vcmpnezh(.) vcmpnezw(.) + . Same as other VCMP*, use VCMP/VCMPo form (support intrinsic) + +- Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd + . Don't use llvm extractelement because they have different semantics + . Use intrinsics: + (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vextractd v2i64:$vA, imm:$UIMM)) + +- Vector Extract Unsigned Byte Left/Right-Indexed: + vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx + . Use intrinsics: + // Left-Indexed + (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB)) + + // Right-Indexed + (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB)) + (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB)) + +- Vector Insert Element Instructions: vinsertb vinsertd vinserth vinsertw + (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM)) + (set v8i16:$vD, (int_ppc_altivec_vinserth v8i16:$vA, imm:$UIMM)) + (set v4i32:$vD, (int_ppc_altivec_vinsertw v4i32:$vA, imm:$UIMM)) + (set v2i64:$vD, (int_ppc_altivec_vinsertd v2i64:$vA, imm:$UIMM)) + +- Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]: + vclzlsbb vctzlsbb + . Use intrinsic: + (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB)) + +- Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd + . 
Map to llvm cttz + (set v16i8:$vD, (cttz v16i8:$vB)) // vctzb + (set v8i16:$vD, (cttz v8i16:$vB)) // vctzh + (set v4i32:$vD, (cttz v4i32:$vB)) // vctzw + (set v2i64:$vD, (cttz v2i64:$vB)) // vctzd + +- Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d + . vextsb2w: + (set v4i32:$vD, (sext v4i8:$vB)) + + // PowerISA_V3.0: + do i = 0 to 3 + VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3]) + end + + . vextsh2w: + (set v4i32:$vD, (sext v4i16:$vB)) + + // PowerISA_V3.0: + do i = 0 to 3 + VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1]) + end + + . vextsb2d + (set v2i64:$vD, (sext v2i8:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7]) + end + + . vextsh2d + (set v2i64:$vD, (sext v2i16:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3]) + end + + . vextsw2d + (set v2i64:$vD, (sext v2i32:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1]) + end + +- Vector Integer Negate: vnegw vnegd + . Map to llvm ineg + (set v4i32:$rT, (ineg v4i32:$rA)) // vnegw + (set v2i64:$rT, (ineg v2i64:$rA)) // vnegd + +- Vector Parity Byte: vprtybw vprtybd vprtybq + . Use intrinsic: + (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB)) + (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB)) + (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB)) + +- Vector (Bit) Permute (Right-indexed): + . vbpermd: Same as "vbpermq", use VX1_Int_Ty2: + VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>; + + . vpermr: use VA1a_Int_Ty3 + VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>; + +- Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi + . 
Use intrinsic: + VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>; + VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>; + VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>; + VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>; + +- Vector Shift Left/Right: vslv vsrv + . Use intrinsic, don't map to llvm shl and lshr, because they have different + semantics, e.g. vslv: + + do i = 0 to 15 + sh ← VR[VRB].byte[i].bit[5:7] + VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7] + end + + VR[VRT].byte[i] is composed of 2 bytes from src.byte[i:i+1] + + . VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>; + VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>; + +- Vector Multiply-by-10 (& Write Carry) Unsigned Quadword: + vmul10uq vmul10cuq + . Use intrinsic: + VX1_Int_Ty<513, "vmul10uq", int_ppc_altivec_vmul10uq, v1i128>; + VX1_Int_Ty< 1, "vmul10cuq", int_ppc_altivec_vmul10cuq, v1i128>; + +- Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword: + vmul10euq vmul10ecuq + . Use intrinsic: + VX1_Int_Ty<577, "vmul10euq", int_ppc_altivec_vmul10euq, v1i128>; + VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>; + +- Decimal Convert From/to National/Zoned/Signed-QWord: + bcdcfn. bcdcfz. bcdctn. bcdctz. bcdcfsq. bcdctsq. + . Use instrinstics: + (set v1i128:$vD, (int_ppc_altivec_bcdcfno v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdcfzo v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdctno v1i128:$vB)) + (set v1i128:$vD, (int_ppc_altivec_bcdctzo v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdcfsqo v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcdctsqo v1i128:$vB)) + +- Decimal Copy-Sign/Set-Sign: bcdcpsgn. bcdsetsgn. + . Use instrinstics: + (set v1i128:$vD, (int_ppc_altivec_bcdcpsgno v1i128:$vA, v1i128:$vB)) + (set v1i128:$vD, (int_ppc_altivec_bcdsetsgno v1i128:$vB, i1:$PS)) + +- Decimal Shift/Unsigned-Shift/Shift-and-Round: bcds. bcdus. bcdsr. + . 
Use instrinstics: + (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB)) + (set v1i128:$vD, (int_ppc_altivec_bcdsro v1i128:$vA, v1i128:$vB, i1:$PS)) + + . Note! Their VA is accessed only 1 byte, i.e. VA.byte[7] + +- Decimal (Unsigned) Truncate: bcdtrunc. bcdutrunc. + . Use instrinstics: + (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS)) + (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB)) + + . Note! Their VA is accessed only 2 byte, i.e. VA.hword[3] (VA.bit[48:63]) + +VSX: +- QP Copy Sign: xscpsgnqp + . Similar to xscpsgndp + . (set f128:$vT, (fcopysign f128:$vB, f128:$vA) + +- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp + . Similar to xsabsdp/xsnabsdp/xsnegdp + . (set f128:$vT, (fabs f128:$vB)) // xsabsqp + (set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp + (set f128:$vT, (fneg f128:$vB)) // xsnegqp + +- QP Add/Divide/Multiply/Subtract/Square-Root: + xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp + . Similar to xsadddp + . isCommutable = 1 + (set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp + (set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp + + . isCommutable = 0 + (set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp + (set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp + (set f128:$vT, (fsqrt f128:$vB))) // xssqrtqp + +- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root: + xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo + . Similar to xsrsqrtedp?? + def XSRSQRTEDP : XX2Form<60, 74, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrsqrtedp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; + + . 
Define DAG Node in PPCInstrInfo.td: + def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>; + def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>; + def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>; + def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>; + def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>; + + DAG patterns of each instruction (PPCInstrVSX.td): + . isCommutable = 1 + (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo + (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo + + . isCommutable = 0 + (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo + (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo + (set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo + +- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp + . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp + + . isCommutable = 1 + // xsmaddqp + [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsmsubqp + [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmaddqp + [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmsubqp + [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + +- Round to Odd of QP (Negative) Multiply-{Add/Subtract}: + xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo + . Similar to xsrsqrtedp?? + + . 
Define DAG Node in PPCInstrInfo.td: + def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>; + + It looks like we only need to define "PPCfmarto" for these instructions, + because according to PowerISA_V3.0, these instructions perform RTO on + fma's result: + xsmaddqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, src2) + rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsmsubqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) + rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsnmaddqp(o) + v ← bfp_MULTIPLY_ADD(src1,src3,src2) + rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) + result ← bfp_CONVERT_TO_BFP128(rnd) + + xsnmsubqp(o) + v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) + rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) + result ← bfp_CONVERT_TO_BFP128(rnd) + + DAG patterns of each instruction (PPCInstrVSX.td): + . isCommutable = 1 + // xsmaddqpo + [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsmsubqpo + [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmaddqpo + [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + + // xsnmsubqpo + [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + AltVSXFMARel; + +- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp + . ref: XSCMPUDP + def XSCMPUDP : XX3Form_1<60, 35, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; + + . No SDAG, intrinsic, builtin are required?? + Or llvm fcmp order/unorder compare?? + +- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp + . No SDAG, intrinsic, builtin are required? 
+ +- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp + . I checked existing instruction "XSCMPUDP". They are different in target + register. "XSCMPUDP" write to CR field, xscmp*dp write to VSX register + + . Use intrinsic: + (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB)) + +- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp. + . Similar to xvcmpeqdp: + defm XVCMPEQDP : XX3Form_Rcr<60, 99, + "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, + int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; + + . So we should use "XX3Form_Rcr" to implement intrinsic + +- Convert DP -> QP: xscvdpqp + . Similar to XSCVDPSP: + def XSCVDPSP : XX2Form<60, 265, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsp $XT, $XB", IIC_VecFP, []>; + . So, No SDAG, intrinsic, builtin are required?? + +- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo + . Similar to XSCVDPSP + . No SDAG, intrinsic, builtin are required?? + +- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero): + xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz + . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS", + "XSCVDPUXDS", "XSCVDPUXWS" + + . DAG patterns: + (set f128:$XT, (PPCfctidz f128:$XB)) // xscvqpsdz + (set f128:$XT, (PPCfctiwz f128:$XB)) // xscvqpswz + (set f128:$XT, (PPCfctiduz f128:$XB)) // xscvqpudz + (set f128:$XT, (PPCfctiwuz f128:$XB)) // xscvqpuwz + +- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp + . Similar to XSCVSXDSP + . (set f128:$XT, (PPCfcfids f64:$XB)) // xscvsdqp + (set f128:$XT, (PPCfcfidus f64:$XB)) // xscvudqp + +- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp + . Similar to XSCVDPSP + . No SDAG, intrinsic, builtin are required?? + +- Vector HP -> SP: xvcvhpsp xvcvsphp + .
Similar to XVCVDPSP: + def XVCVDPSP : XX2Form<60, 393, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsp $XT, $XB", IIC_VecFP, []>; + . No SDAG, intrinsic, builtin are required?? + +- Round to Quad-Precision Integer: xsrqpi xsrqpix + . These are combination of "XSRDPI", "XSRDPIC", "XSRDPIM", .., because you + need to assign rounding mode in instruction + . Provide builtin? + (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB)) + (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB)) + +- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp + . Provide builtin? + (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB)) + +Fixed Point Facility: + +- Exploit cmprb and cmpeqb (perhaps for something like + isalpha/isdigit/isupper/islower and isspace respectively). This can + perhaps be done through a builtin. + +- Provide testing for cnttz[dw] +- Insert Exponent DP/QP: xsiexpdp xsiexpqp + . Use intrinsic? + . xsiexpdp: + // Note: rA and rB are the unsigned integer value. + (set f128:$XT, (int_ppc_vsx_xsiexpdp i64:$rA, i64:$rB)) + + . xsiexpqp: + (set f128:$vT, (int_ppc_vsx_xsiexpqp f128:$vA, f64:$vB)) + +- Extract Exponent/Significand DP/QP: xsxexpdp xsxsigdp xsxexpqp xsxsigqp + . Use intrinsic? + . (set i64:$rT, (int_ppc_vsx_xsxexpdp f64:$XB)) // xsxexpdp + (set i64:$rT, (int_ppc_vsx_xsxsigdp f64:$XB)) // xsxsigdp + (set f128:$vT, (int_ppc_vsx_xsxexpqp f128:$vB)) // xsxexpqp + (set f128:$vT, (int_ppc_vsx_xsxsigqp f128:$vB)) // xsxsigqp + +- Vector Insert Word: xxinsertw + - Useful for inserting f32/i32 elements into vectors (the element to be + inserted needs to be prepared) + . Note: llvm has insertelem in "Vector Operations" + ; yields <n x <ty>> + <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx> + + But how to map to it?? + [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + + . Or use intrinsic?
+ (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM)) + +- Vector Extract Unsigned Word: xxextractuw + - Not useful for extraction of f32 from v4f32 (the current pattern is better - + shift->convert) + - It is useful for (uint_to_fp (vector_extract v4i32, N)) + - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N)) + . Note: llvm has extractelement in "Vector Operations" + ; yields <ty> + <result> = extractelement <n x <ty>> <val>, <ty2> <idx> + + How to map to it?? + [(set f128:$XT, (extractelement v1f128:$XB, i4:$UIMM))] + + . Or use intrinsic? + (set f128:$XT, (int_ppc_vsx_xxextractuw v1f128:$XB, i4:$UIMM)) + +- Vector Insert Exponent DP/SP: xviexpdp xviexpsp + . Use intrinsic + (set v2f64:$XT, (int_ppc_vsx_xviexpdp v2f64:$XA, v2f64:$XB)) + (set v4f32:$XT, (int_ppc_vsx_xviexpsp v4f32:$XA, v4f32:$XB)) + +- Vector Extract Exponent/Significand DP/SP: xvxexpdp xvxexpsp xvxsigdp xvxsigsp + . Use intrinsic + (set v2f64:$XT, (int_ppc_vsx_xvxexpdp v2f64:$XB)) + (set v4f32:$XT, (int_ppc_vsx_xvxexpsp v4f32:$XB)) + (set v2f64:$XT, (int_ppc_vsx_xvxsigdp v2f64:$XB)) + (set v4f32:$XT, (int_ppc_vsx_xvxsigsp v4f32:$XB)) + +- Test Data Class SP/DP/QP: xststdcsp xststdcdp xststdcqp + . No SDAG, intrinsic, builtin are required? + Because it seems that we have no way to map BF field? + + Instruction Form: [PO T XO B XO BX TX] + Asm: xststd* BF,XB,DCMX + + BF is an index to CR register field. + +- Vector Test Data Class SP/DP: xvtstdcsp xvtstdcdp + . Use intrinsic + (set v4f32:$XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, i7:$DCMX)) + (set v2f64:$XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, i7:$DCMX)) + +- Maximum/Minimum Type-C/Type-J DP: xsmaxcdp xsmaxjdp xsmincdp xsminjdp + . PowerISA_V3.0: + "xsmaxcdp can be used to implement the C/C++/Java conditional operation + (x>y)?x:y for single-precision and double-precision arguments." + + Note! c type and j type have different behavior when: + 1. Either input is NaN + 2. 
Both input are +-Infinity, +-Zero + + . dtype map to llvm fmaxnum/fminnum + jtype use intrinsic + + . xsmaxcdp xsmincdp + (set f64:$XT, (fmaxnum f64:$XA, f64:$XB)) + (set f64:$XT, (fminnum f64:$XA, f64:$XB)) + + . xsmaxjdp xsminjdp + (set f64:$XT, (int_ppc_vsx_xsmaxjdp f64:$XA, f64:$XB)) + (set f64:$XT, (int_ppc_vsx_xsminjdp f64:$XA, f64:$XB)) + +- Vector Byte-Reverse H/W/D/Q Word: xxbrh xxbrw xxbrd xxbrq + . Use intrinsic + (set v8i16:$XT, (int_ppc_vsx_xxbrh v8i16:$XB)) + (set v4i32:$XT, (int_ppc_vsx_xxbrw v4i32:$XB)) + (set v2i64:$XT, (int_ppc_vsx_xxbrd v2i64:$XB)) + (set v1i128:$XT, (int_ppc_vsx_xxbrq v1i128:$XB)) + +- Vector Permute: xxperm xxpermr + . I have checked "PPCxxswapd" in PPCInstrVSX.td, but they are different + . Use intrinsic + (set v16i8:$XT, (int_ppc_vsx_xxperm v16i8:$XA, v16i8:$XB)) + (set v16i8:$XT, (int_ppc_vsx_xxpermr v16i8:$XA, v16i8:$XB)) + +- Vector Splat Immediate Byte: xxspltib + . Similar to XXSPLTW: + def XXSPLTW : XX2Form_2<60, 164, + (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), + "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; + + . No SDAG, intrinsic, builtin are required? + +- Load/Store Vector: lxv stxv + . Has likely SDAG match: + (set v?:$XT, (load ix16addr:$src)) + (set v?:$XT, (store ix16addr:$dst)) + + . Need define ix16addr in PPCInstrInfo.td + ix16addr: 16-byte aligned, see "def memrix16" in PPCInstrInfo.td + +- Load/Store Vector Indexed: lxvx stxvx + . Has likely SDAG match: + (set v?:$XT, (load xoaddr:$src)) + (set v?:$XT, (store xoaddr:$dst)) + +- Load/Store DWord: lxsd stxsd + . Similar to lxsdx/stxsdx: + def LXSDX : XX1Form<31, 588, + (outs vsfrc:$XT), (ins memrr:$src), + "lxsdx $XT, $src", IIC_LdStLFD, + [(set f64:$XT, (load xoaddr:$src))]>; + + . (set f64:$XT, (load iaddrX4:$src)) + (set f64:$XT, (store iaddrX4:$dst)) + +- Load/Store SP, with conversion from/to DP: lxssp stxssp + . 
Similar to lxsspx/stxsspx: + def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), + "lxsspx $XT, $src", IIC_LdStLFD, + [(set f32:$XT, (load xoaddr:$src))]>; + + . (set f32:$XT, (load iaddrX4:$src)) + (set f32:$XT, (store iaddrX4:$dst)) + +- Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx + . Similar to lxsiwzx: + def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src), + "lxsiwzx $XT, $src", IIC_LdStLFD, + [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; + + . (set f64:$XT, (PPClfiwzx xoaddr:$src)) + +- Store as Integer Byte/Halfword Indexed: stxsibx stxsihx + . Similar to stxsiwx: + def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), + "stxsiwx $XT, $dst", IIC_LdStSTFD, + [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; + + . (PPCstfiwx f64:$XT, xoaddr:$dst) + +- Load Vector Halfword*8/Byte*16 Indexed: lxvh8x lxvb16x + . Similar to lxvd2x/lxvw4x: + def LXVD2X : XX1Form<31, 844, + (outs vsrc:$XT), (ins memrr:$src), + "lxvd2x $XT, $src", IIC_LdStLFD, + [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>; + + . (set v8i16:$XT, (int_ppc_vsx_lxvh8x xoaddr:$src)) + (set v16i8:$XT, (int_ppc_vsx_lxvb16x xoaddr:$src)) + +- Store Vector Halfword*8/Byte*16 Indexed: stxvh8x stxvb16x + . Similar to stxvd2x/stxvw4x: + def STXVD2X : XX1Form<31, 972, + (outs), (ins vsrc:$XT, memrr:$dst), + "stxvd2x $XT, $dst", IIC_LdStSTFD, + [(store v2f64:$XT, xoaddr:$dst)]>; + + . (store v8i16:$XT, xoaddr:$dst) + (store v16i8:$XT, xoaddr:$dst) + +- Load/Store Vector (Left-justified) with Length: lxvl lxvll stxvl stxvll + . Likely needs an intrinsic + . (set v?:$XT, (int_ppc_vsx_lxvl xoaddr:$src)) + (set v?:$XT, (int_ppc_vsx_lxvll xoaddr:$src)) + + . (int_ppc_vsx_stxvl xoaddr:$dst)) + (int_ppc_vsx_stxvll xoaddr:$dst)) + +- Load Vector Word & Splat Indexed: lxvwsx + . Likely needs an intrinsic + . 
(set v?:$XT, (int_ppc_vsx_lxvwsx xoaddr:$src)) + +Atomic operations (l[dw]at, st[dw]at): +- Provide custom lowering for common atomic operations to use these + instructions with the correct Function Code +- Ensure the operands are in the correct register (i.e. RT+1, RT+2) +- Provide builtins since not all FC's necessarily have an existing LLVM + atomic operation + +Load Doubleword Monitored (ldmx): +- Investigate whether there are any uses for this. It seems to be related to + Garbage Collection so it isn't likely to be all that useful for most + languages we deal with. + +Move to CR from XER Extended (mcrxrx): +- Is there a use for this in LLVM? + +Fixed Point Facility: + +- Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last + . Use intrinsics: + (int_ppc_copy_first i32:$rA, i32:$rB) + (int_ppc_copy i32:$rA, i32:$rB) + + (int_ppc_paste i32:$rA, i32:$rB) + (int_ppc_paste_last i32:$rA, i32:$rB) + + (int_cp_abort) + +- Message Synchronize: msgsync +- SLB*: slbieg slbsync +- stop + . No intrinsics diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make index 9903560dcc..68badb4490 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/TargetInfo/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Support diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/ya.make b/contrib/libs/llvm12/lib/Target/PowerPC/ya.make index a6812524a8..8c7039a575 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/ya.make +++ b/contrib/libs/llvm12/lib/Target/PowerPC/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/README.txt b/contrib/libs/llvm12/lib/Target/README.txt index d918287ed0..e172abbbd8 100644 --- a/contrib/libs/llvm12/lib/Target/README.txt +++ b/contrib/libs/llvm12/lib/Target/README.txt @@ -1,2279 +1,2279 @@ -Target Independent Opportunities: - -//===---------------------------------------------------------------------===// - -We should recognized various "overflow detection" idioms and translate them into -llvm.uadd.with.overflow and similar intrinsics. 
Here is a multiply idiom: - -unsigned int mul(unsigned int a,unsigned int b) { - if ((unsigned long long)a*b>0xffffffff) - exit(0); - return a*b; -} - -The legalization code for mul-with-overflow needs to be made more robust before -this can be implemented though. - -//===---------------------------------------------------------------------===// - -Get the C front-end to expand hypot(x,y) -> llvm.sqrt(x*x+y*y) when errno and -precision don't matter (ffastmath). Misc/mandel will like this. :) This isn't -safe in general, even on darwin. See the libm implementation of hypot for -examples (which special case when x/y are exactly zero to get signed zeros etc -right). - -//===---------------------------------------------------------------------===// - -On targets with expensive 64-bit multiply, we could LSR this: - -for (i = ...; ++i) { - x = 1ULL << i; - -into: - long long tmp = 1; - for (i = ...; ++i, tmp+=tmp) - x = tmp; - -This would be a win on ppc32, but not x86 or ppc64. - -//===---------------------------------------------------------------------===// - -Shrink: (setlt (loadi32 P), 0) -> (setlt (loadi8 Phi), 0) - -//===---------------------------------------------------------------------===// - -Reassociate should turn things like: - -int factorial(int X) { - return X*X*X*X*X*X*X*X; -} - -into llvm.powi calls, allowing the code generator to produce balanced -multiplication trees. - -First, the intrinsic needs to be extended to support integers, and second the -code generator needs to be enhanced to lower these to multiplication trees. - -//===---------------------------------------------------------------------===// - -Interesting? testcase for add/shift/mul reassoc: - -int bar(int x, int y) { - return x*x*x+y+x*x*x*x*x*y*y*y*y; -} -int foo(int z, int n) { - return bar(z, n) + bar(2*z, 2*n); -} - -This is blocked on not handling X*X*X -> powi(X, 3) (see note above). 
The issue -is that we end up getting t = 2*X s = t*t and don't turn this into 4*X*X, -which is the same number of multiplies and is canonical, because the 2*X has -multiple uses. Here's a simple example: - -define i32 @test15(i32 %X1) { - %B = mul i32 %X1, 47 ; X1*47 - %C = mul i32 %B, %B - ret i32 %C -} - - -//===---------------------------------------------------------------------===// - -Reassociate should handle the example in GCC PR16157: - -extern int a0, a1, a2, a3, a4; extern int b0, b1, b2, b3, b4; -void f () { /* this can be optimized to four additions... */ - b4 = a4 + a3 + a2 + a1 + a0; - b3 = a3 + a2 + a1 + a0; - b2 = a2 + a1 + a0; - b1 = a1 + a0; -} - -This requires reassociating to forms of expressions that are already available, -something that reassoc doesn't think about yet. - - -//===---------------------------------------------------------------------===// - -These two functions should generate the same code on big-endian systems: - -int g(int *j,int *l) { return memcmp(j,l,4); } -int h(int *j, int *l) { return *j - *l; } - -this could be done in SelectionDAGISel.cpp, along with other special cases, -for 1,2,4,8 bytes. - -//===---------------------------------------------------------------------===// - -It would be nice to revert this patch: -http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html - -And teach the dag combiner enough to simplify the code expanded before -legalize. It seems plausible that this knowledge would let it simplify other -stuff too. - -//===---------------------------------------------------------------------===// - -For vector types, DataLayout.cpp::getTypeInfo() returns alignment that is equal -to the type size. It works but can be overly conservative as the alignment of -specific vector types are target dependent. 
- -//===---------------------------------------------------------------------===// - -We should produce an unaligned load from code like this: - -v4sf example(float *P) { - return (v4sf){P[0], P[1], P[2], P[3] }; -} - -//===---------------------------------------------------------------------===// - -Add support for conditional increments, and other related patterns. Instead -of: - - movl 136(%esp), %eax - cmpl $0, %eax - je LBB16_2 #cond_next -LBB16_1: #cond_true - incl _foo -LBB16_2: #cond_next - -emit: - movl _foo, %eax - cmpl $1, %edi - sbbl $-1, %eax - movl %eax, _foo - -//===---------------------------------------------------------------------===// - -Combine: a = sin(x), b = cos(x) into a,b = sincos(x). - -Expand these to calls of sin/cos and stores: - double sincos(double x, double *sin, double *cos); - float sincosf(float x, float *sin, float *cos); - long double sincosl(long double x, long double *sin, long double *cos); - -Doing so could allow SROA of the destination pointers. See also: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17687 - -This is now easily doable with MRVs. We could even make an intrinsic for this -if anyone cared enough about sincos. - -//===---------------------------------------------------------------------===// - -quantum_sigma_x in 462.libquantum contains the following loop: - - for(i=0; i<reg->size; i++) - { - /* Flip the target bit of each basis state */ - reg->node[i].state ^= ((MAX_UNSIGNED) 1 << target); - } - -Where MAX_UNSIGNED/state is a 64-bit int. On a 32-bit platform it would be just -so cool to turn it into something like: - - long long Res = ((MAX_UNSIGNED) 1 << target); - if (target < 32) { - for(i=0; i<reg->size; i++) - reg->node[i].state ^= Res & 0xFFFFFFFFULL; - } else { - for(i=0; i<reg->size; i++) - reg->node[i].state ^= Res & 0xFFFFFFFF00000000ULL - } - -... which would only do one 32-bit XOR per loop iteration instead of two. 
- -It would also be nice to recognize the reg->size doesn't alias reg->node[i], -but this requires TBAA. - -//===---------------------------------------------------------------------===// - -This isn't recognized as bswap by instcombine (yes, it really is bswap): - -unsigned long reverse(unsigned v) { - unsigned t; - t = v ^ ((v << 16) | (v >> 16)); - t &= ~0xff0000; - v = (v << 24) | (v >> 8); - return v ^ (t >> 8); -} - -//===---------------------------------------------------------------------===// - -[LOOP DELETION] - -We don't delete this output free loop, because trip count analysis doesn't -realize that it is finite (if it were infinite, it would be undefined). Not -having this blocks Loop Idiom from matching strlen and friends. - -void foo(char *C) { - int x = 0; - while (*C) - ++x,++C; -} - -//===---------------------------------------------------------------------===// - -[LOOP RECOGNITION] - -These idioms should be recognized as popcount (see PR1488): - -unsigned countbits_slow(unsigned v) { - unsigned c; - for (c = 0; v; v >>= 1) - c += v & 1; - return c; -} - -unsigned int popcount(unsigned int input) { - unsigned int count = 0; - for (unsigned int i = 0; i < 4 * 8; i++) - count += (input >> i) & i; - return count; -} - -This should be recognized as CLZ: rdar://8459039 - -unsigned clz_a(unsigned a) { - int i; - for (i=0;i<32;i++) - if (a & (1<<(31-i))) - return i; - return 32; -} - -This sort of thing should be added to the loop idiom pass. - -//===---------------------------------------------------------------------===// - -These should turn into single 16-bit (unaligned?) loads on little/big endian -processors. 
- -unsigned short read_16_le(const unsigned char *adr) { - return adr[0] | (adr[1] << 8); -} -unsigned short read_16_be(const unsigned char *adr) { - return (adr[0] << 8) | adr[1]; -} - -//===---------------------------------------------------------------------===// - --instcombine should handle this transform: - icmp pred (sdiv X / C1 ), C2 -when X, C1, and C2 are unsigned. Similarly for udiv and signed operands. - -Currently InstCombine avoids this transform but will do it when the signs of -the operands and the sign of the divide match. See the FIXME in -InstructionCombining.cpp in the visitSetCondInst method after the switch case -for Instruction::UDiv (around line 4447) for more details. - -The SingleSource/Benchmarks/Shootout-C++/hash and hash2 tests have examples of -this construct. - -//===---------------------------------------------------------------------===// - -[LOOP OPTIMIZATION] - -SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization -opportunities in its double_array_divs_variable function: it needs loop -interchange, memory promotion (which LICM already does), vectorization and -variable trip count loop unrolling (since it has a constant trip count). ICC -apparently produces this very nice code with -ffast-math: - -..B1.70: # Preds ..B1.70 ..B1.69 - mulpd %xmm0, %xmm1 #108.2 - mulpd %xmm0, %xmm1 #108.2 - mulpd %xmm0, %xmm1 #108.2 - mulpd %xmm0, %xmm1 #108.2 - addl $8, %edx # - cmpl $131072, %edx #108.2 - jb ..B1.70 # Prob 99% #108.2 - -It would be better to count down to zero, but this is a lot better than what we -do. 
- -//===---------------------------------------------------------------------===// - -Consider: - -typedef unsigned U32; -typedef unsigned long long U64; -int test (U32 *inst, U64 *regs) { - U64 effective_addr2; - U32 temp = *inst; - int r1 = (temp >> 20) & 0xf; - int b2 = (temp >> 16) & 0xf; - effective_addr2 = temp & 0xfff; - if (b2) effective_addr2 += regs[b2]; - b2 = (temp >> 12) & 0xf; - if (b2) effective_addr2 += regs[b2]; - effective_addr2 &= regs[4]; - if ((effective_addr2 & 3) == 0) - return 1; - return 0; -} - -Note that only the low 2 bits of effective_addr2 are used. On 32-bit systems, -we don't eliminate the computation of the top half of effective_addr2 because -we don't have whole-function selection dags. On x86, this means we use one -extra register for the function when effective_addr2 is declared as U64 than -when it is declared U32. - -PHI Slicing could be extended to do this. - -//===---------------------------------------------------------------------===// - -Tail call elim should be more aggressive, checking to see if the call is -followed by an uncond branch to an exit block. - -; This testcase is due to tail-duplication not wanting to copy the return -; instruction into the terminating blocks because there was other code -; optimized out of the function after the taildup happened. 
-; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | not grep call - -define i32 @t4(i32 %a) { -entry: - %tmp.1 = and i32 %a, 1 ; <i32> [#uses=1] - %tmp.2 = icmp ne i32 %tmp.1, 0 ; <i1> [#uses=1] - br i1 %tmp.2, label %then.0, label %else.0 - -then.0: ; preds = %entry - %tmp.5 = add i32 %a, -1 ; <i32> [#uses=1] - %tmp.3 = call i32 @t4( i32 %tmp.5 ) ; <i32> [#uses=1] - br label %return - -else.0: ; preds = %entry - %tmp.7 = icmp ne i32 %a, 0 ; <i1> [#uses=1] - br i1 %tmp.7, label %then.1, label %return - -then.1: ; preds = %else.0 - %tmp.11 = add i32 %a, -2 ; <i32> [#uses=1] - %tmp.9 = call i32 @t4( i32 %tmp.11 ) ; <i32> [#uses=1] - br label %return - -return: ; preds = %then.1, %else.0, %then.0 - %result.0 = phi i32 [ 0, %else.0 ], [ %tmp.3, %then.0 ], - [ %tmp.9, %then.1 ] - ret i32 %result.0 -} - -//===---------------------------------------------------------------------===// - -Tail recursion elimination should handle: - -int pow2m1(int n) { - if (n == 0) - return 0; - return 2 * pow2m1 (n - 1) + 1; -} - -Also, multiplies can be turned into SHL's, so they should be handled as if -they were associative. "return foo() << 1" can be tail recursion eliminated. - -//===---------------------------------------------------------------------===// - -Argument promotion should promote arguments for recursive functions, like -this: - -; RUN: llvm-as < %s | opt -argpromotion | llvm-dis | grep x.val - -define internal i32 @foo(i32* %x) { -entry: - %tmp = load i32* %x ; <i32> [#uses=0] - %tmp.foo = call i32 @foo( i32* %x ) ; <i32> [#uses=1] - ret i32 %tmp.foo -} - -define i32 @bar(i32* %x) { -entry: - %tmp3 = call i32 @foo( i32* %x ) ; <i32> [#uses=1] - ret i32 %tmp3 -} - -//===---------------------------------------------------------------------===// - -We should investigate an instruction sinking pass. Consider this silly -example in pic mode: - -#include <assert.h> -void foo(int x) { - assert(x); - //... 
-} - -we compile this to: -_foo: - subl $28, %esp - call "L1$pb" -"L1$pb": - popl %eax - cmpl $0, 32(%esp) - je LBB1_2 # cond_true -LBB1_1: # return - # ... - addl $28, %esp - ret -LBB1_2: # cond_true -... - -The PIC base computation (call+popl) is only used on one path through the -code, but is currently always computed in the entry block. It would be -better to sink the picbase computation down into the block for the -assertion, as it is the only one that uses it. This happens for a lot of -code with early outs. - -Another example is loads of arguments, which are usually emitted into the -entry block on targets like x86. If not used in all paths through a -function, they should be sunk into the ones that do. - -In this case, whole-function-isel would also handle this. - -//===---------------------------------------------------------------------===// - -Investigate lowering of sparse switch statements into perfect hash tables: -http://burtleburtle.net/bob/hash/perfect.html - -//===---------------------------------------------------------------------===// - -We should turn things like "load+fabs+store" and "load+fneg+store" into the -corresponding integer operations. On a yonah, this loop: - -double a[256]; -void foo() { - int i, b; - for (b = 0; b < 10000000; b++) - for (i = 0; i < 256; i++) - a[i] = -a[i]; -} - -is twice as slow as this loop: - -long long a[256]; -void foo() { - int i, b; - for (b = 0; b < 10000000; b++) - for (i = 0; i < 256; i++) - a[i] ^= (1ULL << 63); -} - -and I suspect other processors are similar. On X86 in particular this is a -big win because doing this with integers allows the use of read/modify/write -instructions. - -//===---------------------------------------------------------------------===// - -DAG Combiner should try to combine small loads into larger loads when -profitable. 
For example, we compile this C++ example: - -struct THotKey { short Key; bool Control; bool Shift; bool Alt; }; -extern THotKey m_HotKey; -THotKey GetHotKey () { return m_HotKey; } - -into (-m64 -O3 -fno-exceptions -static -fomit-frame-pointer): - -__Z9GetHotKeyv: ## @_Z9GetHotKeyv - movq _m_HotKey@GOTPCREL(%rip), %rax - movzwl (%rax), %ecx - movzbl 2(%rax), %edx - shlq $16, %rdx - orq %rcx, %rdx - movzbl 3(%rax), %ecx - shlq $24, %rcx - orq %rdx, %rcx - movzbl 4(%rax), %eax - shlq $32, %rax - orq %rcx, %rax - ret - -//===---------------------------------------------------------------------===// - -We should add an FRINT node to the DAG to model targets that have legal -implementations of ceil/floor/rint. - -//===---------------------------------------------------------------------===// - -Consider: - -int test() { - long long input[8] = {1,0,1,0,1,0,1,0}; - foo(input); -} - -Clang compiles this into: - - call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 16, i1 false) - %0 = getelementptr [8 x i64]* %input, i64 0, i64 0 - store i64 1, i64* %0, align 16 - %1 = getelementptr [8 x i64]* %input, i64 0, i64 2 - store i64 1, i64* %1, align 16 - %2 = getelementptr [8 x i64]* %input, i64 0, i64 4 - store i64 1, i64* %2, align 16 - %3 = getelementptr [8 x i64]* %input, i64 0, i64 6 - store i64 1, i64* %3, align 16 - -Which gets codegen'd into: - - pxor %xmm0, %xmm0 - movaps %xmm0, -16(%rbp) - movaps %xmm0, -32(%rbp) - movaps %xmm0, -48(%rbp) - movaps %xmm0, -64(%rbp) - movq $1, -64(%rbp) - movq $1, -48(%rbp) - movq $1, -32(%rbp) - movq $1, -16(%rbp) - -It would be better to have 4 movq's of 0 instead of the movaps's. - -//===---------------------------------------------------------------------===// - -http://llvm.org/PR717: - -The following code should compile into "ret int undef". 
Instead, LLVM -produces "ret int 0": - -int f() { - int x = 4; - int y; - if (x == 3) y = 0; - return y; -} - -//===---------------------------------------------------------------------===// - -The loop unroller should partially unroll loops (instead of peeling them) -when code growth isn't too bad and when an unroll count allows simplification -of some code within the loop. One trivial example is: - -#include <stdio.h> -int main() { - int nRet = 17; - int nLoop; - for ( nLoop = 0; nLoop < 1000; nLoop++ ) { - if ( nLoop & 1 ) - nRet += 2; - else - nRet -= 1; - } - return nRet; -} - -Unrolling by 2 would eliminate the '&1' in both copies, leading to a net -reduction in code size. The resultant code would then also be suitable for -exit value computation. - -//===---------------------------------------------------------------------===// - -We miss a bunch of rotate opportunities on various targets, including ppc, x86, -etc. On X86, we miss a bunch of 'rotate by variable' cases because the rotate -matching code in dag combine doesn't look through truncates aggressively -enough. 
Here are some testcases reduces from GCC PR17886: - -unsigned long long f5(unsigned long long x, unsigned long long y) { - return (x << 8) | ((y >> 48) & 0xffull); -} -unsigned long long f6(unsigned long long x, unsigned long long y, int z) { - switch(z) { - case 1: - return (x << 8) | ((y >> 48) & 0xffull); - case 2: - return (x << 16) | ((y >> 40) & 0xffffull); - case 3: - return (x << 24) | ((y >> 32) & 0xffffffull); - case 4: - return (x << 32) | ((y >> 24) & 0xffffffffull); - default: - return (x << 40) | ((y >> 16) & 0xffffffffffull); - } -} - -//===---------------------------------------------------------------------===// - -This (and similar related idioms): - -unsigned int foo(unsigned char i) { - return i | (i<<8) | (i<<16) | (i<<24); -} - -compiles into: - -define i32 @foo(i8 zeroext %i) nounwind readnone ssp noredzone { -entry: - %conv = zext i8 %i to i32 - %shl = shl i32 %conv, 8 - %shl5 = shl i32 %conv, 16 - %shl9 = shl i32 %conv, 24 - %or = or i32 %shl9, %conv - %or6 = or i32 %or, %shl5 - %or10 = or i32 %or6, %shl - ret i32 %or10 -} - -it would be better as: - -unsigned int bar(unsigned char i) { - unsigned int j=i | (i << 8); - return j | (j<<16); -} - -aka: - -define i32 @bar(i8 zeroext %i) nounwind readnone ssp noredzone { -entry: - %conv = zext i8 %i to i32 - %shl = shl i32 %conv, 8 - %or = or i32 %shl, %conv - %shl5 = shl i32 %or, 16 - %or6 = or i32 %shl5, %or - ret i32 %or6 -} - -or even i*0x01010101, depending on the speed of the multiplier. The best way to -handle this is to canonicalize it to a multiply in IR and have codegen handle -lowering multiplies to shifts on cpus where shifts are faster. - -//===---------------------------------------------------------------------===// - -We do a number of simplifications in simplify libcalls to strength reduce -standard library functions, but we don't currently merge them together. For -example, it is useful to merge memcpy(a,b,strlen(b)) -> strcpy. 
This can only -be done safely if "b" isn't modified between the strlen and memcpy of course. - -//===---------------------------------------------------------------------===// - -We compile this program: (from GCC PR11680) -http://gcc.gnu.org/bugzilla/attachment.cgi?id=4487 - -Into code that runs the same speed in fast/slow modes, but both modes run 2x -slower than when compiled with GCC (either 4.0 or 4.2): - -$ llvm-g++ perf.cpp -O3 -fno-exceptions -$ time ./a.out fast -1.821u 0.003s 0:01.82 100.0% 0+0k 0+0io 0pf+0w - -$ g++ perf.cpp -O3 -fno-exceptions -$ time ./a.out fast -0.821u 0.001s 0:00.82 100.0% 0+0k 0+0io 0pf+0w - -It looks like we are making the same inlining decisions, so this may be raw -codegen badness or something else (haven't investigated). - -//===---------------------------------------------------------------------===// - -Divisibility by constant can be simplified (according to GCC PR12849) from -being a mulhi to being a mul lo (cheaper). Testcase: - -void bar(unsigned n) { - if (n % 3 == 0) - true(); -} - -This is equivalent to the following, where 2863311531 is the multiplicative -inverse of 3, and 1431655766 is ((2^32)-1)/3+1: -void bar(unsigned n) { - if (n * 2863311531U < 1431655766U) - true(); -} - -The same transformation can work with an even modulo with the addition of a -rotate: rotate the result of the multiply to the right by the number of bits -which need to be zero for the condition to be true, and shrink the compare RHS -by the same amount. Unless the target supports rotates, though, that -transformation probably isn't worthwhile. - -The transformation can also easily be made to work with non-zero equality -comparisons: just transform, for example, "n % 3 == 1" to "(n-1) % 3 == 0".
- -//===---------------------------------------------------------------------===// - -Better mod/ref analysis for scanf would allow us to eliminate the vtable and a -bunch of other stuff from this example (see PR1604): - -#include <cstdio> -struct test { - int val; - virtual ~test() {} -}; - -int main() { - test t; - std::scanf("%d", &t.val); - std::printf("%d\n", t.val); -} - -//===---------------------------------------------------------------------===// - -These functions perform the same computation, but produce different assembly. - -define i8 @select(i8 %x) readnone nounwind { - %A = icmp ult i8 %x, 250 - %B = select i1 %A, i8 0, i8 1 - ret i8 %B -} - -define i8 @addshr(i8 %x) readnone nounwind { - %A = zext i8 %x to i9 - %B = add i9 %A, 6 ;; 256 - 250 == 6 - %C = lshr i9 %B, 8 - %D = trunc i9 %C to i8 - ret i8 %D -} - -//===---------------------------------------------------------------------===// - -From gcc bug 24696: -int -f (unsigned long a, unsigned long b, unsigned long c) -{ - return ((a & (c - 1)) != 0) || ((b & (c - 1)) != 0); -} -int -f (unsigned long a, unsigned long b, unsigned long c) -{ - return ((a & (c - 1)) != 0) | ((b & (c - 1)) != 0); -} -Both should combine to ((a|b) & (c-1)) != 0. Currently not optimized with -"clang -emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -From GCC Bug 20192: -#define PMD_MASK (~((1UL << 23) - 1)) -void clear_pmd_range(unsigned long start, unsigned long end) -{ - if (!(start & ~PMD_MASK) && !(end & ~PMD_MASK)) - f(); -} -The expression should optimize to something like -"!((start|end)&~PMD_MASK). Currently not optimized with "clang --emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return -i;} -unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;} -These should combine to the same thing. 
Currently, the first function -produces better code on X86. - -//===---------------------------------------------------------------------===// - -From GCC Bug 15784: -#define abs(x) x>0?x:-x -int f(int x, int y) -{ - return (abs(x)) >= 0; -} -This should optimize to x == INT_MIN. (With -fwrapv.) Currently not -optimized with "clang -emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -From GCC Bug 14753: -void -rotate_cst (unsigned int a) -{ - a = (a << 10) | (a >> 22); - if (a == 123) - bar (); -} -void -minus_cst (unsigned int a) -{ - unsigned int tem; - - tem = 20 - a; - if (tem == 5) - bar (); -} -void -mask_gt (unsigned int a) -{ - /* This is equivalent to a > 15. */ - if ((a & ~7) > 8) - bar (); -} -void -rshift_gt (unsigned int a) -{ - /* This is equivalent to a > 23. */ - if ((a >> 2) > 5) - bar (); -} - -All should simplify to a single comparison. All of these are -currently not optimized with "clang -emit-llvm-bc | opt --O3". - -//===---------------------------------------------------------------------===// - -From GCC Bug 32605: -int c(int* x) {return (char*)x+2 == (char*)x;} -Should combine to 0. Currently not optimized with "clang --emit-llvm-bc | opt -O3" (although llc can optimize it). - -//===---------------------------------------------------------------------===// - -int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;} -Should be combined to "((b >> 1) | b) & 1". Currently not optimized -with "clang -emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -unsigned a(unsigned x, unsigned y) { return x | (y & 1) | (y & 2);} -Should combine to "x | (y & 3)". Currently not optimized with "clang --emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);} -Should fold to "(~a & c) | (a & b)". 
Currently not optimized with -"clang -emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -int a(int a,int b) {return (~(a|b))|a;} -Should fold to "a|~b". Currently not optimized with "clang --emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -int a(int a, int b) {return (a&&b) || (a&&!b);} -Should fold to "a". Currently not optimized with "clang -emit-llvm-bc -| opt -O3". - -//===---------------------------------------------------------------------===// - -int a(int a, int b, int c) {return (a&&b) || (!a&&c);} -Should fold to "a ? b : c", or at least something sane. Currently not -optimized with "clang -emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -int a(int a, int b, int c) {return (a&&b) || (a&&c) || (a&&b&&c);} -Should fold to a && (b || c). Currently not optimized with "clang --emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -int a(int x) {return x | ((x & 8) ^ 8);} -Should combine to x | 8. Currently not optimized with "clang --emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -int a(int x) {return x ^ ((x & 8) ^ 8);} -Should also combine to x | 8. Currently not optimized with "clang --emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -int a(int x) {return ((x | -9) ^ 8) & x;} -Should combine to x & -9. Currently not optimized with "clang --emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -unsigned a(unsigned a) {return a * 0x11111111 >> 28 & 1;} -Should combine to "a * 0x88888888 >> 31". Currently not optimized -with "clang -emit-llvm-bc | opt -O3". 
- -//===---------------------------------------------------------------------===// - -unsigned a(char* x) {if ((*x & 32) == 0) return b();} -There's an unnecessary zext in the generated code with "clang --emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -unsigned a(unsigned long long x) {return 40 * (x >> 1);} -Should combine to "20 * (((unsigned)x) & -2)". Currently not -optimized with "clang -emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -int g(int x) { return (x - 10) < 0; } -Should combine to "x <= 9" (the sub has nsw). Currently not -optimized with "clang -emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -int g(int x) { return (x + 10) < 0; } -Should combine to "x < -10" (the add has nsw). Currently not -optimized with "clang -emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -int f(int i, int j) { return i < j + 1; } -int g(int i, int j) { return j > i - 1; } -Should combine to "i <= j" (the add/sub has nsw). Currently not -optimized with "clang -emit-llvm-bc | opt -O3". - -//===---------------------------------------------------------------------===// - -unsigned f(unsigned x) { return ((x & 7) + 1) & 15; } -The & 15 part should be optimized away, it doesn't change the result. Currently -not optimized with "clang -emit-llvm-bc | opt -O3". 
- -//===---------------------------------------------------------------------===// - -This was noticed in the entryblock for grokdeclarator in 403.gcc: - - %tmp = icmp eq i32 %decl_context, 4 - %decl_context_addr.0 = select i1 %tmp, i32 3, i32 %decl_context - %tmp1 = icmp eq i32 %decl_context_addr.0, 1 - %decl_context_addr.1 = select i1 %tmp1, i32 0, i32 %decl_context_addr.0 - -tmp1 should be simplified to something like: - (!tmp || decl_context == 1) - -This allows recursive simplifications, tmp1 is used all over the place in -the function, e.g. by: - - %tmp23 = icmp eq i32 %decl_context_addr.1, 0 ; <i1> [#uses=1] - %tmp24 = xor i1 %tmp1, true ; <i1> [#uses=1] - %or.cond8 = and i1 %tmp23, %tmp24 ; <i1> [#uses=1] - -later. - -//===---------------------------------------------------------------------===// - -[STORE SINKING] - -Store sinking: This code: - -void f (int n, int *cond, int *res) { - int i; - *res = 0; - for (i = 0; i < n; i++) - if (*cond) - *res ^= 234; /* (*) */ -} - -On this function GVN hoists the fully redundant value of *res, but nothing -moves the store out. This gives us this code: - -bb: ; preds = %bb2, %entry - %.rle = phi i32 [ 0, %entry ], [ %.rle6, %bb2 ] - %i.05 = phi i32 [ 0, %entry ], [ %indvar.next, %bb2 ] - %1 = load i32* %cond, align 4 - %2 = icmp eq i32 %1, 0 - br i1 %2, label %bb2, label %bb1 - -bb1: ; preds = %bb - %3 = xor i32 %.rle, 234 - store i32 %3, i32* %res, align 4 - br label %bb2 - -bb2: ; preds = %bb, %bb1 - %.rle6 = phi i32 [ %3, %bb1 ], [ %.rle, %bb ] - %indvar.next = add i32 %i.05, 1 - %exitcond = icmp eq i32 %indvar.next, %n - br i1 %exitcond, label %return, label %bb - -DSE should sink partially dead stores to get the store out of the loop. 
- -Here's another partial dead case: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12395 - -//===---------------------------------------------------------------------===// - -Scalar PRE hoists the mul in the common block up to the else: - -int test (int a, int b, int c, int g) { - int d, e; - if (a) - d = b * c; - else - d = b - c; - e = b * c + g; - return d + e; -} - -It would be better to do the mul once to reduce codesize above the if. -This is GCC PR38204. - - -//===---------------------------------------------------------------------===// -This simple function from 179.art: - -int winner, numf2s; -struct { double y; int reset; } *Y; - -void find_match() { - int i; - winner = 0; - for (i=0;i<numf2s;i++) - if (Y[i].y > Y[winner].y) - winner =i; -} - -Compiles into (with clang TBAA): - -for.body: ; preds = %for.inc, %bb.nph - %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.inc ] - %i.01718 = phi i32 [ 0, %bb.nph ], [ %i.01719, %for.inc ] - %tmp4 = getelementptr inbounds %struct.anon* %tmp3, i64 %indvar, i32 0 - %tmp5 = load double* %tmp4, align 8, !tbaa !4 - %idxprom7 = sext i32 %i.01718 to i64 - %tmp10 = getelementptr inbounds %struct.anon* %tmp3, i64 %idxprom7, i32 0 - %tmp11 = load double* %tmp10, align 8, !tbaa !4 - %cmp12 = fcmp ogt double %tmp5, %tmp11 - br i1 %cmp12, label %if.then, label %for.inc - -if.then: ; preds = %for.body - %i.017 = trunc i64 %indvar to i32 - br label %for.inc - -for.inc: ; preds = %for.body, %if.then - %i.01719 = phi i32 [ %i.01718, %for.body ], [ %i.017, %if.then ] - %indvar.next = add i64 %indvar, 1 - %exitcond = icmp eq i64 %indvar.next, %tmp22 - br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body - - -It is good that we hoisted the reloads of numf2's, and Y out of the loop and -sunk the store to winner out. - -However, this is awful on several levels: the conditional truncate in the loop -(-indvars at fault? why can't we completely promote the IV to i64?). 
- -Beyond that, we have a partially redundant load in the loop: if "winner" (aka -%i.01718) isn't updated, we reload Y[winner].y the next time through the loop. -Similarly, the addressing that feeds it (including the sext) is redundant. In -the end we get this generated assembly: - -LBB0_2: ## %for.body - ## =>This Inner Loop Header: Depth=1 - movsd (%rdi), %xmm0 - movslq %edx, %r8 - shlq $4, %r8 - ucomisd (%rcx,%r8), %xmm0 - jbe LBB0_4 - movl %esi, %edx -LBB0_4: ## %for.inc - addq $16, %rdi - incq %rsi - cmpq %rsi, %rax - jne LBB0_2 - -All things considered this isn't too bad, but we shouldn't need the movslq or -the shlq instruction, or the load folded into ucomisd every time through the -loop. - -On an x86-specific topic, if the loop can't be restructured, the movl should be a -cmov. - -//===---------------------------------------------------------------------===// - -[STORE SINKING] - -GCC PR37810 is an interesting case where we should sink load/store reload -into the if block and outside the loop, so we don't reload/store it on the -non-call path. - -for () { - *P += 1; - if () - call(); - else - ... --> -tmp = *P -for () { - tmp += 1; - if () { - *P = tmp; - call(); - tmp = *P; - } else ... -} -*P = tmp; - -We now hoist the reload after the call (Transforms/GVN/lpre-call-wrap.ll), but -we don't sink the store. We need partially dead store sinking. - -//===---------------------------------------------------------------------===// - -[LOAD PRE CRIT EDGE SPLITTING] - -GCC PR37166: Sinking of loads prevents SROA'ing the "g" struct on the stack -leading to excess stack traffic. This could be handled by GVN with some crazy -symbolic phi translation. The code we get looks like (g is on the stack): - -bb2: ; preds = %bb1 -..
- %9 = getelementptr %struct.f* %g, i32 0, i32 0 - store i32 %8, i32* %9, align bel %bb3 - -bb3: ; preds = %bb1, %bb2, %bb - %c_addr.0 = phi %struct.f* [ %g, %bb2 ], [ %c, %bb ], [ %c, %bb1 ] - %b_addr.0 = phi %struct.f* [ %b, %bb2 ], [ %g, %bb ], [ %b, %bb1 ] - %10 = getelementptr %struct.f* %c_addr.0, i32 0, i32 0 - %11 = load i32* %10, align 4 - -%11 is partially redundant, and in BB2 it should have the value %8. - -GCC PR33344 and PR35287 are similar cases. - - -//===---------------------------------------------------------------------===// - -[LOAD PRE] - -There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the -GCC testsuite, ones we don't get yet are (checked through loadpre25): - -[CRIT EDGE BREAKING] -predcom-4.c - -[PRE OF READONLY CALL] -loadpre5.c - -[TURN SELECT INTO BRANCH] -loadpre14.c loadpre15.c - -actually a conditional increment: loadpre18.c loadpre19.c - -//===---------------------------------------------------------------------===// - -[LOAD PRE / STORE SINKING / SPEC HACK] - -This is a chunk of code from 456.hmmer: - -int f(int M, int *mc, int *mpp, int *tpmm, int *ip, int *tpim, int *dpp, - int *tpdm, int xmb, int *bp, int *ms) { - int k, sc; - for (k = 1; k <= M; k++) { - mc[k] = mpp[k-1] + tpmm[k-1]; - if ((sc = ip[k-1] + tpim[k-1]) > mc[k]) mc[k] = sc; - if ((sc = dpp[k-1] + tpdm[k-1]) > mc[k]) mc[k] = sc; - if ((sc = xmb + bp[k]) > mc[k]) mc[k] = sc; - mc[k] += ms[k]; - } -} - -It is very profitable for this benchmark to turn the conditional stores to mc[k] -into a conditional move (select instr in IR) and allow the final store to do the -store. See GCC PR27313 for more details. Note that this is valid to xform even -with the new C++ memory model, since mc[k] is previously loaded and later -stored. - -//===---------------------------------------------------------------------===// - -[SCALAR PRE] -There are many PRE testcases in testsuite/gcc.dg/tree-ssa/ssa-pre-*.c in the -GCC testsuite.
- -//===---------------------------------------------------------------------===// - -There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the -GCC testsuite. For example, we get the first example in predcom-1.c, but -miss the second one: - -unsigned fib[1000]; -unsigned avg[1000]; - -__attribute__ ((noinline)) -void count_averages(int n) { - int i; - for (i = 1; i < n; i++) - avg[i] = (((unsigned long) fib[i - 1] + fib[i] + fib[i + 1]) / 3) & 0xffff; -} - -which compiles into two loads instead of one in the loop. - -predcom-2.c is the same as predcom-1.c - -predcom-3.c is very similar but needs loads feeding each other instead of -store->load. - - -//===---------------------------------------------------------------------===// - -[ALIAS ANALYSIS] - -Type based alias analysis: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705 - -We should do better analysis of posix_memalign. At the least it should -no-capture its pointer argument, at best, we should know that the out-value -result doesn't point to anything (like malloc). 
One example of this is in -SingleSource/Benchmarks/Misc/dt.c - -//===---------------------------------------------------------------------===// - -Interesting missed case because of control flow flattening (should be 2 loads): -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629 -With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | - opt -mem2reg -gvn -instcombine | llvm-dis -we miss it because we need 1) CRIT EDGE 2) MULTIPLE DIFFERENT -VALS PRODUCED BY ONE BLOCK OVER DIFFERENT PATHS - -//===---------------------------------------------------------------------===// - -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19633 -We could eliminate the branch condition here, loading from null is undefined: - -struct S { int w, x, y, z; }; -struct T { int r; struct S s; }; -void bar (struct S, int); -void foo (int a, struct T b) -{ - struct S *c = 0; - if (a) - c = &b.s; - bar (*c, a); -} - -//===---------------------------------------------------------------------===// - -simplifylibcalls should do several optimizations for strspn/strcspn: - -strcspn(x, "a") -> inlined loop for up to 3 letters (similarly for strspn): - -size_t __strcspn_c3 (__const char *__s, int __reject1, int __reject2, - int __reject3) { - register size_t __result = 0; - while (__s[__result] != '\0' && __s[__result] != __reject1 && - __s[__result] != __reject2 && __s[__result] != __reject3) - ++__result; - return __result; -} - -This should turn into a switch on the character. See PR3253 for some notes on -codegen. - -456.hmmer apparently uses strcspn and strspn a lot. 471.omnetpp uses strspn. - -//===---------------------------------------------------------------------===// - -simplifylibcalls should turn these snprintf idioms into memcpy (GCC PR47917) - -char buf1[6], buf2[6], buf3[4], buf4[4]; -int i; - -int foo (void) { - int ret = snprintf (buf1, sizeof buf1, "abcde"); - ret += snprintf (buf2, sizeof buf2, "abcdef") * 16; - ret += snprintf (buf3, sizeof buf3, "%s", i++ < 6 ? 
"abc" : "def") * 256; - ret += snprintf (buf4, sizeof buf4, "%s", i++ > 10 ? "abcde" : "defgh")*4096; - return ret; -} - -//===---------------------------------------------------------------------===// - -"gas" uses this idiom: - else if (strchr ("+-/*%|&^:[]()~", *intel_parser.op_string)) -.. - else if (strchr ("<>", *intel_parser.op_string) - -Those should be turned into a switch. SimplifyLibCalls only gets the second -case. - -//===---------------------------------------------------------------------===// - -252.eon contains this interesting code: - - %3072 = getelementptr [100 x i8]* %tempString, i32 0, i32 0 - %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind - %strlen = call i32 @strlen(i8* %3072) ; uses = 1 - %endptr = getelementptr [100 x i8]* %tempString, i32 0, i32 %strlen - call void @llvm.memcpy.i32(i8* %endptr, - i8* getelementptr ([5 x i8]* @"\01LC42", i32 0, i32 0), i32 5, i32 1) - %3074 = call i32 @strlen(i8* %endptr) nounwind readonly - -This is interesting for a couple reasons. First, in this: - -The memcpy+strlen strlen can be replaced with: - - %3074 = call i32 @strlen([5 x i8]* @"\01LC42") nounwind readonly - -Because the destination was just copied into the specified memory buffer. This, -in turn, can be constant folded to "4". - -In other code, it contains: - - %endptr6978 = bitcast i8* %endptr69 to i32* - store i32 7107374, i32* %endptr6978, align 1 - %3167 = call i32 @strlen(i8* %endptr69) nounwind readonly - -Which could also be constant folded. Whatever is producing this should probably -be fixed to leave this as a memcpy from a string. 
- -Further, eon also has an interesting partially redundant strlen call: - -bb8: ; preds = %_ZN18eonImageCalculatorC1Ev.exit - %682 = getelementptr i8** %argv, i32 6 ; <i8**> [#uses=2] - %683 = load i8** %682, align 4 ; <i8*> [#uses=4] - %684 = load i8* %683, align 1 ; <i8> [#uses=1] - %685 = icmp eq i8 %684, 0 ; <i1> [#uses=1] - br i1 %685, label %bb10, label %bb9 - -bb9: ; preds = %bb8 - %686 = call i32 @strlen(i8* %683) nounwind readonly - %687 = icmp ugt i32 %686, 254 ; <i1> [#uses=1] - br i1 %687, label %bb10, label %bb11 - -bb10: ; preds = %bb9, %bb8 - %688 = call i32 @strlen(i8* %683) nounwind readonly - -This could be eliminated by doing the strlen once in bb8, saving code size and -improving perf on the bb8->9->10 path. - -//===---------------------------------------------------------------------===// - -I see an interesting fully redundant call to strlen left in 186.crafty:InputMove -which looks like: - %movetext11 = getelementptr [128 x i8]* %movetext, i32 0, i32 0 - - -bb62: ; preds = %bb55, %bb53 - %promote.0 = phi i32 [ %169, %bb55 ], [ 0, %bb53 ] - %171 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1 - %172 = add i32 %171, -1 ; <i32> [#uses=1] - %173 = getelementptr [128 x i8]* %movetext, i32 0, i32 %172 - -... no stores ... - br i1 %or.cond, label %bb65, label %bb72 - -bb65: ; preds = %bb62 - store i8 0, i8* %173, align 1 - br label %bb72 - -bb72: ; preds = %bb65, %bb62 - %trank.1 = phi i32 [ %176, %bb65 ], [ -1, %bb62 ] - %177 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1 - -Note that on the bb62->bb72 path, that the %177 strlen call is partially -redundant with the %171 call. At worst, we could shove the %177 strlen call -up into the bb65 block moving it out of the bb62->bb72 path. However, note -that bb65 stores to the string, zeroing out the last byte. This means that on -that path the value of %177 is actually just %171-1. A sub is cheaper than a -strlen! 
- -This pattern repeats several times, basically doing: - - A = strlen(P); - P[A-1] = 0; - B = strlen(P); - where it is "obvious" that B = A-1. - -//===---------------------------------------------------------------------===// - -186.crafty has this interesting pattern with the "out.4543" variable: - -call void @llvm.memcpy.i32( - i8* getelementptr ([10 x i8]* @out.4543, i32 0, i32 0), - i8* getelementptr ([7 x i8]* @"\01LC28700", i32 0, i32 0), i32 7, i32 1) -%101 = call@printf(i8* ... @out.4543, i32 0, i32 0)) nounwind - -It is basically doing: - - memcpy(globalarray, "string"); - printf(..., globalarray); +Target Independent Opportunities: + +//===---------------------------------------------------------------------===// + +We should recognized various "overflow detection" idioms and translate them into +llvm.uadd.with.overflow and similar intrinsics. Here is a multiply idiom: + +unsigned int mul(unsigned int a,unsigned int b) { + if ((unsigned long long)a*b>0xffffffff) + exit(0); + return a*b; +} + +The legalization code for mul-with-overflow needs to be made more robust before +this can be implemented though. + +//===---------------------------------------------------------------------===// + +Get the C front-end to expand hypot(x,y) -> llvm.sqrt(x*x+y*y) when errno and +precision don't matter (ffastmath). Misc/mandel will like this. :) This isn't +safe in general, even on darwin. See the libm implementation of hypot for +examples (which special case when x/y are exactly zero to get signed zeros etc +right). + +//===---------------------------------------------------------------------===// + +On targets with expensive 64-bit multiply, we could LSR this: + +for (i = ...; ++i) { + x = 1ULL << i; + +into: + long long tmp = 1; + for (i = ...; ++i, tmp+=tmp) + x = tmp; + +This would be a win on ppc32, but not x86 or ppc64. 
+ +//===---------------------------------------------------------------------===// + +Shrink: (setlt (loadi32 P), 0) -> (setlt (loadi8 Phi), 0) + +//===---------------------------------------------------------------------===// + +Reassociate should turn things like: + +int factorial(int X) { + return X*X*X*X*X*X*X*X; +} + +into llvm.powi calls, allowing the code generator to produce balanced +multiplication trees. + +First, the intrinsic needs to be extended to support integers, and second the +code generator needs to be enhanced to lower these to multiplication trees. + +//===---------------------------------------------------------------------===// + +Interesting? testcase for add/shift/mul reassoc: + +int bar(int x, int y) { + return x*x*x+y+x*x*x*x*x*y*y*y*y; +} +int foo(int z, int n) { + return bar(z, n) + bar(2*z, 2*n); +} + +This is blocked on not handling X*X*X -> powi(X, 3) (see note above). The issue +is that we end up getting t = 2*X s = t*t and don't turn this into 4*X*X, +which is the same number of multiplies and is canonical, because the 2*X has +multiple uses. Here's a simple example: + +define i32 @test15(i32 %X1) { + %B = mul i32 %X1, 47 ; X1*47 + %C = mul i32 %B, %B + ret i32 %C +} + + +//===---------------------------------------------------------------------===// + +Reassociate should handle the example in GCC PR16157: + +extern int a0, a1, a2, a3, a4; extern int b0, b1, b2, b3, b4; +void f () { /* this can be optimized to four additions... */ + b4 = a4 + a3 + a2 + a1 + a0; + b3 = a3 + a2 + a1 + a0; + b2 = a2 + a1 + a0; + b1 = a1 + a0; +} + +This requires reassociating to forms of expressions that are already available, +something that reassoc doesn't think about yet. 
+ + +//===---------------------------------------------------------------------===// + +These two functions should generate the same code on big-endian systems: + +int g(int *j,int *l) { return memcmp(j,l,4); } +int h(int *j, int *l) { return *j - *l; } + +this could be done in SelectionDAGISel.cpp, along with other special cases, +for 1,2,4,8 bytes. + +//===---------------------------------------------------------------------===// + +It would be nice to revert this patch: +http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html + +And teach the dag combiner enough to simplify the code expanded before +legalize. It seems plausible that this knowledge would let it simplify other +stuff too. + +//===---------------------------------------------------------------------===// + +For vector types, DataLayout.cpp::getTypeInfo() returns alignment that is equal +to the type size. It works but can be overly conservative as the alignment of +specific vector types is target dependent. + +//===---------------------------------------------------------------------===// + +We should produce an unaligned load from code like this: + +v4sf example(float *P) { + return (v4sf){P[0], P[1], P[2], P[3] }; +} + +//===---------------------------------------------------------------------===// + +Add support for conditional increments, and other related patterns. Instead +of: + + movl 136(%esp), %eax + cmpl $0, %eax + je LBB16_2 #cond_next +LBB16_1: #cond_true + incl _foo +LBB16_2: #cond_next + +emit: + movl _foo, %eax + cmpl $1, %edi + sbbl $-1, %eax + movl %eax, _foo + +//===---------------------------------------------------------------------===// + +Combine: a = sin(x), b = cos(x) into a,b = sincos(x).
+ +Expand these to calls of sin/cos and stores: + double sincos(double x, double *sin, double *cos); + float sincosf(float x, float *sin, float *cos); + long double sincosl(long double x, long double *sin, long double *cos); + +Doing so could allow SROA of the destination pointers. See also: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17687 + +This is now easily doable with MRVs. We could even make an intrinsic for this +if anyone cared enough about sincos. + +//===---------------------------------------------------------------------===// + +quantum_sigma_x in 462.libquantum contains the following loop: + + for(i=0; i<reg->size; i++) + { + /* Flip the target bit of each basis state */ + reg->node[i].state ^= ((MAX_UNSIGNED) 1 << target); + } + +Where MAX_UNSIGNED/state is a 64-bit int. On a 32-bit platform it would be just +so cool to turn it into something like: + + long long Res = ((MAX_UNSIGNED) 1 << target); + if (target < 32) { + for(i=0; i<reg->size; i++) + reg->node[i].state ^= Res & 0xFFFFFFFFULL; + } else { + for(i=0; i<reg->size; i++) + reg->node[i].state ^= Res & 0xFFFFFFFF00000000ULL + } -Anyway, by knowing that printf just reads the memory and forward substituting -the string directly into the printf, this eliminates reads from globalarray. -Since this pattern occurs frequently in crafty (due to the "DisplayTime" and -other similar functions) there are many stores to "out". Once all the printfs -stop using "out", all that is left is the memcpy's into it. This should allow -globalopt to remove the "stored only" global. - -//===---------------------------------------------------------------------===// - -This code: - -define inreg i32 @foo(i8* inreg %p) nounwind { - %tmp0 = load i8* %p - %tmp1 = ashr i8 %tmp0, 5 - %tmp2 = sext i8 %tmp1 to i32 - ret i32 %tmp2 -} - -could be dagcombine'd to a sign-extending load with a shift. 
-For example, on x86 this currently gets this: - - movb (%eax), %al - sarb $5, %al - movsbl %al, %eax - -while it could get this: - - movsbl (%eax), %eax - sarl $5, %eax - -//===---------------------------------------------------------------------===// - -GCC PR31029: - -int test(int x) { return 1-x == x; } // --> return false -int test2(int x) { return 2-x == x; } // --> return x == 1 ? - -Always foldable for odd constants, what is the rule for even? - -//===---------------------------------------------------------------------===// - -PR 3381: GEP to field of size 0 inside a struct could be turned into GEP -for next field in struct (which is at same address). - -For example: store of float into { {{}}, float } could be turned into a store to -the float directly. - -//===---------------------------------------------------------------------===// - -The arg promotion pass should make use of nocapture to make its alias analysis -stuff much more precise. - -//===---------------------------------------------------------------------===// - -The following functions should be optimized to use a select instead of a -branch (from gcc PR40072): - -char char_int(int m) {if(m>7) return 0; return m;} -int int_char(char m) {if(m>7) return 0; return m;} - -//===---------------------------------------------------------------------===// - -int func(int a, int b) { if (a & 0x80) b |= 0x80; else b &= ~0x80; return b; } - -Generates this: - -define i32 @func(i32 %a, i32 %b) nounwind readnone ssp { -entry: - %0 = and i32 %a, 128 ; <i32> [#uses=1] - %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] - %2 = or i32 %b, 128 ; <i32> [#uses=1] - %3 = and i32 %b, -129 ; <i32> [#uses=1] - %b_addr.0 = select i1 %1, i32 %3, i32 %2 ; <i32> [#uses=1] - ret i32 %b_addr.0 -} - -However, it's functionally equivalent to: - - b = (b & ~0x80) | (a & 0x80); - -Which generates this: - -define i32 @func(i32 %a, i32 %b) nounwind readnone ssp { -entry: - %0 = and i32 %b, -129 ; <i32> [#uses=1] - %1 = and i32 %a, 128 ; 
<i32> [#uses=1] - %2 = or i32 %0, %1 ; <i32> [#uses=1] - ret i32 %2 -} - -This can be generalized for other forms: - - b = (b & ~0x80) | (a & 0x40) << 1; - -//===---------------------------------------------------------------------===// - -These two functions produce different code. They shouldn't: - -#include <stdint.h> - -uint8_t p1(uint8_t b, uint8_t a) { - b = (b & ~0xc0) | (a & 0xc0); - return (b); -} +... which would only do one 32-bit XOR per loop iteration instead of two. + +It would also be nice to recognize the reg->size doesn't alias reg->node[i], +but this requires TBAA. + +//===---------------------------------------------------------------------===// + +This isn't recognized as bswap by instcombine (yes, it really is bswap): + +unsigned long reverse(unsigned v) { + unsigned t; + t = v ^ ((v << 16) | (v >> 16)); + t &= ~0xff0000; + v = (v << 24) | (v >> 8); + return v ^ (t >> 8); +} + +//===---------------------------------------------------------------------===// + +[LOOP DELETION] + +We don't delete this output free loop, because trip count analysis doesn't +realize that it is finite (if it were infinite, it would be undefined). Not +having this blocks Loop Idiom from matching strlen and friends. + +void foo(char *C) { + int x = 0; + while (*C) + ++x,++C; +} + +//===---------------------------------------------------------------------===// + +[LOOP RECOGNITION] + +These idioms should be recognized as popcount (see PR1488): + +unsigned countbits_slow(unsigned v) { + unsigned c; + for (c = 0; v; v >>= 1) + c += v & 1; + return c; +} + +unsigned int popcount(unsigned int input) { + unsigned int count = 0; + for (unsigned int i = 0; i < 4 * 8; i++) + count += (input >> i) & i; + return count; +} + +This should be recognized as CLZ: rdar://8459039 + +unsigned clz_a(unsigned a) { + int i; + for (i=0;i<32;i++) + if (a & (1<<(31-i))) + return i; + return 32; +} + +This sort of thing should be added to the loop idiom pass. 
+ +//===---------------------------------------------------------------------===// + +These should turn into single 16-bit (unaligned?) loads on little/big endian +processors. + +unsigned short read_16_le(const unsigned char *adr) { + return adr[0] | (adr[1] << 8); +} +unsigned short read_16_be(const unsigned char *adr) { + return (adr[0] << 8) | adr[1]; +} + +//===---------------------------------------------------------------------===// + +-instcombine should handle this transform: + icmp pred (sdiv X / C1 ), C2 +when X, C1, and C2 are unsigned. Similarly for udiv and signed operands. + +Currently InstCombine avoids this transform but will do it when the signs of +the operands and the sign of the divide match. See the FIXME in +InstructionCombining.cpp in the visitSetCondInst method after the switch case +for Instruction::UDiv (around line 4447) for more details. + +The SingleSource/Benchmarks/Shootout-C++/hash and hash2 tests have examples of +this construct. + +//===---------------------------------------------------------------------===// + +[LOOP OPTIMIZATION] + +SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization +opportunities in its double_array_divs_variable function: it needs loop +interchange, memory promotion (which LICM already does), vectorization and +variable trip count loop unrolling (since it has a constant trip count). ICC +apparently produces this very nice code with -ffast-math: + +..B1.70: # Preds ..B1.70 ..B1.69 + mulpd %xmm0, %xmm1 #108.2 + mulpd %xmm0, %xmm1 #108.2 + mulpd %xmm0, %xmm1 #108.2 + mulpd %xmm0, %xmm1 #108.2 + addl $8, %edx # + cmpl $131072, %edx #108.2 + jb ..B1.70 # Prob 99% #108.2 + +It would be better to count down to zero, but this is a lot better than what we +do. 
+ +//===---------------------------------------------------------------------===// + +Consider: + +typedef unsigned U32; +typedef unsigned long long U64; +int test (U32 *inst, U64 *regs) { + U64 effective_addr2; + U32 temp = *inst; + int r1 = (temp >> 20) & 0xf; + int b2 = (temp >> 16) & 0xf; + effective_addr2 = temp & 0xfff; + if (b2) effective_addr2 += regs[b2]; + b2 = (temp >> 12) & 0xf; + if (b2) effective_addr2 += regs[b2]; + effective_addr2 &= regs[4]; + if ((effective_addr2 & 3) == 0) + return 1; + return 0; +} + +Note that only the low 2 bits of effective_addr2 are used. On 32-bit systems, +we don't eliminate the computation of the top half of effective_addr2 because +we don't have whole-function selection dags. On x86, this means we use one +extra register for the function when effective_addr2 is declared as U64 than +when it is declared U32. + +PHI Slicing could be extended to do this. + +//===---------------------------------------------------------------------===// + +Tail call elim should be more aggressive, checking to see if the call is +followed by an uncond branch to an exit block. + +; This testcase is due to tail-duplication not wanting to copy the return +; instruction into the terminating blocks because there was other code +; optimized out of the function after the taildup happened. 
+; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | not grep call + +define i32 @t4(i32 %a) { +entry: + %tmp.1 = and i32 %a, 1 ; <i32> [#uses=1] + %tmp.2 = icmp ne i32 %tmp.1, 0 ; <i1> [#uses=1] + br i1 %tmp.2, label %then.0, label %else.0 + +then.0: ; preds = %entry + %tmp.5 = add i32 %a, -1 ; <i32> [#uses=1] + %tmp.3 = call i32 @t4( i32 %tmp.5 ) ; <i32> [#uses=1] + br label %return + +else.0: ; preds = %entry + %tmp.7 = icmp ne i32 %a, 0 ; <i1> [#uses=1] + br i1 %tmp.7, label %then.1, label %return + +then.1: ; preds = %else.0 + %tmp.11 = add i32 %a, -2 ; <i32> [#uses=1] + %tmp.9 = call i32 @t4( i32 %tmp.11 ) ; <i32> [#uses=1] + br label %return + +return: ; preds = %then.1, %else.0, %then.0 + %result.0 = phi i32 [ 0, %else.0 ], [ %tmp.3, %then.0 ], + [ %tmp.9, %then.1 ] + ret i32 %result.0 +} + +//===---------------------------------------------------------------------===// + +Tail recursion elimination should handle: + +int pow2m1(int n) { + if (n == 0) + return 0; + return 2 * pow2m1 (n - 1) + 1; +} + +Also, multiplies can be turned into SHL's, so they should be handled as if +they were associative. "return foo() << 1" can be tail recursion eliminated. + +//===---------------------------------------------------------------------===// + +Argument promotion should promote arguments for recursive functions, like +this: + +; RUN: llvm-as < %s | opt -argpromotion | llvm-dis | grep x.val + +define internal i32 @foo(i32* %x) { +entry: + %tmp = load i32* %x ; <i32> [#uses=0] + %tmp.foo = call i32 @foo( i32* %x ) ; <i32> [#uses=1] + ret i32 %tmp.foo +} + +define i32 @bar(i32* %x) { +entry: + %tmp3 = call i32 @foo( i32* %x ) ; <i32> [#uses=1] + ret i32 %tmp3 +} + +//===---------------------------------------------------------------------===// + +We should investigate an instruction sinking pass. Consider this silly +example in pic mode: + +#include <assert.h> +void foo(int x) { + assert(x); + //... 
+} + +we compile this to: +_foo: + subl $28, %esp + call "L1$pb" +"L1$pb": + popl %eax + cmpl $0, 32(%esp) + je LBB1_2 # cond_true +LBB1_1: # return + # ... + addl $28, %esp + ret +LBB1_2: # cond_true +... + +The PIC base computation (call+popl) is only used on one path through the +code, but is currently always computed in the entry block. It would be +better to sink the picbase computation down into the block for the +assertion, as it is the only one that uses it. This happens for a lot of +code with early outs. + +Another example is loads of arguments, which are usually emitted into the +entry block on targets like x86. If not used in all paths through a +function, they should be sunk into the ones that do. + +In this case, whole-function-isel would also handle this. + +//===---------------------------------------------------------------------===// + +Investigate lowering of sparse switch statements into perfect hash tables: +http://burtleburtle.net/bob/hash/perfect.html + +//===---------------------------------------------------------------------===// + +We should turn things like "load+fabs+store" and "load+fneg+store" into the +corresponding integer operations. On a yonah, this loop: + +double a[256]; +void foo() { + int i, b; + for (b = 0; b < 10000000; b++) + for (i = 0; i < 256; i++) + a[i] = -a[i]; +} + +is twice as slow as this loop: + +long long a[256]; +void foo() { + int i, b; + for (b = 0; b < 10000000; b++) + for (i = 0; i < 256; i++) + a[i] ^= (1ULL << 63); +} + +and I suspect other processors are similar. On X86 in particular this is a +big win because doing this with integers allows the use of read/modify/write +instructions. + +//===---------------------------------------------------------------------===// + +DAG Combiner should try to combine small loads into larger loads when +profitable. 
For example, we compile this C++ example: + +struct THotKey { short Key; bool Control; bool Shift; bool Alt; }; +extern THotKey m_HotKey; +THotKey GetHotKey () { return m_HotKey; } + +into (-m64 -O3 -fno-exceptions -static -fomit-frame-pointer): + +__Z9GetHotKeyv: ## @_Z9GetHotKeyv + movq _m_HotKey@GOTPCREL(%rip), %rax + movzwl (%rax), %ecx + movzbl 2(%rax), %edx + shlq $16, %rdx + orq %rcx, %rdx + movzbl 3(%rax), %ecx + shlq $24, %rcx + orq %rdx, %rcx + movzbl 4(%rax), %eax + shlq $32, %rax + orq %rcx, %rax + ret + +//===---------------------------------------------------------------------===// + +We should add an FRINT node to the DAG to model targets that have legal +implementations of ceil/floor/rint. + +//===---------------------------------------------------------------------===// + +Consider: + +int test() { + long long input[8] = {1,0,1,0,1,0,1,0}; + foo(input); +} + +Clang compiles this into: + + call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 16, i1 false) + %0 = getelementptr [8 x i64]* %input, i64 0, i64 0 + store i64 1, i64* %0, align 16 + %1 = getelementptr [8 x i64]* %input, i64 0, i64 2 + store i64 1, i64* %1, align 16 + %2 = getelementptr [8 x i64]* %input, i64 0, i64 4 + store i64 1, i64* %2, align 16 + %3 = getelementptr [8 x i64]* %input, i64 0, i64 6 + store i64 1, i64* %3, align 16 + +Which gets codegen'd into: + + pxor %xmm0, %xmm0 + movaps %xmm0, -16(%rbp) + movaps %xmm0, -32(%rbp) + movaps %xmm0, -48(%rbp) + movaps %xmm0, -64(%rbp) + movq $1, -64(%rbp) + movq $1, -48(%rbp) + movq $1, -32(%rbp) + movq $1, -16(%rbp) + +It would be better to have 4 movq's of 0 instead of the movaps's. + +//===---------------------------------------------------------------------===// + +http://llvm.org/PR717: + +The following code should compile into "ret int undef". 
Instead, LLVM +produces "ret int 0": + +int f() { + int x = 4; + int y; + if (x == 3) y = 0; + return y; +} + +//===---------------------------------------------------------------------===// + +The loop unroller should partially unroll loops (instead of peeling them) +when code growth isn't too bad and when an unroll count allows simplification +of some code within the loop. One trivial example is: + +#include <stdio.h> +int main() { + int nRet = 17; + int nLoop; + for ( nLoop = 0; nLoop < 1000; nLoop++ ) { + if ( nLoop & 1 ) + nRet += 2; + else + nRet -= 1; + } + return nRet; +} + +Unrolling by 2 would eliminate the '&1' in both copies, leading to a net +reduction in code size. The resultant code would then also be suitable for +exit value computation. + +//===---------------------------------------------------------------------===// + +We miss a bunch of rotate opportunities on various targets, including ppc, x86, +etc. On X86, we miss a bunch of 'rotate by variable' cases because the rotate +matching code in dag combine doesn't look through truncates aggressively +enough. 
Here are some testcases reduced from GCC PR17886:
This can only +be done safely if "b" isn't modified between the strlen and memcpy of course. + +//===---------------------------------------------------------------------===// + +We compile this program: (from GCC PR11680) +http://gcc.gnu.org/bugzilla/attachment.cgi?id=4487 + +Into code that runs the same speed in fast/slow modes, but both modes run 2x +slower than when compile with GCC (either 4.0 or 4.2): + +$ llvm-g++ perf.cpp -O3 -fno-exceptions +$ time ./a.out fast +1.821u 0.003s 0:01.82 100.0% 0+0k 0+0io 0pf+0w + +$ g++ perf.cpp -O3 -fno-exceptions +$ time ./a.out fast +0.821u 0.001s 0:00.82 100.0% 0+0k 0+0io 0pf+0w + +It looks like we are making the same inlining decisions, so this may be raw +codegen badness or something else (haven't investigated). + +//===---------------------------------------------------------------------===// + +Divisibility by constant can be simplified (according to GCC PR12849) from +being a mulhi to being a mul lo (cheaper). Testcase: + +void bar(unsigned n) { + if (n % 3 == 0) + true(); +} + +This is equivalent to the following, where 2863311531 is the multiplicative +inverse of 3, and 1431655766 is ((2^32)-1)/3+1: +void bar(unsigned n) { + if (n * 2863311531U < 1431655766U) + true(); +} + +The same transformation can work with an even modulo with the addition of a +rotate: rotate the result of the multiply to the right by the number of bits +which need to be zero for the condition to be true, and shrink the compare RHS +by the same amount. Unless the target supports rotates, though, that +transformation probably isn't worthwhile. + +The transformation can also easily be made to work with non-zero equality +comparisons: just transform, for example, "n % 3 == 1" to "(n-1) % 3 == 0". 
+ +//===---------------------------------------------------------------------===// + +Better mod/ref analysis for scanf would allow us to eliminate the vtable and a +bunch of other stuff from this example (see PR1604): + +#include <cstdio> +struct test { + int val; + virtual ~test() {} +}; + +int main() { + test t; + std::scanf("%d", &t.val); + std::printf("%d\n", t.val); +} + +//===---------------------------------------------------------------------===// + +These functions perform the same computation, but produce different assembly. + +define i8 @select(i8 %x) readnone nounwind { + %A = icmp ult i8 %x, 250 + %B = select i1 %A, i8 0, i8 1 + ret i8 %B +} + +define i8 @addshr(i8 %x) readnone nounwind { + %A = zext i8 %x to i9 + %B = add i9 %A, 6 ;; 256 - 250 == 6 + %C = lshr i9 %B, 8 + %D = trunc i9 %C to i8 + ret i8 %D +} + +//===---------------------------------------------------------------------===// + +From gcc bug 24696: +int +f (unsigned long a, unsigned long b, unsigned long c) +{ + return ((a & (c - 1)) != 0) || ((b & (c - 1)) != 0); +} +int +f (unsigned long a, unsigned long b, unsigned long c) +{ + return ((a & (c - 1)) != 0) | ((b & (c - 1)) != 0); +} +Both should combine to ((a|b) & (c-1)) != 0. Currently not optimized with +"clang -emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +From GCC Bug 20192: +#define PMD_MASK (~((1UL << 23) - 1)) +void clear_pmd_range(unsigned long start, unsigned long end) +{ + if (!(start & ~PMD_MASK) && !(end & ~PMD_MASK)) + f(); +} +The expression should optimize to something like +"!((start|end)&~PMD_MASK). Currently not optimized with "clang +-emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return +i;} +unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;} +These should combine to the same thing. 
Currently, the first function +produces better code on X86. + +//===---------------------------------------------------------------------===// + +From GCC Bug 15784: +#define abs(x) x>0?x:-x +int f(int x, int y) +{ + return (abs(x)) >= 0; +} +This should optimize to x == INT_MIN. (With -fwrapv.) Currently not +optimized with "clang -emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +From GCC Bug 14753: +void +rotate_cst (unsigned int a) +{ + a = (a << 10) | (a >> 22); + if (a == 123) + bar (); +} +void +minus_cst (unsigned int a) +{ + unsigned int tem; + + tem = 20 - a; + if (tem == 5) + bar (); +} +void +mask_gt (unsigned int a) +{ + /* This is equivalent to a > 15. */ + if ((a & ~7) > 8) + bar (); +} +void +rshift_gt (unsigned int a) +{ + /* This is equivalent to a > 23. */ + if ((a >> 2) > 5) + bar (); +} + +All should simplify to a single comparison. All of these are +currently not optimized with "clang -emit-llvm-bc | opt +-O3". + +//===---------------------------------------------------------------------===// + +From GCC Bug 32605: +int c(int* x) {return (char*)x+2 == (char*)x;} +Should combine to 0. Currently not optimized with "clang +-emit-llvm-bc | opt -O3" (although llc can optimize it). + +//===---------------------------------------------------------------------===// + +int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;} +Should be combined to "((b >> 1) | b) & 1". Currently not optimized +with "clang -emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +unsigned a(unsigned x, unsigned y) { return x | (y & 1) | (y & 2);} +Should combine to "x | (y & 3)". Currently not optimized with "clang +-emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);} +Should fold to "(~a & c) | (a & b)". 
Currently not optimized with +"clang -emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +int a(int a,int b) {return (~(a|b))|a;} +Should fold to "a|~b". Currently not optimized with "clang +-emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +int a(int a, int b) {return (a&&b) || (a&&!b);} +Should fold to "a". Currently not optimized with "clang -emit-llvm-bc +| opt -O3". + +//===---------------------------------------------------------------------===// + +int a(int a, int b, int c) {return (a&&b) || (!a&&c);} +Should fold to "a ? b : c", or at least something sane. Currently not +optimized with "clang -emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +int a(int a, int b, int c) {return (a&&b) || (a&&c) || (a&&b&&c);} +Should fold to a && (b || c). Currently not optimized with "clang +-emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +int a(int x) {return x | ((x & 8) ^ 8);} +Should combine to x | 8. Currently not optimized with "clang +-emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +int a(int x) {return x ^ ((x & 8) ^ 8);} +Should also combine to x | 8. Currently not optimized with "clang +-emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +int a(int x) {return ((x | -9) ^ 8) & x;} +Should combine to x & -9. Currently not optimized with "clang +-emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +unsigned a(unsigned a) {return a * 0x11111111 >> 28 & 1;} +Should combine to "a * 0x88888888 >> 31". Currently not optimized +with "clang -emit-llvm-bc | opt -O3". 
+ +//===---------------------------------------------------------------------===// + +unsigned a(char* x) {if ((*x & 32) == 0) return b();} +There's an unnecessary zext in the generated code with "clang +-emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +unsigned a(unsigned long long x) {return 40 * (x >> 1);} +Should combine to "20 * (((unsigned)x) & -2)". Currently not +optimized with "clang -emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +int g(int x) { return (x - 10) < 0; } +Should combine to "x <= 9" (the sub has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +int g(int x) { return (x + 10) < 0; } +Should combine to "x < -10" (the add has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +int f(int i, int j) { return i < j + 1; } +int g(int i, int j) { return j > i - 1; } +Should combine to "i <= j" (the add/sub has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -O3". + +//===---------------------------------------------------------------------===// + +unsigned f(unsigned x) { return ((x & 7) + 1) & 15; } +The & 15 part should be optimized away, it doesn't change the result. Currently +not optimized with "clang -emit-llvm-bc | opt -O3". 
+ +//===---------------------------------------------------------------------===// + +This was noticed in the entryblock for grokdeclarator in 403.gcc: + + %tmp = icmp eq i32 %decl_context, 4 + %decl_context_addr.0 = select i1 %tmp, i32 3, i32 %decl_context + %tmp1 = icmp eq i32 %decl_context_addr.0, 1 + %decl_context_addr.1 = select i1 %tmp1, i32 0, i32 %decl_context_addr.0 + +tmp1 should be simplified to something like: + (!tmp || decl_context == 1) + +This allows recursive simplifications, tmp1 is used all over the place in +the function, e.g. by: + + %tmp23 = icmp eq i32 %decl_context_addr.1, 0 ; <i1> [#uses=1] + %tmp24 = xor i1 %tmp1, true ; <i1> [#uses=1] + %or.cond8 = and i1 %tmp23, %tmp24 ; <i1> [#uses=1] + +later. + +//===---------------------------------------------------------------------===// + +[STORE SINKING] + +Store sinking: This code: + +void f (int n, int *cond, int *res) { + int i; + *res = 0; + for (i = 0; i < n; i++) + if (*cond) + *res ^= 234; /* (*) */ +} + +On this function GVN hoists the fully redundant value of *res, but nothing +moves the store out. This gives us this code: + +bb: ; preds = %bb2, %entry + %.rle = phi i32 [ 0, %entry ], [ %.rle6, %bb2 ] + %i.05 = phi i32 [ 0, %entry ], [ %indvar.next, %bb2 ] + %1 = load i32* %cond, align 4 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb2, label %bb1 + +bb1: ; preds = %bb + %3 = xor i32 %.rle, 234 + store i32 %3, i32* %res, align 4 + br label %bb2 + +bb2: ; preds = %bb, %bb1 + %.rle6 = phi i32 [ %3, %bb1 ], [ %.rle, %bb ] + %indvar.next = add i32 %i.05, 1 + %exitcond = icmp eq i32 %indvar.next, %n + br i1 %exitcond, label %return, label %bb + +DSE should sink partially dead stores to get the store out of the loop. 
+ +Here's another partial dead case: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12395 + +//===---------------------------------------------------------------------===// + +Scalar PRE hoists the mul in the common block up to the else: + +int test (int a, int b, int c, int g) { + int d, e; + if (a) + d = b * c; + else + d = b - c; + e = b * c + g; + return d + e; +} + +It would be better to do the mul once to reduce codesize above the if. +This is GCC PR38204. + + +//===---------------------------------------------------------------------===// +This simple function from 179.art: + +int winner, numf2s; +struct { double y; int reset; } *Y; + +void find_match() { + int i; + winner = 0; + for (i=0;i<numf2s;i++) + if (Y[i].y > Y[winner].y) + winner =i; +} + +Compiles into (with clang TBAA): + +for.body: ; preds = %for.inc, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.inc ] + %i.01718 = phi i32 [ 0, %bb.nph ], [ %i.01719, %for.inc ] + %tmp4 = getelementptr inbounds %struct.anon* %tmp3, i64 %indvar, i32 0 + %tmp5 = load double* %tmp4, align 8, !tbaa !4 + %idxprom7 = sext i32 %i.01718 to i64 + %tmp10 = getelementptr inbounds %struct.anon* %tmp3, i64 %idxprom7, i32 0 + %tmp11 = load double* %tmp10, align 8, !tbaa !4 + %cmp12 = fcmp ogt double %tmp5, %tmp11 + br i1 %cmp12, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %i.017 = trunc i64 %indvar to i32 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %i.01719 = phi i32 [ %i.01718, %for.body ], [ %i.017, %if.then ] + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %tmp22 + br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body + + +It is good that we hoisted the reloads of numf2's, and Y out of the loop and +sunk the store to winner out. + +However, this is awful on several levels: the conditional truncate in the loop +(-indvars at fault? why can't we completely promote the IV to i64?). 
+ +Beyond that, we have a partially redundant load in the loop: if "winner" (aka +%i.01718) isn't updated, we reload Y[winner].y the next time through the loop. +Similarly, the addressing that feeds it (including the sext) is redundant. In +the end we get this generated assembly: + +LBB0_2: ## %for.body + ## =>This Inner Loop Header: Depth=1 + movsd (%rdi), %xmm0 + movslq %edx, %r8 + shlq $4, %r8 + ucomisd (%rcx,%r8), %xmm0 + jbe LBB0_4 + movl %esi, %edx +LBB0_4: ## %for.inc + addq $16, %rdi + incq %rsi + cmpq %rsi, %rax + jne LBB0_2 + +All things considered this isn't too bad, but we shouldn't need the movslq or +the shlq instruction, or the load folded into ucomisd every time through the +loop. + +On an x86-specific topic, if the loop can't be restructure, the movl should be a +cmov. + +//===---------------------------------------------------------------------===// + +[STORE SINKING] + +GCC PR37810 is an interesting case where we should sink load/store reload +into the if block and outside the loop, so we don't reload/store it on the +non-call path. + +for () { + *P += 1; + if () + call(); + else + ... +-> +tmp = *P +for () { + tmp += 1; + if () { + *P = tmp; + call(); + tmp = *P; + } else ... +} +*P = tmp; + +We now hoist the reload after the call (Transforms/GVN/lpre-call-wrap.ll), but +we don't sink the store. We need partially dead store sinking. + +//===---------------------------------------------------------------------===// + +[LOAD PRE CRIT EDGE SPLITTING] + +GCC PR37166: Sinking of loads prevents SROA'ing the "g" struct on the stack +leading to excess stack traffic. This could be handled by GVN with some crazy +symbolic phi translation. The code we get looks like (g is on the stack): + +bb2: ; preds = %bb1 +.. 
+ %9 = getelementptr %struct.f* %g, i32 0, i32 0 + store i32 %8, i32* %9, align bel %bb3 + +bb3: ; preds = %bb1, %bb2, %bb + %c_addr.0 = phi %struct.f* [ %g, %bb2 ], [ %c, %bb ], [ %c, %bb1 ] + %b_addr.0 = phi %struct.f* [ %b, %bb2 ], [ %g, %bb ], [ %b, %bb1 ] + %10 = getelementptr %struct.f* %c_addr.0, i32 0, i32 0 + %11 = load i32* %10, align 4 + +%11 is partially redundant, an in BB2 it should have the value %8. + +GCC PR33344 and PR35287 are similar cases. + + +//===---------------------------------------------------------------------===// + +[LOAD PRE] + +There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the +GCC testsuite, ones we don't get yet are (checked through loadpre25): + +[CRIT EDGE BREAKING] +predcom-4.c + +[PRE OF READONLY CALL] +loadpre5.c + +[TURN SELECT INTO BRANCH] +loadpre14.c loadpre15.c + +actually a conditional increment: loadpre18.c loadpre19.c + +//===---------------------------------------------------------------------===// + +[LOAD PRE / STORE SINKING / SPEC HACK] + +This is a chunk of code from 456.hmmer: + +int f(int M, int *mc, int *mpp, int *tpmm, int *ip, int *tpim, int *dpp, + int *tpdm, int xmb, int *bp, int *ms) { + int k, sc; + for (k = 1; k <= M; k++) { + mc[k] = mpp[k-1] + tpmm[k-1]; + if ((sc = ip[k-1] + tpim[k-1]) > mc[k]) mc[k] = sc; + if ((sc = dpp[k-1] + tpdm[k-1]) > mc[k]) mc[k] = sc; + if ((sc = xmb + bp[k]) > mc[k]) mc[k] = sc; + mc[k] += ms[k]; + } +} + +It is very profitable for this benchmark to turn the conditional stores to mc[k] +into a conditional move (select instr in IR) and allow the final store to do the +store. See GCC PR27313 for more details. Note that this is valid to xform even +with the new C++ memory model, since mc[k] is previously loaded and later +stored. + +//===---------------------------------------------------------------------===// + +[SCALAR PRE] +There are many PRE testcases in testsuite/gcc.dg/tree-ssa/ssa-pre-*.c in the +GCC testsuite. 
+ +//===---------------------------------------------------------------------===// + +There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the +GCC testsuite. For example, we get the first example in predcom-1.c, but +miss the second one: + +unsigned fib[1000]; +unsigned avg[1000]; + +__attribute__ ((noinline)) +void count_averages(int n) { + int i; + for (i = 1; i < n; i++) + avg[i] = (((unsigned long) fib[i - 1] + fib[i] + fib[i + 1]) / 3) & 0xffff; +} + +which compiles into two loads instead of one in the loop. + +predcom-2.c is the same as predcom-1.c + +predcom-3.c is very similar but needs loads feeding each other instead of +store->load. + + +//===---------------------------------------------------------------------===// + +[ALIAS ANALYSIS] + +Type based alias analysis: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705 + +We should do better analysis of posix_memalign. At the least it should +no-capture its pointer argument, at best, we should know that the out-value +result doesn't point to anything (like malloc). 
One example of this is in +SingleSource/Benchmarks/Misc/dt.c + +//===---------------------------------------------------------------------===// + +Interesting missed case because of control flow flattening (should be 2 loads): +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629 +With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | + opt -mem2reg -gvn -instcombine | llvm-dis +we miss it because we need 1) CRIT EDGE 2) MULTIPLE DIFFERENT +VALS PRODUCED BY ONE BLOCK OVER DIFFERENT PATHS + +//===---------------------------------------------------------------------===// + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19633 +We could eliminate the branch condition here, loading from null is undefined: + +struct S { int w, x, y, z; }; +struct T { int r; struct S s; }; +void bar (struct S, int); +void foo (int a, struct T b) +{ + struct S *c = 0; + if (a) + c = &b.s; + bar (*c, a); +} + +//===---------------------------------------------------------------------===// + +simplifylibcalls should do several optimizations for strspn/strcspn: + +strcspn(x, "a") -> inlined loop for up to 3 letters (similarly for strspn): + +size_t __strcspn_c3 (__const char *__s, int __reject1, int __reject2, + int __reject3) { + register size_t __result = 0; + while (__s[__result] != '\0' && __s[__result] != __reject1 && + __s[__result] != __reject2 && __s[__result] != __reject3) + ++__result; + return __result; +} + +This should turn into a switch on the character. See PR3253 for some notes on +codegen. + +456.hmmer apparently uses strcspn and strspn a lot. 471.omnetpp uses strspn. + +//===---------------------------------------------------------------------===// + +simplifylibcalls should turn these snprintf idioms into memcpy (GCC PR47917) + +char buf1[6], buf2[6], buf3[4], buf4[4]; +int i; + +int foo (void) { + int ret = snprintf (buf1, sizeof buf1, "abcde"); + ret += snprintf (buf2, sizeof buf2, "abcdef") * 16; + ret += snprintf (buf3, sizeof buf3, "%s", i++ < 6 ? 
"abc" : "def") * 256; + ret += snprintf (buf4, sizeof buf4, "%s", i++ > 10 ? "abcde" : "defgh")*4096; + return ret; +} + +//===---------------------------------------------------------------------===// + +"gas" uses this idiom: + else if (strchr ("+-/*%|&^:[]()~", *intel_parser.op_string)) +.. + else if (strchr ("<>", *intel_parser.op_string) + +Those should be turned into a switch. SimplifyLibCalls only gets the second +case. + +//===---------------------------------------------------------------------===// + +252.eon contains this interesting code: + + %3072 = getelementptr [100 x i8]* %tempString, i32 0, i32 0 + %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind + %strlen = call i32 @strlen(i8* %3072) ; uses = 1 + %endptr = getelementptr [100 x i8]* %tempString, i32 0, i32 %strlen + call void @llvm.memcpy.i32(i8* %endptr, + i8* getelementptr ([5 x i8]* @"\01LC42", i32 0, i32 0), i32 5, i32 1) + %3074 = call i32 @strlen(i8* %endptr) nounwind readonly + +This is interesting for a couple reasons. First, in this: + +The memcpy+strlen strlen can be replaced with: + + %3074 = call i32 @strlen([5 x i8]* @"\01LC42") nounwind readonly + +Because the destination was just copied into the specified memory buffer. This, +in turn, can be constant folded to "4". + +In other code, it contains: + + %endptr6978 = bitcast i8* %endptr69 to i32* + store i32 7107374, i32* %endptr6978, align 1 + %3167 = call i32 @strlen(i8* %endptr69) nounwind readonly + +Which could also be constant folded. Whatever is producing this should probably +be fixed to leave this as a memcpy from a string. 
+ +Further, eon also has an interesting partially redundant strlen call: + +bb8: ; preds = %_ZN18eonImageCalculatorC1Ev.exit + %682 = getelementptr i8** %argv, i32 6 ; <i8**> [#uses=2] + %683 = load i8** %682, align 4 ; <i8*> [#uses=4] + %684 = load i8* %683, align 1 ; <i8> [#uses=1] + %685 = icmp eq i8 %684, 0 ; <i1> [#uses=1] + br i1 %685, label %bb10, label %bb9 + +bb9: ; preds = %bb8 + %686 = call i32 @strlen(i8* %683) nounwind readonly + %687 = icmp ugt i32 %686, 254 ; <i1> [#uses=1] + br i1 %687, label %bb10, label %bb11 + +bb10: ; preds = %bb9, %bb8 + %688 = call i32 @strlen(i8* %683) nounwind readonly + +This could be eliminated by doing the strlen once in bb8, saving code size and +improving perf on the bb8->9->10 path. + +//===---------------------------------------------------------------------===// + +I see an interesting fully redundant call to strlen left in 186.crafty:InputMove +which looks like: + %movetext11 = getelementptr [128 x i8]* %movetext, i32 0, i32 0 + + +bb62: ; preds = %bb55, %bb53 + %promote.0 = phi i32 [ %169, %bb55 ], [ 0, %bb53 ] + %171 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1 + %172 = add i32 %171, -1 ; <i32> [#uses=1] + %173 = getelementptr [128 x i8]* %movetext, i32 0, i32 %172 + +... no stores ... + br i1 %or.cond, label %bb65, label %bb72 + +bb65: ; preds = %bb62 + store i8 0, i8* %173, align 1 + br label %bb72 + +bb72: ; preds = %bb65, %bb62 + %trank.1 = phi i32 [ %176, %bb65 ], [ -1, %bb62 ] + %177 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1 + +Note that on the bb62->bb72 path, that the %177 strlen call is partially +redundant with the %171 call. At worst, we could shove the %177 strlen call +up into the bb65 block moving it out of the bb62->bb72 path. However, note +that bb65 stores to the string, zeroing out the last byte. This means that on +that path the value of %177 is actually just %171-1. A sub is cheaper than a +strlen! 
+ +This pattern repeats several times, basically doing: + + A = strlen(P); + P[A-1] = 0; + B = strlen(P); + where it is "obvious" that B = A-1. + +//===---------------------------------------------------------------------===// + +186.crafty has this interesting pattern with the "out.4543" variable: + +call void @llvm.memcpy.i32( + i8* getelementptr ([10 x i8]* @out.4543, i32 0, i32 0), + i8* getelementptr ([7 x i8]* @"\01LC28700", i32 0, i32 0), i32 7, i32 1) +%101 = call@printf(i8* ... @out.4543, i32 0, i32 0)) nounwind + +It is basically doing: + + memcpy(globalarray, "string"); + printf(..., globalarray); -uint8_t p2(uint8_t b, uint8_t a) { - b = (b & ~0x40) | (a & 0x40); - b = (b & ~0x80) | (a & 0x80); - return (b); -} - -define zeroext i8 @p1(i8 zeroext %b, i8 zeroext %a) nounwind readnone ssp { -entry: - %0 = and i8 %b, 63 ; <i8> [#uses=1] - %1 = and i8 %a, -64 ; <i8> [#uses=1] - %2 = or i8 %1, %0 ; <i8> [#uses=1] - ret i8 %2 -} - -define zeroext i8 @p2(i8 zeroext %b, i8 zeroext %a) nounwind readnone ssp { -entry: - %0 = and i8 %b, 63 ; <i8> [#uses=1] - %.masked = and i8 %a, 64 ; <i8> [#uses=1] - %1 = and i8 %a, -128 ; <i8> [#uses=1] - %2 = or i8 %1, %0 ; <i8> [#uses=1] - %3 = or i8 %2, %.masked ; <i8> [#uses=1] - ret i8 %3 -} - -//===---------------------------------------------------------------------===// - -IPSCCP does not currently propagate argument dependent constants through -functions where it does not not all of the callers. This includes functions -with normal external linkage as well as templates, C99 inline functions etc. 
-Specifically, it does nothing to: - -define i32 @test(i32 %x, i32 %y, i32 %z) nounwind { -entry: - %0 = add nsw i32 %y, %z - %1 = mul i32 %0, %x - %2 = mul i32 %y, %z - %3 = add nsw i32 %1, %2 - ret i32 %3 -} - -define i32 @test2() nounwind { -entry: - %0 = call i32 @test(i32 1, i32 2, i32 4) nounwind - ret i32 %0 -} - -It would be interesting extend IPSCCP to be able to handle simple cases like -this, where all of the arguments to a call are constant. Because IPSCCP runs -before inlining, trivial templates and inline functions are not yet inlined. -The results for a function + set of constant arguments should be memoized in a -map. - -//===---------------------------------------------------------------------===// - -The libcall constant folding stuff should be moved out of SimplifyLibcalls into -libanalysis' constantfolding logic. This would allow IPSCCP to be able to -handle simple things like this: - -static int foo(const char *X) { return strlen(X); } -int bar() { return foo("abcd"); } - -//===---------------------------------------------------------------------===// - +Anyway, by knowing that printf just reads the memory and forward substituting +the string directly into the printf, this eliminates reads from globalarray. +Since this pattern occurs frequently in crafty (due to the "DisplayTime" and +other similar functions) there are many stores to "out". Once all the printfs +stop using "out", all that is left is the memcpy's into it. This should allow +globalopt to remove the "stored only" global. + +//===---------------------------------------------------------------------===// + +This code: + +define inreg i32 @foo(i8* inreg %p) nounwind { + %tmp0 = load i8* %p + %tmp1 = ashr i8 %tmp0, 5 + %tmp2 = sext i8 %tmp1 to i32 + ret i32 %tmp2 +} + +could be dagcombine'd to a sign-extending load with a shift. 
+For example, on x86 this currently gets this: + + movb (%eax), %al + sarb $5, %al + movsbl %al, %eax + +while it could get this: + + movsbl (%eax), %eax + sarl $5, %eax + +//===---------------------------------------------------------------------===// + +GCC PR31029: + +int test(int x) { return 1-x == x; } // --> return false +int test2(int x) { return 2-x == x; } // --> return x == 1 ? + +Always foldable for odd constants, what is the rule for even? + +//===---------------------------------------------------------------------===// + +PR 3381: GEP to field of size 0 inside a struct could be turned into GEP +for next field in struct (which is at same address). + +For example: store of float into { {{}}, float } could be turned into a store to +the float directly. + +//===---------------------------------------------------------------------===// + +The arg promotion pass should make use of nocapture to make its alias analysis +stuff much more precise. + +//===---------------------------------------------------------------------===// + +The following functions should be optimized to use a select instead of a +branch (from gcc PR40072): + +char char_int(int m) {if(m>7) return 0; return m;} +int int_char(char m) {if(m>7) return 0; return m;} + +//===---------------------------------------------------------------------===// + +int func(int a, int b) { if (a & 0x80) b |= 0x80; else b &= ~0x80; return b; } + +Generates this: + +define i32 @func(i32 %a, i32 %b) nounwind readnone ssp { +entry: + %0 = and i32 %a, 128 ; <i32> [#uses=1] + %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1] + %2 = or i32 %b, 128 ; <i32> [#uses=1] + %3 = and i32 %b, -129 ; <i32> [#uses=1] + %b_addr.0 = select i1 %1, i32 %3, i32 %2 ; <i32> [#uses=1] + ret i32 %b_addr.0 +} + +However, it's functionally equivalent to: + + b = (b & ~0x80) | (a & 0x80); + +Which generates this: + +define i32 @func(i32 %a, i32 %b) nounwind readnone ssp { +entry: + %0 = and i32 %b, -129 ; <i32> [#uses=1] + %1 = and i32 %a, 128 ; 
<i32> [#uses=1] + %2 = or i32 %0, %1 ; <i32> [#uses=1] + ret i32 %2 +} + +This can be generalized for other forms: + + b = (b & ~0x80) | (a & 0x40) << 1; + +//===---------------------------------------------------------------------===// + +These two functions produce different code. They shouldn't: + +#include <stdint.h> + +uint8_t p1(uint8_t b, uint8_t a) { + b = (b & ~0xc0) | (a & 0xc0); + return (b); +} + +uint8_t p2(uint8_t b, uint8_t a) { + b = (b & ~0x40) | (a & 0x40); + b = (b & ~0x80) | (a & 0x80); + return (b); +} + +define zeroext i8 @p1(i8 zeroext %b, i8 zeroext %a) nounwind readnone ssp { +entry: + %0 = and i8 %b, 63 ; <i8> [#uses=1] + %1 = and i8 %a, -64 ; <i8> [#uses=1] + %2 = or i8 %1, %0 ; <i8> [#uses=1] + ret i8 %2 +} + +define zeroext i8 @p2(i8 zeroext %b, i8 zeroext %a) nounwind readnone ssp { +entry: + %0 = and i8 %b, 63 ; <i8> [#uses=1] + %.masked = and i8 %a, 64 ; <i8> [#uses=1] + %1 = and i8 %a, -128 ; <i8> [#uses=1] + %2 = or i8 %1, %0 ; <i8> [#uses=1] + %3 = or i8 %2, %.masked ; <i8> [#uses=1] + ret i8 %3 +} + +//===---------------------------------------------------------------------===// + +IPSCCP does not currently propagate argument dependent constants through +functions where it does not not all of the callers. This includes functions +with normal external linkage as well as templates, C99 inline functions etc. +Specifically, it does nothing to: + +define i32 @test(i32 %x, i32 %y, i32 %z) nounwind { +entry: + %0 = add nsw i32 %y, %z + %1 = mul i32 %0, %x + %2 = mul i32 %y, %z + %3 = add nsw i32 %1, %2 + ret i32 %3 +} + +define i32 @test2() nounwind { +entry: + %0 = call i32 @test(i32 1, i32 2, i32 4) nounwind + ret i32 %0 +} + +It would be interesting extend IPSCCP to be able to handle simple cases like +this, where all of the arguments to a call are constant. Because IPSCCP runs +before inlining, trivial templates and inline functions are not yet inlined. 
+The results for a function + set of constant arguments should be memoized in a +map. + +//===---------------------------------------------------------------------===// + +The libcall constant folding stuff should be moved out of SimplifyLibcalls into +libanalysis' constantfolding logic. This would allow IPSCCP to be able to +handle simple things like this: + +static int foo(const char *X) { return strlen(X); } +int bar() { return foo("abcd"); } + +//===---------------------------------------------------------------------===// + function-attrs doesn't know much about memcpy/memset. This function should be -marked readnone rather than readonly, since it only twiddles local memory, but +marked readnone rather than readonly, since it only twiddles local memory, but function-attrs doesn't handle memset/memcpy/memmove aggressively: - -struct X { int *p; int *q; }; -int foo() { - int i = 0, j = 1; - struct X x, y; - int **p; - y.p = &i; - x.q = &j; - p = __builtin_memcpy (&x, &y, sizeof (int *)); - return **p; -} - -This can be seen at: + +struct X { int *p; int *q; }; +int foo() { + int i = 0, j = 1; + struct X x, y; + int **p; + y.p = &i; + x.q = &j; + p = __builtin_memcpy (&x, &y, sizeof (int *)); + return **p; +} + +This can be seen at: $ clang t.c -S -o - -mkernel -O0 -emit-llvm | opt -function-attrs -S - - -//===---------------------------------------------------------------------===// - -Missed instcombine transformation: -define i1 @a(i32 %x) nounwind readnone { -entry: - %cmp = icmp eq i32 %x, 30 - %sub = add i32 %x, -30 - %cmp2 = icmp ugt i32 %sub, 9 - %or = or i1 %cmp, %cmp2 - ret i1 %or -} -This should be optimized to a single compare. Testcase derived from gcc. - -//===---------------------------------------------------------------------===// - -Missed instcombine or reassociate transformation: -int a(int a, int b) { return (a==12)&(b>47)&(b<58); } - -The sgt and slt should be combined into a single comparison. Testcase derived -from gcc. 
- -//===---------------------------------------------------------------------===// - -Missed instcombine transformation: - - %382 = srem i32 %tmp14.i, 64 ; [#uses=1] - %383 = zext i32 %382 to i64 ; [#uses=1] - %384 = shl i64 %381, %383 ; [#uses=1] - %385 = icmp slt i32 %tmp14.i, 64 ; [#uses=1] - -The srem can be transformed to an and because if %tmp14.i is negative, the -shift is undefined. Testcase derived from 403.gcc. - -//===---------------------------------------------------------------------===// - -This is a range comparison on a divided result (from 403.gcc): - - %1337 = sdiv i32 %1336, 8 ; [#uses=1] - %.off.i208 = add i32 %1336, 7 ; [#uses=1] - %1338 = icmp ult i32 %.off.i208, 15 ; [#uses=1] - -We already catch this (removing the sdiv) if there isn't an add, we should -handle the 'add' as well. This is a common idiom with its builtin_alloca code. -C testcase: - -int a(int x) { return (unsigned)(x/16+7) < 15; } - -Another similar case involves truncations on 64-bit targets: - - %361 = sdiv i64 %.046, 8 ; [#uses=1] - %362 = trunc i64 %361 to i32 ; [#uses=2] -... - %367 = icmp eq i32 %362, 0 ; [#uses=1] - -//===---------------------------------------------------------------------===// - -Missed instcombine/dagcombine transformation: -define void @lshift_lt(i8 zeroext %a) nounwind { -entry: - %conv = zext i8 %a to i32 - %shl = shl i32 %conv, 3 - %cmp = icmp ult i32 %shl, 33 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @bar() nounwind - ret void - -if.end: - ret void -} -declare void @bar() nounwind - -The shift should be eliminated. Testcase derived from gcc.
- -//===---------------------------------------------------------------------===// - -These compile into different code, one gets recognized as a switch and the -other doesn't due to phase ordering issues (PR6212): - -int test1(int mainType, int subType) { - if (mainType == 7) - subType = 4; - else if (mainType == 9) - subType = 6; - else if (mainType == 11) - subType = 9; - return subType; -} - -int test2(int mainType, int subType) { - if (mainType == 7) - subType = 4; - if (mainType == 9) - subType = 6; - if (mainType == 11) - subType = 9; - return subType; -} - -//===---------------------------------------------------------------------===// - -The following test case (from PR6576): - -define i32 @mul(i32 %a, i32 %b) nounwind readnone { -entry: - %cond1 = icmp eq i32 %b, 0 ; <i1> [#uses=1] - br i1 %cond1, label %exit, label %bb.nph -bb.nph: ; preds = %entry - %tmp = mul i32 %b, %a ; <i32> [#uses=1] - ret i32 %tmp -exit: ; preds = %entry - ret i32 0 -} - -could be reduced to: - -define i32 @mul(i32 %a, i32 %b) nounwind readnone { -entry: - %tmp = mul i32 %b, %a - ret i32 %tmp -} - -//===---------------------------------------------------------------------===// - -We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates. -See GCC PR34949 - -Another interesting case is that something related could be used for variables -that go const after their ctor has finished. In these cases, globalopt (which -can statically run the constructor) could mark the global const (so it gets put -in the readonly section). A testcase would be: - -#include <complex> -using namespace std; -const complex<char> should_be_in_rodata (42,-42); -complex<char> should_be_in_data (42,-42); -complex<char> should_be_in_bss; - -Where we currently evaluate the ctors but the globals don't become const because -the optimizer doesn't know they "become const" after the ctor is done. See -GCC PR4131 for more examples. 
- -//===---------------------------------------------------------------------===// - -In this code: - -long foo(long x) { - return x > 1 ? x : 1; -} - -LLVM emits a comparison with 1 instead of 0. 0 would be equivalent -and cheaper on most targets. - -LLVM prefers comparisons with zero over non-zero in general, but in this -case it chooses instead to keep the max operation obvious. - -//===---------------------------------------------------------------------===// - -define void @a(i32 %x) nounwind { -entry: - switch i32 %x, label %if.end [ - i32 0, label %if.then - i32 1, label %if.then - i32 2, label %if.then - i32 3, label %if.then - i32 5, label %if.then - ] -if.then: - tail call void @foo() nounwind - ret void -if.end: - ret void -} -declare void @foo() - -Generated code on x86-64 (other platforms give similar results): -a: - cmpl $5, %edi - ja LBB2_2 - cmpl $4, %edi - jne LBB2_3 -.LBB0_2: - ret -.LBB0_3: - jmp foo # TAILCALL - -If we wanted to be really clever, we could simplify the whole thing to -something like the following, which eliminates a branch: - xorl $1, %edi - cmpl $4, %edi - ja .LBB0_2 - ret -.LBB0_2: - jmp foo # TAILCALL - -//===---------------------------------------------------------------------===// - -We compile this: - -int foo(int a) { return (a & (~15)) / 16; } - -Into: - -define i32 @foo(i32 %a) nounwind readnone ssp { -entry: - %and = and i32 %a, -16 - %div = sdiv i32 %and, 16 - ret i32 %div -} - -but this code (X & -A)/A is X >> log2(A) when A is a power of 2, so this case -should be instcombined into just "a >> 4".
- -We do get this at the codegen level, so something knows about it, but -instcombine should catch it earlier: - -_foo: ## @foo -## %bb.0: ## %entry - movl %edi, %eax - sarl $4, %eax - ret - -//===---------------------------------------------------------------------===// - -This code (from GCC PR28685): - -int test(int a, int b) { - int lt = a < b; - int eq = a == b; - if (lt) - return 1; - return eq; -} - -Is compiled to: - -define i32 @test(i32 %a, i32 %b) nounwind readnone ssp { -entry: - %cmp = icmp slt i32 %a, %b - br i1 %cmp, label %return, label %if.end - -if.end: ; preds = %entry - %cmp5 = icmp eq i32 %a, %b - %conv6 = zext i1 %cmp5 to i32 - ret i32 %conv6 - -return: ; preds = %entry - ret i32 1 -} - -it could be: - -define i32 @test__(i32 %a, i32 %b) nounwind readnone ssp { -entry: - %0 = icmp sle i32 %a, %b - %retval = zext i1 %0 to i32 - ret i32 %retval -} - -//===---------------------------------------------------------------------===// - -This code can be seen in viterbi: - - %64 = call noalias i8* @malloc(i64 %62) nounwind -... - %67 = call i64 @llvm.objectsize.i64(i8* %64, i1 false) nounwind - %68 = call i8* @__memset_chk(i8* %64, i32 0, i64 %62, i64 %67) nounwind - -llvm.objectsize.i64 should be taught about malloc/calloc, allowing it to -fold to %62. This is a security win (overflows of malloc will get caught) -and also a performance win by exposing more memsets to the optimizer. - -This occurs several times in viterbi. - -Note that this would change the semantics of @llvm.objectsize which by its -current definition always folds to a constant. 
We also should make sure that -we remove checking in code like - - char *p = malloc(strlen(s)+1); + + +//===---------------------------------------------------------------------===// + +Missed instcombine transformation: +define i1 @a(i32 %x) nounwind readnone { +entry: + %cmp = icmp eq i32 %x, 30 + %sub = add i32 %x, -30 + %cmp2 = icmp ugt i32 %sub, 9 + %or = or i1 %cmp, %cmp2 + ret i1 %or +} +This should be optimized to a single compare. Testcase derived from gcc. + +//===---------------------------------------------------------------------===// + +Missed instcombine or reassociate transformation: +int a(int a, int b) { return (a==12)&(b>47)&(b<58); } + +The sgt and slt should be combined into a single comparison. Testcase derived +from gcc. + +//===---------------------------------------------------------------------===// + +Missed instcombine transformation: + + %382 = srem i32 %tmp14.i, 64 ; [#uses=1] + %383 = zext i32 %382 to i64 ; [#uses=1] + %384 = shl i64 %381, %383 ; [#uses=1] + %385 = icmp slt i32 %tmp14.i, 64 ; [#uses=1] + +The srem can be transformed to an and because if %tmp14.i is negative, the +shift is undefined. Testcase derived from 403.gcc. + +//===---------------------------------------------------------------------===// + +This is a range comparison on a divided result (from 403.gcc): + + %1337 = sdiv i32 %1336, 8 ; [#uses=1] + %.off.i208 = add i32 %1336, 7 ; [#uses=1] + %1338 = icmp ult i32 %.off.i208, 15 ; [#uses=1] + +We already catch this (removing the sdiv) if there isn't an add, we should +handle the 'add' as well. This is a common idiom with its builtin_alloca code. +C testcase: + +int a(int x) { return (unsigned)(x/16+7) < 15; } + +Another similar case involves truncations on 64-bit targets: + + %361 = sdiv i64 %.046, 8 ; [#uses=1] + %362 = trunc i64 %361 to i32 ; [#uses=2] +...
+ %367 = icmp eq i32 %362, 0 ; [#uses=1] + +//===---------------------------------------------------------------------===// + +Missed instcombine/dagcombine transformation: +define void @lshift_lt(i8 zeroext %a) nounwind { +entry: + %conv = zext i8 %a to i32 + %shl = shl i32 %conv, 3 + %cmp = icmp ult i32 %shl, 33 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @bar() nounwind + ret void + +if.end: + ret void +} +declare void @bar() nounwind + +The shift should be eliminated. Testcase derived from gcc. + +//===---------------------------------------------------------------------===// + +These compile into different code, one gets recognized as a switch and the +other doesn't due to phase ordering issues (PR6212): + +int test1(int mainType, int subType) { + if (mainType == 7) + subType = 4; + else if (mainType == 9) + subType = 6; + else if (mainType == 11) + subType = 9; + return subType; +} + +int test2(int mainType, int subType) { + if (mainType == 7) + subType = 4; + if (mainType == 9) + subType = 6; + if (mainType == 11) + subType = 9; + return subType; +} + +//===---------------------------------------------------------------------===// + +The following test case (from PR6576): + +define i32 @mul(i32 %a, i32 %b) nounwind readnone { +entry: + %cond1 = icmp eq i32 %b, 0 ; <i1> [#uses=1] + br i1 %cond1, label %exit, label %bb.nph +bb.nph: ; preds = %entry + %tmp = mul i32 %b, %a ; <i32> [#uses=1] + ret i32 %tmp +exit: ; preds = %entry + ret i32 0 +} + +could be reduced to: + +define i32 @mul(i32 %a, i32 %b) nounwind readnone { +entry: + %tmp = mul i32 %b, %a + ret i32 %tmp +} + +//===---------------------------------------------------------------------===// + +We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates. +See GCC PR34949 + +Another interesting case is that something related could be used for variables +that go const after their ctor has finished. 
In these cases, globalopt (which +can statically run the constructor) could mark the global const (so it gets put +in the readonly section). A testcase would be: + +#include <complex> +using namespace std; +const complex<char> should_be_in_rodata (42,-42); +complex<char> should_be_in_data (42,-42); +complex<char> should_be_in_bss; + +Where we currently evaluate the ctors but the globals don't become const because +the optimizer doesn't know they "become const" after the ctor is done. See +GCC PR4131 for more examples. + +//===---------------------------------------------------------------------===// + +In this code: + +long foo(long x) { + return x > 1 ? x : 1; +} + +LLVM emits a comparison with 1 instead of 0. 0 would be equivalent +and cheaper on most targets. + +LLVM prefers comparisons with zero over non-zero in general, but in this +case it chooses instead to keep the max operation obvious. + +//===---------------------------------------------------------------------===// + +define void @a(i32 %x) nounwind { +entry: + switch i32 %x, label %if.end [ + i32 0, label %if.then + i32 1, label %if.then + i32 2, label %if.then + i32 3, label %if.then + i32 5, label %if.then + ] +if.then: + tail call void @foo() nounwind + ret void +if.end: + ret void +} +declare void @foo() + +Generated code on x86-64 (other platforms give similar results): +a: + cmpl $5, %edi + ja LBB2_2 + cmpl $4, %edi + jne LBB2_3 +.LBB0_2: + ret +.LBB0_3: + jmp foo # TAILCALL + +If we wanted to be really clever, we could simplify the whole thing to +something like the following, which eliminates a branch: + xorl $1, %edi + cmpl $4, %edi + ja .LBB0_2 + ret +.LBB0_2: + jmp foo # TAILCALL + +//===---------------------------------------------------------------------===// + +We compile this: + +int foo(int a) { return (a & (~15)) / 16; } + +Into: + +define i32 @foo(i32 %a) nounwind readnone ssp { +entry: + %and = and i32 %a, -16 + %div = sdiv i32 %and, 16 + ret i32 %div +} + +but this code (X & -A)/A is
X >> log2(A) when A is a power of 2, so this case +should be instcombined into just "a >> 4". + +We do get this at the codegen level, so something knows about it, but +instcombine should catch it earlier: + +_foo: ## @foo +## %bb.0: ## %entry + movl %edi, %eax + sarl $4, %eax + ret + +//===---------------------------------------------------------------------===// + +This code (from GCC PR28685): + +int test(int a, int b) { + int lt = a < b; + int eq = a == b; + if (lt) + return 1; + return eq; +} + +Is compiled to: + +define i32 @test(i32 %a, i32 %b) nounwind readnone ssp { +entry: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp5 = icmp eq i32 %a, %b + %conv6 = zext i1 %cmp5 to i32 + ret i32 %conv6 + +return: ; preds = %entry + ret i32 1 +} + +it could be: + +define i32 @test__(i32 %a, i32 %b) nounwind readnone ssp { +entry: + %0 = icmp sle i32 %a, %b + %retval = zext i1 %0 to i32 + ret i32 %retval +} + +//===---------------------------------------------------------------------===// + +This code can be seen in viterbi: + + %64 = call noalias i8* @malloc(i64 %62) nounwind +... + %67 = call i64 @llvm.objectsize.i64(i8* %64, i1 false) nounwind + %68 = call i8* @__memset_chk(i8* %64, i32 0, i64 %62, i64 %67) nounwind + +llvm.objectsize.i64 should be taught about malloc/calloc, allowing it to +fold to %62. This is a security win (overflows of malloc will get caught) +and also a performance win by exposing more memsets to the optimizer. + +This occurs several times in viterbi. + +Note that this would change the semantics of @llvm.objectsize which by its +current definition always folds to a constant. 
We also should make sure that +we remove checking in code like + + char *p = malloc(strlen(s)+1); __strcpy_chk(p, s, __builtin_object_size(p, 0)); - -//===---------------------------------------------------------------------===// - -clang -O3 currently compiles this code - -int g(unsigned int a) { - unsigned int c[100]; - c[10] = a; - c[11] = a; - unsigned int b = c[10] + c[11]; - if(b > a*2) a = 4; - else a = 8; - return a + 7; -} - -into - -define i32 @g(i32 a) nounwind readnone { - %add = shl i32 %a, 1 - %mul = shl i32 %a, 1 - %cmp = icmp ugt i32 %add, %mul - %a.addr.0 = select i1 %cmp, i32 11, i32 15 - ret i32 %a.addr.0 -} - -The icmp should fold to false. This CSE opportunity is only available -after GVN and InstCombine have run. - -//===---------------------------------------------------------------------===// - -memcpyopt should turn this: - -define i8* @test10(i32 %x) { - %alloc = call noalias i8* @malloc(i32 %x) nounwind - call void @llvm.memset.p0i8.i32(i8* %alloc, i8 0, i32 %x, i32 1, i1 false) - ret i8* %alloc -} - -into a call to calloc. We should make sure that we analyze calloc as -aggressively as malloc though. - -//===---------------------------------------------------------------------===// - -clang -O3 doesn't optimize this: - -void f1(int* begin, int* end) { - std::fill(begin, end, 0); -} - -into a memset. This is PR8942. 
- -//===---------------------------------------------------------------------===// - -clang -O3 -fno-exceptions currently compiles this code: - -void f(int N) { - std::vector<int> v(N); - - extern void sink(void*); sink(&v); -} - -into - -define void @_Z1fi(i32 %N) nounwind { -entry: - %v2 = alloca [3 x i32*], align 8 - %v2.sub = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 0 - %tmpcast = bitcast [3 x i32*]* %v2 to %"class.std::vector"* - %conv = sext i32 %N to i64 - store i32* null, i32** %v2.sub, align 8, !tbaa !0 - %tmp3.i.i.i.i.i = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 1 - store i32* null, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0 - %tmp4.i.i.i.i.i = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 2 - store i32* null, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0 - %cmp.i.i.i.i = icmp eq i32 %N, 0 - br i1 %cmp.i.i.i.i, label %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i.i, label %cond.true.i.i.i.i - -_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i.i: ; preds = %entry - store i32* null, i32** %v2.sub, align 8, !tbaa !0 - store i32* null, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0 - %add.ptr.i5.i.i = getelementptr inbounds i32* null, i64 %conv - store i32* %add.ptr.i5.i.i, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0 - br label %_ZNSt6vectorIiSaIiEEC1EmRKiRKS0_.exit - -cond.true.i.i.i.i: ; preds = %entry - %cmp.i.i.i.i.i = icmp slt i32 %N, 0 - br i1 %cmp.i.i.i.i.i, label %if.then.i.i.i.i.i, label %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.i.i - -if.then.i.i.i.i.i: ; preds = %cond.true.i.i.i.i - call void @_ZSt17__throw_bad_allocv() noreturn nounwind - unreachable - -_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.i.i: ; preds = %cond.true.i.i.i.i - %mul.i.i.i.i.i = shl i64 %conv, 2 - %call3.i.i.i.i.i = call noalias i8* @_Znwm(i64 %mul.i.i.i.i.i) nounwind - %0 = bitcast i8* %call3.i.i.i.i.i to i32* - store i32* %0, i32** %v2.sub, align 8, !tbaa !0 - store i32* %0, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0 - %add.ptr.i.i.i = getelementptr inbounds i32* 
%0, i64 %conv - store i32* %add.ptr.i.i.i, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0 - call void @llvm.memset.p0i8.i64(i8* %call3.i.i.i.i.i, i8 0, i64 %mul.i.i.i.i.i, i32 4, i1 false) - br label %_ZNSt6vectorIiSaIiEEC1EmRKiRKS0_.exit - -This is just the handling the construction of the vector. Most surprising here -is the fact that all three null stores in %entry are dead (because we do no -cross-block DSE). - -Also surprising is that %conv isn't simplified to 0 in %....exit.thread.i.i. -This is a because the client of LazyValueInfo doesn't simplify all instruction -operands, just selected ones. - -//===---------------------------------------------------------------------===// - -clang -O3 -fno-exceptions currently compiles this code: - -void f(char* a, int n) { - __builtin_memset(a, 0, n); - for (int i = 0; i < n; ++i) - a[i] = 0; -} - -into: - -define void @_Z1fPci(i8* nocapture %a, i32 %n) nounwind { -entry: - %conv = sext i32 %n to i64 - tail call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 %conv, i32 1, i1 false) - %cmp8 = icmp sgt i32 %n, 0 - br i1 %cmp8, label %for.body.lr.ph, label %for.end - -for.body.lr.ph: ; preds = %entry - %tmp10 = add i32 %n, -1 - %tmp11 = zext i32 %tmp10 to i64 - %tmp12 = add i64 %tmp11, 1 - call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 %tmp12, i32 1, i1 false) - ret void - -for.end: ; preds = %entry - ret void -} - -This shouldn't need the ((zext (%n - 1)) + 1) game, and it should ideally fold -the two memset's together. - -The issue with the addition only occurs in 64-bit mode, and appears to be at -least partially caused by Scalar Evolution not keeping its cache updated: it -returns the "wrong" result immediately after indvars runs, but figures out the -expected result if it is run from scratch on IR resulting from running indvars. 
- -//===---------------------------------------------------------------------===// - -clang -O3 -fno-exceptions currently compiles this code: - -struct S { - unsigned short m1, m2; - unsigned char m3, m4; -}; - -void f(int N) { - std::vector<S> v(N); - extern void sink(void*); sink(&v); -} - -into poor code for zero-initializing 'v' when N is >0. The problem is that -S is only 6 bytes, but each element is 8 byte-aligned. We generate a loop and -4 stores on each iteration. If the struct were 8 bytes, this gets turned into -a memset. - -In order to handle this we have to: - A) Teach clang to generate metadata for memsets of structs that have holes in - them. - B) Teach clang to use such a memset for zero init of this struct (since it has - a hole), instead of doing elementwise zeroing. - -//===---------------------------------------------------------------------===// - -clang -O3 currently compiles this code: - -extern const int magic; -double f() { return 0.0 * magic; } - -into - -@magic = external constant i32 - -define double @_Z1fv() nounwind readnone { -entry: - %tmp = load i32* @magic, align 4, !tbaa !0 - %conv = sitofp i32 %tmp to double - %mul = fmul double %conv, 0.000000e+00 - ret double %mul -} - -We should be able to fold away this fmul to 0.0. More generally, fmul(x,0.0) -can be folded to 0.0 if we can prove that the LHS is not -0.0, not a NaN, and -not an INF. The CannotBeNegativeZero predicate in value tracking should be -extended to support general "fpclassify" operations that can return -yes/no/unknown for each of these predicates. - -In this predicate, we know that uitofp is trivially never NaN or -0.0, and -we know that it isn't +/-Inf if the floating point type has enough exponent bits -to represent the largest integer value as < inf. 
- -//===---------------------------------------------------------------------===// - -When optimizing a transformation that can change the sign of 0.0 (such as the -0.0*val -> 0.0 transformation above), it might be provable that the sign of the -expression doesn't matter. For example, by the above rules, we can't transform -fmul(sitofp(x), 0.0) into 0.0, because x might be -1 and the result of the -expression is defined to be -0.0. - -If we look at the uses of the fmul for example, we might be able to prove that -all uses don't care about the sign of zero. For example, if we have: - - fadd(fmul(sitofp(x), 0.0), 2.0) - -Since we know that x+2.0 doesn't care about the sign of any zeros in X, we can -transform the fmul to 0.0, and then the fadd to 2.0. - -//===---------------------------------------------------------------------===// - -We should enhance memcpy/memcpy/memset to allow a metadata node on them -indicating that some bytes of the transfer are undefined. This is useful for -frontends like clang when lowering struct copies, when some elements of the -struct are undefined. Consider something like this: - -struct x { - char a; - int b[4]; -}; -void foo(struct x*P); -struct x testfunc() { - struct x V1, V2; - foo(&V1); - V2 = V1; - - return V2; -} - -We currently compile this to: -$ clang t.c -S -o - -O0 -emit-llvm | opt -sroa -S - - -%struct.x = type { i8, [4 x i32] } - -define void @testfunc(%struct.x* sret %agg.result) nounwind ssp { -entry: - %V1 = alloca %struct.x, align 4 - call void @foo(%struct.x* %V1) - %tmp1 = bitcast %struct.x* %V1 to i8* - %0 = bitcast %struct.x* %V1 to i160* - %srcval1 = load i160* %0, align 4 - %tmp2 = bitcast %struct.x* %agg.result to i8* - %1 = bitcast %struct.x* %agg.result to i160* - store i160 %srcval1, i160* %1, align 4 - ret void -} - -This happens because SRoA sees that the temp alloca has is being memcpy'd into -and out of and it has holes and it has to be conservative. 
If we knew about the -holes, then this could be much much better. - -Having information about these holes would also improve memcpy (etc) lowering at -llc time when it gets inlined, because we can use smaller transfers. This also -avoids partial register stalls in some important cases. - -//===---------------------------------------------------------------------===// - -We don't fold (icmp (add) (add)) unless the two adds only have a single use. -There are a lot of cases that we're refusing to fold in (e.g.) 256.bzip2, for -example: - - %indvar.next90 = add i64 %indvar89, 1 ;; Has 2 uses - %tmp96 = add i64 %tmp95, 1 ;; Has 1 use - %exitcond97 = icmp eq i64 %indvar.next90, %tmp96 - -We don't fold this because we don't want to introduce an overlapped live range -of the ivar. However if we can make this more aggressive without causing -performance issues in two ways: - -1. If *either* the LHS or RHS has a single use, we can definitely do the - transformation. In the overlapping liverange case we're trading one register - use for one fewer operation, which is a reasonable trade. Before doing this - we should verify that the llc output actually shrinks for some benchmarks. -2. If both ops have multiple uses, we can still fold it if the operations are - both sinkable to *after* the icmp (e.g. in a subsequent block) which doesn't - increase register pressure. - -There are a ton of icmp's we aren't simplifying because of the reg pressure -concern. Care is warranted here though because many of these are induction -variables and other cases that matter a lot to performance, like the above. 
-Here's a blob of code that you can drop into the bottom of visitICmp to see some -missed cases: - - { Value *A, *B, *C, *D; - if (match(Op0, m_Add(m_Value(A), m_Value(B))) && - match(Op1, m_Add(m_Value(C), m_Value(D))) && - (A == C || A == D || B == C || B == D)) { - errs() << "OP0 = " << *Op0 << " U=" << Op0->getNumUses() << "\n"; - errs() << "OP1 = " << *Op1 << " U=" << Op1->getNumUses() << "\n"; - errs() << "CMP = " << I << "\n\n"; - } - } - -//===---------------------------------------------------------------------===// - -define i1 @test1(i32 %x) nounwind { - %and = and i32 %x, 3 - %cmp = icmp ult i32 %and, 2 - ret i1 %cmp -} - -Can be folded to (x & 2) == 0. - -define i1 @test2(i32 %x) nounwind { - %and = and i32 %x, 3 - %cmp = icmp ugt i32 %and, 1 - ret i1 %cmp -} - -Can be folded to (x & 2) != 0. - -SimplifyDemandedBits shrinks the "and" constant to 2 but instcombine misses the -icmp transform. - -//===---------------------------------------------------------------------===// - -This code: - -typedef struct { -int f1:1; -int f2:1; -int f3:1; -int f4:29; -} t1; - -typedef struct { -int f1:1; -int f2:1; -int f3:30; -} t2; - -t1 s1; -t2 s2; - -void func1(void) -{ -s1.f1 = s2.f1; -s1.f2 = s2.f2; -} - -Compiles into this IR (on x86-64 at least): - -%struct.t1 = type { i8, [3 x i8] } -@s2 = global %struct.t1 zeroinitializer, align 4 -@s1 = global %struct.t1 zeroinitializer, align 4 -define void @func1() nounwind ssp noredzone { -entry: - %0 = load i32* bitcast (%struct.t1* @s2 to i32*), align 4 - %bf.val.sext5 = and i32 %0, 1 - %1 = load i32* bitcast (%struct.t1* @s1 to i32*), align 4 - %2 = and i32 %1, -4 - %3 = or i32 %2, %bf.val.sext5 - %bf.val.sext26 = and i32 %0, 2 - %4 = or i32 %3, %bf.val.sext26 - store i32 %4, i32* bitcast (%struct.t1* @s1 to i32*), align 4 - ret void -} - -The two or/and's should be merged into one each. 
- -//===---------------------------------------------------------------------===// - -Machine level code hoisting can be useful in some cases. For example, PR9408 -is about: - -typedef union { - void (*f1)(int); - void (*f2)(long); -} funcs; - -void foo(funcs f, int which) { - int a = 5; - if (which) { - f.f1(a); - } else { - f.f2(a); - } -} - -which we compile to: - -foo: # @foo -# %bb.0: # %entry - pushq %rbp - movq %rsp, %rbp - testl %esi, %esi - movq %rdi, %rax - je .LBB0_2 -# %bb.1: # %if.then - movl $5, %edi - callq *%rax - popq %rbp - ret -.LBB0_2: # %if.else - movl $5, %edi - callq *%rax - popq %rbp - ret - -Note that bb1 and bb2 are the same. This doesn't happen at the IR level -because one call is passing an i32 and the other is passing an i64. - -//===---------------------------------------------------------------------===// - -I see this sort of pattern in 176.gcc in a few places (e.g. the start of -store_bit_field). The rem should be replaced with a multiply and subtract: - - %3 = sdiv i32 %A, %B - %4 = srem i32 %A, %B - -Similarly for udiv/urem. Note that this shouldn't be done on X86 or ARM, -which can do this in a single operation (instruction or libcall). It is -probably best to do this in the code generator. - -//===---------------------------------------------------------------------===// - -unsigned foo(unsigned x, unsigned y) { return (x & y) == 0 || x == 0; } -should fold to (x & y) == 0. - -//===---------------------------------------------------------------------===// - -unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; } -should fold to x > y. 
- -//===---------------------------------------------------------------------===// + +//===---------------------------------------------------------------------===// + +clang -O3 currently compiles this code + +int g(unsigned int a) { + unsigned int c[100]; + c[10] = a; + c[11] = a; + unsigned int b = c[10] + c[11]; + if(b > a*2) a = 4; + else a = 8; + return a + 7; +} + +into + +define i32 @g(i32 a) nounwind readnone { + %add = shl i32 %a, 1 + %mul = shl i32 %a, 1 + %cmp = icmp ugt i32 %add, %mul + %a.addr.0 = select i1 %cmp, i32 11, i32 15 + ret i32 %a.addr.0 +} + +The icmp should fold to false. This CSE opportunity is only available +after GVN and InstCombine have run. + +//===---------------------------------------------------------------------===// + +memcpyopt should turn this: + +define i8* @test10(i32 %x) { + %alloc = call noalias i8* @malloc(i32 %x) nounwind + call void @llvm.memset.p0i8.i32(i8* %alloc, i8 0, i32 %x, i32 1, i1 false) + ret i8* %alloc +} + +into a call to calloc. We should make sure that we analyze calloc as +aggressively as malloc though. + +//===---------------------------------------------------------------------===// + +clang -O3 doesn't optimize this: + +void f1(int* begin, int* end) { + std::fill(begin, end, 0); +} + +into a memset. This is PR8942. 
+ +//===---------------------------------------------------------------------===// + +clang -O3 -fno-exceptions currently compiles this code: + +void f(int N) { + std::vector<int> v(N); + + extern void sink(void*); sink(&v); +} + +into + +define void @_Z1fi(i32 %N) nounwind { +entry: + %v2 = alloca [3 x i32*], align 8 + %v2.sub = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 0 + %tmpcast = bitcast [3 x i32*]* %v2 to %"class.std::vector"* + %conv = sext i32 %N to i64 + store i32* null, i32** %v2.sub, align 8, !tbaa !0 + %tmp3.i.i.i.i.i = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 1 + store i32* null, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0 + %tmp4.i.i.i.i.i = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 2 + store i32* null, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0 + %cmp.i.i.i.i = icmp eq i32 %N, 0 + br i1 %cmp.i.i.i.i, label %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i.i, label %cond.true.i.i.i.i + +_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i.i: ; preds = %entry + store i32* null, i32** %v2.sub, align 8, !tbaa !0 + store i32* null, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0 + %add.ptr.i5.i.i = getelementptr inbounds i32* null, i64 %conv + store i32* %add.ptr.i5.i.i, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0 + br label %_ZNSt6vectorIiSaIiEEC1EmRKiRKS0_.exit + +cond.true.i.i.i.i: ; preds = %entry + %cmp.i.i.i.i.i = icmp slt i32 %N, 0 + br i1 %cmp.i.i.i.i.i, label %if.then.i.i.i.i.i, label %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.i.i + +if.then.i.i.i.i.i: ; preds = %cond.true.i.i.i.i + call void @_ZSt17__throw_bad_allocv() noreturn nounwind + unreachable + +_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.i.i: ; preds = %cond.true.i.i.i.i + %mul.i.i.i.i.i = shl i64 %conv, 2 + %call3.i.i.i.i.i = call noalias i8* @_Znwm(i64 %mul.i.i.i.i.i) nounwind + %0 = bitcast i8* %call3.i.i.i.i.i to i32* + store i32* %0, i32** %v2.sub, align 8, !tbaa !0 + store i32* %0, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0 + %add.ptr.i.i.i = getelementptr inbounds i32* 
%0, i64 %conv + store i32* %add.ptr.i.i.i, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0 + call void @llvm.memset.p0i8.i64(i8* %call3.i.i.i.i.i, i8 0, i64 %mul.i.i.i.i.i, i32 4, i1 false) + br label %_ZNSt6vectorIiSaIiEEC1EmRKiRKS0_.exit + +This is just the handling the construction of the vector. Most surprising here +is the fact that all three null stores in %entry are dead (because we do no +cross-block DSE). + +Also surprising is that %conv isn't simplified to 0 in %....exit.thread.i.i. +This is a because the client of LazyValueInfo doesn't simplify all instruction +operands, just selected ones. + +//===---------------------------------------------------------------------===// + +clang -O3 -fno-exceptions currently compiles this code: + +void f(char* a, int n) { + __builtin_memset(a, 0, n); + for (int i = 0; i < n; ++i) + a[i] = 0; +} + +into: + +define void @_Z1fPci(i8* nocapture %a, i32 %n) nounwind { +entry: + %conv = sext i32 %n to i64 + tail call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 %conv, i32 1, i1 false) + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %tmp10 = add i32 %n, -1 + %tmp11 = zext i32 %tmp10 to i64 + %tmp12 = add i64 %tmp11, 1 + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 %tmp12, i32 1, i1 false) + ret void + +for.end: ; preds = %entry + ret void +} + +This shouldn't need the ((zext (%n - 1)) + 1) game, and it should ideally fold +the two memset's together. + +The issue with the addition only occurs in 64-bit mode, and appears to be at +least partially caused by Scalar Evolution not keeping its cache updated: it +returns the "wrong" result immediately after indvars runs, but figures out the +expected result if it is run from scratch on IR resulting from running indvars. 
+ +//===---------------------------------------------------------------------===// + +clang -O3 -fno-exceptions currently compiles this code: + +struct S { + unsigned short m1, m2; + unsigned char m3, m4; +}; + +void f(int N) { + std::vector<S> v(N); + extern void sink(void*); sink(&v); +} + +into poor code for zero-initializing 'v' when N is >0. The problem is that +S is only 6 bytes, but each element is 8 byte-aligned. We generate a loop and +4 stores on each iteration. If the struct were 8 bytes, this gets turned into +a memset. + +In order to handle this we have to: + A) Teach clang to generate metadata for memsets of structs that have holes in + them. + B) Teach clang to use such a memset for zero init of this struct (since it has + a hole), instead of doing elementwise zeroing. + +//===---------------------------------------------------------------------===// + +clang -O3 currently compiles this code: + +extern const int magic; +double f() { return 0.0 * magic; } + +into + +@magic = external constant i32 + +define double @_Z1fv() nounwind readnone { +entry: + %tmp = load i32* @magic, align 4, !tbaa !0 + %conv = sitofp i32 %tmp to double + %mul = fmul double %conv, 0.000000e+00 + ret double %mul +} + +We should be able to fold away this fmul to 0.0. More generally, fmul(x,0.0) +can be folded to 0.0 if we can prove that the LHS is not -0.0, not a NaN, and +not an INF. The CannotBeNegativeZero predicate in value tracking should be +extended to support general "fpclassify" operations that can return +yes/no/unknown for each of these predicates. + +In this predicate, we know that uitofp is trivially never NaN or -0.0, and +we know that it isn't +/-Inf if the floating point type has enough exponent bits +to represent the largest integer value as < inf. 
+ +//===---------------------------------------------------------------------===// + +When optimizing a transformation that can change the sign of 0.0 (such as the +0.0*val -> 0.0 transformation above), it might be provable that the sign of the +expression doesn't matter. For example, by the above rules, we can't transform +fmul(sitofp(x), 0.0) into 0.0, because x might be -1 and the result of the +expression is defined to be -0.0. + +If we look at the uses of the fmul for example, we might be able to prove that +all uses don't care about the sign of zero. For example, if we have: + + fadd(fmul(sitofp(x), 0.0), 2.0) + +Since we know that x+2.0 doesn't care about the sign of any zeros in X, we can +transform the fmul to 0.0, and then the fadd to 2.0. + +//===---------------------------------------------------------------------===// + +We should enhance memcpy/memcpy/memset to allow a metadata node on them +indicating that some bytes of the transfer are undefined. This is useful for +frontends like clang when lowering struct copies, when some elements of the +struct are undefined. Consider something like this: + +struct x { + char a; + int b[4]; +}; +void foo(struct x*P); +struct x testfunc() { + struct x V1, V2; + foo(&V1); + V2 = V1; + + return V2; +} + +We currently compile this to: +$ clang t.c -S -o - -O0 -emit-llvm | opt -sroa -S + + +%struct.x = type { i8, [4 x i32] } + +define void @testfunc(%struct.x* sret %agg.result) nounwind ssp { +entry: + %V1 = alloca %struct.x, align 4 + call void @foo(%struct.x* %V1) + %tmp1 = bitcast %struct.x* %V1 to i8* + %0 = bitcast %struct.x* %V1 to i160* + %srcval1 = load i160* %0, align 4 + %tmp2 = bitcast %struct.x* %agg.result to i8* + %1 = bitcast %struct.x* %agg.result to i160* + store i160 %srcval1, i160* %1, align 4 + ret void +} + +This happens because SRoA sees that the temp alloca has is being memcpy'd into +and out of and it has holes and it has to be conservative. 
If we knew about the +holes, then this could be much much better. + +Having information about these holes would also improve memcpy (etc) lowering at +llc time when it gets inlined, because we can use smaller transfers. This also +avoids partial register stalls in some important cases. + +//===---------------------------------------------------------------------===// + +We don't fold (icmp (add) (add)) unless the two adds only have a single use. +There are a lot of cases that we're refusing to fold in (e.g.) 256.bzip2, for +example: + + %indvar.next90 = add i64 %indvar89, 1 ;; Has 2 uses + %tmp96 = add i64 %tmp95, 1 ;; Has 1 use + %exitcond97 = icmp eq i64 %indvar.next90, %tmp96 + +We don't fold this because we don't want to introduce an overlapped live range +of the ivar. However if we can make this more aggressive without causing +performance issues in two ways: + +1. If *either* the LHS or RHS has a single use, we can definitely do the + transformation. In the overlapping liverange case we're trading one register + use for one fewer operation, which is a reasonable trade. Before doing this + we should verify that the llc output actually shrinks for some benchmarks. +2. If both ops have multiple uses, we can still fold it if the operations are + both sinkable to *after* the icmp (e.g. in a subsequent block) which doesn't + increase register pressure. + +There are a ton of icmp's we aren't simplifying because of the reg pressure +concern. Care is warranted here though because many of these are induction +variables and other cases that matter a lot to performance, like the above. 
+Here's a blob of code that you can drop into the bottom of visitICmp to see some +missed cases: + + { Value *A, *B, *C, *D; + if (match(Op0, m_Add(m_Value(A), m_Value(B))) && + match(Op1, m_Add(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + errs() << "OP0 = " << *Op0 << " U=" << Op0->getNumUses() << "\n"; + errs() << "OP1 = " << *Op1 << " U=" << Op1->getNumUses() << "\n"; + errs() << "CMP = " << I << "\n\n"; + } + } + +//===---------------------------------------------------------------------===// + +define i1 @test1(i32 %x) nounwind { + %and = and i32 %x, 3 + %cmp = icmp ult i32 %and, 2 + ret i1 %cmp +} + +Can be folded to (x & 2) == 0. + +define i1 @test2(i32 %x) nounwind { + %and = and i32 %x, 3 + %cmp = icmp ugt i32 %and, 1 + ret i1 %cmp +} + +Can be folded to (x & 2) != 0. + +SimplifyDemandedBits shrinks the "and" constant to 2 but instcombine misses the +icmp transform. + +//===---------------------------------------------------------------------===// + +This code: + +typedef struct { +int f1:1; +int f2:1; +int f3:1; +int f4:29; +} t1; + +typedef struct { +int f1:1; +int f2:1; +int f3:30; +} t2; + +t1 s1; +t2 s2; + +void func1(void) +{ +s1.f1 = s2.f1; +s1.f2 = s2.f2; +} + +Compiles into this IR (on x86-64 at least): + +%struct.t1 = type { i8, [3 x i8] } +@s2 = global %struct.t1 zeroinitializer, align 4 +@s1 = global %struct.t1 zeroinitializer, align 4 +define void @func1() nounwind ssp noredzone { +entry: + %0 = load i32* bitcast (%struct.t1* @s2 to i32*), align 4 + %bf.val.sext5 = and i32 %0, 1 + %1 = load i32* bitcast (%struct.t1* @s1 to i32*), align 4 + %2 = and i32 %1, -4 + %3 = or i32 %2, %bf.val.sext5 + %bf.val.sext26 = and i32 %0, 2 + %4 = or i32 %3, %bf.val.sext26 + store i32 %4, i32* bitcast (%struct.t1* @s1 to i32*), align 4 + ret void +} + +The two or/and's should be merged into one each. 
+ +//===---------------------------------------------------------------------===// + +Machine level code hoisting can be useful in some cases. For example, PR9408 +is about: + +typedef union { + void (*f1)(int); + void (*f2)(long); +} funcs; + +void foo(funcs f, int which) { + int a = 5; + if (which) { + f.f1(a); + } else { + f.f2(a); + } +} + +which we compile to: + +foo: # @foo +# %bb.0: # %entry + pushq %rbp + movq %rsp, %rbp + testl %esi, %esi + movq %rdi, %rax + je .LBB0_2 +# %bb.1: # %if.then + movl $5, %edi + callq *%rax + popq %rbp + ret +.LBB0_2: # %if.else + movl $5, %edi + callq *%rax + popq %rbp + ret + +Note that bb1 and bb2 are the same. This doesn't happen at the IR level +because one call is passing an i32 and the other is passing an i64. + +//===---------------------------------------------------------------------===// + +I see this sort of pattern in 176.gcc in a few places (e.g. the start of +store_bit_field). The rem should be replaced with a multiply and subtract: + + %3 = sdiv i32 %A, %B + %4 = srem i32 %A, %B + +Similarly for udiv/urem. Note that this shouldn't be done on X86 or ARM, +which can do this in a single operation (instruction or libcall). It is +probably best to do this in the code generator. + +//===---------------------------------------------------------------------===// + +unsigned foo(unsigned x, unsigned y) { return (x & y) == 0 || x == 0; } +should fold to (x & y) == 0. + +//===---------------------------------------------------------------------===// + +unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; } +should fold to x > y. 
+ +//===---------------------------------------------------------------------===// diff --git a/contrib/libs/llvm12/lib/Target/X86/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/X86/.yandex_meta/licenses.list.txt index 92fbe1c084..f08f43f1d8 100644 --- a/contrib/libs/llvm12/lib/Target/X86/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/X86/.yandex_meta/licenses.list.txt @@ -1,309 +1,309 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================COPYRIGHT==================== - Trampoline->setComdat(C); - BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline); - IRBuilder<> Builder(EntryBB); - - -====================File: LICENSE.TXT==================== -============================================================================== -The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: -============================================================================== - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ----- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - -============================================================================== -Software from third parties included in the LLVM Project: -============================================================================== -The LLVM Project contains third party software which is under different license -terms. 
All such code will be identified clearly using at least one of two -mechanisms: -1) It will be in a separate directory tree with its own `LICENSE.txt` or - `LICENSE` file at the top containing the specific license and restrictions - which apply to that software, or -2) It will contain specific license and restriction terms at the top of every - file. - -============================================================================== -Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - - - -====================File: include/llvm/Support/LICENSE.TXT==================== -LLVM System Interface Library -------------------------------------------------------------------------------- -The LLVM System Interface Library is licensed under the Illinois Open Source -License and has the following additional copyright: - -Copyright (C) 2004 eXtensible Systems, Inc. - - -====================NCSA==================== -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================COPYRIGHT==================== + Trampoline->setComdat(C); + BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline); + IRBuilder<> Builder(EntryBB); + + +====================File: LICENSE.TXT==================== +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. 
All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + + + +====================File: include/llvm/Support/LICENSE.TXT==================== +LLVM System Interface Library +------------------------------------------------------------------------------- +The LLVM System Interface Library is licensed under the Illinois Open Source +License and has the following additional copyright: + +Copyright (C) 2004 eXtensible Systems, Inc. + + +====================NCSA==================== +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. diff --git a/contrib/libs/llvm12/lib/Target/X86/AsmParser/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/X86/AsmParser/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/X86/AsmParser/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/X86/AsmParser/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/X86/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/X86/AsmParser/ya.make index c30cd4cf65..f88283f4e5 100644 --- a/contrib/libs/llvm12/lib/Target/X86/AsmParser/ya.make +++ b/contrib/libs/llvm12/lib/Target/X86/AsmParser/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/X86/Disassembler/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/X86/Disassembler/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/X86/Disassembler/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/X86/Disassembler/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/X86/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/X86/Disassembler/ya.make index d1c75366c1..b55833692f 100644 --- a/contrib/libs/llvm12/lib/Target/X86/Disassembler/ya.make +++ b/contrib/libs/llvm12/lib/Target/X86/Disassembler/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/X86/MCTargetDesc/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/X86/MCTargetDesc/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/X86/MCTargetDesc/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/X86/MCTargetDesc/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/X86/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/X86/MCTargetDesc/ya.make index 565dda72f5..8da0d02f5b 100644 --- a/contrib/libs/llvm12/lib/Target/X86/MCTargetDesc/ya.make +++ b/contrib/libs/llvm12/lib/Target/X86/MCTargetDesc/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/X86/README-FPStack.txt b/contrib/libs/llvm12/lib/Target/X86/README-FPStack.txt index aab9759b35..39efd2dbcf 100644 --- a/contrib/libs/llvm12/lib/Target/X86/README-FPStack.txt +++ b/contrib/libs/llvm12/lib/Target/X86/README-FPStack.txt @@ -1,85 +1,85 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the X86 backend: FP stack related stuff -//===---------------------------------------------------------------------===// - -//===---------------------------------------------------------------------===// - -Some targets (e.g. athlons) prefer freep to fstp ST(0): -http://gcc.gnu.org/ml/gcc-patches/2004-04/msg00659.html - -//===---------------------------------------------------------------------===// - -This should use fiadd on chips where it is profitable: -double foo(double P, int *I) { return P+*I; } - -We have fiadd patterns now but the followings have the same cost and -complexity. We need a way to specify the later is more profitable. 
- -def FpADD32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fadd RFP:$src1, - (extloadf64f32 addr:$src2)))]>; - // ST(0) = ST(0) + [mem32] - -def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP:$dst, (fadd RFP:$src1, - (X86fild addr:$src2, i32)))]>; - // ST(0) = ST(0) + [mem32int] - -//===---------------------------------------------------------------------===// - -The FP stackifier should handle simple permutates to reduce number of shuffle -instructions, e.g. turning: - -fld P -> fld Q -fld Q fld P -fxch - -or: - -fxch -> fucomi -fucomi jl X -jg X - -Ideas: -http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html - - -//===---------------------------------------------------------------------===// - -Add a target specific hook to DAG combiner to handle SINT_TO_FP and -FP_TO_SINT when the source operand is already in memory. - -//===---------------------------------------------------------------------===// - -Open code rint,floor,ceil,trunc: -http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html -http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html - -Opencode the sincos[f] libcall. - -//===---------------------------------------------------------------------===// - -None of the FPStack instructions are handled in -X86RegisterInfo::foldMemoryOperand, which prevents the spiller from -folding spill code into the instructions. - -//===---------------------------------------------------------------------===// - -Currently the x86 codegen isn't very good at mixing SSE and FPStack -code: - -unsigned int foo(double x) { return x; } - -foo: - subl $20, %esp - movsd 24(%esp), %xmm0 - movsd %xmm0, 8(%esp) - fldl 8(%esp) - fisttpll (%esp) - movl (%esp), %eax - addl $20, %esp - ret - -This just requires being smarter when custom expanding fptoui. 
- -//===---------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// +// Random ideas for the X86 backend: FP stack related stuff +//===---------------------------------------------------------------------===// + +//===---------------------------------------------------------------------===// + +Some targets (e.g. athlons) prefer freep to fstp ST(0): +http://gcc.gnu.org/ml/gcc-patches/2004-04/msg00659.html + +//===---------------------------------------------------------------------===// + +This should use fiadd on chips where it is profitable: +double foo(double P, int *I) { return P+*I; } + +We have fiadd patterns now but the followings have the same cost and +complexity. We need a way to specify the later is more profitable. + +def FpADD32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fadd RFP:$src1, + (extloadf64f32 addr:$src2)))]>; + // ST(0) = ST(0) + [mem32] + +def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, + [(set RFP:$dst, (fadd RFP:$src1, + (X86fild addr:$src2, i32)))]>; + // ST(0) = ST(0) + [mem32int] + +//===---------------------------------------------------------------------===// + +The FP stackifier should handle simple permutates to reduce number of shuffle +instructions, e.g. turning: + +fld P -> fld Q +fld Q fld P +fxch + +or: + +fxch -> fucomi +fucomi jl X +jg X + +Ideas: +http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html + + +//===---------------------------------------------------------------------===// + +Add a target specific hook to DAG combiner to handle SINT_TO_FP and +FP_TO_SINT when the source operand is already in memory. 
+ +//===---------------------------------------------------------------------===// + +Open code rint,floor,ceil,trunc: +http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html +http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html + +Opencode the sincos[f] libcall. + +//===---------------------------------------------------------------------===// + +None of the FPStack instructions are handled in +X86RegisterInfo::foldMemoryOperand, which prevents the spiller from +folding spill code into the instructions. + +//===---------------------------------------------------------------------===// + +Currently the x86 codegen isn't very good at mixing SSE and FPStack +code: + +unsigned int foo(double x) { return x; } + +foo: + subl $20, %esp + movsd 24(%esp), %xmm0 + movsd %xmm0, 8(%esp) + fldl 8(%esp) + fisttpll (%esp) + movl (%esp), %eax + addl $20, %esp + ret + +This just requires being smarter when custom expanding fptoui. + +//===---------------------------------------------------------------------===// diff --git a/contrib/libs/llvm12/lib/Target/X86/README-SSE.txt b/contrib/libs/llvm12/lib/Target/X86/README-SSE.txt index 40f526b478..d52840e5c4 100644 --- a/contrib/libs/llvm12/lib/Target/X86/README-SSE.txt +++ b/contrib/libs/llvm12/lib/Target/X86/README-SSE.txt @@ -1,829 +1,829 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the X86 backend: SSE-specific stuff. -//===---------------------------------------------------------------------===// - -//===---------------------------------------------------------------------===// - -SSE Variable shift can be custom lowered to something like this, which uses a -small table + unaligned load + shuffle instead of going through memory. - -__m128i_shift_right: - .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 - .byte -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - -... 
-__m128i shift_right(__m128i value, unsigned long offset) { - return _mm_shuffle_epi8(value, - _mm_loadu_si128((__m128 *) (___m128i_shift_right + offset))); -} - -//===---------------------------------------------------------------------===// - -SSE has instructions for doing operations on complex numbers, we should pattern -match them. For example, this should turn into a horizontal add: - -typedef float __attribute__((vector_size(16))) v4f32; -float f32(v4f32 A) { - return A[0]+A[1]+A[2]+A[3]; -} - -Instead we get this: - -_f32: ## @f32 - pshufd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0,0,0] - addss %xmm0, %xmm1 - pshufd $3, %xmm0, %xmm2 ## xmm2 = xmm0[3,0,0,0] - movhlps %xmm0, %xmm0 ## xmm0 = xmm0[1,1] - movaps %xmm0, %xmm3 - addss %xmm1, %xmm3 - movdqa %xmm2, %xmm0 - addss %xmm3, %xmm0 - ret - -Also, there are cases where some simple local SLP would improve codegen a bit. -compiling this: - -_Complex float f32(_Complex float A, _Complex float B) { - return A+B; -} - -into: - -_f32: ## @f32 - movdqa %xmm0, %xmm2 - addss %xmm1, %xmm2 - pshufd $1, %xmm1, %xmm1 ## xmm1 = xmm1[1,0,0,0] - pshufd $1, %xmm0, %xmm3 ## xmm3 = xmm0[1,0,0,0] - addss %xmm1, %xmm3 - movaps %xmm2, %xmm0 - unpcklps %xmm3, %xmm0 ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] - ret - -seems silly when it could just be one addps. - - -//===---------------------------------------------------------------------===// - -Expand libm rounding functions inline: Significant speedups possible. -http://gcc.gnu.org/ml/gcc-patches/2006-10/msg00909.html - -//===---------------------------------------------------------------------===// - -When compiled with unsafemath enabled, "main" should enable SSE DAZ mode and -other fast SSE modes. - -//===---------------------------------------------------------------------===// - -Think about doing i64 math in SSE regs on x86-32. 
- -//===---------------------------------------------------------------------===// - -This testcase should have no SSE instructions in it, and only one load from -a constant pool: - -double %test3(bool %B) { - %C = select bool %B, double 123.412, double 523.01123123 - ret double %C -} - -Currently, the select is being lowered, which prevents the dag combiner from -turning 'select (load CPI1), (load CPI2)' -> 'load (select CPI1, CPI2)' - -The pattern isel got this one right. - -//===---------------------------------------------------------------------===// - -Lower memcpy / memset to a series of SSE 128 bit move instructions when it's -feasible. - -//===---------------------------------------------------------------------===// - -Codegen: - if (copysign(1.0, x) == copysign(1.0, y)) -into: - if (x^y & mask) -when using SSE. - -//===---------------------------------------------------------------------===// - -Use movhps to update upper 64-bits of a v4sf value. Also movlps on lower half -of a v4sf value. - -//===---------------------------------------------------------------------===// - -Better codegen for vector_shuffles like this { x, 0, 0, 0 } or { x, 0, x, 0}. -Perhaps use pxor / xorp* to clear a XMM register first? - -//===---------------------------------------------------------------------===// - -External test Nurbs exposed some problems. Look for -__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. 
This is what icc -emits: - - movaps (%edx), %xmm2 #59.21 - movaps (%edx), %xmm5 #60.21 - movaps (%edx), %xmm4 #61.21 - movaps (%edx), %xmm3 #62.21 - movl 40(%ecx), %ebp #69.49 - shufps $0, %xmm2, %xmm5 #60.21 - movl 100(%esp), %ebx #69.20 - movl (%ebx), %edi #69.20 - imull %ebp, %edi #69.49 - addl (%eax), %edi #70.33 - shufps $85, %xmm2, %xmm4 #61.21 - shufps $170, %xmm2, %xmm3 #62.21 - shufps $255, %xmm2, %xmm2 #63.21 - lea (%ebp,%ebp,2), %ebx #69.49 - negl %ebx #69.49 - lea -3(%edi,%ebx), %ebx #70.33 - shll $4, %ebx #68.37 - addl 32(%ecx), %ebx #68.37 - testb $15, %bl #91.13 - jne L_B1.24 # Prob 5% #91.13 - -This is the llvm code after instruction scheduling: - -cond_next140 (0xa910740, LLVM BB @0xa90beb0): - %reg1078 = MOV32ri -3 - %reg1079 = ADD32rm %reg1078, %reg1068, 1, %noreg, 0 - %reg1037 = MOV32rm %reg1024, 1, %noreg, 40 - %reg1080 = IMUL32rr %reg1079, %reg1037 - %reg1081 = MOV32rm %reg1058, 1, %noreg, 0 - %reg1038 = LEA32r %reg1081, 1, %reg1080, -3 - %reg1036 = MOV32rm %reg1024, 1, %noreg, 32 - %reg1082 = SHL32ri %reg1038, 4 - %reg1039 = ADD32rr %reg1036, %reg1082 - %reg1083 = MOVAPSrm %reg1059, 1, %noreg, 0 - %reg1034 = SHUFPSrr %reg1083, %reg1083, 170 - %reg1032 = SHUFPSrr %reg1083, %reg1083, 0 - %reg1035 = SHUFPSrr %reg1083, %reg1083, 255 - %reg1033 = SHUFPSrr %reg1083, %reg1083, 85 - %reg1040 = MOV32rr %reg1039 - %reg1084 = AND32ri8 %reg1039, 15 - CMP32ri8 %reg1084, 0 - JE mbb<cond_next204,0xa914d30> - -Still ok. 
After register allocation: - -cond_next140 (0xa910740, LLVM BB @0xa90beb0): - %eax = MOV32ri -3 - %edx = MOV32rm %stack.3, 1, %noreg, 0 - ADD32rm %eax<def&use>, %edx, 1, %noreg, 0 - %edx = MOV32rm %stack.7, 1, %noreg, 0 - %edx = MOV32rm %edx, 1, %noreg, 40 - IMUL32rr %eax<def&use>, %edx - %esi = MOV32rm %stack.5, 1, %noreg, 0 - %esi = MOV32rm %esi, 1, %noreg, 0 - MOV32mr %stack.4, 1, %noreg, 0, %esi - %eax = LEA32r %esi, 1, %eax, -3 - %esi = MOV32rm %stack.7, 1, %noreg, 0 - %esi = MOV32rm %esi, 1, %noreg, 32 - %edi = MOV32rr %eax - SHL32ri %edi<def&use>, 4 - ADD32rr %edi<def&use>, %esi - %xmm0 = MOVAPSrm %ecx, 1, %noreg, 0 - %xmm1 = MOVAPSrr %xmm0 - SHUFPSrr %xmm1<def&use>, %xmm1, 170 - %xmm2 = MOVAPSrr %xmm0 - SHUFPSrr %xmm2<def&use>, %xmm2, 0 - %xmm3 = MOVAPSrr %xmm0 - SHUFPSrr %xmm3<def&use>, %xmm3, 255 - SHUFPSrr %xmm0<def&use>, %xmm0, 85 - %ebx = MOV32rr %edi - AND32ri8 %ebx<def&use>, 15 - CMP32ri8 %ebx, 0 - JE mbb<cond_next204,0xa914d30> - -This looks really bad. The problem is shufps is a destructive opcode. Since it -appears as operand two in more than one shufps ops. It resulted in a number of -copies. Note icc also suffers from the same problem. Either the instruction -selector should select pshufd or The register allocator can made the two-address -to three-address transformation. - -It also exposes some other problems. See MOV32ri -3 and the spills. - -//===---------------------------------------------------------------------===// - -Consider: - -__m128 test(float a) { - return _mm_set_ps(0.0, 0.0, 0.0, a*a); -} - -This compiles into: - -movss 4(%esp), %xmm1 -mulss %xmm1, %xmm1 -xorps %xmm0, %xmm0 -movss %xmm1, %xmm0 -ret - -Because mulss doesn't modify the top 3 elements, the top elements of -xmm1 are already zero'd. We could compile this to: - -movss 4(%esp), %xmm0 -mulss %xmm0, %xmm0 -ret - -//===---------------------------------------------------------------------===// - -Here's a sick and twisted idea. 
Consider code like this: - -__m128 test(__m128 a) { - float b = *(float*)&a; - ... - return _mm_set_ps(0.0, 0.0, 0.0, b); -} - -This might compile to this code: - -movaps c(%esp), %xmm1 -xorps %xmm0, %xmm0 -movss %xmm1, %xmm0 -ret - -Now consider if the ... code caused xmm1 to get spilled. This might produce -this code: - -movaps c(%esp), %xmm1 -movaps %xmm1, c2(%esp) -... - -xorps %xmm0, %xmm0 -movaps c2(%esp), %xmm1 -movss %xmm1, %xmm0 -ret - -However, since the reload is only used by these instructions, we could -"fold" it into the uses, producing something like this: - -movaps c(%esp), %xmm1 -movaps %xmm1, c2(%esp) -... - -movss c2(%esp), %xmm0 -ret - -... saving two instructions. - -The basic idea is that a reload from a spill slot, can, if only one 4-byte -chunk is used, bring in 3 zeros plus the one element instead of 4 elements. -This can be used to simplify a variety of shuffle operations, where the -elements are fixed zeros. - -//===---------------------------------------------------------------------===// - -This code generates ugly code, probably due to costs being off or something: - -define void @test(float* %P, <4 x float>* %P2 ) { - %xFloat0.688 = load float* %P - %tmp = load <4 x float>* %P2 - %inFloat3.713 = insertelement <4 x float> %tmp, float 0.0, i32 3 - store <4 x float> %inFloat3.713, <4 x float>* %P2 - ret void -} - -Generates: - -_test: - movl 8(%esp), %eax - movaps (%eax), %xmm0 - pxor %xmm1, %xmm1 - movaps %xmm0, %xmm2 - shufps $50, %xmm1, %xmm2 - shufps $132, %xmm2, %xmm0 - movaps %xmm0, (%eax) - ret - -Would it be better to generate: - -_test: - movl 8(%esp), %ecx - movaps (%ecx), %xmm0 - xor %eax, %eax - pinsrw $6, %eax, %xmm0 - pinsrw $7, %eax, %xmm0 - movaps %xmm0, (%ecx) - ret - -? 
- -//===---------------------------------------------------------------------===// - -Some useful information in the Apple Altivec / SSE Migration Guide: - -http://developer.apple.com/documentation/Performance/Conceptual/ -Accelerate_sse_migration/index.html - -e.g. SSE select using and, andnot, or. Various SSE compare translations. - -//===---------------------------------------------------------------------===// - -Add hooks to commute some CMPP operations. - -//===---------------------------------------------------------------------===// - -Apply the same transformation that merged four float loads into a single 128-bit load -to loads from constant pool. - -//===---------------------------------------------------------------------===// - -Floating point max / min are commutable when -enable-unsafe-fp-math is -specified. We should turn int_x86_sse_max_ss and X86ISD::FMIN etc. into other -nodes which are selected to max / min instructions that are marked commutable. - -//===---------------------------------------------------------------------===// - -We should materialize vector constants like "all ones" and "signbit" with -code like: - - cmpeqps xmm1, xmm1 ; xmm1 = all-ones - -and: - cmpeqps xmm1, xmm1 ; xmm1 = all-ones - psrlq xmm1, 31 ; xmm1 = all 100000000000... - -instead of using a load from the constant pool. The latter is important for -ABS/NEG/copysign etc. - -//===---------------------------------------------------------------------===// - -These functions: - -#include <xmmintrin.h> -__m128i a; -void x(unsigned short n) { - a = _mm_slli_epi32 (a, n); -} -void y(unsigned n) { - a = _mm_slli_epi32 (a, n); -} - -compile to ( -O3 -static -fomit-frame-pointer): -_x: - movzwl 4(%esp), %eax - movd %eax, %xmm0 - movaps _a, %xmm1 - pslld %xmm0, %xmm1 - movaps %xmm1, _a - ret -_y: - movd 4(%esp), %xmm0 - movaps _a, %xmm1 - pslld %xmm0, %xmm1 - movaps %xmm1, _a - ret - -"y" looks good, but "x" does silly movzwl stuff around into a GPR. 
It seems -like movd would be sufficient in both cases as the value is already zero -extended in the 32-bit stack slot IIRC. For signed short, it should also be -safe, as a really-signed value would be undefined for pslld. - - -//===---------------------------------------------------------------------===// - -#include <math.h> -int t1(double d) { return signbit(d); } - -This currently compiles to: - subl $12, %esp - movsd 16(%esp), %xmm0 - movsd %xmm0, (%esp) - movl 4(%esp), %eax - shrl $31, %eax - addl $12, %esp - ret - -We should use movmskp{s|d} instead. - -//===---------------------------------------------------------------------===// - -CodeGen/X86/vec_align.ll tests whether we can turn 4 scalar loads into a single -(aligned) vector load. This functionality has a couple of problems. - -1. The code to infer alignment from loads of globals is in the X86 backend, - not the dag combiner. This is because dagcombine2 needs to be able to see - through the X86ISD::Wrapper node, which DAGCombine can't really do. -2. The code for turning 4 x load into a single vector load is target - independent and should be moved to the dag combiner. -3. The code for turning 4 x load into a vector load can only handle a direct - load from a global or a direct load from the stack. It should be generalized - to handle any load from P, P+4, P+8, P+12, where P can be anything. -4. The alignment inference code cannot handle loads from globals in non-static - mode because it doesn't look through the extra dyld stub load. If you try - vec_align.ll without -relocation-model=static, you'll see what I mean. - -//===---------------------------------------------------------------------===// - -We should lower store(fneg(load p), q) into an integer load+xor+store, which -eliminates a constant pool load. 
For example, consider: - -define i64 @ccosf(float %z.0, float %z.1) nounwind readonly { -entry: - %tmp6 = fsub float -0.000000e+00, %z.1 ; <float> [#uses=1] - %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly - ret i64 %tmp20 -} -declare i64 @ccoshf(float %z.0, float %z.1) nounwind readonly - -This currently compiles to: - -LCPI1_0: # <4 x float> - .long 2147483648 # float -0 - .long 2147483648 # float -0 - .long 2147483648 # float -0 - .long 2147483648 # float -0 -_ccosf: - subl $12, %esp - movss 16(%esp), %xmm0 - movss %xmm0, 4(%esp) - movss 20(%esp), %xmm0 - xorps LCPI1_0, %xmm0 - movss %xmm0, (%esp) - call L_ccoshf$stub - addl $12, %esp - ret - -Note the load into xmm0, then xor (to negate), then store. In PIC mode, -this code computes the pic base and does two loads to do the constant pool -load, so the improvement is much bigger. - -The tricky part about this xform is that the argument load/store isn't exposed -until post-legalize, and at that point, the fneg has been custom expanded into -an X86 fxor. This means that we need to handle this case in the x86 backend -instead of in target independent code. - -//===---------------------------------------------------------------------===// - -Non-SSE4 insert into 16 x i8 is atrociously bad. - -//===---------------------------------------------------------------------===// - -<2 x i64> extract is substantially worse than <2 x f64>, even if the destination -is memory. - -//===---------------------------------------------------------------------===// - -INSERTPS can match any insert (extract, imm1), imm2 for 4 x float, and insert -any number of 0.0 simultaneously. Currently we only use it for simple -insertions. - -See comments in LowerINSERT_VECTOR_ELT_SSE4. - -//===---------------------------------------------------------------------===// - -On a random note, SSE2 should declare insert/extract of 2 x f64 as legal, not -Custom. 
All combinations of insert/extract reg-reg, reg-mem, and mem-reg are -legal, it'll just take a few extra patterns written in the .td file. - -Note: this is not a code quality issue; the custom lowered code happens to be -right, but we shouldn't have to custom lower anything. This is probably related -to <2 x i64> ops being so bad. - -//===---------------------------------------------------------------------===// - -LLVM currently generates stack realignment code, when it is not necessary -needed. The problem is that we need to know about stack alignment too early, -before RA runs. - -At that point we don't know, whether there will be vector spill, or not. -Stack realignment logic is overly conservative here, but otherwise we can -produce unaligned loads/stores. - -Fixing this will require some huge RA changes. - -Testcase: +//===---------------------------------------------------------------------===// +// Random ideas for the X86 backend: SSE-specific stuff. +//===---------------------------------------------------------------------===// + +//===---------------------------------------------------------------------===// + +SSE Variable shift can be custom lowered to something like this, which uses a +small table + unaligned load + shuffle instead of going through memory. + +__m128i_shift_right: + .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + .byte -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + +... +__m128i shift_right(__m128i value, unsigned long offset) { + return _mm_shuffle_epi8(value, + _mm_loadu_si128((__m128 *) (___m128i_shift_right + offset))); +} + +//===---------------------------------------------------------------------===// + +SSE has instructions for doing operations on complex numbers, we should pattern +match them. 
For example, this should turn into a horizontal add: + +typedef float __attribute__((vector_size(16))) v4f32; +float f32(v4f32 A) { + return A[0]+A[1]+A[2]+A[3]; +} + +Instead we get this: + +_f32: ## @f32 + pshufd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0,0,0] + addss %xmm0, %xmm1 + pshufd $3, %xmm0, %xmm2 ## xmm2 = xmm0[3,0,0,0] + movhlps %xmm0, %xmm0 ## xmm0 = xmm0[1,1] + movaps %xmm0, %xmm3 + addss %xmm1, %xmm3 + movdqa %xmm2, %xmm0 + addss %xmm3, %xmm0 + ret + +Also, there are cases where some simple local SLP would improve codegen a bit. +compiling this: + +_Complex float f32(_Complex float A, _Complex float B) { + return A+B; +} + +into: + +_f32: ## @f32 + movdqa %xmm0, %xmm2 + addss %xmm1, %xmm2 + pshufd $1, %xmm1, %xmm1 ## xmm1 = xmm1[1,0,0,0] + pshufd $1, %xmm0, %xmm3 ## xmm3 = xmm0[1,0,0,0] + addss %xmm1, %xmm3 + movaps %xmm2, %xmm0 + unpcklps %xmm3, %xmm0 ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] + ret + +seems silly when it could just be one addps. + + +//===---------------------------------------------------------------------===// + +Expand libm rounding functions inline: Significant speedups possible. +http://gcc.gnu.org/ml/gcc-patches/2006-10/msg00909.html + +//===---------------------------------------------------------------------===// + +When compiled with unsafemath enabled, "main" should enable SSE DAZ mode and +other fast SSE modes. + +//===---------------------------------------------------------------------===// + +Think about doing i64 math in SSE regs on x86-32. + +//===---------------------------------------------------------------------===// + +This testcase should have no SSE instructions in it, and only one load from +a constant pool: + +double %test3(bool %B) { + %C = select bool %B, double 123.412, double 523.01123123 + ret double %C +} + +Currently, the select is being lowered, which prevents the dag combiner from +turning 'select (load CPI1), (load CPI2)' -> 'load (select CPI1, CPI2)' + +The pattern isel got this one right. 
+ +//===---------------------------------------------------------------------===// + +Lower memcpy / memset to a series of SSE 128 bit move instructions when it's +feasible. + +//===---------------------------------------------------------------------===// + +Codegen: + if (copysign(1.0, x) == copysign(1.0, y)) +into: + if (x^y & mask) +when using SSE. + +//===---------------------------------------------------------------------===// + +Use movhps to update upper 64-bits of a v4sf value. Also movlps on lower half +of a v4sf value. + +//===---------------------------------------------------------------------===// + +Better codegen for vector_shuffles like this { x, 0, 0, 0 } or { x, 0, x, 0}. +Perhaps use pxor / xorp* to clear a XMM register first? + +//===---------------------------------------------------------------------===// + +External test Nurbs exposed some problems. Look for +__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc +emits: + + movaps (%edx), %xmm2 #59.21 + movaps (%edx), %xmm5 #60.21 + movaps (%edx), %xmm4 #61.21 + movaps (%edx), %xmm3 #62.21 + movl 40(%ecx), %ebp #69.49 + shufps $0, %xmm2, %xmm5 #60.21 + movl 100(%esp), %ebx #69.20 + movl (%ebx), %edi #69.20 + imull %ebp, %edi #69.49 + addl (%eax), %edi #70.33 + shufps $85, %xmm2, %xmm4 #61.21 + shufps $170, %xmm2, %xmm3 #62.21 + shufps $255, %xmm2, %xmm2 #63.21 + lea (%ebp,%ebp,2), %ebx #69.49 + negl %ebx #69.49 + lea -3(%edi,%ebx), %ebx #70.33 + shll $4, %ebx #68.37 + addl 32(%ecx), %ebx #68.37 + testb $15, %bl #91.13 + jne L_B1.24 # Prob 5% #91.13 + +This is the llvm code after instruction scheduling: + +cond_next140 (0xa910740, LLVM BB @0xa90beb0): + %reg1078 = MOV32ri -3 + %reg1079 = ADD32rm %reg1078, %reg1068, 1, %noreg, 0 + %reg1037 = MOV32rm %reg1024, 1, %noreg, 40 + %reg1080 = IMUL32rr %reg1079, %reg1037 + %reg1081 = MOV32rm %reg1058, 1, %noreg, 0 + %reg1038 = LEA32r %reg1081, 1, %reg1080, -3 + %reg1036 = MOV32rm %reg1024, 1, %noreg, 32 + %reg1082 = SHL32ri 
%reg1038, 4 + %reg1039 = ADD32rr %reg1036, %reg1082 + %reg1083 = MOVAPSrm %reg1059, 1, %noreg, 0 + %reg1034 = SHUFPSrr %reg1083, %reg1083, 170 + %reg1032 = SHUFPSrr %reg1083, %reg1083, 0 + %reg1035 = SHUFPSrr %reg1083, %reg1083, 255 + %reg1033 = SHUFPSrr %reg1083, %reg1083, 85 + %reg1040 = MOV32rr %reg1039 + %reg1084 = AND32ri8 %reg1039, 15 + CMP32ri8 %reg1084, 0 + JE mbb<cond_next204,0xa914d30> + +Still ok. After register allocation: + +cond_next140 (0xa910740, LLVM BB @0xa90beb0): + %eax = MOV32ri -3 + %edx = MOV32rm %stack.3, 1, %noreg, 0 + ADD32rm %eax<def&use>, %edx, 1, %noreg, 0 + %edx = MOV32rm %stack.7, 1, %noreg, 0 + %edx = MOV32rm %edx, 1, %noreg, 40 + IMUL32rr %eax<def&use>, %edx + %esi = MOV32rm %stack.5, 1, %noreg, 0 + %esi = MOV32rm %esi, 1, %noreg, 0 + MOV32mr %stack.4, 1, %noreg, 0, %esi + %eax = LEA32r %esi, 1, %eax, -3 + %esi = MOV32rm %stack.7, 1, %noreg, 0 + %esi = MOV32rm %esi, 1, %noreg, 32 + %edi = MOV32rr %eax + SHL32ri %edi<def&use>, 4 + ADD32rr %edi<def&use>, %esi + %xmm0 = MOVAPSrm %ecx, 1, %noreg, 0 + %xmm1 = MOVAPSrr %xmm0 + SHUFPSrr %xmm1<def&use>, %xmm1, 170 + %xmm2 = MOVAPSrr %xmm0 + SHUFPSrr %xmm2<def&use>, %xmm2, 0 + %xmm3 = MOVAPSrr %xmm0 + SHUFPSrr %xmm3<def&use>, %xmm3, 255 + SHUFPSrr %xmm0<def&use>, %xmm0, 85 + %ebx = MOV32rr %edi + AND32ri8 %ebx<def&use>, 15 + CMP32ri8 %ebx, 0 + JE mbb<cond_next204,0xa914d30> + +This looks really bad. The problem is shufps is a destructive opcode. Since it +appears as operand two in more than one shufps ops. It resulted in a number of +copies. Note icc also suffers from the same problem. Either the instruction +selector should select pshufd or The register allocator can made the two-address +to three-address transformation. + +It also exposes some other problems. See MOV32ri -3 and the spills. 
+ +//===---------------------------------------------------------------------===// + +Consider: + +__m128 test(float a) { + return _mm_set_ps(0.0, 0.0, 0.0, a*a); +} + +This compiles into: + +movss 4(%esp), %xmm1 +mulss %xmm1, %xmm1 +xorps %xmm0, %xmm0 +movss %xmm1, %xmm0 +ret + +Because mulss doesn't modify the top 3 elements, the top elements of +xmm1 are already zero'd. We could compile this to: + +movss 4(%esp), %xmm0 +mulss %xmm0, %xmm0 +ret + +//===---------------------------------------------------------------------===// + +Here's a sick and twisted idea. Consider code like this: + +__m128 test(__m128 a) { + float b = *(float*)&a; + ... + return _mm_set_ps(0.0, 0.0, 0.0, b); +} + +This might compile to this code: + +movaps c(%esp), %xmm1 +xorps %xmm0, %xmm0 +movss %xmm1, %xmm0 +ret + +Now consider if the ... code caused xmm1 to get spilled. This might produce +this code: + +movaps c(%esp), %xmm1 +movaps %xmm1, c2(%esp) +... + +xorps %xmm0, %xmm0 +movaps c2(%esp), %xmm1 +movss %xmm1, %xmm0 +ret + +However, since the reload is only used by these instructions, we could +"fold" it into the uses, producing something like this: + +movaps c(%esp), %xmm1 +movaps %xmm1, c2(%esp) +... + +movss c2(%esp), %xmm0 +ret + +... saving two instructions. + +The basic idea is that a reload from a spill slot, can, if only one 4-byte +chunk is used, bring in 3 zeros plus the one element instead of 4 elements. +This can be used to simplify a variety of shuffle operations, where the +elements are fixed zeros. 
+ +//===---------------------------------------------------------------------===// + +This code generates ugly code, probably due to costs being off or something: + +define void @test(float* %P, <4 x float>* %P2 ) { + %xFloat0.688 = load float* %P + %tmp = load <4 x float>* %P2 + %inFloat3.713 = insertelement <4 x float> %tmp, float 0.0, i32 3 + store <4 x float> %inFloat3.713, <4 x float>* %P2 + ret void +} + +Generates: + +_test: + movl 8(%esp), %eax + movaps (%eax), %xmm0 + pxor %xmm1, %xmm1 + movaps %xmm0, %xmm2 + shufps $50, %xmm1, %xmm2 + shufps $132, %xmm2, %xmm0 + movaps %xmm0, (%eax) + ret + +Would it be better to generate: + +_test: + movl 8(%esp), %ecx + movaps (%ecx), %xmm0 + xor %eax, %eax + pinsrw $6, %eax, %xmm0 + pinsrw $7, %eax, %xmm0 + movaps %xmm0, (%ecx) + ret + +? + +//===---------------------------------------------------------------------===// + +Some useful information in the Apple Altivec / SSE Migration Guide: + +http://developer.apple.com/documentation/Performance/Conceptual/ +Accelerate_sse_migration/index.html + +e.g. SSE select using and, andnot, or. Various SSE compare translations. + +//===---------------------------------------------------------------------===// + +Add hooks to commute some CMPP operations. + +//===---------------------------------------------------------------------===// + +Apply the same transformation that merged four float loads into a single 128-bit load +to loads from constant pool. + +//===---------------------------------------------------------------------===// + +Floating point max / min are commutable when -enable-unsafe-fp-math is +specified. We should turn int_x86_sse_max_ss and X86ISD::FMIN etc. into other +nodes which are selected to max / min instructions that are marked commutable. 
+ +//===---------------------------------------------------------------------===// + +We should materialize vector constants like "all ones" and "signbit" with +code like: + + cmpeqps xmm1, xmm1 ; xmm1 = all-ones + +and: + cmpeqps xmm1, xmm1 ; xmm1 = all-ones + psrlq xmm1, 31 ; xmm1 = all 100000000000... + +instead of using a load from the constant pool. The latter is important for +ABS/NEG/copysign etc. + +//===---------------------------------------------------------------------===// + +These functions: + +#include <xmmintrin.h> +__m128i a; +void x(unsigned short n) { + a = _mm_slli_epi32 (a, n); +} +void y(unsigned n) { + a = _mm_slli_epi32 (a, n); +} + +compile to ( -O3 -static -fomit-frame-pointer): +_x: + movzwl 4(%esp), %eax + movd %eax, %xmm0 + movaps _a, %xmm1 + pslld %xmm0, %xmm1 + movaps %xmm1, _a + ret +_y: + movd 4(%esp), %xmm0 + movaps _a, %xmm1 + pslld %xmm0, %xmm1 + movaps %xmm1, _a + ret + +"y" looks good, but "x" does silly movzwl stuff around into a GPR. It seems +like movd would be sufficient in both cases as the value is already zero +extended in the 32-bit stack slot IIRC. For signed short, it should also be +safe, as a really-signed value would be undefined for pslld. + + +//===---------------------------------------------------------------------===// + +#include <math.h> +int t1(double d) { return signbit(d); } + +This currently compiles to: + subl $12, %esp + movsd 16(%esp), %xmm0 + movsd %xmm0, (%esp) + movl 4(%esp), %eax + shrl $31, %eax + addl $12, %esp + ret + +We should use movmskp{s|d} instead. + +//===---------------------------------------------------------------------===// + +CodeGen/X86/vec_align.ll tests whether we can turn 4 scalar loads into a single +(aligned) vector load. This functionality has a couple of problems. + +1. The code to infer alignment from loads of globals is in the X86 backend, + not the dag combiner. 
This is because dagcombine2 needs to be able to see + through the X86ISD::Wrapper node, which DAGCombine can't really do. +2. The code for turning 4 x load into a single vector load is target + independent and should be moved to the dag combiner. +3. The code for turning 4 x load into a vector load can only handle a direct + load from a global or a direct load from the stack. It should be generalized + to handle any load from P, P+4, P+8, P+12, where P can be anything. +4. The alignment inference code cannot handle loads from globals in non-static + mode because it doesn't look through the extra dyld stub load. If you try + vec_align.ll without -relocation-model=static, you'll see what I mean. + +//===---------------------------------------------------------------------===// + +We should lower store(fneg(load p), q) into an integer load+xor+store, which +eliminates a constant pool load. For example, consider: + +define i64 @ccosf(float %z.0, float %z.1) nounwind readonly { +entry: + %tmp6 = fsub float -0.000000e+00, %z.1 ; <float> [#uses=1] + %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly + ret i64 %tmp20 +} +declare i64 @ccoshf(float %z.0, float %z.1) nounwind readonly + +This currently compiles to: + +LCPI1_0: # <4 x float> + .long 2147483648 # float -0 + .long 2147483648 # float -0 + .long 2147483648 # float -0 + .long 2147483648 # float -0 +_ccosf: + subl $12, %esp + movss 16(%esp), %xmm0 + movss %xmm0, 4(%esp) + movss 20(%esp), %xmm0 + xorps LCPI1_0, %xmm0 + movss %xmm0, (%esp) + call L_ccoshf$stub + addl $12, %esp + ret + +Note the load into xmm0, then xor (to negate), then store. In PIC mode, +this code computes the pic base and does two loads to do the constant pool +load, so the improvement is much bigger. + +The tricky part about this xform is that the argument load/store isn't exposed +until post-legalize, and at that point, the fneg has been custom expanded into +an X86 fxor. 
This means that we need to handle this case in the x86 backend +instead of in target independent code. + +//===---------------------------------------------------------------------===// + +Non-SSE4 insert into 16 x i8 is atrociously bad. + +//===---------------------------------------------------------------------===// + +<2 x i64> extract is substantially worse than <2 x f64>, even if the destination +is memory. + +//===---------------------------------------------------------------------===// + +INSERTPS can match any insert (extract, imm1), imm2 for 4 x float, and insert +any number of 0.0 simultaneously. Currently we only use it for simple +insertions. + +See comments in LowerINSERT_VECTOR_ELT_SSE4. + +//===---------------------------------------------------------------------===// + +On a random note, SSE2 should declare insert/extract of 2 x f64 as legal, not +Custom. All combinations of insert/extract reg-reg, reg-mem, and mem-reg are +legal, it'll just take a few extra patterns written in the .td file. + +Note: this is not a code quality issue; the custom lowered code happens to be +right, but we shouldn't have to custom lower anything. This is probably related +to <2 x i64> ops being so bad. + +//===---------------------------------------------------------------------===// + +LLVM currently generates stack realignment code, when it is not necessary +needed. The problem is that we need to know about stack alignment too early, +before RA runs. + +At that point we don't know, whether there will be vector spill, or not. +Stack realignment logic is overly conservative here, but otherwise we can +produce unaligned loads/stores. + +Fixing this will require some huge RA changes. 
+ +Testcase: +#include <emmintrin.h> + +typedef short vSInt16 __attribute__ ((__vector_size__ (16))); + +static const vSInt16 a = {- 22725, - 12873, - 22725, - 12873, - 22725, - 12873, +- 22725, - 12873};; + +vSInt16 madd(vSInt16 b) +{ + return _mm_madd_epi16(a, b); +} + +Generated code (x86-32, linux): +madd: + pushl %ebp + movl %esp, %ebp + andl $-16, %esp + movaps .LCPI1_0, %xmm1 + pmaddwd %xmm1, %xmm0 + movl %ebp, %esp + popl %ebp + ret + +//===---------------------------------------------------------------------===// + +Consider: #include <emmintrin.h> - -typedef short vSInt16 __attribute__ ((__vector_size__ (16))); - -static const vSInt16 a = {- 22725, - 12873, - 22725, - 12873, - 22725, - 12873, -- 22725, - 12873};; - -vSInt16 madd(vSInt16 b) -{ - return _mm_madd_epi16(a, b); -} - -Generated code (x86-32, linux): -madd: - pushl %ebp - movl %esp, %ebp - andl $-16, %esp - movaps .LCPI1_0, %xmm1 - pmaddwd %xmm1, %xmm0 - movl %ebp, %esp - popl %ebp - ret - -//===---------------------------------------------------------------------===// - -Consider: -#include <emmintrin.h> -__m128 foo2 (float x) { - return _mm_set_ps (0, 0, x, 0); -} - -In x86-32 mode, we generate this spiffy code: - -_foo2: - movss 4(%esp), %xmm0 - pshufd $81, %xmm0, %xmm0 - ret - -in x86-64 mode, we generate this code, which could be better: - -_foo2: - xorps %xmm1, %xmm1 - movss %xmm0, %xmm1 - pshufd $81, %xmm1, %xmm0 - ret - -In sse4 mode, we could use insertps to make both better. 
- -Here's another testcase that could use insertps [mem]: - -#include <xmmintrin.h> -extern float x2, x3; -__m128 foo1 (float x1, float x4) { - return _mm_set_ps (x2, x1, x3, x4); -} - -gcc mainline compiles it to: - -foo1: - insertps $0x10, x2(%rip), %xmm0 - insertps $0x10, x3(%rip), %xmm1 - movaps %xmm1, %xmm2 - movlhps %xmm0, %xmm2 - movaps %xmm2, %xmm0 - ret - -//===---------------------------------------------------------------------===// - -We compile vector multiply-by-constant into poor code: - -define <4 x i32> @f(<4 x i32> %i) nounwind { - %A = mul <4 x i32> %i, < i32 10, i32 10, i32 10, i32 10 > - ret <4 x i32> %A -} - -On targets without SSE4.1, this compiles into: - -LCPI1_0: ## <4 x i32> - .long 10 - .long 10 - .long 10 - .long 10 - .text - .align 4,0x90 - .globl _f -_f: - pshufd $3, %xmm0, %xmm1 - movd %xmm1, %eax - imull LCPI1_0+12, %eax - movd %eax, %xmm1 - pshufd $1, %xmm0, %xmm2 - movd %xmm2, %eax - imull LCPI1_0+4, %eax - movd %eax, %xmm2 - punpckldq %xmm1, %xmm2 - movd %xmm0, %eax - imull LCPI1_0, %eax - movd %eax, %xmm1 - movhlps %xmm0, %xmm0 - movd %xmm0, %eax - imull LCPI1_0+8, %eax - movd %eax, %xmm0 - punpckldq %xmm0, %xmm1 - movaps %xmm1, %xmm0 - punpckldq %xmm2, %xmm0 - ret - -It would be better to synthesize integer vector multiplication by constants -using shifts and adds, pslld and paddd here. And even on targets with SSE4.1, -simple cases such as multiplication by powers of two would be better as -vector shifts than as multiplications. 
- -//===---------------------------------------------------------------------===// - -We compile this: - -__m128i -foo2 (char x) -{ - return _mm_set_epi8 (1, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 1, 0, 0, 0, 0); -} - -into: - movl $1, %eax - xorps %xmm0, %xmm0 - pinsrw $2, %eax, %xmm0 - movzbl 4(%esp), %eax - pinsrw $3, %eax, %xmm0 - movl $256, %eax - pinsrw $7, %eax, %xmm0 - ret - - -gcc-4.2: - subl $12, %esp - movzbl 16(%esp), %eax - movdqa LC0, %xmm0 - pinsrw $3, %eax, %xmm0 - addl $12, %esp - ret - .const - .align 4 -LC0: - .word 0 - .word 0 - .word 1 - .word 0 - .word 0 - .word 0 - .word 0 - .word 256 - -With SSE4, it should be - movdqa .LC0(%rip), %xmm0 - pinsrb $6, %edi, %xmm0 - -//===---------------------------------------------------------------------===// - -We should transform a shuffle of two vectors of constants into a single vector -of constants. Also, insertelement of a constant into a vector of constants -should also result in a vector of constants. e.g. 2008-06-25-VecISelBug.ll. - -We compiled it to something horrible: - - .align 4 -LCPI1_1: ## float - .long 1065353216 ## float 1 - .const - - .align 4 -LCPI1_0: ## <4 x float> - .space 4 - .long 1065353216 ## float 1 - .space 4 - .long 1065353216 ## float 1 - .text - .align 4,0x90 - .globl _t -_t: - xorps %xmm0, %xmm0 - movhps LCPI1_0, %xmm0 - movss LCPI1_1, %xmm1 - movaps %xmm0, %xmm2 - shufps $2, %xmm1, %xmm2 - shufps $132, %xmm2, %xmm0 - movaps %xmm0, 0 - -//===---------------------------------------------------------------------===// -rdar://5907648 - -This function: - -float foo(unsigned char x) { - return x; -} - -compiles to (x86-32): - -define float @foo(i8 zeroext %x) nounwind { - %tmp12 = uitofp i8 %x to float ; <float> [#uses=1] - ret float %tmp12 -} - -compiles to: - -_foo: - subl $4, %esp - movzbl 8(%esp), %eax - cvtsi2ss %eax, %xmm0 - movss %xmm0, (%esp) - flds (%esp) - addl $4, %esp - ret - -We should be able to use: - cvtsi2ss 8($esp), %xmm0 -since we know the stack slot is already zext'd. 
- -//===---------------------------------------------------------------------===// - -Consider using movlps instead of movsd to implement (scalar_to_vector (loadf64)) -when code size is critical. movlps is slower than movsd on core2 but it's one -byte shorter. - -//===---------------------------------------------------------------------===// - -We should use a dynamic programming based approach to tell when using FPStack -operations is cheaper than SSE. SciMark montecarlo contains code like this -for example: - -double MonteCarlo_num_flops(int Num_samples) { - return ((double) Num_samples)* 4.0; -} - -In fpstack mode, this compiles into: - -LCPI1_0: - .long 1082130432 ## float 4.000000e+00 -_MonteCarlo_num_flops: - subl $4, %esp - movl 8(%esp), %eax - movl %eax, (%esp) - fildl (%esp) - fmuls LCPI1_0 - addl $4, %esp - ret - -in SSE mode, it compiles into significantly slower code: - -_MonteCarlo_num_flops: - subl $12, %esp - cvtsi2sd 16(%esp), %xmm0 - mulsd LCPI1_0, %xmm0 - movsd %xmm0, (%esp) - fldl (%esp) - addl $12, %esp - ret - -There are also other cases in scimark where using fpstack is better, it is -cheaper to do fld1 than load from a constant pool for example, so -"load, add 1.0, store" is better done in the fp stack, etc. - -//===---------------------------------------------------------------------===// - -These should compile into the same code (PR6214): Perhaps instcombine should -canonicalize the former into the latter? 
- -define float @foo(float %x) nounwind { - %t = bitcast float %x to i32 - %s = and i32 %t, 2147483647 - %d = bitcast i32 %s to float - ret float %d -} - -declare float @fabsf(float %n) -define float @bar(float %x) nounwind { - %d = call float @fabsf(float %x) - ret float %d -} - -//===---------------------------------------------------------------------===// - -This IR (from PR6194): - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-darwin10.0.0" - -%0 = type { double, double } -%struct.float3 = type { float, float, float } - -define void @test(%0, %struct.float3* nocapture %res) nounwind noinline ssp { -entry: - %tmp18 = extractvalue %0 %0, 0 ; <double> [#uses=1] - %tmp19 = bitcast double %tmp18 to i64 ; <i64> [#uses=1] - %tmp20 = zext i64 %tmp19 to i128 ; <i128> [#uses=1] - %tmp10 = lshr i128 %tmp20, 32 ; <i128> [#uses=1] - %tmp11 = trunc i128 %tmp10 to i32 ; <i32> [#uses=1] - %tmp12 = bitcast i32 %tmp11 to float ; <float> [#uses=1] - %tmp5 = getelementptr inbounds %struct.float3* %res, i64 0, i32 1 ; <float*> [#uses=1] - store float %tmp12, float* %tmp5 - ret void -} - -Compiles to: - -_test: ## @test - movd %xmm0, %rax - shrq $32, %rax - movl %eax, 4(%rdi) - ret - -This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and -doing a shuffle from v[1] to v[0] then a float store. - -//===---------------------------------------------------------------------===// - -[UNSAFE FP] - -void foo(double, double, double); -void norm(double x, double y, double z) { - double scale = __builtin_sqrt(x*x + y*y + z*z); - foo(x/scale, y/scale, z/scale); -} - -We currently generate an sqrtsd and 3 divsd instructions. This is bad, fp div is -slow and not pipelined. In -ffast-math mode we could compute "1.0/scale" first -and emit 3 mulsd in place of the divs. This can be done as a target-independent -transform. 
- -If we're dealing with floats instead of doubles we could even replace the sqrtss -and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the -cost of reduced accuracy. - -//===---------------------------------------------------------------------===// +__m128 foo2 (float x) { + return _mm_set_ps (0, 0, x, 0); +} + +In x86-32 mode, we generate this spiffy code: + +_foo2: + movss 4(%esp), %xmm0 + pshufd $81, %xmm0, %xmm0 + ret + +in x86-64 mode, we generate this code, which could be better: + +_foo2: + xorps %xmm1, %xmm1 + movss %xmm0, %xmm1 + pshufd $81, %xmm1, %xmm0 + ret + +In sse4 mode, we could use insertps to make both better. + +Here's another testcase that could use insertps [mem]: + +#include <xmmintrin.h> +extern float x2, x3; +__m128 foo1 (float x1, float x4) { + return _mm_set_ps (x2, x1, x3, x4); +} + +gcc mainline compiles it to: + +foo1: + insertps $0x10, x2(%rip), %xmm0 + insertps $0x10, x3(%rip), %xmm1 + movaps %xmm1, %xmm2 + movlhps %xmm0, %xmm2 + movaps %xmm2, %xmm0 + ret + +//===---------------------------------------------------------------------===// + +We compile vector multiply-by-constant into poor code: + +define <4 x i32> @f(<4 x i32> %i) nounwind { + %A = mul <4 x i32> %i, < i32 10, i32 10, i32 10, i32 10 > + ret <4 x i32> %A +} + +On targets without SSE4.1, this compiles into: + +LCPI1_0: ## <4 x i32> + .long 10 + .long 10 + .long 10 + .long 10 + .text + .align 4,0x90 + .globl _f +_f: + pshufd $3, %xmm0, %xmm1 + movd %xmm1, %eax + imull LCPI1_0+12, %eax + movd %eax, %xmm1 + pshufd $1, %xmm0, %xmm2 + movd %xmm2, %eax + imull LCPI1_0+4, %eax + movd %eax, %xmm2 + punpckldq %xmm1, %xmm2 + movd %xmm0, %eax + imull LCPI1_0, %eax + movd %eax, %xmm1 + movhlps %xmm0, %xmm0 + movd %xmm0, %eax + imull LCPI1_0+8, %eax + movd %eax, %xmm0 + punpckldq %xmm0, %xmm1 + movaps %xmm1, %xmm0 + punpckldq %xmm2, %xmm0 + ret + +It would be better to synthesize integer vector multiplication by constants +using shifts and adds, pslld and 
paddd here. And even on targets with SSE4.1, +simple cases such as multiplication by powers of two would be better as +vector shifts than as multiplications. + +//===---------------------------------------------------------------------===// + +We compile this: + +__m128i +foo2 (char x) +{ + return _mm_set_epi8 (1, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 1, 0, 0, 0, 0); +} + +into: + movl $1, %eax + xorps %xmm0, %xmm0 + pinsrw $2, %eax, %xmm0 + movzbl 4(%esp), %eax + pinsrw $3, %eax, %xmm0 + movl $256, %eax + pinsrw $7, %eax, %xmm0 + ret + + +gcc-4.2: + subl $12, %esp + movzbl 16(%esp), %eax + movdqa LC0, %xmm0 + pinsrw $3, %eax, %xmm0 + addl $12, %esp + ret + .const + .align 4 +LC0: + .word 0 + .word 0 + .word 1 + .word 0 + .word 0 + .word 0 + .word 0 + .word 256 + +With SSE4, it should be + movdqa .LC0(%rip), %xmm0 + pinsrb $6, %edi, %xmm0 + +//===---------------------------------------------------------------------===// + +We should transform a shuffle of two vectors of constants into a single vector +of constants. Also, insertelement of a constant into a vector of constants +should also result in a vector of constants. e.g. 2008-06-25-VecISelBug.ll. 
+ +We compiled it to something horrible: + + .align 4 +LCPI1_1: ## float + .long 1065353216 ## float 1 + .const + + .align 4 +LCPI1_0: ## <4 x float> + .space 4 + .long 1065353216 ## float 1 + .space 4 + .long 1065353216 ## float 1 + .text + .align 4,0x90 + .globl _t +_t: + xorps %xmm0, %xmm0 + movhps LCPI1_0, %xmm0 + movss LCPI1_1, %xmm1 + movaps %xmm0, %xmm2 + shufps $2, %xmm1, %xmm2 + shufps $132, %xmm2, %xmm0 + movaps %xmm0, 0 + +//===---------------------------------------------------------------------===// +rdar://5907648 + +This function: + +float foo(unsigned char x) { + return x; +} + +compiles to (x86-32): + +define float @foo(i8 zeroext %x) nounwind { + %tmp12 = uitofp i8 %x to float ; <float> [#uses=1] + ret float %tmp12 +} + +compiles to: + +_foo: + subl $4, %esp + movzbl 8(%esp), %eax + cvtsi2ss %eax, %xmm0 + movss %xmm0, (%esp) + flds (%esp) + addl $4, %esp + ret + +We should be able to use: + cvtsi2ss 8($esp), %xmm0 +since we know the stack slot is already zext'd. + +//===---------------------------------------------------------------------===// + +Consider using movlps instead of movsd to implement (scalar_to_vector (loadf64)) +when code size is critical. movlps is slower than movsd on core2 but it's one +byte shorter. + +//===---------------------------------------------------------------------===// + +We should use a dynamic programming based approach to tell when using FPStack +operations is cheaper than SSE. 
SciMark montecarlo contains code like this +for example: + +double MonteCarlo_num_flops(int Num_samples) { + return ((double) Num_samples)* 4.0; +} + +In fpstack mode, this compiles into: + +LCPI1_0: + .long 1082130432 ## float 4.000000e+00 +_MonteCarlo_num_flops: + subl $4, %esp + movl 8(%esp), %eax + movl %eax, (%esp) + fildl (%esp) + fmuls LCPI1_0 + addl $4, %esp + ret + +in SSE mode, it compiles into significantly slower code: + +_MonteCarlo_num_flops: + subl $12, %esp + cvtsi2sd 16(%esp), %xmm0 + mulsd LCPI1_0, %xmm0 + movsd %xmm0, (%esp) + fldl (%esp) + addl $12, %esp + ret + +There are also other cases in scimark where using fpstack is better, it is +cheaper to do fld1 than load from a constant pool for example, so +"load, add 1.0, store" is better done in the fp stack, etc. + +//===---------------------------------------------------------------------===// + +These should compile into the same code (PR6214): Perhaps instcombine should +canonicalize the former into the later? + +define float @foo(float %x) nounwind { + %t = bitcast float %x to i32 + %s = and i32 %t, 2147483647 + %d = bitcast i32 %s to float + ret float %d +} + +declare float @fabsf(float %n) +define float @bar(float %x) nounwind { + %d = call float @fabsf(float %x) + ret float %d +} + +//===---------------------------------------------------------------------===// + +This IR (from PR6194): + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin10.0.0" + +%0 = type { double, double } +%struct.float3 = type { float, float, float } + +define void @test(%0, %struct.float3* nocapture %res) nounwind noinline ssp { +entry: + %tmp18 = extractvalue %0 %0, 0 ; <double> [#uses=1] + %tmp19 = bitcast double %tmp18 to i64 ; <i64> [#uses=1] + %tmp20 = zext i64 %tmp19 to i128 ; <i128> [#uses=1] + %tmp10 = lshr i128 %tmp20, 32 ; <i128> [#uses=1] + %tmp11 = 
trunc i128 %tmp10 to i32 ; <i32> [#uses=1] + %tmp12 = bitcast i32 %tmp11 to float ; <float> [#uses=1] + %tmp5 = getelementptr inbounds %struct.float3* %res, i64 0, i32 1 ; <float*> [#uses=1] + store float %tmp12, float* %tmp5 + ret void +} + +Compiles to: + +_test: ## @test + movd %xmm0, %rax + shrq $32, %rax + movl %eax, 4(%rdi) + ret + +This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and +doing a shuffle from v[1] to v[0] then a float store. + +//===---------------------------------------------------------------------===// + +[UNSAFE FP] + +void foo(double, double, double); +void norm(double x, double y, double z) { + double scale = __builtin_sqrt(x*x + y*y + z*z); + foo(x/scale, y/scale, z/scale); +} + +We currently generate an sqrtsd and 3 divsd instructions. This is bad, fp div is +slow and not pipelined. In -ffast-math mode we could compute "1.0/scale" first +and emit 3 mulsd in place of the divs. This can be done as a target-independent +transform. + +If we're dealing with floats instead of doubles we could even replace the sqrtss +and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the +cost of reduced accuracy. + +//===---------------------------------------------------------------------===// diff --git a/contrib/libs/llvm12/lib/Target/X86/README-X86-64.txt b/contrib/libs/llvm12/lib/Target/X86/README-X86-64.txt index d919c697bd..a3ea4595ac 100644 --- a/contrib/libs/llvm12/lib/Target/X86/README-X86-64.txt +++ b/contrib/libs/llvm12/lib/Target/X86/README-X86-64.txt @@ -1,184 +1,184 @@ -//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===// - -AMD64 Optimization Manual 8.2 has some nice information about optimizing integer -multiplication by a constant. How much of it applies to Intel's X86-64 -implementation? There are definite trade-offs to consider: latency vs. register -pressure vs. code size. 
- -//===---------------------------------------------------------------------===// - -Are we better off using branches instead of cmove to implement FP to -unsigned i64? - -_conv: - ucomiss LC0(%rip), %xmm0 - cvttss2siq %xmm0, %rdx - jb L3 - subss LC0(%rip), %xmm0 - movabsq $-9223372036854775808, %rax - cvttss2siq %xmm0, %rdx - xorq %rax, %rdx -L3: - movq %rdx, %rax - ret - -instead of - -_conv: - movss LCPI1_0(%rip), %xmm1 - cvttss2siq %xmm0, %rcx - movaps %xmm0, %xmm2 - subss %xmm1, %xmm2 - cvttss2siq %xmm2, %rax - movabsq $-9223372036854775808, %rdx - xorq %rdx, %rax - ucomiss %xmm1, %xmm0 - cmovb %rcx, %rax - ret - -Seems like the jb branch has high likelihood of being taken. It would have -saved a few instructions. - -//===---------------------------------------------------------------------===// - -It's not possible to reference AH, BH, CH, and DH registers in an instruction -requiring REX prefix. However, divb and mulb both produce results in AH. If isel -emits a CopyFromReg which gets turned into a movb and that can be allocated a -r8b - r15b. - -To get around this, isel emits a CopyFromReg from AX and then right shift it -down by 8 and truncate it. It's not pretty but it works. We need some register -allocation magic to make the hack go away (e.g. putting additional constraints -on the result of the movb). - -//===---------------------------------------------------------------------===// - -The x86-64 ABI for hidden-argument struct returns requires that the -incoming value of %rdi be copied into %rax by the callee upon return. - -The idea is that it saves callers from having to remember this value, -which would often require a callee-saved register. Callees usually -need to keep this value live for most of their body anyway, so it -doesn't add a significant burden on them. - -We currently implement this in codegen, however this is suboptimal -because it means that it would be quite awkward to implement the -optimization for callers. 
- -A better implementation would be to relax the LLVM IR rules for sret -arguments to allow a function with an sret argument to have a non-void -return type, and to have the front-end to set up the sret argument value -as the return value of the function. The front-end could more easily -emit uses of the returned struct value to be in terms of the function's -lowered return value, and it would free non-C frontends from a -complication only required by a C-based ABI. - -//===---------------------------------------------------------------------===// - -We get a redundant zero extension for code like this: - -int mask[1000]; -int foo(unsigned x) { - if (x < 10) - x = x * 45; - else - x = x * 78; - return mask[x]; -} - -_foo: -LBB1_0: ## entry - cmpl $9, %edi - jbe LBB1_3 ## bb -LBB1_1: ## bb1 - imull $78, %edi, %eax -LBB1_2: ## bb2 - movl %eax, %eax <---- - movq _mask@GOTPCREL(%rip), %rcx - movl (%rcx,%rax,4), %eax - ret -LBB1_3: ## bb - imull $45, %edi, %eax - jmp LBB1_2 ## bb2 - -Before regalloc, we have: - - %reg1025 = IMUL32rri8 %reg1024, 45, implicit-def %eflags - JMP mbb<bb2,0x203afb0> - Successors according to CFG: 0x203afb0 (#3) - -bb1: 0x203af60, LLVM BB @0x1e02310, ID#2: - Predecessors according to CFG: 0x203aec0 (#0) - %reg1026 = IMUL32rri8 %reg1024, 78, implicit-def %eflags - Successors according to CFG: 0x203afb0 (#3) - -bb2: 0x203afb0, LLVM BB @0x1e02340, ID#3: - Predecessors according to CFG: 0x203af10 (#1) 0x203af60 (#2) - %reg1027 = PHI %reg1025, mbb<bb,0x203af10>, - %reg1026, mbb<bb1,0x203af60> - %reg1029 = MOVZX64rr32 %reg1027 - -so we'd have to know that IMUL32rri8 leaves the high word zero extended and to -be able to recognize the zero extend. This could also presumably be implemented -if we have whole-function selectiondags. 
- -//===---------------------------------------------------------------------===// - -Take the following code -(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653): -extern unsigned long table[]; -unsigned long foo(unsigned char *p) { - unsigned long tag = *p; - return table[tag >> 4] + table[tag & 0xf]; -} - -Current code generated: - movzbl (%rdi), %eax - movq %rax, %rcx - andq $240, %rcx - shrq %rcx - andq $15, %rax - movq table(,%rax,8), %rax - addq table(%rcx), %rax - ret - -Issues: -1. First movq should be movl; saves a byte. -2. Both andq's should be andl; saves another two bytes. I think this was - implemented at one point, but subsequently regressed. -3. shrq should be shrl; saves another byte. -4. The first andq can be completely eliminated by using a slightly more - expensive addressing mode. - -//===---------------------------------------------------------------------===// - -Consider the following (contrived testcase, but contains common factors): - -#include <stdarg.h> -int test(int x, ...) { - int sum, i; - va_list l; - va_start(l, x); - for (i = 0; i < x; i++) - sum += va_arg(l, int); - va_end(l); - return sum; -} - -Testcase given in C because fixing it will likely involve changing the IR -generated for it. The primary issue with the result is that it doesn't do any -of the optimizations which are possible if we know the address of a va_list -in the current function is never taken: -1. We shouldn't spill the XMM registers because we only call va_arg with "int". -2. It would be nice if we could sroa the va_list. -3. Probably overkill, but it'd be cool if we could peel off the first five -iterations of the loop. - -Other optimizations involving functions which use va_arg on floats which don't -have the address of a va_list taken: -1. Conversely to the above, we shouldn't spill general registers if we only - call va_arg on "double". -2. 
If we know nothing more than 64 bits wide is read from the XMM registers, - we can change the spilling code to reduce the amount of stack used by half. - -//===---------------------------------------------------------------------===// +//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===// + +AMD64 Optimization Manual 8.2 has some nice information about optimizing integer +multiplication by a constant. How much of it applies to Intel's X86-64 +implementation? There are definite trade-offs to consider: latency vs. register +pressure vs. code size. + +//===---------------------------------------------------------------------===// + +Are we better off using branches instead of cmove to implement FP to +unsigned i64? + +_conv: + ucomiss LC0(%rip), %xmm0 + cvttss2siq %xmm0, %rdx + jb L3 + subss LC0(%rip), %xmm0 + movabsq $-9223372036854775808, %rax + cvttss2siq %xmm0, %rdx + xorq %rax, %rdx +L3: + movq %rdx, %rax + ret + +instead of + +_conv: + movss LCPI1_0(%rip), %xmm1 + cvttss2siq %xmm0, %rcx + movaps %xmm0, %xmm2 + subss %xmm1, %xmm2 + cvttss2siq %xmm2, %rax + movabsq $-9223372036854775808, %rdx + xorq %rdx, %rax + ucomiss %xmm1, %xmm0 + cmovb %rcx, %rax + ret + +Seems like the jb branch has high likelihood of being taken. It would have +saved a few instructions. + +//===---------------------------------------------------------------------===// + +It's not possible to reference AH, BH, CH, and DH registers in an instruction +requiring REX prefix. However, divb and mulb both produce results in AH. If isel +emits a CopyFromReg which gets turned into a movb and that can be allocated a +r8b - r15b. + +To get around this, isel emits a CopyFromReg from AX and then right shift it +down by 8 and truncate it. It's not pretty but it works. We need some register +allocation magic to make the hack go away (e.g. putting additional constraints +on the result of the movb). 
+ +//===---------------------------------------------------------------------===// + +The x86-64 ABI for hidden-argument struct returns requires that the +incoming value of %rdi be copied into %rax by the callee upon return. + +The idea is that it saves callers from having to remember this value, +which would often require a callee-saved register. Callees usually +need to keep this value live for most of their body anyway, so it +doesn't add a significant burden on them. + +We currently implement this in codegen, however this is suboptimal +because it means that it would be quite awkward to implement the +optimization for callers. + +A better implementation would be to relax the LLVM IR rules for sret +arguments to allow a function with an sret argument to have a non-void +return type, and to have the front-end to set up the sret argument value +as the return value of the function. The front-end could more easily +emit uses of the returned struct value to be in terms of the function's +lowered return value, and it would free non-C frontends from a +complication only required by a C-based ABI. 
+ +//===---------------------------------------------------------------------===// + +We get a redundant zero extension for code like this: + +int mask[1000]; +int foo(unsigned x) { + if (x < 10) + x = x * 45; + else + x = x * 78; + return mask[x]; +} + +_foo: +LBB1_0: ## entry + cmpl $9, %edi + jbe LBB1_3 ## bb +LBB1_1: ## bb1 + imull $78, %edi, %eax +LBB1_2: ## bb2 + movl %eax, %eax <---- + movq _mask@GOTPCREL(%rip), %rcx + movl (%rcx,%rax,4), %eax + ret +LBB1_3: ## bb + imull $45, %edi, %eax + jmp LBB1_2 ## bb2 + +Before regalloc, we have: + + %reg1025 = IMUL32rri8 %reg1024, 45, implicit-def %eflags + JMP mbb<bb2,0x203afb0> + Successors according to CFG: 0x203afb0 (#3) + +bb1: 0x203af60, LLVM BB @0x1e02310, ID#2: + Predecessors according to CFG: 0x203aec0 (#0) + %reg1026 = IMUL32rri8 %reg1024, 78, implicit-def %eflags + Successors according to CFG: 0x203afb0 (#3) + +bb2: 0x203afb0, LLVM BB @0x1e02340, ID#3: + Predecessors according to CFG: 0x203af10 (#1) 0x203af60 (#2) + %reg1027 = PHI %reg1025, mbb<bb,0x203af10>, + %reg1026, mbb<bb1,0x203af60> + %reg1029 = MOVZX64rr32 %reg1027 + +so we'd have to know that IMUL32rri8 leaves the high word zero extended and to +be able to recognize the zero extend. This could also presumably be implemented +if we have whole-function selectiondags. + +//===---------------------------------------------------------------------===// + +Take the following code +(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653): +extern unsigned long table[]; +unsigned long foo(unsigned char *p) { + unsigned long tag = *p; + return table[tag >> 4] + table[tag & 0xf]; +} + +Current code generated: + movzbl (%rdi), %eax + movq %rax, %rcx + andq $240, %rcx + shrq %rcx + andq $15, %rax + movq table(,%rax,8), %rax + addq table(%rcx), %rax + ret + +Issues: +1. First movq should be movl; saves a byte. +2. Both andq's should be andl; saves another two bytes. I think this was + implemented at one point, but subsequently regressed. +3. 
shrq should be shrl; saves another byte. +4. The first andq can be completely eliminated by using a slightly more + expensive addressing mode. + +//===---------------------------------------------------------------------===// + +Consider the following (contrived testcase, but contains common factors): + +#include <stdarg.h> +int test(int x, ...) { + int sum, i; + va_list l; + va_start(l, x); + for (i = 0; i < x; i++) + sum += va_arg(l, int); + va_end(l); + return sum; +} + +Testcase given in C because fixing it will likely involve changing the IR +generated for it. The primary issue with the result is that it doesn't do any +of the optimizations which are possible if we know the address of a va_list +in the current function is never taken: +1. We shouldn't spill the XMM registers because we only call va_arg with "int". +2. It would be nice if we could sroa the va_list. +3. Probably overkill, but it'd be cool if we could peel off the first five +iterations of the loop. + +Other optimizations involving functions which use va_arg on floats which don't +have the address of a va_list taken: +1. Conversely to the above, we shouldn't spill general registers if we only + call va_arg on "double". +2. If we know nothing more than 64 bits wide is read from the XMM registers, + we can change the spilling code to reduce the amount of stack used by half. + +//===---------------------------------------------------------------------===// diff --git a/contrib/libs/llvm12/lib/Target/X86/README.txt b/contrib/libs/llvm12/lib/Target/X86/README.txt index 6bc6e74b26..c06a7b1ade 100644 --- a/contrib/libs/llvm12/lib/Target/X86/README.txt +++ b/contrib/libs/llvm12/lib/Target/X86/README.txt @@ -1,1794 +1,1794 @@ -//===---------------------------------------------------------------------===// -// Random ideas for the X86 backend. 
-//===---------------------------------------------------------------------===// - -Improvements to the multiply -> shift/add algorithm: -http://gcc.gnu.org/ml/gcc-patches/2004-08/msg01590.html - -//===---------------------------------------------------------------------===// - -Improve code like this (occurs fairly frequently, e.g. in LLVM): -long long foo(int x) { return 1LL << x; } - -http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01109.html -http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01128.html -http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01136.html - -Another useful one would be ~0ULL >> X and ~0ULL << X. - -One better solution for 1LL << x is: - xorl %eax, %eax - xorl %edx, %edx - testb $32, %cl - sete %al - setne %dl - sall %cl, %eax - sall %cl, %edx - -But that requires good 8-bit subreg support. - -Also, this might be better. It's an extra shift, but it's one instruction -shorter, and doesn't stress 8-bit subreg support. -(From http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01148.html, -but without the unnecessary and.) - movl %ecx, %eax - shrl $5, %eax - movl %eax, %edx - xorl $1, %edx - sall %cl, %eax - sall %cl. %edx - -64-bit shifts (in general) expand to really bad code. Instead of using -cmovs, we should expand to a conditional branch like GCC produces. - -//===---------------------------------------------------------------------===// - -Some isel ideas: - -1. Dynamic programming based approach when compile time is not an - issue. -2. Code duplication (addressing mode) during isel. -3. Other ideas from "Register-Sensitive Selection, Duplication, and - Sequencing of Instructions". -4. Scheduling for reduced register pressure. E.g. "Minimum Register - Instruction Sequence Problem: Revisiting Optimal Code Generation for DAGs" - and other related papers. 
- http://citeseer.ist.psu.edu/govindarajan01minimum.html - -//===---------------------------------------------------------------------===// - -Should we promote i16 to i32 to avoid partial register update stalls? - -//===---------------------------------------------------------------------===// - -Leave any_extend as pseudo instruction and hint to register -allocator. Delay codegen until post register allocation. -Note. any_extend is now turned into an INSERT_SUBREG. We still need to teach -the coalescer how to deal with it though. - -//===---------------------------------------------------------------------===// - -It appears icc use push for parameter passing. Need to investigate. - -//===---------------------------------------------------------------------===// - -The instruction selector sometimes misses folding a load into a compare. The -pattern is written as (cmp reg, (load p)). Because the compare isn't -commutative, it is not matched with the load on both sides. The dag combiner -should be made smart enough to canonicalize the load into the RHS of a compare -when it can invert the result of the compare for free. - -//===---------------------------------------------------------------------===// - -In many cases, LLVM generates code like this: - -_test: - movl 8(%esp), %eax - cmpl %eax, 4(%esp) - setl %al - movzbl %al, %eax - ret - -on some processors (which ones?), it is more efficient to do this: - -_test: - movl 8(%esp), %ebx - xor %eax, %eax - cmpl %ebx, 4(%esp) - setl %al - ret - -Doing this correctly is tricky though, as the xor clobbers the flags. - -//===---------------------------------------------------------------------===// - -We should generate bts/btr/etc instructions on targets where they are cheap or -when codesize is important. 
e.g., for: - -void setbit(int *target, int bit) { - *target |= (1 << bit); -} -void clearbit(int *target, int bit) { - *target &= ~(1 << bit); -} - -//===---------------------------------------------------------------------===// - -Instead of the following for memset char*, 1, 10: - - movl $16843009, 4(%edx) - movl $16843009, (%edx) - movw $257, 8(%edx) - -It might be better to generate - - movl $16843009, %eax - movl %eax, 4(%edx) - movl %eax, (%edx) - movw al, 8(%edx) - -when we can spare a register. It reduces code size. - -//===---------------------------------------------------------------------===// - -Evaluate what the best way to codegen sdiv X, (2^C) is. For X/8, we currently -get this: - -define i32 @test1(i32 %X) { - %Y = sdiv i32 %X, 8 - ret i32 %Y -} - -_test1: - movl 4(%esp), %eax - movl %eax, %ecx - sarl $31, %ecx - shrl $29, %ecx - addl %ecx, %eax - sarl $3, %eax - ret - -GCC knows several different ways to codegen it, one of which is this: - -_test1: - movl 4(%esp), %eax - cmpl $-1, %eax - leal 7(%eax), %ecx - cmovle %ecx, %eax - sarl $3, %eax - ret - -which is probably slower, but it's interesting at least :) - -//===---------------------------------------------------------------------===// - -We are currently lowering large (1MB+) memmove/memcpy to rep/stosl and rep/movsl -We should leave these as libcalls for everything over a much lower threshold, -since libc is hand tuned for medium and large mem ops (avoiding RFO for large -stores, TLB preheating, etc) - -//===---------------------------------------------------------------------===// - -Optimize this into something reasonable: - x * copysign(1.0, y) * copysign(1.0, z) - -//===---------------------------------------------------------------------===// - -Optimize copysign(x, *y) to use an integer load from y. 
- -//===---------------------------------------------------------------------===// - -The following tests perform worse with LSR: - -lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor. - -//===---------------------------------------------------------------------===// - -Adding to the list of cmp / test poor codegen issues: - -int test(__m128 *A, __m128 *B) { - if (_mm_comige_ss(*A, *B)) - return 3; - else - return 4; -} - -_test: - movl 8(%esp), %eax - movaps (%eax), %xmm0 - movl 4(%esp), %eax - movaps (%eax), %xmm1 - comiss %xmm0, %xmm1 - setae %al - movzbl %al, %ecx - movl $3, %eax - movl $4, %edx - cmpl $0, %ecx - cmove %edx, %eax - ret - -Note the setae, movzbl, cmpl, cmove can be replaced with a single cmovae. There -are a number of issues. 1) We are introducing a setcc between the result of the -intrinsic call and select. 2) The intrinsic is expected to produce an i32 value -so an any extend (which becomes a zero extend) is added. - -We probably need some kind of target DAG combine hook to fix this. - -//===---------------------------------------------------------------------===// - -We generate significantly worse code for this than GCC: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21150 -http://gcc.gnu.org/bugzilla/attachment.cgi?id=8701 - -There is also one case we do worse on PPC. - -//===---------------------------------------------------------------------===// - -For this: - -int test(int a) -{ - return a * 3; -} - -We currently emit - imull $3, 4(%esp), %eax - -Perhaps this is what we really should generate? Is imull three or four -cycles? Note: ICC generates this: - movl 4(%esp), %eax - leal (%eax,%eax,2), %eax - -The current instruction priority is based on pattern complexity. The former is -more "complex" because it folds a load so the latter will not be emitted. - -Perhaps we should use AddedComplexity to give LEA32r a higher priority? 
We -should always try to match LEA first since the LEA matching code does some -estimate to determine whether the match is profitable. - -However, if we care more about code size, then imull is better. It's two bytes -shorter than movl + leal. - -On a Pentium M, both variants have the same characteristics with regard -to throughput; however, the multiplication has a latency of four cycles, as -opposed to two cycles for the movl+lea variant. - -//===---------------------------------------------------------------------===// - -It appears gcc place string data with linkonce linkage in -.section __TEXT,__const_coal,coalesced instead of -.section __DATA,__const_coal,coalesced. -Take a look at darwin.h, there are other Darwin assembler directives that we -do not make use of. - -//===---------------------------------------------------------------------===// - -define i32 @foo(i32* %a, i32 %t) { -entry: - br label %cond_true - -cond_true: ; preds = %cond_true, %entry - %x.0.0 = phi i32 [ 0, %entry ], [ %tmp9, %cond_true ] ; <i32> [#uses=3] - %t_addr.0.0 = phi i32 [ %t, %entry ], [ %tmp7, %cond_true ] ; <i32> [#uses=1] - %tmp2 = getelementptr i32* %a, i32 %x.0.0 ; <i32*> [#uses=1] - %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] - %tmp5 = add i32 %t_addr.0.0, %x.0.0 ; <i32> [#uses=1] - %tmp7 = add i32 %tmp5, %tmp3 ; <i32> [#uses=2] - %tmp9 = add i32 %x.0.0, 1 ; <i32> [#uses=2] - %tmp = icmp sgt i32 %tmp9, 39 ; <i1> [#uses=1] - br i1 %tmp, label %bb12, label %cond_true - -bb12: ; preds = %cond_true - ret i32 %tmp7 -} -is pessimized by -loop-reduce and -indvars - -//===---------------------------------------------------------------------===// - -u32 to float conversion improvement: - -float uint32_2_float( unsigned u ) { - float fl = (int) (u & 0xffff); - float fh = (int) (u >> 16); - fh *= 0x1.0p16f; - return fh + fl; -} - -00000000 subl $0x04,%esp -00000003 movl 0x08(%esp,1),%eax -00000007 movl %eax,%ecx -00000009 shrl $0x10,%ecx -0000000c cvtsi2ss %ecx,%xmm0 -00000010 andl 
$0x0000ffff,%eax -00000015 cvtsi2ss %eax,%xmm1 -00000019 mulss 0x00000078,%xmm0 -00000021 addss %xmm1,%xmm0 -00000025 movss %xmm0,(%esp,1) -0000002a flds (%esp,1) -0000002d addl $0x04,%esp -00000030 ret - -//===---------------------------------------------------------------------===// - -When using fastcc abi, align stack slot of argument of type double on 8 byte -boundary to improve performance. - -//===---------------------------------------------------------------------===// - -GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting -simplifications for integer "x cmp y ? a : b". - -//===---------------------------------------------------------------------===// - -Consider the expansion of: - -define i32 @test3(i32 %X) { - %tmp1 = urem i32 %X, 255 - ret i32 %tmp1 -} - -Currently it compiles to: - -... - movl $2155905153, %ecx - movl 8(%esp), %esi - movl %esi, %eax - mull %ecx -... - -This could be "reassociated" into: - - movl $2155905153, %eax - movl 8(%esp), %ecx - mull %ecx - -to avoid the copy. In fact, the existing two-address stuff would do this -except that mul isn't a commutative 2-addr instruction. I guess this has -to be done at isel time based on the #uses to mul? - -//===---------------------------------------------------------------------===// - -Make sure the instruction which starts a loop does not cross a cacheline -boundary. This requires knowing the exact length of each machine instruction. -That is somewhat complicated, but doable. Example 256.bzip2: - -In the new trace, the hot loop has an instruction which crosses a cacheline -boundary. In addition to potential cache misses, this can't help decoding as I -imagine there has to be some kind of complicated decoder reset and realignment -to grab the bytes from the next cacheline. 
- -532 532 0x3cfc movb (1809(%esp, %esi), %bl <<<--- spans 2 64 byte lines -942 942 0x3d03 movl %dh, (1809(%esp, %esi) -937 937 0x3d0a incl %esi -3 3 0x3d0b cmpb %bl, %dl -27 27 0x3d0d jnz 0x000062db <main+11707> - -//===---------------------------------------------------------------------===// - -In c99 mode, the preprocessor doesn't like assembly comments like #TRUNCATE. - -//===---------------------------------------------------------------------===// - -This could be a single 16-bit load. - -int f(char *p) { - if ((p[0] == 1) & (p[1] == 2)) return 1; - return 0; -} - -//===---------------------------------------------------------------------===// - -We should inline lrintf and probably other libc functions. - -//===---------------------------------------------------------------------===// - -This code: - -void test(int X) { - if (X) abort(); -} - -is currently compiled to: - -_test: - subl $12, %esp - cmpl $0, 16(%esp) - jne LBB1_1 - addl $12, %esp - ret -LBB1_1: - call L_abort$stub - -It would be better to produce: - -_test: - subl $12, %esp - cmpl $0, 16(%esp) - jne L_abort$stub - addl $12, %esp - ret - -This can be applied to any no-return function call that takes no arguments etc. -Alternatively, the stack save/restore logic could be shrink-wrapped, producing -something like this: - -_test: - cmpl $0, 4(%esp) - jne LBB1_1 - ret -LBB1_1: - subl $12, %esp - call L_abort$stub - -Both are useful in different situations. Finally, it could be shrink-wrapped -and tail called, like this: - -_test: - cmpl $0, 4(%esp) - jne LBB1_1 - ret -LBB1_1: - pop %eax # realign stack. - call L_abort$stub - -Though this probably isn't worth it. - -//===---------------------------------------------------------------------===// - -Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with -a neg instead of a sub instruction. 
Consider: - -int test(char X) { return 7-X; } - -we currently produce: -_test: - movl $7, %eax - movsbl 4(%esp), %ecx - subl %ecx, %eax - ret - -We would use one fewer register if codegen'd as: - - movsbl 4(%esp), %eax - neg %eax - add $7, %eax - ret - -Note that this isn't beneficial if the load can be folded into the sub. In -this case, we want a sub: - -int test(int X) { return 7-X; } -_test: - movl $7, %eax - subl 4(%esp), %eax - ret - -//===---------------------------------------------------------------------===// - -Leaf functions that require one 4-byte spill slot have a prolog like this: - -_foo: - pushl %esi - subl $4, %esp -... -and an epilog like this: - addl $4, %esp - popl %esi - ret - -It would be smaller, and potentially faster, to push eax on entry and to -pop into a dummy register instead of using addl/subl of esp. Just don't pop -into any return registers :) - -//===---------------------------------------------------------------------===// - -The X86 backend should fold (branch (or (setcc, setcc))) into multiple -branches. We generate really poor code for: - -double testf(double a) { - return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0); -} - -For example, the entry BB is: - -_testf: - subl $20, %esp - pxor %xmm0, %xmm0 - movsd 24(%esp), %xmm1 - ucomisd %xmm0, %xmm1 - setnp %al - sete %cl - testb %cl, %al - jne LBB1_5 # UnifiedReturnBlock -LBB1_1: # cond_true - - -it would be better to replace the last four instructions with: - - jp LBB1_1 - je LBB1_5 -LBB1_1: - -We also codegen the inner ?: into a diamond: - - cvtss2sd LCPI1_0(%rip), %xmm2 - cvtss2sd LCPI1_1(%rip), %xmm3 - ucomisd %xmm1, %xmm0 - ja LBB1_3 # cond_true -LBB1_2: # cond_true - movapd %xmm3, %xmm2 -LBB1_3: # cond_true - movapd %xmm2, %xmm0 - ret - -We should sink the load into xmm3 into the LBB1_2 block. This should -be pretty easy, and will nuke all the copies. 
- -//===---------------------------------------------------------------------===// - -This: - #include <algorithm> - inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b) - { return std::make_pair(a + b, a + b < a); } - bool no_overflow(unsigned a, unsigned b) - { return !full_add(a, b).second; } - -Should compile to: - addl %esi, %edi - setae %al - movzbl %al, %eax - ret - -on x86-64, instead of the rather stupid-looking: - addl %esi, %edi - setb %al - xorb $1, %al - movzbl %al, %eax - ret - - -//===---------------------------------------------------------------------===// - -The following code: - -bb114.preheader: ; preds = %cond_next94 - %tmp231232 = sext i16 %tmp62 to i32 ; <i32> [#uses=1] - %tmp233 = sub i32 32, %tmp231232 ; <i32> [#uses=1] - %tmp245246 = sext i16 %tmp65 to i32 ; <i32> [#uses=1] - %tmp252253 = sext i16 %tmp68 to i32 ; <i32> [#uses=1] - %tmp254 = sub i32 32, %tmp252253 ; <i32> [#uses=1] - %tmp553554 = bitcast i16* %tmp37 to i8* ; <i8*> [#uses=2] - %tmp583584 = sext i16 %tmp98 to i32 ; <i32> [#uses=1] - %tmp585 = sub i32 32, %tmp583584 ; <i32> [#uses=1] - %tmp614615 = sext i16 %tmp101 to i32 ; <i32> [#uses=1] - %tmp621622 = sext i16 %tmp104 to i32 ; <i32> [#uses=1] - %tmp623 = sub i32 32, %tmp621622 ; <i32> [#uses=1] - br label %bb114 - -produces: - -LBB3_5: # bb114.preheader - movswl -68(%ebp), %eax - movl $32, %ecx - movl %ecx, -80(%ebp) - subl %eax, -80(%ebp) - movswl -52(%ebp), %eax - movl %ecx, -84(%ebp) - subl %eax, -84(%ebp) - movswl -70(%ebp), %eax - movl %ecx, -88(%ebp) - subl %eax, -88(%ebp) - movswl -50(%ebp), %eax - subl %eax, %ecx - movl %ecx, -76(%ebp) - movswl -42(%ebp), %eax - movl %eax, -92(%ebp) - movswl -66(%ebp), %eax - movl %eax, -96(%ebp) - movw $0, -98(%ebp) - -This appears to be bad because the RA is not folding the store to the stack -slot into the movl. The above instructions could be: - movl $32, -80(%ebp) -... - movl $32, -84(%ebp) -... -This seems like a cross between remat and spill folding. 
- -This has redundant subtractions of %eax from a stack slot. However, %ecx doesn't -change, so we could simply subtract %eax from %ecx first and then use %ecx (or -vice-versa). - -//===---------------------------------------------------------------------===// - -This code: - - %tmp659 = icmp slt i16 %tmp654, 0 ; <i1> [#uses=1] - br i1 %tmp659, label %cond_true662, label %cond_next715 - -produces this: - - testw %cx, %cx - movswl %cx, %esi - jns LBB4_109 # cond_next715 - -Shark tells us that using %cx in the testw instruction is sub-optimal. It -suggests using the 32-bit register (which is what ICC uses). - -//===---------------------------------------------------------------------===// - -We compile this: - -void compare (long long foo) { - if (foo < 4294967297LL) - abort(); -} - -to: - -compare: - subl $4, %esp - cmpl $0, 8(%esp) - setne %al - movzbw %al, %ax - cmpl $1, 12(%esp) - setg %cl - movzbw %cl, %cx - cmove %ax, %cx - testb $1, %cl - jne .LBB1_2 # UnifiedReturnBlock -.LBB1_1: # ifthen - call abort -.LBB1_2: # UnifiedReturnBlock - addl $4, %esp - ret - -(also really horrible code on ppc). This is due to the expand code for 64-bit -compares. GCC produces multiple branches, which is much nicer: - -compare: - subl $12, %esp - movl 20(%esp), %edx - movl 16(%esp), %eax - decl %edx - jle .L7 -.L5: - addl $12, %esp - ret - .p2align 4,,7 -.L7: - jl .L4 - cmpl $0, %eax - .p2align 4,,8 - ja .L5 -.L4: - .p2align 4,,9 - call abort - -//===---------------------------------------------------------------------===// - -Tail call optimization improvements: Tail call optimization currently -pushes all arguments on the top of the stack (their normal place for -non-tail call optimized calls) that source from the callers arguments -or that source from a virtual register (also possibly sourcing from -callers arguments). -This is done to prevent overwriting of parameters (see example -below) that might be used later. 
- -example: - -int callee(int32, int64); -int caller(int32 arg1, int32 arg2) { - int64 local = arg2 * 2; - return callee(arg2, (int64)local); -} - -[arg1] [!arg2 no longer valid since we moved local onto it] -[arg2] -> [(int64) -[RETADDR] local ] - -Moving arg1 onto the stack slot of callee function would overwrite -arg2 of the caller. - -Possible optimizations: - - - - Analyse the actual parameters of the callee to see which would - overwrite a caller parameter which is used by the callee and only - push them onto the top of the stack. - - int callee (int32 arg1, int32 arg2); - int caller (int32 arg1, int32 arg2) { - return callee(arg1,arg2); - } - - Here we don't need to write any variables to the top of the stack - since they don't overwrite each other. - - int callee (int32 arg1, int32 arg2); - int caller (int32 arg1, int32 arg2) { - return callee(arg2,arg1); - } - - Here we need to push the arguments because they overwrite each - other. - -//===---------------------------------------------------------------------===// - -main () -{ - int i = 0; - unsigned long int z = 0; - - do { - z -= 0x00004000; - i++; - if (i > 0x00040000) - abort (); - } while (z > 0); - exit (0); -} - -gcc compiles this to: - -_main: - subl $28, %esp - xorl %eax, %eax - jmp L2 -L3: - cmpl $262144, %eax - je L10 -L2: - addl $1, %eax - cmpl $262145, %eax - jne L3 - call L_abort$stub -L10: - movl $0, (%esp) - call L_exit$stub - -llvm: - -_main: - subl $12, %esp - movl $1, %eax - movl $16384, %ecx -LBB1_1: # bb - cmpl $262145, %eax - jge LBB1_4 # cond_true -LBB1_2: # cond_next - incl %eax - addl $4294950912, %ecx - cmpl $16384, %ecx - jne LBB1_1 # bb -LBB1_3: # bb11 - xorl %eax, %eax - addl $12, %esp - ret -LBB1_4: # cond_true - call L_abort$stub - -1. LSR should rewrite the first cmp with induction variable %ecx. -2. 
DAG combiner should fold - leal 1(%eax), %edx - cmpl $262145, %edx - => - cmpl $262144, %eax - -//===---------------------------------------------------------------------===// - -define i64 @test(double %X) { - %Y = fptosi double %X to i64 - ret i64 %Y -} - -compiles to: - -_test: - subl $20, %esp - movsd 24(%esp), %xmm0 - movsd %xmm0, 8(%esp) - fldl 8(%esp) - fisttpll (%esp) - movl 4(%esp), %edx - movl (%esp), %eax - addl $20, %esp - #FP_REG_KILL - ret - -This should just fldl directly from the input stack slot. - -//===---------------------------------------------------------------------===// - -This code: -int foo (int x) { return (x & 65535) | 255; } - -Should compile into: - -_foo: - movzwl 4(%esp), %eax - orl $255, %eax - ret - -instead of: -_foo: - movl $65280, %eax - andl 4(%esp), %eax - orl $255, %eax - ret - -//===---------------------------------------------------------------------===// - -We're codegen'ing multiply of long longs inefficiently: - -unsigned long long LLM(unsigned long long arg1, unsigned long long arg2) { - return arg1 * arg2; -} - -We compile to (fomit-frame-pointer): - -_LLM: - pushl %esi - movl 8(%esp), %ecx - movl 16(%esp), %esi - movl %esi, %eax - mull %ecx - imull 12(%esp), %esi - addl %edx, %esi - imull 20(%esp), %ecx - movl %esi, %edx - addl %ecx, %edx - popl %esi - ret - -This looks like a scheduling deficiency and lack of remat of the load from -the argument area. ICC apparently produces: - - movl 8(%esp), %ecx - imull 12(%esp), %ecx - movl 16(%esp), %eax - imull 4(%esp), %eax - addl %eax, %ecx - movl 4(%esp), %eax - mull 12(%esp) - addl %ecx, %edx - ret - -Note that it remat'd loads from 4(esp) and 12(esp). See this GCC PR: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17236 - -//===---------------------------------------------------------------------===// - -We can fold a store into "zeroing a reg". Instead of: - -xorl %eax, %eax -movl %eax, 124(%esp) - -we should get: - -movl $0, 124(%esp) - -if the flags of the xor are dead. 
- -Likewise, we isel "x<<1" into "add reg,reg". If reg is spilled, this should -be folded into: shl [mem], 1 - -//===---------------------------------------------------------------------===// - -In SSE mode, we turn abs and neg into a load from the constant pool plus a xor -or and instruction, for example: - - xorpd LCPI1_0, %xmm2 - -However, if xmm2 gets spilled, we end up with really ugly code like this: - - movsd (%esp), %xmm0 - xorpd LCPI1_0, %xmm0 - movsd %xmm0, (%esp) - -Since we 'know' that this is a 'neg', we can actually "fold" the spill into -the neg/abs instruction, turning it into an *integer* operation, like this: - - xorl 2147483648, [mem+4] ## 2147483648 = (1 << 31) - -you could also use xorb, but xorl is less likely to lead to a partial register -stall. Here is a contrived testcase: - -double a, b, c; -void test(double *P) { - double X = *P; - a = X; - bar(); - X = -X; - b = X; - bar(); - c = X; -} - -//===---------------------------------------------------------------------===// - -The generated code on x86 for checking for signed overflow on a multiply the -obvious way is much longer than it needs to be. - -int x(int a, int b) { - long long prod = (long long)a*b; - return prod > 0x7FFFFFFF || prod < (-0x7FFFFFFF-1); -} - -See PR2053 for more details. - -//===---------------------------------------------------------------------===// - -We should investigate using cdq/cltd (effect: edx = sar eax, 31) -more aggressively; it should cost the same as a move+shift on any modern -processor, but it's a lot shorter. Downside is that it puts more -pressure on register allocation because it has fixed operands. - -Example: -int abs(int x) {return x < 0 ? 
-x : x;} - -gcc compiles this to the following when using march/mtune=pentium2/3/4/m/etc.: -abs: - movl 4(%esp), %eax - cltd - xorl %edx, %eax - subl %edx, %eax - ret - -//===---------------------------------------------------------------------===// - -Take the following code (from -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541): - -extern unsigned char first_one[65536]; -int FirstOnet(unsigned long long arg1) -{ - if (arg1 >> 48) - return (first_one[arg1 >> 48]); - return 0; -} - - -The following code is currently generated: -FirstOnet: - movl 8(%esp), %eax - cmpl $65536, %eax - movl 4(%esp), %ecx - jb .LBB1_2 # UnifiedReturnBlock -.LBB1_1: # ifthen - shrl $16, %eax - movzbl first_one(%eax), %eax - ret -.LBB1_2: # UnifiedReturnBlock - xorl %eax, %eax - ret - -We could change the "movl 8(%esp), %eax" into "movzwl 10(%esp), %eax"; this -lets us change the cmpl into a testl, which is shorter, and eliminate the shift. - -//===---------------------------------------------------------------------===// - -We compile this function: - -define i32 @foo(i32 %a, i32 %b, i32 %c, i8 zeroext %d) nounwind { -entry: - %tmp2 = icmp eq i8 %d, 0 ; <i1> [#uses=1] - br i1 %tmp2, label %bb7, label %bb - -bb: ; preds = %entry - %tmp6 = add i32 %b, %a ; <i32> [#uses=1] - ret i32 %tmp6 - -bb7: ; preds = %entry - %tmp10 = sub i32 %a, %c ; <i32> [#uses=1] - ret i32 %tmp10 -} - -to: - -foo: # @foo -# %bb.0: # %entry - movl 4(%esp), %ecx - cmpb $0, 16(%esp) - je .LBB0_2 -# %bb.1: # %bb - movl 8(%esp), %eax - addl %ecx, %eax - ret -.LBB0_2: # %bb7 - movl 12(%esp), %edx - movl %ecx, %eax - subl %edx, %eax - ret - -There's an obviously unnecessary movl in .LBB0_2, and we could eliminate a -couple more movls by putting 4(%esp) into %eax instead of %ecx. - -//===---------------------------------------------------------------------===// - -See rdar://4653682. 
- -From flops: - -LBB1_15: # bb310 - cvtss2sd LCPI1_0, %xmm1 - addsd %xmm1, %xmm0 - movsd 176(%esp), %xmm2 - mulsd %xmm0, %xmm2 - movapd %xmm2, %xmm3 - mulsd %xmm3, %xmm3 - movapd %xmm3, %xmm4 - mulsd LCPI1_23, %xmm4 - addsd LCPI1_24, %xmm4 - mulsd %xmm3, %xmm4 - addsd LCPI1_25, %xmm4 - mulsd %xmm3, %xmm4 - addsd LCPI1_26, %xmm4 - mulsd %xmm3, %xmm4 - addsd LCPI1_27, %xmm4 - mulsd %xmm3, %xmm4 - addsd LCPI1_28, %xmm4 - mulsd %xmm3, %xmm4 - addsd %xmm1, %xmm4 - mulsd %xmm2, %xmm4 - movsd 152(%esp), %xmm1 - addsd %xmm4, %xmm1 - movsd %xmm1, 152(%esp) - incl %eax - cmpl %eax, %esi - jge LBB1_15 # bb310 -LBB1_16: # bb358.loopexit - movsd 152(%esp), %xmm0 - addsd %xmm0, %xmm0 - addsd LCPI1_22, %xmm0 - movsd %xmm0, 152(%esp) - -Rather than spilling the result of the last addsd in the loop, we should have -inserted a copy to split the interval (one for the duration of the loop, one -extending to the fall through). The register pressure in the loop isn't high -enough to warrant the spill. - -Also check why xmm7 is not used at all in the function. 
- -//===---------------------------------------------------------------------===// - -Take the following: - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-S128" -target triple = "i386-apple-darwin8" -@in_exit.4870.b = internal global i1 false ; <i1*> [#uses=2] -define fastcc void @abort_gzip() noreturn nounwind { -entry: - %tmp.b.i = load i1* @in_exit.4870.b ; <i1> [#uses=1] - br i1 %tmp.b.i, label %bb.i, label %bb4.i -bb.i: ; preds = %entry - tail call void @exit( i32 1 ) noreturn nounwind - unreachable -bb4.i: ; preds = %entry - store i1 true, i1* @in_exit.4870.b - tail call void @exit( i32 1 ) noreturn nounwind - unreachable -} -declare void @exit(i32) noreturn nounwind - -This compiles into: -_abort_gzip: ## @abort_gzip -## %bb.0: ## %entry - subl $12, %esp - movb _in_exit.4870.b, %al - cmpb $1, %al - jne LBB0_2 - -We somehow miss folding the movb into the cmpb. - -//===---------------------------------------------------------------------===// - -We compile: - -int test(int x, int y) { - return x-y-1; -} - -into (-m64): - -_test: - decl %edi - movl %edi, %eax - subl %esi, %eax - ret - -it would be better to codegen as: x+~y (notl+addl) - -//===---------------------------------------------------------------------===// - -This code: - -int foo(const char *str,...) 
-{ - __builtin_va_list a; int x; - __builtin_va_start(a,str); x = __builtin_va_arg(a,int); __builtin_va_end(a); - return x; -} - -gets compiled into this on x86-64: - subq $200, %rsp - movaps %xmm7, 160(%rsp) - movaps %xmm6, 144(%rsp) - movaps %xmm5, 128(%rsp) - movaps %xmm4, 112(%rsp) - movaps %xmm3, 96(%rsp) - movaps %xmm2, 80(%rsp) - movaps %xmm1, 64(%rsp) - movaps %xmm0, 48(%rsp) - movq %r9, 40(%rsp) - movq %r8, 32(%rsp) - movq %rcx, 24(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 8(%rsp) - leaq (%rsp), %rax - movq %rax, 192(%rsp) - leaq 208(%rsp), %rax - movq %rax, 184(%rsp) - movl $48, 180(%rsp) - movl $8, 176(%rsp) - movl 176(%rsp), %eax - cmpl $47, %eax - jbe .LBB1_3 # bb -.LBB1_1: # bb3 - movq 184(%rsp), %rcx - leaq 8(%rcx), %rax - movq %rax, 184(%rsp) -.LBB1_2: # bb4 - movl (%rcx), %eax - addq $200, %rsp - ret -.LBB1_3: # bb - movl %eax, %ecx - addl $8, %eax - addq 192(%rsp), %rcx - movl %eax, 176(%rsp) - jmp .LBB1_2 # bb4 - -gcc 4.3 generates: - subq $96, %rsp -.LCFI0: - leaq 104(%rsp), %rax - movq %rsi, -80(%rsp) - movl $8, -120(%rsp) - movq %rax, -112(%rsp) - leaq -88(%rsp), %rax - movq %rax, -104(%rsp) - movl $8, %eax - cmpl $48, %eax - jb .L6 - movq -112(%rsp), %rdx - movl (%rdx), %eax - addq $96, %rsp - ret - .p2align 4,,10 - .p2align 3 -.L6: - mov %eax, %edx - addq -104(%rsp), %rdx - addl $8, %eax - movl %eax, -120(%rsp) - movl (%rdx), %eax - addq $96, %rsp - ret - -and it gets compiled into this on x86: - pushl %ebp - movl %esp, %ebp - subl $4, %esp - leal 12(%ebp), %eax - movl %eax, -4(%ebp) - leal 16(%ebp), %eax - movl %eax, -4(%ebp) - movl 12(%ebp), %eax - addl $4, %esp - popl %ebp - ret - -gcc 4.3 generates: - pushl %ebp - movl %esp, %ebp - movl 12(%ebp), %eax - popl %ebp - ret - -//===---------------------------------------------------------------------===// - -Teach tblgen not to check bitconvert source type in some cases. 
This allows us -to consolidate the following patterns in X86InstrMMX.td: - -def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>; - -There are other cases in various td files. - -//===---------------------------------------------------------------------===// - -Take something like the following on x86-32: -unsigned a(unsigned long long x, unsigned y) {return x % y;} - -We currently generate a libcall, but we really shouldn't: the expansion is -shorter and likely faster than the libcall. The expected code is something -like the following: - - movl 12(%ebp), %eax - movl 16(%ebp), %ecx - xorl %edx, %edx - divl %ecx - movl 8(%ebp), %eax - divl %ecx - movl %edx, %eax - ret - -A similar code sequence works for division. - -//===---------------------------------------------------------------------===// - -We currently compile this: - -define i32 @func1(i32 %v1, i32 %v2) nounwind { -entry: - %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) - %sum = extractvalue {i32, i1} %t, 0 - %obit = extractvalue {i32, i1} %t, 1 - br i1 %obit, label %overflow, label %normal -normal: - ret i32 %sum -overflow: - call void @llvm.trap() - unreachable -} -declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) -declare void @llvm.trap() - -to: - -_func1: - movl 4(%esp), %eax - addl 8(%esp), %eax - jo LBB1_2 ## overflow -LBB1_1: ## normal - ret -LBB1_2: ## overflow - ud2 - -it would be nice to produce "into" someday. - -//===---------------------------------------------------------------------===// - -Test instructions can be eliminated by using EFLAGS values from arithmetic -instructions. 
This is currently not done for mul, and, or, xor, neg, shl, -sra, srl, shld, shrd, atomic ops, and others. It is also currently not done -for read-modify-write instructions. It is also currently not done if the -OF or CF flags are needed. - -The shift operators have the complication that when the shift count is -zero, EFLAGS is not set, so they can only subsume a test instruction if -the shift count is known to be non-zero. Also, using the EFLAGS value -from a shift is apparently very slow on some x86 implementations. - -In read-modify-write instructions, the root node in the isel match is -the store, and isel has no way for the use of the EFLAGS result of the -arithmetic to be remapped to the new node. - -Add and subtract instructions set OF on signed overflow and CF on unsigned -overflow, while test instructions always clear OF and CF. In order to -replace a test with an add or subtract in a situation where OF or CF is -needed, codegen must be able to prove that the operation cannot see -signed or unsigned overflow, respectively. - -//===---------------------------------------------------------------------===// - -memcpy/memmove do not lower to SSE copies when possible. 
A silly example is: -define <16 x float> @foo(<16 x float> %A) nounwind { - %tmp = alloca <16 x float>, align 16 - %tmp2 = alloca <16 x float>, align 16 - store <16 x float> %A, <16 x float>* %tmp - %s = bitcast <16 x float>* %tmp to i8* - %s2 = bitcast <16 x float>* %tmp2 to i8* - call void @llvm.memcpy.i64(i8* %s, i8* %s2, i64 64, i32 16) - %R = load <16 x float>* %tmp2 - ret <16 x float> %R -} - -declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind - -which compiles to: - -_foo: - subl $140, %esp - movaps %xmm3, 112(%esp) - movaps %xmm2, 96(%esp) - movaps %xmm1, 80(%esp) - movaps %xmm0, 64(%esp) - movl 60(%esp), %eax - movl %eax, 124(%esp) - movl 56(%esp), %eax - movl %eax, 120(%esp) - movl 52(%esp), %eax - <many many more 32-bit copies> - movaps (%esp), %xmm0 - movaps 16(%esp), %xmm1 - movaps 32(%esp), %xmm2 - movaps 48(%esp), %xmm3 - addl $140, %esp - ret - -On Nehalem, it may even be cheaper to just use movups when unaligned than to -fall back to lower-granularity chunks. - -//===---------------------------------------------------------------------===// - -Implement processor-specific optimizations for parity with GCC on these -processors. GCC does two optimizations: - -1. ix86_pad_returns inserts a noop before ret instructions if immediately - preceded by a conditional branch or is the target of a jump. -2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of - code contains more than 3 branches. 
- -The first one is done for all AMDs, Core2, and "Generic" -The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, - Core 2, and "Generic" - -//===---------------------------------------------------------------------===// -Testcase: -int x(int a) { return (a&0xf0)>>4; } - -Current output: - movl 4(%esp), %eax - shrl $4, %eax - andl $15, %eax - ret - -Ideal output: - movzbl 4(%esp), %eax - shrl $4, %eax - ret - -//===---------------------------------------------------------------------===// - -Re-implement atomic builtins __sync_add_and_fetch() and __sync_sub_and_fetch -properly. - -When the return value is not used (i.e. only care about the value in the -memory), x86 does not have to use add to implement these. Instead, it can use -add, sub, inc, dec instructions with the "lock" prefix. - -This is currently implemented using a bit of instruction selection trick. The -issue is the target independent pattern produces one output and a chain and we -want to map it into one that just output a chain. The current trick is to select -it into a MERGE_VALUES with the first definition being an implicit_def. The -proper solution is to add new ISD opcodes for the no-output variant. DAG -combiner can then transform the node before it gets to target node selection. - -Problem #2 is we are adding a whole bunch of x86 atomic instructions when in -fact these instructions are identical to the non-lock versions. We need a way to -add target specific information to target nodes and have this information -carried over to machine instructions. Asm printer (or JIT) can use this -information to add the "lock" prefix. 
- -//===---------------------------------------------------------------------===// - -struct B { - unsigned char y0 : 1; -}; - -int bar(struct B* a) { return a->y0; } - -define i32 @bar(%struct.B* nocapture %a) nounwind readonly optsize { - %1 = getelementptr inbounds %struct.B* %a, i64 0, i32 0 - %2 = load i8* %1, align 1 - %3 = and i8 %2, 1 - %4 = zext i8 %3 to i32 - ret i32 %4 -} - -bar: # @bar -# %bb.0: - movb (%rdi), %al - andb $1, %al - movzbl %al, %eax - ret - -Missed optimization: should be movl+andl. - -//===---------------------------------------------------------------------===// - -The x86_64 abi says: - -Booleans, when stored in a memory object, are stored as single byte objects the -value of which is always 0 (false) or 1 (true). - -We are not using this fact: - -int bar(_Bool *a) { return *a; } - -define i32 @bar(i8* nocapture %a) nounwind readonly optsize { - %1 = load i8* %a, align 1, !tbaa !0 - %tmp = and i8 %1, 1 - %2 = zext i8 %tmp to i32 - ret i32 %2 -} - -bar: - movb (%rdi), %al - andb $1, %al - movzbl %al, %eax - ret - -GCC produces - -bar: - movzbl (%rdi), %eax - ret - -//===---------------------------------------------------------------------===// - -Take the following C code: -int f(int a, int b) { return (unsigned char)a == (unsigned char)b; } - -We generate the following IR with clang: -define i32 @f(i32 %a, i32 %b) nounwind readnone { -entry: - %tmp = xor i32 %b, %a ; <i32> [#uses=1] - %tmp6 = and i32 %tmp, 255 ; <i32> [#uses=1] - %cmp = icmp eq i32 %tmp6, 0 ; <i1> [#uses=1] - %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1] - ret i32 %conv5 -} - -And the following x86 code: - xorl %esi, %edi - testb $-1, %dil - sete %al - movzbl %al, %eax - ret - -A cmpb instead of the xorl+testb would be one instruction shorter. 
- -//===---------------------------------------------------------------------===// - -Given the following C code: -int f(int a, int b) { return (signed char)a == (signed char)b; } - -We generate the following IR with clang: -define i32 @f(i32 %a, i32 %b) nounwind readnone { -entry: - %sext = shl i32 %a, 24 ; <i32> [#uses=1] - %conv1 = ashr i32 %sext, 24 ; <i32> [#uses=1] - %sext6 = shl i32 %b, 24 ; <i32> [#uses=1] - %conv4 = ashr i32 %sext6, 24 ; <i32> [#uses=1] - %cmp = icmp eq i32 %conv1, %conv4 ; <i1> [#uses=1] - %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1] - ret i32 %conv5 -} - -And the following x86 code: - movsbl %sil, %eax - movsbl %dil, %ecx - cmpl %eax, %ecx - sete %al - movzbl %al, %eax - ret - - -It should be possible to eliminate the sign extensions. - -//===---------------------------------------------------------------------===// - -LLVM misses a load+store narrowing opportunity in this code: - -%struct.bf = type { i64, i16, i16, i32 } - -@bfi = external global %struct.bf* ; <%struct.bf**> [#uses=2] - -define void @t1() nounwind ssp { -entry: - %0 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1] - %1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1] - %2 = bitcast i16* %1 to i32* ; <i32*> [#uses=2] - %3 = load i32* %2, align 1 ; <i32> [#uses=1] - %4 = and i32 %3, -65537 ; <i32> [#uses=1] - store i32 %4, i32* %2, align 1 - %5 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1] - %6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1] - %7 = bitcast i16* %6 to i32* ; <i32*> [#uses=2] - %8 = load i32* %7, align 1 ; <i32> [#uses=1] - %9 = and i32 %8, -131073 ; <i32> [#uses=1] - store i32 %9, i32* %7, align 1 - ret void -} - -LLVM currently emits this: - - movq bfi(%rip), %rax - andl $-65537, 8(%rax) - movq bfi(%rip), %rax - andl $-131073, 8(%rax) - ret - -It could narrow the loads and stores to emit this: - - movq bfi(%rip), %rax - andb $-2, 10(%rax) - movq bfi(%rip), %rax - andb $-3, 10(%rax) - ret - -The 
trouble is that there is a TokenFactor between the store and the -load, making it non-trivial to determine if there's anything between -the load and the store which would prohibit narrowing. - -//===---------------------------------------------------------------------===// - -This code: -void foo(unsigned x) { - if (x == 0) bar(); - else if (x == 1) qux(); -} - -currently compiles into: -_foo: - movl 4(%esp), %eax - cmpl $1, %eax - je LBB0_3 - testl %eax, %eax - jne LBB0_4 - -the testl could be removed: -_foo: - movl 4(%esp), %eax - cmpl $1, %eax - je LBB0_3 - jb LBB0_4 - -0 is the only unsigned number < 1. - -//===---------------------------------------------------------------------===// - -This code: - -%0 = type { i32, i1 } - -define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp { -entry: - %uadd = tail call %0 @llvm.uadd.with.overflow.i32(i32 %sum, i32 %x) - %cmp = extractvalue %0 %uadd, 1 - %inc = zext i1 %cmp to i32 - %add = add i32 %x, %sum - %z.0 = add i32 %add, %inc - ret i32 %z.0 -} - -declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone - -compiles to: - -_add32carry: ## @add32carry - addl %esi, %edi - sbbl %ecx, %ecx - movl %edi, %eax - subl %ecx, %eax - ret - -But it could be: - -_add32carry: - leal (%rsi,%rdi), %eax - cmpl %esi, %eax - adcl $0, %eax - ret - -//===---------------------------------------------------------------------===// - -The hot loop of 256.bzip2 contains code that looks a bit like this: - -int foo(char *P, char *Q, int x, int y) { - if (P[0] != Q[0]) - return P[0] < Q[0]; - if (P[1] != Q[1]) - return P[1] < Q[1]; - if (P[2] != Q[2]) - return P[2] < Q[2]; - return P[3] < Q[3]; -} - -In the real code, we get a lot more wrong than this. 
However, even in this -code we generate: - -_foo: ## @foo -## %bb.0: ## %entry - movb (%rsi), %al - movb (%rdi), %cl - cmpb %al, %cl - je LBB0_2 -LBB0_1: ## %if.then - cmpb %al, %cl - jmp LBB0_5 -LBB0_2: ## %if.end - movb 1(%rsi), %al - movb 1(%rdi), %cl - cmpb %al, %cl - jne LBB0_1 -## %bb.3: ## %if.end38 - movb 2(%rsi), %al - movb 2(%rdi), %cl - cmpb %al, %cl - jne LBB0_1 -## %bb.4: ## %if.end60 - movb 3(%rdi), %al - cmpb 3(%rsi), %al -LBB0_5: ## %if.end60 - setl %al - movzbl %al, %eax - ret - -Note that we generate jumps to LBB0_1 which does a redundant compare. The -redundant compare also forces the register values to be live, which prevents -folding one of the loads into the compare. In contrast, GCC 4.2 produces: - -_foo: - movzbl (%rsi), %eax - cmpb %al, (%rdi) - jne L10 -L12: - movzbl 1(%rsi), %eax - cmpb %al, 1(%rdi) - jne L10 - movzbl 2(%rsi), %eax - cmpb %al, 2(%rdi) - jne L10 - movzbl 3(%rdi), %eax - cmpb 3(%rsi), %al -L10: - setl %al - movzbl %al, %eax - ret - -which is "perfect". - -//===---------------------------------------------------------------------===// - -For the branch in the following code: -int a(); -int b(int x, int y) { - if (x & (1<<(y&7))) - return a(); - return y; -} - -We currently generate: - movb %sil, %al - andb $7, %al - movzbl %al, %eax - btl %eax, %edi - jae .LBB0_2 - -movl+andl would be shorter than the movb+andb+movzbl sequence. - -//===---------------------------------------------------------------------===// - -For the following: -struct u1 { - float x, y; -}; -float foo(struct u1 u) { - return u.x + u.y; -} - -We currently generate: - movdqa %xmm0, %xmm1 - pshufd $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0,0,0] - addss %xmm1, %xmm0 - ret - -We could save an instruction here by commuting the addss. 
- -//===---------------------------------------------------------------------===// - -This (from PR9661): - -float clamp_float(float a) { - if (a > 1.0f) - return 1.0f; - else if (a < 0.0f) - return 0.0f; - else - return a; -} - -Could compile to: - -clamp_float: # @clamp_float - movss .LCPI0_0(%rip), %xmm1 - minss %xmm1, %xmm0 - pxor %xmm1, %xmm1 - maxss %xmm1, %xmm0 - ret - -with -ffast-math. - -//===---------------------------------------------------------------------===// - -This function (from PR9803): - -int clamp2(int a) { - if (a > 5) - a = 5; - if (a < 0) - return 0; - return a; -} - -Compiles to: - -_clamp2: ## @clamp2 - pushq %rbp - movq %rsp, %rbp - cmpl $5, %edi - movl $5, %ecx - cmovlel %edi, %ecx - testl %ecx, %ecx - movl $0, %eax - cmovnsl %ecx, %eax - popq %rbp - ret - -The move of 0 could be scheduled above the test to make it an xor reg,reg. - -//===---------------------------------------------------------------------===// - -GCC PR48986. We currently compile this: - -void bar(void); -void yyy(int* p) { - if (__sync_fetch_and_add(p, -1) == 1) - bar(); -} - -into: - movl $-1, %eax - lock - xaddl %eax, (%rdi) - cmpl $1, %eax - je LBB0_2 - -Instead we could generate: - - lock - dec %rdi - je LBB0_2 - -The trick is to match "fetch_and_add(X, -C) == C". - -//===---------------------------------------------------------------------===// - -unsigned t(unsigned a, unsigned b) { - return a <= b ? 5 : -5; -} - -We generate: - movl $5, %ecx - cmpl %esi, %edi - movl $-5, %eax - cmovbel %ecx, %eax - -GCC: - cmpl %edi, %esi - sbbl %eax, %eax - andl $-10, %eax - addl $5, %eax - -//===---------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// +// Random ideas for the X86 backend. 
+//===---------------------------------------------------------------------===// + +Improvements to the multiply -> shift/add algorithm: +http://gcc.gnu.org/ml/gcc-patches/2004-08/msg01590.html + +//===---------------------------------------------------------------------===// + +Improve code like this (occurs fairly frequently, e.g. in LLVM): +long long foo(int x) { return 1LL << x; } + +http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01109.html +http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01128.html +http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01136.html + +Another useful one would be ~0ULL >> X and ~0ULL << X. + +One better solution for 1LL << x is: + xorl %eax, %eax + xorl %edx, %edx + testb $32, %cl + sete %al + setne %dl + sall %cl, %eax + sall %cl, %edx + +But that requires good 8-bit subreg support. + +Also, this might be better. It's an extra shift, but it's one instruction +shorter, and doesn't stress 8-bit subreg support. +(From http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01148.html, +but without the unnecessary and.) + movl %ecx, %eax + shrl $5, %eax + movl %eax, %edx + xorl $1, %edx + sall %cl, %eax + sall %cl, %edx + +64-bit shifts (in general) expand to really bad code. Instead of using +cmovs, we should expand to a conditional branch like GCC produces. + +//===---------------------------------------------------------------------===// + +Some isel ideas: + +1. Dynamic programming based approach when compile time is not an + issue. +2. Code duplication (addressing mode) during isel. +3. Other ideas from "Register-Sensitive Selection, Duplication, and + Sequencing of Instructions". +4. Scheduling for reduced register pressure. E.g. "Minimum Register + Instruction Sequence Problem: Revisiting Optimal Code Generation for DAGs" + and other related papers. 
+ http://citeseer.ist.psu.edu/govindarajan01minimum.html + +//===---------------------------------------------------------------------===// + +Should we promote i16 to i32 to avoid partial register update stalls? + +//===---------------------------------------------------------------------===// + +Leave any_extend as pseudo instruction and hint to register +allocator. Delay codegen until post register allocation. +Note. any_extend is now turned into an INSERT_SUBREG. We still need to teach +the coalescer how to deal with it though. + +//===---------------------------------------------------------------------===// + +It appears icc use push for parameter passing. Need to investigate. + +//===---------------------------------------------------------------------===// + +The instruction selector sometimes misses folding a load into a compare. The +pattern is written as (cmp reg, (load p)). Because the compare isn't +commutative, it is not matched with the load on both sides. The dag combiner +should be made smart enough to canonicalize the load into the RHS of a compare +when it can invert the result of the compare for free. + +//===---------------------------------------------------------------------===// + +In many cases, LLVM generates code like this: + +_test: + movl 8(%esp), %eax + cmpl %eax, 4(%esp) + setl %al + movzbl %al, %eax + ret + +on some processors (which ones?), it is more efficient to do this: + +_test: + movl 8(%esp), %ebx + xor %eax, %eax + cmpl %ebx, 4(%esp) + setl %al + ret + +Doing this correctly is tricky though, as the xor clobbers the flags. + +//===---------------------------------------------------------------------===// + +We should generate bts/btr/etc instructions on targets where they are cheap or +when codesize is important. 
e.g., for: + +void setbit(int *target, int bit) { + *target |= (1 << bit); +} +void clearbit(int *target, int bit) { + *target &= ~(1 << bit); +} + +//===---------------------------------------------------------------------===// + +Instead of the following for memset char*, 1, 10: + + movl $16843009, 4(%edx) + movl $16843009, (%edx) + movw $257, 8(%edx) + +It might be better to generate + + movl $16843009, %eax + movl %eax, 4(%edx) + movl %eax, (%edx) + movw al, 8(%edx) + +when we can spare a register. It reduces code size. + +//===---------------------------------------------------------------------===// + +Evaluate what the best way to codegen sdiv X, (2^C) is. For X/8, we currently +get this: + +define i32 @test1(i32 %X) { + %Y = sdiv i32 %X, 8 + ret i32 %Y +} + +_test1: + movl 4(%esp), %eax + movl %eax, %ecx + sarl $31, %ecx + shrl $29, %ecx + addl %ecx, %eax + sarl $3, %eax + ret + +GCC knows several different ways to codegen it, one of which is this: + +_test1: + movl 4(%esp), %eax + cmpl $-1, %eax + leal 7(%eax), %ecx + cmovle %ecx, %eax + sarl $3, %eax + ret + +which is probably slower, but it's interesting at least :) + +//===---------------------------------------------------------------------===// + +We are currently lowering large (1MB+) memmove/memcpy to rep/stosl and rep/movsl +We should leave these as libcalls for everything over a much lower threshold, +since libc is hand tuned for medium and large mem ops (avoiding RFO for large +stores, TLB preheating, etc) + +//===---------------------------------------------------------------------===// + +Optimize this into something reasonable: + x * copysign(1.0, y) * copysign(1.0, z) + +//===---------------------------------------------------------------------===// + +Optimize copysign(x, *y) to use an integer load from y. 
+ +//===---------------------------------------------------------------------===// + +The following tests perform worse with LSR: + +lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesort. + +//===---------------------------------------------------------------------===// + +Adding to the list of cmp / test poor codegen issues: + +int test(__m128 *A, __m128 *B) { + if (_mm_comige_ss(*A, *B)) + return 3; + else + return 4; +} + +_test: + movl 8(%esp), %eax + movaps (%eax), %xmm0 + movl 4(%esp), %eax + movaps (%eax), %xmm1 + comiss %xmm0, %xmm1 + setae %al + movzbl %al, %ecx + movl $3, %eax + movl $4, %edx + cmpl $0, %ecx + cmove %edx, %eax + ret + +Note the setae, movzbl, cmpl, cmove can be replaced with a single cmovae. There +are a number of issues. 1) We are introducing a setcc between the result of the +intrinsic call and select. 2) The intrinsic is expected to produce an i32 value +so an any extend (which becomes a zero extend) is added. + +We probably need some kind of target DAG combine hook to fix this. + +//===---------------------------------------------------------------------===// + +We generate significantly worse code for this than GCC: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21150 +http://gcc.gnu.org/bugzilla/attachment.cgi?id=8701 + +There is also one case we do worse on PPC. + +//===---------------------------------------------------------------------===// + +For this: + +int test(int a) +{ + return a * 3; +} + +We currently emit + imull $3, 4(%esp), %eax + +Perhaps this is what we really should generate? Is imull three or four +cycles? Note: ICC generates this: + movl 4(%esp), %eax + leal (%eax,%eax,2), %eax + +The current instruction priority is based on pattern complexity. The former is +more "complex" because it folds a load so the latter will not be emitted. + +Perhaps we should use AddedComplexity to give LEA32r a higher priority? 
We +should always try to match LEA first since the LEA matching code does some +estimate to determine whether the match is profitable. + +However, if we care more about code size, then imull is better. It's two bytes +shorter than movl + leal. + +On a Pentium M, both variants have the same characteristics with regard +to throughput; however, the multiplication has a latency of four cycles, as +opposed to two cycles for the movl+lea variant. + +//===---------------------------------------------------------------------===// + +It appears gcc place string data with linkonce linkage in +.section __TEXT,__const_coal,coalesced instead of +.section __DATA,__const_coal,coalesced. +Take a look at darwin.h, there are other Darwin assembler directives that we +do not make use of. + +//===---------------------------------------------------------------------===// + +define i32 @foo(i32* %a, i32 %t) { +entry: + br label %cond_true + +cond_true: ; preds = %cond_true, %entry + %x.0.0 = phi i32 [ 0, %entry ], [ %tmp9, %cond_true ] ; <i32> [#uses=3] + %t_addr.0.0 = phi i32 [ %t, %entry ], [ %tmp7, %cond_true ] ; <i32> [#uses=1] + %tmp2 = getelementptr i32* %a, i32 %x.0.0 ; <i32*> [#uses=1] + %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] + %tmp5 = add i32 %t_addr.0.0, %x.0.0 ; <i32> [#uses=1] + %tmp7 = add i32 %tmp5, %tmp3 ; <i32> [#uses=2] + %tmp9 = add i32 %x.0.0, 1 ; <i32> [#uses=2] + %tmp = icmp sgt i32 %tmp9, 39 ; <i1> [#uses=1] + br i1 %tmp, label %bb12, label %cond_true + +bb12: ; preds = %cond_true + ret i32 %tmp7 +} +is pessimized by -loop-reduce and -indvars + +//===---------------------------------------------------------------------===// + +u32 to float conversion improvement: + +float uint32_2_float( unsigned u ) { + float fl = (int) (u & 0xffff); + float fh = (int) (u >> 16); + fh *= 0x1.0p16f; + return fh + fl; +} + +00000000 subl $0x04,%esp +00000003 movl 0x08(%esp,1),%eax +00000007 movl %eax,%ecx +00000009 shrl $0x10,%ecx +0000000c cvtsi2ss %ecx,%xmm0 +00000010 andl 
$0x0000ffff,%eax +00000015 cvtsi2ss %eax,%xmm1 +00000019 mulss 0x00000078,%xmm0 +00000021 addss %xmm1,%xmm0 +00000025 movss %xmm0,(%esp,1) +0000002a flds (%esp,1) +0000002d addl $0x04,%esp +00000030 ret + +//===---------------------------------------------------------------------===// + +When using fastcc abi, align stack slot of argument of type double on 8 byte +boundary to improve performance. + +//===---------------------------------------------------------------------===// + +GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting +simplifications for integer "x cmp y ? a : b". + +//===---------------------------------------------------------------------===// + +Consider the expansion of: + +define i32 @test3(i32 %X) { + %tmp1 = urem i32 %X, 255 + ret i32 %tmp1 +} + +Currently it compiles to: + +... + movl $2155905153, %ecx + movl 8(%esp), %esi + movl %esi, %eax + mull %ecx +... + +This could be "reassociated" into: + + movl $2155905153, %eax + movl 8(%esp), %ecx + mull %ecx + +to avoid the copy. In fact, the existing two-address stuff would do this +except that mul isn't a commutative 2-addr instruction. I guess this has +to be done at isel time based on the #uses to mul? + +//===---------------------------------------------------------------------===// + +Make sure the instruction which starts a loop does not cross a cacheline +boundary. This requires knowing the exact length of each machine instruction. +That is somewhat complicated, but doable. Example 256.bzip2: + +In the new trace, the hot loop has an instruction which crosses a cacheline +boundary. In addition to potential cache misses, this can't help decoding as I +imagine there has to be some kind of complicated decoder reset and realignment +to grab the bytes from the next cacheline. 
+ +532 532 0x3cfc movb (1809(%esp, %esi), %bl <<<--- spans 2 64 byte lines +942 942 0x3d03 movl %dh, (1809(%esp, %esi) +937 937 0x3d0a incl %esi +3 3 0x3d0b cmpb %bl, %dl +27 27 0x3d0d jnz 0x000062db <main+11707> + +//===---------------------------------------------------------------------===// + +In c99 mode, the preprocessor doesn't like assembly comments like #TRUNCATE. + +//===---------------------------------------------------------------------===// + +This could be a single 16-bit load. + +int f(char *p) { + if ((p[0] == 1) & (p[1] == 2)) return 1; + return 0; +} + +//===---------------------------------------------------------------------===// + +We should inline lrintf and probably other libc functions. + +//===---------------------------------------------------------------------===// + +This code: + +void test(int X) { + if (X) abort(); +} + +is currently compiled to: + +_test: + subl $12, %esp + cmpl $0, 16(%esp) + jne LBB1_1 + addl $12, %esp + ret +LBB1_1: + call L_abort$stub + +It would be better to produce: + +_test: + subl $12, %esp + cmpl $0, 16(%esp) + jne L_abort$stub + addl $12, %esp + ret + +This can be applied to any no-return function call that takes no arguments etc. +Alternatively, the stack save/restore logic could be shrink-wrapped, producing +something like this: + +_test: + cmpl $0, 4(%esp) + jne LBB1_1 + ret +LBB1_1: + subl $12, %esp + call L_abort$stub + +Both are useful in different situations. Finally, it could be shrink-wrapped +and tail called, like this: + +_test: + cmpl $0, 4(%esp) + jne LBB1_1 + ret +LBB1_1: + pop %eax # realign stack. + call L_abort$stub + +Though this probably isn't worth it. + +//===---------------------------------------------------------------------===// + +Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with +a neg instead of a sub instruction. 
Consider: + +int test(char X) { return 7-X; } + +we currently produce: +_test: + movl $7, %eax + movsbl 4(%esp), %ecx + subl %ecx, %eax + ret + +We would use one fewer register if codegen'd as: + + movsbl 4(%esp), %eax + neg %eax + add $7, %eax + ret + +Note that this isn't beneficial if the load can be folded into the sub. In +this case, we want a sub: + +int test(int X) { return 7-X; } +_test: + movl $7, %eax + subl 4(%esp), %eax + ret + +//===---------------------------------------------------------------------===// + +Leaf functions that require one 4-byte spill slot have a prolog like this: + +_foo: + pushl %esi + subl $4, %esp +... +and an epilog like this: + addl $4, %esp + popl %esi + ret + +It would be smaller, and potentially faster, to push eax on entry and to +pop into a dummy register instead of using addl/subl of esp. Just don't pop +into any return registers :) + +//===---------------------------------------------------------------------===// + +The X86 backend should fold (branch (or (setcc, setcc))) into multiple +branches. We generate really poor code for: + +double testf(double a) { + return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0); +} + +For example, the entry BB is: + +_testf: + subl $20, %esp + pxor %xmm0, %xmm0 + movsd 24(%esp), %xmm1 + ucomisd %xmm0, %xmm1 + setnp %al + sete %cl + testb %cl, %al + jne LBB1_5 # UnifiedReturnBlock +LBB1_1: # cond_true + + +it would be better to replace the last four instructions with: + + jp LBB1_1 + je LBB1_5 +LBB1_1: + +We also codegen the inner ?: into a diamond: + + cvtss2sd LCPI1_0(%rip), %xmm2 + cvtss2sd LCPI1_1(%rip), %xmm3 + ucomisd %xmm1, %xmm0 + ja LBB1_3 # cond_true +LBB1_2: # cond_true + movapd %xmm3, %xmm2 +LBB1_3: # cond_true + movapd %xmm2, %xmm0 + ret + +We should sink the load into xmm3 into the LBB1_2 block. This should +be pretty easy, and will nuke all the copies. 
+ +//===---------------------------------------------------------------------===// + +This: + #include <algorithm> + inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b) + { return std::make_pair(a + b, a + b < a); } + bool no_overflow(unsigned a, unsigned b) + { return !full_add(a, b).second; } + +Should compile to: + addl %esi, %edi + setae %al + movzbl %al, %eax + ret + +on x86-64, instead of the rather stupid-looking: + addl %esi, %edi + setb %al + xorb $1, %al + movzbl %al, %eax + ret + + +//===---------------------------------------------------------------------===// + +The following code: + +bb114.preheader: ; preds = %cond_next94 + %tmp231232 = sext i16 %tmp62 to i32 ; <i32> [#uses=1] + %tmp233 = sub i32 32, %tmp231232 ; <i32> [#uses=1] + %tmp245246 = sext i16 %tmp65 to i32 ; <i32> [#uses=1] + %tmp252253 = sext i16 %tmp68 to i32 ; <i32> [#uses=1] + %tmp254 = sub i32 32, %tmp252253 ; <i32> [#uses=1] + %tmp553554 = bitcast i16* %tmp37 to i8* ; <i8*> [#uses=2] + %tmp583584 = sext i16 %tmp98 to i32 ; <i32> [#uses=1] + %tmp585 = sub i32 32, %tmp583584 ; <i32> [#uses=1] + %tmp614615 = sext i16 %tmp101 to i32 ; <i32> [#uses=1] + %tmp621622 = sext i16 %tmp104 to i32 ; <i32> [#uses=1] + %tmp623 = sub i32 32, %tmp621622 ; <i32> [#uses=1] + br label %bb114 + +produces: + +LBB3_5: # bb114.preheader + movswl -68(%ebp), %eax + movl $32, %ecx + movl %ecx, -80(%ebp) + subl %eax, -80(%ebp) + movswl -52(%ebp), %eax + movl %ecx, -84(%ebp) + subl %eax, -84(%ebp) + movswl -70(%ebp), %eax + movl %ecx, -88(%ebp) + subl %eax, -88(%ebp) + movswl -50(%ebp), %eax + subl %eax, %ecx + movl %ecx, -76(%ebp) + movswl -42(%ebp), %eax + movl %eax, -92(%ebp) + movswl -66(%ebp), %eax + movl %eax, -96(%ebp) + movw $0, -98(%ebp) + +This appears to be bad because the RA is not folding the store to the stack +slot into the movl. The above instructions could be: + movl $32, -80(%ebp) +... + movl $32, -84(%ebp) +... +This seems like a cross between remat and spill folding. 
+ +This has redundant subtractions of %eax from a stack slot. However, %ecx doesn't +change, so we could simply subtract %eax from %ecx first and then use %ecx (or +vice-versa). + +//===---------------------------------------------------------------------===// + +This code: + + %tmp659 = icmp slt i16 %tmp654, 0 ; <i1> [#uses=1] + br i1 %tmp659, label %cond_true662, label %cond_next715 + +produces this: + + testw %cx, %cx + movswl %cx, %esi + jns LBB4_109 # cond_next715 + +Shark tells us that using %cx in the testw instruction is sub-optimal. It +suggests using the 32-bit register (which is what ICC uses). + +//===---------------------------------------------------------------------===// + +We compile this: + +void compare (long long foo) { + if (foo < 4294967297LL) + abort(); +} + +to: + +compare: + subl $4, %esp + cmpl $0, 8(%esp) + setne %al + movzbw %al, %ax + cmpl $1, 12(%esp) + setg %cl + movzbw %cl, %cx + cmove %ax, %cx + testb $1, %cl + jne .LBB1_2 # UnifiedReturnBlock +.LBB1_1: # ifthen + call abort +.LBB1_2: # UnifiedReturnBlock + addl $4, %esp + ret + +(also really horrible code on ppc). This is due to the expand code for 64-bit +compares. GCC produces multiple branches, which is much nicer: + +compare: + subl $12, %esp + movl 20(%esp), %edx + movl 16(%esp), %eax + decl %edx + jle .L7 +.L5: + addl $12, %esp + ret + .p2align 4,,7 +.L7: + jl .L4 + cmpl $0, %eax + .p2align 4,,8 + ja .L5 +.L4: + .p2align 4,,9 + call abort + +//===---------------------------------------------------------------------===// + +Tail call optimization improvements: Tail call optimization currently +pushes all arguments on the top of the stack (their normal place for +non-tail call optimized calls) that source from the callers arguments +or that source from a virtual register (also possibly sourcing from +callers arguments). +This is done to prevent overwriting of parameters (see example +below) that might be used later. 
+ +example: + +int callee(int32, int64); +int caller(int32 arg1, int32 arg2) { + int64 local = arg2 * 2; + return callee(arg2, (int64)local); +} + +[arg1] [!arg2 no longer valid since we moved local onto it] +[arg2] -> [(int64) +[RETADDR] local ] + +Moving arg1 onto the stack slot of callee function would overwrite +arg2 of the caller. + +Possible optimizations: + + + - Analyse the actual parameters of the callee to see which would + overwrite a caller parameter which is used by the callee and only + push them onto the top of the stack. + + int callee (int32 arg1, int32 arg2); + int caller (int32 arg1, int32 arg2) { + return callee(arg1,arg2); + } + + Here we don't need to write any variables to the top of the stack + since they don't overwrite each other. + + int callee (int32 arg1, int32 arg2); + int caller (int32 arg1, int32 arg2) { + return callee(arg2,arg1); + } + + Here we need to push the arguments because they overwrite each + other. + +//===---------------------------------------------------------------------===// + +main () +{ + int i = 0; + unsigned long int z = 0; + + do { + z -= 0x00004000; + i++; + if (i > 0x00040000) + abort (); + } while (z > 0); + exit (0); +} + +gcc compiles this to: + +_main: + subl $28, %esp + xorl %eax, %eax + jmp L2 +L3: + cmpl $262144, %eax + je L10 +L2: + addl $1, %eax + cmpl $262145, %eax + jne L3 + call L_abort$stub +L10: + movl $0, (%esp) + call L_exit$stub + +llvm: + +_main: + subl $12, %esp + movl $1, %eax + movl $16384, %ecx +LBB1_1: # bb + cmpl $262145, %eax + jge LBB1_4 # cond_true +LBB1_2: # cond_next + incl %eax + addl $4294950912, %ecx + cmpl $16384, %ecx + jne LBB1_1 # bb +LBB1_3: # bb11 + xorl %eax, %eax + addl $12, %esp + ret +LBB1_4: # cond_true + call L_abort$stub + +1. LSR should rewrite the first cmp with induction variable %ecx. +2. 
DAG combiner should fold + leal 1(%eax), %edx + cmpl $262145, %edx + => + cmpl $262144, %eax + +//===---------------------------------------------------------------------===// + +define i64 @test(double %X) { + %Y = fptosi double %X to i64 + ret i64 %Y +} + +compiles to: + +_test: + subl $20, %esp + movsd 24(%esp), %xmm0 + movsd %xmm0, 8(%esp) + fldl 8(%esp) + fisttpll (%esp) + movl 4(%esp), %edx + movl (%esp), %eax + addl $20, %esp + #FP_REG_KILL + ret + +This should just fldl directly from the input stack slot. + +//===---------------------------------------------------------------------===// + +This code: +int foo (int x) { return (x & 65535) | 255; } + +Should compile into: + +_foo: + movzwl 4(%esp), %eax + orl $255, %eax + ret + +instead of: +_foo: + movl $65280, %eax + andl 4(%esp), %eax + orl $255, %eax + ret + +//===---------------------------------------------------------------------===// + +We're codegen'ing multiply of long longs inefficiently: + +unsigned long long LLM(unsigned long long arg1, unsigned long long arg2) { + return arg1 * arg2; +} + +We compile to (fomit-frame-pointer): + +_LLM: + pushl %esi + movl 8(%esp), %ecx + movl 16(%esp), %esi + movl %esi, %eax + mull %ecx + imull 12(%esp), %esi + addl %edx, %esi + imull 20(%esp), %ecx + movl %esi, %edx + addl %ecx, %edx + popl %esi + ret + +This looks like a scheduling deficiency and lack of remat of the load from +the argument area. ICC apparently produces: + + movl 8(%esp), %ecx + imull 12(%esp), %ecx + movl 16(%esp), %eax + imull 4(%esp), %eax + addl %eax, %ecx + movl 4(%esp), %eax + mull 12(%esp) + addl %ecx, %edx + ret + +Note that it remat'd loads from 4(esp) and 12(esp). See this GCC PR: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17236 + +//===---------------------------------------------------------------------===// + +We can fold a store into "zeroing a reg". Instead of: + +xorl %eax, %eax +movl %eax, 124(%esp) + +we should get: + +movl $0, 124(%esp) + +if the flags of the xor are dead. 
+ +Likewise, we isel "x<<1" into "add reg,reg". If reg is spilled, this should +be folded into: shl [mem], 1 + +//===---------------------------------------------------------------------===// + +In SSE mode, we turn abs and neg into a load from the constant pool plus a xor +or and instruction, for example: + + xorpd LCPI1_0, %xmm2 + +However, if xmm2 gets spilled, we end up with really ugly code like this: + + movsd (%esp), %xmm0 + xorpd LCPI1_0, %xmm0 + movsd %xmm0, (%esp) + +Since we 'know' that this is a 'neg', we can actually "fold" the spill into +the neg/abs instruction, turning it into an *integer* operation, like this: + + xorl 2147483648, [mem+4] ## 2147483648 = (1 << 31) + +you could also use xorb, but xorl is less likely to lead to a partial register +stall. Here is a contrived testcase: + +double a, b, c; +void test(double *P) { + double X = *P; + a = X; + bar(); + X = -X; + b = X; + bar(); + c = X; +} + +//===---------------------------------------------------------------------===// + +The generated code on x86 for checking for signed overflow on a multiply the +obvious way is much longer than it needs to be. + +int x(int a, int b) { + long long prod = (long long)a*b; + return prod > 0x7FFFFFFF || prod < (-0x7FFFFFFF-1); +} + +See PR2053 for more details. + +//===---------------------------------------------------------------------===// + +We should investigate using cdq/cltd (effect: edx = sar eax, 31) +more aggressively; it should cost the same as a move+shift on any modern +processor, but it's a lot shorter. Downside is that it puts more +pressure on register allocation because it has fixed operands. + +Example: +int abs(int x) {return x < 0 ? 
-x : x;} + +gcc compiles this to the following when using march/mtune=pentium2/3/4/m/etc.: +abs: + movl 4(%esp), %eax + cltd + xorl %edx, %eax + subl %edx, %eax + ret + +//===---------------------------------------------------------------------===// + +Take the following code (from +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541): + +extern unsigned char first_one[65536]; +int FirstOnet(unsigned long long arg1) +{ + if (arg1 >> 48) + return (first_one[arg1 >> 48]); + return 0; +} + + +The following code is currently generated: +FirstOnet: + movl 8(%esp), %eax + cmpl $65536, %eax + movl 4(%esp), %ecx + jb .LBB1_2 # UnifiedReturnBlock +.LBB1_1: # ifthen + shrl $16, %eax + movzbl first_one(%eax), %eax + ret +.LBB1_2: # UnifiedReturnBlock + xorl %eax, %eax + ret + +We could change the "movl 8(%esp), %eax" into "movzwl 10(%esp), %eax"; this +lets us change the cmpl into a testl, which is shorter, and eliminate the shift. + +//===---------------------------------------------------------------------===// + +We compile this function: + +define i32 @foo(i32 %a, i32 %b, i32 %c, i8 zeroext %d) nounwind { +entry: + %tmp2 = icmp eq i8 %d, 0 ; <i1> [#uses=1] + br i1 %tmp2, label %bb7, label %bb + +bb: ; preds = %entry + %tmp6 = add i32 %b, %a ; <i32> [#uses=1] + ret i32 %tmp6 + +bb7: ; preds = %entry + %tmp10 = sub i32 %a, %c ; <i32> [#uses=1] + ret i32 %tmp10 +} + +to: + +foo: # @foo +# %bb.0: # %entry + movl 4(%esp), %ecx + cmpb $0, 16(%esp) + je .LBB0_2 +# %bb.1: # %bb + movl 8(%esp), %eax + addl %ecx, %eax + ret +.LBB0_2: # %bb7 + movl 12(%esp), %edx + movl %ecx, %eax + subl %edx, %eax + ret + +There's an obviously unnecessary movl in .LBB0_2, and we could eliminate a +couple more movls by putting 4(%esp) into %eax instead of %ecx. + +//===---------------------------------------------------------------------===// + +See rdar://4653682. 
+ +From flops: + +LBB1_15: # bb310 + cvtss2sd LCPI1_0, %xmm1 + addsd %xmm1, %xmm0 + movsd 176(%esp), %xmm2 + mulsd %xmm0, %xmm2 + movapd %xmm2, %xmm3 + mulsd %xmm3, %xmm3 + movapd %xmm3, %xmm4 + mulsd LCPI1_23, %xmm4 + addsd LCPI1_24, %xmm4 + mulsd %xmm3, %xmm4 + addsd LCPI1_25, %xmm4 + mulsd %xmm3, %xmm4 + addsd LCPI1_26, %xmm4 + mulsd %xmm3, %xmm4 + addsd LCPI1_27, %xmm4 + mulsd %xmm3, %xmm4 + addsd LCPI1_28, %xmm4 + mulsd %xmm3, %xmm4 + addsd %xmm1, %xmm4 + mulsd %xmm2, %xmm4 + movsd 152(%esp), %xmm1 + addsd %xmm4, %xmm1 + movsd %xmm1, 152(%esp) + incl %eax + cmpl %eax, %esi + jge LBB1_15 # bb310 +LBB1_16: # bb358.loopexit + movsd 152(%esp), %xmm0 + addsd %xmm0, %xmm0 + addsd LCPI1_22, %xmm0 + movsd %xmm0, 152(%esp) + +Rather than spilling the result of the last addsd in the loop, we should have +inserted a copy to split the interval (one for the duration of the loop, one +extending to the fall through). The register pressure in the loop isn't high +enough to warrant the spill. + +Also check why xmm7 is not used at all in the function.
+ +//===---------------------------------------------------------------------===// + +Take the following: + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-S128" +target triple = "i386-apple-darwin8" +@in_exit.4870.b = internal global i1 false ; <i1*> [#uses=2] +define fastcc void @abort_gzip() noreturn nounwind { +entry: + %tmp.b.i = load i1* @in_exit.4870.b ; <i1> [#uses=1] + br i1 %tmp.b.i, label %bb.i, label %bb4.i +bb.i: ; preds = %entry + tail call void @exit( i32 1 ) noreturn nounwind + unreachable +bb4.i: ; preds = %entry + store i1 true, i1* @in_exit.4870.b + tail call void @exit( i32 1 ) noreturn nounwind + unreachable +} +declare void @exit(i32) noreturn nounwind + +This compiles into: +_abort_gzip: ## @abort_gzip +## %bb.0: ## %entry + subl $12, %esp + movb _in_exit.4870.b, %al + cmpb $1, %al + jne LBB0_2 + +We somehow miss folding the movb into the cmpb. + +//===---------------------------------------------------------------------===// + +We compile: + +int test(int x, int y) { + return x-y-1; +} + +into (-m64): + +_test: + decl %edi + movl %edi, %eax + subl %esi, %eax + ret + +it would be better to codegen as: x+~y (notl+addl) + +//===---------------------------------------------------------------------===// + +This code: + +int foo(const char *str,...) 
+{ + __builtin_va_list a; int x; + __builtin_va_start(a,str); x = __builtin_va_arg(a,int); __builtin_va_end(a); + return x; +} + +gets compiled into this on x86-64: + subq $200, %rsp + movaps %xmm7, 160(%rsp) + movaps %xmm6, 144(%rsp) + movaps %xmm5, 128(%rsp) + movaps %xmm4, 112(%rsp) + movaps %xmm3, 96(%rsp) + movaps %xmm2, 80(%rsp) + movaps %xmm1, 64(%rsp) + movaps %xmm0, 48(%rsp) + movq %r9, 40(%rsp) + movq %r8, 32(%rsp) + movq %rcx, 24(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 8(%rsp) + leaq (%rsp), %rax + movq %rax, 192(%rsp) + leaq 208(%rsp), %rax + movq %rax, 184(%rsp) + movl $48, 180(%rsp) + movl $8, 176(%rsp) + movl 176(%rsp), %eax + cmpl $47, %eax + jbe .LBB1_3 # bb +.LBB1_1: # bb3 + movq 184(%rsp), %rcx + leaq 8(%rcx), %rax + movq %rax, 184(%rsp) +.LBB1_2: # bb4 + movl (%rcx), %eax + addq $200, %rsp + ret +.LBB1_3: # bb + movl %eax, %ecx + addl $8, %eax + addq 192(%rsp), %rcx + movl %eax, 176(%rsp) + jmp .LBB1_2 # bb4 + +gcc 4.3 generates: + subq $96, %rsp +.LCFI0: + leaq 104(%rsp), %rax + movq %rsi, -80(%rsp) + movl $8, -120(%rsp) + movq %rax, -112(%rsp) + leaq -88(%rsp), %rax + movq %rax, -104(%rsp) + movl $8, %eax + cmpl $48, %eax + jb .L6 + movq -112(%rsp), %rdx + movl (%rdx), %eax + addq $96, %rsp + ret + .p2align 4,,10 + .p2align 3 +.L6: + mov %eax, %edx + addq -104(%rsp), %rdx + addl $8, %eax + movl %eax, -120(%rsp) + movl (%rdx), %eax + addq $96, %rsp + ret + +and it gets compiled into this on x86: + pushl %ebp + movl %esp, %ebp + subl $4, %esp + leal 12(%ebp), %eax + movl %eax, -4(%ebp) + leal 16(%ebp), %eax + movl %eax, -4(%ebp) + movl 12(%ebp), %eax + addl $4, %esp + popl %ebp + ret + +gcc 4.3 generates: + pushl %ebp + movl %esp, %ebp + movl 12(%ebp), %eax + popl %ebp + ret + +//===---------------------------------------------------------------------===// + +Teach tblgen not to check bitconvert source type in some cases. 
This allows us +to consolidate the following patterns in X86InstrMMX.td: + +def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), + (iPTR 0))))), + (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>; +def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), + (iPTR 0))))), + (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>; +def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), + (iPTR 0))))), + (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>; + +There are other cases in various td files. + +//===---------------------------------------------------------------------===// + +Take something like the following on x86-32: +unsigned a(unsigned long long x, unsigned y) {return x % y;} + +We currently generate a libcall, but we really shouldn't: the expansion is +shorter and likely faster than the libcall. The expected code is something +like the following: + + movl 12(%ebp), %eax + movl 16(%ebp), %ecx + xorl %edx, %edx + divl %ecx + movl 8(%ebp), %eax + divl %ecx + movl %edx, %eax + ret + +A similar code sequence works for division. + +//===---------------------------------------------------------------------===// + +We currently compile this: + +define i32 @func1(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %overflow, label %normal +normal: + ret i32 %sum +overflow: + call void @llvm.trap() + unreachable +} +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) +declare void @llvm.trap() + +to: + +_func1: + movl 4(%esp), %eax + addl 8(%esp), %eax + jo LBB1_2 ## overflow +LBB1_1: ## normal + ret +LBB1_2: ## overflow + ud2 + +it would be nice to produce "into" someday. + +//===---------------------------------------------------------------------===// + +Test instructions can be eliminated by using EFLAGS values from arithmetic +instructions. 
This is currently not done for mul, and, or, xor, neg, shl, +sra, srl, shld, shrd, atomic ops, and others. It is also currently not done +for read-modify-write instructions. It is also currently not done if the +OF or CF flags are needed. + +The shift operators have the complication that when the shift count is +zero, EFLAGS is not set, so they can only subsume a test instruction if +the shift count is known to be non-zero. Also, using the EFLAGS value +from a shift is apparently very slow on some x86 implementations. + +In read-modify-write instructions, the root node in the isel match is +the store, and isel has no way for the use of the EFLAGS result of the +arithmetic to be remapped to the new node. + +Add and subtract instructions set OF on signed overflow and CF on unsigned +overflow, while test instructions always clear OF and CF. In order to +replace a test with an add or subtract in a situation where OF or CF is +needed, codegen must be able to prove that the operation cannot see +signed or unsigned overflow, respectively. + +//===---------------------------------------------------------------------===// + +memcpy/memmove do not lower to SSE copies when possible.
A silly example is: +define <16 x float> @foo(<16 x float> %A) nounwind { + %tmp = alloca <16 x float>, align 16 + %tmp2 = alloca <16 x float>, align 16 + store <16 x float> %A, <16 x float>* %tmp + %s = bitcast <16 x float>* %tmp to i8* + %s2 = bitcast <16 x float>* %tmp2 to i8* + call void @llvm.memcpy.i64(i8* %s, i8* %s2, i64 64, i32 16) + %R = load <16 x float>* %tmp2 + ret <16 x float> %R +} + +declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind + +which compiles to: + +_foo: + subl $140, %esp + movaps %xmm3, 112(%esp) + movaps %xmm2, 96(%esp) + movaps %xmm1, 80(%esp) + movaps %xmm0, 64(%esp) + movl 60(%esp), %eax + movl %eax, 124(%esp) + movl 56(%esp), %eax + movl %eax, 120(%esp) + movl 52(%esp), %eax + <many many more 32-bit copies> + movaps (%esp), %xmm0 + movaps 16(%esp), %xmm1 + movaps 32(%esp), %xmm2 + movaps 48(%esp), %xmm3 + addl $140, %esp + ret + +On Nehalem, it may even be cheaper to just use movups when unaligned than to +fall back to lower-granularity chunks. + +//===---------------------------------------------------------------------===// + +Implement processor-specific optimizations for parity with GCC on these +processors. GCC does two optimizations: + +1. ix86_pad_returns inserts a noop before ret instructions if immediately + preceded by a conditional branch or is the target of a jump. +2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of + code contains more than 3 branches. 
+ +The first one is done for all AMDs, Core2, and "Generic" +The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, + Core 2, and "Generic" + +//===---------------------------------------------------------------------===// +Testcase: +int x(int a) { return (a&0xf0)>>4; } + +Current output: + movl 4(%esp), %eax + shrl $4, %eax + andl $15, %eax + ret + +Ideal output: + movzbl 4(%esp), %eax + shrl $4, %eax + ret + +//===---------------------------------------------------------------------===// + +Re-implement atomic builtins __sync_add_and_fetch() and __sync_sub_and_fetch +properly. + +When the return value is not used (i.e. only care about the value in the +memory), x86 does not have to use xadd to implement these. Instead, it can use +add, sub, inc, dec instructions with the "lock" prefix. + +This is currently implemented using a bit of instruction selection trick. The +issue is the target independent pattern produces one output and a chain and we +want to map it into one that just outputs a chain. The current trick is to select +it into a MERGE_VALUES with the first definition being an implicit_def. The +proper solution is to add new ISD opcodes for the no-output variant. DAG +combiner can then transform the node before it gets to target node selection. + +Problem #2 is we are adding a whole bunch of x86 atomic instructions when in +fact these instructions are identical to the non-lock versions. We need a way to +add target specific information to target nodes and have this information +carried over to machine instructions. Asm printer (or JIT) can use this +information to add the "lock" prefix.
+ +//===---------------------------------------------------------------------===// + +struct B { + unsigned char y0 : 1; +}; + +int bar(struct B* a) { return a->y0; } + +define i32 @bar(%struct.B* nocapture %a) nounwind readonly optsize { + %1 = getelementptr inbounds %struct.B* %a, i64 0, i32 0 + %2 = load i8* %1, align 1 + %3 = and i8 %2, 1 + %4 = zext i8 %3 to i32 + ret i32 %4 +} + +bar: # @bar +# %bb.0: + movb (%rdi), %al + andb $1, %al + movzbl %al, %eax + ret + +Missed optimization: should be movl+andl. + +//===---------------------------------------------------------------------===// + +The x86_64 abi says: + +Booleans, when stored in a memory object, are stored as single byte objects the +value of which is always 0 (false) or 1 (true). + +We are not using this fact: + +int bar(_Bool *a) { return *a; } + +define i32 @bar(i8* nocapture %a) nounwind readonly optsize { + %1 = load i8* %a, align 1, !tbaa !0 + %tmp = and i8 %1, 1 + %2 = zext i8 %tmp to i32 + ret i32 %2 +} + +bar: + movb (%rdi), %al + andb $1, %al + movzbl %al, %eax + ret + +GCC produces + +bar: + movzbl (%rdi), %eax + ret + +//===---------------------------------------------------------------------===// + +Take the following C code: +int f(int a, int b) { return (unsigned char)a == (unsigned char)b; } + +We generate the following IR with clang: +define i32 @f(i32 %a, i32 %b) nounwind readnone { +entry: + %tmp = xor i32 %b, %a ; <i32> [#uses=1] + %tmp6 = and i32 %tmp, 255 ; <i32> [#uses=1] + %cmp = icmp eq i32 %tmp6, 0 ; <i1> [#uses=1] + %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1] + ret i32 %conv5 +} + +And the following x86 code: + xorl %esi, %edi + testb $-1, %dil + sete %al + movzbl %al, %eax + ret + +A cmpb instead of the xorl+testb would be one instruction shorter. 
+ +//===---------------------------------------------------------------------===// + +Given the following C code: +int f(int a, int b) { return (signed char)a == (signed char)b; } + +We generate the following IR with clang: +define i32 @f(i32 %a, i32 %b) nounwind readnone { +entry: + %sext = shl i32 %a, 24 ; <i32> [#uses=1] + %conv1 = ashr i32 %sext, 24 ; <i32> [#uses=1] + %sext6 = shl i32 %b, 24 ; <i32> [#uses=1] + %conv4 = ashr i32 %sext6, 24 ; <i32> [#uses=1] + %cmp = icmp eq i32 %conv1, %conv4 ; <i1> [#uses=1] + %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1] + ret i32 %conv5 +} + +And the following x86 code: + movsbl %sil, %eax + movsbl %dil, %ecx + cmpl %eax, %ecx + sete %al + movzbl %al, %eax + ret + + +It should be possible to eliminate the sign extensions. + +//===---------------------------------------------------------------------===// + +LLVM misses a load+store narrowing opportunity in this code: + +%struct.bf = type { i64, i16, i16, i32 } + +@bfi = external global %struct.bf* ; <%struct.bf**> [#uses=2] + +define void @t1() nounwind ssp { +entry: + %0 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1] + %1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1] + %2 = bitcast i16* %1 to i32* ; <i32*> [#uses=2] + %3 = load i32* %2, align 1 ; <i32> [#uses=1] + %4 = and i32 %3, -65537 ; <i32> [#uses=1] + store i32 %4, i32* %2, align 1 + %5 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1] + %6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1] + %7 = bitcast i16* %6 to i32* ; <i32*> [#uses=2] + %8 = load i32* %7, align 1 ; <i32> [#uses=1] + %9 = and i32 %8, -131073 ; <i32> [#uses=1] + store i32 %9, i32* %7, align 1 + ret void +} + +LLVM currently emits this: + + movq bfi(%rip), %rax + andl $-65537, 8(%rax) + movq bfi(%rip), %rax + andl $-131073, 8(%rax) + ret + +It could narrow the loads and stores to emit this: + + movq bfi(%rip), %rax + andb $-2, 10(%rax) + movq bfi(%rip), %rax + andb $-3, 10(%rax) + ret + +The 
trouble is that there is a TokenFactor between the store and the +load, making it non-trivial to determine if there's anything between +the load and the store which would prohibit narrowing. + +//===---------------------------------------------------------------------===// + +This code: +void foo(unsigned x) { + if (x == 0) bar(); + else if (x == 1) qux(); +} + +currently compiles into: +_foo: + movl 4(%esp), %eax + cmpl $1, %eax + je LBB0_3 + testl %eax, %eax + jne LBB0_4 + +the testl could be removed: +_foo: + movl 4(%esp), %eax + cmpl $1, %eax + je LBB0_3 + jb LBB0_4 + +0 is the only unsigned number < 1. + +//===---------------------------------------------------------------------===// + +This code: + +%0 = type { i32, i1 } + +define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp { +entry: + %uadd = tail call %0 @llvm.uadd.with.overflow.i32(i32 %sum, i32 %x) + %cmp = extractvalue %0 %uadd, 1 + %inc = zext i1 %cmp to i32 + %add = add i32 %x, %sum + %z.0 = add i32 %add, %inc + ret i32 %z.0 +} + +declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + +compiles to: + +_add32carry: ## @add32carry + addl %esi, %edi + sbbl %ecx, %ecx + movl %edi, %eax + subl %ecx, %eax + ret + +But it could be: + +_add32carry: + leal (%rsi,%rdi), %eax + cmpl %esi, %eax + adcl $0, %eax + ret + +//===---------------------------------------------------------------------===// + +The hot loop of 256.bzip2 contains code that looks a bit like this: + +int foo(char *P, char *Q, int x, int y) { + if (P[0] != Q[0]) + return P[0] < Q[0]; + if (P[1] != Q[1]) + return P[1] < Q[1]; + if (P[2] != Q[2]) + return P[2] < Q[2]; + return P[3] < Q[3]; +} + +In the real code, we get a lot more wrong than this. 
However, even in this +code we generate: + +_foo: ## @foo +## %bb.0: ## %entry + movb (%rsi), %al + movb (%rdi), %cl + cmpb %al, %cl + je LBB0_2 +LBB0_1: ## %if.then + cmpb %al, %cl + jmp LBB0_5 +LBB0_2: ## %if.end + movb 1(%rsi), %al + movb 1(%rdi), %cl + cmpb %al, %cl + jne LBB0_1 +## %bb.3: ## %if.end38 + movb 2(%rsi), %al + movb 2(%rdi), %cl + cmpb %al, %cl + jne LBB0_1 +## %bb.4: ## %if.end60 + movb 3(%rdi), %al + cmpb 3(%rsi), %al +LBB0_5: ## %if.end60 + setl %al + movzbl %al, %eax + ret + +Note that we generate jumps to LBB0_1 which does a redundant compare. The +redundant compare also forces the register values to be live, which prevents +folding one of the loads into the compare. In contrast, GCC 4.2 produces: + +_foo: + movzbl (%rsi), %eax + cmpb %al, (%rdi) + jne L10 +L12: + movzbl 1(%rsi), %eax + cmpb %al, 1(%rdi) + jne L10 + movzbl 2(%rsi), %eax + cmpb %al, 2(%rdi) + jne L10 + movzbl 3(%rdi), %eax + cmpb 3(%rsi), %al +L10: + setl %al + movzbl %al, %eax + ret + +which is "perfect". + +//===---------------------------------------------------------------------===// + +For the branch in the following code: +int a(); +int b(int x, int y) { + if (x & (1<<(y&7))) + return a(); + return y; +} + +We currently generate: + movb %sil, %al + andb $7, %al + movzbl %al, %eax + btl %eax, %edi + jae .LBB0_2 + +movl+andl would be shorter than the movb+andb+movzbl sequence. + +//===---------------------------------------------------------------------===// + +For the following: +struct u1 { + float x, y; +}; +float foo(struct u1 u) { + return u.x + u.y; +} + +We currently generate: + movdqa %xmm0, %xmm1 + pshufd $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0,0,0] + addss %xmm1, %xmm0 + ret + +We could save an instruction here by commuting the addss. 
+ +//===---------------------------------------------------------------------===// + +This (from PR9661): + +float clamp_float(float a) { + if (a > 1.0f) + return 1.0f; + else if (a < 0.0f) + return 0.0f; + else + return a; +} + +Could compile to: + +clamp_float: # @clamp_float + movss .LCPI0_0(%rip), %xmm1 + minss %xmm1, %xmm0 + pxor %xmm1, %xmm1 + maxss %xmm1, %xmm0 + ret + +with -ffast-math. + +//===---------------------------------------------------------------------===// + +This function (from PR9803): + +int clamp2(int a) { + if (a > 5) + a = 5; + if (a < 0) + return 0; + return a; +} + +Compiles to: + +_clamp2: ## @clamp2 + pushq %rbp + movq %rsp, %rbp + cmpl $5, %edi + movl $5, %ecx + cmovlel %edi, %ecx + testl %ecx, %ecx + movl $0, %eax + cmovnsl %ecx, %eax + popq %rbp + ret + +The move of 0 could be scheduled above the test to make it an xor reg,reg. + +//===---------------------------------------------------------------------===// + +GCC PR48986. We currently compile this: + +void bar(void); +void yyy(int* p) { + if (__sync_fetch_and_add(p, -1) == 1) + bar(); +} + +into: + movl $-1, %eax + lock + xaddl %eax, (%rdi) + cmpl $1, %eax + je LBB0_2 + +Instead we could generate: + + lock + decl (%rdi) + je LBB0_2 + +The trick is to match "fetch_and_add(X, -C) == C". + +//===---------------------------------------------------------------------===// + +unsigned t(unsigned a, unsigned b) { + return a <= b ?
5 : -5; +} + +We generate: + movl $5, %ecx + cmpl %esi, %edi + movl $-5, %eax + cmovbel %ecx, %eax + +GCC: + cmpl %edi, %esi + sbbl %eax, %eax + andl $-10, %eax + addl $5, %eax + +//===---------------------------------------------------------------------===// diff --git a/contrib/libs/llvm12/lib/Target/X86/TargetInfo/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Target/X86/TargetInfo/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Target/X86/TargetInfo/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Target/X86/TargetInfo/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Target/X86/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/X86/TargetInfo/ya.make index 9048b1b373..2f30db941e 100644 --- a/contrib/libs/llvm12/lib/Target/X86/TargetInfo/ya.make +++ b/contrib/libs/llvm12/lib/Target/X86/TargetInfo/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/Support diff --git a/contrib/libs/llvm12/lib/Target/X86/ya.make b/contrib/libs/llvm12/lib/Target/X86/ya.make index eda842b955..1df03a55e7 100644 --- a/contrib/libs/llvm12/lib/Target/X86/ya.make +++ b/contrib/libs/llvm12/lib/Target/X86/ya.make @@ -2,18 +2,18 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE( + Apache-2.0 WITH LLVM-exception AND + NCSA +) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE( - Apache-2.0 WITH LLVM-exception AND - NCSA -) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Target/ya.make b/contrib/libs/llvm12/lib/Target/ya.make index 7696dde9a3..8401456429 100644 --- a/contrib/libs/llvm12/lib/Target/ya.make +++ b/contrib/libs/llvm12/lib/Target/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -20,9 +20,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) 
-ADDINCL( - contrib/libs/llvm12/lib/Target -) +ADDINCL( + contrib/libs/llvm12/lib/Target +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/TextAPI/MachO/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/TextAPI/MachO/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/TextAPI/MachO/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/TextAPI/MachO/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/TextAPI/MachO/ya.make b/contrib/libs/llvm12/lib/TextAPI/MachO/ya.make index ea5bab4453..4631a5ed63 100644 --- a/contrib/libs/llvm12/lib/TextAPI/MachO/ya.make +++ b/contrib/libs/llvm12/lib/TextAPI/MachO/ya.make @@ -2,24 +2,24 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) -LICENSE(Apache-2.0 WITH LLVM-exception) +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/lib/BinaryFormat contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/TextAPI -) +ADDINCL( + contrib/libs/llvm12/lib/TextAPI +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/ToolDrivers/llvm-dlltool/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/ToolDrivers/llvm-dlltool/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/ToolDrivers/llvm-dlltool/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/ToolDrivers/llvm-dlltool/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/ToolDrivers/llvm-dlltool/ya.make b/contrib/libs/llvm12/lib/ToolDrivers/llvm-dlltool/ya.make index a81e7e45a1..3de52a0add 100644 --- a/contrib/libs/llvm12/lib/ToolDrivers/llvm-dlltool/ya.make +++ b/contrib/libs/llvm12/lib/ToolDrivers/llvm-dlltool/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/ToolDrivers/llvm-lib/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/ToolDrivers/llvm-lib/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/ToolDrivers/llvm-lib/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/ToolDrivers/llvm-lib/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/ToolDrivers/llvm-lib/ya.make b/contrib/libs/llvm12/lib/ToolDrivers/llvm-lib/ya.make index 413acd8f71..cbe474770c 100644 --- a/contrib/libs/llvm12/lib/ToolDrivers/llvm-lib/ya.make +++ b/contrib/libs/llvm12/lib/ToolDrivers/llvm-lib/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include diff --git a/contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine/ya.make b/contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine/ya.make index 7134743b9e..c472a2054a 100644 --- a/contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine/ya.make +++ b/contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -20,9 +20,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Utils ) -ADDINCL( - contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine -) +ADDINCL( + contrib/libs/llvm12/lib/Transforms/AggressiveInstCombine +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Transforms/CFGuard/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Transforms/CFGuard/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Transforms/CFGuard/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Transforms/CFGuard/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Transforms/CFGuard/ya.make b/contrib/libs/llvm12/lib/Transforms/CFGuard/ya.make index a4e92b45e5..37fe9ccc94 100644 --- a/contrib/libs/llvm12/lib/Transforms/CFGuard/ya.make +++ b/contrib/libs/llvm12/lib/Transforms/CFGuard/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -18,9 +18,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/Transforms/CFGuard -) +ADDINCL( + contrib/libs/llvm12/lib/Transforms/CFGuard +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Transforms/IPO/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Transforms/IPO/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Transforms/IPO/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Transforms/IPO/ya.make b/contrib/libs/llvm12/lib/Transforms/IPO/ya.make index a1076bc963..5b078050fe 100644 --- a/contrib/libs/llvm12/lib/Transforms/IPO/ya.make +++ b/contrib/libs/llvm12/lib/Transforms/IPO/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -32,9 +32,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Vectorize ) -ADDINCL( - contrib/libs/llvm12/lib/Transforms/IPO -) +ADDINCL( + contrib/libs/llvm12/lib/Transforms/IPO +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Transforms/InstCombine/.yandex_meta/licenses.list.txt index f3e7a0b1f6..8045fd2cd1 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/.yandex_meta/licenses.list.txt @@ -1,19 +1,19 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================COPYRIGHT==================== - // The instruction has the form "(A op' B) op (C)". Try to factorize common - // term. 
- if (Op0) - - -====================COPYRIGHT==================== - Value *NotC = Builder.CreateNot(C); - Value *RHS = Builder.CreateAnd(B, NotC); - return BinaryOperator::CreateOr(LHS, RHS); +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================COPYRIGHT==================== + // The instruction has the form "(A op' B) op (C)". Try to factorize common + // term. + if (Op0) + + +====================COPYRIGHT==================== + Value *NotC = Builder.CreateNot(C); + Value *RHS = Builder.CreateAnd(B, NotC); + return BinaryOperator::CreateOr(LHS, RHS); diff --git a/contrib/libs/llvm12/lib/Transforms/InstCombine/ya.make b/contrib/libs/llvm12/lib/Transforms/InstCombine/ya.make index 080700a2de..3f74e68d16 100644 --- a/contrib/libs/llvm12/lib/Transforms/InstCombine/ya.make +++ b/contrib/libs/llvm12/lib/Transforms/InstCombine/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -20,9 +20,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Utils ) -ADDINCL( - contrib/libs/llvm12/lib/Transforms/InstCombine -) +ADDINCL( + contrib/libs/llvm12/lib/Transforms/InstCombine +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Transforms/Instrumentation/.yandex_meta/licenses.list.txt index 789dea7ef6..9e25064ed6 100644 --- 
a/contrib/libs/llvm12/lib/Transforms/Instrumentation/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/.yandex_meta/licenses.list.txt @@ -1,308 +1,308 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================COPYRIGHT==================== - Value *Sc = getShadow(C); - Value *Sd = getShadow(D); - - -====================File: LICENSE.TXT==================== -============================================================================== -The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: -============================================================================== - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ----- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. - -============================================================================== -Software from third parties included in the LLVM Project: -============================================================================== -The LLVM Project contains third party software which is under different license -terms. 
All such code will be identified clearly using at least one of two -mechanisms: -1) It will be in a separate directory tree with its own `LICENSE.txt` or - `LICENSE` file at the top containing the specific license and restrictions - which apply to that software, or -2) It will contain specific license and restriction terms at the top of every - file. - -============================================================================== -Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. -All rights reserved. - -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - - - -====================File: include/llvm/Support/LICENSE.TXT==================== -LLVM System Interface Library -------------------------------------------------------------------------------- -The LLVM System Interface Library is licensed under the Illinois Open Source -License and has the following additional copyright: - -Copyright (C) 2004 eXtensible Systems, Inc. - - -====================NCSA==================== -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================COPYRIGHT==================== + Value *Sc = getShadow(C); + Value *Sd = getShadow(D); + + +====================File: LICENSE.TXT==================== +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
+ +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. 
+ +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + + + +====================File: include/llvm/Support/LICENSE.TXT==================== +LLVM System Interface Library +------------------------------------------------------------------------------- +The LLVM System Interface Library is licensed under the Illinois Open Source +License and has the following additional copyright: + +Copyright (C) 2004 eXtensible Systems, Inc. 
+ + +====================NCSA==================== +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ya.make b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ya.make index 05a59d030b..39dab1eb7d 100644 --- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ya.make +++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ya.make @@ -2,18 +2,18 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE( - Apache-2.0 WITH LLVM-exception AND - NCSA -) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE( + Apache-2.0 WITH LLVM-exception AND + NCSA +) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -25,9 +25,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Utils ) -ADDINCL( - contrib/libs/llvm12/lib/Transforms/Instrumentation -) +ADDINCL( + contrib/libs/llvm12/lib/Transforms/Instrumentation +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Transforms/ObjCARC/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Transforms/ObjCARC/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Transforms/ObjCARC/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Transforms/ObjCARC/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. + + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Transforms/ObjCARC/ya.make b/contrib/libs/llvm12/lib/Transforms/ObjCARC/ya.make index b6e697bacc..727ec42c3f 100644 --- a/contrib/libs/llvm12/lib/Transforms/ObjCARC/ya.make +++ b/contrib/libs/llvm12/lib/Transforms/ObjCARC/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -20,9 +20,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Utils ) -ADDINCL( - contrib/libs/llvm12/lib/Transforms/ObjCARC -) +ADDINCL( + contrib/libs/llvm12/lib/Transforms/ObjCARC +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Transforms/Scalar/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make b/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make index d9cb6bacab..75501ae81a 100644 --- a/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make +++ b/contrib/libs/llvm12/lib/Transforms/Scalar/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -22,9 +22,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Utils ) -ADDINCL( - contrib/libs/llvm12/lib/Transforms/Scalar -) +ADDINCL( + contrib/libs/llvm12/lib/Transforms/Scalar +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Transforms/Utils/.yandex_meta/licenses.list.txt index da33742c1d..a87e3b68b9 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Transforms/Utils/.yandex_meta/licenses.list.txt @@ -1,13 +1,13 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -====================COPYRIGHT==================== -// if (c) if (c) -// X1 = ... X1 = ... -// else else +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + +====================COPYRIGHT==================== +// if (c) if (c) +// X1 = ... X1 = ... +// else else diff --git a/contrib/libs/llvm12/lib/Transforms/Utils/ya.make b/contrib/libs/llvm12/lib/Transforms/Utils/ya.make index 80c4a0dd10..c07d5d6db6 100644 --- a/contrib/libs/llvm12/lib/Transforms/Utils/ya.make +++ b/contrib/libs/llvm12/lib/Transforms/Utils/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -19,9 +19,9 @@ PEERDIR( contrib/libs/llvm12/lib/Support ) -ADDINCL( - contrib/libs/llvm12/lib/Transforms/Utils -) +ADDINCL( + contrib/libs/llvm12/lib/Transforms/Utils +) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Transforms/Vectorize/.yandex_meta/licenses.list.txt b/contrib/libs/llvm12/lib/Transforms/Vectorize/.yandex_meta/licenses.list.txt index a4433625d4..c62d353021 100644 --- a/contrib/libs/llvm12/lib/Transforms/Vectorize/.yandex_meta/licenses.list.txt +++ b/contrib/libs/llvm12/lib/Transforms/Vectorize/.yandex_meta/licenses.list.txt @@ -1,7 +1,7 @@ -====================Apache-2.0 WITH LLVM-exception==================== -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. - - -====================Apache-2.0 WITH LLVM-exception==================== -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +====================Apache-2.0 WITH LLVM-exception==================== +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+ + +====================Apache-2.0 WITH LLVM-exception==================== +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/contrib/libs/llvm12/lib/Transforms/Vectorize/ya.make b/contrib/libs/llvm12/lib/Transforms/Vectorize/ya.make index 5188be1c70..a68c667bde 100644 --- a/contrib/libs/llvm12/lib/Transforms/Vectorize/ya.make +++ b/contrib/libs/llvm12/lib/Transforms/Vectorize/ya.make @@ -2,15 +2,15 @@ LIBRARY() -OWNER( - orivej - g:cpp-contrib -) - -LICENSE(Apache-2.0 WITH LLVM-exception) - -LICENSE_TEXTS(.yandex_meta/licenses.list.txt) - +OWNER( + orivej + g:cpp-contrib +) + +LICENSE(Apache-2.0 WITH LLVM-exception) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + PEERDIR( contrib/libs/llvm12 contrib/libs/llvm12/include @@ -20,9 +20,9 @@ PEERDIR( contrib/libs/llvm12/lib/Transforms/Utils ) -ADDINCL( - contrib/libs/llvm12/lib/Transforms/Vectorize -) +ADDINCL( + contrib/libs/llvm12/lib/Transforms/Vectorize +) NO_COMPILER_WARNINGS() |