diff options
author | qrort <qrort@yandex-team.com> | 2022-11-30 23:47:12 +0300 |
---|---|---|
committer | qrort <qrort@yandex-team.com> | 2022-11-30 23:47:12 +0300 |
commit | 22f8ae0e3f5d68b92aecccdf96c1d841a0334311 (patch) | |
tree | bffa27765faf54126ad44bcafa89fadecb7a73d7 /contrib/libs/numa | |
parent | 332b99e2173f0425444abb759eebcb2fafaa9209 (diff) | |
download | ydb-22f8ae0e3f5d68b92aecccdf96c1d841a0334311.tar.gz |
validate canons without yatest_common
Diffstat (limited to 'contrib/libs/numa')
-rw-r--r-- | contrib/libs/numa/INSTALL.md | 73 | ||||
-rw-r--r-- | contrib/libs/numa/LICENSE.GPL2 | 339 | ||||
-rw-r--r-- | contrib/libs/numa/LICENSE.LGPL2.1 | 502 | ||||
-rw-r--r-- | contrib/libs/numa/README.md | 48 | ||||
-rw-r--r-- | contrib/libs/numa/affinity.c | 347 | ||||
-rw-r--r-- | contrib/libs/numa/distance.c | 120 | ||||
-rw-r--r-- | contrib/libs/numa/internal/affinity.h | 5 | ||||
-rw-r--r-- | contrib/libs/numa/internal/config.h | 70 | ||||
-rw-r--r-- | contrib/libs/numa/internal/rtnetlink.h | 5 | ||||
-rw-r--r-- | contrib/libs/numa/internal/sysfs.h | 3 | ||||
-rw-r--r-- | contrib/libs/numa/internal/util.h | 23 | ||||
-rw-r--r-- | contrib/libs/numa/libnuma.c | 2166 | ||||
-rw-r--r-- | contrib/libs/numa/numa.h | 494 | ||||
-rw-r--r-- | contrib/libs/numa/numaif.h | 52 | ||||
-rw-r--r-- | contrib/libs/numa/numaint.h | 57 | ||||
-rw-r--r-- | contrib/libs/numa/rtnetlink.c | 89 | ||||
-rw-r--r-- | contrib/libs/numa/syscall.c | 284 | ||||
-rw-r--r-- | contrib/libs/numa/sysfs.c | 76 |
18 files changed, 4753 insertions, 0 deletions
diff --git a/contrib/libs/numa/INSTALL.md b/contrib/libs/numa/INSTALL.md new file mode 100644 index 0000000000..462ffd59ff --- /dev/null +++ b/contrib/libs/numa/INSTALL.md @@ -0,0 +1,73 @@ +## Building `numactl` + +TL;DR: + +```shell +$ ./autogen.sh +$ ./configure +$ make +# make install +``` + +Start by configuring the build by running the configure script: + +```shell +$ ./configure +``` + +You can pass options to configure to define build options, to pass it +compiler paths, compiler flags and to define the installation layout. Run +`./configure --help` for more details on how to customize the build. + +Once configuration is completed, build `numactl` with: + +```shell +$ make +``` + +If you would like to increase verbosity by printing the full build command +lines, pass `make` the `V=1` parameter: + +```shell +$ make V=1 +``` + +You can build and run the tests included with numactl with the following +command: + +```shell +$ make check +``` + +The results will be saved in `test/*.log` files and a `test-suite.log` will be +generated with the summary of test passes and failures. + +Install numactl to the system by running the following command as root: + +```shell +# make install +``` + +You can also install it to a staging directory, in which case it is not +required to be root while running the install steps. Just pass a DESTDIR +variable while running `make install` with the path to the staging +directory. + +```shell +$ make install DESTDIR=/path/to/staging/numactl +``` + +## Using a snapshot from the Git repository + +First, the build system files need to be generated using the `./autogen.sh` +script, which calls `autoreconf` with the appropriate options to generate the +configure script and the templates for `Makefile`, `config.h`, etc. + +Once those files are generated, follow the normal steps to configure and +build numactl. + +In order to create a distribution tarball, use `make dist` from a configured +build tree. 
Use `make distcheck` to build a distribution tarball and confirm +that rebuilding from that archive works as expected, that building from +out-of-tree works, that test cases pass. + diff --git a/contrib/libs/numa/LICENSE.GPL2 b/contrib/libs/numa/LICENSE.GPL2 new file mode 100644 index 0000000000..d511905c16 --- /dev/null +++ b/contrib/libs/numa/LICENSE.GPL2 @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. 
+ + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) 
Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/contrib/libs/numa/LICENSE.LGPL2.1 b/contrib/libs/numa/LICENSE.LGPL2.1 new file mode 100644 index 0000000000..4362b49151 --- /dev/null +++ b/contrib/libs/numa/LICENSE.LGPL2.1 @@ -0,0 +1,502 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. 
Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. 
Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. 
+ + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. 
(Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. 
+ + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. 
+ +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". 
Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. 
+ + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. 
+ + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. 
You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. 
For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. 
+Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/contrib/libs/numa/README.md b/contrib/libs/numa/README.md new file mode 100644 index 0000000000..1d9b33b1f2 --- /dev/null +++ b/contrib/libs/numa/README.md @@ -0,0 +1,48 @@ +# numactl + +[![Build Status](https://travis-ci.org/numactl/numactl.svg?branch=master)](https://travis-ci.org/numactl/numactl) + +Simple NUMA policy support. It consists of a numactl program to run other +programs with a specific NUMA policy and a libnuma shared library ("NUMA API") +to set NUMA policy in applications. + +The libnuma binary interface is supposed to stay binary compatible. + +Incompatible changes will use new symbol version numbers. + +In addition there are various test and utility programs, like `numastat` to +display NUMA allocation statistics and `memhog`. + +In `test/` there is a small regression test suite. + +Note that `regress` assumes an unloaded machine with memory free on each node. +Otherwise you will get spurious failures in the non-strict policies (preferred, +interleave.) + +See the manpages [`numactl.8`](https://linux.die.net/man/8/numactl) and +[`numa.3`](https://linux.die.net/man/3/numa) for details. + +# License, Copyrights, Acknowledgements + +`numactl` and the demo programs are under the GNU General Public License, v.2. 
+ +`libnuma` is under the GNU Lesser General Public License, v2.1. + +The manpages are under the same license as the Linux manpages (see the files.) + +`numademo` links with a library derived from the C version of STREAM by John D. +McCalpin and Joe R. Zagar for one sub benchmark. See `stream_lib.c` for the +license. In particular when you publish `numademo` output you might need to pay +attention there or filter out the STREAM results. + +It also uses a public domain Mersenne Twister implementation from Michael +Brundage. + +Version 2.0.10-rc2: (C)2014 SGI + +Author: +Andi Kleen, SUSE Labs + +Version 2.0.0 by Cliff Wickman (`cpw@sgi.com`), Christoph Lameter +(`clameter@sgi.com`) and Lee Schermerhorn (`lee.schermerhorn@hp.com`). + diff --git a/contrib/libs/numa/affinity.c b/contrib/libs/numa/affinity.c new file mode 100644 index 0000000000..6f69a9b895 --- /dev/null +++ b/contrib/libs/numa/affinity.c @@ -0,0 +1,347 @@ +/* Support for specifying IO affinity by various means. + Copyright 2010 Intel Corporation + Author: Andi Kleen + + libnuma is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; version + 2.1. + + libnuma is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should find a copy of v2.1 of the GNU Lesser General Public License + somewhere on your Linux system; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Notebook: + - Separate real errors from no NUMA with fallback + - Infiniband + - FCoE? + - Support for other special IO devices + - Specifying cpu subsets inside the IO node? + - Handle multiple IO nodes (needs kernel changes) + - Better support for multi-path IO? 
+ */ +#define _GNU_SOURCE 1 +#include <string.h> +#include <errno.h> +#include <sys/stat.h> +#include <netdb.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <net/if.h> +#include <dirent.h> +#include <linux/rtnetlink.h> +#include <linux/netlink.h> +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <ctype.h> +#include <assert.h> +#include <regex.h> +#include <sys/sysmacros.h> +#include "numa.h" +#include "numaint.h" +#include "sysfs.h" +#include "affinity.h" +#include "rtnetlink.h" + +static int badchar(const char *s) +{ + if (strpbrk(s, "/.")) + return 1; + return 0; +} + +static int node_parse_failure(int ret, char *cls, const char *dev) +{ + if (!cls) + cls = ""; + if (ret == -2) + numa_warn(W_node_parse1, + "Kernel does not know node mask for%s%s device `%s'", + *cls ? " " : "", cls, dev); + else + numa_warn(W_node_parse2, + "Cannot read node mask for %s device `%s'", + cls, dev); + return -1; +} + +/* Generic sysfs class lookup */ +static int +affinity_class(struct bitmask *mask, char *cls, const char *dev) +{ + int ret; + while (isspace(*dev)) + dev++; + if (badchar(dev)) { + numa_warn(W_badchar, "Illegal characters in `%s' specification", + dev); + return -1; + } + + /* Somewhat hackish: extract device from symlink path. + Better would be a direct backlink. This knows slightly too + much about the actual sysfs layout. 
*/ + char path[1024]; + char *fn = NULL; + if (asprintf(&fn, "/sys/class/%s/%s", cls, dev) > 0 && + readlink(fn, path, sizeof path) > 0) { + regex_t re; + regmatch_t match[2]; + char *p; + + regcomp(&re, "(/devices/pci[0-9a-fA-F:/]+\\.[0-9]+)/", + REG_EXTENDED); + ret = regexec(&re, path, 2, match, 0); + regfree(&re); + if (ret == 0) { + free(fn); + assert(match[0].rm_so > 0); + assert(match[0].rm_eo > 0); + path[match[1].rm_eo + 1] = 0; + p = path + match[0].rm_so; + ret = sysfs_node_read(mask, "/sys/%s/numa_node", p); + if (ret < 0) + return node_parse_failure(ret, NULL, p); + return ret; + } + } + free(fn); + + ret = sysfs_node_read(mask, "/sys/class/%s/%s/device/numa_node", + cls, dev); + if (ret < 0) + return node_parse_failure(ret, cls, dev); + return 0; +} + +/* Turn file (or device node) into class name */ +static int affinity_file(struct bitmask *mask, char *cls, const char *file) +{ + struct stat st; + DIR *dir; + int n; + unsigned maj = 0, min = 0; + dev_t d; + struct dirent *dep; + + cls = "block"; + char fn[sizeof("/sys/class/") + strlen(cls)]; + if (stat(file, &st) < 0) { + numa_warn(W_blockdev1, "Cannot stat file %s", file); + return -1; + } + d = st.st_dev; + if (S_ISCHR(st.st_mode)) { + /* Better choice than misc? Most likely misc will not work + anyways unless the kernel is fixed. 
*/ + cls = "misc"; + d = st.st_rdev; + } else if (S_ISBLK(st.st_mode)) + d = st.st_rdev; + + sprintf(fn, "/sys/class/%s", cls); + dir = opendir(fn); + if (!dir) { + numa_warn(W_blockdev2, "Cannot enumerate %s devices in sysfs", + cls); + return -1; + } + while ((dep = readdir(dir)) != NULL) { + char *name = dep->d_name; + int ret; + + if (*name == '.') + continue; + char *dev; + char fn2[sizeof("/sys/class/block//dev") + strlen(name)]; + + n = -1; + if (sprintf(fn2, "/sys/class/block/%s/dev", name) < 0) + break; + dev = sysfs_read(fn2); + if (dev) { + n = sscanf(dev, "%u:%u", &maj, &min); + free(dev); + } + if (n != 2) { + numa_warn(W_blockdev3, "Cannot parse sysfs device %s", + name); + continue; + } + + if (major(d) != maj || minor(d) != min) + continue; + + ret = affinity_class(mask, "block", name); + closedir(dir); + return ret; + } + closedir(dir); + numa_warn(W_blockdev5, "Cannot find block device %x:%x in sysfs for `%s'", + maj, min, file); + return -1; +} + +/* Look up interface of route using rtnetlink. */ +static int find_route(struct sockaddr *dst, int *iifp) +{ + struct rtattr *rta; + const int hdrlen = NLMSG_LENGTH(sizeof(struct rtmsg)); + struct { + struct nlmsghdr msg; + struct rtmsg rt; + char buf[256]; + } req = { + .msg = { + .nlmsg_len = hdrlen, + .nlmsg_type = RTM_GETROUTE, + .nlmsg_flags = NLM_F_REQUEST, + }, + .rt = { + .rtm_family = dst->sa_family, + }, + }; + struct sockaddr_nl adr = { + .nl_family = AF_NETLINK, + }; + + if (rta_put_address(&req.msg, RTA_DST, dst) < 0) { + numa_warn(W_netlink1, "Cannot handle network family %x", + dst->sa_family); + return -1; + } + + if (rtnetlink_request(&req.msg, sizeof req, &adr) < 0) { + numa_warn(W_netlink2, "Cannot request rtnetlink route: %s", + strerror(errno)); + return -1; + } + + /* Fish the interface out of the netlink soup. 
*/ + rta = NULL; + while ((rta = rta_get(&req.msg, rta, hdrlen)) != NULL) { + if (rta->rta_type == RTA_OIF) { + memcpy(iifp, RTA_DATA(rta), sizeof(int)); + return 0; + } + } + + numa_warn(W_netlink3, "rtnetlink query did not return interface"); + return -1; +} + +static int iif_to_name(int iif, struct ifreq *ifr) +{ + int n; + int sk = socket(PF_INET, SOCK_DGRAM, 0); + if (sk < 0) + return -1; + ifr->ifr_ifindex = iif; + n = ioctl(sk, SIOCGIFNAME, ifr); + close(sk); + return n; +} + +/* Resolve an IP address to the nodes of a network device. + This generally only attempts to handle simple cases: + no multi-path, no bounding etc. In these cases only + the first interface or none is chosen. */ +static int affinity_ip(struct bitmask *mask, char *cls, const char *id) +{ + struct addrinfo *ai; + int n; + int iif; + struct ifreq ifr; + + if ((n = getaddrinfo(id, NULL, NULL, &ai)) != 0) { + numa_warn(W_net1, "Cannot resolve %s: %s", + id, gai_strerror(n)); + return -1; + } + + if (find_route(&ai->ai_addr[0], &iif) < 0) + goto out_ai; + + if (iif_to_name(iif, &ifr) < 0) { + numa_warn(W_net2, "Cannot resolve network interface %d", iif); + goto out_ai; + } + + freeaddrinfo(ai); + return affinity_class(mask, "net", ifr.ifr_name); + +out_ai: + freeaddrinfo(ai); + return -1; +} + +/* Look up affinity for a PCI device */ +static int affinity_pci(struct bitmask *mask, char *cls, const char *id) +{ + unsigned seg, bus, dev, func; + int n, ret; + + /* Func is optional. 
*/ + if ((n = sscanf(id, "%x:%x:%x.%x",&seg,&bus,&dev,&func)) == 4 || n == 3) { + if (n == 3) + func = 0; + } + /* Segment is optional too */ + else if ((n = sscanf(id, "%x:%x.%x",&bus,&dev,&func)) == 3 || n == 2) { + seg = 0; + if (n == 2) + func = 0; + } else { + numa_warn(W_pci1, "Cannot parse PCI device `%s'", id); + return -1; + } + ret = sysfs_node_read(mask, + "/sys/devices/pci%04x:%02x/%04x:%02x:%02x.%x/numa_node", + seg, bus, seg, bus, dev, func); + if (ret < 0) + return node_parse_failure(ret, cls, id); + return 0; +} + +static struct handler { + char first; + char *name; + char *cls; + int (*handler)(struct bitmask *mask, char *cls, const char *desc); +} handlers[] = { + { 'n', "netdev:", "net", affinity_class }, + { 'i', "ip:", NULL, affinity_ip }, + { 'f', "file:", NULL, affinity_file }, + { 'b', "block:", "block", affinity_class }, + { 'p', "pci:", NULL, affinity_pci }, + {} +}; + +hidden int resolve_affinity(const char *id, struct bitmask *mask) +{ + struct handler *h; + + for (h = &handlers[0]; h->first; h++) { + int len; + if (id[0] != h->first) + continue; + len = strlen(h->name); + if (!strncmp(id, h->name, len)) { + int ret = h->handler(mask, h->cls, id + len); + if (ret == -2) { + numa_warn(W_nonode, "Kernel does not know node for %s\n", + id + len); + } + return ret; + } + } + return NO_IO_AFFINITY; +} diff --git a/contrib/libs/numa/distance.c b/contrib/libs/numa/distance.c new file mode 100644 index 0000000000..3d544c7fe4 --- /dev/null +++ b/contrib/libs/numa/distance.c @@ -0,0 +1,120 @@ +/* Discover distances + Copyright (C) 2005 Andi Kleen, SuSE Labs. + + libnuma is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; version + 2.1. + + libnuma is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should find a copy of v2.1 of the GNU Lesser General Public License + somewhere on your Linux system; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + All calls are undefined when numa_available returns an error. */ +#define _GNU_SOURCE 1 +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include "numa.h" +#include "numaint.h" + +static int distance_numnodes; +static int *distance_table; + +static void parse_numbers(char *s, int *iptr) +{ + int i, d, j; + char *end; + int maxnode = numa_max_node(); + int numnodes = 0; + + for (i = 0; i <= maxnode; i++) + if (numa_bitmask_isbitset(numa_nodes_ptr, i)) + numnodes++; + + for (i = 0, j = 0; i <= maxnode; i++, j++) { + d = strtoul(s, &end, 0); + /* Skip unavailable nodes */ + while (j<=maxnode && !numa_bitmask_isbitset(numa_nodes_ptr, j)) + j++; + if (s == end) + break; + *(iptr+j) = d; + s = end; + } +} + +static int read_distance_table(void) +{ + int nd, len; + char *line = NULL; + size_t linelen = 0; + int maxnode = numa_max_node() + 1; + int *table = NULL; + int err = -1; + + for (nd = 0;; nd++) { + char fn[100]; + FILE *dfh; + sprintf(fn, "/sys/devices/system/node/node%d/distance", nd); + dfh = fopen(fn, "r"); + if (!dfh) { + if (errno == ENOENT) + err = 0; + if (!err && nd<maxnode) + continue; + else + break; + } + len = getdelim(&line, &linelen, '\n', dfh); + fclose(dfh); + if (len <= 0) + break; + + if (!table) { + table = calloc(maxnode * maxnode, sizeof(int)); + if (!table) { + errno = ENOMEM; + break; + } + } + + parse_numbers(line, table + nd * maxnode); + } + free(line); + if (err) { + numa_warn(W_distance, + "Cannot parse distance information in sysfs: %s", + strerror(errno)); + free(table); + return err; + } + /* Update the global table pointer. Race window here with + other threads, but in the worst case we leak one distance + array one time, which is tolerable. 
This avoids a + dependency on pthreads. */ + if (distance_table) { + free(table); + return 0; + } + distance_numnodes = maxnode; + distance_table = table; + return 0; +} + +int numa_distance(int a, int b) +{ + if (!distance_table) { + int err = read_distance_table(); + if ((err < 0) || (!distance_table)) + return 0; + } + if ((unsigned)a >= distance_numnodes || (unsigned)b >= distance_numnodes) + return 0; + return distance_table[a * distance_numnodes + b]; +} diff --git a/contrib/libs/numa/internal/affinity.h b/contrib/libs/numa/internal/affinity.h new file mode 100644 index 0000000000..6fbd3642a7 --- /dev/null +++ b/contrib/libs/numa/internal/affinity.h @@ -0,0 +1,5 @@ +enum { + NO_IO_AFFINITY = -2 +}; + +int resolve_affinity(const char *id, struct bitmask *mask); diff --git a/contrib/libs/numa/internal/config.h b/contrib/libs/numa/internal/config.h new file mode 100644 index 0000000000..8faa59e8da --- /dev/null +++ b/contrib/libs/numa/internal/config.h @@ -0,0 +1,70 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Checking for symver attribute */ +#define HAVE_ATTRIBUTE_SYMVER 0 + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdio.h> header file. */ +#define HAVE_STDIO_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. 
*/ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "numactl" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "numactl" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "numactl 2.0.16" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "numactl" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.0.16" + +/* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. 
*/ +#define STDC_HEADERS 1 + +/* If the compiler supports a TLS storage class define it to that here */ +#define TLS __thread + +/* Version number of package */ +#define VERSION "2.0.16" diff --git a/contrib/libs/numa/internal/rtnetlink.h b/contrib/libs/numa/internal/rtnetlink.h new file mode 100644 index 0000000000..f73d9094b7 --- /dev/null +++ b/contrib/libs/numa/internal/rtnetlink.h @@ -0,0 +1,5 @@ +hidden int +rta_put_address(struct nlmsghdr *msg, int type, struct sockaddr *adr); +hidden struct rtattr *rta_get(struct nlmsghdr *m, struct rtattr *p, int offset); +hidden void *rta_put(struct nlmsghdr *m, int type, int len); +hidden int rtnetlink_request(struct nlmsghdr *msg, int buflen, struct sockaddr_nl *adr); diff --git a/contrib/libs/numa/internal/sysfs.h b/contrib/libs/numa/internal/sysfs.h new file mode 100644 index 0000000000..0574ab1e5f --- /dev/null +++ b/contrib/libs/numa/internal/sysfs.h @@ -0,0 +1,3 @@ +struct bitmask; +hidden char *sysfs_read(char *name); +hidden int sysfs_node_read(struct bitmask *mask, char *fmt, ...); diff --git a/contrib/libs/numa/internal/util.h b/contrib/libs/numa/internal/util.h new file mode 100644 index 0000000000..8ee7e6d7db --- /dev/null +++ b/contrib/libs/numa/internal/util.h @@ -0,0 +1,23 @@ +extern void printmask(char *name, struct bitmask *mask); +extern int find_first(struct bitmask *mask); +extern struct bitmask *nodemask(char *s); +extern struct bitmask *cpumask(char *s, int *ncpus); +extern int read_sysctl(char *name); +extern void complain(char *fmt, ...); +extern void nerror(char *fmt, ...); + +extern long memsize(char *s); +extern int parse_policy(char *name, char *arg); +extern void print_policies(void); +extern char *policy_name(int policy); + +#define err(x) perror("numactl: " x),exit(1) +#define array_len(x) (sizeof(x)/sizeof(*(x))) + +#define round_up(x,y) (((x) + (y) - 1) & ~((y)-1)) + +#if HAVE_ATTRIBUTE_SYMVER +#define SYMVER(a,b) __attribute__ ((symver (b))) +#else +#define SYMVER(a,b) __asm__ (".symver 
" a "," b); +#endif diff --git a/contrib/libs/numa/libnuma.c b/contrib/libs/numa/libnuma.c new file mode 100644 index 0000000000..0aced8033a --- /dev/null +++ b/contrib/libs/numa/libnuma.c @@ -0,0 +1,2166 @@ +/* Simple NUMA library. + Copyright (C) 2003,2004,2005,2008 Andi Kleen,SuSE Labs and + Cliff Wickman,SGI. + + libnuma is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; version + 2.1. + + libnuma is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should find a copy of v2.1 of the GNU Lesser General Public License + somewhere on your Linux system; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + All calls are undefined when numa_available returns an error. 
*/ +#define _GNU_SOURCE 1 +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <sched.h> +#include <dirent.h> +#include <errno.h> +#include <stdarg.h> +#include <ctype.h> +#include <assert.h> + +#include <sys/mman.h> +#include <limits.h> + +#include "config.h" +#include "numa.h" +#include "numaif.h" +#include "numaint.h" +#include "util.h" +#include "affinity.h" + +#define WEAK __attribute__((weak)) + +#define CPU_BUFFER_SIZE 4096 /* This limits you to 32768 CPUs */ + +/* these are the old (version 1) masks */ +nodemask_t numa_no_nodes; +nodemask_t numa_all_nodes; +/* these are now the default bitmask (pointers to) (version 2) */ +struct bitmask *numa_no_nodes_ptr = NULL; +struct bitmask *numa_all_nodes_ptr = NULL; +struct bitmask *numa_possible_nodes_ptr = NULL; +struct bitmask *numa_all_cpus_ptr = NULL; +struct bitmask *numa_possible_cpus_ptr = NULL; +/* I would prefer to use symbol versioning to create v1 and v2 versions + of numa_no_nodes and numa_all_nodes, but the loader does not correctly + handle versioning of BSS versus small data items */ + +struct bitmask *numa_nodes_ptr = NULL; +static struct bitmask *numa_memnode_ptr = NULL; +static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES]; +static char node_cpu_mask_v1_stale = 1; +static struct bitmask **node_cpu_mask_v2; +static char node_cpu_mask_v2_stale = 1; + +WEAK void numa_error(char *where); + +#ifndef TLS +#warning "not threadsafe" +#define __thread +#endif + +static __thread int bind_policy = MPOL_BIND; +static __thread unsigned int mbind_flags = 0; +static int sizes_set=0; +static int maxconfigurednode = -1; +static int maxconfiguredcpu = -1; +static int numprocnode = -1; +static int numproccpu = -1; +static int nodemask_sz = 0; +static int cpumask_sz = 0; + +static int has_preferred_many = 0; + +int numa_exit_on_error = 0; +int numa_exit_on_warn = 0; +static void set_sizes(void); + +/* + * There are two special functions, _init(void) and _fini(void), which + * 
are called automatically by the dynamic loader whenever a library is loaded. + * + * The v1 library depends upon nodemask_t's of all nodes and no nodes. + */ +void __attribute__((constructor)) +numa_init(void) +{ + int max,i; + + if (sizes_set) + return; + + set_sizes(); + /* numa_all_nodes should represent existing nodes on this system */ + max = numa_num_configured_nodes(); + for (i = 0; i < max; i++) + nodemask_set_compat((nodemask_t *)&numa_all_nodes, i); + memset(&numa_no_nodes, 0, sizeof(numa_no_nodes)); +} + +static void cleanup_node_cpu_mask_v2(void); + +#define FREE_AND_ZERO(x) if (x) { \ + numa_bitmask_free(x); \ + x = NULL; \ + } + +void __attribute__((destructor)) +numa_fini(void) +{ + FREE_AND_ZERO(numa_all_cpus_ptr); + FREE_AND_ZERO(numa_possible_cpus_ptr); + FREE_AND_ZERO(numa_all_nodes_ptr); + FREE_AND_ZERO(numa_possible_nodes_ptr); + FREE_AND_ZERO(numa_no_nodes_ptr); + FREE_AND_ZERO(numa_memnode_ptr); + FREE_AND_ZERO(numa_nodes_ptr); + cleanup_node_cpu_mask_v2(); +} + +static int numa_find_first(struct bitmask *mask) +{ + int i; + for (i = 0; i < mask->size; i++) + if (numa_bitmask_isbitset(mask, i)) + return i; + return -1; +} + +/* + * The following bitmask declarations, bitmask_*() routines, and associated + * _setbit() and _getbit() routines are: + * Copyright (c) 2004_2007 Silicon Graphics, Inc. (SGI) All rights reserved. + * SGI publishes it under the terms of the GNU General Public License, v2, + * as published by the Free Software Foundation. 
+ */ +static unsigned int +_getbit(const struct bitmask *bmp, unsigned int n) +{ + if (n < bmp->size) + return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1; + else + return 0; +} + +static void +_setbit(struct bitmask *bmp, unsigned int n, unsigned int v) +{ + if (n < bmp->size) { + if (v) + bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong); + else + bmp->maskp[n/bitsperlong] &= ~(1UL << (n % bitsperlong)); + } +} + +int +numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int i) +{ + return _getbit(bmp, i); +} + +struct bitmask * +numa_bitmask_setall(struct bitmask *bmp) +{ + unsigned int i; + for (i = 0; i < bmp->size; i++) + _setbit(bmp, i, 1); + return bmp; +} + +struct bitmask * +numa_bitmask_clearall(struct bitmask *bmp) +{ + unsigned int i; + for (i = 0; i < bmp->size; i++) + _setbit(bmp, i, 0); + return bmp; +} + +struct bitmask * +numa_bitmask_setbit(struct bitmask *bmp, unsigned int i) +{ + _setbit(bmp, i, 1); + return bmp; +} + +struct bitmask * +numa_bitmask_clearbit(struct bitmask *bmp, unsigned int i) +{ + _setbit(bmp, i, 0); + return bmp; +} + +unsigned int +numa_bitmask_nbytes(struct bitmask *bmp) +{ + return longsperbits(bmp->size) * sizeof(unsigned long); +} + +/* where n is the number of bits in the map */ +/* This function should not exit on failure, but right now we cannot really + recover from this. 
*/ +struct bitmask * +numa_bitmask_alloc(unsigned int n) +{ + struct bitmask *bmp; + + if (n < 1) { + errno = EINVAL; + numa_error("request to allocate mask for invalid number"); + exit(1); + } + bmp = malloc(sizeof(*bmp)); + if (bmp == 0) + goto oom; + bmp->size = n; + bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long)); + if (bmp->maskp == 0) { + free(bmp); + goto oom; + } + return bmp; + +oom: + numa_error("Out of memory allocating bitmask"); + exit(1); +} + +void +numa_bitmask_free(struct bitmask *bmp) +{ + if (bmp == 0) + return; + free(bmp->maskp); + bmp->maskp = (unsigned long *)0xdeadcdef; /* double free tripwire */ + free(bmp); + return; +} + +/* True if two bitmasks are equal */ +int +numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2) +{ + unsigned int i; + for (i = 0; i < bmp1->size || i < bmp2->size; i++) + if (_getbit(bmp1, i) != _getbit(bmp2, i)) + return 0; + return 1; +} + +/* Hamming Weight: number of set bits */ +unsigned int numa_bitmask_weight(const struct bitmask *bmp) +{ + unsigned int i; + unsigned int w = 0; + for (i = 0; i < bmp->size; i++) + if (_getbit(bmp, i)) + w++; + return w; +} + +/* *****end of bitmask_ routines ************ */ + +/* Next two can be overwritten by the application for different error handling */ +WEAK void numa_error(char *where) +{ + int olde = errno; + perror(where); + if (numa_exit_on_error) + exit(1); + errno = olde; +} + +WEAK void numa_warn(int num, char *fmt, ...) 
+{ + static unsigned warned; + va_list ap; + int olde = errno; + + /* Give each warning only once */ + if ((1<<num) & warned) + return; + warned |= (1<<num); + + va_start(ap,fmt); + fprintf(stderr, "libnuma: Warning: "); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); + va_end(ap); + + errno = olde; +} + +static void setpol(int policy, struct bitmask *bmp) +{ + if (set_mempolicy(policy, bmp->maskp, bmp->size + 1) < 0) + numa_error("set_mempolicy"); +} + +static void getpol(int *oldpolicy, struct bitmask *bmp) +{ + if (get_mempolicy(oldpolicy, bmp->maskp, bmp->size + 1, 0, 0) < 0) + numa_error("get_mempolicy"); +} + +static void dombind(void *mem, size_t size, int pol, struct bitmask *bmp) +{ + if (mbind(mem, size, pol, bmp ? bmp->maskp : NULL, bmp ? bmp->size + 1 : 0, + mbind_flags) < 0) + numa_error("mbind"); +} + +/* (undocumented) */ +/* gives the wrong answer for hugetlbfs mappings. */ +int numa_pagesize(void) +{ + static int pagesize; + if (pagesize > 0) + return pagesize; + pagesize = getpagesize(); + return pagesize; +} + +make_internal_alias(numa_pagesize); + +/* + * Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr) + * and the highest numbered existing node (maxconfigurednode). + */ +static void +set_configured_nodes(void) +{ + DIR *d; + struct dirent *de; + long long freep; + + numa_memnode_ptr = numa_allocate_nodemask(); + numa_nodes_ptr = numa_allocate_nodemask(); + + d = opendir("/sys/devices/system/node"); + if (!d) { + maxconfigurednode = 0; + } else { + while ((de = readdir(d)) != NULL) { + int nd; + if (strncmp(de->d_name, "node", 4)) + continue; + nd = strtoul(de->d_name+4, NULL, 0); + numa_bitmask_setbit(numa_nodes_ptr, nd); + if (numa_node_size64(nd, &freep) > 0) + numa_bitmask_setbit(numa_memnode_ptr, nd); + if (maxconfigurednode < nd) + maxconfigurednode = nd; + } + closedir(d); + } +} + +/* + * Convert the string length of an ascii hex mask to the number + * of bits represented by that mask. 
+ */ +static int s2nbits(const char *s) +{ + return strlen(s) * 32 / 9; +} + +/* Is string 'pre' a prefix of string 's'? */ +static int strprefix(const char *s, const char *pre) +{ + return strncmp(s, pre, strlen(pre)) == 0; +} + +static const char *mask_size_file = "/proc/self/status"; +static const char *nodemask_prefix = "Mems_allowed:\t"; +/* + * (do this the way Paul Jackson's libcpuset does it) + * The nodemask values in /proc/self/status are in an + * ascii format that uses 9 characters for each 32 bits of mask. + * (this could also be used to find the cpumask size) + */ +static void +set_nodemask_size(void) +{ + FILE *fp; + char *buf = NULL; + size_t bufsize = 0; + + if ((fp = fopen(mask_size_file, "r")) == NULL) + goto done; + + while (getline(&buf, &bufsize, fp) > 0) { + if (strprefix(buf, nodemask_prefix)) { + nodemask_sz = s2nbits(buf + strlen(nodemask_prefix)); + break; + } + } + free(buf); + fclose(fp); +done: + if (nodemask_sz == 0) {/* fall back on error */ + int pol; + unsigned long *mask = NULL; + nodemask_sz = 16; + do { + nodemask_sz <<= 1; + mask = realloc(mask, nodemask_sz / 8); + if (!mask) + return; + } while (get_mempolicy(&pol, mask, nodemask_sz + 1, 0, 0) < 0 && errno == EINVAL && + nodemask_sz < 4096*8); + free(mask); + } +} + +/* + * Read a mask consisting of a sequence of hexadecimal longs separated by + * commas. Order them correctly and return the number of bits set. + */ +static int +read_mask(char *s, struct bitmask *bmp) +{ + char *end = s; + int tmplen = (bmp->size + bitsperint - 1) / bitsperint; + unsigned int tmp[tmplen]; + unsigned int *start = tmp; + unsigned int i, n = 0, m = 0; + + if (!s) + return 0; /* shouldn't happen */ + + i = strtoul(s, &end, 16); + + /* Skip leading zeros */ + while (!i && *end++ == ',') { + i = strtoul(end, &end, 16); + } + + if (!i) + /* End of string. 
No mask */ + return -1; + + start[n++] = i; + /* Read sequence of ints */ + while (*end++ == ',') { + i = strtoul(end, &end, 16); + start[n++] = i; + + /* buffer overflow */ + if (n > tmplen) + return -1; + } + + /* + * Invert sequence of ints if necessary since the first int + * is the highest and we put it first because we read it first. + */ + while (n) { + int w; + unsigned long x = 0; + /* read into long values in an endian-safe way */ + for (w = 0; n && w < bitsperlong; w += bitsperint) + x |= ((unsigned long)start[n-- - 1] << w); + + bmp->maskp[m++] = x; + } + /* + * Return the number of bits set + */ + return numa_bitmask_weight(bmp); +} + +/* + * Read a processes constraints in terms of nodes and cpus from + * /proc/self/status. + */ +static void +set_task_constraints(void) +{ + int hicpu = maxconfiguredcpu; + int i; + char *buffer = NULL; + size_t buflen = 0; + FILE *f; + + numa_all_cpus_ptr = numa_allocate_cpumask(); + numa_possible_cpus_ptr = numa_allocate_cpumask(); + numa_all_nodes_ptr = numa_allocate_nodemask(); + numa_possible_nodes_ptr = numa_allocate_cpumask(); + numa_no_nodes_ptr = numa_allocate_nodemask(); + + f = fopen(mask_size_file, "r"); + if (!f) { + //numa_warn(W_cpumap, "Cannot parse %s", mask_size_file); + return; + } + + while (getline(&buffer, &buflen, f) > 0) { + /* mask starts after [last] tab */ + char *mask = strrchr(buffer,'\t'); + + if (strncmp(buffer,"Cpus_allowed:",13) == 0) + numproccpu = read_mask(mask + 1, numa_all_cpus_ptr); + + if (strncmp(buffer,"Mems_allowed:",13) == 0) { + numprocnode = read_mask(mask + 1, numa_all_nodes_ptr); + } + } + fclose(f); + free(buffer); + + for (i = 0; i <= hicpu; i++) + numa_bitmask_setbit(numa_possible_cpus_ptr, i); + for (i = 0; i <= maxconfigurednode; i++) + numa_bitmask_setbit(numa_possible_nodes_ptr, i); + + /* + * Cpus_allowed in the kernel can be defined to all f's + * i.e. it may be a superset of the actual available processors. 
+ * As such let's reduce numproccpu to the number of actual + * available cpus. + */ + if (numproccpu <= 0) { + for (i = 0; i <= hicpu; i++) + numa_bitmask_setbit(numa_all_cpus_ptr, i); + numproccpu = hicpu+1; + } + + if (numproccpu > hicpu+1) { + numproccpu = hicpu+1; + for (i=hicpu+1; i<numa_all_cpus_ptr->size; i++) { + numa_bitmask_clearbit(numa_all_cpus_ptr, i); + } + } + + if (numprocnode <= 0) { + for (i = 0; i <= maxconfigurednode; i++) + numa_bitmask_setbit(numa_all_nodes_ptr, i); + numprocnode = maxconfigurednode + 1; + } + + return; +} + +/* + * Find the highest cpu number possible (in other words the size + * of a kernel cpumask_t (in bits) - 1) + */ +static void +set_numa_max_cpu(void) +{ + int len = 4096; + int n; + int olde = errno; + struct bitmask *buffer; + + do { + buffer = numa_bitmask_alloc(len); + n = numa_sched_getaffinity_v2_int(0, buffer); + /* on success, returns size of kernel cpumask_t, in bytes */ + if (n < 0) { + if (errno == EINVAL) { + if (len >= 1024*1024) + break; + len *= 2; + numa_bitmask_free(buffer); + continue; + } else { + numa_warn(W_numcpus, "Unable to determine max cpu" + " (sched_getaffinity: %s); guessing...", + strerror(errno)); + n = sizeof(cpu_set_t); + break; + } + } + } while (n < 0); + numa_bitmask_free(buffer); + errno = olde; + cpumask_sz = n*8; +} + +/* + * get the total (configured) number of cpus - both online and offline + */ +static void +set_configured_cpus(void) +{ + maxconfiguredcpu = sysconf(_SC_NPROCESSORS_CONF) - 1; + if (maxconfiguredcpu == -1) + numa_error("sysconf(NPROCESSORS_CONF) failed"); +} + +static void +set_kernel_abi() +{ + int oldp; + struct bitmask *bmp, *tmp; + bmp = numa_allocate_nodemask(); + tmp = numa_allocate_nodemask(); + + if (get_mempolicy(&oldp, bmp->maskp, bmp->size + 1, 0, 0) < 0) + goto out; + + /* Assumes there's always a node 0, and it's online */ + numa_bitmask_setbit(tmp, 0); + if (set_mempolicy(MPOL_PREFERRED_MANY, tmp->maskp, tmp->size) == 0) { + has_preferred_many++; + 
/* reset the old memory policy */ + setpol(oldp, bmp); + } + +out: + numa_bitmask_free(tmp); + numa_bitmask_free(bmp); +} + +/* + * Initialize all the sizes. + */ +static void +set_sizes(void) +{ + sizes_set++; + set_nodemask_size(); /* size of kernel nodemask_t */ + set_configured_nodes(); /* configured nodes listed in /sys */ + set_numa_max_cpu(); /* size of kernel cpumask_t */ + set_configured_cpus(); /* cpus listed in /sys/devices/system/cpu */ + set_task_constraints(); /* cpus and nodes for current task */ + set_kernel_abi(); /* man policy supported */ +} + +int +numa_num_configured_nodes(void) +{ + /* + * NOTE: this function's behavior matches the documentation (ie: it + * returns a count of nodes with memory) despite the poor function + * naming. We also cannot use the similarly poorly named + * numa_all_nodes_ptr as it only tracks nodes with memory from which + * the calling process can allocate. Think sparse nodes, memory-less + * nodes, cpusets... + */ + int memnodecount=0, i; + + for (i=0; i <= maxconfigurednode; i++) { + if (numa_bitmask_isbitset(numa_memnode_ptr, i)) + memnodecount++; + } + return memnodecount; +} + +int +numa_num_configured_cpus(void) +{ + + return maxconfiguredcpu+1; +} + +int +numa_num_possible_nodes(void) +{ + return nodemask_sz; +} + +int +numa_num_possible_cpus(void) +{ + return cpumask_sz; +} + +int +numa_num_task_nodes(void) +{ + return numprocnode; +} + +/* + * for backward compatibility + */ +int +numa_num_thread_nodes(void) +{ + return numa_num_task_nodes(); +} + +int +numa_num_task_cpus(void) +{ + return numproccpu; +} + +/* + * for backward compatibility + */ +int +numa_num_thread_cpus(void) +{ + return numa_num_task_cpus(); +} + +/* + * Return the number of the highest node in this running system, + */ +int +numa_max_node(void) +{ + return maxconfigurednode; +} + +make_internal_alias(numa_max_node); + +/* + * Return the number of the highest possible node in a system, + * which for v1 is the size of a numa.h nodemask_t(in 
bits)-1. + * but for v2 is the size of a kernel nodemask_t(in bits)-1. + */ +SYMVER("numa_max_possible_node_v1", "numa_max_possible_node@libnuma_1.1") +int +numa_max_possible_node_v1(void) +{ + return ((sizeof(nodemask_t)*8)-1); +} + +SYMVER("numa_max_possible_node_v2", "numa_max_possible_node@@libnuma_1.2") +int +numa_max_possible_node_v2(void) +{ + return numa_num_possible_nodes()-1; +} + +make_internal_alias(numa_max_possible_node_v1); +make_internal_alias(numa_max_possible_node_v2); + +/* + * Allocate a bitmask for cpus, of a size large enough to + * match the kernel's cpumask_t. + */ +struct bitmask * +numa_allocate_cpumask() +{ + int ncpus = numa_num_possible_cpus(); + + return numa_bitmask_alloc(ncpus); +} + +/* + * Allocate a bitmask the size of a libnuma nodemask_t + */ +static struct bitmask * +allocate_nodemask_v1(void) +{ + int nnodes = numa_max_possible_node_v1_int()+1; + + return numa_bitmask_alloc(nnodes); +} + +/* + * Allocate a bitmask for nodes, of a size large enough to + * match the kernel's nodemask_t. + */ +struct bitmask * +numa_allocate_nodemask(void) +{ + struct bitmask *bmp; + int nnodes = numa_max_possible_node_v2_int() + 1; + + bmp = numa_bitmask_alloc(nnodes); + return bmp; +} + +/* (cache the result?) */ +long long numa_node_size64(int node, long long *freep) +{ + size_t len = 0; + char *line = NULL; + long long size = -1; + FILE *f; + char fn[64]; + int ok = 0; + int required = freep ? 
2 : 1; + + if (freep) + *freep = -1; + sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node); + f = fopen(fn, "r"); + if (!f) + return -1; + while (getdelim(&line, &len, '\n', f) > 0) { + char *end; + char *s = strcasestr(line, "kB"); + if (!s) + continue; + --s; + while (s > line && isspace(*s)) + --s; + while (s > line && isdigit(*s)) + --s; + if (strstr(line, "MemTotal")) { + size = strtoull(s,&end,0) << 10; + if (end == s) + size = -1; + else + ok++; + } + if (freep && strstr(line, "MemFree")) { + *freep = strtoull(s,&end,0) << 10; + if (end == s) + *freep = -1; + else + ok++; + } + } + fclose(f); + free(line); + if (ok != required) + numa_warn(W_badmeminfo, "Cannot parse sysfs meminfo (%d)", ok); + return size; +} + +make_internal_alias(numa_node_size64); + +long numa_node_size(int node, long *freep) +{ + long long f2; + long sz = numa_node_size64_int(node, &f2); + if (freep) + *freep = f2; + return sz; +} + +int numa_available(void) +{ + if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS) + return -1; + return 0; +} + +SYMVER("numa_interleave_memory_v1", "numa_interleave_memory@libnuma_1.1") +void +numa_interleave_memory_v1(void *mem, size_t size, const nodemask_t *mask) +{ + struct bitmask bitmask; + + bitmask.size = sizeof(nodemask_t) * 8; + bitmask.maskp = (unsigned long *)mask; + dombind(mem, size, MPOL_INTERLEAVE, &bitmask); +} + +SYMVER("numa_interleave_memory_v2", "numa_interleave_memory@@libnuma_1.2") +void +numa_interleave_memory_v2(void *mem, size_t size, struct bitmask *bmp) +{ + dombind(mem, size, MPOL_INTERLEAVE, bmp); +} + +void numa_tonode_memory(void *mem, size_t size, int node) +{ + struct bitmask *nodes; + + nodes = numa_allocate_nodemask(); + numa_bitmask_setbit(nodes, node); + dombind(mem, size, bind_policy, nodes); + numa_bitmask_free(nodes); +} + +SYMVER("numa_tonodemask_memory_v1", "numa_tonodemask_memory@libnuma_1.1") +void +numa_tonodemask_memory_v1(void *mem, size_t size, const nodemask_t *mask) +{ + struct bitmask 
bitmask; + + bitmask.maskp = (unsigned long *)mask; + bitmask.size = sizeof(nodemask_t); + dombind(mem, size, bind_policy, &bitmask); +} + +SYMVER("numa_tonodemask_memory_v2", "numa_tonodemask_memory@@libnuma_1.2") +void +numa_tonodemask_memory_v2(void *mem, size_t size, struct bitmask *bmp) +{ + dombind(mem, size, bind_policy, bmp); +} + +void numa_setlocal_memory(void *mem, size_t size) +{ + dombind(mem, size, MPOL_LOCAL, NULL); +} + +void numa_police_memory(void *mem, size_t size) +{ + int pagesize = numa_pagesize_int(); + unsigned long i; + char *p = mem; + for (i = 0; i < size; i += pagesize, p += pagesize) + __atomic_and_fetch(p, 0xff, __ATOMIC_RELAXED); + +} + +make_internal_alias(numa_police_memory); + +void *numa_alloc(size_t size) +{ + char *mem; + mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + 0, 0); + if (mem == (char *)-1) + return NULL; + numa_police_memory_int(mem, size); + return mem; +} + +void *numa_realloc(void *old_addr, size_t old_size, size_t new_size) +{ + char *mem; + mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE); + if (mem == (char *)-1) + return NULL; + /* + * The memory policy of the allocated pages is preserved by mremap(), so + * there is no need to (re)set it here. If the policy of the original + * allocation is not set, the new pages will be allocated according to the + * process' mempolicy. Trying to allocate explicitly the new pages on the + * same node as the original ones would require changing the policy of the + * newly allocated pages, which violates the numa_realloc() semantics. 
+ */ + return mem; +} + +SYMVER("numa_alloc_interleaved_subset_v1", "numa_alloc_interleaved_subset@libnuma_1.1") +void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask) +{ + char *mem; + struct bitmask bitmask; + + mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + 0, 0); + if (mem == (char *)-1) + return NULL; + bitmask.maskp = (unsigned long *)mask; + bitmask.size = sizeof(nodemask_t); + dombind(mem, size, MPOL_INTERLEAVE, &bitmask); + return mem; +} + +SYMVER("numa_alloc_interleaved_subset_v2", "numa_alloc_interleaved_subset@@libnuma_1.2") +void *numa_alloc_interleaved_subset_v2(size_t size, struct bitmask *bmp) +{ + char *mem; + + mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + 0, 0); + if (mem == (char *)-1) + return NULL; + dombind(mem, size, MPOL_INTERLEAVE, bmp); + return mem; +} + +make_internal_alias(numa_alloc_interleaved_subset_v1); +make_internal_alias(numa_alloc_interleaved_subset_v2); + +void * +numa_alloc_interleaved(size_t size) +{ + return numa_alloc_interleaved_subset_v2_int(size, numa_all_nodes_ptr); +} + +/* + * given a user node mask, set memory policy to use those nodes + */ +SYMVER("numa_set_interleave_mask_v1", "numa_set_interleave_mask@libnuma_1.1") +void +numa_set_interleave_mask_v1(nodemask_t *mask) +{ + struct bitmask *bmp; + int nnodes = numa_max_possible_node_v1_int()+1; + + bmp = numa_bitmask_alloc(nnodes); + copy_nodemask_to_bitmask(mask, bmp); + if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) + setpol(MPOL_DEFAULT, bmp); + else + setpol(MPOL_INTERLEAVE, bmp); + numa_bitmask_free(bmp); +} + + +SYMVER("numa_set_interleave_mask_v2", "numa_set_interleave_mask@@libnuma_1.2") +void +numa_set_interleave_mask_v2(struct bitmask *bmp) +{ + if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) + setpol(MPOL_DEFAULT, bmp); + else + setpol(MPOL_INTERLEAVE, bmp); +} + +SYMVER("numa_get_interleave_mask_v1", "numa_get_interleave_mask@libnuma_1.1") +nodemask_t 
+numa_get_interleave_mask_v1(void) +{ + int oldpolicy; + struct bitmask *bmp; + nodemask_t mask; + + bmp = allocate_nodemask_v1(); + getpol(&oldpolicy, bmp); + if (oldpolicy == MPOL_INTERLEAVE) + copy_bitmask_to_nodemask(bmp, &mask); + else + copy_bitmask_to_nodemask(numa_no_nodes_ptr, &mask); + numa_bitmask_free(bmp); + return mask; +} + +SYMVER("numa_get_interleave_mask_v2", "numa_get_interleave_mask@@libnuma_1.2") +struct bitmask * +numa_get_interleave_mask_v2(void) +{ + int oldpolicy; + struct bitmask *bmp; + + bmp = numa_allocate_nodemask(); + getpol(&oldpolicy, bmp); + if (oldpolicy != MPOL_INTERLEAVE) + copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); + return bmp; +} + +/* (undocumented) */ +int numa_get_interleave_node(void) +{ + int nd; + if (get_mempolicy(&nd, NULL, 0, 0, MPOL_F_NODE) == 0) + return nd; + return 0; +} + +void *numa_alloc_onnode(size_t size, int node) +{ + char *mem; + struct bitmask *bmp; + + bmp = numa_allocate_nodemask(); + numa_bitmask_setbit(bmp, node); + mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + 0, 0); + if (mem == (char *)-1) + mem = NULL; + else + dombind(mem, size, bind_policy, bmp); + numa_bitmask_free(bmp); + return mem; +} + +void *numa_alloc_local(size_t size) +{ + char *mem; + mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + 0, 0); + if (mem == (char *)-1) + mem = NULL; + else + dombind(mem, size, MPOL_LOCAL, NULL); + return mem; +} + +void numa_set_bind_policy(int strict) +{ + if (strict) + bind_policy = MPOL_BIND; + else if (has_preferred_many) + bind_policy = MPOL_PREFERRED_MANY; + else + bind_policy = MPOL_PREFERRED; +} + +SYMVER("numa_set_membind_v1", "numa_set_membind@libnuma_1.1") +void +numa_set_membind_v1(const nodemask_t *mask) +{ + struct bitmask bitmask; + + bitmask.maskp = (unsigned long *)mask; + bitmask.size = sizeof(nodemask_t); + setpol(MPOL_BIND, &bitmask); +} + +SYMVER("numa_set_membind_v2", "numa_set_membind@@libnuma_1.2") +void 
+numa_set_membind_v2(struct bitmask *bmp) +{ + setpol(MPOL_BIND, bmp); +} + +make_internal_alias(numa_set_membind_v2); + +void +numa_set_membind_balancing(struct bitmask *bmp) +{ + /* MPOL_F_NUMA_BALANCING: ignore if unsupported */ + if (set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING, + bmp->maskp, bmp->size + 1) < 0) { + if (errno == EINVAL) { + errno = 0; + numa_set_membind_v2(bmp); + } else + numa_error("set_mempolicy"); + } +} + +/* + * copy a bitmask map body to a numa.h nodemask_t structure + */ +void +copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nmp) +{ + int max, i; + + memset(nmp, 0, sizeof(nodemask_t)); + max = (sizeof(nodemask_t)*8); + for (i=0; i<bmp->size; i++) { + if (i >= max) + break; + if (numa_bitmask_isbitset(bmp, i)) + nodemask_set_compat((nodemask_t *)nmp, i); + } +} + +/* + * copy a bitmask map body to another bitmask body + * fill a larger destination with zeroes + */ +void +copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto) +{ + int bytes; + + if (bmpfrom->size >= bmpto->size) { + memcpy(bmpto->maskp, bmpfrom->maskp, CPU_BYTES(bmpto->size)); + } else if (bmpfrom->size < bmpto->size) { + bytes = CPU_BYTES(bmpfrom->size); + memcpy(bmpto->maskp, bmpfrom->maskp, bytes); + memset(((char *)bmpto->maskp)+bytes, 0, + CPU_BYTES(bmpto->size)-bytes); + } +} + +/* + * copy a numa.h nodemask_t structure to a bitmask map body + */ +void +copy_nodemask_to_bitmask(nodemask_t *nmp, struct bitmask *bmp) +{ + int max, i; + + numa_bitmask_clearall(bmp); + max = (sizeof(nodemask_t)*8); + if (max > bmp->size) + max = bmp->size; + for (i=0; i<max; i++) { + if (nodemask_isset_compat(nmp, i)) + numa_bitmask_setbit(bmp, i); + } +} + +SYMVER("numa_get_membind_v1", "numa_get_membind@libnuma_1.1") +nodemask_t +numa_get_membind_v1(void) +{ + int oldpolicy; + struct bitmask *bmp; + nodemask_t nmp; + + bmp = allocate_nodemask_v1(); + getpol(&oldpolicy, bmp); + if (oldpolicy == MPOL_BIND) { + copy_bitmask_to_nodemask(bmp, &nmp); + } else { 
+ /* copy the body of the map to numa_all_nodes */ + copy_bitmask_to_nodemask(bmp, &numa_all_nodes); + nmp = numa_all_nodes; + } + numa_bitmask_free(bmp); + return nmp; +} + +SYMVER("numa_get_membind_v2", "numa_get_membind@@libnuma_1.2") +struct bitmask * +numa_get_membind_v2(void) +{ + int oldpolicy; + struct bitmask *bmp; + + bmp = numa_allocate_nodemask(); + getpol(&oldpolicy, bmp); + if (oldpolicy != MPOL_BIND) + copy_bitmask_to_bitmask(numa_all_nodes_ptr, bmp); + return bmp; +} + +//TODO: do we need a v1 nodemask_t version? +struct bitmask *numa_get_mems_allowed(void) +{ + struct bitmask *bmp; + + /* + * can change, so query on each call. + */ + bmp = numa_allocate_nodemask(); + if (get_mempolicy(NULL, bmp->maskp, bmp->size + 1, 0, + MPOL_F_MEMS_ALLOWED) < 0) + numa_error("get_mempolicy"); + return bmp; +} +make_internal_alias(numa_get_mems_allowed); + +void numa_free(void *mem, size_t size) +{ + munmap(mem, size); +} + +SYMVER("numa_parse_bitmap_v1", "numa_parse_bitmap@libnuma_1.1") +int +numa_parse_bitmap_v1(char *line, unsigned long *mask, int ncpus) +{ + int i; + char *p = strchr(line, '\n'); + if (!p) + return -1; + + for (i = 0; p > line;i++) { + char *oldp, *endp; + oldp = p; + if (*p == ',') + --p; + while (p > line && *p != ',') + --p; + /* Eat two 32bit fields at a time to get longs */ + if (p > line && sizeof(unsigned long) == 8) { + oldp--; + memmove(p, p+1, oldp-p+1); + while (p > line && *p != ',') + --p; + } + if (*p == ',') + p++; + if (i >= CPU_LONGS(ncpus)) + return -1; + mask[i] = strtoul(p, &endp, 16); + if (endp != oldp) + return -1; + p--; + } + return 0; +} + +SYMVER("numa_parse_bitmap_v2", "numa_parse_bitmap@@libnuma_1.2") +int +numa_parse_bitmap_v2(char *line, struct bitmask *mask) +{ + int i, ncpus; + char *p = strchr(line, '\n'); + if (!p) + return -1; + ncpus = mask->size; + + for (i = 0; p > line;i++) { + char *oldp, *endp; + oldp = p; + if (*p == ',') + --p; + while (p > line && *p != ',') + --p; + /* Eat two 32bit fields at a 
time to get longs */ + if (p > line && sizeof(unsigned long) == 8) { + oldp--; + memmove(p, p+1, oldp-p+1); + while (p > line && *p != ',') + --p; + } + if (*p == ',') + p++; + if (i >= CPU_LONGS(ncpus)) + return -1; + mask->maskp[i] = strtoul(p, &endp, 16); + if (endp != oldp) + return -1; + p--; + } + return 0; +} + +static void init_node_cpu_mask_v2(void) +{ + int nnodes = numa_max_possible_node_v2_int() + 1; + node_cpu_mask_v2 = calloc (nnodes, sizeof(struct bitmask *)); +} + +static void cleanup_node_cpu_mask_v2(void) +{ + if (node_cpu_mask_v2) { + int i; + int nnodes; + nnodes = numa_max_possible_node_v2_int() + 1; + for (i = 0; i < nnodes; i++) { + FREE_AND_ZERO(node_cpu_mask_v2[i]); + } + free(node_cpu_mask_v2); + node_cpu_mask_v2 = NULL; + } +} + +/* This would be better with some locking, but I don't want to make libnuma + dependent on pthreads right now. The races are relatively harmless. */ +SYMVER("numa_node_to_cpus_v1", "numa_node_to_cpus@libnuma_1.1") +int +numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen) +{ + int err = 0; + char fn[64]; + FILE *f; + char update; + char *line = NULL; + size_t len = 0; + struct bitmask bitmask; + int buflen_needed; + unsigned long *mask; + int ncpus = numa_num_possible_cpus(); + int maxnode = numa_max_node_int(); + + buflen_needed = CPU_BYTES(ncpus); + if ((unsigned)node > maxnode || bufferlen < buflen_needed) { + errno = ERANGE; + return -1; + } + if (bufferlen > buflen_needed) + memset(buffer, 0, bufferlen); + update = __atomic_fetch_and(&node_cpu_mask_v1_stale, 0, __ATOMIC_RELAXED); + if (node_cpu_mask_v1[node] && !update) { + memcpy(buffer, node_cpu_mask_v1[node], buflen_needed); + return 0; + } + + mask = malloc(buflen_needed); + if (!mask) + mask = (unsigned long *)buffer; + memset(mask, 0, buflen_needed); + + sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); + f = fopen(fn, "r"); + if (!f || getdelim(&line, &len, '\n', f) < 1) { + if (numa_bitmask_isbitset(numa_nodes_ptr, node)) { 
+ numa_warn(W_nosysfs2, + "/sys not mounted or invalid. Assuming one node: %s", + strerror(errno)); + numa_warn(W_nosysfs2, + "(cannot open or correctly parse %s)", fn); + } + bitmask.maskp = (unsigned long *)mask; + bitmask.size = buflen_needed * 8; + numa_bitmask_setall(&bitmask); + err = -1; + } + if (f) + fclose(f); + + if (line && (numa_parse_bitmap_v1(line, mask, ncpus) < 0)) { + numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node"); + bitmask.maskp = (unsigned long *)mask; + bitmask.size = buflen_needed * 8; + numa_bitmask_setall(&bitmask); + err = -1; + } + + free(line); + memcpy(buffer, mask, buflen_needed); + + /* slightly racy, see above */ + if (node_cpu_mask_v1[node]) { + if (update) { + /* + * There may be readers on node_cpu_mask_v1[], hence it can not + * be freed. + */ + memcpy(node_cpu_mask_v1[node], mask, buflen_needed); + free(mask); + mask = NULL; + } else if (mask != buffer) + free(mask); + } else { + node_cpu_mask_v1[node] = mask; + } + return err; +} + +/* + * test whether a node has cpus + */ +/* This would be better with some locking, but I don't want to make libnuma + dependent on pthreads right now. The races are relatively harmless. 
*/ +/* + * deliver a bitmask of cpus representing the cpus on a given node + */ +SYMVER("numa_node_to_cpus_v2", "numa_node_to_cpus@@libnuma_1.2") +int +numa_node_to_cpus_v2(int node, struct bitmask *buffer) +{ + int err = 0; + int nnodes = numa_max_node(); + char fn[64], *line = NULL; + FILE *f; + char update; + size_t len = 0; + struct bitmask *mask; + + if (!node_cpu_mask_v2) + init_node_cpu_mask_v2(); + + if (node > nnodes) { + errno = ERANGE; + return -1; + } + numa_bitmask_clearall(buffer); + + update = __atomic_fetch_and(&node_cpu_mask_v2_stale, 0, __ATOMIC_RELAXED); + if (node_cpu_mask_v2[node] && !update) { + /* have already constructed a mask for this node */ + if (buffer->size < node_cpu_mask_v2[node]->size) { + errno = EINVAL; + numa_error("map size mismatch"); + return -1; + } + copy_bitmask_to_bitmask(node_cpu_mask_v2[node], buffer); + return 0; + } + + /* need a new mask for this node */ + mask = numa_allocate_cpumask(); + + /* this is a kernel cpumask_t (see node_read_cpumap()) */ + sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); + f = fopen(fn, "r"); + if (!f || getdelim(&line, &len, '\n', f) < 1) { + if (numa_bitmask_isbitset(numa_nodes_ptr, node)) { + numa_warn(W_nosysfs2, + "/sys not mounted or invalid. Assuming one node: %s", + strerror(errno)); + numa_warn(W_nosysfs2, + "(cannot open or correctly parse %s)", fn); + } + numa_bitmask_setall(mask); + err = -1; + } + if (f) + fclose(f); + + if (line && (numa_parse_bitmap_v2(line, mask) < 0)) { + numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node"); + numa_bitmask_setall(mask); + err = -1; + } + + free(line); + copy_bitmask_to_bitmask(mask, buffer); + + /* slightly racy, see above */ + /* save the mask we created */ + if (node_cpu_mask_v2[node]) { + if (update) { + copy_bitmask_to_bitmask(mask, node_cpu_mask_v2[node]); + numa_bitmask_free(mask); + mask = NULL; + /* how could this be? 
*/ + } else if (mask != buffer) + numa_bitmask_free(mask); + } else { + /* we don't want to cache faulty result */ + if (!err) + node_cpu_mask_v2[node] = mask; + else + numa_bitmask_free(mask); + } + return err; +} + +make_internal_alias(numa_node_to_cpus_v1); +make_internal_alias(numa_node_to_cpus_v2); + +void numa_node_to_cpu_update(void) +{ + __atomic_store_n(&node_cpu_mask_v1_stale, 1, __ATOMIC_RELAXED); + __atomic_store_n(&node_cpu_mask_v2_stale, 1, __ATOMIC_RELAXED); +} + +/* report the node of the specified cpu */ +int numa_node_of_cpu(int cpu) +{ + struct bitmask *bmp; + int ncpus, nnodes, node, ret; + + ncpus = numa_num_possible_cpus(); + if (cpu > ncpus){ + errno = EINVAL; + return -1; + } + bmp = numa_bitmask_alloc(ncpus); + nnodes = numa_max_node(); + for (node = 0; node <= nnodes; node++){ + if (numa_node_to_cpus_v2_int(node, bmp) < 0) { + /* It's possible for the node to not exist */ + continue; + } + if (numa_bitmask_isbitset(bmp, cpu)){ + ret = node; + goto end; + } + } + ret = -1; + errno = EINVAL; +end: + numa_bitmask_free(bmp); + return ret; +} + +SYMVER("numa_run_on_node_mask_v1", "numa_run_on_node_mask@libnuma_1.1") +int +numa_run_on_node_mask_v1(const nodemask_t *mask) +{ + int ncpus = numa_num_possible_cpus(); + int i, k, err; + unsigned long cpus[CPU_LONGS(ncpus)], nodecpus[CPU_LONGS(ncpus)]; + memset(cpus, 0, CPU_BYTES(ncpus)); + for (i = 0; i < NUMA_NUM_NODES; i++) { + if (mask->n[i / BITS_PER_LONG] == 0) + continue; + if (nodemask_isset_compat(mask, i)) { + if (numa_node_to_cpus_v1_int(i, nodecpus, CPU_BYTES(ncpus)) < 0) { + numa_warn(W_noderunmask, + "Cannot read node cpumask from sysfs"); + continue; + } + for (k = 0; k < CPU_LONGS(ncpus); k++) + cpus[k] |= nodecpus[k]; + } + } + err = numa_sched_setaffinity_v1(0, CPU_BYTES(ncpus), cpus); + + /* The sched_setaffinity API is broken because it expects + the user to guess the kernel cpuset size. Do this in a + brute force way. 
*/ + if (err < 0 && errno == EINVAL) { + int savederrno = errno; + char *bigbuf; + static int size = -1; + if (size == -1) + size = CPU_BYTES(ncpus) * 2; + bigbuf = malloc(CPU_BUFFER_SIZE); + if (!bigbuf) { + errno = ENOMEM; + return -1; + } + errno = savederrno; + while (size <= CPU_BUFFER_SIZE) { + memcpy(bigbuf, cpus, CPU_BYTES(ncpus)); + memset(bigbuf + CPU_BYTES(ncpus), 0, + CPU_BUFFER_SIZE - CPU_BYTES(ncpus)); + err = numa_sched_setaffinity_v1_int(0, size, (unsigned long *)bigbuf); + if (err == 0 || errno != EINVAL) + break; + size *= 2; + } + savederrno = errno; + free(bigbuf); + errno = savederrno; + } + return err; +} + +/* + * Given a node mask (size of a kernel nodemask_t) (probably populated by + * a user argument list) set up a map of cpus (map "cpus") on those nodes. + * Then set affinity to those cpus. + */ +SYMVER("numa_run_on_node_mask_v2", "numa_run_on_node_mask@@libnuma_1.2") +int +numa_run_on_node_mask_v2(struct bitmask *bmp) +{ + int ncpus, i, k, err; + struct bitmask *cpus, *nodecpus; + + cpus = numa_allocate_cpumask(); + ncpus = cpus->size; + nodecpus = numa_allocate_cpumask(); + + for (i = 0; i < bmp->size; i++) { + if (bmp->maskp[i / BITS_PER_LONG] == 0) + continue; + if (numa_bitmask_isbitset(bmp, i)) { + /* + * numa_all_nodes_ptr is cpuset aware; use only + * these nodes + */ + if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) { + numa_warn(W_noderunmask, + "node %d not allowed", i); + continue; + } + if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { + numa_warn(W_noderunmask, + "Cannot read node cpumask from sysfs"); + continue; + } + for (k = 0; k < CPU_LONGS(ncpus); k++) + cpus->maskp[k] |= nodecpus->maskp[k]; + } + } + err = numa_sched_setaffinity_v2_int(0, cpus); + + numa_bitmask_free(cpus); + numa_bitmask_free(nodecpus); + + /* used to have to consider that this could fail - it shouldn't now */ + if (err < 0) { + numa_error("numa_sched_setaffinity_v2_int() failed"); + } + + return err; +} + 
+make_internal_alias(numa_run_on_node_mask_v2); + +/* + * Given a node mask (size of a kernel nodemask_t) (probably populated by + * a user argument list) set up a map of cpus (map "cpus") on those nodes + * without any cpuset awareness. Then set affinity to those cpus. + */ +int +numa_run_on_node_mask_all(struct bitmask *bmp) +{ + int ncpus, i, k, err; + struct bitmask *cpus, *nodecpus; + + cpus = numa_allocate_cpumask(); + ncpus = cpus->size; + nodecpus = numa_allocate_cpumask(); + + for (i = 0; i < bmp->size; i++) { + if (bmp->maskp[i / BITS_PER_LONG] == 0) + continue; + if (numa_bitmask_isbitset(bmp, i)) { + if (!numa_bitmask_isbitset(numa_possible_nodes_ptr, i)) { + numa_warn(W_noderunmask, + "node %d not allowed", i); + continue; + } + if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { + numa_warn(W_noderunmask, + "Cannot read node cpumask from sysfs"); + continue; + } + for (k = 0; k < CPU_LONGS(ncpus); k++) + cpus->maskp[k] |= nodecpus->maskp[k]; + } + } + err = numa_sched_setaffinity_v2_int(0, cpus); + + numa_bitmask_free(cpus); + numa_bitmask_free(nodecpus); + + /* With possible nodes freedom it can happen easily now */ + if (err < 0) { + numa_error("numa_sched_setaffinity_v2_int() failed"); + } + + return err; +} + +SYMVER("numa_get_run_node_mask_v1", "numa_get_run_node_mask@libnuma_1.1") +nodemask_t +numa_get_run_node_mask_v1(void) +{ + int ncpus = numa_num_configured_cpus(); + int i, k; + int max = numa_max_node_int(); + struct bitmask *bmp, *cpus, *nodecpus; + nodemask_t nmp; + + cpus = numa_allocate_cpumask(); + if (numa_sched_getaffinity_v2_int(0, cpus) < 0){ + nmp = numa_no_nodes; + goto free_cpus; + } + + nodecpus = numa_allocate_cpumask(); + bmp = allocate_nodemask_v1(); /* the size of a nodemask_t */ + for (i = 0; i <= max; i++) { + if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { + /* It's possible for the node to not exist */ + continue; + } + for (k = 0; k < CPU_LONGS(ncpus); k++) { + if (nodecpus->maskp[k] & cpus->maskp[k]) + 
numa_bitmask_setbit(bmp, i); + } + } + copy_bitmask_to_nodemask(bmp, &nmp); + numa_bitmask_free(bmp); + numa_bitmask_free(nodecpus); +free_cpus: + numa_bitmask_free(cpus); + return nmp; +} + +SYMVER("numa_get_run_node_mask_v2", "numa_get_run_node_mask@@libnuma_1.2") +struct bitmask * +numa_get_run_node_mask_v2(void) +{ + int i, k; + int ncpus = numa_num_configured_cpus(); + int max = numa_max_node_int(); + struct bitmask *bmp, *cpus, *nodecpus; + + bmp = numa_allocate_cpumask(); + cpus = numa_allocate_cpumask(); + if (numa_sched_getaffinity_v2_int(0, cpus) < 0){ + copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); + goto free_cpus; + } + + nodecpus = numa_allocate_cpumask(); + for (i = 0; i <= max; i++) { + /* + * numa_all_nodes_ptr is cpuset aware; show only + * these nodes + */ + if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) { + continue; + } + if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { + /* It's possible for the node to not exist */ + continue; + } + for (k = 0; k < CPU_LONGS(ncpus); k++) { + if (nodecpus->maskp[k] & cpus->maskp[k]) + numa_bitmask_setbit(bmp, i); + } + } + numa_bitmask_free(nodecpus); +free_cpus: + numa_bitmask_free(cpus); + return bmp; +} + +int +numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bitmask *tonodes) +{ + int numa_num_nodes = numa_num_possible_nodes(); + + return migrate_pages(pid, numa_num_nodes + 1, fromnodes->maskp, + tonodes->maskp); +} + +int numa_move_pages(int pid, unsigned long count, + void **pages, const int *nodes, int *status, int flags) +{ + return move_pages(pid, count, pages, nodes, status, flags); +} + +int numa_run_on_node(int node) +{ + int numa_num_nodes = numa_num_possible_nodes(); + int ret = -1; + struct bitmask *cpus; + + if (node >= numa_num_nodes){ + errno = EINVAL; + goto out; + } + + cpus = numa_allocate_cpumask(); + + if (node == -1) + numa_bitmask_setall(cpus); + else if (numa_node_to_cpus_v2_int(node, cpus) < 0){ + numa_warn(W_noderunmask, "Cannot read node cpumask from sysfs"); + 
goto free; + } + + ret = numa_sched_setaffinity_v2_int(0, cpus); +free: + numa_bitmask_free(cpus); +out: + return ret; +} + +static struct bitmask *__numa_preferred(void) +{ + int policy; + struct bitmask *bmp; + + bmp = numa_allocate_nodemask(); + /* could read the current CPU from /proc/self/status. Probably + not worth it. */ + numa_bitmask_clearall(bmp); + getpol(&policy, bmp); + + if (policy != MPOL_PREFERRED && + policy != MPOL_PREFERRED_MANY && + policy != MPOL_BIND) + return bmp; + + if (numa_bitmask_weight(bmp) > 1) + numa_error(__FILE__); + + return bmp; +} + +int numa_preferred(void) +{ + int first_node = 0; + struct bitmask *bmp; + + bmp = __numa_preferred(); + first_node = numa_find_first(bmp); + numa_bitmask_free(bmp); + + return first_node; +} + +static void __numa_set_preferred(struct bitmask *bmp) +{ + int nodes = numa_bitmask_weight(bmp); + if (nodes > 1) + numa_error(__FILE__); + setpol(nodes ? MPOL_PREFERRED : MPOL_LOCAL, bmp); +} + +void numa_set_preferred(int node) +{ + struct bitmask *bmp = numa_allocate_nodemask(); + numa_bitmask_setbit(bmp, node); + __numa_set_preferred(bmp); + numa_bitmask_free(bmp); +} + +int numa_has_preferred_many(void) +{ + return has_preferred_many; +} + +void numa_set_preferred_many(struct bitmask *bitmask) +{ + int first_node = 0; + + if (!has_preferred_many) { + numa_warn(W_nodeparse, + "Unable to handle MANY preferred nodes. 
Falling back to first node\n"); + first_node = numa_find_first(bitmask); + numa_set_preferred(first_node); + return; + } + setpol(MPOL_PREFERRED_MANY, bitmask); +} + +struct bitmask *numa_preferred_many() +{ + return __numa_preferred(); +} + +void numa_set_localalloc(void) +{ + setpol(MPOL_LOCAL, numa_no_nodes_ptr); +} + +SYMVER("numa_bind_v1", "numa_bind@libnuma_1.1") +void numa_bind_v1(const nodemask_t *nodemask) +{ + struct bitmask bitmask; + + bitmask.maskp = (unsigned long *)nodemask; + bitmask.size = sizeof(nodemask_t); + numa_run_on_node_mask_v2_int(&bitmask); + numa_set_membind_v2_int(&bitmask); +} + +SYMVER("numa_bind_v2", "numa_bind@@libnuma_1.2") +void numa_bind_v2(struct bitmask *bmp) +{ + numa_run_on_node_mask_v2_int(bmp); + numa_set_membind_v2_int(bmp); +} + +void numa_set_strict(int flag) +{ + if (flag) + mbind_flags |= MPOL_MF_STRICT; + else + mbind_flags &= ~MPOL_MF_STRICT; +} + +/* + * Extract a node or processor number from the given string. + * Allow a relative node / processor specification within the allowed + * set if "relative" is nonzero + */ +static unsigned long get_nr(const char *s, char **end, struct bitmask *bmp, int relative) +{ + long i, nr; + + if (!relative) + return strtoul(s, end, 0); + + nr = strtoul(s, end, 0); + if (s == *end) + return nr; + /* Find the nth set bit */ + for (i = 0; nr >= 0 && i <= bmp->size; i++) + if (numa_bitmask_isbitset(bmp, i)) + nr--; + return i-1; +} + +/* + * __numa_parse_nodestring() is called to create a node mask, given + * an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10. + * (the + indicates that the numbers are nodeset-relative) + * + * The nodes may be specified as absolute, or relative to the current nodeset. + * The list of available nodes is in a map pointed to by "allowed_nodes_ptr", + * which may represent all nodes or the nodes in the current nodeset. + * + * The caller must free the returned bitmask. 
+ */ +static struct bitmask * +__numa_parse_nodestring(const char *s, struct bitmask *allowed_nodes_ptr) +{ + int invert = 0, relative = 0; + int conf_nodes = numa_num_configured_nodes(); + char *end; + struct bitmask *mask; + + mask = numa_allocate_nodemask(); + + if (s[0] == 0){ + copy_bitmask_to_bitmask(numa_no_nodes_ptr, mask); + return mask; /* return freeable mask */ + } + if (*s == '!') { + invert = 1; + s++; + } + if (*s == '+') { + relative++; + s++; + } + do { + unsigned long arg; + int i; + if (isalpha(*s)) { + int n; + if (!strcmp(s,"all")) { + copy_bitmask_to_bitmask(allowed_nodes_ptr, + mask); + s+=4; + break; + } + n = resolve_affinity(s, mask); + if (n != NO_IO_AFFINITY) { + if (n < 0) + goto err; + s += strlen(s) + 1; + break; + } + } + arg = get_nr(s, &end, allowed_nodes_ptr, relative); + if (end == s) { + numa_warn(W_nodeparse, "unparseable node description `%s'\n", s); + goto err; + } + if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg)) { + numa_warn(W_nodeparse, "node argument %d is out of range\n", arg); + goto err; + } + i = arg; + numa_bitmask_setbit(mask, i); + s = end; + if (*s == '-') { + char *end2; + unsigned long arg2; + arg2 = get_nr(++s, &end2, allowed_nodes_ptr, relative); + if (end2 == s) { + numa_warn(W_nodeparse, "missing node argument %s\n", s); + goto err; + } + if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg2)) { + numa_warn(W_nodeparse, "node argument %d out of range\n", arg2); + goto err; + } + while (arg <= arg2) { + i = arg; + if (numa_bitmask_isbitset(allowed_nodes_ptr,i)) + numa_bitmask_setbit(mask, i); + arg++; + } + s = end2; + } + } while (*s++ == ','); + if (s[-1] != '\0') + goto err; + if (invert) { + int i; + for (i = 0; i < conf_nodes; i++) { + if (numa_bitmask_isbitset(mask, i)) + numa_bitmask_clearbit(mask, i); + else + numa_bitmask_setbit(mask, i); + } + } + return mask; + +err: + numa_bitmask_free(mask); + return NULL; +} + +/* + * numa_parse_nodestring() is called to create a bitmask from nodes available + 
* for this task. + */ + +struct bitmask * numa_parse_nodestring(const char *s) +{ + return __numa_parse_nodestring(s, numa_all_nodes_ptr); +} + +/* + * numa_parse_nodestring_all() is called to create a bitmask from all nodes + * available. + */ + +struct bitmask * numa_parse_nodestring_all(const char *s) +{ + return __numa_parse_nodestring(s, numa_possible_nodes_ptr); +} + +/* + * __numa_parse_cpustring() is called to create a bitmask, given + * an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10. + * (the + indicates that the numbers are cpuset-relative) + * + * The cpus may be specified as absolute, or relative to the current cpuset. + * The list of available cpus for this task is in the map pointed to by + * "allowed_cpus_ptr", which may represent all cpus or the cpus in the + * current cpuset. + * + * The caller must free the returned bitmask. + */ +static struct bitmask * +__numa_parse_cpustring(const char *s, struct bitmask *allowed_cpus_ptr) +{ + int invert = 0, relative=0; + int conf_cpus = numa_num_configured_cpus(); + char *end; + struct bitmask *mask; + int i; + + mask = numa_allocate_cpumask(); + + if (s[0] == 0) + return mask; + if (*s == '!') { + invert = 1; + s++; + } + if (*s == '+') { + relative++; + s++; + } + do { + unsigned long arg; + + if (!strcmp(s,"all")) { + copy_bitmask_to_bitmask(allowed_cpus_ptr, mask); + s+=4; + break; + } + arg = get_nr(s, &end, allowed_cpus_ptr, relative); + if (end == s) { + numa_warn(W_cpuparse, "unparseable cpu description `%s'\n", s); + goto err; + } + if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg)) { + numa_warn(W_cpuparse, "cpu argument %s is out of range\n", s); + goto err; + } + i = arg; + numa_bitmask_setbit(mask, i); + s = end; + if (*s == '-') { + char *end2; + unsigned long arg2; + arg2 = get_nr(++s, &end2, allowed_cpus_ptr, relative); + if (end2 == s) { + numa_warn(W_cpuparse, "missing cpu argument %s\n", s); + goto err; + } + if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg2)) { + 
numa_warn(W_cpuparse, "cpu argument %s out of range\n", s); + goto err; + } + while (arg <= arg2) { + i = arg; + if (numa_bitmask_isbitset(allowed_cpus_ptr, i)) + numa_bitmask_setbit(mask, i); + arg++; + } + s = end2; + } + } while (*s++ == ','); + if (s[-1] != '\0') + goto err; + if (invert) { + for (i = 0; i < conf_cpus; i++) { + if (numa_bitmask_isbitset(mask, i)) + numa_bitmask_clearbit(mask, i); + else + numa_bitmask_setbit(mask, i); + } + } + return mask; + +err: + numa_bitmask_free(mask); + return NULL; +} + +/* + * numa_parse_cpustring() is called to create a bitmask from cpus available + * for this task. + */ + +struct bitmask * numa_parse_cpustring(const char *s) +{ + return __numa_parse_cpustring(s, numa_all_cpus_ptr); +} + +/* + * numa_parse_cpustring_all() is called to create a bitmask from all cpus + * available. + */ + +struct bitmask * numa_parse_cpustring_all(const char *s) +{ + return __numa_parse_cpustring(s, numa_possible_cpus_ptr); +} diff --git a/contrib/libs/numa/numa.h b/contrib/libs/numa/numa.h new file mode 100644 index 0000000000..b4fcd1f8e9 --- /dev/null +++ b/contrib/libs/numa/numa.h @@ -0,0 +1,494 @@ +/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs. + + libnuma is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; version + 2.1. + + libnuma is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should find a copy of v2.1 of the GNU Lesser General Public License + somewhere on your Linux system; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _NUMA_H +#define _NUMA_H 1 + +/* allow an application to test for the current programming interface: */ +#define LIBNUMA_API_VERSION 2 + +/* Simple NUMA policy library */ + +#include <stddef.h> +#include <string.h> +#include <sys/types.h> +#include <stdlib.h> + +#if defined(__x86_64__) || defined(__i386__) +#define NUMA_NUM_NODES 128 +#else +#define NUMA_NUM_NODES 2048 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + unsigned long n[NUMA_NUM_NODES/(sizeof(unsigned long)*8)]; +} nodemask_t; + +struct bitmask { + unsigned long size; /* number of bits in the map */ + unsigned long *maskp; +}; + +/* operations on struct bitmask */ +int numa_bitmask_isbitset(const struct bitmask *, unsigned int); +struct bitmask *numa_bitmask_setall(struct bitmask *); +struct bitmask *numa_bitmask_clearall(struct bitmask *); +struct bitmask *numa_bitmask_setbit(struct bitmask *, unsigned int); +struct bitmask *numa_bitmask_clearbit(struct bitmask *, unsigned int); +unsigned int numa_bitmask_nbytes(struct bitmask *); +unsigned int numa_bitmask_weight(const struct bitmask *); +struct bitmask *numa_bitmask_alloc(unsigned int); +void numa_bitmask_free(struct bitmask *); +int numa_bitmask_equal(const struct bitmask *, const struct bitmask *); +void copy_nodemask_to_bitmask(nodemask_t *, struct bitmask *); +void copy_bitmask_to_nodemask(struct bitmask *, nodemask_t *); +void copy_bitmask_to_bitmask(struct bitmask *, struct bitmask *); + +/* compatibility for codes that used them: */ + +static inline void nodemask_zero(nodemask_t *mask) +{ + struct bitmask tmp; + + tmp.maskp = (unsigned long *)mask; + tmp.size = sizeof(nodemask_t) * 8; + numa_bitmask_clearall(&tmp); +} + +static inline void nodemask_zero_compat(nodemask_t *mask) +{ + struct 
bitmask tmp; + + tmp.maskp = (unsigned long *)mask; + tmp.size = sizeof(nodemask_t) * 8; + numa_bitmask_clearall(&tmp); +} + +static inline void nodemask_set_compat(nodemask_t *mask, int node) +{ + mask->n[node / (8*sizeof(unsigned long))] |= + (1UL<<(node%(8*sizeof(unsigned long)))); +} + +static inline void nodemask_clr_compat(nodemask_t *mask, int node) +{ + mask->n[node / (8*sizeof(unsigned long))] &= + ~(1UL<<(node%(8*sizeof(unsigned long)))); +} + +static inline int nodemask_isset_compat(const nodemask_t *mask, int node) +{ + if ((unsigned)node >= NUMA_NUM_NODES) + return 0; + if (mask->n[node / (8*sizeof(unsigned long))] & + (1UL<<(node%(8*sizeof(unsigned long))))) + return 1; + return 0; +} + +static inline int nodemask_equal(const nodemask_t *a, const nodemask_t *b) +{ + struct bitmask tmp_a, tmp_b; + + tmp_a.maskp = (unsigned long *)a; + tmp_a.size = sizeof(nodemask_t) * 8; + + tmp_b.maskp = (unsigned long *)b; + tmp_b.size = sizeof(nodemask_t) * 8; + + return numa_bitmask_equal(&tmp_a, &tmp_b); +} + +static inline int nodemask_equal_compat(const nodemask_t *a, const nodemask_t *b) +{ + struct bitmask tmp_a, tmp_b; + + tmp_a.maskp = (unsigned long *)a; + tmp_a.size = sizeof(nodemask_t) * 8; + + tmp_b.maskp = (unsigned long *)b; + tmp_b.size = sizeof(nodemask_t) * 8; + + return numa_bitmask_equal(&tmp_a, &tmp_b); +} + +/* NUMA support available. If this returns a negative value all other function + in this library are undefined. */ +int numa_available(void); + +/* Basic NUMA state */ + +/* Get max available node */ +int numa_max_node(void); +int numa_max_possible_node(void); +/* Return preferred node */ +int numa_preferred(void); + +/* Return node size and free memory */ +long long numa_node_size64(int node, long long *freep); +long numa_node_size(int node, long *freep); + +int numa_pagesize(void); + +/* Set with all nodes from which the calling process may allocate memory. + Only valid after numa_available. 
*/ +extern struct bitmask *numa_all_nodes_ptr; + +/* Set with all nodes the kernel has exposed to userspace */ +extern struct bitmask *numa_nodes_ptr; + +/* For source compatibility */ +extern nodemask_t numa_all_nodes; + +/* Set with all cpus. */ +extern struct bitmask *numa_all_cpus_ptr; + +/* Set with no nodes */ +extern struct bitmask *numa_no_nodes_ptr; + +/* Source compatibility */ +extern nodemask_t numa_no_nodes; + +/* Only run and allocate memory from a specific set of nodes. */ +void numa_bind(struct bitmask *nodes); + +/* Set the NUMA node interleaving mask. 0 to turn off interleaving */ +void numa_set_interleave_mask(struct bitmask *nodemask); + +/* Return the current interleaving mask */ +struct bitmask *numa_get_interleave_mask(void); + +/* allocate a bitmask big enough for all nodes */ +struct bitmask *numa_allocate_nodemask(void); + +static inline void numa_free_nodemask(struct bitmask *b) +{ + numa_bitmask_free(b); +} + +/* Some node to preferably allocate memory from for task. */ +void numa_set_preferred(int node); + +/* Returns whether or not the platform supports MPOL_PREFERRED_MANY */ +int numa_has_preferred_many(void); + +/* Set of nodes to preferably allocate memory from for task. */ +void numa_set_preferred_many(struct bitmask *bitmask); + +/* Return preferred nodes */ +struct bitmask *numa_preferred_many(void); + +/* Set local memory allocation policy for task */ +void numa_set_localalloc(void); + +/* Only allocate memory from the nodes set in mask. 0 to turn off */ +void numa_set_membind(struct bitmask *nodemask); + +/* Only allocate memory from the nodes set in mask. Optimize page + placement with Linux kernel NUMA balancing if possible. 0 to turn off */ +void numa_set_membind_balancing(struct bitmask *bmp); + +/* Return current membind */ +struct bitmask *numa_get_membind(void); + +/* Return allowed memories [nodes] */ +struct bitmask *numa_get_mems_allowed(void); + +int numa_get_interleave_node(void); + +/* NUMA memory allocation. 
These functions always round to page size + and are relatively slow. */ + +/* Alloc memory page interleaved on nodes in mask */ +void *numa_alloc_interleaved_subset(size_t size, struct bitmask *nodemask); +/* Alloc memory page interleaved on all nodes. */ +void *numa_alloc_interleaved(size_t size); +/* Alloc memory located on node */ +void *numa_alloc_onnode(size_t size, int node); +/* Alloc memory on local node */ +void *numa_alloc_local(size_t size); +/* Allocation with current policy */ +void *numa_alloc(size_t size); +/* Change the size of a memory area preserving the memory policy */ +void *numa_realloc(void *old_addr, size_t old_size, size_t new_size); +/* Free memory allocated by the functions above */ +void numa_free(void *mem, size_t size); + +/* Low level functions, primarily for shared memory. All memory + processed by these must not be touched yet */ + +/* Interleave a memory area. */ +void numa_interleave_memory(void *mem, size_t size, struct bitmask *mask); + +/* Allocate a memory area on a specific node. */ +void numa_tonode_memory(void *start, size_t size, int node); + +/* Allocate memory on a mask of nodes. */ +void numa_tonodemask_memory(void *mem, size_t size, struct bitmask *mask); + +/* Allocate a memory area on the current node. */ +void numa_setlocal_memory(void *start, size_t size); + +/* Allocate memory area with current memory policy */ +void numa_police_memory(void *start, size_t size); + +/* Run current task only on nodes in mask */ +int numa_run_on_node_mask(struct bitmask *mask); +/* Run current task on nodes in mask without any cpuset awareness */ +int numa_run_on_node_mask_all(struct bitmask *mask); +/* Run current task only on node */ +int numa_run_on_node(int node); +/* Return current mask of nodes the task can run on */ +struct bitmask * numa_get_run_node_mask(void); + +/* When strict fail allocation when memory cannot be allocated in target node(s). 
*/ +void numa_set_bind_policy(int strict); + +/* Fail when existing memory has incompatible policy */ +void numa_set_strict(int flag); + +/* maximum nodes (size of kernel nodemask_t) */ +int numa_num_possible_nodes(void); + +/* maximum cpus (size of kernel cpumask_t) */ +int numa_num_possible_cpus(void); + +/* nodes in the system */ +int numa_num_configured_nodes(void); + +/* maximum cpus */ +int numa_num_configured_cpus(void); + +/* maximum cpus allowed to current task */ +int numa_num_task_cpus(void); +int numa_num_thread_cpus(void); /* backward compatibility */ + +/* maximum nodes allowed to current task */ +int numa_num_task_nodes(void); +int numa_num_thread_nodes(void); /* backward compatibility */ + +/* allocate a bitmask the size of the kernel cpumask_t */ +struct bitmask *numa_allocate_cpumask(void); + +static inline void numa_free_cpumask(struct bitmask *b) +{ + numa_bitmask_free(b); +} + +/* Convert node to CPU mask. -1/errno on failure, otherwise 0. */ +int numa_node_to_cpus(int, struct bitmask *); + +void numa_node_to_cpu_update(void); + +/* report the node of the specified cpu. -1/errno on invalid cpu. */ +int numa_node_of_cpu(int cpu); + +/* Report distance of node1 from node2. 0 on error.*/ +int numa_distance(int node1, int node2); + +/* Error handling. */ +/* This is an internal function in libnuma that can be overwritten by an user + program. Default is to print an error to stderr and exit if numa_exit_on_error + is true. */ +void numa_error(char *where); + +/* When true exit the program when a NUMA system call (except numa_available) + fails */ +extern int numa_exit_on_error; +/* Warning function. Can also be overwritten. Default is to print on stderr + once. 
*/ +void numa_warn(int num, char *fmt, ...); + +/* When true exit the program on a numa_warn() call */ +extern int numa_exit_on_warn; + +int numa_migrate_pages(int pid, struct bitmask *from, struct bitmask *to); + +int numa_move_pages(int pid, unsigned long count, void **pages, + const int *nodes, int *status, int flags); + +int numa_sched_getaffinity(pid_t, struct bitmask *); +int numa_sched_setaffinity(pid_t, struct bitmask *); + +/* Convert an ascii list of nodes to a bitmask */ +struct bitmask *numa_parse_nodestring(const char *); + +/* Convert an ascii list of nodes to a bitmask without current nodeset + * dependency */ +struct bitmask *numa_parse_nodestring_all(const char *); + +/* Convert an ascii list of cpu to a bitmask */ +struct bitmask *numa_parse_cpustring(const char *); + +/* Convert an ascii list of cpu to a bitmask without current taskset + * dependency */ +struct bitmask *numa_parse_cpustring_all(const char *); + +/* + * The following functions are for source code compatibility + * with releases prior to version 2. + * Such codes should be compiled with NUMA_VERSION1_COMPATIBILITY defined. 
+ */ + +/* Wraps the caller's nodemask_t in a stack struct bitmask (no copy) and forwards it. */ +static inline void numa_set_interleave_mask_compat(nodemask_t *nodemask) +{ + struct bitmask tmp; + + tmp.maskp = (unsigned long *)nodemask; + tmp.size = sizeof(nodemask_t) * 8; + numa_set_interleave_mask(&tmp); +} + +static inline nodemask_t numa_get_interleave_mask_compat(void) +{ + struct bitmask *tp; + nodemask_t mask; + + tp = numa_get_interleave_mask(); + copy_bitmask_to_nodemask(tp, &mask); + numa_bitmask_free(tp); + return mask; +} + +static inline void numa_bind_compat(nodemask_t *mask) +{ + struct bitmask *tp; + + tp = numa_allocate_nodemask(); + copy_nodemask_to_bitmask(mask, tp); + numa_bind(tp); + numa_bitmask_free(tp); +} + +static inline void numa_set_membind_compat(nodemask_t *mask) +{ + struct bitmask tmp; + + tmp.maskp = (unsigned long *)mask; + tmp.size = sizeof(nodemask_t) * 8; + numa_set_membind(&tmp); +} + +static inline nodemask_t numa_get_membind_compat(void) +{ + struct bitmask *tp; + nodemask_t mask; + + tp = numa_get_membind(); + copy_bitmask_to_nodemask(tp, &mask); + numa_bitmask_free(tp); + return mask; +} + +static inline void *numa_alloc_interleaved_subset_compat(size_t size, + const nodemask_t *mask) +{ + struct bitmask tmp; + + tmp.maskp = (unsigned long *)mask; + tmp.size = sizeof(nodemask_t) * 8; + return numa_alloc_interleaved_subset(size, &tmp); +} + +static inline int numa_run_on_node_mask_compat(const nodemask_t *mask) +{ + struct bitmask tmp; + + tmp.maskp = (unsigned long *)mask; + tmp.size = sizeof(nodemask_t) * 8; + return numa_run_on_node_mask(&tmp); +} + +static inline nodemask_t numa_get_run_node_mask_compat(void) +{ + struct bitmask *tp; + nodemask_t mask; + + tp = numa_get_run_node_mask(); + copy_bitmask_to_nodemask(tp, &mask); + numa_bitmask_free(tp); + return mask; +} + +static inline void numa_interleave_memory_compat(void *mem, size_t size, + const nodemask_t *mask) +{ + struct bitmask tmp; + + tmp.maskp = (unsigned long *)mask; + tmp.size = sizeof(nodemask_t) * 8; + numa_interleave_memory(mem, size, 
&tmp); +} + +static inline void numa_tonodemask_memory_compat(void *mem, size_t size, + const nodemask_t *mask) +{ + struct bitmask tmp; + + tmp.maskp = (unsigned long *)mask; + tmp.size = sizeof(nodemask_t) * 8; + numa_tonodemask_memory(mem, size, &tmp); +} + +static inline int numa_sched_getaffinity_compat(pid_t pid, unsigned len, + unsigned long *mask) +{ + struct bitmask tmp; + + tmp.maskp = (unsigned long *)mask; + tmp.size = len * 8; + return numa_sched_getaffinity(pid, &tmp); +} + +static inline int numa_sched_setaffinity_compat(pid_t pid, unsigned len, + unsigned long *mask) +{ + struct bitmask tmp; + + tmp.maskp = (unsigned long *)mask; + tmp.size = len * 8; + return numa_sched_setaffinity(pid, &tmp); +} + +static inline int numa_node_to_cpus_compat(int node, unsigned long *buffer, + int buffer_len) +{ + struct bitmask tmp; + + tmp.maskp = (unsigned long *)buffer; + tmp.size = buffer_len * 8; + return numa_node_to_cpus(node, &tmp); +} + +/* end of version 1 compatibility functions */ + +/* + * To compile an application that uses libnuma version 1: + * add -DNUMA_VERSION1_COMPATIBILITY to your Makefile's CFLAGS + */ +#ifdef NUMA_VERSION1_COMPATIBILITY +#error #include <numacompat1.h> +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/libs/numa/numaif.h b/contrib/libs/numa/numaif.h new file mode 100644 index 0000000000..3208fe9774 --- /dev/null +++ b/contrib/libs/numa/numaif.h @@ -0,0 +1,52 @@ +#ifndef NUMAIF_H +#define NUMAIF_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* Kernel interface for NUMA API */ + +/* System calls */ +extern long get_mempolicy(int *mode, unsigned long *nmask, + unsigned long maxnode, void *addr, unsigned flags); +extern long mbind(void *start, unsigned long len, int mode, + const unsigned long *nmask, unsigned long maxnode, unsigned flags); +extern long set_mempolicy(int mode, const unsigned long *nmask, + unsigned long maxnode); +extern long migrate_pages(int pid, unsigned long maxnode, + const unsigned 
long *frommask, + const unsigned long *tomask); + +extern long move_pages(int pid, unsigned long count, + void **pages, const int *nodes, int *status, int flags); + +/* Policies */ +#define MPOL_DEFAULT 0 +#define MPOL_PREFERRED 1 +#define MPOL_BIND 2 +#define MPOL_INTERLEAVE 3 +#define MPOL_LOCAL 4 +#define MPOL_PREFERRED_MANY 5 +#define MPOL_MAX 6 + +/* Flags for set_mempolicy, specified in mode */ +#define MPOL_F_NUMA_BALANCING (1 << 13) /* Optimize with NUMA balancing if possible */ + +/* Flags for get_mempolicy */ +#define MPOL_F_NODE (1<<0) /* return next il node or node of address */ + /* Warning: MPOL_F_NODE is unsupported and + subject to change. Don't use. */ +#define MPOL_F_ADDR (1<<1) /* look up vma using address */ +#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */ + +/* Flags for mbind */ +#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ +#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */ +#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/libs/numa/numaint.h b/contrib/libs/numa/numaint.h new file mode 100644 index 0000000000..e9cd385735 --- /dev/null +++ b/contrib/libs/numa/numaint.h @@ -0,0 +1,57 @@ +/* Internal interfaces of libnuma */ + +extern int numa_sched_setaffinity_v1(pid_t pid, unsigned len, const unsigned long *mask); +extern int numa_sched_getaffinity_v1(pid_t pid, unsigned len, const unsigned long *mask); +extern int numa_sched_setaffinity_v1_int(pid_t pid, unsigned len,const unsigned long *mask); +extern int numa_sched_getaffinity_v1_int(pid_t pid, unsigned len,const unsigned long *mask); +extern int numa_sched_setaffinity_v2(pid_t pid, struct bitmask *mask); +extern int numa_sched_getaffinity_v2(pid_t pid, struct bitmask *mask); +extern int numa_sched_setaffinity_v2_int(pid_t pid, struct bitmask *mask); +extern int numa_sched_getaffinity_v2_int(pid_t pid, struct 
bitmask *mask); + +#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ + +#define BITS_PER_LONG (sizeof(unsigned long) * 8) +#define CPU_BYTES(x) (round_up(x, BITS_PER_LONG)/8) +#define CPU_LONGS(x) (CPU_BYTES(x) / sizeof(long)) + +#define make_internal_alias(x) extern __typeof (x) x##_int __attribute((alias(#x), visibility("hidden"))) +#define hidden __attribute__((visibility("hidden"))) + +enum numa_warn { + W_nosysfs, + W_noproc, + W_badmeminfo, + W_nosysfs2, + W_cpumap, + W_numcpus, + W_noderunmask, + W_distance, + W_memory, + W_cpuparse, + W_nodeparse, + W_blockdev1, + W_blockdev2, + W_blockdev3, + W_blockdev4, + W_blockdev5, + W_netlink1, + W_netlink2, + W_netlink3, + W_net1, + W_net2, + W_class1, + W_class2, + W_pci1, + W_pci2, + W_node_parse1, + W_node_parse2, + W_nonode, + W_badchar, +}; + +#define howmany(x,y) (((x)+((y)-1))/(y)) +#define bitsperlong (8 * sizeof(unsigned long)) +#define bitsperint (8 * sizeof(unsigned int)) +#define longsperbits(n) howmany(n, bitsperlong) +#define bytesperbits(x) ((x+7)/8) diff --git a/contrib/libs/numa/rtnetlink.c b/contrib/libs/numa/rtnetlink.c new file mode 100644 index 0000000000..985f74a657 --- /dev/null +++ b/contrib/libs/numa/rtnetlink.c @@ -0,0 +1,89 @@ +/* Simple LGPLed rtnetlink library */ +#include <sys/socket.h> +#include <linux/rtnetlink.h> +#include <linux/netlink.h> +#include <netinet/in.h> +#include <errno.h> +#include <unistd.h> +#define hidden __attribute__((visibility("hidden"))) +#include "rtnetlink.h" + +/* Append an attribute header of the given type and payload length at the + aligned end of the message, grow nlmsg_len accordingly and return a + pointer to the attribute payload. The caller must provide the room. */ +hidden void *rta_put(struct nlmsghdr *m, int type, int len) +{ + struct rtattr *rta = (void *)m + NLMSG_ALIGN(m->nlmsg_len); + int rtalen = RTA_LENGTH(len); + + rta->rta_type = type; + rta->rta_len = rtalen; + m->nlmsg_len = NLMSG_ALIGN(m->nlmsg_len) + RTA_ALIGN(rtalen); + return RTA_DATA(rta); +} + +/* Return the attribute following p, or NULL when RTA_OK rejects it; with + p == NULL return the attribute at the aligned offset (not validated). */ +hidden struct rtattr *rta_get(struct nlmsghdr *m, struct rtattr *p, int offset) +{ + struct rtattr *rta; + + if (p) { + rta = RTA_NEXT(p, m->nlmsg_len); + if (!RTA_OK(rta, m->nlmsg_len)) + 
return NULL; + } else { + rta = (void *)m + NLMSG_ALIGN(offset); + } + return rta; +} + +/* Append the IPv4 or IPv6 address held in adr as an attribute of the given + type. Returns 0, or -1 for any other address family. */ +hidden int +rta_put_address(struct nlmsghdr *msg, int type, struct sockaddr *adr) +{ + switch (adr->sa_family) { + case AF_INET: { + struct in_addr *i = rta_put(msg, type, 4); + *i = ((struct sockaddr_in *)adr)->sin_addr; + break; + } + case AF_INET6: { + struct in6_addr *i6 = rta_put(msg, type, 16); + *i6 = ((struct sockaddr_in6 *)adr)->sin6_addr; + break; + } + default: + return -1; + } + return 0; +} + +/* Assumes no truncation. Make the buffer large enough. + The reply overwrites msg. Returns 0 on success and -1 on failure; errno + from the failing call is preserved across close(), and a NLMSG_ERROR + reply sets errno from the error code carried in the message. */ +hidden int +rtnetlink_request(struct nlmsghdr *msg, int buflen, struct sockaddr_nl *adr) +{ + int rsk; + int n; + int e; + + /* Use a private socket to avoid having to keep state + for a sequence number. */ + rsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (rsk < 0) + return -1; + n = sendto(rsk, msg, msg->nlmsg_len, 0, (struct sockaddr *)adr, + sizeof(struct sockaddr_nl)); + if (n >= 0) { + socklen_t adrlen = sizeof(struct sockaddr_nl); + n = recvfrom(rsk, msg, buflen, 0, (struct sockaddr *)adr, + &adrlen); + } + e = errno; + close(rsk); + errno = e; + if (n < 0) + return -1; + /* Assume we only get a single reply back. This is (hopefully?) + safe because it's a single use socket. */ + if (msg->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err = NLMSG_DATA(msg); + errno = -err->error; + return -1; + } + return 0; +} diff --git a/contrib/libs/numa/syscall.c b/contrib/libs/numa/syscall.c new file mode 100644 index 0000000000..45d983053e --- /dev/null +++ b/contrib/libs/numa/syscall.c @@ -0,0 +1,284 @@ +/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs. + + libnuma is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; version + 2.1. 
+ + libnuma is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should find a copy of v2.1 of the GNU Lesser General Public License + somewhere on your Linux system; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <unistd.h> +#include <sys/types.h> +#include <asm/unistd.h> +#include <errno.h> +#include "numa.h" +#include "numaif.h" +#include "numaint.h" +#include "config.h" +#include "util.h" + +#define WEAK __attribute__((weak)) + +/* Fallback syscall numbers: this section is compiled only when the included + headers leave at least one of the five NUMA syscall numbers undefined. */ +#if !defined(__NR_mbind) || !defined(__NR_set_mempolicy) || \ + !defined(__NR_get_mempolicy) || !defined(__NR_migrate_pages) || \ + !defined(__NR_move_pages) + +#if defined(__x86_64__) + +#define __NR_sched_setaffinity 203 +#define __NR_sched_getaffinity 204 + +/* Official allocation */ + +#define __NR_mbind 237 +#define __NR_set_mempolicy 238 +#define __NR_get_mempolicy 239 +#define __NR_migrate_pages 256 +#define __NR_move_pages 279 + +#elif defined(__ia64__) +#define __NR_sched_setaffinity 1231 +#define __NR_sched_getaffinity 1232 +#define __NR_migrate_pages 1280 +#define __NR_move_pages 1276 + +/* Official allocation */ + +#define __NR_mbind 1259 +#define __NR_get_mempolicy 1260 +#define __NR_set_mempolicy 1261 + +#elif defined(__i386__) + +#define __NR_mbind 274 +#define __NR_get_mempolicy 275 +#define __NR_set_mempolicy 276 +#define __NR_migrate_pages 294 +#define __NR_move_pages 317 + +#elif defined(__powerpc__) + +#define __NR_mbind 259 +#define __NR_get_mempolicy 260 +#define __NR_set_mempolicy 261 +#define __NR_migrate_pages 258 +/* FIXME: powerpc is missing move pages!!! 
+#define __NR_move_pages xxx +*/ + +#elif defined(__loongarch__) + +//reference to /usr/include/asm-generic/unistd.h + +#define __NR_mbind 235 +#define __NR_get_mempolicy 236 +#define __NR_set_mempolicy 237 +#define __NR_migrate_pages 238 +#define __NR_move_pages 239 + +#elif defined(__mips__) + +#if _MIPS_SIM == _ABIO32 +/* + * Linux o32 style syscalls are in the range from 4000 to 4999. + */ +#define __NR_Linux 4000 +#define __NR_mbind (__NR_Linux + 268) +#define __NR_get_mempolicy (__NR_Linux + 269) +#define __NR_set_mempolicy (__NR_Linux + 270) +#define __NR_migrate_pages (__NR_Linux + 287) +#endif + +#if _MIPS_SIM == _ABI64 +/* + * Linux 64-bit syscalls are in the range from 5000 to 5999. + */ +#define __NR_Linux 5000 +#define __NR_mbind (__NR_Linux + 227) +#define __NR_get_mempolicy (__NR_Linux + 228) +#define __NR_set_mempolicy (__NR_Linux + 229) +#define __NR_migrate_pages (__NR_Linux + 246) +#endif + +#if _MIPS_SIM == _ABIN32 +/* + * Linux N32 syscalls are in the range from 6000 to 6999. + */ +#define __NR_Linux 6000 +#define __NR_mbind (__NR_Linux + 231) +#define __NR_get_mempolicy (__NR_Linux + 232) +#define __NR_set_mempolicy (__NR_Linux + 233) +#define __NR_migrate_pages (__NR_Linux + 250) +#endif + +#elif defined(__hppa__) + +#define __NR_migrate_pages 272 + +#elif defined(__arm__) +/* https://bugs.debian.org/796802 */ +#warning "ARM does not implement the migrate_pages() syscall" + +#elif defined(__s390x__) + +#define __NR_mbind 268 +#define __NR_get_mempolicy 269 +#define __NR_set_mempolicy 270 +#define __NR_migrate_pages 287 +#define __NR_move_pages 310 + +#elif !defined(DEPS_RUN) +#error "Add syscalls for your architecture or update kernel headers" +#endif + +#endif + +#ifndef __GLIBC_PREREQ +# define __GLIBC_PREREQ(x,y) 0 +#endif + +#if defined(__GLIBC__) && __GLIBC_PREREQ(2, 11) + +/* glibc 2.11 seems to have working 6 argument syscall. Use the + glibc supplied syscall in this case. 
+ The version cut-off is rather arbitrary and could be probably + earlier. */ + +#define syscall6 syscall +#elif defined(__x86_64__) +/* 6 argument calls on x86-64 are often buggy in both glibc and + asm/unistd.h. Add a working version here. */ +long syscall6(long call, long a, long b, long c, long d, long e, long f) +{ + long res; + asm volatile ("movq %[d],%%r10 ; movq %[e],%%r8 ; movq %[f],%%r9 ; syscall" + : "=a" (res) + : "0" (call),"D" (a),"S" (b), "d" (c), + [d] "g" (d), [e] "g" (e), [f] "g" (f) : + "r11","rcx","r8","r10","r9","memory" ); + if (res < 0) { + errno = -res; + res = -1; + } + return res; +} +#elif defined(__i386__) + +/* i386 has buggy syscall6 in glibc too. This is tricky to do + in inline assembly because it clobbers so many registers. Do it + out of line. */ +asm( +"__syscall6:\n" +" pushl %ebp\n" +" pushl %edi\n" +" pushl %esi\n" +" pushl %ebx\n" +" movl (0+5)*4(%esp),%eax\n" +" movl (1+5)*4(%esp),%ebx\n" +" movl (2+5)*4(%esp),%ecx\n" +" movl (3+5)*4(%esp),%edx\n" +" movl (4+5)*4(%esp),%esi\n" +" movl (5+5)*4(%esp),%edi\n" +" movl (6+5)*4(%esp),%ebp\n" +" int $0x80\n" +" popl %ebx\n" +" popl %esi\n" +" popl %edi\n" +" popl %ebp\n" +" ret" +); +extern long __syscall6(long n, long a, long b, long c, long d, long e, long f); + +long syscall6(long call, long a, long b, long c, long d, long e, long f) +{ + long res = __syscall6(call,a,b,c,d,e,f); + if (res < 0) { + errno = -res; + res = -1; + } + return res; +} + +#else +#define syscall6 syscall +#endif + +long WEAK get_mempolicy(int *policy, unsigned long *nmask, + unsigned long maxnode, void *addr, + unsigned flags) +{ + return syscall(__NR_get_mempolicy, policy, nmask, + maxnode, addr, flags); +} + +long WEAK mbind(void *start, unsigned long len, int mode, + const unsigned long *nmask, unsigned long maxnode, unsigned flags) +{ + return syscall6(__NR_mbind, (long)start, len, mode, (long)nmask, + maxnode, flags); +} + +long WEAK set_mempolicy(int mode, const unsigned long *nmask, + unsigned long 
maxnode) +{ + long i; + i = syscall(__NR_set_mempolicy,mode,nmask,maxnode); + return i; +} + +long WEAK migrate_pages(int pid, unsigned long maxnode, + const unsigned long *frommask, const unsigned long *tomask) +{ +#if defined(__NR_migrate_pages) + return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask); +#else + errno = ENOSYS; + return -1; +#endif +} + +long WEAK move_pages(int pid, unsigned long count, + void **pages, const int *nodes, int *status, int flags) +{ + return syscall(__NR_move_pages, pid, count, pages, nodes, status, flags); +} + +/* SLES8 glibc doesn't define those */ +SYMVER("numa_sched_setaffinity_v1", "numa_sched_setaffinity@libnuma_1.1") +int numa_sched_setaffinity_v1(pid_t pid, unsigned len, const unsigned long *mask) +{ + return syscall(__NR_sched_setaffinity,pid,len,mask); +} + +SYMVER("numa_sched_setaffinity_v2", "numa_sched_setaffinity@@libnuma_1.2") +int numa_sched_setaffinity_v2(pid_t pid, struct bitmask *mask) +{ + return syscall(__NR_sched_setaffinity, pid, numa_bitmask_nbytes(mask), + mask->maskp); +} + +SYMVER("numa_sched_getaffinity_v1", "numa_sched_getaffinity@libnuma_1.1") +int numa_sched_getaffinity_v1(pid_t pid, unsigned len, const unsigned long *mask) +{ + return syscall(__NR_sched_getaffinity,pid,len,mask); +} + +SYMVER("numa_sched_getaffinity_v2", "numa_sched_getaffinity@@libnuma_1.2") +int numa_sched_getaffinity_v2(pid_t pid, struct bitmask *mask) +{ + /* len is length in bytes */ + return syscall(__NR_sched_getaffinity, pid, numa_bitmask_nbytes(mask), + mask->maskp); + /* sched_getaffinity returns sizeof(cpumask_t) */ +} + +make_internal_alias(numa_sched_getaffinity_v1); +make_internal_alias(numa_sched_getaffinity_v2); +make_internal_alias(numa_sched_setaffinity_v1); +make_internal_alias(numa_sched_setaffinity_v2); diff --git a/contrib/libs/numa/sysfs.c b/contrib/libs/numa/sysfs.c new file mode 100644 index 0000000000..a35c4b580e --- /dev/null +++ b/contrib/libs/numa/sysfs.c @@ -0,0 +1,76 @@ +/* Utility functions 
for reading sysfs values */ +#define _GNU_SOURCE 1 +#include <stdio.h> +#include <sys/fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdarg.h> +#include <ctype.h> +#include "numa.h" +#include "numaint.h" + +#define SYSFS_BLOCK 4096 + +hidden char *sysfs_read(char *name) +{ + char *buf; + int n; + int fd; + + buf = malloc(SYSFS_BLOCK); + if (!buf) + return NULL; + fd = open(name, O_RDONLY); + n = read(fd, buf, SYSFS_BLOCK - 1); + close(fd); + if (n <= 0) { + free(buf); + return NULL; + } + buf[n] = 0; + return buf; +} + +hidden int sysfs_node_read(struct bitmask *mask, char *fmt, ...) +{ + int n, ret = 0; + va_list ap; + char *p, *fn, *m, *end; + int num; + + va_start(ap, fmt); + n = vasprintf(&fn, fmt, ap); + va_end(ap); + if (n < 0) + return -1; + p = sysfs_read(fn); + free(fn); + if (!p) + return -1; + + m = p; + do { + num = strtol(m, &end, 0); + if (m == end) { + ret = -1; + goto out; + } + if (num < 0) { + ret = -2; + goto out; + } + if (num >= numa_num_task_nodes()) { + ret = -1; + goto out; + } + numa_bitmask_setbit(mask, num); + + /* Continuation not supported by kernel yet. */ + m = end; + while (isspace(*m) || *m == ',') + m++; + } while (isdigit(*m)); +out: + free(p); + return ret; +} |