author      qrort <qrort@yandex-team.com>   2022-11-30 23:47:12 +0300
committer   qrort <qrort@yandex-team.com>   2022-11-30 23:47:12 +0300
commit      22f8ae0e3f5d68b92aecccdf96c1d841a0334311 (patch)
tree        bffa27765faf54126ad44bcafa89fadecb7a73d7 /contrib/libs/numa/libnuma.c
parent      332b99e2173f0425444abb759eebcb2fafaa9209 (diff)
download    ydb-22f8ae0e3f5d68b92aecccdf96c1d841a0334311.tar.gz
validate canons without yatest_common
Diffstat (limited to 'contrib/libs/numa/libnuma.c')
-rw-r--r--  contrib/libs/numa/libnuma.c  2166
1 file changed, 2166 insertions, 0 deletions
diff --git a/contrib/libs/numa/libnuma.c b/contrib/libs/numa/libnuma.c new file mode 100644 index 00000000000..0aced8033a6 --- /dev/null +++ b/contrib/libs/numa/libnuma.c @@ -0,0 +1,2166 @@ +/* Simple NUMA library. + Copyright (C) 2003,2004,2005,2008 Andi Kleen,SuSE Labs and + Cliff Wickman,SGI. + + libnuma is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; version + 2.1. + + libnuma is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should find a copy of v2.1 of the GNU Lesser General Public License + somewhere on your Linux system; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + All calls are undefined when numa_available returns an error. */ +#define _GNU_SOURCE 1 +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <sched.h> +#include <dirent.h> +#include <errno.h> +#include <stdarg.h> +#include <ctype.h> +#include <assert.h> + +#include <sys/mman.h> +#include <limits.h> + +#include "config.h" +#include "numa.h" +#include "numaif.h" +#include "numaint.h" +#include "util.h" +#include "affinity.h" + +#define WEAK __attribute__((weak)) + +#define CPU_BUFFER_SIZE 4096 /* This limits you to 32768 CPUs */ + +/* these are the old (version 1) masks */ +nodemask_t numa_no_nodes; +nodemask_t numa_all_nodes; +/* these are now the default bitmask (pointers to) (version 2) */ +struct bitmask *numa_no_nodes_ptr = NULL; +struct bitmask *numa_all_nodes_ptr = NULL; +struct bitmask *numa_possible_nodes_ptr = NULL; +struct bitmask *numa_all_cpus_ptr = NULL; +struct bitmask *numa_possible_cpus_ptr = NULL; +/* I would prefer to use symbol versioning to create v1 and v2 versions + of numa_no_nodes and numa_all_nodes, but the loader does not correctly + handle versioning of BSS versus small data items */ + +struct bitmask *numa_nodes_ptr = NULL; +static struct bitmask *numa_memnode_ptr = NULL; +static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES]; +static char node_cpu_mask_v1_stale = 1; +static struct bitmask **node_cpu_mask_v2; +static char node_cpu_mask_v2_stale = 1; + +WEAK void numa_error(char *where); + +#ifndef TLS +#warning "not threadsafe" +#define __thread +#endif + +static __thread int bind_policy = MPOL_BIND; +static __thread unsigned int mbind_flags = 0; +static int sizes_set=0; +static int maxconfigurednode = -1; +static int maxconfiguredcpu = -1; +static int numprocnode = -1; +static int numproccpu = -1; +static int nodemask_sz = 0; +static int cpumask_sz = 0; + +static int has_preferred_many = 0; + +int numa_exit_on_error = 0; +int numa_exit_on_warn = 0; +static void set_sizes(void); + +/* + * There are two special functions, _init(void) and _fini(void), which + * are called automatically by the dynamic loader whenever a library is loaded. + * + * The v1 library depends upon nodemask_t's of all nodes and no nodes. 
+ */ +void __attribute__((constructor)) +numa_init(void) +{ + int max,i; + + if (sizes_set) + return; + + set_sizes(); + /* numa_all_nodes should represent existing nodes on this system */ + max = numa_num_configured_nodes(); + for (i = 0; i < max; i++) + nodemask_set_compat((nodemask_t *)&numa_all_nodes, i); + memset(&numa_no_nodes, 0, sizeof(numa_no_nodes)); +} + +static void cleanup_node_cpu_mask_v2(void); + +#define FREE_AND_ZERO(x) if (x) { \ + numa_bitmask_free(x); \ + x = NULL; \ + } + +void __attribute__((destructor)) +numa_fini(void) +{ + FREE_AND_ZERO(numa_all_cpus_ptr); + FREE_AND_ZERO(numa_possible_cpus_ptr); + FREE_AND_ZERO(numa_all_nodes_ptr); + FREE_AND_ZERO(numa_possible_nodes_ptr); + FREE_AND_ZERO(numa_no_nodes_ptr); + FREE_AND_ZERO(numa_memnode_ptr); + FREE_AND_ZERO(numa_nodes_ptr); + cleanup_node_cpu_mask_v2(); +} + +static int numa_find_first(struct bitmask *mask) +{ + int i; + for (i = 0; i < mask->size; i++) + if (numa_bitmask_isbitset(mask, i)) + return i; + return -1; +} + +/* + * The following bitmask declarations, bitmask_*() routines, and associated + * _setbit() and _getbit() routines are: + * Copyright (c) 2004_2007 Silicon Graphics, Inc. (SGI) All rights reserved. + * SGI publishes it under the terms of the GNU General Public License, v2, + * as published by the Free Software Foundation. + */ +static unsigned int +_getbit(const struct bitmask *bmp, unsigned int n) +{ + if (n < bmp->size) + return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1; + else + return 0; +} + +static void +_setbit(struct bitmask *bmp, unsigned int n, unsigned int v) +{ + if (n < bmp->size) { + if (v) + bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong); + else + bmp->maskp[n/bitsperlong] &= ~(1UL << (n % bitsperlong)); + } +} + +int +numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int i) +{ + return _getbit(bmp, i); +} + +struct bitmask * +numa_bitmask_setall(struct bitmask *bmp) +{ + unsigned int i; + for (i = 0; i < bmp->size; i++) + _setbit(bmp, i, 1); + return bmp; +} + +struct bitmask * +numa_bitmask_clearall(struct bitmask *bmp) +{ + unsigned int i; + for (i = 0; i < bmp->size; i++) + _setbit(bmp, i, 0); + return bmp; +} + +struct bitmask * +numa_bitmask_setbit(struct bitmask *bmp, unsigned int i) +{ + _setbit(bmp, i, 1); + return bmp; +} + +struct bitmask * +numa_bitmask_clearbit(struct bitmask *bmp, unsigned int i) +{ + _setbit(bmp, i, 0); + return bmp; +} + +unsigned int +numa_bitmask_nbytes(struct bitmask *bmp) +{ + return longsperbits(bmp->size) * sizeof(unsigned long); +} + +/* where n is the number of bits in the map */ +/* This function should not exit on failure, but right now we cannot really + recover from this. 
*/ +struct bitmask * +numa_bitmask_alloc(unsigned int n) +{ + struct bitmask *bmp; + + if (n < 1) { + errno = EINVAL; + numa_error("request to allocate mask for invalid number"); + exit(1); + } + bmp = malloc(sizeof(*bmp)); + if (bmp == 0) + goto oom; + bmp->size = n; + bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long)); + if (bmp->maskp == 0) { + free(bmp); + goto oom; + } + return bmp; + +oom: + numa_error("Out of memory allocating bitmask"); + exit(1); +} + +void +numa_bitmask_free(struct bitmask *bmp) +{ + if (bmp == 0) + return; + free(bmp->maskp); + bmp->maskp = (unsigned long *)0xdeadcdef; /* double free tripwire */ + free(bmp); + return; +} + +/* True if two bitmasks are equal */ +int +numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2) +{ + unsigned int i; + for (i = 0; i < bmp1->size || i < bmp2->size; i++) + if (_getbit(bmp1, i) != _getbit(bmp2, i)) + return 0; + return 1; +} + +/* Hamming Weight: number of set bits */ +unsigned int numa_bitmask_weight(const struct bitmask *bmp) +{ + unsigned int i; + unsigned int w = 0; + for (i = 0; i < bmp->size; i++) + if (_getbit(bmp, i)) + w++; + return w; +} + +/* *****end of bitmask_ routines ************ */ + +/* Next two can be overwritten by the application for different error handling */ +WEAK void numa_error(char *where) +{ + int olde = errno; + perror(where); + if (numa_exit_on_error) + exit(1); + errno = olde; +} + +WEAK void numa_warn(int num, char *fmt, ...) +{ + static unsigned warned; + va_list ap; + int olde = errno; + + /* Give each warning only once */ + if ((1<<num) & warned) + return; + warned |= (1<<num); + + va_start(ap,fmt); + fprintf(stderr, "libnuma: Warning: "); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); + va_end(ap); + + errno = olde; +} + +static void setpol(int policy, struct bitmask *bmp) +{ + if (set_mempolicy(policy, bmp->maskp, bmp->size + 1) < 0) + numa_error("set_mempolicy"); +} + +static void getpol(int *oldpolicy, struct bitmask *bmp) +{ + if (get_mempolicy(oldpolicy, bmp->maskp, bmp->size + 1, 0, 0) < 0) + numa_error("get_mempolicy"); +} + +static void dombind(void *mem, size_t size, int pol, struct bitmask *bmp) +{ + if (mbind(mem, size, pol, bmp ? bmp->maskp : NULL, bmp ? bmp->size + 1 : 0, + mbind_flags) < 0) + numa_error("mbind"); +} + +/* (undocumented) */ +/* gives the wrong answer for hugetlbfs mappings. */ +int numa_pagesize(void) +{ + static int pagesize; + if (pagesize > 0) + return pagesize; + pagesize = getpagesize(); + return pagesize; +} + +make_internal_alias(numa_pagesize); + +/* + * Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr) + * and the highest numbered existing node (maxconfigurednode). + */ +static void +set_configured_nodes(void) +{ + DIR *d; + struct dirent *de; + long long freep; + + numa_memnode_ptr = numa_allocate_nodemask(); + numa_nodes_ptr = numa_allocate_nodemask(); + + d = opendir("/sys/devices/system/node"); + if (!d) { + maxconfigurednode = 0; + } else { + while ((de = readdir(d)) != NULL) { + int nd; + if (strncmp(de->d_name, "node", 4)) + continue; + nd = strtoul(de->d_name+4, NULL, 0); + numa_bitmask_setbit(numa_nodes_ptr, nd); + if (numa_node_size64(nd, &freep) > 0) + numa_bitmask_setbit(numa_memnode_ptr, nd); + if (maxconfigurednode < nd) + maxconfigurednode = nd; + } + closedir(d); + } +} + +/* + * Convert the string length of an ascii hex mask to the number + * of bits represented by that mask. 
+ */ +static int s2nbits(const char *s) +{ + return strlen(s) * 32 / 9; +} + +/* Is string 'pre' a prefix of string 's'? */ +static int strprefix(const char *s, const char *pre) +{ + return strncmp(s, pre, strlen(pre)) == 0; +} + +static const char *mask_size_file = "/proc/self/status"; +static const char *nodemask_prefix = "Mems_allowed:\t"; +/* + * (do this the way Paul Jackson's libcpuset does it) + * The nodemask values in /proc/self/status are in an + * ascii format that uses 9 characters for each 32 bits of mask. + * (this could also be used to find the cpumask size) + */ +static void +set_nodemask_size(void) +{ + FILE *fp; + char *buf = NULL; + size_t bufsize = 0; + + if ((fp = fopen(mask_size_file, "r")) == NULL) + goto done; + + while (getline(&buf, &bufsize, fp) > 0) { + if (strprefix(buf, nodemask_prefix)) { + nodemask_sz = s2nbits(buf + strlen(nodemask_prefix)); + break; + } + } + free(buf); + fclose(fp); +done: + if (nodemask_sz == 0) {/* fall back on error */ + int pol; + unsigned long *mask = NULL; + nodemask_sz = 16; + do { + nodemask_sz <<= 1; + mask = realloc(mask, nodemask_sz / 8); + if (!mask) + return; + } while (get_mempolicy(&pol, mask, nodemask_sz + 1, 0, 0) < 0 && errno == EINVAL && + nodemask_sz < 4096*8); + free(mask); + } +} + +/* + * Read a mask consisting of a sequence of hexadecimal longs separated by + * commas. Order them correctly and return the number of bits set. + */ +static int +read_mask(char *s, struct bitmask *bmp) +{ + char *end = s; + int tmplen = (bmp->size + bitsperint - 1) / bitsperint; + unsigned int tmp[tmplen]; + unsigned int *start = tmp; + unsigned int i, n = 0, m = 0; + + if (!s) + return 0; /* shouldn't happen */ + + i = strtoul(s, &end, 16); + + /* Skip leading zeros */ + while (!i && *end++ == ',') { + i = strtoul(end, &end, 16); + } + + if (!i) + /* End of string. No mask */ + return -1; + + start[n++] = i; + /* Read sequence of ints */ + while (*end++ == ',') { + i = strtoul(end, &end, 16); + start[n++] = i; + + /* buffer overflow */ + if (n > tmplen) + return -1; + } + + /* + * Invert sequence of ints if necessary since the first int + * is the highest and we put it first because we read it first. + */ + while (n) { + int w; + unsigned long x = 0; + /* read into long values in an endian-safe way */ + for (w = 0; n && w < bitsperlong; w += bitsperint) + x |= ((unsigned long)start[n-- - 1] << w); + + bmp->maskp[m++] = x; + } + /* + * Return the number of bits set + */ + return numa_bitmask_weight(bmp); +} + +/* + * Read a processes constraints in terms of nodes and cpus from + * /proc/self/status. 
+ */ +static void +set_task_constraints(void) +{ + int hicpu = maxconfiguredcpu; + int i; + char *buffer = NULL; + size_t buflen = 0; + FILE *f; + + numa_all_cpus_ptr = numa_allocate_cpumask(); + numa_possible_cpus_ptr = numa_allocate_cpumask(); + numa_all_nodes_ptr = numa_allocate_nodemask(); + numa_possible_nodes_ptr = numa_allocate_cpumask(); + numa_no_nodes_ptr = numa_allocate_nodemask(); + + f = fopen(mask_size_file, "r"); + if (!f) { + //numa_warn(W_cpumap, "Cannot parse %s", mask_size_file); + return; + } + + while (getline(&buffer, &buflen, f) > 0) { + /* mask starts after [last] tab */ + char *mask = strrchr(buffer,'\t'); + + if (strncmp(buffer,"Cpus_allowed:",13) == 0) + numproccpu = read_mask(mask + 1, numa_all_cpus_ptr); + + if (strncmp(buffer,"Mems_allowed:",13) == 0) { + numprocnode = read_mask(mask + 1, numa_all_nodes_ptr); + } + } + fclose(f); + free(buffer); + + for (i = 0; i <= hicpu; i++) + numa_bitmask_setbit(numa_possible_cpus_ptr, i); + for (i = 0; i <= maxconfigurednode; i++) + numa_bitmask_setbit(numa_possible_nodes_ptr, i); + + /* + * Cpus_allowed in the kernel can be defined to all f's + * i.e. it may be a superset of the actual available processors. + * As such let's reduce numproccpu to the number of actual + * available cpus. + */ + if (numproccpu <= 0) { + for (i = 0; i <= hicpu; i++) + numa_bitmask_setbit(numa_all_cpus_ptr, i); + numproccpu = hicpu+1; + } + + if (numproccpu > hicpu+1) { + numproccpu = hicpu+1; + for (i=hicpu+1; i<numa_all_cpus_ptr->size; i++) { + numa_bitmask_clearbit(numa_all_cpus_ptr, i); + } + } + + if (numprocnode <= 0) { + for (i = 0; i <= maxconfigurednode; i++) + numa_bitmask_setbit(numa_all_nodes_ptr, i); + numprocnode = maxconfigurednode + 1; + } + + return; +} + +/* + * Find the highest cpu number possible (in other words the size + * of a kernel cpumask_t (in bits) - 1) + */ +static void +set_numa_max_cpu(void) +{ + int len = 4096; + int n; + int olde = errno; + struct bitmask *buffer; + + do { + buffer = numa_bitmask_alloc(len); + n = numa_sched_getaffinity_v2_int(0, buffer); + /* on success, returns size of kernel cpumask_t, in bytes */ + if (n < 0) { + if (errno == EINVAL) { + if (len >= 1024*1024) + break; + len *= 2; + numa_bitmask_free(buffer); + continue; + } else { + numa_warn(W_numcpus, "Unable to determine max cpu" + " (sched_getaffinity: %s); guessing...", + strerror(errno)); + n = sizeof(cpu_set_t); + break; + } + } + } while (n < 0); + numa_bitmask_free(buffer); + errno = olde; + cpumask_sz = n*8; +} + +/* + * get the total (configured) number of cpus - both online and offline + */ +static void +set_configured_cpus(void) +{ + maxconfiguredcpu = sysconf(_SC_NPROCESSORS_CONF) - 1; + if (maxconfiguredcpu == -1) + numa_error("sysconf(NPROCESSORS_CONF) failed"); +} + +static void +set_kernel_abi() +{ + int oldp; + struct bitmask *bmp, *tmp; + bmp = numa_allocate_nodemask(); + tmp = numa_allocate_nodemask(); + + if (get_mempolicy(&oldp, bmp->maskp, bmp->size + 1, 0, 0) < 0) + goto out; + + /* Assumes there's always a node 0, and it's online */ + numa_bitmask_setbit(tmp, 0); + if (set_mempolicy(MPOL_PREFERRED_MANY, tmp->maskp, tmp->size) == 0) { + has_preferred_many++; + /* reset the old memory policy */ + setpol(oldp, bmp); + } + +out: + numa_bitmask_free(tmp); + numa_bitmask_free(bmp); +} + +/* + * Initialize all the sizes. 
+ */ +static void +set_sizes(void) +{ + sizes_set++; + set_nodemask_size(); /* size of kernel nodemask_t */ + set_configured_nodes(); /* configured nodes listed in /sys */ + set_numa_max_cpu(); /* size of kernel cpumask_t */ + set_configured_cpus(); /* cpus listed in /sys/devices/system/cpu */ + set_task_constraints(); /* cpus and nodes for current task */ + set_kernel_abi(); /* man policy supported */ +} + +int +numa_num_configured_nodes(void) +{ + /* + * NOTE: this function's behavior matches the documentation (ie: it + * returns a count of nodes with memory) despite the poor function + * naming. We also cannot use the similarly poorly named + * numa_all_nodes_ptr as it only tracks nodes with memory from which + * the calling process can allocate. Think sparse nodes, memory-less + * nodes, cpusets... + */ + int memnodecount=0, i; + + for (i=0; i <= maxconfigurednode; i++) { + if (numa_bitmask_isbitset(numa_memnode_ptr, i)) + memnodecount++; + } + return memnodecount; +} + +int +numa_num_configured_cpus(void) +{ + + return maxconfiguredcpu+1; +} + +int +numa_num_possible_nodes(void) +{ + return nodemask_sz; +} + +int +numa_num_possible_cpus(void) +{ + return cpumask_sz; +} + +int +numa_num_task_nodes(void) +{ + return numprocnode; +} + +/* + * for backward compatibility + */ +int +numa_num_thread_nodes(void) +{ + return numa_num_task_nodes(); +} + +int +numa_num_task_cpus(void) +{ + return numproccpu; +} + +/* + * for backward compatibility + */ +int +numa_num_thread_cpus(void) +{ + return numa_num_task_cpus(); +} + +/* + * Return the number of the highest node in this running system, + */ +int +numa_max_node(void) +{ + return maxconfigurednode; +} + +make_internal_alias(numa_max_node); + +/* + * Return the number of the highest possible node in a system, + * which for v1 is the size of a numa.h nodemask_t(in bits)-1. + * but for v2 is the size of a kernel nodemask_t(in bits)-1. + */ +SYMVER("numa_max_possible_node_v1", "numa_max_possible_node@libnuma_1.1") +int +numa_max_possible_node_v1(void) +{ + return ((sizeof(nodemask_t)*8)-1); +} + +SYMVER("numa_max_possible_node_v2", "numa_max_possible_node@@libnuma_1.2") +int +numa_max_possible_node_v2(void) +{ + return numa_num_possible_nodes()-1; +} + +make_internal_alias(numa_max_possible_node_v1); +make_internal_alias(numa_max_possible_node_v2); + +/* + * Allocate a bitmask for cpus, of a size large enough to + * match the kernel's cpumask_t. + */ +struct bitmask * +numa_allocate_cpumask() +{ + int ncpus = numa_num_possible_cpus(); + + return numa_bitmask_alloc(ncpus); +} + +/* + * Allocate a bitmask the size of a libnuma nodemask_t + */ +static struct bitmask * +allocate_nodemask_v1(void) +{ + int nnodes = numa_max_possible_node_v1_int()+1; + + return numa_bitmask_alloc(nnodes); +} + +/* + * Allocate a bitmask for nodes, of a size large enough to + * match the kernel's nodemask_t. + */ +struct bitmask * +numa_allocate_nodemask(void) +{ + struct bitmask *bmp; + int nnodes = numa_max_possible_node_v2_int() + 1; + + bmp = numa_bitmask_alloc(nnodes); + return bmp; +} + +/* (cache the result?) */ +long long numa_node_size64(int node, long long *freep) +{ + size_t len = 0; + char *line = NULL; + long long size = -1; + FILE *f; + char fn[64]; + int ok = 0; + int required = freep ? 
2 : 1; + + if (freep) + *freep = -1; + sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node); + f = fopen(fn, "r"); + if (!f) + return -1; + while (getdelim(&line, &len, '\n', f) > 0) { + char *end; + char *s = strcasestr(line, "kB"); + if (!s) + continue; + --s; + while (s > line && isspace(*s)) + --s; + while (s > line && isdigit(*s)) + --s; + if (strstr(line, "MemTotal")) { + size = strtoull(s,&end,0) << 10; + if (end == s) + size = -1; + else + ok++; + } + if (freep && strstr(line, "MemFree")) { + *freep = strtoull(s,&end,0) << 10; + if (end == s) + *freep = -1; + else + ok++; + } + } + fclose(f); + free(line); + if (ok != required) + numa_warn(W_badmeminfo, "Cannot parse sysfs meminfo (%d)", ok); + return size; +} + +make_internal_alias(numa_node_size64); + +long numa_node_size(int node, long *freep) +{ + long long f2; + long sz = numa_node_size64_int(node, &f2); + if (freep) + *freep = f2; + return sz; +} + +int numa_available(void) +{ + if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS) + return -1; + return 0; +} + +SYMVER("numa_interleave_memory_v1", "numa_interleave_memory@libnuma_1.1") +void +numa_interleave_memory_v1(void *mem, size_t size, const nodemask_t *mask) +{ + struct bitmask bitmask; + + bitmask.size = sizeof(nodemask_t) * 8; + bitmask.maskp = (unsigned long *)mask; + dombind(mem, size, MPOL_INTERLEAVE, &bitmask); +} + +SYMVER("numa_interleave_memory_v2", "numa_interleave_memory@@libnuma_1.2") +void +numa_interleave_memory_v2(void *mem, size_t size, struct bitmask *bmp) +{ + dombind(mem, size, MPOL_INTERLEAVE, bmp); +} + +void numa_tonode_memory(void *mem, size_t size, int node) +{ + struct bitmask *nodes; + + nodes = numa_allocate_nodemask(); + numa_bitmask_setbit(nodes, node); + dombind(mem, size, bind_policy, nodes); + numa_bitmask_free(nodes); +} + +SYMVER("numa_tonodemask_memory_v1", "numa_tonodemask_memory@libnuma_1.1") +void +numa_tonodemask_memory_v1(void *mem, size_t size, const nodemask_t *mask) +{ + struct bitmask bitmask; + + bitmask.maskp = (unsigned long *)mask; + bitmask.size = sizeof(nodemask_t); + dombind(mem, size, bind_policy, &bitmask); +} + +SYMVER("numa_tonodemask_memory_v2", "numa_tonodemask_memory@@libnuma_1.2") +void +numa_tonodemask_memory_v2(void *mem, size_t size, struct bitmask *bmp) +{ + dombind(mem, size, bind_policy, bmp); +} + +void numa_setlocal_memory(void *mem, size_t size) +{ + dombind(mem, size, MPOL_LOCAL, NULL); +} + +void numa_police_memory(void *mem, size_t size) +{ + int pagesize = numa_pagesize_int(); + unsigned long i; + char *p = mem; + for (i = 0; i < size; i += pagesize, p += pagesize) + __atomic_and_fetch(p, 0xff, __ATOMIC_RELAXED); + +} + +make_internal_alias(numa_police_memory); + +void *numa_alloc(size_t size) +{ + char *mem; + mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + 0, 0); + if (mem == (char *)-1) + return NULL; + numa_police_memory_int(mem, size); + return mem; +} + +void *numa_realloc(void *old_addr, size_t old_size, size_t new_size) +{ + char *mem; + mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE); + if (mem == (char *)-1) + return NULL; + /* + * The memory policy of the allocated pages is preserved by mremap(), so + * there is no need to (re)set it here. If the policy of the original + * allocation is not set, the new pages will be allocated according to the + * process' mempolicy. 
Trying to allocate explicitly the new pages on the + * same node as the original ones would require changing the policy of the + * newly allocated pages, which violates the numa_realloc() semantics. + */ + return mem; +} + +SYMVER("numa_alloc_interleaved_subset_v1", "numa_alloc_interleaved_subset@libnuma_1.1") +void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask) +{ + char *mem; + struct bitmask bitmask; + + mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + 0, 0); + if (mem == (char *)-1) + return NULL; + bitmask.maskp = (unsigned long *)mask; + bitmask.size = sizeof(nodemask_t); + dombind(mem, size, MPOL_INTERLEAVE, &bitmask); + return mem; +} + +SYMVER("numa_alloc_interleaved_subset_v2", "numa_alloc_interleaved_subset@@libnuma_1.2") +void *numa_alloc_interleaved_subset_v2(size_t size, struct bitmask *bmp) +{ + char *mem; + + mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + 0, 0); + if (mem == (char *)-1) + return NULL; + dombind(mem, size, MPOL_INTERLEAVE, bmp); + return mem; +} + +make_internal_alias(numa_alloc_interleaved_subset_v1); +make_internal_alias(numa_alloc_interleaved_subset_v2); + +void * +numa_alloc_interleaved(size_t size) +{ + return numa_alloc_interleaved_subset_v2_int(size, numa_all_nodes_ptr); +} + +/* + * given a user node mask, set memory policy to use those nodes + */ +SYMVER("numa_set_interleave_mask_v1", "numa_set_interleave_mask@libnuma_1.1") +void +numa_set_interleave_mask_v1(nodemask_t *mask) +{ + struct bitmask *bmp; + int nnodes = numa_max_possible_node_v1_int()+1; + + bmp = numa_bitmask_alloc(nnodes); + copy_nodemask_to_bitmask(mask, bmp); + if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) + setpol(MPOL_DEFAULT, bmp); + else + setpol(MPOL_INTERLEAVE, bmp); + numa_bitmask_free(bmp); +} + + +SYMVER("numa_set_interleave_mask_v2", "numa_set_interleave_mask@@libnuma_1.2") +void +numa_set_interleave_mask_v2(struct bitmask *bmp) +{ + if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) + setpol(MPOL_DEFAULT, bmp); + else + setpol(MPOL_INTERLEAVE, bmp); +} + +SYMVER("numa_get_interleave_mask_v1", "numa_get_interleave_mask@libnuma_1.1") +nodemask_t +numa_get_interleave_mask_v1(void) +{ + int oldpolicy; + struct bitmask *bmp; + nodemask_t mask; + + bmp = allocate_nodemask_v1(); + getpol(&oldpolicy, bmp); + if (oldpolicy == MPOL_INTERLEAVE) + copy_bitmask_to_nodemask(bmp, &mask); + else + copy_bitmask_to_nodemask(numa_no_nodes_ptr, &mask); + numa_bitmask_free(bmp); + return mask; +} + +SYMVER("numa_get_interleave_mask_v2", "numa_get_interleave_mask@@libnuma_1.2") +struct bitmask * +numa_get_interleave_mask_v2(void) +{ + int oldpolicy; + struct bitmask *bmp; + + bmp = numa_allocate_nodemask(); + getpol(&oldpolicy, bmp); + if (oldpolicy != MPOL_INTERLEAVE) + copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); + return bmp; +} + +/* (undocumented) */ +int numa_get_interleave_node(void) +{ + int nd; + if (get_mempolicy(&nd, NULL, 0, 0, MPOL_F_NODE) == 0) + return nd; + return 0; +} + +void *numa_alloc_onnode(size_t size, int node) +{ + char *mem; + struct bitmask *bmp; + + bmp = numa_allocate_nodemask(); + numa_bitmask_setbit(bmp, node); + mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + 0, 0); + if (mem == (char *)-1) + mem = NULL; + else + dombind(mem, size, bind_policy, bmp); + numa_bitmask_free(bmp); + return mem; +} + +void *numa_alloc_local(size_t size) +{ + char *mem; + mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + 0, 0); + if (mem == (char *)-1) + mem = NULL; + 
else + dombind(mem, size, MPOL_LOCAL, NULL); + return mem; +} + +void numa_set_bind_policy(int strict) +{ + if (strict) + bind_policy = MPOL_BIND; + else if (has_preferred_many) + bind_policy = MPOL_PREFERRED_MANY; + else + bind_policy = MPOL_PREFERRED; +} + +SYMVER("numa_set_membind_v1", "numa_set_membind@libnuma_1.1") +void +numa_set_membind_v1(const nodemask_t *mask) +{ + struct bitmask bitmask; + + bitmask.maskp = (unsigned long *)mask; + bitmask.size = sizeof(nodemask_t); + setpol(MPOL_BIND, &bitmask); +} + +SYMVER("numa_set_membind_v2", "numa_set_membind@@libnuma_1.2") +void +numa_set_membind_v2(struct bitmask *bmp) +{ + setpol(MPOL_BIND, bmp); +} + +make_internal_alias(numa_set_membind_v2); + +void +numa_set_membind_balancing(struct bitmask *bmp) +{ + /* MPOL_F_NUMA_BALANCING: ignore if unsupported */ + if (set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING, + bmp->maskp, bmp->size + 1) < 0) { + if (errno == EINVAL) { + errno = 0; + numa_set_membind_v2(bmp); + } else + numa_error("set_mempolicy"); + } +} + +/* + * copy a bitmask map body to a numa.h nodemask_t structure + */ +void +copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nmp) +{ + int max, i; + + memset(nmp, 0, sizeof(nodemask_t)); + max = (sizeof(nodemask_t)*8); + for (i=0; i<bmp->size; i++) { + if (i >= max) + break; + if (numa_bitmask_isbitset(bmp, i)) + nodemask_set_compat((nodemask_t *)nmp, i); + } +} + +/* + * copy a bitmask map body to another bitmask body + * fill a larger destination with zeroes + */ +void +copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto) +{ + int bytes; + + if (bmpfrom->size >= bmpto->size) { + memcpy(bmpto->maskp, bmpfrom->maskp, CPU_BYTES(bmpto->size)); + } else if (bmpfrom->size < bmpto->size) { + bytes = CPU_BYTES(bmpfrom->size); + memcpy(bmpto->maskp, bmpfrom->maskp, bytes); + memset(((char *)bmpto->maskp)+bytes, 0, + CPU_BYTES(bmpto->size)-bytes); + } +} + +/* + * copy a numa.h nodemask_t structure to a bitmask map body + */ +void +copy_nodemask_to_bitmask(nodemask_t *nmp, struct bitmask *bmp) +{ + int max, i; + + numa_bitmask_clearall(bmp); + max = (sizeof(nodemask_t)*8); + if (max > bmp->size) + max = bmp->size; + for (i=0; i<max; i++) { + if (nodemask_isset_compat(nmp, i)) + numa_bitmask_setbit(bmp, i); + } +} + +SYMVER("numa_get_membind_v1", "numa_get_membind@libnuma_1.1") +nodemask_t +numa_get_membind_v1(void) +{ + int oldpolicy; + struct bitmask *bmp; + nodemask_t nmp; + + bmp = allocate_nodemask_v1(); + getpol(&oldpolicy, bmp); + if (oldpolicy == MPOL_BIND) { + copy_bitmask_to_nodemask(bmp, &nmp); + } else { + /* copy the body of the map to numa_all_nodes */ + copy_bitmask_to_nodemask(bmp, &numa_all_nodes); + nmp = numa_all_nodes; + } + numa_bitmask_free(bmp); + return nmp; +} + +SYMVER("numa_get_membind_v2", "numa_get_membind@@libnuma_1.2") +struct bitmask * +numa_get_membind_v2(void) +{ + int oldpolicy; + struct bitmask *bmp; + + bmp = numa_allocate_nodemask(); + getpol(&oldpolicy, bmp); + if (oldpolicy != MPOL_BIND) + copy_bitmask_to_bitmask(numa_all_nodes_ptr, bmp); + return bmp; +} + +//TODO: do we need a v1 nodemask_t version? +struct bitmask *numa_get_mems_allowed(void) +{ + struct bitmask *bmp; + + /* + * can change, so query on each call. 
+ */ + bmp = numa_allocate_nodemask(); + if (get_mempolicy(NULL, bmp->maskp, bmp->size + 1, 0, + MPOL_F_MEMS_ALLOWED) < 0) + numa_error("get_mempolicy"); + return bmp; +} +make_internal_alias(numa_get_mems_allowed); + +void numa_free(void *mem, size_t size) +{ + munmap(mem, size); +} + +SYMVER("numa_parse_bitmap_v1", "numa_parse_bitmap@libnuma_1.1") +int +numa_parse_bitmap_v1(char *line, unsigned long *mask, int ncpus) +{ + int i; + char *p = strchr(line, '\n'); + if (!p) + return -1; + + for (i = 0; p > line;i++) { + char *oldp, *endp; + oldp = p; + if (*p == ',') + --p; + while (p > line && *p != ',') + --p; + /* Eat two 32bit fields at a time to get longs */ + if (p > line && sizeof(unsigned long) == 8) { + oldp--; + memmove(p, p+1, oldp-p+1); + while (p > line && *p != ',') + --p; + } + if (*p == ',') + p++; + if (i >= CPU_LONGS(ncpus)) + return -1; + mask[i] = strtoul(p, &endp, 16); + if (endp != oldp) + return -1; + p--; + } + return 0; +} + +SYMVER("numa_parse_bitmap_v2", "numa_parse_bitmap@@libnuma_1.2") +int +numa_parse_bitmap_v2(char *line, struct bitmask *mask) +{ + int i, ncpus; + char *p = strchr(line, '\n'); + if (!p) + return -1; + ncpus = mask->size; + + for (i = 0; p > line;i++) { + char *oldp, *endp; + oldp = p; + if (*p == ',') + --p; + while (p > line && *p != ',') + --p; + /* Eat two 32bit fields at a time to get longs */ + if (p > line && sizeof(unsigned long) == 8) { + oldp--; + memmove(p, p+1, oldp-p+1); + while (p > line && *p != ',') + --p; + } + if (*p == ',') + p++; + if (i >= CPU_LONGS(ncpus)) + return -1; + mask->maskp[i] = strtoul(p, &endp, 16); + if (endp != oldp) + return -1; + p--; + } + return 0; +} + +static void init_node_cpu_mask_v2(void) +{ + int nnodes = numa_max_possible_node_v2_int() + 1; + node_cpu_mask_v2 = calloc (nnodes, sizeof(struct bitmask *)); +} + +static void cleanup_node_cpu_mask_v2(void) +{ + if (node_cpu_mask_v2) { + int i; + int nnodes; + nnodes = numa_max_possible_node_v2_int() + 1; + for (i = 0; i < nnodes; i++) { + FREE_AND_ZERO(node_cpu_mask_v2[i]); + } + free(node_cpu_mask_v2); + node_cpu_mask_v2 = NULL; + } +} + +/* This would be better with some locking, but I don't want to make libnuma + dependent on pthreads right now. The races are relatively harmless. */ +SYMVER("numa_node_to_cpus_v1", "numa_node_to_cpus@libnuma_1.1") +int +numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen) +{ + int err = 0; + char fn[64]; + FILE *f; + char update; + char *line = NULL; + size_t len = 0; + struct bitmask bitmask; + int buflen_needed; + unsigned long *mask; + int ncpus = numa_num_possible_cpus(); + int maxnode = numa_max_node_int(); + + buflen_needed = CPU_BYTES(ncpus); + if ((unsigned)node > maxnode || bufferlen < buflen_needed) { + errno = ERANGE; + return -1; + } + if (bufferlen > buflen_needed) + memset(buffer, 0, bufferlen); + update = __atomic_fetch_and(&node_cpu_mask_v1_stale, 0, __ATOMIC_RELAXED); + if (node_cpu_mask_v1[node] && !update) { + memcpy(buffer, node_cpu_mask_v1[node], buflen_needed); + return 0; + } + + mask = malloc(buflen_needed); + if (!mask) + mask = (unsigned long *)buffer; + memset(mask, 0, buflen_needed); + + sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); + f = fopen(fn, "r"); + if (!f || getdelim(&line, &len, '\n', f) < 1) { + if (numa_bitmask_isbitset(numa_nodes_ptr, node)) { + numa_warn(W_nosysfs2, + "/sys not mounted or invalid. 
Assuming one node: %s", + strerror(errno)); + numa_warn(W_nosysfs2, + "(cannot open or correctly parse %s)", fn); + } + bitmask.maskp = (unsigned long *)mask; + bitmask.size = buflen_needed * 8; + numa_bitmask_setall(&bitmask); + err = -1; + } + if (f) + fclose(f); + + if (line && (numa_parse_bitmap_v1(line, mask, ncpus) < 0)) { + numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node"); + bitmask.maskp = (unsigned long *)mask; + bitmask.size = buflen_needed * 8; + numa_bitmask_setall(&bitmask); + err = -1; + } + + free(line); + memcpy(buffer, mask, buflen_needed); + + /* slightly racy, see above */ + if (node_cpu_mask_v1[node]) { + if (update) { + /* + * There may be readers on node_cpu_mask_v1[], hence it can not + * be freed. + */ + memcpy(node_cpu_mask_v1[node], mask, buflen_needed); + free(mask); + mask = NULL; + } else if (mask != buffer) + free(mask); + } else { + node_cpu_mask_v1[node] = mask; + } + return err; +} + +/* + * test whether a node has cpus + */ +/* This would be better with some locking, but I don't want to make libnuma + dependent on pthreads right now. The races are relatively harmless. */ +/* + * deliver a bitmask of cpus representing the cpus on a given node + */ +SYMVER("numa_node_to_cpus_v2", "numa_node_to_cpus@@libnuma_1.2") +int +numa_node_to_cpus_v2(int node, struct bitmask *buffer) +{ + int err = 0; + int nnodes = numa_max_node(); + char fn[64], *line = NULL; + FILE *f; + char update; + size_t len = 0; + struct bitmask *mask; + + if (!node_cpu_mask_v2) + init_node_cpu_mask_v2(); + + if (node > nnodes) { + errno = ERANGE; + return -1; + } + numa_bitmask_clearall(buffer); + + update = __atomic_fetch_and(&node_cpu_mask_v2_stale, 0, __ATOMIC_RELAXED); + if (node_cpu_mask_v2[node] && !update) { + /* have already constructed a mask for this node */ + if (buffer->size < node_cpu_mask_v2[node]->size) { + errno = EINVAL; + numa_error("map size mismatch"); + return -1; + } + copy_bitmask_to_bitmask(node_cpu_mask_v2[node], buffer); + return 0; + } + + /* need a new mask for this node */ + mask = numa_allocate_cpumask(); + + /* this is a kernel cpumask_t (see node_read_cpumap()) */ + sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); + f = fopen(fn, "r"); + if (!f || getdelim(&line, &len, '\n', f) < 1) { + if (numa_bitmask_isbitset(numa_nodes_ptr, node)) { + numa_warn(W_nosysfs2, + "/sys not mounted or invalid. Assuming one node: %s", + strerror(errno)); + numa_warn(W_nosysfs2, + "(cannot open or correctly parse %s)", fn); + } + numa_bitmask_setall(mask); + err = -1; + } + if (f) + fclose(f); + + if (line && (numa_parse_bitmap_v2(line, mask) < 0)) { + numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node"); + numa_bitmask_setall(mask); + err = -1; + } + + free(line); + copy_bitmask_to_bitmask(mask, buffer); + + /* slightly racy, see above */ + /* save the mask we created */ + if (node_cpu_mask_v2[node]) { + if (update) { + copy_bitmask_to_bitmask(mask, node_cpu_mask_v2[node]); + numa_bitmask_free(mask); + mask = NULL; + /* how could this be? 
*/ + } else if (mask != buffer) + numa_bitmask_free(mask); + } else { + /* we don't want to cache faulty result */ + if (!err) + node_cpu_mask_v2[node] = mask; + else + numa_bitmask_free(mask); + } + return err; +} + +make_internal_alias(numa_node_to_cpus_v1); +make_internal_alias(numa_node_to_cpus_v2); + +void numa_node_to_cpu_update(void) +{ + __atomic_store_n(&node_cpu_mask_v1_stale, 1, __ATOMIC_RELAXED); + __atomic_store_n(&node_cpu_mask_v2_stale, 1, __ATOMIC_RELAXED); +} + +/* report the node of the specified cpu */ +int numa_node_of_cpu(int cpu) +{ + struct bitmask *bmp; + int ncpus, nnodes, node, ret; + + ncpus = numa_num_possible_cpus(); + if (cpu > ncpus){ + errno = EINVAL; + return -1; + } + bmp = numa_bitmask_alloc(ncpus); + nnodes = numa_max_node(); + for (node = 0; node <= nnodes; node++){ + if (numa_node_to_cpus_v2_int(node, bmp) < 0) { + /* It's possible for the node to not exist */ + continue; + } + if (numa_bitmask_isbitset(bmp, cpu)){ + ret = node; + goto end; + } + } + ret = -1; + errno = EINVAL; +end: + numa_bitmask_free(bmp); + return ret; +} + +SYMVER("numa_run_on_node_mask_v1", "numa_run_on_node_mask@libnuma_1.1") +int +numa_run_on_node_mask_v1(const nodemask_t *mask) +{ + int ncpus = numa_num_possible_cpus(); + int i, k, err; + unsigned long cpus[CPU_LONGS(ncpus)], nodecpus[CPU_LONGS(ncpus)]; + memset(cpus, 0, CPU_BYTES(ncpus)); + for (i = 0; i < NUMA_NUM_NODES; i++) { + if (mask->n[i / BITS_PER_LONG] == 0) + continue; + if (nodemask_isset_compat(mask, i)) { + if (numa_node_to_cpus_v1_int(i, nodecpus, CPU_BYTES(ncpus)) < 0) { + numa_warn(W_noderunmask, + "Cannot read node cpumask from sysfs"); + continue; + } + for (k = 0; k < CPU_LONGS(ncpus); k++) + cpus[k] |= nodecpus[k]; + } + } + err = numa_sched_setaffinity_v1(0, CPU_BYTES(ncpus), cpus); + + /* The sched_setaffinity API is broken because it expects + the user to guess the kernel cpuset size. Do this in a + brute force way. */ + if (err < 0 && errno == EINVAL) { + int savederrno = errno; + char *bigbuf; + static int size = -1; + if (size == -1) + size = CPU_BYTES(ncpus) * 2; + bigbuf = malloc(CPU_BUFFER_SIZE); + if (!bigbuf) { + errno = ENOMEM; + return -1; + } + errno = savederrno; + while (size <= CPU_BUFFER_SIZE) { + memcpy(bigbuf, cpus, CPU_BYTES(ncpus)); + memset(bigbuf + CPU_BYTES(ncpus), 0, + CPU_BUFFER_SIZE - CPU_BYTES(ncpus)); + err = numa_sched_setaffinity_v1_int(0, size, (unsigned long *)bigbuf); + if (err == 0 || errno != EINVAL) + break; + size *= 2; + } + savederrno = errno; + free(bigbuf); + errno = savederrno; + } + return err; +} + +/* + * Given a node mask (size of a kernel nodemask_t) (probably populated by + * a user argument list) set up a map of cpus (map "cpus") on those nodes. + * Then set affinity to those cpus. 
+ */ +SYMVER("numa_run_on_node_mask_v2", "numa_run_on_node_mask@@libnuma_1.2") +int +numa_run_on_node_mask_v2(struct bitmask *bmp) +{ + int ncpus, i, k, err; + struct bitmask *cpus, *nodecpus; + + cpus = numa_allocate_cpumask(); + ncpus = cpus->size; + nodecpus = numa_allocate_cpumask(); + + for (i = 0; i < bmp->size; i++) { + if (bmp->maskp[i / BITS_PER_LONG] == 0) + continue; + if (numa_bitmask_isbitset(bmp, i)) { + /* + * numa_all_nodes_ptr is cpuset aware; use only + * these nodes + */ + if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) { + numa_warn(W_noderunmask, + "node %d not allowed", i); + continue; + } + if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { + numa_warn(W_noderunmask, + "Cannot read node cpumask from sysfs"); + continue; + } + for (k = 0; k < CPU_LONGS(ncpus); k++) + cpus->maskp[k] |= nodecpus->maskp[k]; + } + } + err = numa_sched_setaffinity_v2_int(0, cpus); + + numa_bitmask_free(cpus); + numa_bitmask_free(nodecpus); + + /* used to have to consider that this could fail - it shouldn't now */ + if (err < 0) { + numa_error("numa_sched_setaffinity_v2_int() failed"); + } + + return err; +} + +make_internal_alias(numa_run_on_node_mask_v2); + +/* + * Given a node mask (size of a kernel nodemask_t) (probably populated by + * a user argument list) set up a map of cpus (map "cpus") on those nodes + * without any cpuset awareness. Then set affinity to those cpus. + */ +int +numa_run_on_node_mask_all(struct bitmask *bmp) +{ + int ncpus, i, k, err; + struct bitmask *cpus, *nodecpus; + + cpus = numa_allocate_cpumask(); + ncpus = cpus->size; + nodecpus = numa_allocate_cpumask(); + + for (i = 0; i < bmp->size; i++) { + if (bmp->maskp[i / BITS_PER_LONG] == 0) + continue; + if (numa_bitmask_isbitset(bmp, i)) { + if (!numa_bitmask_isbitset(numa_possible_nodes_ptr, i)) { + numa_warn(W_noderunmask, + "node %d not allowed", i); + continue; + } + if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { + numa_warn(W_noderunmask, + "Cannot read node cpumask from sysfs"); + continue; + } + for (k = 0; k < CPU_LONGS(ncpus); k++) + cpus->maskp[k] |= nodecpus->maskp[k]; + } + } + err = numa_sched_setaffinity_v2_int(0, cpus); + + numa_bitmask_free(cpus); + numa_bitmask_free(nodecpus); + + /* With possible nodes freedom it can happen easily now */ + if (err < 0) { + numa_error("numa_sched_setaffinity_v2_int() failed"); + } + + return err; +} + +SYMVER("numa_get_run_node_mask_v1", "numa_get_run_node_mask@libnuma_1.1") +nodemask_t +numa_get_run_node_mask_v1(void) +{ + int ncpus = numa_num_configured_cpus(); + int i, k; + int max = numa_max_node_int(); + struct bitmask *bmp, *cpus, *nodecpus; + nodemask_t nmp; + + cpus = numa_allocate_cpumask(); + if (numa_sched_getaffinity_v2_int(0, cpus) < 0){ + nmp = numa_no_nodes; + goto free_cpus; + } + + nodecpus = numa_allocate_cpumask(); + bmp = allocate_nodemask_v1(); /* the size of a nodemask_t */ + for (i = 0; i <= max; i++) { + if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { + /* It's possible for the node to not exist */ + continue; + } + for (k = 0; k < CPU_LONGS(ncpus); k++) { + if (nodecpus->maskp[k] & cpus->maskp[k]) + numa_bitmask_setbit(bmp, i); + } + } + copy_bitmask_to_nodemask(bmp, &nmp); + numa_bitmask_free(bmp); + numa_bitmask_free(nodecpus); +free_cpus: + numa_bitmask_free(cpus); + return nmp; +} + +SYMVER("numa_get_run_node_mask_v2", "numa_get_run_node_mask@@libnuma_1.2") +struct bitmask * +numa_get_run_node_mask_v2(void) +{ + int i, k; + int ncpus = numa_num_configured_cpus(); + int max = numa_max_node_int(); + struct bitmask *bmp, *cpus, 
*nodecpus; + + bmp = numa_allocate_cpumask(); + cpus = numa_allocate_cpumask(); + if (numa_sched_getaffinity_v2_int(0, cpus) < 0){ + copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); + goto free_cpus; + } + + nodecpus = numa_allocate_cpumask(); + for (i = 0; i <= max; i++) { + /* + * numa_all_nodes_ptr is cpuset aware; show only + * these nodes + */ + if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) { + continue; + } + if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { + /* It's possible for the node to not exist */ + continue; + } + for (k = 0; k < CPU_LONGS(ncpus); k++) { + if (nodecpus->maskp[k] & cpus->maskp[k]) + numa_bitmask_setbit(bmp, i); + } + } + numa_bitmask_free(nodecpus); +free_cpus: + numa_bitmask_free(cpus); + return bmp; +} + +int +numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bitmask *tonodes) +{ + int numa_num_nodes = numa_num_possible_nodes(); + + return migrate_pages(pid, numa_num_nodes + 1, fromnodes->maskp, + tonodes->maskp); +} + +int numa_move_pages(int pid, unsigned long count, + void **pages, const int *nodes, int *status, int flags) +{ + return move_pages(pid, count, pages, nodes, status, flags); +} + +int numa_run_on_node(int node) +{ + int numa_num_nodes = numa_num_possible_nodes(); + int ret = -1; + struct bitmask *cpus; + + if (node >= numa_num_nodes){ + errno = EINVAL; + goto out; + } + + cpus = numa_allocate_cpumask(); + + if (node == -1) + numa_bitmask_setall(cpus); + else if (numa_node_to_cpus_v2_int(node, cpus) < 0){ + numa_warn(W_noderunmask, "Cannot read node cpumask from sysfs"); + goto free; + } + + ret = numa_sched_setaffinity_v2_int(0, cpus); +free: + numa_bitmask_free(cpus); +out: + return ret; +} + +static struct bitmask *__numa_preferred(void) +{ + int policy; + struct bitmask *bmp; + + bmp = numa_allocate_nodemask(); + /* could read the current CPU from /proc/self/status. Probably + not worth it. */ + numa_bitmask_clearall(bmp); + getpol(&policy, bmp); + + if (policy != MPOL_PREFERRED && + policy != MPOL_PREFERRED_MANY && + policy != MPOL_BIND) + return bmp; + + if (numa_bitmask_weight(bmp) > 1) + numa_error(__FILE__); + + return bmp; +} + +int numa_preferred(void) +{ + int first_node = 0; + struct bitmask *bmp; + + bmp = __numa_preferred(); + first_node = numa_find_first(bmp); + numa_bitmask_free(bmp); + + return first_node; +} + +static void __numa_set_preferred(struct bitmask *bmp) +{ + int nodes = numa_bitmask_weight(bmp); + if (nodes > 1) + numa_error(__FILE__); + setpol(nodes ? MPOL_PREFERRED : MPOL_LOCAL, bmp); +} + +void numa_set_preferred(int node) +{ + struct bitmask *bmp = numa_allocate_nodemask(); + numa_bitmask_setbit(bmp, node); + __numa_set_preferred(bmp); + numa_bitmask_free(bmp); +} + +int numa_has_preferred_many(void) +{ + return has_preferred_many; +} + +void numa_set_preferred_many(struct bitmask *bitmask) +{ + int first_node = 0; + + if (!has_preferred_many) { + numa_warn(W_nodeparse, + "Unable to handle MANY preferred nodes. 
Falling back to first node\n"); + first_node = numa_find_first(bitmask); + numa_set_preferred(first_node); + return; + } + setpol(MPOL_PREFERRED_MANY, bitmask); +} + +struct bitmask *numa_preferred_many() +{ + return __numa_preferred(); +} + +void numa_set_localalloc(void) +{ + setpol(MPOL_LOCAL, numa_no_nodes_ptr); +} + +SYMVER("numa_bind_v1", "numa_bind@libnuma_1.1") +void numa_bind_v1(const nodemask_t *nodemask) +{ + struct bitmask bitmask; + + bitmask.maskp = (unsigned long *)nodemask; + bitmask.size = sizeof(nodemask_t); + numa_run_on_node_mask_v2_int(&bitmask); + numa_set_membind_v2_int(&bitmask); +} + +SYMVER("numa_bind_v2", "numa_bind@@libnuma_1.2") +void numa_bind_v2(struct bitmask *bmp) +{ + numa_run_on_node_mask_v2_int(bmp); + numa_set_membind_v2_int(bmp); +} + +void numa_set_strict(int flag) +{ + if (flag) + mbind_flags |= MPOL_MF_STRICT; + else + mbind_flags &= ~MPOL_MF_STRICT; +} + +/* + * Extract a node or processor number from the given string. + * Allow a relative node / processor specification within the allowed + * set if "relative" is nonzero + */ +static unsigned long get_nr(const char *s, char **end, struct bitmask *bmp, int relative) +{ + long i, nr; + + if (!relative) + return strtoul(s, end, 0); + + nr = strtoul(s, end, 0); + if (s == *end) + return nr; + /* Find the nth set bit */ + for (i = 0; nr >= 0 && i <= bmp->size; i++) + if (numa_bitmask_isbitset(bmp, i)) + nr--; + return i-1; +} + +/* + * __numa_parse_nodestring() is called to create a node mask, given + * an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10. + * (the + indicates that the numbers are nodeset-relative) + * + * The nodes may be specified as absolute, or relative to the current nodeset. + * The list of available nodes is in a map pointed to by "allowed_nodes_ptr", + * which may represent all nodes or the nodes in the current nodeset. + * + * The caller must free the returned bitmask. 
+ */ +static struct bitmask * +__numa_parse_nodestring(const char *s, struct bitmask *allowed_nodes_ptr) +{ + int invert = 0, relative = 0; + int conf_nodes = numa_num_configured_nodes(); + char *end; + struct bitmask *mask; + + mask = numa_allocate_nodemask(); + + if (s[0] == 0){ + copy_bitmask_to_bitmask(numa_no_nodes_ptr, mask); + return mask; /* return freeable mask */ + } + if (*s == '!') { + invert = 1; + s++; + } + if (*s == '+') { + relative++; + s++; + } + do { + unsigned long arg; + int i; + if (isalpha(*s)) { + int n; + if (!strcmp(s,"all")) { + copy_bitmask_to_bitmask(allowed_nodes_ptr, + mask); + s+=4; + break; + } + n = resolve_affinity(s, mask); + if (n != NO_IO_AFFINITY) { + if (n < 0) + goto err; + s += strlen(s) + 1; + break; + } + } + arg = get_nr(s, &end, allowed_nodes_ptr, relative); + if (end == s) { + numa_warn(W_nodeparse, "unparseable node description `%s'\n", s); + goto err; + } + if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg)) { + numa_warn(W_nodeparse, "node argument %d is out of range\n", arg); + goto err; + } + i = arg; + numa_bitmask_setbit(mask, i); + s = end; + if (*s == '-') { + char *end2; + unsigned long arg2; + arg2 = get_nr(++s, &end2, allowed_nodes_ptr, relative); + if (end2 == s) { + numa_warn(W_nodeparse, "missing node argument %s\n", s); + goto err; + } + if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg2)) { + numa_warn(W_nodeparse, "node argument %d out of range\n", arg2); + goto err; + } + while (arg <= arg2) { + i = arg; + if (numa_bitmask_isbitset(allowed_nodes_ptr,i)) + numa_bitmask_setbit(mask, i); + arg++; + } + s = end2; + } + } while (*s++ == ','); + if (s[-1] != '\0') + goto err; + if (invert) { + int i; + for (i = 0; i < conf_nodes; i++) { + if (numa_bitmask_isbitset(mask, i)) + numa_bitmask_clearbit(mask, i); + else + numa_bitmask_setbit(mask, i); + } + } + return mask; + +err: + numa_bitmask_free(mask); + return NULL; +} + +/* + * numa_parse_nodestring() is called to create a bitmask from nodes available + * for this task. + */ + +struct bitmask * numa_parse_nodestring(const char *s) +{ + return __numa_parse_nodestring(s, numa_all_nodes_ptr); +} + +/* + * numa_parse_nodestring_all() is called to create a bitmask from all nodes + * available. + */ + +struct bitmask * numa_parse_nodestring_all(const char *s) +{ + return __numa_parse_nodestring(s, numa_possible_nodes_ptr); +} + +/* + * __numa_parse_cpustring() is called to create a bitmask, given + * an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10. + * (the + indicates that the numbers are cpuset-relative) + * + * The cpus may be specified as absolute, or relative to the current cpuset. + * The list of available cpus for this task is in the map pointed to by + * "allowed_cpus_ptr", which may represent all cpus or the cpus in the + * current cpuset. + * + * The caller must free the returned bitmask. 
+ */ +static struct bitmask * +__numa_parse_cpustring(const char *s, struct bitmask *allowed_cpus_ptr) +{ + int invert = 0, relative=0; + int conf_cpus = numa_num_configured_cpus(); + char *end; + struct bitmask *mask; + int i; + + mask = numa_allocate_cpumask(); + + if (s[0] == 0) + return mask; + if (*s == '!') { + invert = 1; + s++; + } + if (*s == '+') { + relative++; + s++; + } + do { + unsigned long arg; + + if (!strcmp(s,"all")) { + copy_bitmask_to_bitmask(allowed_cpus_ptr, mask); + s+=4; + break; + } + arg = get_nr(s, &end, allowed_cpus_ptr, relative); + if (end == s) { + numa_warn(W_cpuparse, "unparseable cpu description `%s'\n", s); + goto err; + } + if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg)) { + numa_warn(W_cpuparse, "cpu argument %s is out of range\n", s); + goto err; + } + i = arg; + numa_bitmask_setbit(mask, i); + s = end; + if (*s == '-') { + char *end2; + unsigned long arg2; + arg2 = get_nr(++s, &end2, allowed_cpus_ptr, relative); + if (end2 == s) { + numa_warn(W_cpuparse, "missing cpu argument %s\n", s); + goto err; + } + if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg2)) { + numa_warn(W_cpuparse, "cpu argument %s out of range\n", s); + goto err; + } + while (arg <= arg2) { + i = arg; + if (numa_bitmask_isbitset(allowed_cpus_ptr, i)) + numa_bitmask_setbit(mask, i); + arg++; + } + s = end2; + } + } while (*s++ == ','); + if (s[-1] != '\0') + goto err; + if (invert) { + for (i = 0; i < conf_cpus; i++) { + if (numa_bitmask_isbitset(mask, i)) + numa_bitmask_clearbit(mask, i); + else + numa_bitmask_setbit(mask, i); + } + } + return mask; + +err: + numa_bitmask_free(mask); + return NULL; +} + +/* + * numa_parse_cpustring() is called to create a bitmask from cpus available + * for this task. + */ + +struct bitmask * numa_parse_cpustring(const char *s) +{ + return __numa_parse_cpustring(s, numa_all_cpus_ptr); +} + +/* + * numa_parse_cpustring_all() is called to create a bitmask from all cpus + * available. + */ + +struct bitmask * numa_parse_cpustring_all(const char *s) +{ + return __numa_parse_cpustring(s, numa_possible_cpus_ptr); +} |
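
The commit above vendors the full libnuma implementation. The snippet below is a minimal sketch, not part of the commit, of how a consumer might exercise the public API it defines; it assumes the program is linked against these sources (e.g. with -lnuma) and runs on a NUMA-capable Linux kernel.

/* Illustrative consumer of the API defined in the diff above. */
#include <numa.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Per the header comment, all other calls are undefined when
	 * numa_available() reports an error. */
	if (numa_available() < 0) {
		fprintf(stderr, "NUMA is not available on this system\n");
		return 1;
	}

	printf("highest node: %d, configured nodes: %d\n",
	       numa_max_node(), numa_num_configured_nodes());

	/* Place a 1 MiB buffer on node 0: numa_alloc_onnode() mmaps
	 * anonymous memory and mbinds it to the given node. */
	size_t sz = 1UL << 20;
	void *buf = numa_alloc_onnode(sz, 0);
	if (!buf)
		return 1;
	memset(buf, 0, sz);

	/* Pin the calling thread to the CPUs of node 0 as well. */
	if (numa_run_on_node(0) < 0)
		perror("numa_run_on_node");

	numa_free(buf, sz);
	return 0;
}

Note that numa_alloc_onnode() returns NULL only when the underlying mmap() fails; a failed mbind() is reported through the (weak, overridable) numa_error() handler instead.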
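
A second sketch, also illustrative rather than authoritative, exercising the bitmask helpers and the nodestring parser shown above. The "all" keyword and range syntax such as 1,3,5-7 come straight from __numa_parse_nodestring(), and per its comment the caller must free the returned mask.

/* Interleave an allocation across every node allowed in the current
 * nodeset, using the parser and bitmask routines from the diff above. */
#include <numa.h>
#include <stdio.h>

int main(void)
{
	if (numa_available() < 0)
		return 1;

	/* "all" expands to the nodes available to this task. */
	struct bitmask *nodes = numa_parse_nodestring("all");
	if (!nodes) {
		fprintf(stderr, "could not parse nodestring\n");
		return 1;
	}
	printf("nodes in mask: %u\n", numa_bitmask_weight(nodes));

	/* Spread a 4 MiB mapping across those nodes (MPOL_INTERLEAVE). */
	size_t sz = 4UL << 20;
	void *mem = numa_alloc_interleaved_subset(sz, nodes);
	if (mem)
		numa_free(mem, sz);

	numa_bitmask_free(nodes);
	return 0;
}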