1 /*
2  * Copyright (C) 2012 Linux Test Project, Inc.
3  *
4  * This program is free software;  you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY;  without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12  * the GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program;  if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 
19 #include "config.h"
20 #include <errno.h>
21 #if HAVE_NUMA_H
22 #include <numa.h>
23 #endif
24 #if HAVE_NUMAIF_H
25 #include <numaif.h>
26 #endif
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <errno.h>
33 
34 #include "test.h"
35 #include "safe_macros.h"
36 #include "numa_helper.h"
37 #include "linux_syscall_numbers.h"
38 
/*
 * get_max_node - upper bound for the number of node ids to consider
 *
 * RETURNS:
 *     0 when built without <numa.h>,
 *     max(NUMA_NUM_NODES, 1024) for libnuma API version < 2 (or unknown),
 *     numa_max_possible_node() + 1 for libnuma API version >= 2.
 */
unsigned long get_max_node(void)
{
#if HAVE_NUMA_H
#if defined(LIBNUMA_API_VERSION) && LIBNUMA_API_VERSION >= 2
	return numa_max_possible_node() + 1;
#else
	/*
	 * NUMA_NUM_NODES is not reliable, libnuma >=2 is looking
	 * at /proc/self/status to figure out correct number.
	 * If buffer is not large enough get_mempolicy will fail with EINVAL,
	 * so never report less than 1024.
	 */
	unsigned long node_limit = NUMA_NUM_NODES;

	return (node_limit < 1024) ? 1024 : node_limit;
#endif
#else
	return 0;
#endif /* HAVE_NUMA_H */
}
58 
59 #if HAVE_NUMA_H
/*
 * get_nodemask_allnodes - build a mask of every node present in sysfs
 * @nodemask: mask to fill; its first max_node / 8 bytes are zeroed first
 * @max_node: number of node ids to probe (0 .. max_node - 1)
 *
 * A node is considered present when /sys/devices/system/node/node<N>
 * can be stat()ed.
 */
static void get_nodemask_allnodes(nodemask_t * nodemask, unsigned long max_node)
{
	char node_dir[64];
	struct stat sb;
	int node = 0;

	memset(nodemask, 0, max_node / 8);

	while (node < max_node) {
		sprintf(node_dir, "/sys/devices/system/node/node%d", node);
		if (stat(node_dir, &sb) == 0)
			nodemask_set(nodemask, node);
		node++;
	}
}
74 
/*
 * filter_nodemask_mem - restrict a node mask to nodes usable for memory
 * @nodemask: mask to update in place
 * @max_node: number of node ids covered by @nodemask
 *
 * With MPOL_F_MEMS_ALLOWED the mask is zeroed and then filled by the
 * get_mempolicy syscall. Without it, every node already set in the mask
 * is kept only if numa_node_size64() reports a size > 0.
 *
 * RETURNS:
 *     0 on success (also when get_mempolicy fails with ENOSYS)
 *    -2 on any other get_mempolicy failure
 */
static int filter_nodemask_mem(nodemask_t * nodemask, unsigned long max_node)
{
#if MPOL_F_MEMS_ALLOWED
	long rc;

	memset(nodemask, 0, max_node / 8);
	/*
	 * avoid numa_get_mems_allowed(), because of bug in getpol()
	 * utility function in older versions:
	 * http://www.spinics.net/lists/linux-numa/msg00849.html
	 *
	 * At the moment numa_available() implementation also uses
	 * get_mempolicy, but let's make explicit check for ENOSYS
	 * here as well in case it changes in future. Silently ignoring
	 * ENOSYS is OK, because without NUMA the caller gets an empty
	 * set of nodes anyway.
	 */
	rc = syscall(__NR_get_mempolicy, NULL, nodemask->n,
		     max_node, 0, MPOL_F_MEMS_ALLOWED);
	if (rc < 0)
		return (errno == ENOSYS) ? 0 : -2;
#else
	int node;

	/*
	 * old libnuma/kernel don't have MPOL_F_MEMS_ALLOWED, so let's assume
	 * that we can use any node with memory > 0
	 */
	for (node = 0; node < max_node; node++) {
		if (nodemask_isset(nodemask, node) &&
		    numa_node_size64(node, NULL) <= 0)
			nodemask_clr(nodemask, node);
	}
#endif /* MPOL_F_MEMS_ALLOWED */
	return 0;
}
112 
/*
 * cpumask_has_cpus - check whether a hexadecimal cpumap string has a bit set
 * @cpumask: cpumap text; scanning stops at the first '\0'. NULL is treated
 *           as an empty mask.
 * @len: maximum number of characters of @cpumask to examine
 *
 * Any nonzero hexadecimal digit ('1'-'9', 'a'-'f', 'A'-'F') means at least
 * one CPU bit is set. All other characters ('0', ',', '\n', ...) are skipped.
 *
 * RETURNS:
 *     1 if a nonzero hex digit was found within the first @len characters
 *     0 otherwise
 */
static int cpumask_has_cpus(const char *cpumask, size_t len)
{
	size_t pos;

	if (cpumask == NULL)
		return 0;

	/* size_t index: avoids the signed/unsigned comparison against len */
	for (pos = 0; pos < len; pos++) {
		char c = cpumask[pos];

		if (c == '\0')
			break;
		if ((c >= '1' && c <= '9') ||
		    (c >= 'a' && c <= 'f') ||
		    (c >= 'A' && c <= 'F'))
			return 1;
	}
	return 0;
}
125 
/*
 * filter_nodemask_cpu - drop nodes whose cpumap shows no CPUs
 * @nodemask: mask to update in place
 * @max_node: number of node ids covered by @nodemask
 *
 * For every node set in @nodemask the first line of
 * /sys/devices/system/node/node<N>/cpumap is read; the node is cleared
 * when that line contains no nonzero hex digit. Nodes whose cpumap cannot
 * be opened or read are left untouched.
 */
static void filter_nodemask_cpu(nodemask_t * nodemask, unsigned long max_node)
{
	char *cpumask = NULL;
	/* getdelim() requires the capacity to be defined on the first call */
	size_t cap = 0;
	char fn[64];
	FILE *f;
	ssize_t nread;
	int i;

	for (i = 0; i < max_node; i++) {
		if (!nodemask_isset(nodemask, i))
			continue;

		snprintf(fn, sizeof(fn),
			 "/sys/devices/system/node/node%d/cpumap", i);
		f = fopen(fn, "r");
		if (f == NULL)
			continue;

		/* the line buffer is reused (and grown) across iterations */
		nread = getdelim(&cpumask, &cap, '\n', f);
		/* scan only the characters actually read, not the capacity */
		if (nread > 0 && !cpumask_has_cpus(cpumask, (size_t)nread))
			nodemask_clr(nodemask, i);
		fclose(f);
	}
	free(cpumask);
}
148 #endif /* HAVE_NUMA_H */
149 
/*
 * get_allowed_nodes_arr - get number and array of available nodes
 * @flag: NH_MEMS to keep only nodes passing the memory filter, NH_CPUS to
 *        keep only nodes passing the CPU filter; both may be combined,
 *        0 applies no filter
 * @num_nodes: pointer where number of available nodes will be stored
 * @nodes: array of available node ids, this is the node bitmask
 *                 compacted (without holes), so that each field
 *                 contains node number. If NULL only num_nodes is
 *                 returned, otherwise it contains a newly allocated array,
 *                 which the caller is responsible to free. On failure no
 *                 array is handed out and *nodes is set to NULL.
 * RETURNS:
 *     0 on success (also when built without <numa.h> or when
 *       numa_available() reports no NUMA support; *num_nodes is 0 then)
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 */
int get_allowed_nodes_arr(int flag, int *num_nodes, int **nodes)
{
	int ret = 0;
#if HAVE_NUMA_H
	int i;
	int saved_errno;
	nodemask_t *nodemask = NULL;
	unsigned long max_node, nodemask_size;
#endif
	*num_nodes = 0;
	if (nodes)
		*nodes = NULL;

#if HAVE_NUMA_H
	if (numa_available() == -1)
		return 0;

	/* round up so the mask is a whole number of unsigned longs */
	max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
	nodemask_size = max_node / 8;

	nodemask = malloc(nodemask_size);
	if (nodes)
		*nodes = malloc(sizeof(int) * max_node);

	if (nodemask == NULL || (nodes && (*nodes == NULL))) {
		ret = -1;
		goto out;
	}

	/* allow all nodes at start, then filter based on flags */
	get_nodemask_allnodes(nodemask, max_node);
	if ((flag & NH_MEMS) == NH_MEMS) {
		ret = filter_nodemask_mem(nodemask, max_node);
		if (ret < 0)
			goto out;
	}
	if ((flag & NH_CPUS) == NH_CPUS)
		filter_nodemask_cpu(nodemask, max_node);

	/* compact the bitmask into a dense list of node ids */
	for (i = 0; i < max_node; i++) {
		if (nodemask_isset(nodemask, i)) {
			if (nodes)
				(*nodes)[*num_nodes] = i;
			(*num_nodes)++;
		}
	}

out:
	/* callers report errno after a failure; don't let free() clobber it */
	saved_errno = errno;
	free(nodemask);
	if (ret < 0 && nodes) {
		/* don't hand out a half-initialized array on failure */
		free(*nodes);
		*nodes = NULL;
	}
	errno = saved_errno;
#endif
	return ret;
}
215 
/*
 * get_allowed_nodes - convenience function to get fixed number of nodes
 * @flag: passed through to get_allowed_nodes_arr()
 * @count: how many nodes to get
 * @...: @count int pointers, where node ids will be stored
 *
 * When fewer than @count nodes are available, the pointers for the nodes
 * that do exist are still filled in before -3 is returned and errno is
 * set to EINVAL.
 *
 * RETURNS:
 *     0 on success
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 *    -3 on not enough allowed nodes
 */
int get_allowed_nodes(int flag, int count, ...)
{
	int ret;
	int i, *nodep;
	int saved_errno;
	va_list ap;
	int num_nodes = 0;
	int *nodes = NULL;

	ret = get_allowed_nodes_arr(flag, &num_nodes, &nodes);
	if (ret < 0) {
		/*
		 * The array may have been allocated before the failure was
		 * detected; release it (free(NULL) is a no-op) while keeping
		 * errno intact for the caller.
		 */
		saved_errno = errno;
		free(nodes);
		errno = saved_errno;
		return ret;
	}

	va_start(ap, count);
	for (i = 0; i < count; i++) {
		nodep = va_arg(ap, int *);
		if (i < num_nodes) {
			*nodep = nodes[i];
		} else {
			ret = -3;
			errno = EINVAL;
			break;
		}
	}
	free(nodes);
	va_end(ap);

	return ret;
}
254 
/*
 * print_node_info - print the node ids selected by @flag to stdout
 * @flag: filter passed to get_allowed_nodes_arr()
 *
 * Output is one line: "nodes (flag=<flag>): " followed by the node ids,
 * or by "error(<ret>)" when the lookup did not return 0.
 */
static void print_node_info(int flag)
{
	int *node_ids = NULL;
	int count, idx, rc;

	rc = get_allowed_nodes_arr(flag, &count, &node_ids);
	printf("nodes (flag=%d): ", flag);
	if (rc != 0) {
		printf("error(%d)\n", rc);
	} else {
		for (idx = 0; idx < count; idx++)
			printf("%d ", node_ids[idx]);
		printf("\n");
	}
	free(node_ids);
}
270 
271 /*
272  * nh_dump_nodes - dump info about nodes to stdout
273  */
nh_dump_nodes(void)274 void nh_dump_nodes(void)
275 {
276 	print_node_info(0);
277 	print_node_info(NH_MEMS);
278 	print_node_info(NH_CPUS);
279 	print_node_info(NH_MEMS | NH_CPUS);
280 }
281 
282 /*
283  * is_numa - judge a system is NUMA system or not
284  * @flag: NH_MEMS and/or NH_CPUS
285  * @min_nodes: find at least 'min_nodes' nodes with memory
286  * NOTE: the function is designed to try to find at least 'min_nodes'
287  * available nodes, where each node contains memory.
288  * WARN: Don't use this func in child, as it calls tst_brkm()
289  * RETURNS:
290  *     0 - it's not a NUMA system
291  *     1 - it's a NUMA system
292  */
is_numa(void (* cleanup_fn)(void),int flag,int min_nodes)293 int is_numa(void (*cleanup_fn)(void), int flag, int min_nodes)
294 {
295 	int ret;
296 	int numa_nodes = 0;
297 
298 	ret = get_allowed_nodes_arr(flag, &numa_nodes, NULL);
299 	if (ret < 0)
300 		tst_brkm(TBROK | TERRNO, cleanup_fn, "get_allowed_nodes_arr");
301 
302 	if (numa_nodes >= min_nodes)
303 		return 1;
304 	else
305 		return 0;
306 }
307