1 
2 /*
3  * This is the latest version of hackbench.c, that tests scheduler and
4  * unix-socket (or pipe) performance.
5  *
6  * Usage: hackbench [-pipe] <num groups> [process|thread] [loops]
7  *
8  * Build it with:
9  *   gcc -g -Wall -O2 -o hackbench hackbench.c -lpthread
10  */
11 #if 0
12 
13 Date: Fri, 04 Jan 2008 14:06:26 +0800
14 From: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
15 To: LKML <linux-kernel@vger.kernel.org>
16 Subject: Improve hackbench
17 Cc: Ingo Molnar <mingo@elte.hu>, Arjan van de Ven <arjan@infradead.org>
18 
19 hackbench tests the Linux scheduler. The original program is at
20 http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
21 Based on this multi-process version, a nice person created a multi-thread
22 version. Pls. see
23 http://www.bullopensource.org/posix/pi-futex/hackbench_pth.c
24 
25 When I integrated them into my automation testing system, I found
26 a couple of issues and did some improvements.
27 
28 1) Merge hackbench: I integrated hackbench_pth.c into hackbench and added a
29 new parameter which can be used to choose process mode or thread mode. The
30 default mode is process.
31 
32 2) It runs too fast and ends in a couple of seconds. Sometimes it's too hard to debug
33 the issues. On my ia64 Montecito machines, the result looks weird when comparing
34 process mode and thread mode.
35 I want a stable result and hope the testing could run for a stable longer time, so I
36 might use performance tools to debug issues.
37 I added another new parameter,`loops`, which can be used to change variable loops,
38 so more messages will be passed from writers to receivers. Parameter 'loops' is equal to
39 100 by default.
40 
41 For example on my 8-core x86_64:
42 [ymzhang@lkp-st01-x8664 hackbench]$ uname -a
43 Linux lkp-st01-x8664 2.6.24-rc6 #1 SMP Fri Dec 21 08:32:31 CST 2007 x86_64 x86_64 x86_64 GNU/Linux
44 [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench
45 Usage: hackbench [-pipe] <num groups> [process|thread] [loops]
46 [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench 150 process 1000
47 Time: 151.533
48 [ymzhang@lkp-st01-x8664 hackbench]$ ./hackbench 150 thread 1000
49 Time: 153.666
50 
51 
With the same new parameters, I captured the SLUB issue discussed on LKML recently.
53 
54 3) hackbench_pth.c will fail on ia64 machine because pthread_attr_setstacksize always
55 fails if the stack size is less than 196*1024. I moved this statement within a __ia64__ check.
56 
57 
58 This new program could be compiled with command line:
59 #gcc -g -Wall  -o hackbench hackbench.c -lpthread
60 
61 
Thanks to Ingo for his great comments!
63 
64 -yanmin
65 
66 ---
67 
68 * Nathan Lynch <ntl@pobox.com> wrote:
69 
70 > Here's a fixlet for the hackbench program found at
71 >
72 > http://people.redhat.com/mingo/cfs-scheduler/tools/hackbench.c
73 >
74 > When redirecting hackbench output I am seeing multiple copies of the
75 > "Running with %d*40 (== %d) tasks" line.  Need to flush the buffered
76 > output before forking.
77 
78 #endif
79 
80 /* Test groups of 20 processes spraying to 20 receivers */
81 #include <pthread.h>
82 #include <stdio.h>
83 #include <stdlib.h>
84 #include <string.h>
85 #include <errno.h>
86 #include <unistd.h>
87 #include <sys/types.h>
88 #include <sys/socket.h>
89 #include <sys/wait.h>
90 #include <sys/time.h>
91 #include <sys/poll.h>
92 #include <limits.h>
93 
/* Size, in bytes, of every message written by a sender and read by a receiver. */
#define DATASIZE 100

/* How many messages each sender writes to each of its descriptors. */
static unsigned int loops = 100;

/* Worker flavour: zero selects threads, any other value selects processes (default). */
static unsigned int process_mode = 1;

/* Non-zero: connect workers with pipe(); zero: use an AF_UNIX socketpair(). */
static int use_pipes;
102 
/*
 * State shared by the sender workers of one group.
 *
 * num_fds   - number of valid entries in out_fds
 * ready_out - descriptor a worker writes one byte to once it is ready
 * wakefd    - descriptor a worker polls while waiting for the start signal
 * out_fds   - descriptors the sender writes its messages to; the entries
 *             live in memory allocated directly behind the fixed fields
 */
struct sender_context {
	unsigned int num_fds;
	int ready_out;
	int wakefd;
	int out_fds[];	/* C99 flexible array member (was a GNU zero-length array) */
};
109 
/*
 * Per-receiver state.
 *
 * num_packets - number of DATASIZE-byte messages the receiver reads
 * in_fds      - both ends of the channel; the receiver reads from
 *               in_fds[0] and, in process mode, closes in_fds[1]
 * ready_out   - descriptor a worker writes one byte to once it is ready
 * wakefd      - descriptor a worker polls while waiting for the start signal
 */
struct receiver_context {
	unsigned int num_packets;
	int in_fds[2];
	int ready_out;
	int wakefd;
};
116 
117 
/*
 * Report a fatal error and terminate.
 *
 * Writes msg followed by the description of the current errno value to
 * stderr, then exits the process with status 1.  Does not return.
 */
static void barf(const char *msg)
{
	const char *reason = strerror(errno);

	fprintf(stderr, "%s (error: %s)\n", msg, reason);
	exit(1);
}
123 
/*
 * Print the command-line synopsis on stdout and exit with status 1.
 * Does not return.
 *
 * Declared with (void) so the compiler rejects calls that pass arguments;
 * an empty parameter list is not a prototype before C23.
 */
static void print_usage_exit(void)
{
	printf("Usage: hackbench [-pipe] <num groups> [process|thread] [loops]\n");
	exit(1);
}
129 
fdpair(int fds[2])130 static void fdpair(int fds[2])
131 {
132 	if (use_pipes) {
133 		if (pipe(fds) == 0)
134 			return;
135 	} else {
136 		if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0)
137 			return;
138 	}
139 	barf("Creating fdpair");
140 }
141 
/*
 * Announce readiness and block until the start signal arrives.
 *
 * ready_out - descriptor that receives one byte telling the parent this
 *             worker is ready
 * wakefd    - descriptor polled for readability; it becoming readable is
 *             the "GO" signal
 *
 * Any failure is fatal (reported through barf()).
 */
static void ready(int ready_out, int wakefd)
{
	char dummy = 0;	/* value is irrelevant, but do not send stack garbage */
	struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };
	int n;

	/* Tell them we're ready. */
	if (write(ready_out, &dummy, 1) != 1)
		barf("CLIENT: ready write");

	/* Wait for "GO" signal; a signal interrupting poll() is not an error. */
	do {
		n = poll(&pollfd, 1, -1);
	} while (n < 0 && errno == EINTR);

	if (n != 1)
		barf("poll");
}
156 
157 /* Sender sprays loops messages down each file descriptor */
sender(struct sender_context * ctx)158 static void *sender(struct sender_context *ctx)
159 {
160 	char data[DATASIZE];
161 	unsigned int i, j;
162 
163 	ready(ctx->ready_out, ctx->wakefd);
164 
165 	/* Now pump to every receiver. */
166 	for (i = 0; i < loops; i++) {
167 		for (j = 0; j < ctx->num_fds; j++) {
168 			int ret, done = 0;
169 
170 again:
171 			ret = write(ctx->out_fds[j], data + done, sizeof(data)-done);
172 			if (ret < 0)
173 				barf("SENDER: write");
174 			done += ret;
175 			if (done < sizeof(data))
176 				goto again;
177 		}
178 	}
179 
180 	return NULL;
181 }
182 
183 
184 /* One receiver per fd */
receiver(struct receiver_context * ctx)185 static void *receiver(struct receiver_context* ctx)
186 {
187 	unsigned int i;
188 
189 	if (process_mode)
190 		close(ctx->in_fds[1]);
191 
192 	/* Wait for start... */
193 	ready(ctx->ready_out, ctx->wakefd);
194 
195 	/* Receive them all */
196 	for (i = 0; i < ctx->num_packets; i++) {
197 		char data[DATASIZE];
198 		int ret, done = 0;
199 
200 again:
201 		ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
202 		if (ret < 0)
203 			barf("SERVER: read");
204 		done += ret;
205 		if (done < DATASIZE)
206 			goto again;
207 	}
208 
209 	return NULL;
210 }
211 
create_worker(void * ctx,void * (* func)(void *))212 pthread_t create_worker(void *ctx, void *(*func)(void *))
213 {
214 	pthread_attr_t attr;
215 	pthread_t childid;
216 	int err;
217 
218 	if (process_mode) {
219 		/* process mode */
220 		/* Fork the receiver. */
221 		switch (fork()) {
222 			case -1: barf("fork()");
223 			case 0:
224 				(*func) (ctx);
225 				exit(0);
226 		}
227 
228 		return (pthread_t) 0;
229 	}
230 
231 	if (pthread_attr_init(&attr) != 0)
232 		barf("pthread_attr_init:");
233 
234 #ifndef __ia64__
235 	if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
236 		barf("pthread_attr_setstacksize");
237 #endif
238 
239 	if ((err=pthread_create(&childid, &attr, func, ctx)) != 0) {
240 		fprintf(stderr, "pthread_create failed: %s (%d)\n", strerror(err), err);
241 		exit(-1);
242 	}
243 	return (childid);
244 }
245 
reap_worker(pthread_t id)246 void reap_worker(pthread_t id)
247 {
248 	int status;
249 
250 	if (process_mode) {
251 		/* process mode */
252 		wait(&status);
253 		if (!WIFEXITED(status))
254 			exit(1);
255 	} else {
256 		void *status;
257 
258 		pthread_join(id, &status);
259 	}
260 }
261 
262 /* One group of senders and receivers */
group(pthread_t * pth,unsigned int num_fds,int ready_out,int wakefd)263 static unsigned int group(pthread_t *pth,
264 		unsigned int num_fds,
265 		int ready_out,
266 		int wakefd)
267 {
268 	unsigned int i;
269 	struct sender_context* snd_ctx = malloc (sizeof(struct sender_context)
270 			+num_fds*sizeof(int));
271 
272 	for (i = 0; i < num_fds; i++) {
273 		int fds[2];
274 		struct receiver_context* ctx = malloc (sizeof(*ctx));
275 
276 		if (!ctx)
277 			barf("malloc()");
278 
279 
280 		/* Create the pipe between client and server */
281 		fdpair(fds);
282 
283 		ctx->num_packets = num_fds*loops;
284 		ctx->in_fds[0] = fds[0];
285 		ctx->in_fds[1] = fds[1];
286 		ctx->ready_out = ready_out;
287 		ctx->wakefd = wakefd;
288 
289 		pth[i] = create_worker(ctx, (void *)(void *)receiver);
290 
291 		snd_ctx->out_fds[i] = fds[1];
292 		if (process_mode)
293 			close(fds[0]);
294 	}
295 
296 	/* Now we have all the fds, fork the senders */
297 	for (i = 0; i < num_fds; i++) {
298 		snd_ctx->ready_out = ready_out;
299 		snd_ctx->wakefd = wakefd;
300 		snd_ctx->num_fds = num_fds;
301 
302 		pth[num_fds+i] = create_worker(snd_ctx, (void *)(void *)sender);
303 	}
304 
305 	/* Close the fds we have left */
306 	if (process_mode)
307 		for (i = 0; i < num_fds; i++)
308 			close(snd_ctx->out_fds[i]);
309 
310 	/* Return number of children to reap */
311 	return num_fds * 2;
312 }
313 
main(int argc,char * argv[])314 int main(int argc, char *argv[])
315 {
316 	unsigned int i, num_groups = 10, total_children;
317 	struct timeval start, stop, diff;
318 	unsigned int num_fds = 20;
319 	int readyfds[2], wakefds[2];
320 	char dummy;
321 	pthread_t *pth_tab;
322 
323 	if (argv[1] && strcmp(argv[1], "-pipe") == 0) {
324 		use_pipes = 1;
325 		argc--;
326 		argv++;
327 	}
328 
329 	if (argc >= 2 && (num_groups = atoi(argv[1])) == 0)
330 		print_usage_exit();
331 
332 	printf("Running with %d*40 (== %d) tasks.\n",
333 		num_groups, num_groups*40);
334 
335 	fflush(NULL);
336 
337 	if (argc > 2) {
338 		if ( !strcmp(argv[2], "process") )
339 			process_mode = 1;
340 		else if ( !strcmp(argv[2], "thread") )
341 			process_mode = 0;
342 		else
343 			print_usage_exit();
344 	}
345 
346 	if (argc > 3)
347 		loops = atoi(argv[3]);
348 
349 	pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
350 
351 	if (!pth_tab)
352 		barf("main:malloc()");
353 
354 	fdpair(readyfds);
355 	fdpair(wakefds);
356 
357 	total_children = 0;
358 	for (i = 0; i < num_groups; i++)
359 		total_children += group(pth_tab+total_children, num_fds, readyfds[1], wakefds[0]);
360 
361 	/* Wait for everyone to be ready */
362 	for (i = 0; i < total_children; i++)
363 		if (read(readyfds[0], &dummy, 1) != 1)
364 			barf("Reading for readyfds");
365 
366 	gettimeofday(&start, NULL);
367 
368 	/* Kick them off */
369 	if (write(wakefds[1], &dummy, 1) != 1)
370 		barf("Writing to start them");
371 
372 	/* Reap them all */
373 	for (i = 0; i < total_children; i++)
374 		reap_worker(pth_tab[i]);
375 
376 	gettimeofday(&stop, NULL);
377 
378 	/* Print time... */
379 	timersub(&stop, &start, &diff);
380 	printf("Time: %lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000);
381 	exit(0);
382 }
383 
384 
385