1 /*
2  * Submitted by David Pacheco (dp.spambait@gmail.com)
3  *
4  * Copyright 2006-2007 Niels Provos
5  * Copyright 2007-2012 Niels Provos and Nick Mathewson
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. The name of the author may not be used to endorse or promote products
16  *    derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * Copyright (c) 2007 Sun Microsystems. All rights reserved.
32  * Use is subject to license terms.
33  */
34 
35 /*
36  * evport.c: event backend using Solaris 10 event ports. See port_create(3C).
37  * This implementation is loosely modeled after the one used for select(2) (in
38  * select.c).
39  *
 * The outstanding events are tracked in a data structure called evport_data.
 * Each entry in the ed_fds array corresponds to a file descriptor, and records
 * (as a bitmask of EV_READ and EV_WRITE) which kinds of I/O events are
 * currently requested for that fd.
44  *
45  * evport_add and evport_del update this data structure. evport_dispatch uses it
46  * to determine where to callback when an event occurs (which it gets from
47  * port_getn).
48  *
49  * Helper functions are used: grow() grows the file descriptor array as
50  * necessary when large fd's come in. reassociate() takes care of maintaining
51  * the proper file-descriptor/event-port associations.
52  *
53  * As in the select(2) implementation, signals are handled by evsignal.
54  */
55 
56 #include "event2/event-config.h"
57 
58 #include <sys/time.h>
59 #include <sys/queue.h>
60 #include <errno.h>
61 #include <poll.h>
62 #include <port.h>
63 #include <signal.h>
64 #include <stdio.h>
65 #include <stdlib.h>
66 #include <string.h>
67 #include <time.h>
68 #include <unistd.h>
69 
70 #include "event2/thread.h"
71 
72 #include "evthread-internal.h"
73 #include "event-internal.h"
74 #include "log-internal.h"
75 #include "evsignal-internal.h"
76 #include "evmap-internal.h"
77 
/*
 * Initial value for ed_nevents, i.e. the number of entries first allocated in
 * the fd table. Only file descriptors smaller than ed_nevents fit in the
 * table; when an event is added for a file descriptor F >= ed_nevents, the
 * table is enlarged by a power-of-two factor until F fits.
 */
#define DEFAULT_NFDS	16

/*
 * Upper bound on the number of events fetched by a single port_getn call.
 * It also sizes the per-dispatch event buffer and the ed_pending array, so a
 * larger value trades memory for fewer system calls.
 */
#define EVENTS_PER_GETN	8
92 
/*
 * Per-file-descriptor record of which I/O events are currently subscribed.
 * fdi_what is zero when nothing is subscribed for the descriptor.
 */

struct fd_info {
	short fdi_what;		/* combinations of EV_READ and EV_WRITE */
};

/* Nonzero when the entry has a read (resp. write) subscription recorded. */
#define FDI_HAS_READ(fdi)  ((fdi)->fdi_what & EV_READ)
#define FDI_HAS_WRITE(fdi) ((fdi)->fdi_what & EV_WRITE)
/* True when at least one of read/write is subscribed. */
#define FDI_HAS_EVENTS(fdi) (FDI_HAS_READ(fdi) || FDI_HAS_WRITE(fdi))
/*
 * Translates the subscription mask into the poll(2)-style bits handed to the
 * event port. The whole expansion is parenthesized so that the macro can be
 * combined safely with neighbouring operators such as & or ==.
 */
#define FDI_TO_SYSEVENTS(fdi) \
    ((FDI_HAS_READ(fdi) ? POLLIN : 0) | (FDI_HAS_WRITE(fdi) ? POLLOUT : 0))
107 
108 struct evport_data {
109 	int		ed_port;	/* event port for system events  */
110 	int		ed_nevents;	/* number of allocated fdi's	 */
111 	struct fd_info *ed_fds;		/* allocated fdi table		 */
112 	/* fdi's that we need to reassoc */
113 	int ed_pending[EVENTS_PER_GETN]; /* fd's with pending events */
114 };
115 
/* Backend entry points; they are referenced by the evportops table. */
static void *evport_init(struct event_base *base);
static int evport_add(struct event_base *base, int fd, short old,
    short events, void *p);
static int evport_del(struct event_base *base, int fd, short old,
    short events, void *p);
static int evport_dispatch(struct event_base *base, struct timeval *tv);
static void evport_dealloc(struct event_base *base);
121 
122 const struct eventop evportops = {
123 	"evport",
124 	evport_init,
125 	evport_add,
126 	evport_del,
127 	evport_dispatch,
128 	evport_dealloc,
129 	1, /* need reinit */
130 	0, /* features */
131 	0, /* fdinfo length */
132 };
133 
134 /*
135  * Initialize the event port implementation.
136  */
137 
138 static void*
evport_init(struct event_base * base)139 evport_init(struct event_base *base)
140 {
141 	struct evport_data *evpd;
142 	int i;
143 
144 	if (!(evpd = mm_calloc(1, sizeof(struct evport_data))))
145 		return (NULL);
146 
147 	if ((evpd->ed_port = port_create()) == -1) {
148 		mm_free(evpd);
149 		return (NULL);
150 	}
151 
152 	/*
153 	 * Initialize file descriptor structure
154 	 */
155 	evpd->ed_fds = mm_calloc(DEFAULT_NFDS, sizeof(struct fd_info));
156 	if (evpd->ed_fds == NULL) {
157 		close(evpd->ed_port);
158 		mm_free(evpd);
159 		return (NULL);
160 	}
161 	evpd->ed_nevents = DEFAULT_NFDS;
162 	for (i = 0; i < EVENTS_PER_GETN; i++)
163 		evpd->ed_pending[i] = -1;
164 
165 	evsig_init(base);
166 
167 	return (evpd);
168 }
169 
#ifdef CHECK_INVARIANTS
/*
 * Checks some basic properties about the evport_data structure: the pointer
 * itself, a positive table size, a valid port descriptor and an allocated fd
 * table. Only the top-level fields are examined.
 */

static void
check_evportop(struct evport_data *evpd)
{
	EVUTIL_ASSERT(evpd != NULL);
	EVUTIL_ASSERT(evpd->ed_nevents > 0);
	/* Descriptor 0 is a legal return value of port_create(). */
	EVUTIL_ASSERT(evpd->ed_port >= 0);
	/* Pointers are compared against NULL, not ordered against 0. */
	EVUTIL_ASSERT(evpd->ed_fds != NULL);
}

/*
 * Verifies very basic integrity of a given port_event.
 */
static void
check_event(port_event_t *pevt)
{
	/*
	 * We've only registered for PORT_SOURCE_FD events. The only
	 * other thing we can legitimately receive is PORT_SOURCE_ALERT,
	 * but since we're not using port_alert either, we can assume
	 * PORT_SOURCE_FD.
	 */
	EVUTIL_ASSERT(pevt->portev_source == PORT_SOURCE_FD);
	/* Every association in this file is made with a NULL user pointer. */
	EVUTIL_ASSERT(pevt->portev_user == NULL);
}

#else
/* Invariant checking disabled: the checks compile to harmless no-ops. */
#define check_evportop(epop) ((void)0)
#define check_event(pevt) ((void)0)
#endif /* CHECK_INVARIANTS */
206 
207 /*
208  * Doubles the size of the allocated file descriptor array.
209  */
210 static int
grow(struct evport_data * epdp,int factor)211 grow(struct evport_data *epdp, int factor)
212 {
213 	struct fd_info *tmp;
214 	int oldsize = epdp->ed_nevents;
215 	int newsize = factor * oldsize;
216 	EVUTIL_ASSERT(factor > 1);
217 
218 	check_evportop(epdp);
219 
220 	tmp = mm_realloc(epdp->ed_fds, sizeof(struct fd_info) * newsize);
221 	if (NULL == tmp)
222 		return -1;
223 	epdp->ed_fds = tmp;
224 	memset((char*) (epdp->ed_fds + oldsize), 0,
225 	    (newsize - oldsize)*sizeof(struct fd_info));
226 	epdp->ed_nevents = newsize;
227 
228 	check_evportop(epdp);
229 
230 	return 0;
231 }
232 
233 
234 /*
235  * (Re)associates the given file descriptor with the event port. The OS events
236  * are specified (implicitly) from the fd_info struct.
237  */
238 static int
reassociate(struct evport_data * epdp,struct fd_info * fdip,int fd)239 reassociate(struct evport_data *epdp, struct fd_info *fdip, int fd)
240 {
241 	int sysevents = FDI_TO_SYSEVENTS(fdip);
242 
243 	if (sysevents != 0) {
244 		if (port_associate(epdp->ed_port, PORT_SOURCE_FD,
245 				   fd, sysevents, NULL) == -1) {
246 			event_warn("port_associate");
247 			return (-1);
248 		}
249 	}
250 
251 	check_evportop(epdp);
252 
253 	return (0);
254 }
255 
256 /*
257  * Main event loop - polls port_getn for some number of events, and processes
258  * them.
259  */
260 
261 static int
evport_dispatch(struct event_base * base,struct timeval * tv)262 evport_dispatch(struct event_base *base, struct timeval *tv)
263 {
264 	int i, res;
265 	struct evport_data *epdp = base->evbase;
266 	port_event_t pevtlist[EVENTS_PER_GETN];
267 
268 	/*
269 	 * port_getn will block until it has at least nevents events. It will
270 	 * also return how many it's given us (which may be more than we asked
271 	 * for, as long as it's less than our maximum (EVENTS_PER_GETN)) in
272 	 * nevents.
273 	 */
274 	int nevents = 1;
275 
276 	/*
277 	 * We have to convert a struct timeval to a struct timespec
278 	 * (only difference is nanoseconds vs. microseconds). If no time-based
279 	 * events are active, we should wait for I/O (and tv == NULL).
280 	 */
281 	struct timespec ts;
282 	struct timespec *ts_p = NULL;
283 	if (tv != NULL) {
284 		ts.tv_sec = tv->tv_sec;
285 		ts.tv_nsec = tv->tv_usec * 1000;
286 		ts_p = &ts;
287 	}
288 
289 	/*
290 	 * Before doing anything else, we need to reassociate the events we hit
291 	 * last time which need reassociation. See comment at the end of the
292 	 * loop below.
293 	 */
294 	for (i = 0; i < EVENTS_PER_GETN; ++i) {
295 		struct fd_info *fdi = NULL;
296 		if (epdp->ed_pending[i] != -1) {
297 			fdi = &(epdp->ed_fds[epdp->ed_pending[i]]);
298 		}
299 
300 		if (fdi != NULL && FDI_HAS_EVENTS(fdi)) {
301 			int fd = epdp->ed_pending[i];
302 			reassociate(epdp, fdi, fd);
303 			epdp->ed_pending[i] = -1;
304 		}
305 	}
306 
307 	EVBASE_RELEASE_LOCK(base, th_base_lock);
308 
309 	res = port_getn(epdp->ed_port, pevtlist, EVENTS_PER_GETN,
310 	    (unsigned int *) &nevents, ts_p);
311 
312 	EVBASE_ACQUIRE_LOCK(base, th_base_lock);
313 
314 	if (res == -1) {
315 		if (errno == EINTR || errno == EAGAIN) {
316 			return (0);
317 		} else if (errno == ETIME) {
318 			if (nevents == 0)
319 				return (0);
320 		} else {
321 			event_warn("port_getn");
322 			return (-1);
323 		}
324 	}
325 
326 	event_debug(("%s: port_getn reports %d events", __func__, nevents));
327 
328 	for (i = 0; i < nevents; ++i) {
329 		struct fd_info *fdi;
330 		port_event_t *pevt = &pevtlist[i];
331 		int fd = (int) pevt->portev_object;
332 
333 		check_evportop(epdp);
334 		check_event(pevt);
335 		epdp->ed_pending[i] = fd;
336 
337 		/*
338 		 * Figure out what kind of event it was
339 		 * (because we have to pass this to the callback)
340 		 */
341 		res = 0;
342 		if (pevt->portev_events & (POLLERR|POLLHUP)) {
343 			res = EV_READ | EV_WRITE;
344 		} else {
345 			if (pevt->portev_events & POLLIN)
346 				res |= EV_READ;
347 			if (pevt->portev_events & POLLOUT)
348 				res |= EV_WRITE;
349 		}
350 
351 		/*
352 		 * Check for the error situations or a hangup situation
353 		 */
354 		if (pevt->portev_events & (POLLERR|POLLHUP|POLLNVAL))
355 			res |= EV_READ|EV_WRITE;
356 
357 		EVUTIL_ASSERT(epdp->ed_nevents > fd);
358 		fdi = &(epdp->ed_fds[fd]);
359 
360 		evmap_io_active(base, fd, res);
361 	} /* end of all events gotten */
362 
363 	check_evportop(epdp);
364 
365 	return (0);
366 }
367 
368 
369 /*
370  * Adds the given event (so that you will be notified when it happens via
371  * the callback function).
372  */
373 
374 static int
evport_add(struct event_base * base,int fd,short old,short events,void * p)375 evport_add(struct event_base *base, int fd, short old, short events, void *p)
376 {
377 	struct evport_data *evpd = base->evbase;
378 	struct fd_info *fdi;
379 	int factor;
380 	(void)p;
381 
382 	check_evportop(evpd);
383 
384 	/*
385 	 * If necessary, grow the file descriptor info table
386 	 */
387 
388 	factor = 1;
389 	while (fd >= factor * evpd->ed_nevents)
390 		factor *= 2;
391 
392 	if (factor > 1) {
393 		if (-1 == grow(evpd, factor)) {
394 			return (-1);
395 		}
396 	}
397 
398 	fdi = &evpd->ed_fds[fd];
399 	fdi->fdi_what |= events;
400 
401 	return reassociate(evpd, fdi, fd);
402 }
403 
404 /*
405  * Removes the given event from the list of events to wait for.
406  */
407 
408 static int
evport_del(struct event_base * base,int fd,short old,short events,void * p)409 evport_del(struct event_base *base, int fd, short old, short events, void *p)
410 {
411 	struct evport_data *evpd = base->evbase;
412 	struct fd_info *fdi;
413 	int i;
414 	int associated = 1;
415 	(void)p;
416 
417 	check_evportop(evpd);
418 
419 	if (evpd->ed_nevents < fd) {
420 		return (-1);
421 	}
422 
423 	for (i = 0; i < EVENTS_PER_GETN; ++i) {
424 		if (evpd->ed_pending[i] == fd) {
425 			associated = 0;
426 			break;
427 		}
428 	}
429 
430 	fdi = &evpd->ed_fds[fd];
431 	if (events & EV_READ)
432 		fdi->fdi_what &= ~EV_READ;
433 	if (events & EV_WRITE)
434 		fdi->fdi_what &= ~EV_WRITE;
435 
436 	if (associated) {
437 		if (!FDI_HAS_EVENTS(fdi) &&
438 		    port_dissociate(evpd->ed_port, PORT_SOURCE_FD, fd) == -1) {
439 			/*
440 			 * Ignore EBADFD error the fd could have been closed
441 			 * before event_del() was called.
442 			 */
443 			if (errno != EBADFD) {
444 				event_warn("port_dissociate");
445 				return (-1);
446 			}
447 		} else {
448 			if (FDI_HAS_EVENTS(fdi)) {
449 				return (reassociate(evpd, fdi, fd));
450 			}
451 		}
452 	} else {
453 		if ((fdi->fdi_what & (EV_READ|EV_WRITE)) == 0) {
454 			evpd->ed_pending[i] = -1;
455 		}
456 	}
457 	return 0;
458 }
459 
460 
461 static void
evport_dealloc(struct event_base * base)462 evport_dealloc(struct event_base *base)
463 {
464 	struct evport_data *evpd = base->evbase;
465 
466 	evsig_dealloc(base);
467 
468 	close(evpd->ed_port);
469 
470 	if (evpd->ed_fds)
471 		mm_free(evpd->ed_fds);
472 	mm_free(evpd);
473 }
474