1 /*
2  * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17  */
18 
19 FILE_LICENCE ( GPL2_OR_LATER );
20 
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <byteswap.h>
27 #include <errno.h>
28 #include <assert.h>
29 #include <gpxe/list.h>
30 #include <gpxe/errortab.h>
31 #include <gpxe/if_arp.h>
32 #include <gpxe/netdevice.h>
33 #include <gpxe/iobuf.h>
34 #include <gpxe/ipoib.h>
35 #include <gpxe/process.h>
36 #include <gpxe/infiniband.h>
37 #include <gpxe/ib_mi.h>
38 #include <gpxe/ib_sma.h>
39 
40 /** @file
41  *
42  * Infiniband protocol
43  *
44  */
45 
46 /** List of Infiniband devices */
47 struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
48 
49 /** List of open Infiniband devices, in reverse order of opening */
50 static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
51 
52 /* Disambiguate the various possible EINPROGRESSes */
53 #define EINPROGRESS_INIT ( EINPROGRESS | EUNIQ_01 )
54 #define EINPROGRESS_ARMED ( EINPROGRESS | EUNIQ_02 )
55 
56 /** Human-readable message for the link statuses */
57 struct errortab infiniband_errors[] __errortab = {
58 	{ EINPROGRESS_INIT, "Initialising" },
59 	{ EINPROGRESS_ARMED, "Armed" },
60 };
61 
62 /***************************************************************************
63  *
64  * Completion queues
65  *
66  ***************************************************************************
67  */
68 
69 /**
70  * Create completion queue
71  *
72  * @v ibdev		Infiniband device
73  * @v num_cqes		Number of completion queue entries
74  * @v op		Completion queue operations
75  * @ret cq		New completion queue
76  */
77 struct ib_completion_queue *
ib_create_cq(struct ib_device * ibdev,unsigned int num_cqes,struct ib_completion_queue_operations * op)78 ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
79 	       struct ib_completion_queue_operations *op ) {
80 	struct ib_completion_queue *cq;
81 	int rc;
82 
83 	DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
84 
85 	/* Allocate and initialise data structure */
86 	cq = zalloc ( sizeof ( *cq ) );
87 	if ( ! cq )
88 		goto err_alloc_cq;
89 	cq->ibdev = ibdev;
90 	list_add ( &cq->list, &ibdev->cqs );
91 	cq->num_cqes = num_cqes;
92 	INIT_LIST_HEAD ( &cq->work_queues );
93 	cq->op = op;
94 
95 	/* Perform device-specific initialisation and get CQN */
96 	if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
97 		DBGC ( ibdev, "IBDEV %p could not initialise completion "
98 		       "queue: %s\n", ibdev, strerror ( rc ) );
99 		goto err_dev_create_cq;
100 	}
101 
102 	DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
103 	       "with CQN %#lx\n", ibdev, num_cqes, cq,
104 	       ib_cq_get_drvdata ( cq ), cq->cqn );
105 	return cq;
106 
107 	ibdev->op->destroy_cq ( ibdev, cq );
108  err_dev_create_cq:
109 	list_del ( &cq->list );
110 	free ( cq );
111  err_alloc_cq:
112 	return NULL;
113 }
114 
115 /**
116  * Destroy completion queue
117  *
118  * @v ibdev		Infiniband device
119  * @v cq		Completion queue
120  */
ib_destroy_cq(struct ib_device * ibdev,struct ib_completion_queue * cq)121 void ib_destroy_cq ( struct ib_device *ibdev,
122 		     struct ib_completion_queue *cq ) {
123 	DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
124 	       ibdev, cq->cqn );
125 	assert ( list_empty ( &cq->work_queues ) );
126 	ibdev->op->destroy_cq ( ibdev, cq );
127 	list_del ( &cq->list );
128 	free ( cq );
129 }
130 
131 /**
132  * Poll completion queue
133  *
134  * @v ibdev		Infiniband device
135  * @v cq		Completion queue
136  */
ib_poll_cq(struct ib_device * ibdev,struct ib_completion_queue * cq)137 void ib_poll_cq ( struct ib_device *ibdev,
138 		  struct ib_completion_queue *cq ) {
139 	struct ib_work_queue *wq;
140 
141 	/* Poll completion queue */
142 	ibdev->op->poll_cq ( ibdev, cq );
143 
144 	/* Refill receive work queues */
145 	list_for_each_entry ( wq, &cq->work_queues, list ) {
146 		if ( ! wq->is_send )
147 			ib_refill_recv ( ibdev, wq->qp );
148 	}
149 }
150 
151 /***************************************************************************
152  *
153  * Work queues
154  *
155  ***************************************************************************
156  */
157 
158 /**
159  * Create queue pair
160  *
161  * @v ibdev		Infiniband device
162  * @v type		Queue pair type
163  * @v num_send_wqes	Number of send work queue entries
164  * @v send_cq		Send completion queue
165  * @v num_recv_wqes	Number of receive work queue entries
166  * @v recv_cq		Receive completion queue
167  * @ret qp		Queue pair
168  *
169  * The queue pair will be left in the INIT state; you must call
170  * ib_modify_qp() before it is ready to use for sending and receiving.
171  */
ib_create_qp(struct ib_device * ibdev,enum ib_queue_pair_type type,unsigned int num_send_wqes,struct ib_completion_queue * send_cq,unsigned int num_recv_wqes,struct ib_completion_queue * recv_cq)172 struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
173 				      enum ib_queue_pair_type type,
174 				      unsigned int num_send_wqes,
175 				      struct ib_completion_queue *send_cq,
176 				      unsigned int num_recv_wqes,
177 				      struct ib_completion_queue *recv_cq ) {
178 	struct ib_queue_pair *qp;
179 	size_t total_size;
180 	int rc;
181 
182 	DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
183 
184 	/* Allocate and initialise data structure */
185 	total_size = ( sizeof ( *qp ) +
186 		       ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
187 		       ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
188 	qp = zalloc ( total_size );
189 	if ( ! qp )
190 		goto err_alloc_qp;
191 	qp->ibdev = ibdev;
192 	list_add ( &qp->list, &ibdev->qps );
193 	qp->type = type;
194 	qp->send.qp = qp;
195 	qp->send.is_send = 1;
196 	qp->send.cq = send_cq;
197 	list_add ( &qp->send.list, &send_cq->work_queues );
198 	qp->send.psn = ( random() & 0xffffffUL );
199 	qp->send.num_wqes = num_send_wqes;
200 	qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
201 	qp->recv.qp = qp;
202 	qp->recv.cq = recv_cq;
203 	list_add ( &qp->recv.list, &recv_cq->work_queues );
204 	qp->recv.psn = ( random() & 0xffffffUL );
205 	qp->recv.num_wqes = num_recv_wqes;
206 	qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
207 			    ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
208 	INIT_LIST_HEAD ( &qp->mgids );
209 
210 	/* Perform device-specific initialisation and get QPN */
211 	if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
212 		DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
213 		       "%s\n", ibdev, strerror ( rc ) );
214 		goto err_dev_create_qp;
215 	}
216 	DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
217 	       ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
218 	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
219 	       ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
220 	       qp->recv.iobufs );
221 	DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
222 	       ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
223 	       ( ( ( void * ) qp ) + total_size ) );
224 
225 	/* Calculate externally-visible QPN */
226 	switch ( type ) {
227 	case IB_QPT_SMI:
228 		qp->ext_qpn = IB_QPN_SMI;
229 		break;
230 	case IB_QPT_GSI:
231 		qp->ext_qpn = IB_QPN_GSI;
232 		break;
233 	default:
234 		qp->ext_qpn = qp->qpn;
235 		break;
236 	}
237 	if ( qp->ext_qpn != qp->qpn ) {
238 		DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
239 		       ibdev, qp->qpn, qp->ext_qpn );
240 	}
241 
242 	return qp;
243 
244 	ibdev->op->destroy_qp ( ibdev, qp );
245  err_dev_create_qp:
246 	list_del ( &qp->send.list );
247 	list_del ( &qp->recv.list );
248 	list_del ( &qp->list );
249 	free ( qp );
250  err_alloc_qp:
251 	return NULL;
252 }
253 
254 /**
255  * Modify queue pair
256  *
257  * @v ibdev		Infiniband device
258  * @v qp		Queue pair
259  * @v av		New address vector, if applicable
260  * @ret rc		Return status code
261  */
ib_modify_qp(struct ib_device * ibdev,struct ib_queue_pair * qp)262 int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
263 	int rc;
264 
265 	DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
266 
267 	if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
268 		DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
269 		       ibdev, qp->qpn, strerror ( rc ) );
270 		return rc;
271 	}
272 
273 	return 0;
274 }
275 
276 /**
277  * Destroy queue pair
278  *
279  * @v ibdev		Infiniband device
280  * @v qp		Queue pair
281  */
ib_destroy_qp(struct ib_device * ibdev,struct ib_queue_pair * qp)282 void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
283 	struct io_buffer *iobuf;
284 	unsigned int i;
285 
286 	DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
287 	       ibdev, qp->qpn );
288 
289 	assert ( list_empty ( &qp->mgids ) );
290 
291 	/* Perform device-specific destruction */
292 	ibdev->op->destroy_qp ( ibdev, qp );
293 
294 	/* Complete any remaining I/O buffers with errors */
295 	for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
296 		if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
297 			ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
298 	}
299 	for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
300 		if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
301 			ib_complete_recv ( ibdev, qp, NULL, iobuf,
302 					   -ECANCELED );
303 		}
304 	}
305 
306 	/* Remove work queues from completion queue */
307 	list_del ( &qp->send.list );
308 	list_del ( &qp->recv.list );
309 
310 	/* Free QP */
311 	list_del ( &qp->list );
312 	free ( qp );
313 }
314 
315 /**
316  * Find queue pair by QPN
317  *
318  * @v ibdev		Infiniband device
319  * @v qpn		Queue pair number
320  * @ret qp		Queue pair, or NULL
321  */
ib_find_qp_qpn(struct ib_device * ibdev,unsigned long qpn)322 struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
323 					unsigned long qpn ) {
324 	struct ib_queue_pair *qp;
325 
326 	list_for_each_entry ( qp, &ibdev->qps, list ) {
327 		if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
328 			return qp;
329 	}
330 	return NULL;
331 }
332 
333 /**
334  * Find queue pair by multicast GID
335  *
336  * @v ibdev		Infiniband device
337  * @v gid		Multicast GID
338  * @ret qp		Queue pair, or NULL
339  */
ib_find_qp_mgid(struct ib_device * ibdev,struct ib_gid * gid)340 struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
341 					 struct ib_gid *gid ) {
342 	struct ib_queue_pair *qp;
343 	struct ib_multicast_gid *mgid;
344 
345 	list_for_each_entry ( qp, &ibdev->qps, list ) {
346 		list_for_each_entry ( mgid, &qp->mgids, list ) {
347 			if ( memcmp ( &mgid->gid, gid,
348 				      sizeof ( mgid->gid ) ) == 0 ) {
349 				return qp;
350 			}
351 		}
352 	}
353 	return NULL;
354 }
355 
356 /**
357  * Find work queue belonging to completion queue
358  *
359  * @v cq		Completion queue
360  * @v qpn		Queue pair number
361  * @v is_send		Find send work queue (rather than receive)
362  * @ret wq		Work queue, or NULL if not found
363  */
ib_find_wq(struct ib_completion_queue * cq,unsigned long qpn,int is_send)364 struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
365 				    unsigned long qpn, int is_send ) {
366 	struct ib_work_queue *wq;
367 
368 	list_for_each_entry ( wq, &cq->work_queues, list ) {
369 		if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
370 			return wq;
371 	}
372 	return NULL;
373 }
374 
375 /**
376  * Post send work queue entry
377  *
378  * @v ibdev		Infiniband device
379  * @v qp		Queue pair
380  * @v av		Address vector
381  * @v iobuf		I/O buffer
382  * @ret rc		Return status code
383  */
ib_post_send(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_address_vector * av,struct io_buffer * iobuf)384 int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
385 		   struct ib_address_vector *av,
386 		   struct io_buffer *iobuf ) {
387 	struct ib_address_vector av_copy;
388 	int rc;
389 
390 	/* Check queue fill level */
391 	if ( qp->send.fill >= qp->send.num_wqes ) {
392 		DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
393 		       ibdev, qp->qpn );
394 		return -ENOBUFS;
395 	}
396 
397 	/* Use default address vector if none specified */
398 	if ( ! av )
399 		av = &qp->av;
400 
401 	/* Make modifiable copy of address vector */
402 	memcpy ( &av_copy, av, sizeof ( av_copy ) );
403 	av = &av_copy;
404 
405 	/* Fill in optional parameters in address vector */
406 	if ( ! av->qkey )
407 		av->qkey = qp->qkey;
408 	if ( ! av->rate )
409 		av->rate = IB_RATE_2_5;
410 
411 	/* Post to hardware */
412 	if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
413 		DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
414 		       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
415 		return rc;
416 	}
417 
418 	qp->send.fill++;
419 	return 0;
420 }
421 
422 /**
423  * Post receive work queue entry
424  *
425  * @v ibdev		Infiniband device
426  * @v qp		Queue pair
427  * @v iobuf		I/O buffer
428  * @ret rc		Return status code
429  */
ib_post_recv(struct ib_device * ibdev,struct ib_queue_pair * qp,struct io_buffer * iobuf)430 int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
431 		   struct io_buffer *iobuf ) {
432 	int rc;
433 
434 	/* Check packet length */
435 	if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
436 		DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
437 		       ibdev, qp->qpn, iob_tailroom ( iobuf ) );
438 		return -EINVAL;
439 	}
440 
441 	/* Check queue fill level */
442 	if ( qp->recv.fill >= qp->recv.num_wqes ) {
443 		DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
444 		       ibdev, qp->qpn );
445 		return -ENOBUFS;
446 	}
447 
448 	/* Post to hardware */
449 	if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
450 		DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
451 		       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
452 		return rc;
453 	}
454 
455 	qp->recv.fill++;
456 	return 0;
457 }
458 
459 /**
460  * Complete send work queue entry
461  *
462  * @v ibdev		Infiniband device
463  * @v qp		Queue pair
464  * @v iobuf		I/O buffer
465  * @v rc		Completion status code
466  */
ib_complete_send(struct ib_device * ibdev,struct ib_queue_pair * qp,struct io_buffer * iobuf,int rc)467 void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
468 			struct io_buffer *iobuf, int rc ) {
469 
470 	if ( qp->send.cq->op->complete_send ) {
471 		qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
472 	} else {
473 		free_iob ( iobuf );
474 	}
475 	qp->send.fill--;
476 }
477 
478 /**
479  * Complete receive work queue entry
480  *
481  * @v ibdev		Infiniband device
482  * @v qp		Queue pair
483  * @v av		Address vector
484  * @v iobuf		I/O buffer
485  * @v rc		Completion status code
486  */
ib_complete_recv(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_address_vector * av,struct io_buffer * iobuf,int rc)487 void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
488 			struct ib_address_vector *av,
489 			struct io_buffer *iobuf, int rc ) {
490 
491 	if ( qp->recv.cq->op->complete_recv ) {
492 		qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
493 	} else {
494 		free_iob ( iobuf );
495 	}
496 	qp->recv.fill--;
497 }
498 
499 /**
500  * Refill receive work queue
501  *
502  * @v ibdev		Infiniband device
503  * @v qp		Queue pair
504  */
ib_refill_recv(struct ib_device * ibdev,struct ib_queue_pair * qp)505 void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
506 	struct io_buffer *iobuf;
507 	int rc;
508 
509 	/* Keep filling while unfilled entries remain */
510 	while ( qp->recv.fill < qp->recv.num_wqes ) {
511 
512 		/* Allocate I/O buffer */
513 		iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
514 		if ( ! iobuf ) {
515 			/* Non-fatal; we will refill on next attempt */
516 			return;
517 		}
518 
519 		/* Post I/O buffer */
520 		if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
521 			DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
522 			       ibdev, strerror ( rc ) );
523 			free_iob ( iobuf );
524 			/* Give up */
525 			return;
526 		}
527 	}
528 }
529 
530 /***************************************************************************
531  *
532  * Link control
533  *
534  ***************************************************************************
535  */
536 
537 /**
538  * Open port
539  *
540  * @v ibdev		Infiniband device
541  * @ret rc		Return status code
542  */
ib_open(struct ib_device * ibdev)543 int ib_open ( struct ib_device *ibdev ) {
544 	int rc;
545 
546 	/* Increment device open request counter */
547 	if ( ibdev->open_count++ > 0 ) {
548 		/* Device was already open; do nothing */
549 		return 0;
550 	}
551 
552 	/* Create subnet management interface */
553 	ibdev->smi = ib_create_mi ( ibdev, IB_QPT_SMI );
554 	if ( ! ibdev->smi ) {
555 		DBGC ( ibdev, "IBDEV %p could not create SMI\n", ibdev );
556 		rc = -ENOMEM;
557 		goto err_create_smi;
558 	}
559 
560 	/* Create subnet management agent */
561 	if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
562 		DBGC ( ibdev, "IBDEV %p could not create SMA: %s\n",
563 		       ibdev, strerror ( rc ) );
564 		goto err_create_sma;
565 	}
566 
567 	/* Create general services interface */
568 	ibdev->gsi = ib_create_mi ( ibdev, IB_QPT_GSI );
569 	if ( ! ibdev->gsi ) {
570 		DBGC ( ibdev, "IBDEV %p could not create GSI\n", ibdev );
571 		rc = -ENOMEM;
572 		goto err_create_gsi;
573 	}
574 
575 	/* Open device */
576 	if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
577 		DBGC ( ibdev, "IBDEV %p could not open: %s\n",
578 		       ibdev, strerror ( rc ) );
579 		goto err_open;
580 	}
581 
582 	/* Add to head of open devices list */
583 	list_add ( &ibdev->open_list, &open_ib_devices );
584 
585 	assert ( ibdev->open_count == 1 );
586 	return 0;
587 
588 	ibdev->op->close ( ibdev );
589  err_open:
590 	ib_destroy_mi ( ibdev, ibdev->gsi );
591  err_create_gsi:
592 	ib_destroy_sma ( ibdev, ibdev->smi );
593  err_create_sma:
594 	ib_destroy_mi ( ibdev, ibdev->smi );
595  err_create_smi:
596 	assert ( ibdev->open_count == 1 );
597 	ibdev->open_count = 0;
598 	return rc;
599 }
600 
601 /**
602  * Close port
603  *
604  * @v ibdev		Infiniband device
605  */
ib_close(struct ib_device * ibdev)606 void ib_close ( struct ib_device *ibdev ) {
607 
608 	/* Decrement device open request counter */
609 	ibdev->open_count--;
610 
611 	/* Close device if this was the last remaining requested opening */
612 	if ( ibdev->open_count == 0 ) {
613 		list_del ( &ibdev->open_list );
614 		ib_destroy_mi ( ibdev, ibdev->gsi );
615 		ib_destroy_sma ( ibdev, ibdev->smi );
616 		ib_destroy_mi ( ibdev, ibdev->smi );
617 		ibdev->op->close ( ibdev );
618 	}
619 }
620 
621 /**
622  * Get link state
623  *
624  * @v ibdev		Infiniband device
625  * @ret rc		Link status code
626  */
ib_link_rc(struct ib_device * ibdev)627 int ib_link_rc ( struct ib_device *ibdev ) {
628 	switch ( ibdev->port_state ) {
629 	case IB_PORT_STATE_DOWN:	return -ENOTCONN;
630 	case IB_PORT_STATE_INIT:	return -EINPROGRESS_INIT;
631 	case IB_PORT_STATE_ARMED:	return -EINPROGRESS_ARMED;
632 	case IB_PORT_STATE_ACTIVE:	return 0;
633 	default:			return -EINVAL;
634 	}
635 }
636 
637 /***************************************************************************
638  *
639  * Multicast
640  *
641  ***************************************************************************
642  */
643 
644 /**
645  * Attach to multicast group
646  *
647  * @v ibdev		Infiniband device
648  * @v qp		Queue pair
649  * @v gid		Multicast GID
650  * @ret rc		Return status code
651  *
652  * Note that this function handles only the local device's attachment
653  * to the multicast GID; it does not issue the relevant MADs to join
654  * the multicast group on the subnet.
655  */
ib_mcast_attach(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_gid * gid)656 int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
657 		      struct ib_gid *gid ) {
658 	struct ib_multicast_gid *mgid;
659 	int rc;
660 
661 	/* Add to software multicast GID list */
662 	mgid = zalloc ( sizeof ( *mgid ) );
663 	if ( ! mgid ) {
664 		rc = -ENOMEM;
665 		goto err_alloc_mgid;
666 	}
667 	memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
668 	list_add ( &mgid->list, &qp->mgids );
669 
670 	/* Add to hardware multicast GID list */
671 	if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
672 		goto err_dev_mcast_attach;
673 
674 	return 0;
675 
676  err_dev_mcast_attach:
677 	list_del ( &mgid->list );
678 	free ( mgid );
679  err_alloc_mgid:
680 	return rc;
681 }
682 
683 /**
684  * Detach from multicast group
685  *
686  * @v ibdev		Infiniband device
687  * @v qp		Queue pair
688  * @v gid		Multicast GID
689  */
ib_mcast_detach(struct ib_device * ibdev,struct ib_queue_pair * qp,struct ib_gid * gid)690 void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
691 		       struct ib_gid *gid ) {
692 	struct ib_multicast_gid *mgid;
693 
694 	/* Remove from hardware multicast GID list */
695 	ibdev->op->mcast_detach ( ibdev, qp, gid );
696 
697 	/* Remove from software multicast GID list */
698 	list_for_each_entry ( mgid, &qp->mgids, list ) {
699 		if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
700 			list_del ( &mgid->list );
701 			free ( mgid );
702 			break;
703 		}
704 	}
705 }
706 
707 /***************************************************************************
708  *
709  * Miscellaneous
710  *
711  ***************************************************************************
712  */
713 
714 /**
715  * Get Infiniband HCA information
716  *
717  * @v ibdev		Infiniband device
718  * @ret hca_guid	HCA GUID
719  * @ret num_ports	Number of ports
720  */
ib_get_hca_info(struct ib_device * ibdev,struct ib_gid_half * hca_guid)721 int ib_get_hca_info ( struct ib_device *ibdev,
722 		      struct ib_gid_half *hca_guid ) {
723 	struct ib_device *tmp;
724 	int num_ports = 0;
725 
726 	/* Search for IB devices with the same physical device to
727 	 * identify port count and a suitable Node GUID.
728 	 */
729 	for_each_ibdev ( tmp ) {
730 		if ( tmp->dev != ibdev->dev )
731 			continue;
732 		if ( num_ports == 0 ) {
733 			memcpy ( hca_guid, &tmp->gid.u.half[1],
734 				 sizeof ( *hca_guid ) );
735 		}
736 		num_ports++;
737 	}
738 	return num_ports;
739 }
740 
741 /**
742  * Set port information
743  *
744  * @v ibdev		Infiniband device
745  * @v mad		Set port information MAD
746  */
ib_set_port_info(struct ib_device * ibdev,union ib_mad * mad)747 int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
748 	int rc;
749 
750 	/* Adapters with embedded SMAs do not need to support this method */
751 	if ( ! ibdev->op->set_port_info ) {
752 		DBGC ( ibdev, "IBDEV %p does not support setting port "
753 		       "information\n", ibdev );
754 		return -ENOTSUP;
755 	}
756 
757 	if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
758 		DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
759 		       ibdev, strerror ( rc ) );
760 		return rc;
761 	}
762 
763 	return 0;
764 };
765 
766 /**
767  * Set partition key table
768  *
769  * @v ibdev		Infiniband device
770  * @v mad		Set partition key table MAD
771  */
ib_set_pkey_table(struct ib_device * ibdev,union ib_mad * mad)772 int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
773 	int rc;
774 
775 	/* Adapters with embedded SMAs do not need to support this method */
776 	if ( ! ibdev->op->set_pkey_table ) {
777 		DBGC ( ibdev, "IBDEV %p does not support setting partition "
778 		       "key table\n", ibdev );
779 		return -ENOTSUP;
780 	}
781 
782 	if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
783 		DBGC ( ibdev, "IBDEV %p could not set partition key table: "
784 		       "%s\n", ibdev, strerror ( rc ) );
785 		return rc;
786 	}
787 
788 	return 0;
789 };
790 
791 /***************************************************************************
792  *
793  * Event queues
794  *
795  ***************************************************************************
796  */
797 
/**
 * Handle Infiniband link state change
 *
 * @v ibdev		Infiniband device
 *
 * Forwards the notification to IPoIB.
 */
void ib_link_state_changed ( struct ib_device *ibdev ) {
	ipoib_link_state_changed ( ibdev );
}
808 
809 /**
810  * Poll event queue
811  *
812  * @v ibdev		Infiniband device
813  */
ib_poll_eq(struct ib_device * ibdev)814 void ib_poll_eq ( struct ib_device *ibdev ) {
815 	struct ib_completion_queue *cq;
816 
817 	/* Poll device's event queue */
818 	ibdev->op->poll_eq ( ibdev );
819 
820 	/* Poll all completion queues */
821 	list_for_each_entry ( cq, &ibdev->cqs, list )
822 		ib_poll_cq ( ibdev, cq );
823 }
824 
825 /**
826  * Single-step the Infiniband event queue
827  *
828  * @v process		Infiniband event queue process
829  */
ib_step(struct process * process __unused)830 static void ib_step ( struct process *process __unused ) {
831 	struct ib_device *ibdev;
832 
833 	for_each_ibdev ( ibdev )
834 		ib_poll_eq ( ibdev );
835 }
836 
837 /** Infiniband event queue process */
838 struct process ib_process __permanent_process = {
839 	.list = LIST_HEAD_INIT ( ib_process.list ),
840 	.step = ib_step,
841 };
842 
843 /***************************************************************************
844  *
845  * Infiniband device creation/destruction
846  *
847  ***************************************************************************
848  */
849 
850 /**
851  * Allocate Infiniband device
852  *
853  * @v priv_size		Size of driver private data area
854  * @ret ibdev		Infiniband device, or NULL
855  */
alloc_ibdev(size_t priv_size)856 struct ib_device * alloc_ibdev ( size_t priv_size ) {
857 	struct ib_device *ibdev;
858 	void *drv_priv;
859 	size_t total_len;
860 
861 	total_len = ( sizeof ( *ibdev ) + priv_size );
862 	ibdev = zalloc ( total_len );
863 	if ( ibdev ) {
864 		drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
865 		ib_set_drvdata ( ibdev, drv_priv );
866 		INIT_LIST_HEAD ( &ibdev->cqs );
867 		INIT_LIST_HEAD ( &ibdev->qps );
868 		ibdev->port_state = IB_PORT_STATE_DOWN;
869 		ibdev->lid = IB_LID_NONE;
870 		ibdev->pkey = IB_PKEY_DEFAULT;
871 	}
872 	return ibdev;
873 }
874 
875 /**
876  * Register Infiniband device
877  *
878  * @v ibdev		Infiniband device
879  * @ret rc		Return status code
880  */
register_ibdev(struct ib_device * ibdev)881 int register_ibdev ( struct ib_device *ibdev ) {
882 	int rc;
883 
884 	/* Add to device list */
885 	ibdev_get ( ibdev );
886 	list_add_tail ( &ibdev->list, &ib_devices );
887 
888 	/* Add IPoIB device */
889 	if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
890 		DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
891 		       ibdev, strerror ( rc ) );
892 		goto err_ipoib_probe;
893 	}
894 
895 	DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
896 	       ibdev->dev->name );
897 	return 0;
898 
899  err_ipoib_probe:
900 	list_del ( &ibdev->list );
901 	ibdev_put ( ibdev );
902 	return rc;
903 }
904 
905 /**
906  * Unregister Infiniband device
907  *
908  * @v ibdev		Infiniband device
909  */
unregister_ibdev(struct ib_device * ibdev)910 void unregister_ibdev ( struct ib_device *ibdev ) {
911 
912 	/* Close device */
913 	ipoib_remove ( ibdev );
914 
915 	/* Remove from device list */
916 	list_del ( &ibdev->list );
917 	ibdev_put ( ibdev );
918 	DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
919 }
920 
921 /**
922  * Find Infiniband device by GID
923  *
924  * @v gid		GID
925  * @ret ibdev		Infiniband device, or NULL
926  */
find_ibdev(struct ib_gid * gid)927 struct ib_device * find_ibdev ( struct ib_gid *gid ) {
928 	struct ib_device *ibdev;
929 
930 	for_each_ibdev ( ibdev ) {
931 		if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
932 			return ibdev;
933 	}
934 	return NULL;
935 }
936 
937 /**
938  * Get most recently opened Infiniband device
939  *
940  * @ret ibdev		Most recently opened Infiniband device, or NULL
941  */
last_opened_ibdev(void)942 struct ib_device * last_opened_ibdev ( void ) {
943 	struct ib_device *ibdev;
944 
945 	list_for_each_entry ( ibdev, &open_ib_devices, open_list ) {
946 		assert ( ibdev->open_count != 0 );
947 		return ibdev;
948 	}
949 
950 	return NULL;
951 }
952