1 #ifndef _GPXE_INFINIBAND_H
2 #define _GPXE_INFINIBAND_H
3 
4 /** @file
5  *
6  * Infiniband protocol
7  *
8  */
9 
10 FILE_LICENCE ( GPL2_OR_LATER );
11 
12 #include <stdint.h>
13 #include <gpxe/refcnt.h>
14 #include <gpxe/device.h>
15 #include <gpxe/ib_packet.h>
16 #include <gpxe/ib_mad.h>
17 
/** Subnet management interface QPN */
#define IB_QPN_SMI 0

/** Subnet management interface queue key */
#define IB_QKEY_SMI 0

/** General service interface QPN */
#define IB_QPN_GSI 1

/** General service interface queue key */
#define IB_QKEY_GSI 0x80010000UL

/**
 * Broadcast QPN
 *
 * Has the same value as IB_QPN_MASK, i.e. every bit covered by the
 * QPN mask is set.
 */
#define IB_QPN_BROADCAST 0xffffffUL

/**
 * QPN mask
 *
 * Selects the low 24 bits of a value.
 */
#define IB_QPN_MASK 0xffffffUL

/** Default Infiniband partition key */
#define IB_PKEY_DEFAULT 0xffff

/**
 * Infiniband partition key full membership flag
 *
 * This is bit 15 of a partition key value.  Note that the bit is set
 * in IB_PKEY_DEFAULT.
 */
#define IB_PKEY_FULL 0x8000

/**
 * Maximum payload size
 *
 * This is currently hard-coded in various places (drivers, subnet
 * management agent, etc.) to 2048.
 */
#define IB_MAX_PAYLOAD_SIZE 2048
49 
/*
 * Forward declarations
 *
 * These allow the structures below to refer to one another by pointer
 * before the full definitions appear.  struct ib_mad_interface is not
 * defined in this header and is used here only via pointers.
 */
struct ib_device;
struct ib_queue_pair;
struct ib_address_vector;
struct ib_completion_queue;
struct ib_mad_interface;
55 
/**
 * Infiniband transmission rates
 *
 * The enumerator values are assigned explicitly and do not follow the
 * numeric order suggested by the names (e.g. IB_RATE_30 is 4 while
 * IB_RATE_5 is 5), so they must not be compared to rank rates.
 *
 * NOTE(review): the name suffixes presumably denote the rate in Gbit/s
 * (with "2_5" meaning 2.5), and the values presumably match an
 * on-the-wire/hardware encoding; confirm against the Infiniband
 * specification before relying on either.
 */
enum ib_rate {
	IB_RATE_2_5 = 2,
	IB_RATE_10 = 3,
	IB_RATE_30 = 4,
	IB_RATE_5 = 5,
	IB_RATE_20 = 6,
	IB_RATE_40 = 7,
	IB_RATE_60 = 8,
	IB_RATE_80 = 9,
	IB_RATE_120 = 10,
};
68 
/**
 * An Infiniband Address Vector
 *
 * Describes the remote end of a transfer.  The same structure is used
 * for both transmitted and received packets; some fields (see below)
 * are not filled in for received packets.
 */
struct ib_address_vector {
	/** Queue Pair Number */
	unsigned long qpn;
	/** Queue key
	 *
	 * Not specified for received packets.
	 */
	unsigned long qkey;
	/** Local ID */
	unsigned int lid;
	/** Rate
	 *
	 * Not specified for received packets.
	 */
	enum ib_rate rate;
	/** Service level */
	unsigned int sl;
	/** GID is present
	 *
	 * Flag indicating whether or not the @c gid field is meaningful.
	 */
	unsigned int gid_present;
	/** GID, if present (see @c gid_present) */
	struct ib_gid gid;
};
92 
/**
 * An Infiniband Work Queue
 *
 * A work queue is one half (send or receive) of a queue pair; see the
 * @c send and @c recv members of struct ib_queue_pair.
 */
struct ib_work_queue {
	/** Containing queue pair */
	struct ib_queue_pair *qp;
	/** "Is a send queue" flag */
	int is_send;
	/** Associated completion queue */
	struct ib_completion_queue *cq;
	/** List of work queues on this completion queue */
	struct list_head list;
	/** Packet sequence number */
	uint32_t psn;
	/** Number of work queue entries */
	unsigned int num_wqes;
	/** Number of occupied work queue entries */
	unsigned int fill;
	/** Next work queue entry index
	 *
	 * This is the index of the next entry to be filled (i.e. the
	 * first empty entry).  This value is not bounded by num_wqes;
	 * users must bitwise-AND with (num_wqes-1) to generate an
	 * array index.  (This masking scheme implies that num_wqes is
	 * expected to be a power of two.)
	 */
	unsigned long next_idx;
	/** I/O buffers assigned to work queue */
	struct io_buffer **iobufs;
	/** Driver private data
	 *
	 * Accessed via ib_wq_set_drvdata() and ib_wq_get_drvdata().
	 */
	void *drv_priv;
};
122 
/**
 * An Infiniband multicast GID
 *
 * List entry recording one multicast GID associated with a queue
 * pair; entries are linked via the @c mgids list head in
 * struct ib_queue_pair.
 */
struct ib_multicast_gid {
	/** List of multicast GIDs on this QP */
	struct list_head list;
	/** Multicast GID */
	struct ib_gid gid;
};
130 
/**
 * An Infiniband queue pair type
 *
 * Enumerators take the default values 0, 1, 2, 3 in declaration order.
 */
enum ib_queue_pair_type {
	/** Subnet management interface queue pair */
	IB_QPT_SMI,
	/** General service interface queue pair */
	IB_QPT_GSI,
	/** NOTE(review): presumably "unreliable datagram" - confirm */
	IB_QPT_UD,
	/** NOTE(review): presumably "reliable connected" - confirm */
	IB_QPT_RC,
};
138 
/**
 * An Infiniband Queue Pair
 *
 * Combines a send work queue and a receive work queue, together with
 * the addressing and ownership information shared between them.
 */
struct ib_queue_pair {
	/** Containing Infiniband device */
	struct ib_device *ibdev;
	/** List of queue pairs on this Infiniband device */
	struct list_head list;
	/** Queue pair number */
	unsigned long qpn;
	/** Externally-visible queue pair number
	 *
	 * This may differ from the real queue pair number (e.g. when
	 * the HCA cannot use the management QPNs 0 and 1 as hardware
	 * QPNs and needs to remap them).
	 */
	unsigned long ext_qpn;
	/** Queue pair type */
	enum ib_queue_pair_type type;
	/** Queue key */
	unsigned long qkey;
	/** Send queue */
	struct ib_work_queue send;
	/** Receive queue */
	struct ib_work_queue recv;
	/** List of multicast GIDs
	 *
	 * Entries are of type struct ib_multicast_gid.
	 */
	struct list_head mgids;
	/** Address vector */
	struct ib_address_vector av;
	/** Driver private data
	 *
	 * Accessed via ib_qp_set_drvdata() and ib_qp_get_drvdata().
	 */
	void *drv_priv;
	/** Queue owner private data
	 *
	 * Accessed via ib_qp_set_ownerdata() and ib_qp_get_ownerdata().
	 */
	void *owner_priv;
};
171 
/**
 * Infiniband completion queue operations
 *
 * Callbacks supplied by the completion queue's creator (see the @c op
 * parameter of ib_create_cq()) and invoked when work queue entries
 * complete.
 */
struct ib_completion_queue_operations {
	/**
	 * Complete Send WQE
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v iobuf		I/O buffer
	 * @v rc		Completion status code
	 */
	void ( * complete_send ) ( struct ib_device *ibdev,
				   struct ib_queue_pair *qp,
				   struct io_buffer *iobuf, int rc );
	/**
	 * Complete Receive WQE
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v av		Address vector, or NULL
	 * @v iobuf		I/O buffer
	 * @v rc		Completion status code
	 *
	 * Implementations must be prepared for @c av to be NULL.
	 */
	void ( * complete_recv ) ( struct ib_device *ibdev,
				   struct ib_queue_pair *qp,
				   struct ib_address_vector *av,
				   struct io_buffer *iobuf, int rc );
};
199 
/**
 * An Infiniband Completion Queue
 *
 * Work queues that report completions to this queue are linked via
 * @c work_queues; completions are delivered through the callbacks in
 * @c op.
 */
struct ib_completion_queue {
	/** Containing Infiniband device */
	struct ib_device *ibdev;
	/** List of completion queues on this Infiniband device */
	struct list_head list;
	/** Completion queue number */
	unsigned long cqn;
	/** Number of completion queue entries */
	unsigned int num_cqes;
	/** Next completion queue entry index
	 *
	 * This is the index of the next entry to be filled (i.e. the
	 * first empty entry).  This value is not bounded by num_cqes;
	 * users must bitwise-AND with (num_cqes-1) to generate an
	 * array index.  (This masking scheme implies that num_cqes is
	 * expected to be a power of two.)
	 */
	unsigned long next_idx;
	/** List of work queues completing to this queue */
	struct list_head work_queues;
	/** Completion queue operations */
	struct ib_completion_queue_operations *op;
	/** Driver private data
	 *
	 * Accessed via ib_cq_set_drvdata() and ib_cq_get_drvdata().
	 */
	void *drv_priv;
};
225 
/**
 * Infiniband device operations
 *
 * These represent a subset of the Infiniband Verbs.
 *
 * This is the table of methods that a device driver provides for an
 * Infiniband device (see the @c op member of struct ib_device).
 */
struct ib_device_operations {
	/** Create completion queue
	 *
	 * @v ibdev		Infiniband device
	 * @v cq		Completion queue
	 * @ret rc		Return status code
	 */
	int ( * create_cq ) ( struct ib_device *ibdev,
			      struct ib_completion_queue *cq );
	/** Destroy completion queue
	 *
	 * @v ibdev		Infiniband device
	 * @v cq		Completion queue
	 */
	void ( * destroy_cq ) ( struct ib_device *ibdev,
				struct ib_completion_queue *cq );
	/** Create queue pair
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @ret rc		Return status code
	 */
	int ( * create_qp ) ( struct ib_device *ibdev,
			      struct ib_queue_pair *qp );
	/** Modify queue pair
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @ret rc		Return status code
	 */
	int ( * modify_qp ) ( struct ib_device *ibdev,
			      struct ib_queue_pair *qp );
	/** Destroy queue pair
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 */
	void ( * destroy_qp ) ( struct ib_device *ibdev,
				struct ib_queue_pair *qp );
	/** Post send work queue entry
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v av		Address vector
	 * @v iobuf		I/O buffer
	 * @ret rc		Return status code
	 *
	 * If this method returns success, the I/O buffer remains
	 * owned by the queue pair.  If this method returns failure,
	 * the I/O buffer is immediately released; the failure is
	 * interpreted as "failure to enqueue buffer".
	 */
	int ( * post_send ) ( struct ib_device *ibdev,
			      struct ib_queue_pair *qp,
			      struct ib_address_vector *av,
			      struct io_buffer *iobuf );
	/** Post receive work queue entry
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v iobuf		I/O buffer
	 * @ret rc		Return status code
	 *
	 * If this method returns success, the I/O buffer remains
	 * owned by the queue pair.  If this method returns failure,
	 * the I/O buffer is immediately released; the failure is
	 * interpreted as "failure to enqueue buffer".
	 */
	int ( * post_recv ) ( struct ib_device *ibdev,
			      struct ib_queue_pair *qp,
			      struct io_buffer *iobuf );
	/** Poll completion queue
	 *
	 * @v ibdev		Infiniband device
	 * @v cq		Completion queue
	 *
	 * The relevant completion handler (specified at completion
	 * queue creation time) takes ownership of the I/O buffer.
	 */
	void ( * poll_cq ) ( struct ib_device *ibdev,
			     struct ib_completion_queue *cq );
	/**
	 * Poll event queue
	 *
	 * @v ibdev		Infiniband device
	 */
	void ( * poll_eq ) ( struct ib_device *ibdev );
	/**
	 * Open port
	 *
	 * @v ibdev		Infiniband device
	 * @ret rc		Return status code
	 */
	int ( * open ) ( struct ib_device *ibdev );
	/**
	 * Close port
	 *
	 * @v ibdev		Infiniband device
	 */
	void ( * close ) ( struct ib_device *ibdev );
	/** Attach to multicast group
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v gid		Multicast GID
	 * @ret rc		Return status code
	 */
	int ( * mcast_attach ) ( struct ib_device *ibdev,
				 struct ib_queue_pair *qp,
				 struct ib_gid *gid );
	/** Detach from multicast group
	 *
	 * @v ibdev		Infiniband device
	 * @v qp		Queue pair
	 * @v gid		Multicast GID
	 */
	void ( * mcast_detach ) ( struct ib_device *ibdev,
				  struct ib_queue_pair *qp,
				  struct ib_gid *gid );
	/** Set port information
	 *
	 * @v ibdev		Infiniband device
	 * @v mad		Set port information MAD
	 * @ret rc		Return status code
	 *
	 * This method is required only by adapters that do not have
	 * an embedded SMA.
	 */
	int ( * set_port_info ) ( struct ib_device *ibdev, union ib_mad *mad );
	/** Set partition key table
	 *
	 * @v ibdev		Infiniband device
	 * @v mad		Set partition key table MAD
	 * @ret rc		Return status code
	 *
	 * This method is required only by adapters that do not have
	 * an embedded SMA.
	 */
	int ( * set_pkey_table ) ( struct ib_device *ibdev,
				   union ib_mad *mad );
};
370 
/**
 * An Infiniband device
 *
 * The structure is reference-counted; use ibdev_get() and ibdev_put()
 * to take and drop references.
 */
struct ib_device {
	/** Reference counter */
	struct refcnt refcnt;
	/** List of Infiniband devices
	 *
	 * This is the list member used by for_each_ibdev().
	 */
	struct list_head list;
	/** List of open Infiniband devices */
	struct list_head open_list;
	/** Underlying device */
	struct device *dev;
	/** List of completion queues */
	struct list_head cqs;
	/** List of queue pairs */
	struct list_head qps;
	/** Infiniband operations */
	struct ib_device_operations *op;
	/** Port number */
	unsigned int port;
	/** Port open request counter */
	unsigned int open_count;

	/** Port state
	 *
	 * ib_link_ok() reports the link as up when this equals
	 * IB_PORT_STATE_ACTIVE.
	 */
	uint8_t port_state;
	/** Link width supported */
	uint8_t link_width_supported;
	/** Link width enabled */
	uint8_t link_width_enabled;
	/** Link width active */
	uint8_t link_width_active;
	/** Link speed supported */
	uint8_t link_speed_supported;
	/** Link speed enabled */
	uint8_t link_speed_enabled;
	/** Link speed active */
	uint8_t link_speed_active;
	/** Port GID */
	struct ib_gid gid;
	/** Port LID */
	uint16_t lid;
	/** Subnet manager LID */
	uint16_t sm_lid;
	/** Subnet manager SL */
	uint8_t sm_sl;
	/** Partition key */
	uint16_t pkey;

	/** RDMA key
	 *
	 * This is a single key allowing unrestricted access to
	 * memory.
	 */
	uint32_t rdma_key;

	/** Subnet management interface */
	struct ib_mad_interface *smi;
	/** General services interface */
	struct ib_mad_interface *gsi;

	/** Driver private data
	 *
	 * Accessed via ib_set_drvdata() and ib_get_drvdata().
	 */
	void *drv_priv;
	/** Owner private data
	 *
	 * Accessed via ib_set_ownerdata() and ib_get_ownerdata().
	 */
	void *owner_priv;
};
434 
/*
 * Functions and data exported by the Infiniband core.  The definitions
 * live outside this header.
 */

/* Completion queues */
extern struct ib_completion_queue *
ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
	       struct ib_completion_queue_operations *op );
extern void ib_destroy_cq ( struct ib_device *ibdev,
			    struct ib_completion_queue *cq );
extern void ib_poll_cq ( struct ib_device *ibdev,
			 struct ib_completion_queue *cq );

/* Queue pairs and work queues */
extern struct ib_queue_pair *
ib_create_qp ( struct ib_device *ibdev, enum ib_queue_pair_type type,
	       unsigned int num_send_wqes, struct ib_completion_queue *send_cq,
	       unsigned int num_recv_wqes,
	       struct ib_completion_queue *recv_cq );
extern int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp );
extern void ib_destroy_qp ( struct ib_device *ibdev,
			    struct ib_queue_pair *qp );
extern struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
					       unsigned long qpn );
extern struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
						struct ib_gid *gid );
extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
					   unsigned long qpn, int is_send );

/* Posting and completing work queue entries */
extern int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
			  struct ib_address_vector *av,
			  struct io_buffer *iobuf );
extern int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
			  struct io_buffer *iobuf );
extern void ib_complete_send ( struct ib_device *ibdev,
			       struct ib_queue_pair *qp,
			       struct io_buffer *iobuf, int rc );
extern void ib_complete_recv ( struct ib_device *ibdev,
			       struct ib_queue_pair *qp,
			       struct ib_address_vector *av,
			       struct io_buffer *iobuf, int rc );
extern void ib_refill_recv ( struct ib_device *ibdev,
			     struct ib_queue_pair *qp );

/* Port open/close and link status */
extern int ib_open ( struct ib_device *ibdev );
extern void ib_close ( struct ib_device *ibdev );
extern int ib_link_rc ( struct ib_device *ibdev );

/* Multicast group membership */
extern int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
			     struct ib_gid *gid );
extern void ib_mcast_detach ( struct ib_device *ibdev,
			      struct ib_queue_pair *qp, struct ib_gid *gid );

/* Device information and management */
extern int ib_get_hca_info ( struct ib_device *ibdev,
			     struct ib_gid_half *hca_guid );
extern int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad );
extern int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad );

/* Device allocation, registration and lookup */
extern struct ib_device * alloc_ibdev ( size_t priv_size );
extern int register_ibdev ( struct ib_device *ibdev );
extern void unregister_ibdev ( struct ib_device *ibdev );
extern struct ib_device * find_ibdev ( struct ib_gid *gid );
extern struct ib_device * last_opened_ibdev ( void );

/* Event handling */
extern void ib_link_state_changed ( struct ib_device *ibdev );
extern void ib_poll_eq ( struct ib_device *ibdev );

/** List of Infiniband devices (iterated by for_each_ibdev()) */
extern struct list_head ib_devices;
489 
/**
 * Iterate over all Infiniband devices
 *
 * @v ibdev		Loop cursor (a struct ib_device pointer variable)
 *
 * Expands to a list_for_each_entry() loop over the ib_devices list,
 * using the @c list member of struct ib_device as the link.
 */
#define for_each_ibdev( ibdev ) \
	list_for_each_entry ( (ibdev), &ib_devices, list )
493 
494 /**
495  * Check link state
496  *
497  * @v ibdev		Infiniband device
498  * @ret link_up		Link is up
499  */
500 static inline __always_inline int
ib_link_ok(struct ib_device * ibdev)501 ib_link_ok ( struct ib_device *ibdev ) {
502 	return ( ibdev->port_state == IB_PORT_STATE_ACTIVE );
503 }
504 
505 /**
506  * Get reference to Infiniband device
507  *
508  * @v ibdev		Infiniband device
509  * @ret ibdev		Infiniband device
510  */
511 static inline __always_inline struct ib_device *
ibdev_get(struct ib_device * ibdev)512 ibdev_get ( struct ib_device *ibdev ) {
513 	ref_get ( &ibdev->refcnt );
514 	return ibdev;
515 }
516 
517 /**
518  * Drop reference to Infiniband device
519  *
520  * @v ibdev		Infiniband device
521  */
522 static inline __always_inline void
ibdev_put(struct ib_device * ibdev)523 ibdev_put ( struct ib_device *ibdev ) {
524 	ref_put ( &ibdev->refcnt );
525 }
526 
527 /**
528  * Set Infiniband work queue driver-private data
529  *
530  * @v wq		Work queue
531  * @v priv		Private data
532  */
533 static inline __always_inline void
ib_wq_set_drvdata(struct ib_work_queue * wq,void * priv)534 ib_wq_set_drvdata ( struct ib_work_queue *wq, void *priv ) {
535 	wq->drv_priv = priv;
536 }
537 
538 /**
539  * Get Infiniband work queue driver-private data
540  *
541  * @v wq		Work queue
542  * @ret priv		Private data
543  */
544 static inline __always_inline void *
ib_wq_get_drvdata(struct ib_work_queue * wq)545 ib_wq_get_drvdata ( struct ib_work_queue *wq ) {
546 	return wq->drv_priv;
547 }
548 
549 /**
550  * Set Infiniband queue pair driver-private data
551  *
552  * @v qp		Queue pair
553  * @v priv		Private data
554  */
555 static inline __always_inline void
ib_qp_set_drvdata(struct ib_queue_pair * qp,void * priv)556 ib_qp_set_drvdata ( struct ib_queue_pair *qp, void *priv ) {
557 	qp->drv_priv = priv;
558 }
559 
560 /**
561  * Get Infiniband queue pair driver-private data
562  *
563  * @v qp		Queue pair
564  * @ret priv		Private data
565  */
566 static inline __always_inline void *
ib_qp_get_drvdata(struct ib_queue_pair * qp)567 ib_qp_get_drvdata ( struct ib_queue_pair *qp ) {
568 	return qp->drv_priv;
569 }
570 
571 /**
572  * Set Infiniband queue pair owner-private data
573  *
574  * @v qp		Queue pair
575  * @v priv		Private data
576  */
577 static inline __always_inline void
ib_qp_set_ownerdata(struct ib_queue_pair * qp,void * priv)578 ib_qp_set_ownerdata ( struct ib_queue_pair *qp, void *priv ) {
579 	qp->owner_priv = priv;
580 }
581 
582 /**
583  * Get Infiniband queue pair owner-private data
584  *
585  * @v qp		Queue pair
586  * @ret priv		Private data
587  */
588 static inline __always_inline void *
ib_qp_get_ownerdata(struct ib_queue_pair * qp)589 ib_qp_get_ownerdata ( struct ib_queue_pair *qp ) {
590 	return qp->owner_priv;
591 }
592 
593 /**
594  * Set Infiniband completion queue driver-private data
595  *
596  * @v cq		Completion queue
597  * @v priv		Private data
598  */
599 static inline __always_inline void
ib_cq_set_drvdata(struct ib_completion_queue * cq,void * priv)600 ib_cq_set_drvdata ( struct ib_completion_queue *cq, void *priv ) {
601 	cq->drv_priv = priv;
602 }
603 
604 /**
605  * Get Infiniband completion queue driver-private data
606  *
607  * @v cq		Completion queue
608  * @ret priv		Private data
609  */
610 static inline __always_inline void *
ib_cq_get_drvdata(struct ib_completion_queue * cq)611 ib_cq_get_drvdata ( struct ib_completion_queue *cq ) {
612 	return cq->drv_priv;
613 }
614 
615 /**
616  * Set Infiniband device driver-private data
617  *
618  * @v ibdev		Infiniband device
619  * @v priv		Private data
620  */
621 static inline __always_inline void
ib_set_drvdata(struct ib_device * ibdev,void * priv)622 ib_set_drvdata ( struct ib_device *ibdev, void *priv ) {
623 	ibdev->drv_priv = priv;
624 }
625 
626 /**
627  * Get Infiniband device driver-private data
628  *
629  * @v ibdev		Infiniband device
630  * @ret priv		Private data
631  */
632 static inline __always_inline void *
ib_get_drvdata(struct ib_device * ibdev)633 ib_get_drvdata ( struct ib_device *ibdev ) {
634 	return ibdev->drv_priv;
635 }
636 
637 /**
638  * Set Infiniband device owner-private data
639  *
640  * @v ibdev		Infiniband device
641  * @v priv		Private data
642  */
643 static inline __always_inline void
ib_set_ownerdata(struct ib_device * ibdev,void * priv)644 ib_set_ownerdata ( struct ib_device *ibdev, void *priv ) {
645 	ibdev->owner_priv = priv;
646 }
647 
648 /**
649  * Get Infiniband device owner-private data
650  *
651  * @v ibdev		Infiniband device
652  * @ret priv		Private data
653  */
654 static inline __always_inline void *
ib_get_ownerdata(struct ib_device * ibdev)655 ib_get_ownerdata ( struct ib_device *ibdev ) {
656 	return ibdev->owner_priv;
657 }
658 
659 #endif /* _GPXE_INFINIBAND_H */
660