1 /* SPDX-License-Identifier: LGPL-2.1-only */
2 /*
3  * lib/route/tc.c		Traffic Control
4  *
5  *	This library is free software; you can redistribute it and/or
6  *	modify it under the terms of the GNU Lesser General Public
7  *	License as published by the Free Software Foundation version 2.1
8  *	of the License.
9  *
10  * Copyright (c) 2003-2011 Thomas Graf <tgraf@suug.ch>
11  */
12 
13 /**
14  * @ingroup rtnl
15  * @defgroup tc Traffic Control
16  * @{
17  */
18 
#include <netlink-private/netlink.h>
#include <netlink-private/tc.h>
#include <netlink/netlink.h>
#include <netlink/utils.h>
#include <netlink/route/rtnl.h>
#include <netlink/route/link.h>
#include <netlink/route/tc.h>
#include <netlink-private/route/tc-api.h>

#include "netlink-private/utils.h"

#include <limits.h>
29 
30 /** @cond SKIP */
31 
/* Registered tc modules (struct rtnl_tc_ops), one list per tc type. The
 * list heads are initialized on the first call to rtnl_tc_register(). */
static struct nl_list_head tc_ops_list[__RTNL_TC_TYPE_MAX];
/* Per-type operations installed by rtnl_tc_type_register(), indexed by
 * the tc type. */
static struct rtnl_tc_type_ops *tc_type_ops[__RTNL_TC_TYPE_MAX];
34 
/* Validation policy for the top-level TCA_* attributes of a tc message,
 * handed to nlmsg_parse() in rtnl_tc_msg_parse(). */
static struct nla_policy tc_policy[TCA_MAX+1] = {
	[TCA_KIND]	= { .type = NLA_STRING,
			    .maxlen = TCKINDSIZ },
	[TCA_CHAIN]	= { .type = NLA_U32 },
	[TCA_STATS]	= { .minlen = sizeof(struct tc_stats) },
	[TCA_STATS2]	= { .type = NLA_NESTED },
};
42 
tca_parse(struct nlattr ** tb,int maxattr,struct rtnl_tc * g,const struct nla_policy * policy)43 int tca_parse(struct nlattr **tb, int maxattr, struct rtnl_tc *g,
44 	      const struct nla_policy *policy)
45 {
46 
47 	if (g->ce_mask & TCA_ATTR_OPTS)
48 		return nla_parse(tb, maxattr,
49 				 (struct nlattr *) g->tc_opts->d_data,
50 				 g->tc_opts->d_size, policy);
51 	else {
52 		/* Ugly but tb[] must be in a defined state even if no
53 		 * attributes can be found. */
54 		memset(tb, 0, sizeof(struct nlattr *) * (maxattr + 1));
55 		return 0;
56 	}
57 }
58 
/* Policy for the attributes nested inside TCA_STATS2: each listed attribute
 * must be at least as large as the corresponding gnet_stats_* structure. */
static struct nla_policy tc_stats2_policy[TCA_STATS_MAX+1] = {
	[TCA_STATS_BASIC]    = { .minlen = sizeof(struct gnet_stats_basic) },
	[TCA_STATS_RATE_EST] = { .minlen = sizeof(struct gnet_stats_rate_est) },
	[TCA_STATS_QUEUE]    = { .minlen = sizeof(struct gnet_stats_queue) },
};
64 
rtnl_tc_msg_parse(struct nlmsghdr * n,struct rtnl_tc * tc)65 int rtnl_tc_msg_parse(struct nlmsghdr *n, struct rtnl_tc *tc)
66 {
67 	struct nl_cache *link_cache;
68 	struct rtnl_tc_ops *ops;
69 	struct nlattr *tb[TCA_MAX + 1];
70 	char kind[TCKINDSIZ];
71 	struct tcmsg *tm;
72 	int err;
73 
74 	tc->ce_msgtype = n->nlmsg_type;
75 
76 	err = nlmsg_parse(n, sizeof(*tm), tb, TCA_MAX, tc_policy);
77 	if (err < 0)
78 		return err;
79 
80 	if (tb[TCA_KIND] == NULL)
81 		return -NLE_MISSING_ATTR;
82 
83 	nla_strlcpy(kind, tb[TCA_KIND], sizeof(kind));
84 	rtnl_tc_set_kind(tc, kind);
85 
86 	if (tb[TCA_CHAIN])
87 	        rtnl_tc_set_chain(tc, nla_get_u32(tb[TCA_CHAIN]));
88 
89 	tm = nlmsg_data(n);
90 	tc->tc_family  = tm->tcm_family;
91 	tc->tc_ifindex = tm->tcm_ifindex;
92 	tc->tc_handle  = tm->tcm_handle;
93 	tc->tc_parent  = tm->tcm_parent;
94 	tc->tc_info    = tm->tcm_info;
95 
96 	tc->ce_mask |= (TCA_ATTR_FAMILY | TCA_ATTR_IFINDEX | TCA_ATTR_HANDLE|
97 		        TCA_ATTR_PARENT | TCA_ATTR_INFO);
98 
99 	if (tb[TCA_OPTIONS]) {
100 		tc->tc_opts = nl_data_alloc_attr(tb[TCA_OPTIONS]);
101 		if (!tc->tc_opts)
102 			return -NLE_NOMEM;
103 		tc->ce_mask |= TCA_ATTR_OPTS;
104 	}
105 
106 	if (tb[TCA_STATS2]) {
107 		struct nlattr *tbs[TCA_STATS_MAX + 1];
108 
109 		err = nla_parse_nested(tbs, TCA_STATS_MAX, tb[TCA_STATS2],
110 				       tc_stats2_policy);
111 		if (err < 0)
112 			return err;
113 
114 		if (tbs[TCA_STATS_BASIC]) {
115 			struct gnet_stats_basic *bs;
116 
117 			bs = nla_data(tbs[TCA_STATS_BASIC]);
118 			tc->tc_stats[RTNL_TC_BYTES]	= bs->bytes;
119 			tc->tc_stats[RTNL_TC_PACKETS]	= bs->packets;
120 		}
121 
122 		if (tbs[TCA_STATS_RATE_EST]) {
123 			struct gnet_stats_rate_est *re;
124 
125 			re = nla_data(tbs[TCA_STATS_RATE_EST]);
126 			tc->tc_stats[RTNL_TC_RATE_BPS]	= re->bps;
127 			tc->tc_stats[RTNL_TC_RATE_PPS]	= re->pps;
128 		}
129 
130 		if (tbs[TCA_STATS_QUEUE]) {
131 			struct gnet_stats_queue *q;
132 
133 			q = nla_data(tbs[TCA_STATS_QUEUE]);
134 			tc->tc_stats[RTNL_TC_QLEN]	= q->qlen;
135 			tc->tc_stats[RTNL_TC_BACKLOG]	= q->backlog;
136 			tc->tc_stats[RTNL_TC_DROPS]	= q->drops;
137 			tc->tc_stats[RTNL_TC_REQUEUES]	= q->requeues;
138 			tc->tc_stats[RTNL_TC_OVERLIMITS]	= q->overlimits;
139 		}
140 
141 		tc->ce_mask |= TCA_ATTR_STATS;
142 
143 		if (tbs[TCA_STATS_APP]) {
144 			tc->tc_xstats = nl_data_alloc_attr(tbs[TCA_STATS_APP]);
145 			if (tc->tc_xstats == NULL)
146 				return -NLE_NOMEM;
147 			tc->ce_mask |= TCA_ATTR_XSTATS;
148 		} else
149 			goto compat_xstats;
150 	} else {
151 		if (tb[TCA_STATS]) {
152 			struct tc_stats *st = nla_data(tb[TCA_STATS]);
153 
154 			tc->tc_stats[RTNL_TC_BYTES]	= st->bytes;
155 			tc->tc_stats[RTNL_TC_PACKETS]	= st->packets;
156 			tc->tc_stats[RTNL_TC_RATE_BPS]	= st->bps;
157 			tc->tc_stats[RTNL_TC_RATE_PPS]	= st->pps;
158 			tc->tc_stats[RTNL_TC_QLEN]	= st->qlen;
159 			tc->tc_stats[RTNL_TC_BACKLOG]	= st->backlog;
160 			tc->tc_stats[RTNL_TC_DROPS]	= st->drops;
161 			tc->tc_stats[RTNL_TC_OVERLIMITS]= st->overlimits;
162 
163 			tc->ce_mask |= TCA_ATTR_STATS;
164 		}
165 
166 compat_xstats:
167 		if (tb[TCA_XSTATS]) {
168 			tc->tc_xstats = nl_data_alloc_attr(tb[TCA_XSTATS]);
169 			if (tc->tc_xstats == NULL)
170 				return -NLE_NOMEM;
171 			tc->ce_mask |= TCA_ATTR_XSTATS;
172 		}
173 	}
174 
175 	ops = rtnl_tc_get_ops(tc);
176 	if (ops && ops->to_msg_parser) {
177 		void *data = rtnl_tc_data(tc);
178 
179 		if (!data)
180 			return -NLE_NOMEM;
181 
182 		err = ops->to_msg_parser(tc, data);
183 		if (err < 0)
184 			return err;
185 	}
186 
187 	if ((link_cache = __nl_cache_mngt_require("route/link"))) {
188 		struct rtnl_link *link;
189 
190 		if ((link = rtnl_link_get(link_cache, tc->tc_ifindex))) {
191 			rtnl_tc_set_link(tc, link);
192 
193 			/* rtnl_tc_set_link incs refcnt */
194 			rtnl_link_put(link);
195 		}
196 	}
197 
198 	return 0;
199 }
200 
rtnl_tc_msg_build(struct rtnl_tc * tc,int type,int flags,struct nl_msg ** result)201 int rtnl_tc_msg_build(struct rtnl_tc *tc, int type, int flags,
202 		      struct nl_msg **result)
203 {
204 	struct nl_msg *msg;
205 	struct rtnl_tc_ops *ops;
206 	struct tcmsg tchdr = {
207 		.tcm_family = AF_UNSPEC,
208 		.tcm_ifindex = tc->tc_ifindex,
209 		.tcm_handle = tc->tc_handle,
210 		.tcm_parent = tc->tc_parent,
211 	};
212 	int err;
213 
214 	msg = nlmsg_alloc_simple(type, flags);
215 	if (!msg)
216 		return -NLE_NOMEM;
217 
218 	if (nlmsg_append(msg, &tchdr, sizeof(tchdr), NLMSG_ALIGNTO) < 0) {
219 		err = -NLE_MSGSIZE;
220 		goto out_err;
221 	}
222 
223 	if (tc->ce_mask & TCA_ATTR_KIND)
224 		NLA_PUT_STRING(msg, TCA_KIND, tc->tc_kind);
225 
226 	if (tc->ce_mask & TCA_ATTR_CHAIN)
227 	        NLA_PUT_U32(msg, TCA_CHAIN, tc->tc_chain);
228 
229 	ops = rtnl_tc_get_ops(tc);
230 	if (ops && (ops->to_msg_fill || ops->to_msg_fill_raw)) {
231 		struct nlattr *opts;
232 		void *data = rtnl_tc_data(tc);
233 
234 		if (ops->to_msg_fill) {
235 			if (!(opts = nla_nest_start(msg, TCA_OPTIONS))) {
236 				err = -NLE_NOMEM;
237 				goto out_err;
238 			}
239 
240 			if ((err = ops->to_msg_fill(tc, data, msg)) < 0)
241 				goto out_err;
242 
243 			if (strcmp("cgroup", tc->tc_kind))
244 				nla_nest_end(msg, opts);
245 			else
246 				nla_nest_end_keep_empty(msg, opts);
247 		} else if ((err = ops->to_msg_fill_raw(tc, data, msg)) < 0)
248 			goto out_err;
249 	}
250 
251 	*result = msg;
252 	return 0;
253 
254 nla_put_failure:
255 	err = -NLE_NOMEM;
256 out_err:
257 	nlmsg_free(msg);
258 	return err;
259 }
260 
261 
262 /** @endcond */
263 
264 /**
265  * @name Attributes
266  * @{
267  */
268 
269 /**
270  * Set interface index of traffic control object
271  * @arg tc		traffic control object
272  * @arg ifindex		interface index.
273  *
274  * Sets the interface index of a traffic control object. The interface
275  * index defines the network device which this tc object is attached to.
276  * This function will overwrite any network device assigned with previous
277  * calls to rtnl_tc_set_ifindex() or rtnl_tc_set_link().
278  */
rtnl_tc_set_ifindex(struct rtnl_tc * tc,int ifindex)279 void rtnl_tc_set_ifindex(struct rtnl_tc *tc, int ifindex)
280 {
281 	/* Obsolete possible old link reference */
282 	rtnl_link_put(tc->tc_link);
283 	tc->tc_link = NULL;
284 	tc->ce_mask &= ~TCA_ATTR_LINK;
285 
286 	tc->tc_ifindex = ifindex;
287 	tc->ce_mask |= TCA_ATTR_IFINDEX;
288 }
289 
290 /**
291  * Return interface index of traffic control object
292  * @arg tc		traffic control object
293  */
rtnl_tc_get_ifindex(struct rtnl_tc * tc)294 int rtnl_tc_get_ifindex(struct rtnl_tc *tc)
295 {
296 	return tc->tc_ifindex;
297 }
298 
299 /**
300  * Set link of traffic control object
301  * @arg tc		traffic control object
302  * @arg link		link object
303  *
304  * Sets the link of a traffic control object. This function serves
305  * the same purpose as rtnl_tc_set_ifindex() but due to the continued
306  * allowed access to the link object it gives it the possibility to
307  * retrieve sane default values for the the MTU and the linktype.
308  * Always prefer this function over rtnl_tc_set_ifindex() if you can
309  * spare to have an additional link object around.
310  */
rtnl_tc_set_link(struct rtnl_tc * tc,struct rtnl_link * link)311 void rtnl_tc_set_link(struct rtnl_tc *tc, struct rtnl_link *link)
312 {
313 	rtnl_link_put(tc->tc_link);
314 
315 	if (!link)
316 		return;
317 	if (!link->l_index)
318 		BUG();
319 
320 	nl_object_get(OBJ_CAST(link));
321 	tc->tc_link = link;
322 	tc->tc_ifindex = link->l_index;
323 	tc->ce_mask |= TCA_ATTR_LINK | TCA_ATTR_IFINDEX;
324 }
325 
326 /**
327  * Get link of traffic control object
328  * @arg tc		traffic control object
329  *
330  * Returns the link of a traffic control object. The link is only
331  * returned if it has been set before via rtnl_tc_set_link() or
332  * if a link cache was available while parsing the tc object. This
333  * function may still return NULL even if an ifindex is assigned to
334  * the tc object. It will _not_ look up the link by itself.
335  *
336  * @note The returned link will have its reference counter incremented.
337  *       It is in the responsibility of the caller to return the
338  *       reference.
339  *
340  * @return link object or NULL if not set.
341  */
rtnl_tc_get_link(struct rtnl_tc * tc)342 struct rtnl_link *rtnl_tc_get_link(struct rtnl_tc *tc)
343 {
344 	if (tc->tc_link) {
345 		nl_object_get(OBJ_CAST(tc->tc_link));
346 		return tc->tc_link;
347 	}
348 
349 	return NULL;
350 }
351 
352 /**
353  * Set the Maximum Transmission Unit (MTU) of traffic control object
354  * @arg tc		traffic control object
355  * @arg mtu		largest packet size expected
356  *
357  * Sets the MTU of a traffic control object. Not all traffic control
358  * objects will make use of this but it helps while calculating rate
359  * tables. This value is typically derived directly from the link
360  * the tc object is attached to if the link has been assigned via
361  * rtnl_tc_set_link(). It is usually not necessary to set the MTU
362  * manually, this function is provided to allow overwriting the derived
363  * value.
364  */
rtnl_tc_set_mtu(struct rtnl_tc * tc,uint32_t mtu)365 void rtnl_tc_set_mtu(struct rtnl_tc *tc, uint32_t mtu)
366 {
367 	tc->tc_mtu = mtu;
368 	tc->ce_mask |= TCA_ATTR_MTU;
369 }
370 
371 /**
372  * Return the MTU of traffic control object
373  * @arg tc		traffic control object
374  *
375  * Returns the MTU of a traffic control object which has been set via:
376  * -# User specified value set via rtnl_tc_set_mtu()
377  * -# Dervied from link set via rtnl_tc_set_link()
378  * -# Fall back to default: ethernet = 1500
379  */
rtnl_tc_get_mtu(struct rtnl_tc * tc)380 uint32_t rtnl_tc_get_mtu(struct rtnl_tc *tc)
381 {
382 	if (tc->ce_mask & TCA_ATTR_MTU)
383 		return tc->tc_mtu;
384 	else if (tc->ce_mask & TCA_ATTR_LINK)
385 		return tc->tc_link->l_mtu;
386 	else
387 		return 1500; /* default to ethernet */
388 }
389 
390 /**
391  * Set the Minimum Packet Unit (MPU) of a traffic control object
392  * @arg tc		traffic control object
393  * @arg mpu		minimum packet size expected
394  *
395  * Sets the MPU of a traffic contorl object. It specifies the minimum
396  * packet size to ever hit this traffic control object. Not all traffic
397  * control objects will make use of this but it helps while calculating
398  * rate tables.
399  */
rtnl_tc_set_mpu(struct rtnl_tc * tc,uint32_t mpu)400 void rtnl_tc_set_mpu(struct rtnl_tc *tc, uint32_t mpu)
401 {
402 	tc->tc_mpu = mpu;
403 	tc->ce_mask |= TCA_ATTR_MPU;
404 }
405 
406 /**
407  * Return the Minimum Packet Unit (MPU) of a traffic control object
408  * @arg tc		traffic control object
409  *
410  * @return The MPU previously set via rtnl_tc_set_mpu() or 0.
411  */
rtnl_tc_get_mpu(struct rtnl_tc * tc)412 uint32_t rtnl_tc_get_mpu(struct rtnl_tc *tc)
413 {
414 	return tc->tc_mpu;
415 }
416 
417 /**
418  * Set per packet overhead of a traffic control object
419  * @arg tc		traffic control object
420  * @arg overhead	overhead per packet in bytes
421  *
422  * Sets the per packet overhead in bytes occuring on the link not seen
423  * by the kernel. This value can be used to correct size calculations
424  * if the packet size on the wire does not match the packet sizes seen
425  * in the network stack. Not all traffic control objects will make use
426  * this but it helps while calculating accurate packet sizes in the
427  * kernel.
428  */
rtnl_tc_set_overhead(struct rtnl_tc * tc,uint32_t overhead)429 void rtnl_tc_set_overhead(struct rtnl_tc *tc, uint32_t overhead)
430 {
431 	tc->tc_overhead = overhead;
432 	tc->ce_mask |= TCA_ATTR_OVERHEAD;
433 }
434 
435 /**
436  * Return per packet overhead of a traffic control object
437  * @arg tc		traffic control object
438  *
439  * @return The overhead previously set by rtnl_tc_set_overhead() or 0.
440  */
rtnl_tc_get_overhead(struct rtnl_tc * tc)441 uint32_t rtnl_tc_get_overhead(struct rtnl_tc *tc)
442 {
443 	return tc->tc_overhead;
444 }
445 
446 /**
447  * Set the linktype of a traffic control object
448  * @arg tc		traffic control object
449  * @arg type		type of link (e.g. ARPHRD_ATM, ARPHRD_ETHER)
450  *
451  * Overwrites the type of link this traffic control object is attached to.
452  * This value is typically derived from the link this tc object is attached
453  * if the link has been assigned via rtnl_tc_set_link(). It is usually not
454  * necessary to set the linktype manually. This function is provided to
455  * allow overwriting the linktype.
456  */
rtnl_tc_set_linktype(struct rtnl_tc * tc,uint32_t type)457 void rtnl_tc_set_linktype(struct rtnl_tc *tc, uint32_t type)
458 {
459 	tc->tc_linktype = type;
460 	tc->ce_mask |= TCA_ATTR_LINKTYPE;
461 }
462 
463 /**
464  * Return the linktype of a traffic control object
465  * @arg tc		traffic control object
466  *
467  * Returns the linktype of the link the traffic control object is attached to:
468  * -# User specified value via rtnl_tc_set_linktype()
469  * -# Value derived from link set via rtnl_tc_set_link()
470  * -# Default fall-back: ARPHRD_ETHER
471  */
rtnl_tc_get_linktype(struct rtnl_tc * tc)472 uint32_t rtnl_tc_get_linktype(struct rtnl_tc *tc)
473 {
474 	if (tc->ce_mask & TCA_ATTR_LINKTYPE)
475 		return tc->tc_linktype;
476 	else if (tc->ce_mask & TCA_ATTR_LINK)
477 		return tc->tc_link->l_arptype;
478 	else
479 		return ARPHRD_ETHER; /* default to ethernet */
480 }
481 
482 /**
483  * Set identifier of traffic control object
484  * @arg tc		traffic control object
485  * @arg id		unique identifier
486  */
rtnl_tc_set_handle(struct rtnl_tc * tc,uint32_t id)487 void rtnl_tc_set_handle(struct rtnl_tc *tc, uint32_t id)
488 {
489 	tc->tc_handle = id;
490 	tc->ce_mask |= TCA_ATTR_HANDLE;
491 }
492 
493 /**
494  * Return identifier of a traffic control object
495  * @arg tc		traffic control object
496  */
rtnl_tc_get_handle(struct rtnl_tc * tc)497 uint32_t rtnl_tc_get_handle(struct rtnl_tc *tc)
498 {
499 	return tc->tc_handle;
500 }
501 
502 /**
503  * Set the parent identifier of a traffic control object
504  * @arg tc		traffic control object
505  * @arg parent		identifier of parent traffif control object
506  *
507  */
rtnl_tc_set_parent(struct rtnl_tc * tc,uint32_t parent)508 void rtnl_tc_set_parent(struct rtnl_tc *tc, uint32_t parent)
509 {
510 	tc->tc_parent = parent;
511 	tc->ce_mask |= TCA_ATTR_PARENT;
512 }
513 
514 /**
515  * Return parent identifier of a traffic control object
516  * @arg tc		traffic control object
517  */
rtnl_tc_get_parent(struct rtnl_tc * tc)518 uint32_t rtnl_tc_get_parent(struct rtnl_tc *tc)
519 {
520 	return tc->tc_parent;
521 }
522 
523 /**
524  * Define the type of traffic control object
525  * @arg tc		traffic control object
526  * @arg kind		name of the tc object type
527  *
528  * @return 0 on success or a negative error code
529  */
rtnl_tc_set_kind(struct rtnl_tc * tc,const char * kind)530 int rtnl_tc_set_kind(struct rtnl_tc *tc, const char *kind)
531 {
532 	if (tc->ce_mask & TCA_ATTR_KIND)
533 		return -NLE_EXIST;
534 
535 	if (   !kind
536 	    || strlen (kind) >= sizeof (tc->tc_kind))
537 		return -NLE_INVAL;
538 
539 	_nl_strncpy(tc->tc_kind, kind, sizeof(tc->tc_kind));
540 
541 	tc->ce_mask |= TCA_ATTR_KIND;
542 
543 	/* Force allocation of data */
544 	rtnl_tc_data(tc);
545 
546 	return 0;
547 }
548 
549 /**
550  * Return kind of traffic control object
551  * @arg tc		traffic control object
552  *
553  * @return Kind of traffic control object or NULL if not set.
554  */
rtnl_tc_get_kind(struct rtnl_tc * tc)555 char *rtnl_tc_get_kind(struct rtnl_tc *tc)
556 {
557 	if (tc->ce_mask & TCA_ATTR_KIND)
558 		return tc->tc_kind;
559 	else
560 		return NULL;
561 }
562 
563 /**
564  * Return value of a statistical counter of a traffic control object
565  * @arg tc		traffic control object
566  * @arg id		identifier of statistical counter
567  *
568  * @return Value of requested statistic counter or 0.
569  */
rtnl_tc_get_stat(struct rtnl_tc * tc,enum rtnl_tc_stat id)570 uint64_t rtnl_tc_get_stat(struct rtnl_tc *tc, enum rtnl_tc_stat id)
571 {
572 	if ((unsigned int) id > RTNL_TC_STATS_MAX)
573 		return 0;
574 
575 	return tc->tc_stats[id];
576 }
577 
578 /**
579  * Set the chain index of a traffic control object
580  * @arg tc		traffic control object
581  * @arg chain		chain index of traffic control object
582  *
583  */
rtnl_tc_set_chain(struct rtnl_tc * tc,uint32_t chain)584 void rtnl_tc_set_chain(struct rtnl_tc *tc, uint32_t chain)
585 {
586 	tc->tc_chain = chain;
587 	tc->ce_mask |= TCA_ATTR_CHAIN;
588 }
589 
590 /**
591  * Return chain index of traffic control object
592  * @arg tc		traffic control object
593  * @arg out_value       output argument.
594  *
595  * @return 0 of the output value was successfully returned, or a negative
596  *   error code on failure.
597  */
rtnl_tc_get_chain(struct rtnl_tc * tc,uint32_t * out_value)598 int rtnl_tc_get_chain(struct rtnl_tc *tc, uint32_t *out_value)
599 {
600 	if (!(tc->ce_mask & TCA_ATTR_CHAIN))
601 		return -NLE_MISSING_ATTR;
602 	*out_value = tc->tc_chain;
603 	return 0;
604 }
605 
606 /** @} */
607 
608 /**
609  * @name Utilities
610  * @{
611  */
612 
/* Table used by rtnl_tc_stat2str() and rtnl_tc_str2stat() to translate
 * statistic identifiers; presumably __ADD() pairs each RTNL_TC_* id with
 * the given name -- confirm against the macro definition. */
static const struct trans_tbl tc_stats[] = {
	__ADD(RTNL_TC_PACKETS, packets),
	__ADD(RTNL_TC_BYTES, bytes),
	__ADD(RTNL_TC_RATE_BPS, rate_bps),
	__ADD(RTNL_TC_RATE_PPS, rate_pps),
	__ADD(RTNL_TC_QLEN, qlen),
	__ADD(RTNL_TC_BACKLOG, backlog),
	__ADD(RTNL_TC_DROPS, drops),
	__ADD(RTNL_TC_REQUEUES, requeues),
	__ADD(RTNL_TC_OVERLIMITS, overlimits),
};
624 
rtnl_tc_stat2str(enum rtnl_tc_stat st,char * buf,size_t len)625 char *rtnl_tc_stat2str(enum rtnl_tc_stat st, char *buf, size_t len)
626 {
627 	return __type2str(st, buf, len, tc_stats, ARRAY_SIZE(tc_stats));
628 }
629 
rtnl_tc_str2stat(const char * name)630 int rtnl_tc_str2stat(const char *name)
631 {
632 	return __str2type(name, tc_stats, ARRAY_SIZE(tc_stats));
633 }
634 
/*
 * Convert a double to int, truncating toward zero and saturating at
 * INT_MIN/INT_MAX. NaN maps to 0. Converting an out-of-range or
 * non-finite double to int directly would be undefined behaviour.
 */
static int tc_clamp_to_int(double value)
{
	if (value >= (double) INT_MAX)
		return INT_MAX;
	if (value <= (double) INT_MIN)
		return INT_MIN;
	if (value != value)	/* NaN, e.g. from 0 / 0 */
		return 0;

	return (int) value;
}

/**
 * Calculate time required to transmit buffer at a specific rate
 * @arg bufsize		Size of buffer to be transmitted in bytes.
 * @arg rate		Transmit rate in bytes per second.
 *
 * Calculates the number of micro seconds required to transmit a
 * specific buffer at a specific transmit rate.
 *
 * @f[
 *   txtime=\frac{bufsize}{rate}10^6
 * @f]
 *
 * Results that do not fit into an int are saturated to INT_MAX or INT_MIN;
 * this includes a rate of 0 with a non-zero buffer size. A buffer size
 * and a rate that are both 0 yield 0.
 *
 * @return Required transmit time in micro seconds.
 */
int rtnl_tc_calc_txtime(int bufsize, int rate)
{
	return tc_clamp_to_int(((double) bufsize / (double) rate) * 1000000.0);
}

/**
 * Calculate buffer size able to transmit in a specific time and rate.
 * @arg txtime		Available transmit time in micro seconds.
 * @arg rate		Transmit rate in bytes per second.
 *
 * Calculates the size of the buffer that can be transmitted in a
 * specific time period at a specific transmit rate.
 *
 * @f[
 *   bufsize=\frac{{txtime} \times {rate}}{10^6}
 * @f]
 *
 * Results that do not fit into an int are saturated to INT_MAX or INT_MIN.
 *
 * @return Size of buffer in bytes.
 */
int rtnl_tc_calc_bufsize(int txtime, int rate)
{
	return tc_clamp_to_int(((double) txtime * (double) rate) / 1000000.0);
}
672 
673 /**
674  * Calculate the binary logarithm for a specific cell size
675  * @arg cell_size	Size of cell, must be a power of two.
676  * @return Binary logirhtm of cell size or a negative error code.
677  */
rtnl_tc_calc_cell_log(int cell_size)678 int rtnl_tc_calc_cell_log(int cell_size)
679 {
680 	int i;
681 
682 	for (i = 0; i < 32; i++)
683 		if ((1 << i) == cell_size)
684 			return i;
685 
686 	return -NLE_INVAL;
687 }
688 
689 
690 /** @} */
691 
692 /**
693  * @name Rate Tables
694  * @{
695  */
696 
697 /*
698  * COPYRIGHT NOTE:
 * align_to_atm() and adjust_size() derived/copied from iproute2 source.
700  */
701 
702 /*
703  * The align to ATM cells is used for determining the (ATM) SAR
704  * alignment overhead at the ATM layer. (SAR = Segmentation And
705  * Reassembly).  This is for example needed when scheduling packet on
706  * an ADSL connection.  Note that the extra ATM-AAL overhead is _not_
707  * included in this calculation. This overhead is added in the kernel
708  * before doing the rate table lookup, as this gives better precision
709  * (as the table will always be aligned for 48 bytes).
710  *  --Hawk, d.7/11-2004. <hawk@diku.dk>
711  */
align_to_atm(unsigned int size)712 static unsigned int align_to_atm(unsigned int size)
713 {
714 	int linksize, cells;
715 	cells = size / ATM_CELL_PAYLOAD;
716 	if ((size % ATM_CELL_PAYLOAD) > 0)
717 		cells++;
718 
719 	linksize = cells * ATM_CELL_SIZE; /* Use full cell size to add ATM tax */
720 	return linksize;
721 }
722 
adjust_size(unsigned int size,unsigned int mpu,uint32_t linktype)723 static unsigned int adjust_size(unsigned int size, unsigned int mpu,
724 				uint32_t linktype)
725 {
726 	if (size < mpu)
727 		size = mpu;
728 
729 	switch (linktype) {
730 	case ARPHRD_ATM:
731 		return align_to_atm(size);
732 
733 	case ARPHRD_ETHER:
734 	default:
735 		return size;
736 	}
737 }
738 
739 /**
740  * Compute a transmission time lookup table
741  * @arg tc		traffic control object
742  * @arg spec		Rate specification
743  * @arg dst		Destination buffer of RTNL_TC_RTABLE_SIZE uint32_t[].
744  *
745  * Computes a table of RTNL_TC_RTABLE_SIZE entries specyfing the
746  * transmission times for various packet sizes, e.g. the transmission
747  * time for a packet of size \c pktsize could be looked up:
748  * @code
749  * txtime = table[pktsize >> log2(mtu)];
750  * @endcode
751  */
rtnl_tc_build_rate_table(struct rtnl_tc * tc,struct rtnl_ratespec * spec,uint32_t * dst)752 int rtnl_tc_build_rate_table(struct rtnl_tc *tc, struct rtnl_ratespec *spec,
753 			     uint32_t *dst)
754 {
755 	uint32_t mtu = rtnl_tc_get_mtu(tc);
756 	uint32_t linktype = rtnl_tc_get_linktype(tc);
757 	uint8_t cell_log = spec->rs_cell_log;
758 	unsigned int size, i;
759 
760 	spec->rs_mpu = rtnl_tc_get_mpu(tc);
761 	spec->rs_overhead = rtnl_tc_get_overhead(tc);
762 
763 	if (mtu == 0)
764 		mtu = 2047;
765 
766 	if (cell_log == UINT8_MAX) {
767 		/*
768 		 * cell_log not specified, calculate it. It has to specify the
769 		 * minimum number of rshifts required to break the MTU to below
770 		 * RTNL_TC_RTABLE_SIZE.
771 		 */
772 		cell_log = 0;
773 		while ((mtu >> cell_log) >= RTNL_TC_RTABLE_SIZE)
774 			cell_log++;
775 	}
776 
777 	for (i = 0; i < RTNL_TC_RTABLE_SIZE; i++) {
778 		size = adjust_size((i + 1) << cell_log, spec->rs_mpu, linktype);
779 		dst[i] = nl_us2ticks(rtnl_tc_calc_txtime64(size, spec->rs_rate64));
780 	}
781 
782 	spec->rs_cell_align = -1;
783 	spec->rs_cell_log = cell_log;
784 
785 	return 0;
786 }
787 
788 /** @} */
789 
790 /**
791  * @name TC implementation of cache functions
792  */
793 
rtnl_tc_free_data(struct nl_object * obj)794 void rtnl_tc_free_data(struct nl_object *obj)
795 {
796 	struct rtnl_tc *tc = TC_CAST(obj);
797 	struct rtnl_tc_ops *ops;
798 
799 	rtnl_link_put(tc->tc_link);
800 	nl_data_free(tc->tc_opts);
801 	nl_data_free(tc->tc_xstats);
802 
803 	if (tc->tc_subdata) {
804 		ops = rtnl_tc_get_ops(tc);
805 		if (ops && ops->to_free_data)
806 			ops->to_free_data(tc, nl_data_get(tc->tc_subdata));
807 
808 		nl_data_free(tc->tc_subdata);
809 	}
810 }
811 
rtnl_tc_clone(struct nl_object * dstobj,struct nl_object * srcobj)812 int rtnl_tc_clone(struct nl_object *dstobj, struct nl_object *srcobj)
813 {
814 	struct rtnl_tc *dst = TC_CAST(dstobj);
815 	struct rtnl_tc *src = TC_CAST(srcobj);
816 	struct rtnl_tc_ops *ops;
817 
818 	if (src->tc_link) {
819 		nl_object_get(OBJ_CAST(src->tc_link));
820 		dst->tc_link = src->tc_link;
821 	}
822 
823 	dst->tc_opts = NULL;
824 	dst->tc_xstats = NULL;
825 	dst->tc_subdata = NULL;
826 	dst->ce_mask &= ~(TCA_ATTR_OPTS |
827 	                  TCA_ATTR_XSTATS);
828 
829 	if (src->tc_opts) {
830 		dst->tc_opts = nl_data_clone(src->tc_opts);
831 		if (!dst->tc_opts)
832 			return -NLE_NOMEM;
833 		dst->ce_mask |= TCA_ATTR_OPTS;
834 	}
835 
836 	if (src->tc_xstats) {
837 		dst->tc_xstats = nl_data_clone(src->tc_xstats);
838 		if (!dst->tc_xstats)
839 			return -NLE_NOMEM;
840 		dst->ce_mask |= TCA_ATTR_XSTATS;
841 	}
842 
843 	if (src->tc_subdata) {
844 		if (!(dst->tc_subdata = nl_data_clone(src->tc_subdata))) {
845 			return -NLE_NOMEM;
846 		}
847 	}
848 
849 	ops = rtnl_tc_get_ops(src);
850 	if (ops && ops->to_clone) {
851 		void *a = rtnl_tc_data(dst), *b = rtnl_tc_data(src);
852 
853 		if (!a)
854 			return 0;
855 		else if (!b)
856 			return -NLE_NOMEM;
857 
858 		return ops->to_clone(a, b);
859 	}
860 
861 	return 0;
862 }
863 
tc_dump(struct rtnl_tc * tc,enum nl_dump_type type,struct nl_dump_params * p)864 static int tc_dump(struct rtnl_tc *tc, enum nl_dump_type type,
865 		   struct nl_dump_params *p)
866 {
867 	struct rtnl_tc_type_ops *type_ops;
868 	struct rtnl_tc_ops *ops;
869 	void *data = rtnl_tc_data(tc);
870 
871 	type_ops = tc_type_ops[tc->tc_type];
872 	if (type_ops && type_ops->tt_dump[type])
873 		type_ops->tt_dump[type](tc, p);
874 
875 	ops = rtnl_tc_get_ops(tc);
876 	if (ops && ops->to_dump[type]) {
877 		ops->to_dump[type](tc, data, p);
878 		return 1;
879 	}
880 
881 	return 0;
882 }
883 
rtnl_tc_dump_line(struct nl_object * obj,struct nl_dump_params * p)884 void rtnl_tc_dump_line(struct nl_object *obj, struct nl_dump_params *p)
885 {
886 	struct rtnl_tc_type_ops *type_ops;
887 	struct rtnl_tc *tc = TC_CAST(obj);
888 	struct nl_cache *link_cache;
889 	char buf[32];
890 
891 	nl_new_line(p);
892 
893 	type_ops = tc_type_ops[tc->tc_type];
894 	if (type_ops && type_ops->tt_dump_prefix)
895 		nl_dump(p, "%s ", type_ops->tt_dump_prefix);
896 
897 	nl_dump(p, "%s ", tc->tc_kind);
898 
899 	if ((link_cache = nl_cache_mngt_require_safe("route/link"))) {
900 		nl_dump(p, "dev %s ",
901 			rtnl_link_i2name(link_cache, tc->tc_ifindex,
902 					 buf, sizeof(buf)));
903 	} else
904 		nl_dump(p, "dev %u ", tc->tc_ifindex);
905 
906 	nl_dump(p, "id %s ",
907 		rtnl_tc_handle2str(tc->tc_handle, buf, sizeof(buf)));
908 
909 	nl_dump(p, "parent %s",
910 		rtnl_tc_handle2str(tc->tc_parent, buf, sizeof(buf)));
911 
912 	tc_dump(tc, NL_DUMP_LINE, p);
913 	nl_dump(p, "\n");
914 
915 	if (link_cache)
916 		nl_cache_put(link_cache);
917 }
918 
rtnl_tc_dump_details(struct nl_object * obj,struct nl_dump_params * p)919 void rtnl_tc_dump_details(struct nl_object *obj, struct nl_dump_params *p)
920 {
921 	struct rtnl_tc *tc = TC_CAST(obj);
922 
923 	rtnl_tc_dump_line(OBJ_CAST(tc), p);
924 
925 	nl_dump_line(p, "  ");
926 
927 	if (tc->ce_mask & TCA_ATTR_MTU)
928 		nl_dump(p, " mtu %u", tc->tc_mtu);
929 
930 	if (tc->ce_mask & TCA_ATTR_MPU)
931 		nl_dump(p, " mpu %u", tc->tc_mpu);
932 
933 	if (tc->ce_mask & TCA_ATTR_OVERHEAD)
934 		nl_dump(p, " overhead %u", tc->tc_overhead);
935 
936 	if (!tc_dump(tc, NL_DUMP_DETAILS, p))
937 		nl_dump(p, "no options");
938 	nl_dump(p, "\n");
939 }
940 
rtnl_tc_dump_stats(struct nl_object * obj,struct nl_dump_params * p)941 void rtnl_tc_dump_stats(struct nl_object *obj, struct nl_dump_params *p)
942 {
943 	struct rtnl_tc *tc = TC_CAST(obj);
944 	char *unit;
945 	float res;
946 
947 	rtnl_tc_dump_details(OBJ_CAST(tc), p);
948 
949 	nl_dump_line(p,
950 	             "  stats: %-14s %-10s   %-10s %-10s %-10s %-10s\n",
951 	             "bytes", "packets", "drops", "overlimits", "qlen", "backlog");
952 
953 	res = nl_cancel_down_bytes(tc->tc_stats[RTNL_TC_BYTES], &unit);
954 
955 	nl_dump_line(p,
956 	             "       %10.2f %3s   %10u   %-10u %-10u %-10u %-10u\n",
957 	             res, unit,
958 	             tc->tc_stats[RTNL_TC_PACKETS],
959 	             tc->tc_stats[RTNL_TC_DROPS],
960 	             tc->tc_stats[RTNL_TC_OVERLIMITS],
961 	             tc->tc_stats[RTNL_TC_QLEN],
962 	             tc->tc_stats[RTNL_TC_BACKLOG]);
963 
964 	res = nl_cancel_down_bytes(tc->tc_stats[RTNL_TC_RATE_BPS], &unit);
965 
966 	nl_dump_line(p,
967 	             "       %10.2f %3s/s %10u/s\n",
968 	             res,
969 	             unit,
970 	             tc->tc_stats[RTNL_TC_RATE_PPS]);
971 }
972 
/*
 * Compare two tc objects on handle, parent, interface index and kind.
 *
 * attrs is passed to ATTR_DIFF() together with the TCA_ATTR_* bit of each
 * field; presumably it selects which attributes take part in the
 * comparison and the returned mask has the bit of every differing
 * attribute set -- confirm against the ATTR_DIFF() definition.
 * flags is not referenced in this function body.
 */
uint64_t rtnl_tc_compare(struct nl_object *aobj, struct nl_object *bobj,
			 uint64_t attrs, int flags)
{
	struct rtnl_tc *a = TC_CAST(aobj);
	struct rtnl_tc *b = TC_CAST(bobj);
	uint64_t diff = 0;

/* Shorthand: prefixes the attribute name and supplies attrs, a and b */
#define TC_DIFF(ATTR, EXPR) ATTR_DIFF(attrs, TCA_ATTR_##ATTR, a, b, EXPR)

	diff |= TC_DIFF(HANDLE,		a->tc_handle != b->tc_handle);
	diff |= TC_DIFF(PARENT,		a->tc_parent != b->tc_parent);
	diff |= TC_DIFF(IFINDEX,	a->tc_ifindex != b->tc_ifindex);
	diff |= TC_DIFF(KIND,		strcmp(a->tc_kind, b->tc_kind));

#undef TC_DIFF

	return diff;
}
991 
992 /** @} */
993 
994 /**
995  * @name Modules API
996  */
997 
rtnl_tc_lookup_ops(enum rtnl_tc_type type,const char * kind)998 struct rtnl_tc_ops *rtnl_tc_lookup_ops(enum rtnl_tc_type type, const char *kind)
999 {
1000 	struct rtnl_tc_ops *ops;
1001 
1002 	nl_list_for_each_entry(ops, &tc_ops_list[type], to_list)
1003 		if (!strcmp(kind, ops->to_kind))
1004 			return ops;
1005 
1006 	return NULL;
1007 }
1008 
rtnl_tc_get_ops(struct rtnl_tc * tc)1009 struct rtnl_tc_ops *rtnl_tc_get_ops(struct rtnl_tc *tc)
1010 {
1011 	if (!tc->tc_ops)
1012 		tc->tc_ops = rtnl_tc_lookup_ops(tc->tc_type, tc->tc_kind);
1013 
1014 	return tc->tc_ops;
1015 }
1016 
1017 /**
1018  * Register a traffic control module
1019  * @arg ops		traffic control module operations
1020  */
rtnl_tc_register(struct rtnl_tc_ops * ops)1021 int rtnl_tc_register(struct rtnl_tc_ops *ops)
1022 {
1023 	static int init = 0;
1024 
1025 	/*
1026 	 * Initialiation hack, make sure list is initialized when
1027 	 * the first tc module registers. Putting this in a
1028 	 * separate __init would required correct ordering of init
1029 	 * functions
1030 	 */
1031 	if (!init) {
1032 		int i;
1033 
1034 		for (i = 0; i < __RTNL_TC_TYPE_MAX; i++)
1035 			nl_init_list_head(&tc_ops_list[i]);
1036 
1037 		init = 1;
1038 	}
1039 
1040 	if (!ops->to_kind || ops->to_type > RTNL_TC_TYPE_MAX)
1041 		BUG();
1042 
1043 	if (rtnl_tc_lookup_ops(ops->to_type, ops->to_kind))
1044 		return -NLE_EXIST;
1045 
1046 	nl_list_add_tail(&ops->to_list, &tc_ops_list[ops->to_type]);
1047 
1048 	return 0;
1049 }
1050 
1051 /**
1052  * Unregister a traffic control module
1053  * @arg ops		traffic control module operations
1054  */
rtnl_tc_unregister(struct rtnl_tc_ops * ops)1055 void rtnl_tc_unregister(struct rtnl_tc_ops *ops)
1056 {
1057 	nl_list_del(&ops->to_list);
1058 }
1059 
1060 /**
1061  * Returns the private data of the traffic control object.
1062  * Contrary to rtnl_tc_data(), this returns NULL if the data is
1063  * not yet allocated
1064  * @arg tc		traffic control object
1065  *
1066  * @return pointer to the private data or NULL if not allocated.
1067  */
rtnl_tc_data_peek(struct rtnl_tc * tc)1068 void *rtnl_tc_data_peek(struct rtnl_tc *tc)
1069 {
1070 	return tc->tc_subdata ? nl_data_get(tc->tc_subdata) : NULL;
1071 }
1072 
1073 /**
1074  * Return pointer to private data of traffic control object
1075  * @arg tc		traffic control object
1076  *
1077  * Allocates the private traffic control object data section
1078  * as necessary and returns it.
1079  *
1080  * @return Pointer to private tc data or NULL if allocation failed.
1081  */
rtnl_tc_data(struct rtnl_tc * tc)1082 void *rtnl_tc_data(struct rtnl_tc *tc)
1083 {
1084 	if (!tc->tc_subdata) {
1085 		size_t size;
1086 
1087 		if (!tc->tc_ops) {
1088 			if (!rtnl_tc_get_ops(tc))
1089 				return NULL;
1090 		}
1091 
1092 		if (!(size = tc->tc_ops->to_size))
1093 			BUG();
1094 
1095 		if (!(tc->tc_subdata = nl_data_alloc(NULL, size)))
1096 			return NULL;
1097 	}
1098 
1099 	return nl_data_get(tc->tc_subdata);
1100 }
1101 
1102 /**
1103  * Check traffic control object type and return private data section
1104  * @arg tc		traffic control object
1105  * @arg ops		expected traffic control object operations
1106  * @arg err		the place where saves the error code if fails
1107  *
1108  * Checks whether the traffic control object matches the type
1109  * specified with the traffic control object operations. If the
1110  * type matches, the private tc object data is returned. If type
1111  * mismatches, APPBUG() will print a application bug warning.
1112  *
1113  * @see rtnl_tc_data()
1114  *
1115  * @return Pointer to private tc data or NULL if type mismatches.
1116  */
rtnl_tc_data_check(struct rtnl_tc * tc,struct rtnl_tc_ops * ops,int * err)1117 void *rtnl_tc_data_check(struct rtnl_tc *tc, struct rtnl_tc_ops *ops, int *err)
1118 {
1119 	void *ret;
1120 
1121 	if (tc->tc_ops != ops) {
1122 		char buf[64];
1123 
1124 		snprintf(buf, sizeof(buf),
1125 			 "tc object %p used in %s context but is of type %s",
1126 			 tc, ops->to_kind, tc->tc_ops->to_kind);
1127 		APPBUG(buf);
1128 
1129 		if (err)
1130 			*err = -NLE_OPNOTSUPP;
1131 		return NULL;
1132 	}
1133 
1134 	ret = rtnl_tc_data(tc);
1135 	if (ret == NULL) {
1136 		if (err)
1137 			*err = -NLE_NOMEM;
1138 	}
1139 
1140 	return ret;
1141 }
1142 
/* Group table for tc objects: pairs AF_UNSPEC with RTNLGRP_TC; the
 * END_OF_GROUP_LIST entry terminates the list. */
struct nl_af_group tc_groups[] = {
	{ AF_UNSPEC,	RTNLGRP_TC },
	{ END_OF_GROUP_LIST },
};
1147 
1148 
rtnl_tc_type_register(struct rtnl_tc_type_ops * ops)1149 void rtnl_tc_type_register(struct rtnl_tc_type_ops *ops)
1150 {
1151 	if (ops->tt_type > RTNL_TC_TYPE_MAX)
1152 		BUG();
1153 
1154 	tc_type_ops[ops->tt_type] = ops;
1155 }
1156 
rtnl_tc_type_unregister(struct rtnl_tc_type_ops * ops)1157 void rtnl_tc_type_unregister(struct rtnl_tc_type_ops *ops)
1158 {
1159 	if (ops->tt_type > RTNL_TC_TYPE_MAX)
1160 		BUG();
1161 
1162 	tc_type_ops[ops->tt_type] = NULL;
1163 }
1164 
1165 /** @} */
1166 
1167 /** @} */
1168