1 /*
2  * lib/route/tc.c		Traffic Control
3  *
4  *	This library is free software; you can redistribute it and/or
5  *	modify it under the terms of the GNU Lesser General Public
6  *	License as published by the Free Software Foundation version 2.1
7  *	of the License.
8  *
9  * Copyright (c) 2003-2011 Thomas Graf <tgraf@suug.ch>
10  */
11 
12 /**
13  * @ingroup rtnl
14  * @defgroup tc Traffic Control
15  * @{
16  */
17 
18 #include <netlink-private/netlink.h>
19 #include <netlink-private/tc.h>
20 #include <netlink/netlink.h>
21 #include <netlink/utils.h>
22 #include <netlink/route/rtnl.h>
23 #include <netlink/route/link.h>
24 #include <netlink/route/tc.h>
25 #include <netlink-private/route/tc-api.h>
26 
27 /** @cond SKIP */
28 
/* Registered tc modules, one list per tc object type (see rtnl_tc_register()). */
static struct nl_list_head tc_ops_list[__RTNL_TC_TYPE_MAX];
/* Per-type operations indexed by tc object type; NULL until rtnl_tc_type_register(). */
static struct rtnl_tc_type_ops *tc_type_ops[__RTNL_TC_TYPE_MAX];
31 
32 static struct nla_policy tc_policy[TCA_MAX+1] = {
33 	[TCA_KIND]	= { .type = NLA_STRING,
34 			    .maxlen = TCKINDSIZ },
35 	[TCA_STATS]	= { .minlen = sizeof(struct tc_stats) },
36 	[TCA_STATS2]	= { .type = NLA_NESTED },
37 };
38 
tca_parse(struct nlattr ** tb,int maxattr,struct rtnl_tc * g,struct nla_policy * policy)39 int tca_parse(struct nlattr **tb, int maxattr, struct rtnl_tc *g,
40 	      struct nla_policy *policy)
41 {
42 
43 	if (g->ce_mask & TCA_ATTR_OPTS)
44 		return nla_parse(tb, maxattr,
45 				 (struct nlattr *) g->tc_opts->d_data,
46 				 g->tc_opts->d_size, policy);
47 	else {
48 		/* Ugly but tb[] must be in a defined state even if no
49 		 * attributes can be found. */
50 		memset(tb, 0, sizeof(struct nlattr *) * (maxattr + 1));
51 		return 0;
52 	}
53 }
54 
55 static struct nla_policy tc_stats2_policy[TCA_STATS_MAX+1] = {
56 	[TCA_STATS_BASIC]    = { .minlen = sizeof(struct gnet_stats_basic) },
57 	[TCA_STATS_RATE_EST] = { .minlen = sizeof(struct gnet_stats_rate_est) },
58 	[TCA_STATS_QUEUE]    = { .minlen = sizeof(struct gnet_stats_queue) },
59 };
60 
rtnl_tc_msg_parse(struct nlmsghdr * n,struct rtnl_tc * tc)61 int rtnl_tc_msg_parse(struct nlmsghdr *n, struct rtnl_tc *tc)
62 {
63 	struct nl_cache *link_cache;
64 	struct rtnl_tc_ops *ops;
65 	struct nlattr *tb[TCA_MAX + 1];
66 	char kind[TCKINDSIZ];
67 	struct tcmsg *tm;
68 	int err;
69 
70 	tc->ce_msgtype = n->nlmsg_type;
71 
72 	err = nlmsg_parse(n, sizeof(*tm), tb, TCA_MAX, tc_policy);
73 	if (err < 0)
74 		return err;
75 
76 	if (tb[TCA_KIND] == NULL)
77 		return -NLE_MISSING_ATTR;
78 
79 	nla_strlcpy(kind, tb[TCA_KIND], sizeof(kind));
80 	rtnl_tc_set_kind(tc, kind);
81 
82 	tm = nlmsg_data(n);
83 	tc->tc_family  = tm->tcm_family;
84 	tc->tc_ifindex = tm->tcm_ifindex;
85 	tc->tc_handle  = tm->tcm_handle;
86 	tc->tc_parent  = tm->tcm_parent;
87 	tc->tc_info    = tm->tcm_info;
88 
89 	tc->ce_mask |= (TCA_ATTR_FAMILY | TCA_ATTR_IFINDEX | TCA_ATTR_HANDLE|
90 		        TCA_ATTR_PARENT | TCA_ATTR_INFO);
91 
92 	if (tb[TCA_OPTIONS]) {
93 		tc->tc_opts = nl_data_alloc_attr(tb[TCA_OPTIONS]);
94 		if (!tc->tc_opts)
95 			return -NLE_NOMEM;
96 		tc->ce_mask |= TCA_ATTR_OPTS;
97 	}
98 
99 	if (tb[TCA_STATS2]) {
100 		struct nlattr *tbs[TCA_STATS_MAX + 1];
101 
102 		err = nla_parse_nested(tbs, TCA_STATS_MAX, tb[TCA_STATS2],
103 				       tc_stats2_policy);
104 		if (err < 0)
105 			return err;
106 
107 		if (tbs[TCA_STATS_BASIC]) {
108 			struct gnet_stats_basic *bs;
109 
110 			bs = nla_data(tbs[TCA_STATS_BASIC]);
111 			tc->tc_stats[RTNL_TC_BYTES]	= bs->bytes;
112 			tc->tc_stats[RTNL_TC_PACKETS]	= bs->packets;
113 		}
114 
115 		if (tbs[TCA_STATS_RATE_EST]) {
116 			struct gnet_stats_rate_est *re;
117 
118 			re = nla_data(tbs[TCA_STATS_RATE_EST]);
119 			tc->tc_stats[RTNL_TC_RATE_BPS]	= re->bps;
120 			tc->tc_stats[RTNL_TC_RATE_PPS]	= re->pps;
121 		}
122 
123 		if (tbs[TCA_STATS_QUEUE]) {
124 			struct gnet_stats_queue *q;
125 
126 			q = nla_data(tbs[TCA_STATS_QUEUE]);
127 			tc->tc_stats[RTNL_TC_QLEN]	= q->qlen;
128 			tc->tc_stats[RTNL_TC_BACKLOG]	= q->backlog;
129 			tc->tc_stats[RTNL_TC_DROPS]	= q->drops;
130 			tc->tc_stats[RTNL_TC_REQUEUES]	= q->requeues;
131 			tc->tc_stats[RTNL_TC_OVERLIMITS]	= q->overlimits;
132 		}
133 
134 		tc->ce_mask |= TCA_ATTR_STATS;
135 
136 		if (tbs[TCA_STATS_APP]) {
137 			tc->tc_xstats = nl_data_alloc_attr(tbs[TCA_STATS_APP]);
138 			if (tc->tc_xstats == NULL)
139 				return -NLE_NOMEM;
140 		} else
141 			goto compat_xstats;
142 	} else {
143 		if (tb[TCA_STATS]) {
144 			struct tc_stats *st = nla_data(tb[TCA_STATS]);
145 
146 			tc->tc_stats[RTNL_TC_BYTES]	= st->bytes;
147 			tc->tc_stats[RTNL_TC_PACKETS]	= st->packets;
148 			tc->tc_stats[RTNL_TC_RATE_BPS]	= st->bps;
149 			tc->tc_stats[RTNL_TC_RATE_PPS]	= st->pps;
150 			tc->tc_stats[RTNL_TC_QLEN]	= st->qlen;
151 			tc->tc_stats[RTNL_TC_BACKLOG]	= st->backlog;
152 			tc->tc_stats[RTNL_TC_DROPS]	= st->drops;
153 			tc->tc_stats[RTNL_TC_OVERLIMITS]= st->overlimits;
154 
155 			tc->ce_mask |= TCA_ATTR_STATS;
156 		}
157 
158 compat_xstats:
159 		if (tb[TCA_XSTATS]) {
160 			tc->tc_xstats = nl_data_alloc_attr(tb[TCA_XSTATS]);
161 			if (tc->tc_xstats == NULL)
162 				return -NLE_NOMEM;
163 			tc->ce_mask |= TCA_ATTR_XSTATS;
164 		}
165 	}
166 
167 	ops = rtnl_tc_get_ops(tc);
168 	if (ops && ops->to_msg_parser) {
169 		void *data = rtnl_tc_data(tc);
170 
171 		if (!data)
172 			return -NLE_NOMEM;
173 
174 		err = ops->to_msg_parser(tc, data);
175 		if (err < 0)
176 			return err;
177 	}
178 
179 	if ((link_cache = __nl_cache_mngt_require("route/link"))) {
180 		struct rtnl_link *link;
181 
182 		if ((link = rtnl_link_get(link_cache, tc->tc_ifindex))) {
183 			rtnl_tc_set_link(tc, link);
184 
185 			/* rtnl_tc_set_link incs refcnt */
186 			rtnl_link_put(link);
187 		}
188 	}
189 
190 	return 0;
191 }
192 
rtnl_tc_msg_build(struct rtnl_tc * tc,int type,int flags,struct nl_msg ** result)193 int rtnl_tc_msg_build(struct rtnl_tc *tc, int type, int flags,
194 		      struct nl_msg **result)
195 {
196 	struct nl_msg *msg;
197 	struct rtnl_tc_ops *ops;
198 	struct tcmsg tchdr = {
199 		.tcm_family = AF_UNSPEC,
200 		.tcm_ifindex = tc->tc_ifindex,
201 		.tcm_handle = tc->tc_handle,
202 		.tcm_parent = tc->tc_parent,
203 	};
204 	int err = -NLE_MSGSIZE;
205 
206 	msg = nlmsg_alloc_simple(type, flags);
207 	if (!msg)
208 		return -NLE_NOMEM;
209 
210 	if (nlmsg_append(msg, &tchdr, sizeof(tchdr), NLMSG_ALIGNTO) < 0)
211 		goto nla_put_failure;
212 
213 	if (tc->ce_mask & TCA_ATTR_KIND)
214 	    NLA_PUT_STRING(msg, TCA_KIND, tc->tc_kind);
215 
216 	ops = rtnl_tc_get_ops(tc);
217 	if (ops && (ops->to_msg_fill || ops->to_msg_fill_raw)) {
218 		struct nlattr *opts;
219 		void *data = rtnl_tc_data(tc);
220 
221 		if (ops->to_msg_fill) {
222 			if (!(opts = nla_nest_start(msg, TCA_OPTIONS)))
223 				goto nla_put_failure;
224 
225 			if ((err = ops->to_msg_fill(tc, data, msg)) < 0)
226 				goto nla_put_failure;
227 
228 			nla_nest_end(msg, opts);
229 		} else if ((err = ops->to_msg_fill_raw(tc, data, msg)) < 0)
230 			goto nla_put_failure;
231 	}
232 
233 	*result = msg;
234 	return 0;
235 
236 nla_put_failure:
237 	nlmsg_free(msg);
238 	return err;
239 }
240 
tca_set_kind(struct rtnl_tc * t,const char * kind)241 void tca_set_kind(struct rtnl_tc *t, const char *kind)
242 {
243 	strncpy(t->tc_kind, kind, sizeof(t->tc_kind) - 1);
244 	t->ce_mask |= TCA_ATTR_KIND;
245 }
246 
247 
248 /** @endcond */
249 
250 /**
251  * @name Attributes
252  * @{
253  */
254 
255 /**
256  * Set interface index of traffic control object
257  * @arg tc		traffic control object
258  * @arg ifindex		interface index.
259  *
260  * Sets the interface index of a traffic control object. The interface
261  * index defines the network device which this tc object is attached to.
262  * This function will overwrite any network device assigned with previous
263  * calls to rtnl_tc_set_ifindex() or rtnl_tc_set_link().
264  */
rtnl_tc_set_ifindex(struct rtnl_tc * tc,int ifindex)265 void rtnl_tc_set_ifindex(struct rtnl_tc *tc, int ifindex)
266 {
267 	/* Obsolete possible old link reference */
268 	rtnl_link_put(tc->tc_link);
269 	tc->tc_link = NULL;
270 	tc->ce_mask &= ~TCA_ATTR_LINK;
271 
272 	tc->tc_ifindex = ifindex;
273 	tc->ce_mask |= TCA_ATTR_IFINDEX;
274 }
275 
276 /**
277  * Return interface index of traffic control object
278  * @arg tc		traffic control object
279  */
rtnl_tc_get_ifindex(struct rtnl_tc * tc)280 int rtnl_tc_get_ifindex(struct rtnl_tc *tc)
281 {
282 	return tc->tc_ifindex;
283 }
284 
285 /**
286  * Set link of traffic control object
287  * @arg tc		traffic control object
288  * @arg link		link object
289  *
290  * Sets the link of a traffic control object. This function serves
291  * the same purpose as rtnl_tc_set_ifindex() but due to the continued
292  * allowed access to the link object it gives it the possibility to
293  * retrieve sane default values for the the MTU and the linktype.
294  * Always prefer this function over rtnl_tc_set_ifindex() if you can
295  * spare to have an additional link object around.
296  */
rtnl_tc_set_link(struct rtnl_tc * tc,struct rtnl_link * link)297 void rtnl_tc_set_link(struct rtnl_tc *tc, struct rtnl_link *link)
298 {
299 	rtnl_link_put(tc->tc_link);
300 
301 	if (!link)
302 		return;
303 	if (!link->l_index)
304 		BUG();
305 
306 	nl_object_get(OBJ_CAST(link));
307 	tc->tc_link = link;
308 	tc->tc_ifindex = link->l_index;
309 	tc->ce_mask |= TCA_ATTR_LINK | TCA_ATTR_IFINDEX;
310 }
311 
312 /**
313  * Get link of traffic control object
314  * @arg tc		traffic control object
315  *
316  * Returns the link of a traffic control object. The link is only
317  * returned if it has been set before via rtnl_tc_set_link() or
318  * if a link cache was available while parsing the tc object. This
319  * function may still return NULL even if an ifindex is assigned to
320  * the tc object. It will _not_ look up the link by itself.
321  *
322  * @note The returned link will have its reference counter incremented.
323  *       It is in the responsibility of the caller to return the
324  *       reference.
325  *
326  * @return link object or NULL if not set.
327  */
rtnl_tc_get_link(struct rtnl_tc * tc)328 struct rtnl_link *rtnl_tc_get_link(struct rtnl_tc *tc)
329 {
330 	if (tc->tc_link) {
331 		nl_object_get(OBJ_CAST(tc->tc_link));
332 		return tc->tc_link;
333 	}
334 
335 	return NULL;
336 }
337 
338 /**
339  * Set the Maximum Transmission Unit (MTU) of traffic control object
340  * @arg tc		traffic control object
341  * @arg mtu		largest packet size expected
342  *
343  * Sets the MTU of a traffic control object. Not all traffic control
344  * objects will make use of this but it helps while calculating rate
345  * tables. This value is typically derived directly from the link
346  * the tc object is attached to if the link has been assigned via
347  * rtnl_tc_set_link(). It is usually not necessary to set the MTU
348  * manually, this function is provided to allow overwriting the derived
349  * value.
350  */
rtnl_tc_set_mtu(struct rtnl_tc * tc,uint32_t mtu)351 void rtnl_tc_set_mtu(struct rtnl_tc *tc, uint32_t mtu)
352 {
353 	tc->tc_mtu = mtu;
354 	tc->ce_mask |= TCA_ATTR_MTU;
355 }
356 
357 /**
358  * Return the MTU of traffic control object
359  * @arg tc		traffic control object
360  *
361  * Returns the MTU of a traffic control object which has been set via:
362  * -# User specified value set via rtnl_tc_set_mtu()
363  * -# Dervied from link set via rtnl_tc_set_link()
364  * -# Fall back to default: ethernet = 1500
365  */
rtnl_tc_get_mtu(struct rtnl_tc * tc)366 uint32_t rtnl_tc_get_mtu(struct rtnl_tc *tc)
367 {
368 	if (tc->ce_mask & TCA_ATTR_MTU)
369 		return tc->tc_mtu;
370 	else if (tc->ce_mask & TCA_ATTR_LINK)
371 		return tc->tc_link->l_mtu;
372 	else
373 		return 1500; /* default to ethernet */
374 }
375 
376 /**
377  * Set the Minimum Packet Unit (MPU) of a traffic control object
378  * @arg tc		traffic control object
379  * @arg mpu		minimum packet size expected
380  *
381  * Sets the MPU of a traffic contorl object. It specifies the minimum
382  * packet size to ever hit this traffic control object. Not all traffic
383  * control objects will make use of this but it helps while calculating
384  * rate tables.
385  */
rtnl_tc_set_mpu(struct rtnl_tc * tc,uint32_t mpu)386 void rtnl_tc_set_mpu(struct rtnl_tc *tc, uint32_t mpu)
387 {
388 	tc->tc_mpu = mpu;
389 	tc->ce_mask |= TCA_ATTR_MPU;
390 }
391 
392 /**
393  * Return the Minimum Packet Unit (MPU) of a traffic control object
394  * @arg tc		traffic control object
395  *
396  * @return The MPU previously set via rtnl_tc_set_mpu() or 0.
397  */
rtnl_tc_get_mpu(struct rtnl_tc * tc)398 uint32_t rtnl_tc_get_mpu(struct rtnl_tc *tc)
399 {
400 	return tc->tc_mpu;
401 }
402 
403 /**
404  * Set per packet overhead of a traffic control object
405  * @arg tc		traffic control object
406  * @arg overhead	overhead per packet in bytes
407  *
408  * Sets the per packet overhead in bytes occuring on the link not seen
409  * by the kernel. This value can be used to correct size calculations
410  * if the packet size on the wire does not match the packet sizes seen
411  * in the network stack. Not all traffic control objects will make use
412  * this but it helps while calculating accurate packet sizes in the
413  * kernel.
414  */
rtnl_tc_set_overhead(struct rtnl_tc * tc,uint32_t overhead)415 void rtnl_tc_set_overhead(struct rtnl_tc *tc, uint32_t overhead)
416 {
417 	tc->tc_overhead = overhead;
418 	tc->ce_mask |= TCA_ATTR_OVERHEAD;
419 }
420 
421 /**
422  * Return per packet overhead of a traffic control object
423  * @arg tc		traffic control object
424  *
425  * @return The overhead previously set by rtnl_tc_set_overhead() or 0.
426  */
rtnl_tc_get_overhead(struct rtnl_tc * tc)427 uint32_t rtnl_tc_get_overhead(struct rtnl_tc *tc)
428 {
429 	return tc->tc_overhead;
430 }
431 
432 /**
433  * Set the linktype of a traffic control object
434  * @arg tc		traffic control object
435  * @arg type		type of link (e.g. ARPHRD_ATM, ARPHRD_ETHER)
436  *
437  * Overwrites the type of link this traffic control object is attached to.
438  * This value is typically derived from the link this tc object is attached
439  * if the link has been assigned via rtnl_tc_set_link(). It is usually not
440  * necessary to set the linktype manually. This function is provided to
441  * allow overwriting the linktype.
442  */
rtnl_tc_set_linktype(struct rtnl_tc * tc,uint32_t type)443 void rtnl_tc_set_linktype(struct rtnl_tc *tc, uint32_t type)
444 {
445 	tc->tc_linktype = type;
446 	tc->ce_mask |= TCA_ATTR_LINKTYPE;
447 }
448 
449 /**
450  * Return the linktype of a traffic control object
451  * @arg tc		traffic control object
452  *
453  * Returns the linktype of the link the traffic control object is attached to:
454  * -# User specified value via rtnl_tc_set_linktype()
455  * -# Value derived from link set via rtnl_tc_set_link()
456  * -# Default fall-back: ARPHRD_ETHER
457  */
rtnl_tc_get_linktype(struct rtnl_tc * tc)458 uint32_t rtnl_tc_get_linktype(struct rtnl_tc *tc)
459 {
460 	if (tc->ce_mask & TCA_ATTR_LINKTYPE)
461 		return tc->tc_linktype;
462 	else if (tc->ce_mask & TCA_ATTR_LINK)
463 		return tc->tc_link->l_arptype;
464 	else
465 		return ARPHRD_ETHER; /* default to ethernet */
466 }
467 
468 /**
469  * Set identifier of traffic control object
470  * @arg tc		traffic control object
471  * @arg id		unique identifier
472  */
rtnl_tc_set_handle(struct rtnl_tc * tc,uint32_t id)473 void rtnl_tc_set_handle(struct rtnl_tc *tc, uint32_t id)
474 {
475 	tc->tc_handle = id;
476 	tc->ce_mask |= TCA_ATTR_HANDLE;
477 }
478 
479 /**
480  * Return identifier of a traffic control object
481  * @arg tc		traffic control object
482  */
rtnl_tc_get_handle(struct rtnl_tc * tc)483 uint32_t rtnl_tc_get_handle(struct rtnl_tc *tc)
484 {
485 	return tc->tc_handle;
486 }
487 
488 /**
489  * Set the parent identifier of a traffic control object
490  * @arg tc		traffic control object
491  * @arg parent		identifier of parent traffif control object
492  *
493  */
rtnl_tc_set_parent(struct rtnl_tc * tc,uint32_t parent)494 void rtnl_tc_set_parent(struct rtnl_tc *tc, uint32_t parent)
495 {
496 	tc->tc_parent = parent;
497 	tc->ce_mask |= TCA_ATTR_PARENT;
498 }
499 
500 /**
501  * Return parent identifier of a traffic control object
502  * @arg tc		traffic control object
503  */
rtnl_tc_get_parent(struct rtnl_tc * tc)504 uint32_t rtnl_tc_get_parent(struct rtnl_tc *tc)
505 {
506 	return tc->tc_parent;
507 }
508 
509 /**
510  * Define the type of traffic control object
511  * @arg tc		traffic control object
512  * @arg kind		name of the tc object type
513  *
514  * @return 0 on success or a negative error code
515  */
rtnl_tc_set_kind(struct rtnl_tc * tc,const char * kind)516 int rtnl_tc_set_kind(struct rtnl_tc *tc, const char *kind)
517 {
518 	if (tc->ce_mask & TCA_ATTR_KIND)
519 		return -NLE_EXIST;
520 
521 	strncpy(tc->tc_kind, kind, sizeof(tc->tc_kind) - 1);
522 	tc->ce_mask |= TCA_ATTR_KIND;
523 
524 	/* Force allocation of data */
525 	rtnl_tc_data(tc);
526 
527 	return 0;
528 }
529 
530 /**
531  * Return kind of traffic control object
532  * @arg tc		traffic control object
533  *
534  * @return Kind of traffic control object or NULL if not set.
535  */
rtnl_tc_get_kind(struct rtnl_tc * tc)536 char *rtnl_tc_get_kind(struct rtnl_tc *tc)
537 {
538 	if (tc->ce_mask & TCA_ATTR_KIND)
539 		return tc->tc_kind;
540 	else
541 		return NULL;
542 }
543 
544 /**
545  * Return value of a statistical counter of a traffic control object
546  * @arg tc		traffic control object
547  * @arg id		identifier of statistical counter
548  *
549  * @return Value of requested statistic counter or 0.
550  */
rtnl_tc_get_stat(struct rtnl_tc * tc,enum rtnl_tc_stat id)551 uint64_t rtnl_tc_get_stat(struct rtnl_tc *tc, enum rtnl_tc_stat id)
552 {
553 	if (id < 0 || id > RTNL_TC_STATS_MAX)
554 		return 0;
555 
556 	return tc->tc_stats[id];
557 }
558 
559 /** @} */
560 
561 /**
562  * @name Utilities
563  * @{
564  */
565 
/**
 * Calculate time required to transmit buffer at a specific rate
 * @arg bufsize		Size of buffer to be transmitted in bytes.
 * @arg rate		Transmit rate in bytes per second.
 *
 * Calculates the number of micro seconds required to transmit a
 * buffer of the given size at the given transmit rate. The rate is
 * used as divisor, callers are expected to pass a non-zero value.
 *
 * @f[
 *   txtime=\frac{bufsize}{rate}10^6
 * @f]
 *
 * @return Required transmit time in micro seconds (fraction truncated).
 */
int rtnl_tc_calc_txtime(int bufsize, int rate)
{
	const double usecs_per_sec = 1000000.;
	double seconds;

	seconds = (double) bufsize / (double) rate;

	return seconds * usecs_per_sec;
}
588 
/**
 * Calculate buffer size able to transmit in a specific time and rate.
 * @arg txtime		Available transmit time in micro seconds.
 * @arg rate		Transmit rate in bytes per second.
 *
 * Calculates the size of the buffer which can be transmitted within
 * the given time period at the given transmit rate.
 *
 * @f[
 *   bufsize=\frac{{txtime} \times {rate}}{10^6}
 * @f]
 *
 * @return Size of buffer in bytes (fraction truncated).
 */
int rtnl_tc_calc_bufsize(int txtime, int rate)
{
	const double usecs_per_sec = 1000000.;
	double scaled;

	scaled = (double) txtime * (double) rate;

	return scaled / usecs_per_sec;
}
611 
612 /**
613  * Calculate the binary logarithm for a specific cell size
614  * @arg cell_size	Size of cell, must be a power of two.
615  * @return Binary logirhtm of cell size or a negative error code.
616  */
rtnl_tc_calc_cell_log(int cell_size)617 int rtnl_tc_calc_cell_log(int cell_size)
618 {
619 	int i;
620 
621 	for (i = 0; i < 32; i++)
622 		if ((1 << i) == cell_size)
623 			return i;
624 
625 	return -NLE_INVAL;
626 }
627 
628 
629 /** @} */
630 
631 /**
632  * @name Rate Tables
633  * @{
634  */
635 
636 /*
637  * COPYRIGHT NOTE:
 * align_to_atm() and adjust_size() derived/copied from iproute2 source.
639  */
640 
641 /*
642  * The align to ATM cells is used for determining the (ATM) SAR
643  * alignment overhead at the ATM layer. (SAR = Segmentation And
644  * Reassembly).  This is for example needed when scheduling packet on
645  * an ADSL connection.  Note that the extra ATM-AAL overhead is _not_
646  * included in this calculation. This overhead is added in the kernel
647  * before doing the rate table lookup, as this gives better precision
648  * (as the table will always be aligned for 48 bytes).
649  *  --Hawk, d.7/11-2004. <hawk@diku.dk>
650  */
align_to_atm(unsigned int size)651 static unsigned int align_to_atm(unsigned int size)
652 {
653 	int linksize, cells;
654 	cells = size / ATM_CELL_PAYLOAD;
655 	if ((size % ATM_CELL_PAYLOAD) > 0)
656 		cells++;
657 
658 	linksize = cells * ATM_CELL_SIZE; /* Use full cell size to add ATM tax */
659 	return linksize;
660 }
661 
adjust_size(unsigned int size,unsigned int mpu,uint32_t linktype)662 static unsigned int adjust_size(unsigned int size, unsigned int mpu,
663 				uint32_t linktype)
664 {
665 	if (size < mpu)
666 		size = mpu;
667 
668 	switch (linktype) {
669 	case ARPHRD_ATM:
670 		return align_to_atm(size);
671 
672 	case ARPHRD_ETHER:
673 	default:
674 		return size;
675 	}
676 }
677 
678 /**
679  * Compute a transmission time lookup table
680  * @arg tc		traffic control object
681  * @arg spec		Rate specification
682  * @arg dst		Destination buffer of RTNL_TC_RTABLE_SIZE uint32_t[].
683  *
684  * Computes a table of RTNL_TC_RTABLE_SIZE entries specyfing the
685  * transmission times for various packet sizes, e.g. the transmission
686  * time for a packet of size \c pktsize could be looked up:
687  * @code
688  * txtime = table[pktsize >> log2(mtu)];
689  * @endcode
690  */
rtnl_tc_build_rate_table(struct rtnl_tc * tc,struct rtnl_ratespec * spec,uint32_t * dst)691 int rtnl_tc_build_rate_table(struct rtnl_tc *tc, struct rtnl_ratespec *spec,
692 			     uint32_t *dst)
693 {
694 	uint32_t mtu = rtnl_tc_get_mtu(tc);
695 	uint32_t linktype = rtnl_tc_get_linktype(tc);
696 	uint8_t cell_log = spec->rs_cell_log;
697 	unsigned int size, i;
698 
699 	spec->rs_mpu = rtnl_tc_get_mpu(tc);
700 	spec->rs_overhead = rtnl_tc_get_overhead(tc);
701 
702 	if (mtu == 0)
703 		mtu = 2047;
704 
705 	if (cell_log == UINT8_MAX) {
706 		/*
707 		 * cell_log not specified, calculate it. It has to specify the
708 		 * minimum number of rshifts required to break the MTU to below
709 		 * RTNL_TC_RTABLE_SIZE.
710 		 */
711 		cell_log = 0;
712 		while ((mtu >> cell_log) >= RTNL_TC_RTABLE_SIZE)
713 			cell_log++;
714 	}
715 
716 	for (i = 0; i < RTNL_TC_RTABLE_SIZE; i++) {
717 		size = adjust_size((i + 1) << cell_log, spec->rs_mpu, linktype);
718 		dst[i] = nl_us2ticks(rtnl_tc_calc_txtime(size, spec->rs_rate));
719 	}
720 
721 	spec->rs_cell_align = -1;
722 	spec->rs_cell_log = cell_log;
723 
724 	return 0;
725 }
726 
727 /** @} */
728 
729 /**
730  * @name TC implementation of cache functions
731  */
732 
rtnl_tc_free_data(struct nl_object * obj)733 void rtnl_tc_free_data(struct nl_object *obj)
734 {
735 	struct rtnl_tc *tc = TC_CAST(obj);
736 	struct rtnl_tc_ops *ops;
737 
738 	rtnl_link_put(tc->tc_link);
739 	nl_data_free(tc->tc_opts);
740 	nl_data_free(tc->tc_xstats);
741 
742 	if (tc->tc_subdata) {
743 		ops = rtnl_tc_get_ops(tc);
744 		if (ops && ops->to_free_data)
745 			ops->to_free_data(tc, nl_data_get(tc->tc_subdata));
746 
747 		nl_data_free(tc->tc_subdata);
748 	}
749 }
750 
rtnl_tc_clone(struct nl_object * dstobj,struct nl_object * srcobj)751 int rtnl_tc_clone(struct nl_object *dstobj, struct nl_object *srcobj)
752 {
753 	struct rtnl_tc *dst = TC_CAST(dstobj);
754 	struct rtnl_tc *src = TC_CAST(srcobj);
755 	struct rtnl_tc_ops *ops;
756 
757 	if (src->tc_link) {
758 		nl_object_get(OBJ_CAST(src->tc_link));
759 		dst->tc_link = src->tc_link;
760 	}
761 
762 	if (src->tc_opts) {
763 		dst->tc_opts = nl_data_clone(src->tc_opts);
764 		if (!dst->tc_opts)
765 			return -NLE_NOMEM;
766 	}
767 
768 	if (src->tc_xstats) {
769 		dst->tc_xstats = nl_data_clone(src->tc_xstats);
770 		if (!dst->tc_xstats)
771 			return -NLE_NOMEM;
772 	}
773 
774 	if (src->tc_subdata) {
775 		if (!(dst->tc_subdata = nl_data_clone(src->tc_subdata))) {
776 			return -NLE_NOMEM;
777 		}
778 	}
779 
780 	ops = rtnl_tc_get_ops(src);
781 	if (ops && ops->to_clone) {
782 		void *a = rtnl_tc_data(dst), *b = rtnl_tc_data(src);
783 
784 		if (!a)
785 			return 0;
786 		else if (!b)
787 			return -NLE_NOMEM;
788 
789 		return ops->to_clone(a, b);
790 	}
791 
792 	return 0;
793 }
794 
tc_dump(struct rtnl_tc * tc,enum nl_dump_type type,struct nl_dump_params * p)795 static int tc_dump(struct rtnl_tc *tc, enum nl_dump_type type,
796 		   struct nl_dump_params *p)
797 {
798 	struct rtnl_tc_type_ops *type_ops;
799 	struct rtnl_tc_ops *ops;
800 	void *data = rtnl_tc_data(tc);
801 
802 	type_ops = tc_type_ops[tc->tc_type];
803 	if (type_ops && type_ops->tt_dump[type])
804 		type_ops->tt_dump[type](tc, p);
805 
806 	ops = rtnl_tc_get_ops(tc);
807 	if (ops && ops->to_dump[type]) {
808 		ops->to_dump[type](tc, data, p);
809 		return 1;
810 	}
811 
812 	return 0;
813 }
814 
rtnl_tc_dump_line(struct nl_object * obj,struct nl_dump_params * p)815 void rtnl_tc_dump_line(struct nl_object *obj, struct nl_dump_params *p)
816 {
817 	struct rtnl_tc_type_ops *type_ops;
818 	struct rtnl_tc *tc = TC_CAST(obj);
819 	struct nl_cache *link_cache;
820 	char buf[32];
821 
822 	nl_new_line(p);
823 
824 	type_ops = tc_type_ops[tc->tc_type];
825 	if (type_ops && type_ops->tt_dump_prefix)
826 		nl_dump(p, "%s ", type_ops->tt_dump_prefix);
827 
828 	nl_dump(p, "%s ", tc->tc_kind);
829 
830 	if ((link_cache = nl_cache_mngt_require_safe("route/link"))) {
831 		nl_dump(p, "dev %s ",
832 			rtnl_link_i2name(link_cache, tc->tc_ifindex,
833 					 buf, sizeof(buf)));
834 	} else
835 		nl_dump(p, "dev %u ", tc->tc_ifindex);
836 
837 	nl_dump(p, "id %s ",
838 		rtnl_tc_handle2str(tc->tc_handle, buf, sizeof(buf)));
839 
840 	nl_dump(p, "parent %s",
841 		rtnl_tc_handle2str(tc->tc_parent, buf, sizeof(buf)));
842 
843 	tc_dump(tc, NL_DUMP_LINE, p);
844 	nl_dump(p, "\n");
845 
846 	if (link_cache)
847 		nl_cache_put(link_cache);
848 }
849 
rtnl_tc_dump_details(struct nl_object * obj,struct nl_dump_params * p)850 void rtnl_tc_dump_details(struct nl_object *obj, struct nl_dump_params *p)
851 {
852 	struct rtnl_tc *tc = TC_CAST(obj);
853 
854 	rtnl_tc_dump_line(OBJ_CAST(tc), p);
855 
856 	nl_dump_line(p, "  ");
857 
858 	if (tc->ce_mask & TCA_ATTR_MTU)
859 		nl_dump(p, " mtu %u", tc->tc_mtu);
860 
861 	if (tc->ce_mask & TCA_ATTR_MPU)
862 		nl_dump(p, " mpu %u", tc->tc_mpu);
863 
864 	if (tc->ce_mask & TCA_ATTR_OVERHEAD)
865 		nl_dump(p, " overhead %u", tc->tc_overhead);
866 
867 	if (!tc_dump(tc, NL_DUMP_DETAILS, p))
868 		nl_dump(p, "no options");
869 	nl_dump(p, "\n");
870 }
871 
rtnl_tc_dump_stats(struct nl_object * obj,struct nl_dump_params * p)872 void rtnl_tc_dump_stats(struct nl_object *obj, struct nl_dump_params *p)
873 {
874 	struct rtnl_tc *tc = TC_CAST(obj);
875 	char *unit, fmt[64];
876 	float res;
877 
878 	rtnl_tc_dump_details(OBJ_CAST(tc), p);
879 
880 	strcpy(fmt, "        %7.2f %s %10u %10u %10u %10u %10u\n");
881 
882 	nl_dump_line(p,
883 		"    Stats:    bytes    packets      drops overlimits" \
884 		"       qlen    backlog\n");
885 
886 	res = nl_cancel_down_bytes(tc->tc_stats[RTNL_TC_BYTES], &unit);
887 	if (*unit == 'B')
888 		fmt[11] = '9';
889 
890 	nl_dump_line(p, fmt, res, unit,
891 		tc->tc_stats[RTNL_TC_PACKETS],
892 		tc->tc_stats[RTNL_TC_DROPS],
893 		tc->tc_stats[RTNL_TC_OVERLIMITS],
894 		tc->tc_stats[RTNL_TC_QLEN],
895 		tc->tc_stats[RTNL_TC_BACKLOG]);
896 
897 	res = nl_cancel_down_bytes(tc->tc_stats[RTNL_TC_RATE_BPS], &unit);
898 
899 	strcpy(fmt, "        %7.2f %s/s%9u pps");
900 
901 	if (*unit == 'B')
902 		fmt[11] = '9';
903 
904 	nl_dump_line(p, fmt, res, unit, tc->tc_stats[RTNL_TC_RATE_PPS]);
905 
906 	tc_dump(tc, NL_DUMP_LINE, p);
907 	nl_dump(p, "\n");
908 }
909 
rtnl_tc_compare(struct nl_object * aobj,struct nl_object * bobj,uint32_t attrs,int flags)910 int rtnl_tc_compare(struct nl_object *aobj, struct nl_object *bobj,
911 		    uint32_t attrs, int flags)
912 {
913 	struct rtnl_tc *a = TC_CAST(aobj);
914 	struct rtnl_tc *b = TC_CAST(bobj);
915 	int diff = 0;
916 
917 #define TC_DIFF(ATTR, EXPR) ATTR_DIFF(attrs, TCA_ATTR_##ATTR, a, b, EXPR)
918 
919 	diff |= TC_DIFF(HANDLE,		a->tc_handle != b->tc_handle);
920 	diff |= TC_DIFF(PARENT,		a->tc_parent != b->tc_parent);
921 	diff |= TC_DIFF(IFINDEX,	a->tc_ifindex != b->tc_ifindex);
922 	diff |= TC_DIFF(KIND,		strcmp(a->tc_kind, b->tc_kind));
923 
924 #undef TC_DIFF
925 
926 	return diff;
927 }
928 
929 /** @} */
930 
931 /**
932  * @name Modules API
933  */
934 
rtnl_tc_lookup_ops(enum rtnl_tc_type type,const char * kind)935 struct rtnl_tc_ops *rtnl_tc_lookup_ops(enum rtnl_tc_type type, const char *kind)
936 {
937 	struct rtnl_tc_ops *ops;
938 
939 	nl_list_for_each_entry(ops, &tc_ops_list[type], to_list)
940 		if (!strcmp(kind, ops->to_kind))
941 			return ops;
942 
943 	return NULL;
944 }
945 
rtnl_tc_get_ops(struct rtnl_tc * tc)946 struct rtnl_tc_ops *rtnl_tc_get_ops(struct rtnl_tc *tc)
947 {
948 	if (!tc->tc_ops)
949 		tc->tc_ops = rtnl_tc_lookup_ops(tc->tc_type, tc->tc_kind);
950 
951 	return tc->tc_ops;
952 }
953 
954 /**
955  * Register a traffic control module
956  * @arg ops		traffic control module operations
957  */
rtnl_tc_register(struct rtnl_tc_ops * ops)958 int rtnl_tc_register(struct rtnl_tc_ops *ops)
959 {
960 	static int init = 0;
961 
962 	/*
963 	 * Initialiation hack, make sure list is initialized when
964 	 * the first tc module registers. Putting this in a
965 	 * separate __init would required correct ordering of init
966 	 * functions
967 	 */
968 	if (!init) {
969 		int i;
970 
971 		for (i = 0; i < __RTNL_TC_TYPE_MAX; i++)
972 			nl_init_list_head(&tc_ops_list[i]);
973 
974 		init = 1;
975 	}
976 
977 	if (!ops->to_kind || ops->to_type > RTNL_TC_TYPE_MAX)
978 		BUG();
979 
980 	if (rtnl_tc_lookup_ops(ops->to_type, ops->to_kind))
981 		return -NLE_EXIST;
982 
983 	nl_list_add_tail(&ops->to_list, &tc_ops_list[ops->to_type]);
984 
985 	return 0;
986 }
987 
988 /**
989  * Unregister a traffic control module
990  * @arg ops		traffic control module operations
991  */
rtnl_tc_unregister(struct rtnl_tc_ops * ops)992 void rtnl_tc_unregister(struct rtnl_tc_ops *ops)
993 {
994 	nl_list_del(&ops->to_list);
995 }
996 
997 /**
998  * Return pointer to private data of traffic control object
999  * @arg tc		traffic control object
1000  *
1001  * Allocates the private traffic control object data section
1002  * as necessary and returns it.
1003  *
1004  * @return Pointer to private tc data or NULL if allocation failed.
1005  */
rtnl_tc_data(struct rtnl_tc * tc)1006 void *rtnl_tc_data(struct rtnl_tc *tc)
1007 {
1008 	if (!tc->tc_subdata) {
1009 		size_t size;
1010 
1011 		if (!tc->tc_ops) {
1012 			if (!tc->tc_kind)
1013 				BUG();
1014 
1015 			if (!rtnl_tc_get_ops(tc))
1016 				return NULL;
1017 		}
1018 
1019 		if (!(size = tc->tc_ops->to_size))
1020 			BUG();
1021 
1022 		if (!(tc->tc_subdata = nl_data_alloc(NULL, size)))
1023 			return NULL;
1024 	}
1025 
1026 	return nl_data_get(tc->tc_subdata);
1027 }
1028 
1029 /**
1030  * Check traffic control object type and return private data section
1031  * @arg tc		traffic control object
1032  * @arg ops		expected traffic control object operations
1033  *
1034  * Checks whether the traffic control object matches the type
1035  * specified with the traffic control object operations. If the
1036  * type matches, the private tc object data is returned. If type
1037  * mismatches, APPBUG() will print a application bug warning.
1038  *
1039  * @see rtnl_tc_data()
1040  *
1041  * @return Pointer to private tc data or NULL if type mismatches.
1042  */
rtnl_tc_data_check(struct rtnl_tc * tc,struct rtnl_tc_ops * ops)1043 void *rtnl_tc_data_check(struct rtnl_tc *tc, struct rtnl_tc_ops *ops)
1044 {
1045 	if (tc->tc_ops != ops) {
1046 		char buf[64];
1047 
1048 		snprintf(buf, sizeof(buf),
1049 			 "tc object %p used in %s context but is of type %s",
1050 			 tc, ops->to_kind, tc->tc_ops->to_kind);
1051 		APPBUG(buf);
1052 
1053 		return NULL;
1054 	}
1055 
1056 	return rtnl_tc_data(tc);
1057 }
1058 
/* Multicast groups for tc objects: RTNLGRP_TC for AF_UNSPEC, terminated by END_OF_GROUP_LIST. */
struct nl_af_group tc_groups[] = {
	{ AF_UNSPEC,	RTNLGRP_TC },
	{ END_OF_GROUP_LIST },
};
1063 
1064 
rtnl_tc_type_register(struct rtnl_tc_type_ops * ops)1065 void rtnl_tc_type_register(struct rtnl_tc_type_ops *ops)
1066 {
1067 	if (ops->tt_type > RTNL_TC_TYPE_MAX)
1068 		BUG();
1069 
1070 	tc_type_ops[ops->tt_type] = ops;
1071 }
1072 
rtnl_tc_type_unregister(struct rtnl_tc_type_ops * ops)1073 void rtnl_tc_type_unregister(struct rtnl_tc_type_ops *ops)
1074 {
1075 	if (ops->tt_type > RTNL_TC_TYPE_MAX)
1076 		BUG();
1077 
1078 	tc_type_ops[ops->tt_type] = NULL;
1079 }
1080 
1081 /** @} */
1082 
1083 /** @} */
1084