1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3#
4# Check that route PMTU values match expectations, and that initial device MTU
5# values are assigned correctly
6#
7# Tests currently implemented:
8#
# - pmtu_ipv4_exception
10#	Set up two namespaces, A and B, with two paths between them over routers
11#	R1 and R2 (also implemented with namespaces), with different MTUs:
12#
#	  segment a_r1    segment b_r1		a_r1: 2000
#	.--------------R1--------------.	b_r1: 1400
#	A                               B	a_r2: 2000
#	'--------------R2--------------'	b_r2: 1500
17#	  segment a_r2    segment b_r2
18#
19#	Check that PMTU exceptions with the correct PMTU are created. Then
20#	decrease and increase the MTU of the local link for one of the paths,
21#	A to R1, checking that route exception PMTU changes accordingly over
22#	this path. Also check that locked exceptions are created when an ICMP
23#	message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
24#	received
25#
# - pmtu_ipv6_exception
#	Same as pmtu_ipv4_exception, except for locked PMTU tests, using IPv6
28#
29# - pmtu_vti4_exception
30#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
31#	namespaces with matching endpoints. Check that route exception is not
32#	created if link layer MTU is not exceeded, then exceed it and check that
33#	exception is created with the expected PMTU. The approach described
34#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
35#	changes alone won't affect PMTU
36#
37# - pmtu_vti6_exception
38#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
39#	namespaces with matching endpoints. Check that route exception is
40#	created by exceeding link layer MTU with ping to other endpoint. Then
41#	decrease and increase MTU of tunnel, checking that route exception PMTU
42#	changes accordingly
43#
44# - pmtu_vti4_default_mtu
45#	Set up vti4 tunnel on top of veth, in two namespaces with matching
46#	endpoints. Check that MTU assigned to vti interface is the MTU of the
47#	lower layer (veth) minus additional lower layer headers (zero, for veth)
48#	minus IPv4 header length
49#
50# - pmtu_vti6_default_mtu
51#	Same as above, for IPv6
52#
53# - pmtu_vti4_link_add_mtu
54#	Set up vti4 interface passing MTU value at link creation, check MTU is
55#	configured, and that link is not created with invalid MTU values
56#
57# - pmtu_vti6_link_add_mtu
58#	Same as above, for IPv6
59#
60# - pmtu_vti6_link_change_mtu
61#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
62#	and check that configured MTU is used on link creation and changes, and
63#	that MTU is properly calculated instead when MTU is not configured from
64#	userspace
65
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Some systems don't have a ping6 binary anymore: fall back to plain ping in
# that case. Binaries are looked up with the POSIX "command -v" rather than
# with "which", an external tool that isn't guaranteed to be installed.
if command -v ping6 > /dev/null 2>&1; then
	ping6="$(command -v ping6)"
else
	ping6="$(command -v ping)"
fi
71
72tests="
73	pmtu_ipv4_exception		ipv4: PMTU exceptions
74	pmtu_ipv6_exception		ipv6: PMTU exceptions
75	pmtu_vti6_exception		vti6: PMTU exceptions
76	pmtu_vti4_exception		vti4: PMTU exceptions
77	pmtu_vti4_default_mtu		vti4: default MTU assignment
78	pmtu_vti6_default_mtu		vti6: default MTU assignment
79	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
80	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
81	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes"
82
83NS_A="ns-$(mktemp -u XXXXXX)"
84NS_B="ns-$(mktemp -u XXXXXX)"
85NS_R1="ns-$(mktemp -u XXXXXX)"
86NS_R2="ns-$(mktemp -u XXXXXX)"
87ns_a="ip netns exec ${NS_A}"
88ns_b="ip netns exec ${NS_B}"
89ns_r1="ip netns exec ${NS_R1}"
90ns_r2="ip netns exec ${NS_R2}"
91
# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
# Addresses are:
# - IPv4: PREFIX4.SEGMENT.ID (/24)
# - IPv6: PREFIX6:SEGMENT::ID (/64)
#
# Both lists below are walked word by word by setup_routing(), three words per
# entry, so blank lines and the amount of whitespace between columns don't
# matter.
prefix4="192.168"
prefix6="fd00"
a_r1=1
a_r2=2
b_r1=3
b_r2=4
#	ns	peer	segment
routing_addrs="
	A	R1	${a_r1}
	A	R2	${a_r2}
	B	R1	${b_r1}
	B	R2	${b_r2}
"
# Traffic from A to B goes through R1 by default, and through R2, if destined to
# B's address on the b_r2 segment.
# Traffic from B to A goes through R1.
#	ns	destination		gateway
routes="
	A	default			${prefix4}.${a_r1}.2
	A	${prefix4}.${b_r2}.1	${prefix4}.${a_r2}.2
	B	default			${prefix4}.${b_r1}.2

	A	default			${prefix6}:${a_r1}::2
	A	${prefix6}:${b_r2}::1	${prefix6}:${a_r2}::2
	B	default			${prefix6}:${b_r1}::2
"
124
125veth4_a_addr="192.168.1.1"
126veth4_b_addr="192.168.1.2"
127veth4_mask="24"
128veth6_a_addr="fd00:1::a"
129veth6_b_addr="fd00:1::b"
130veth6_mask="64"
131
132vti4_a_addr="192.168.2.1"
133vti4_b_addr="192.168.2.2"
134vti4_mask="24"
135vti6_a_addr="fd00:2::a"
136vti6_b_addr="fd00:2::b"
137vti6_mask="64"
138
139dummy6_0_addr="fc00:1000::0"
140dummy6_1_addr="fc00:1001::0"
141dummy6_mask="64"
142
143cleanup_done=1
144err_buf=
145tcpdump_pids=
146
# err() - Queue an error message, to be printed later by err_flush()
# $1:	message, appended to err_buf followed by a newline
err() {
	# Command substitution strips trailing newlines: protect the one we add
	# with a trailing "x", and drop the "x" afterwards
	err_buf="$(printf '%s%s\nx' "${err_buf}" "${1}")"
	err_buf="${err_buf%x}"
}
151
# err_flush() - Print the error messages queued by err(), then drop them
#
# Messages go to standard output, without adding any trailing newline: each
# message in err_buf already carries its own.
err_flush() {
	# printf '%s' prints the buffer verbatim. The behaviour of "echo -n" is
	# left unspecified by POSIX, and echo might interpret backslashes or a
	# leading dash in the buffer
	printf '%s' "${err_buf}"
	err_buf=
}
156
# nsname() - Print the auto-generated name of a namespace
# $1:	suffix of the NS_* variable holding the name, e.g. A or R1
nsname() {
	eval "echo \$NS_${1}"
}
161
# setup_namespaces() - Create namespaces A, B, R1 and R2
#
# Return: 1 as soon as one namespace can't be created
setup_namespaces() {
	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
		if ! ip netns add ${n}; then
			return 1
		fi
	done
}
167
# setup_veth() - Connect namespaces A and B with a veth pair
#
# Both ends get their IPv4 and IPv6 addresses and are brought up.
#
# Return: 1 if the veth pair can't be created
setup_veth() {
	# Both ends are created in A, then veth_b is moved to B
	if ! ${ns_a} ip link add veth_a type veth peer name veth_b; then
		return 1
	fi
	${ns_a} ip link set veth_b netns "${NS_B}"

	${ns_a} ip addr add "${veth4_a_addr}/${veth4_mask}" dev veth_a
	${ns_b} ip addr add "${veth4_b_addr}/${veth4_mask}" dev veth_b

	${ns_a} ip addr add "${veth6_a_addr}/${veth6_mask}" dev veth_a
	${ns_b} ip addr add "${veth6_b_addr}/${veth6_mask}" dev veth_b

	${ns_a} ip link set veth_a up
	${ns_b} ip link set veth_b up
}
181
# setup_vti() - Create vti or vti6 tunnel interfaces in namespaces A and B
# $1:	IP version, 4 or 6: selects link type (vti or vti6) and interface names
# $2:	address of tunnel endpoint in A (local for A, remote for B)
# $3:	address of tunnel endpoint in B (local for B, remote for A)
# $4:	address assigned to the tunnel interface in A
# $5:	address assigned to the tunnel interface in B
# $6:	prefix length for the addresses of tunnel interfaces
#
# Return: 1 if the tunnel interface can't be created in A
setup_vti() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"
	vti_a_addr="${4}"
	vti_b_addr="${5}"
	vti_mask=${6}

	if [ ${proto} -eq 6 ]; then
		vti_type="vti6"
	else
		vti_type="vti"
	fi

	if ! ${ns_a} ip link add "vti${proto}_a" type "${vti_type}" local "${veth_a_addr}" remote "${veth_b_addr}" key 10; then
		return 1
	fi
	${ns_b} ip link add "vti${proto}_b" type "${vti_type}" local "${veth_b_addr}" remote "${veth_a_addr}" key 10

	${ns_a} ip addr add "${vti_a_addr}/${vti_mask}" dev "vti${proto}_a"
	${ns_b} ip addr add "${vti_b_addr}/${vti_mask}" dev "vti${proto}_b"

	${ns_a} ip link set "vti${proto}_a" up
	${ns_b} ip link set "vti${proto}_b" up

	sleep 1
}
203
# setup_vti4() - Set up vti tunnel interfaces on top of IPv4 veth addresses
setup_vti4() {
	setup_vti 4 \
		  "${veth4_a_addr}" "${veth4_b_addr}" \
		  "${vti4_a_addr}" "${vti4_b_addr}" "${vti4_mask}"
}
207
# setup_vti6() - Set up vti6 tunnel interfaces on top of IPv6 veth addresses
setup_vti6() {
	setup_vti 6 \
		  "${veth6_a_addr}" "${veth6_b_addr}" \
		  "${vti6_a_addr}" "${vti6_b_addr}" "${vti6_mask}"
}
211
# setup_xfrm() - Add xfrm states and policies in namespaces A and B
# $1:	IP version, 4 or 6, passed as option to ip
# $2:	address of tunnel endpoint in A
# $3:	address of tunnel endpoint in B
#
# The same two states, one for each direction, are added on both sides. The
# "out" policy of each side matches traffic towards the other side, the "in"
# policy matches the opposite direction.
#
# Return: 1 if the first state can't be added in A
setup_xfrm() {
	proto=${1}
	veth_a_addr="${2}"
	veth_b_addr="${3}"

	xfrm_ip="ip -${proto} xfrm"
	xfrm_aead_alg="rfc4106(gcm(aes))"
	xfrm_aead_key="0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f"

	${ns_a} ${xfrm_ip} state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 \
		proto esp aead "${xfrm_aead_alg}" ${xfrm_aead_key} 128 mode tunnel || return 1
	${ns_a} ${xfrm_ip} state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 \
		proto esp aead "${xfrm_aead_alg}" ${xfrm_aead_key} 128 mode tunnel
	${ns_a} ${xfrm_ip} policy add dir out mark 10 \
		tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
	${ns_a} ${xfrm_ip} policy add dir in mark 10 \
		tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel

	${ns_b} ${xfrm_ip} state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 \
		proto esp aead "${xfrm_aead_alg}" ${xfrm_aead_key} 128 mode tunnel
	${ns_b} ${xfrm_ip} state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 \
		proto esp aead "${xfrm_aead_alg}" ${xfrm_aead_key} 128 mode tunnel
	${ns_b} ${xfrm_ip} policy add dir out mark 10 \
		tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
	${ns_b} ${xfrm_ip} policy add dir in mark 10 \
		tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
}
227
# setup_xfrm4() - Add IPv4 xfrm states and policies between veth addresses
setup_xfrm4() {
	setup_xfrm 4 "${veth4_a_addr}" "${veth4_b_addr}"
}
231
# setup_xfrm6() - Add IPv6 xfrm states and policies between veth addresses
setup_xfrm6() {
	setup_xfrm 6 "${veth6_a_addr}" "${veth6_b_addr}"
}
235
# setup_routing() - Set up links, addresses and routes for tests with routers
#
# Enable IPv4 and IPv6 forwarding in R1 and R2. Then, for each "ns peer
# segment" entry of routing_addrs, create a veth pair between the two
# namespaces and assign IPv4 and IPv6 addresses with identifier 1 on the ns
# side and 2 on the peer side. Finally, add a route for each "ns destination
# gateway" entry of routes.
#
# Return: 1 if a veth pair can't be created
setup_routing() {
	for i in ${NS_R1} ${NS_R2}; do
		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
		ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
	done

	# Entries are collected one word at a time into these variables, an empty
	# variable meaning "not read yet": start from a known state, as variables
	# are global and a stale value would shift all the entries
	ns=""; peer=""; segment=""
	for i in ${routing_addrs}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${peer}" = "" ]	&& peer="${i}"		&& continue
		[ "${segment}" = "" ]	&& segment="${i}"

		ns_name="$(nsname ${ns})"
		peer_name="$(nsname ${peer})"
		ifname="veth_${ns}-${peer}"
		ifpeer="veth_${peer}-${ns}"

		# Create veth links
		ip link add ${ifname} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
		ip -n ${peer_name} link set dev ${ifpeer} up

		# Add addresses
		ip -n ${ns_name}   addr add ${prefix4}.${segment}.1/24  dev ${ifname}
		ip -n ${ns_name}   addr add ${prefix6}:${segment}::1/64 dev ${ifname}

		ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24  dev ${ifpeer}
		ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}

		ns=""; peer=""; segment=""
	done

	# Same as above: don't rely on the caller for a clean state
	ns=""; addr=""; gw=""
	for i in ${routes}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
		[ "${gw}" = "" ]	&& gw="${i}"

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} route add ${addr} via ${gw}

		ns=""; addr=""; gw=""
	done
}
278
# setup() - Run the setup steps needed by a test
# $@:	step names: for each NAME, the setup_NAME function is called, in order
#
# Return: ${ksft_skip} if not running as root, 1 if a step fails, in which case
#	  the remaining steps are not run
setup() {
	if [ "$(id -u)" -ne 0 ]; then
		echo "  need to run as root"
		return $ksft_skip
	fi

	# From now on, cleanup() has something to do
	cleanup_done=0
	for arg do
		if ! eval setup_${arg}; then
			echo "  ${arg} not supported"
			return 1
		fi
	done
}
287
# trace() - Start capturing traffic with tcpdump, if tracing is enabled
# $@:	pairs of arguments: command prefix to run tcpdump in a namespace,
#	followed by the name of the interface to capture on
#
# Captures are written to NAME_INTERFACE.pcap, NAME being the current value of
# the global variable name. PIDs of tcpdump instances are appended to
# tcpdump_pids, so that cleanup() can terminate them.
trace() {
	[ "${tracing}" -eq 0 ] && return

	# ns_cmd is global, and it's also set by other helpers such as mtu():
	# reset it, otherwise a value left by them would be taken as the first
	# command prefix, and arguments would be paired wrongly
	ns_cmd=
	for arg do
		if [ -z "${ns_cmd}" ]; then
			# First argument of a pair: the command prefix
			ns_cmd="${arg}"
			continue
		fi

		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
		tcpdump_pids="${tcpdump_pids} $!"
		ns_cmd=
	done
	sleep 1
}
299
# cleanup() - Terminate tcpdump instances and delete namespaces
#
# Namespaces are deleted only if cleanup_done is not 1, that is, if setup() ran
# after the last cleanup.
cleanup() {
	for pid in ${tcpdump_pids}; do
		kill ${pid}
	done
	tcpdump_pids=

	if ! [ ${cleanup_done} -eq 1 ]; then
		for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
			ip netns del ${n} 2> /dev/null
		done
		cleanup_done=1
	fi
}
312
# mtu() - Set the MTU of a device
# $1:	command prefix to run ip in the desired namespace
# $2:	device name
# $3:	MTU value
mtu() {
	ns_cmd="${1}" dev="${2}" mtu="${3}"

	${ns_cmd} ip link set dev "${dev}" mtu "${mtu}"
}
320
# mtu_parse() - Extract the MTU value from the output of ip
# $1:	output of ip, split into words here
#
# Print the word following the first "mtu" keyword, or "lock" and the word
# after it if that keyword is followed by "lock". Nothing is printed if no
# value is found.
mtu_parse() {
	input="${1}"

	# 0: looking for "mtu", 1: "mtu" just seen, 2: "mtu lock" just seen
	next=0
	for i in ${input}; do
		case "${next}" in
		1)
			if [ "${i}" = "lock" ]; then
				next=2
				continue
			fi
			echo "${i}"
			return
			;;
		2)
			echo "lock ${i}"
			return
			;;
		esac
		[ "${i}" = "mtu" ] && next=1
	done
}
332
# link_get() - Show link information for a device, as printed by ip
# $1:	command prefix to run ip in the desired namespace
# $2:	device name
link_get() {
	ns_cmd="${1}" name="${2}"

	${ns_cmd} ip link show dev "${name}"
}
339
# link_get_mtu() - Print the MTU of a device
# $1:	command prefix to run ip in the desired namespace
# $2:	device name
link_get_mtu() {
	ns_cmd="${1}" name="${2}"

	link_info="$(link_get "${ns_cmd}" ${name})"
	mtu_parse "${link_info}"
}
346
# route_get_dst_exception() - Show the route to a destination, as printed by ip
# $1:	command prefix to run ip in the desired namespace
# $2:	destination address
route_get_dst_exception() {
	ns_cmd="${1}" dst="${2}"

	${ns_cmd} ip route get "${dst}"
}
353
# route_get_dst_pmtu_from_exception() - Print the PMTU for a destination
# $1:	command prefix to run ip in the desired namespace
# $2:	destination address
#
# Nothing is printed if the route to the destination carries no MTU value.
route_get_dst_pmtu_from_exception() {
	ns_cmd="${1}" dst="${2}"

	route_info="$(route_get_dst_exception "${ns_cmd}" ${dst})"
	mtu_parse "${route_info}"
}
360
# check_pmtu_value() - Compare a PMTU value with the expected one
# $1:	expected value: "any" matches any non-empty value, an empty string
#	means that no exception is expected
# $2:	value found
# $3:	description of the event preceding the check, for error messages
#
# Return: 0 on match, 1 otherwise, with an error message queued using err()
check_pmtu_value() {
	expected="${1}"
	value="${2}"
	event="${3}"

	if [ "${expected}" = "any" ] && [ -n "${value}" ]; then
		return 0
	elif [ "${value}" = "${expected}" ]; then
		return 0
	fi

	if [ -z "${value}" ]; then
		err "  PMTU exception wasn't created after ${event}"
	elif [ -z "${expected}" ]; then
		err "  PMTU exception shouldn't exist after ${event}"
	else
		err "  found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
	fi
	return 1
}
373
# test_pmtu_ipvX() - Check PMTU route exceptions over two routed paths
# $1:	IP version, 4 or 6
#
# Return: 0 on success, 1 as soon as a check fails, 2 if namespaces or routing
#	  can't be set up
test_pmtu_ipvX() {
	family=${1}

	setup namespaces routing || return 2
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	# Destinations are the addresses of B on segments b_r1 and b_r2: as set
	# in ${routes}, A reaches the first one via R1, the second one via R2
	if [ ${family} -eq 4 ]; then
		ping=ping
		dst1="${prefix4}.${b_r1}.1"
		dst2="${prefix4}.${b_r2}.1"
	else
		ping=${ping6}
		dst1="${prefix6}:${b_r1}::1"
		dst2="${prefix6}:${b_r2}::1"
	fi

	# Set up initial MTU values
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1400
	mtu "${ns_b}"  veth_B-R1 1400

	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	# Create route exceptions
	# The payload, 1800 bytes, fits the MTU of the links between A and the
	# routers, but exceeds the MTU of the links between the routers and B
	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst1} > /dev/null
	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst2} > /dev/null

	# Check that exceptions have been created with the correct PMTU
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1

	# Decrease local MTU below PMTU, check for PMTU decrease in route exception
	mtu "${ns_a}"  veth_A-R1 1300
	mtu "${ns_r1}" veth_R1-A 1300
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
	# Second exception shouldn't be modified
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Increase MTU, check for PMTU increase in route exception
	mtu "${ns_a}"  veth_A-R1 1700
	mtu "${ns_r1}" veth_R1-A 1700
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
	# Second exception shouldn't be modified
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Skip PMTU locking tests for IPv6
	[ $family -eq 6 ] && return 0

	# Decrease remote MTU on path via R2, get new exception
	# NOTE(review): the expected value, 552, is presumably the default of
	# net.ipv4.route.min_pmtu -- confirm
	mtu "${ns_r2}" veth_R2-B 400
	mtu "${ns_b}"  veth_B-R2 400
	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1

	# Decrease local MTU below PMTU
	mtu "${ns_a}"  veth_A-R2 500
	mtu "${ns_r2}" veth_R2-A 500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1

	# Increase local MTU
	mtu "${ns_a}"  veth_A-R2 1500
	mtu "${ns_r2}" veth_R2-A 1500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1

	# Get new exception
	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
}
459
# test_pmtu_ipv4_exception() - IPv4 variant of test_pmtu_ipvX(), which also
# runs the checks on locked PMTU exceptions
test_pmtu_ipv4_exception() {
	test_pmtu_ipvX 4
}
463
# test_pmtu_ipv6_exception() - IPv6 variant of test_pmtu_ipvX(), which skips
# the checks on locked PMTU exceptions
test_pmtu_ipv6_exception() {
	test_pmtu_ipvX 6
}
467
# test_pmtu_vti4_exception() - Check PMTU exceptions on a vti tunnel
#
# Send the biggest packet that fits the tunnel and check that no exception is
# created, then send a packet one byte bigger and check the resulting PMTU.
#
# Return: 0 on success, 1 on failure, 2 if the environment can't be set up
test_pmtu_vti4_exception() {
	if ! setup namespaces veth vti4 xfrm4; then
		return 2
	fi
	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
	      "${ns_a}" vti4_a    "${ns_b}" vti4_b

	veth_mtu=1500
	vti_mtu=$((veth_mtu - 20))

	# ESP overhead with rfc4106, in bytes: SPI (4), SN (4), IV (8), ICV (16),
	# pad length (1), next header (1)
	esp_overhead=$((4 + 4 + 8 + 16 + 1 + 1))
	esp_payload_rfc4106=$((vti_mtu - esp_overhead))
	# 28: presumably the inner IPv4 (20) and ICMP (8) headers
	ping_payload=$((esp_payload_rfc4106 - 28))

	mtu "${ns_a}" veth_a ${veth_mtu}
	mtu "${ns_b}" veth_b ${veth_mtu}
	mtu "${ns_a}" vti4_a ${vti_mtu}
	mtu "${ns_b}" vti4_b ${vti_mtu}

	# Send DF packet without exceeding link layer MTU, check that no
	# exception is created
	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
	if ! check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})"; then
		return 1
	fi

	# Now exceed link layer MTU by one byte, check that exception is created
	# with the right PMTU value. The result of this check is the result of
	# the test
	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
497
# test_pmtu_vti6_exception() - Check PMTU exceptions on a vti6 tunnel
#
# Create a route exception by exceeding the link layer MTU, then check that
# its PMTU follows MTU changes of the tunnel interface.
#
# Return: 0 on success, 1 on failure, 2 if the environment can't be set up
test_pmtu_vti6_exception() {
	if ! setup namespaces veth vti6 xfrm6; then
		return 2
	fi
	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
	fail=0

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}" veth_a 4000
	mtu "${ns_b}" veth_b 4000
	mtu "${ns_a}" vti6_a 5000
	mtu "${ns_b}" vti6_b 5000
	${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null

	# Check that exception was created: without it, the checks below make
	# no sense, so give up right away
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if ! check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU"; then
		return 1
	fi

	# Decrease tunnel MTU, check for PMTU decrease in route exception
	mtu "${ns_a}" vti6_a 3000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if ! check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU"; then
		fail=1
	fi

	# Increase tunnel MTU, check for PMTU increase in route exception
	mtu "${ns_a}" vti6_a 9000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	if ! check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU"; then
		fail=1
	fi

	return ${fail}
}
527
# test_pmtu_vti4_default_mtu() - Check the default MTU of a vti interface
#
# Return: 0 on success, 1 on failure, 2 if the environment can't be set up
test_pmtu_vti4_default_mtu() {
	if ! setup namespaces veth vti4; then
		return 2
	fi

	# MTU of vti device should be MTU of veth minus IPv4 header length
	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
	[ $((veth_mtu - vti4_mtu)) -eq 20 ] && return 0

	err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
	return 1
}
539
# test_pmtu_vti6_default_mtu() - Check the default MTU of a vti6 interface
#
# Return: 0 on success, 1 on failure, 2 if the environment can't be set up
test_pmtu_vti6_default_mtu() {
	if ! setup namespaces veth vti6; then
		return 2
	fi

	# MTU of vti device should be MTU of veth minus IPv6 header length
	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	[ $((veth_mtu - vti6_mtu)) -eq 40 ] && return 0

	err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
	return 1
}
551
# test_pmtu_vti4_link_add_mtu() - Check MTU passed on creation of vti links
#
# Links created with an MTU out of the valid range must either be refused, or
# get an MTU within the range. Links created with a valid MTU must get exactly
# that MTU.
#
# Return: 0 on success, 1 on failure, 2 if the environment can't be set up or
#	  vti links can't be created
test_pmtu_vti4_link_add_mtu() {
	if ! setup namespaces; then
		return 2
	fi

	# Probe for vti support with a link that's deleted right away
	if ! ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10; then
		err "  vti not supported"
		return 2
	fi
	${ns_a} ip link del vti4_a

	fail=0

	min=68
	max=$((65535 - 20))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		# This can fail, or MTU can be adjusted to a proper value
		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null || continue

		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		if [ ${mtu} -lt ${min} ] || [ ${mtu} -gt ${max} ]; then
			err "  vti tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		${ns_a} ip link del vti4_a
	done

	# Now check valid values
	for v in ${min} 1300 ${max}; do
		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
		${ns_a} ip link del vti4_a

		[ "${mtu}" = "${v}" ] && continue
		err "  vti MTU ${mtu} doesn't match configured value ${v}"
		fail=1
	done

	return ${fail}
}
589
# test_pmtu_vti6_link_add_mtu() - Check MTU passed on creation of vti6 links
#
# Links created with an MTU out of the valid range must either be refused, or
# get an MTU within the range. Links created with a valid MTU must get exactly
# that MTU.
#
# Return: 0 on success, 1 on failure, 2 if the environment can't be set up or
#	  vti6 links can't be created
test_pmtu_vti6_link_add_mtu() {
	setup namespaces || return 2

	# Probe for vti6 support with a link that's deleted right away
	${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
	[ $? -ne 0 ] && err "  vti6 not supported" && return 2
	${ns_a} ip link del vti6_a

	fail=0

	min=68			# vti6 can carry IPv4 packets too
	max=$((65535 - 40))
	# Check invalid values first
	for v in $((min - 1)) $((max + 1)); do
		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
		# This can fail, or MTU can be adjusted to a proper value
		[ $? -ne 0 ] && continue
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
			# Report the MTU the link actually got, which is the
			# value that was just found to be invalid, and might
			# differ from the requested one
			err "  vti6 tunnel created with invalid MTU ${mtu}"
			fail=1
		fi
		${ns_a} ip link del vti6_a
	done

	# Now check valid values: both ends of the range, and values in between
	for v in ${min} 1280 1300 ${max}; do
		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
		${ns_a} ip link del vti6_a
		if [ "${mtu}" != "${v}" ]; then
			err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
			fail=1
		fi
	done

	return ${fail}
}
627
# test_pmtu_vti6_link_change_mtu() - Check vti6 MTU on link changes
#
# A vti6 link is created on top of a dummy device with a given MTU, moved to a
# second dummy device without passing a MTU, then moved back passing a MTU: the
# MTU is checked after each step.
#
# Return: 0 on success, 1 on failure, 2 if the environment can't be set up or
#	  dummy devices can't be created
test_pmtu_vti6_link_change_mtu() {
	if ! setup namespaces; then
		return 2
	fi

	if ! ${ns_a} ip link add dummy0 mtu 1500 type dummy; then
		err "  dummy not supported"
		return 2
	fi
	${ns_a} ip link add dummy1 mtu 3000 type dummy
	${ns_a} ip link set dummy0 up
	${ns_a} ip link set dummy1 up

	${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
	${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1

	fail=0

	# Create vti6 interface bound to device, passing MTU, check it
	${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	[ ${mtu} -ne 1300 ] && {
		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
		fail=1
	}

	# Move to another device with different MTU, without passing MTU, check
	# MTU is adjusted
	${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	[ ${mtu} -ne $((3000 - 40)) ] && {
		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
		fail=1
	}

	# Move it back, passing MTU, check MTU is not overridden
	${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
	[ ${mtu} -ne 1280 ] && {
		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
		fail=1
	}

	return ${fail}
}
669
# usage() - Print usage, including the list of available tests, and exit
#
# This doesn't return: the script exits with status 1.
usage() {
	printf '%s\n' \
		"" \
		"$0 [OPTIONS] [TEST]..." \
		"If no TEST argument is given, all tests will be run." \
		"" \
		"Options" \
		"  --trace: capture traffic to TEST_INTERFACE.pcap" \
		"" \
		"Available tests${tests}"
	exit 1
}
681
# Exit code of the script: set to 1 as soon as one test fails
exitcode=0
# Walking ${tests}: 0 if the next word is a test name, 1 if it's a description
desc=0
# Split ${tests} on tabs and newlines only, so that descriptions, which contain
# spaces, are kept as single words. The tab is spelled out with printf, as a
# literal tab before the line break is invisible and easily stripped as
# trailing whitespace. The trailing "x" protects the newline, which command
# substitution would otherwise remove.
IFS="$(printf '\t\nx')"
IFS="${IFS%x}"
686
# Parse options and validate test names before running anything: unknown
# options and unknown tests both end in usage(), which exits
tracing=0
for arg do
	case "${arg}" in
	--trace)
		# command -v, already used below to look up test functions, is
		# specified by POSIX, while "which" might not be installed
		if command -v tcpdump > /dev/null 2>&1; then
			tracing=1
		else
			echo "=== tcpdump not available, tracing disabled"
		fi
		;;
	--*)
		usage
		;;
	*)
		# Check first that all requested tests are available before
		# running any
		command -v "test_${arg}" > /dev/null || { echo "=== Test ${arg} not found"; usage; }
		;;
	esac
done
706
# Make sure namespaces and tcpdump instances don't outlive the script
trap cleanup EXIT

for t in ${tests}; do
	# Words of ${tests} alternate between test names and descriptions:
	# remember the name, and go on once the description is read as well
	if [ $desc -eq 0 ]; then
		name="${t}"
		desc=1
		continue
	fi
	desc=0

	# Run all tests if no test is given on the command line, only the
	# requested ones otherwise. Options are not test names: skip them
	run_this=1
	for arg do
		case "${arg}" in
		--*)
			continue
			;;
		"${name}")
			run_this=1
			break
			;;
		esac
		run_this=0
	done
	if [ $run_this -eq 0 ]; then
		continue
	fi

	# Each test runs in a subshell, with default field splitting, so that
	# variables set by a test don't affect the following ones
	(
		unset IFS
		eval test_${name}
		ret=$?
		cleanup

		case ${ret} in
		0)
			printf "TEST: %-60s  [ OK ]\n" "${t}"
			;;
		1)
			printf "TEST: %-60s  [FAIL]\n" "${t}"
			err_flush
			exit 1
			;;
		2)
			printf "TEST: %-60s  [SKIP]\n" "${t}"
			err_flush
			;;
		esac
	) || exitcode=1
done

exit ${exitcode}
741