1 /*-
2  * Copyright (c) 2017 Michael Tuexen
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 /*
29  * Compile: cc -Wall -Werror -pedantic pcap2corpus.c -lpcap -o pcap2corpus
30  *
31  * Usage: pcap2corpus infile outfile_prefix [expression]
 *        If no expression (a pcap filter) is provided, "sctp" is used.
33  */
34 #define _GNU_SOURCE
35 #include <sys/types.h>
36 #include <net/ethernet.h>
37 #include <netinet/in.h>
38 #include <netinet/ip.h>
39 #include <netinet/ip6.h>
40 #include <pcap/pcap.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 
/* Number of packets passed to packet_handler(), matching the filter or not. */
static unsigned long nr_read = 0;
/* Number of payloads written out; also the numeric suffix of the next file. */
static unsigned long nr_decaps = 0;

/* Number of zero bytes written to each output file ahead of the payload. */
#define PRE_PADDING 1
49 
/*
 * State set up by main() and handed to packet_handler() through the
 * user pointer of pcap_dispatch().
 */
struct args {
	/* Compiled filter, filled in by pcap_compile(). */
	struct bpf_program bpf_prog;
	/* Prefix of the output file names (argv[2]). */
	char *filename_prefix;
	/* Return non-zero if the link-layer header announces IPv4 / IPv6. */
	int (*is_ipv4)(const void *);
	int (*is_ipv6)(const void *);
	/* Value returned by pcap_datalink() for the input file. */
	int linktype;
	/* Size in bytes of the link-layer header preceding the IP header. */
	unsigned int offset;
};
58 
/*
 * SCTP protocol - RFC4960.
 *
 * Layout of the SCTP common header.  The code in this file only uses
 * sizeof(struct sctphdr) to skip over the header; the individual fields
 * are never read.
 */
struct sctphdr {
	uint16_t src_port;	/* source port */
	uint16_t dest_port;	/* destination port */
	uint32_t v_tag;		/* verification tag of packet */
	uint32_t checksum;	/* CRC32C checksum */
	/* chunks follow... */
} __attribute__((packed));
69 
/*
 * Tell whether a DLT_NULL (loopback) frame carries an IPv4 packet.
 *
 * bytes points at the 4-byte link-layer header, which holds the address
 * family as a 32-bit value.  The value is copied out with memcpy() since
 * the capture buffer is a byte array without any alignment guarantee.
 * It is compared as stored; no byte swapping is performed.
 *
 * Returns 1 if the family is 2 (AF_INET), 0 otherwise.
 */
static int
loopback_is_ipv4(const void *bytes)
{
	uint32_t family;

	memcpy(&family, bytes, sizeof(family));
	return (family == 2);
}
78 
/*
 * Tell whether a DLT_NULL (loopback) frame carries an IPv6 packet.
 *
 * bytes points at the 4-byte link-layer header, which holds the address
 * family as a 32-bit value.  The value is copied out with memcpy() since
 * the capture buffer is a byte array without any alignment guarantee.
 * It is compared as stored; no byte swapping is performed.
 *
 * Returns 1 if the family is 24, 28 or 30 (the values the pcap link-layer
 * header type documentation lists for IPv6), 0 otherwise.
 */
static int
loopback_is_ipv6(const void *bytes)
{
	uint32_t family;

	memcpy(&family, bytes, sizeof(family));
	return (family == 24 || family == 28 || family == 30);
}
87 
/*
 * Tell whether an Ethernet frame carries an IPv4 packet.
 *
 * bytes points at the Ethernet header; sizeof(struct ether_header) bytes
 * must be readable.  The header is copied into a local structure instead
 * of being accessed through a cast pointer, so no alignment of the capture
 * buffer is assumed.
 *
 * Returns 1 if the EtherType is ETHERTYPE_IP, 0 otherwise.
 */
static int
ethernet_is_ipv4(const void *bytes)
{
	struct ether_header ether_hdr;

	memcpy(&ether_hdr, bytes, sizeof(ether_hdr));
	return (ntohs(ether_hdr.ether_type) == ETHERTYPE_IP);
}
96 
/*
 * Tell whether an Ethernet frame carries an IPv6 packet.
 *
 * bytes points at the Ethernet header; sizeof(struct ether_header) bytes
 * must be readable.  The header is copied into a local structure instead
 * of being accessed through a cast pointer, so no alignment of the capture
 * buffer is assumed.
 *
 * Returns 1 if the EtherType is ETHERTYPE_IPV6, 0 otherwise.
 */
static int
ethernet_is_ipv6(const void *bytes)
{
	struct ether_header ether_hdr;

	memcpy(&ether_hdr, bytes, sizeof(ether_hdr));
	return (ntohs(ether_hdr.ether_type) == ETHERTYPE_IPV6);
}
105 
106 static void
packet_handler(u_char * user,const struct pcap_pkthdr * pkthdr,const u_char * bytes_in)107 packet_handler(u_char *user, const struct pcap_pkthdr *pkthdr, const u_char *bytes_in)
108 {
109 	struct args *args;
110 	const u_char *bytes_out;
111 	FILE *file;
112 	char *filename;
113 	const struct ip *ip4_hdr_in;
114 	const struct ip6_hdr *ip6_hdr_in;
115 	size_t offset, length;
116 	int null = 0;
117 
118 	args = (struct args *)(void *)user;
119 	bytes_out = NULL;
120 	if (pcap_offline_filter(&args->bpf_prog, pkthdr, bytes_in)  == 0) {
121 		goto out;
122 	}
123 	if (pkthdr->caplen < args->offset) {
124 		goto out;
125 	}
126 	if (args->is_ipv4(bytes_in)) {
127 		offset = args->offset + sizeof(struct ip) + sizeof(struct sctphdr);
128 		if (pkthdr->caplen < offset) {
129 			goto out;
130 		}
131 		ip4_hdr_in = (const struct ip *)(const void *)(bytes_in + args->offset);
132 		if (ip4_hdr_in->ip_p == IPPROTO_SCTP) {
133 			unsigned int ip4_hdr_len;
134 
135 			ip4_hdr_len = ip4_hdr_in->ip_hl << 2;
136 			offset = args->offset + ip4_hdr_len + sizeof(struct sctphdr);
137 			if (pkthdr->caplen < offset) {
138 				goto out;
139 			}
140 			bytes_out = bytes_in + offset;
141 			length = pkthdr->caplen - offset;
142 		}
143 	}
144 	if (args->is_ipv6(bytes_in)) {
145 		offset = args->offset + sizeof(struct ip6_hdr) + sizeof(struct sctphdr);
146 		if (pkthdr->caplen < offset) {
147 			goto out;
148 		}
149 		ip6_hdr_in = (const struct ip6_hdr *)(bytes_in + args->offset);
150 		if (ip6_hdr_in->ip6_nxt == IPPROTO_SCTP) {
151 			bytes_out = bytes_in + offset;
152 			length = pkthdr->caplen - offset;
153 		}
154 	}
155 out:
156 	nr_read++;
157 	if (bytes_out != NULL) {
158 		if (asprintf(&filename, "%s-%06lu", args->filename_prefix, nr_decaps) < 0) {
159 			return;
160 		}
161 		file = fopen(filename, "w");
162 		fwrite(&null, 1, PRE_PADDING, file);
163 		fwrite(bytes_out, length, 1, file);
164 		fclose(file);
165 		free(filename);
166 		nr_decaps++;
167 	}
168 }
169 
/*
 * Build the pcap filter expression from the command line.
 *
 * argv[3] .. argv[argc - 1] are joined with single spaces.  If there are
 * no such arguments, the default expression "sctp" is used.
 *
 * Returns a NUL-terminated string allocated with malloc() that the caller
 * has to free(), or NULL if the allocation fails.
 */
static char *
get_filter(int argc, char *argv[])
{
	static const char default_filter[] = "sctp";
	char *result, *c;
	size_t len;
	int i;

	if (argc <= 3) {
		result = malloc(sizeof(default_filter));
		if (result != NULL) {
			memcpy(result, default_filter, sizeof(default_filter));
		}
		return (result);
	}
	/*
	 * Each argument needs one byte in addition to its length: the
	 * separating space, or the terminating NUL after the last one.
	 */
	len = 0;
	for (i = 3; i < argc; i++) {
		len += strlen(argv[i]) + 1;
	}
	result = malloc(len);
	if (result == NULL) {
		return (NULL);
	}
	c = result;
	for (i = 3; i < argc; i++) {
		size_t arg_len;

		arg_len = strlen(argv[i]);
		memcpy(c, argv[i], arg_len);
		c += arg_len;
		*c++ = (i < argc - 1) ? ' ' : '\0';
	}
	return (result);
}
202 
203 int
main(int argc,char * argv[])204 main(int argc, char *argv[])
205 {
206 	char errbuf[PCAP_ERRBUF_SIZE];
207 	pcap_t *pcap_reader;
208 	char *filter;
209 	struct args args;
210 
211 	if (argc < 3) {
212 		fprintf(stderr, "Usage: %s infile outfile_prefix [expression]\n", argv[0]);
213 		return (-1);
214 	}
215 	args.filename_prefix = argv[2];
216 	pcap_reader = pcap_open_offline(argv[1], errbuf);
217 	if (pcap_reader == NULL) {
218 		fprintf(stderr, "Can't open input file %s: %s\n", argv[1], errbuf);
219 		return (-1);
220 	}
221 	args.linktype = pcap_datalink(pcap_reader);
222 	switch (args.linktype) {
223 	case DLT_NULL:
224 		args.is_ipv4 = loopback_is_ipv4;
225 		args.is_ipv6 = loopback_is_ipv6;
226 		args.offset = sizeof(uint32_t);
227 		break;
228 	case DLT_EN10MB:
229 		args.is_ipv4 = ethernet_is_ipv4;
230 		args.is_ipv6 = ethernet_is_ipv6;
231 		args.offset = sizeof(struct ether_header);
232 		break;
233 	default:
234 		fprintf(stderr, "Datalink type %d not supported\n", args.linktype);
235 		pcap_close(pcap_reader);
236 		return (-1);
237 	}
238 	filter = get_filter(argc, argv);
239 	if (pcap_compile(pcap_reader, &args.bpf_prog, filter, 0, PCAP_NETMASK_UNKNOWN) < 0) {
240 		fprintf(stderr, "Can't compile filter %s: %s\n", filter, pcap_geterr(pcap_reader));
241 		free(filter);
242 		pcap_close(pcap_reader);
243 		return (-1);
244 	}
245 	free(filter);
246 	pcap_dispatch(pcap_reader, 0, packet_handler, (u_char *)&args);
247 	pcap_close(pcap_reader);
248 	fprintf(stderr, "%lu packets processed\n", nr_read);
249 	fprintf(stderr, "%lu packets decapsulated\n", nr_decaps);
250 	return (0);
251 }
252