1 /*-
2 * Copyright (c) 2017 Michael Tuexen
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
/*
 * Compile: cc -Wall -Werror -pedantic pcap2corpus.c -lpcap -o pcap2corpus
 *
 * Usage: pcap2corpus infile outfile_prefix [expression]
 * If no expression (a pcap filter) is provided, "sctp" is used.
 */
34 #define _GNU_SOURCE
35 #include <sys/types.h>
36 #include <net/ethernet.h>
37 #include <netinet/in.h>
38 #include <netinet/ip.h>
39 #include <netinet/ip6.h>
40 #include <pcap/pcap.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44
/* Number of packets handed to packet_handler(). */
static unsigned long nr_read;
/* Number of payloads packet_handler() has written to an output file. */
static unsigned long nr_decaps;

/* Number of zero bytes written in front of every extracted payload. */
#define PRE_PADDING 1
49
50 struct args {
51 struct bpf_program bpf_prog;
52 char *filename_prefix;
53 int (*is_ipv4)(const void *);
54 int (*is_ipv6)(const void *);
55 int linktype;
56 unsigned int offset;
57 };
58
/*
 * SCTP common header as defined in RFC 4960.
 *
 * packet_handler() only uses the size of this structure, to skip the
 * common header and reach the chunks that follow it.
 */
struct sctphdr {
	uint16_t src_port;	/* source port */
	uint16_t dest_port;	/* destination port */
	uint32_t v_tag;		/* verification tag of the packet */
	uint32_t checksum;	/* CRC32C checksum */
	/* The chunks follow directly after the checksum. */
} __attribute__((packed));
69
/*
 * Report whether a DLT_NULL (BSD loopback) frame carries an IPv4 packet.
 *
 * bytes points to the 4-byte address family word that starts the frame.
 * Returns 1 if the family is 2 (IPv4), 0 otherwise.
 */
static int
loopback_is_ipv4(const void *bytes)
{
	uint32_t family;

	/* The capture buffer gives no alignment guarantee; copy the word out. */
	memcpy(&family, bytes, sizeof(family));
	/*
	 * The family is stored in the byte order of the capturing machine,
	 * which may differ from ours.  All valid values fit in the low 16
	 * bits, so anything in the upper half means the word is byte-swapped.
	 */
	if ((family & 0xffff0000) != 0) {
		family = (family >> 24) |
		    ((family >> 8) & 0x0000ff00) |
		    ((family << 8) & 0x00ff0000) |
		    (family << 24);
	}
	return (family == 2);
}
78
/*
 * Report whether a DLT_NULL (BSD loopback) frame carries an IPv6 packet.
 *
 * bytes points to the 4-byte address family word that starts the frame.
 * Returns 1 if the family is 24, 28 or 30, 0 otherwise; the numeric value
 * of AF_INET6 is not the same on all BSD-derived systems, so all three
 * are accepted.
 */
static int
loopback_is_ipv6(const void *bytes)
{
	uint32_t family;

	/* The capture buffer gives no alignment guarantee; copy the word out. */
	memcpy(&family, bytes, sizeof(family));
	/*
	 * The family is stored in the byte order of the capturing machine,
	 * which may differ from ours.  All valid values fit in the low 16
	 * bits, so anything in the upper half means the word is byte-swapped.
	 */
	if ((family & 0xffff0000) != 0) {
		family = (family >> 24) |
		    ((family >> 8) & 0x0000ff00) |
		    ((family << 8) & 0x00ff0000) |
		    (family << 24);
	}
	return (family == 24 || family == 28 || family == 30);
}
87
/*
 * Report whether an Ethernet frame carries an IPv4 packet.
 *
 * bytes points to the start of the Ethernet header.  Returns 1 if the
 * EtherType field equals ETHERTYPE_IP, 0 otherwise.
 */
static int
ethernet_is_ipv4(const void *bytes)
{
	const struct ether_header *frame = bytes;

	/* Compare in network byte order instead of converting the field. */
	return (frame->ether_type == htons(ETHERTYPE_IP));
}
96
/*
 * Report whether an Ethernet frame carries an IPv6 packet.
 *
 * bytes points to the start of the Ethernet header.  Returns 1 if the
 * EtherType field equals ETHERTYPE_IPV6, 0 otherwise.
 */
static int
ethernet_is_ipv6(const void *bytes)
{
	const struct ether_header *frame = bytes;

	/* Compare in network byte order instead of converting the field. */
	return (frame->ether_type == htons(ETHERTYPE_IPV6));
}
105
106 static void
packet_handler(u_char * user,const struct pcap_pkthdr * pkthdr,const u_char * bytes_in)107 packet_handler(u_char *user, const struct pcap_pkthdr *pkthdr, const u_char *bytes_in)
108 {
109 struct args *args;
110 const u_char *bytes_out;
111 FILE *file;
112 char *filename;
113 const struct ip *ip4_hdr_in;
114 const struct ip6_hdr *ip6_hdr_in;
115 size_t offset, length;
116 int null = 0;
117
118 args = (struct args *)(void *)user;
119 bytes_out = NULL;
120 if (pcap_offline_filter(&args->bpf_prog, pkthdr, bytes_in) == 0) {
121 goto out;
122 }
123 if (pkthdr->caplen < args->offset) {
124 goto out;
125 }
126 if (args->is_ipv4(bytes_in)) {
127 offset = args->offset + sizeof(struct ip) + sizeof(struct sctphdr);
128 if (pkthdr->caplen < offset) {
129 goto out;
130 }
131 ip4_hdr_in = (const struct ip *)(const void *)(bytes_in + args->offset);
132 if (ip4_hdr_in->ip_p == IPPROTO_SCTP) {
133 unsigned int ip4_hdr_len;
134
135 ip4_hdr_len = ip4_hdr_in->ip_hl << 2;
136 offset = args->offset + ip4_hdr_len + sizeof(struct sctphdr);
137 if (pkthdr->caplen < offset) {
138 goto out;
139 }
140 bytes_out = bytes_in + offset;
141 length = pkthdr->caplen - offset;
142 }
143 }
144 if (args->is_ipv6(bytes_in)) {
145 offset = args->offset + sizeof(struct ip6_hdr) + sizeof(struct sctphdr);
146 if (pkthdr->caplen < offset) {
147 goto out;
148 }
149 ip6_hdr_in = (const struct ip6_hdr *)(bytes_in + args->offset);
150 if (ip6_hdr_in->ip6_nxt == IPPROTO_SCTP) {
151 bytes_out = bytes_in + offset;
152 length = pkthdr->caplen - offset;
153 }
154 }
155 out:
156 nr_read++;
157 if (bytes_out != NULL) {
158 if (asprintf(&filename, "%s-%06lu", args->filename_prefix, nr_decaps) < 0) {
159 return;
160 }
161 file = fopen(filename, "w");
162 fwrite(&null, 1, PRE_PADDING, file);
163 fwrite(bytes_out, length, 1, file);
164 fclose(file);
165 free(filename);
166 nr_decaps++;
167 }
168 }
169
/*
 * Build the pcap filter expression from the command line.
 *
 * argc, argv - the arguments of main(); the words of the expression start
 *              at argv[3].
 *
 * Returns a NUL-terminated string allocated with malloc() which the caller
 * has to free(): "sctp" if no expression was given, otherwise argv[3] up
 * to argv[argc - 1] joined by single spaces.  Returns NULL if the memory
 * can't be allocated.
 */
static char *
get_filter(int argc, char *argv[])
{
	static const char default_filter[] = "sctp";
	char *result, *c;
	size_t len, arg_len;
	int i;

	if (argc <= 3) {
		result = malloc(sizeof(default_filter));
		if (result != NULL) {
			memcpy(result, default_filter, sizeof(default_filter));
		}
		return (result);
	}
	/*
	 * Each word needs one extra byte: the separating space or, for the
	 * last word, the terminating NUL.
	 */
	len = 0;
	for (i = 3; i < argc; i++) {
		len += strlen(argv[i]) + 1;
	}
	result = malloc(len);
	if (result == NULL) {
		return (NULL);
	}
	c = result;
	for (i = 3; i < argc; i++) {
		arg_len = strlen(argv[i]);
		memcpy(c, argv[i], arg_len);
		c += arg_len;
		*c++ = (i < argc - 1) ? ' ' : '\0';
	}
	return (result);
}
202
203 int
main(int argc,char * argv[])204 main(int argc, char *argv[])
205 {
206 char errbuf[PCAP_ERRBUF_SIZE];
207 pcap_t *pcap_reader;
208 char *filter;
209 struct args args;
210
211 if (argc < 3) {
212 fprintf(stderr, "Usage: %s infile outfile_prefix [expression]\n", argv[0]);
213 return (-1);
214 }
215 args.filename_prefix = argv[2];
216 pcap_reader = pcap_open_offline(argv[1], errbuf);
217 if (pcap_reader == NULL) {
218 fprintf(stderr, "Can't open input file %s: %s\n", argv[1], errbuf);
219 return (-1);
220 }
221 args.linktype = pcap_datalink(pcap_reader);
222 switch (args.linktype) {
223 case DLT_NULL:
224 args.is_ipv4 = loopback_is_ipv4;
225 args.is_ipv6 = loopback_is_ipv6;
226 args.offset = sizeof(uint32_t);
227 break;
228 case DLT_EN10MB:
229 args.is_ipv4 = ethernet_is_ipv4;
230 args.is_ipv6 = ethernet_is_ipv6;
231 args.offset = sizeof(struct ether_header);
232 break;
233 default:
234 fprintf(stderr, "Datalink type %d not supported\n", args.linktype);
235 pcap_close(pcap_reader);
236 return (-1);
237 }
238 filter = get_filter(argc, argv);
239 if (pcap_compile(pcap_reader, &args.bpf_prog, filter, 0, PCAP_NETMASK_UNKNOWN) < 0) {
240 fprintf(stderr, "Can't compile filter %s: %s\n", filter, pcap_geterr(pcap_reader));
241 free(filter);
242 pcap_close(pcap_reader);
243 return (-1);
244 }
245 free(filter);
246 pcap_dispatch(pcap_reader, 0, packet_handler, (u_char *)&args);
247 pcap_close(pcap_reader);
248 fprintf(stderr, "%lu packets processed\n", nr_read);
249 fprintf(stderr, "%lu packets decapsulated\n", nr_decaps);
250 return (0);
251 }
252