1 /* tar.c - create/extract archives
2 *
3 * Copyright 2014 Ashwini Kumar <ak.ashwini81@gmail.com>
4 *
5 * For the command, see
6 * http://pubs.opengroup.org/onlinepubs/007908799/xcu/tar.html
7 * For the modern file format, see
8 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
9 * https://en.wikipedia.org/wiki/Tar_(computing)#File_format
10 * https://www.gnu.org/software/tar/manual/html_node/Tar-Internals.html
11 *
12 * For writing to external program
13 * http://www.gnu.org/software/tar/manual/html_node/Writing-to-an-External-Program.html
14 *
15 * Toybox will never implement the "pax" command as a matter of policy.
16 *
17 * Why --exclude pattern but no --include? tar cvzf a.tgz dir --include '*.txt'
18 *
19
20 USE_TAR(NEWTOY(tar, "&(restrict)(full-time)(no-recursion)(numeric-owner)(no-same-permissions)(overwrite)(exclude)*(mode):(mtime):(group):(owner):(to-command):o(no-same-owner)p(same-permissions)k(keep-old)c(create)|h(dereference)x(extract)|t(list)|v(verbose)I(use-compress-program):J(xz)j(bzip2)z(gzip)S(sparse)O(to-stdout)P(absolute-names)m(touch)X(exclude-from)*T(files-from)*C(directory):f(file):a[!txc][!jzJa]", TOYFLAG_USR|TOYFLAG_BIN))
21
22 config TAR
23 bool "tar"
24 default y
25 help
26 usage: tar [-cxt] [-fvohmjkOS] [-XTCf NAME] [FILE...]
27
28 Create, extract, or list files in a .tar (or compressed t?z) file.
29
30 Options:
31 c Create x Extract t Test (list)
32 f tar FILE (default -) C Change to DIR first v Verbose display
33 o Ignore owner h Follow symlinks m Ignore mtime
34 J xz compression j bzip2 compression z gzip compression
35 O Extract to stdout X exclude names in FILE T include names in FILE
36
37 --exclude FILENAME to exclude --full-time Show seconds with -tv
38 --mode MODE Adjust modes --mtime TIME Override timestamps
39 --owner NAME Set file owner to NAME --group NAME Set file group to NAME
40 --sparse Record sparse files
41 --restrict All archive contents must extract under one subdirectory
42 --numeric-owner Save/use/display uid and gid, not user/group name
43 --no-recursion Don't store directory contents
44 -I PROG Filter through PROG to compress or PROG -d to decompress
45 */
46
47 #define FOR_tar
48 #include "toys.h"
49
// Per-command state, reached through TT. in the rest of this file.
// NOTE(review): the leading members (f through exclude) look like they are
// filled in by toybox option parsing from the NEWTOY() string, in which case
// their order and types must not change - confirm before reordering.
GLOBALS(
  char *f, *C;                 // -f archive name, -C directory
  struct arg_list *T, *X;      // -T and -X file name lists
  char *I, *to_command, *owner, *group, *mtime, *mode; // -I and --long options
  struct arg_list *exclude;    // --exclude patterns

  // incl/excl: names to include/exclude, built by trim2list().
  // seen: first entry of incl that has matched an archive member so far.
  struct double_list *incl, *excl, *seen;
  struct string_list *dirs;    // directories waiting for their mtime to be set
  char *cwd;                   // absolute path extracted names are checked against
  // fd: archive file descriptor. ouid/ggid: ids for --owner/--group.
  // hlc: entries used in hlx. warn: print "removing leading" message once.
  // adev/aino: device/inode of the archive. sparselen: entries in sparse[].
  int fd, ouid, ggid, hlc, warn, adev, aino, sparselen;
  long long *sparse;           // offset/length pairs describing a sparse file
  time_t mtt;                  // parsed --mtime value

  // hardlinks seen so far (hlc many)
  struct {
    char *arg;
    ino_t ino;
    dev_t dev;
  } *hlx;

  // Parsed information about a tar header.
  struct tar_header {
    char *name, *link_target, *uname, *gname;
    long long size, ssize;     // bytes stored in archive, apparent file size
    uid_t uid;
    gid_t gid;
    mode_t mode;
    time_t mtime;
    dev_t device;
  } hdr;
)
81
// One raw header block as it is read from/written to the archive. The fields
// add up to exactly 512 bytes, and every member is char so there is no padding.
struct tar_hdr {
  char name[100];
  char mode[8];
  char uid[8];
  char gid[8];
  char size[12];
  char mtime[12];
  char chksum[8];
  char type;
  char link[100];
  char magic[8];
  char uname[32];
  char gname[32];
  char major[8];
  char minor[8];
  char prefix[155];
  char padd[12];
};
87
// Tar uses ASCII octal when it fits, base-256 otherwise.
// Returns nonzero when val can be written as len-1 octal digits (3 bits per
// digit), zero when it needs more room than that.
static int ascii_fits(unsigned long long val, int len)
{
  int bits = 3*(len-1);

  return (val>>bits) == 0;
}
93
// Store val in the len byte field at str: zero padded octal digits plus a NUL
// when ascii_fits() says it fits, otherwise base-256 with the first byte set
// to 128 as the marker.
static void itoo(char *str, int len, unsigned long long val)
{
  int i;

  if (ascii_fits(val, len)) {
    sprintf(str, "%0*llo", len-1, val);

    return;
  }

  // Least significant byte goes last; the first byte is then replaced by the
  // marker.
  for (i = len-1; i >= 0; i--) {
    str[i] = val;
    val >>= 8;
  }
  *str = 128;
}
// Fill a header field, taking the length from the array's size.
#define ITOO(x, y) itoo(x, sizeof(x), y)
104
// convert octal (or base-256) to int
// str is a header field of len bytes. A first byte with the high bit set
// selects base-256 (remaining len-1 bytes, most significant first). Otherwise
// leading spaces are skipped and octal digits are read until the field ends or
// a NUL/space terminator is found; anything else is a "bad header" error.
static unsigned long long otoi(char *str, unsigned len)
{
  unsigned long long val = 0;

  // When tar value too big or octal, use binary encoding with high bit set.
  // Read bytes as unsigned so values >= 128 aren't sign extended where plain
  // char is signed.
  if (128&*str) while (--len) val = (val<<8)+(unsigned char)*++str;
  else {
    // Leading spaces count against the field length, so an all-space field
    // can't run on into the next field.
    while (len && *str == ' ') str++, len--;
    while (len && *str>='0' && *str<='7') val = val*8+*str++-'0', len--;
    if (len && *str && *str != ' ') error_exit("bad header");
  }

  return val;
}
// Parse a header field, taking the length from the array's size.
#define OTOI(x) otoi(x, sizeof(x))
121
write_longname(char * name,char type)122 static void write_longname(char *name, char type)
123 {
124 struct tar_hdr tmp;
125 int sz = strlen(name) +1;
126
127 memset(&tmp, 0, sizeof(tmp));
128 strcpy(tmp.name, "././@LongLink");
129 ITOO(tmp.uid, 0);
130 ITOO(tmp.gid, 0);
131 ITOO(tmp.size, sz);
132 ITOO(tmp.mtime, 0);
133 tmp.type = type;
134 strcpy(tmp.magic, "ustar ");
135
136 // Historical nonsense to match other implementations. Never used.
137 ITOO(tmp.mode, 0644);
138 strcpy(tmp.uname, "root");
139 strcpy(tmp.gname, "root");
140
141 // Calculate checksum. Since 512*255 = 0377000 in octal, this can never
142 // use more than 6 digits. The last byte is ' ' for historical reasons.
143 itoo(tmp.chksum, sizeof(tmp.chksum)-1, tar_cksum(&tmp));
144 tmp.chksum[7] = ' ';
145
146 // write header and name, padded with NUL to block size
147 xwrite(TT.fd, &tmp, 512);
148 xwrite(TT.fd, name, sz);
149 if (sz%512) xwrite(TT.fd, toybuf, 512-(sz%512));
150 }
151
filter(struct double_list * lst,char * name)152 static struct double_list *filter(struct double_list *lst, char *name)
153 {
154 struct double_list *end = lst;
155
156 if (lst)
157 // constant is FNM_LEADING_DIR
158 do if (!fnmatch(lst->data, name, 1<<3)) return lst;
159 while (end != (lst = lst->next));
160
161 return 0;
162 }
163
skippy(long long len)164 static void skippy(long long len)
165 {
166 if (lskip(TT.fd, len)) perror_exit("EOF");
167 }
168
169 // allocate and read data from TT.fd
alloread(void * buf,int len)170 static void alloread(void *buf, int len)
171 {
172 // actually void **, but automatic typecasting doesn't work with void ** :(
173 char **b = buf;
174
175 free(*b);
176 *b = xmalloc(len+1);
177 xreadall(TT.fd, *b, len);
178 (*b)[len] = 0;
179 }
180
181 // callback from dirtree to create archive
add_to_tar(struct dirtree * node)182 static int add_to_tar(struct dirtree *node)
183 {
184 struct stat *st = &(node->st);
185 struct tar_hdr hdr;
186 struct passwd *pw = pw;
187 struct group *gr = gr;
188 int i, fd = -1, norecurse = FLAG(no_recursion);
189 char *name, *lnk, *hname;
190
191 if (!dirtree_notdotdot(node)) return 0;
192 if (TT.adev == st->st_dev && TT.aino == st->st_ino) {
193 error_msg("'%s' file is the archive; not dumped", node->name);
194 return 0;
195 }
196
197 i = 1;
198 name = hname = dirtree_path(node, &i);
199
200 // exclusion defaults to --no-anchored and --wildcards-match-slash
201 for (lnk = name; *lnk;) {
202 if (filter(TT.excl, lnk)) {
203 norecurse++;
204
205 goto done;
206 }
207 while (*lnk && *lnk!='/') lnk++;
208 while (*lnk=='/') lnk++;
209 }
210
211 // Consume the 1 extra byte alocated in dirtree_path()
212 if (S_ISDIR(st->st_mode) && name[i-1] != '/') strcat(name, "/");
213
214 // remove leading / and any .. entries from saved name
215 if (!FLAG(P)) while (*hname == '/') hname++;
216 for (lnk = hname;;) {
217 if (!(lnk = strstr(lnk, ".."))) break;
218 if (lnk == hname || lnk[-1] == '/') {
219 if (!lnk[2]) goto done;
220 if (lnk[2]=='/') lnk = hname = lnk+3;
221 } else lnk+= 2;
222 }
223 if (!*hname) goto done;
224
225 if (TT.warn && hname != name) {
226 fprintf(stderr, "removing leading '%.*s' from member names\n",
227 (int)(hname-name), name);
228 TT.warn = 0;
229 }
230
231 if (TT.owner) st->st_uid = TT.ouid;
232 if (TT.group) st->st_gid = TT.ggid;
233 if (TT.mode) st->st_mode = string_to_mode(TT.mode, st->st_mode);
234 if (TT.mtime) st->st_mtime = TT.mtt;
235
236 memset(&hdr, 0, sizeof(hdr));
237 strncpy(hdr.name, hname, sizeof(hdr.name));
238 ITOO(hdr.mode, st->st_mode &07777);
239 ITOO(hdr.uid, st->st_uid);
240 ITOO(hdr.gid, st->st_gid);
241 ITOO(hdr.size, 0); //set size later
242 ITOO(hdr.mtime, st->st_mtime);
243 strcpy(hdr.magic, "ustar ");
244
245 // Hard link or symlink? i=0 neither, i=1 hardlink, i=2 symlink
246
247 // Are there hardlinks to a non-directory entry?
248 if (st->st_nlink>1 && !S_ISDIR(st->st_mode)) {
249 // Have we seen this dev&ino before?
250 for (i = 0; i<TT.hlc; i++) {
251 if (st->st_ino == TT.hlx[i].ino && st->st_dev == TT.hlx[i].dev)
252 break;
253 }
254 if (i != TT.hlc) {
255 lnk = TT.hlx[i].arg;
256 i = 1;
257 } else {
258 // first time we've seen it. Store as normal file, but remember it.
259 if (!(TT.hlc&255))
260 TT.hlx = xrealloc(TT.hlx, sizeof(*TT.hlx)*(TT.hlc+256));
261 TT.hlx[TT.hlc].arg = xstrdup(hname);
262 TT.hlx[TT.hlc].ino = st->st_ino;
263 TT.hlx[TT.hlc].dev = st->st_dev;
264 TT.hlc++;
265 i = 0;
266 }
267 } else i = 0;
268
269 // Handle file types
270 if (i || S_ISLNK(st->st_mode)) {
271 hdr.type = '1'+!i;
272 if (!i && !(lnk = xreadlink(name))) {
273 perror_msg("readlink");
274 goto done;
275 }
276 if (strlen(lnk) > sizeof(hdr.link)) write_longname(lnk, 'K');
277 strncpy(hdr.link, lnk, sizeof(hdr.link));
278 if (!i) free(lnk);
279 } else if (S_ISREG(st->st_mode)) {
280 hdr.type = '0';
281 ITOO(hdr.size, st->st_size);
282 } else if (S_ISDIR(st->st_mode)) hdr.type = '5';
283 else if (S_ISFIFO(st->st_mode)) hdr.type = '6';
284 else if (S_ISBLK(st->st_mode) || S_ISCHR(st->st_mode)) {
285 hdr.type = (S_ISCHR(st->st_mode))?'3':'4';
286 ITOO(hdr.major, dev_major(st->st_rdev));
287 ITOO(hdr.minor, dev_minor(st->st_rdev));
288 } else {
289 error_msg("unknown file type '%o'", st->st_mode & S_IFMT);
290 goto done;
291 }
292
293 if (strlen(hname) > sizeof(hdr.name)) write_longname(hname, 'L');
294
295 if (!FLAG(numeric_owner)) {
296 if ((TT.owner || (pw = bufgetpwuid(st->st_uid))) &&
297 ascii_fits(st->st_uid, sizeof(hdr.uid)))
298 strncpy(hdr.uname, TT.owner ? TT.owner : pw->pw_name, sizeof(hdr.uname));
299 if ((TT.group || (gr = bufgetgrgid(st->st_gid))) &&
300 ascii_fits(st->st_gid, sizeof(hdr.gid)))
301 strncpy(hdr.gname, TT.group ? TT.group : gr->gr_name, sizeof(hdr.gname));
302 }
303
304 TT.sparselen = 0;
305 if (hdr.type == '0') {
306 // Before we write the header, make sure we can read the file
307 if ((fd = open(name, O_RDONLY)) < 0) {
308 perror_msg("can't open '%s'", name);
309
310 return 0;
311 }
312 if (FLAG(S)) {
313 long long lo, ld = 0, len = 0;
314
315 // Enumerate the extents
316 while ((lo = lseek(fd, ld, SEEK_HOLE)) != -1) {
317 if (!(TT.sparselen&511))
318 TT.sparse = xrealloc(TT.sparse, (TT.sparselen+514)*sizeof(long long));
319 if (ld != lo) {
320 TT.sparse[TT.sparselen++] = ld;
321 len += TT.sparse[TT.sparselen++] = lo-ld;
322 }
323 if (lo == st->st_size || (ld = lseek(fd, lo, SEEK_DATA)) < lo) break;
324 }
325
326 // If there were extents, change type to S record
327 if (TT.sparselen>2) {
328 TT.sparse[TT.sparselen++] = st->st_size;
329 TT.sparse[TT.sparselen++] = 0;
330 hdr.type = 'S';
331 lnk = (char *)&hdr;
332 for (i = 0; i<TT.sparselen && i<8; i++)
333 itoo(lnk+386+12*i, 12, TT.sparse[i]);
334
335 // Record if there's overflow records, change length to sparse length,
336 // record apparent length
337 if (TT.sparselen>8) lnk[482] = 1;
338 itoo(lnk+483, 12, st->st_size);
339 ITOO(hdr.size, len);
340 } else TT.sparselen = 0;
341 lseek(fd, 0, SEEK_SET);
342 }
343 }
344
345 itoo(hdr.chksum, sizeof(hdr.chksum)-1, tar_cksum(&hdr));
346 hdr.chksum[7] = ' ';
347
348 if (FLAG(v)) dprintf((TT.fd==1) ? 2 : 1, "%s\n", hname);
349
350 // Write header and data to archive
351 xwrite(TT.fd, &hdr, 512);
352 if (TT.sparselen>8) {
353 char buf[512];
354
355 // write extent overflow blocks
356 for (i=8;;i++) {
357 int j = (i-8)%42;
358
359 if (!j || i==TT.sparselen) {
360 if (i!=8) {
361 if (i!=TT.sparselen) buf[504] = 1;
362 xwrite(TT.fd, buf, 512);
363 }
364 if (i==TT.sparselen) break;
365 memset(buf, 0, sizeof(buf));
366 }
367 itoo(buf+12*j, 12, TT.sparse[i]);
368 }
369 }
370 TT.sparselen >>= 1;
371 if (hdr.type == '0' || hdr.type == 'S') {
372 if (hdr.type == '0') xsendfile_pad(fd, TT.fd, st->st_size);
373 else for (i = 0; i<TT.sparselen; i++) {
374 if (TT.sparse[i*2] != lseek(fd, TT.sparse[i*2], SEEK_SET))
375 perror_msg("%s: seek %lld", name, TT.sparse[i*2]);
376 xsendfile_pad(fd, TT.fd, TT.sparse[i*2+1]);
377 }
378 if (st->st_size%512) writeall(TT.fd, toybuf, (512-(st->st_size%512)));
379 close(fd);
380 }
381 done:
382 free(name);
383
384 return (DIRTREE_RECURSE|(FLAG(h)?DIRTREE_SYMFOLLOW:0))*!norecurse;
385 }
386
wsettime(char * s,long long sec)387 static void wsettime(char *s, long long sec)
388 {
389 struct timespec times[2] = {{sec, 0},{sec, 0}};
390
391 if (utimensat(AT_FDCWD, s, times, AT_SYMLINK_NOFOLLOW))
392 perror_msg("settime %lld %s", sec, s);
393 }
394
395 // Do pending directory utimes(), NULL to flush all.
dirflush(char * name,int isdir)396 static int dirflush(char *name, int isdir)
397 {
398 char *s = 0, *ss;
399
400 // Barf if name not in TT.cwd
401 if (name) {
402 if (!(ss = s = xabspath(name, -1-isdir))) {
403 error_msg("'%s' bad symlink", name);
404
405 return 1;
406 }
407 if (TT.cwd[1] && (!strstart(&ss, TT.cwd) || (*ss && *ss!='/'))) {
408 error_msg("'%s' %s not under '%s'", name, s, TT.cwd);
409 free(s);
410
411 return 1;
412 }
413
414 // --restrict means first entry extracted is what everything must be under
415 if (FLAG(restrict)) {
416 free(TT.cwd);
417 TT.cwd = strdup(s);
418 toys.optflags ^= FLAG_restrict;
419 }
420 // use resolved name so trailing / is stripped
421 if (isdir) unlink(s);
422 }
423
424 // Set deferred utimes() for directories this file isn't under.
425 // (Files must be depth-first ordered in tarball for this to matter.)
426 while (TT.dirs) {
427
428 // If next file is under (or equal to) this dir, keep waiting
429 if (name && strstart(&ss, ss = s) && (!*ss || *ss=='/')) break;
430
431 wsettime(TT.dirs->str+sizeof(long long), *(long long *)TT.dirs->str);
432 free(llist_pop(&TT.dirs));
433 }
434 free(s);
435
436 // name was under TT.cwd
437 return 0;
438 }
439
// write data to file
// Copies the current member's data from TT.fd to fd, then closes fd. With no
// sparse map (TT.sparselen 0) that is one run of TT.hdr.size bytes. Otherwise
// each offset/length pair in TT.sparse is positioned and copied in turn.
// If a copy comes up short the rest of the member is skipped in the archive.
static void sendfile_sparse(int fd)
{
  // used: bytes of member data consumed from the archive so far
  long long len, used = 0, sent;
  int i = 0, j;

  do {
    if (TT.sparselen) {
      // Seek past holes or fill output with zeroes.
      if (-1 == lseek(fd, len = TT.sparse[i*2], SEEK_SET)) {
        // Output can't seek: write zero bytes taken from toybuf instead
        sent = 0;
        while (len) {
          // first/last 512 bytes used, rest left zeroes
          j = (len>3072) ? 3072 : len;
          if (j != writeall(fd, toybuf+512, j)) goto error;
          len -= j;
        }
      } else {
        // sent holds the offset just seeked to, len becomes the extent length.
        // A zero length extent is used to set the file's final size.
        sent = len;
        if (!(len = TT.sparse[i*2+1]) && ftruncate(fd, sent+len))
          perror_msg("ftruncate");
      }
      // Extents may not claim more data than the header said is stored
      if (len+used>TT.hdr.size) error_exit("sparse overflow");
    } else len = TT.hdr.size;

    // len ends up as the number of bytes that could not be copied
    len -= sendfile_len(TT.fd, fd, len, &sent);
    used += sent;
    if (len) {
error:
      if (fd!=1) perror_msg(0);
      skippy(TT.hdr.size-used);

      break;
    }
  } while (++i<TT.sparselen);

  close(fd);
}
478
extract_to_disk(void)479 static void extract_to_disk(void)
480 {
481 char *name = TT.hdr.name;
482 int ala = TT.hdr.mode;
483
484 if (dirflush(name, S_ISDIR(ala))) {
485 if (S_ISREG(ala) && !TT.hdr.link_target) skippy(TT.hdr.size);
486
487 return;
488 }
489
490 // create path before file if necessary
491 if (strrchr(name, '/') && mkpath(name) && errno!=EEXIST)
492 return perror_msg(":%s: can't mkdir", name);
493
494 // remove old file, if exists
495 if (!FLAG(k) && !S_ISDIR(ala) && unlink(name)) {
496 if (errno==EISDIR && !rmdir(name));
497 else if (errno!=ENOENT) return perror_msg("can't remove: %s", name);
498 }
499
500 if (S_ISREG(ala)) {
501 // hardlink?
502 if (TT.hdr.link_target) {
503 if (link(TT.hdr.link_target, name))
504 return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
505 // write contents
506 } else {
507 int fd = xcreate(name,
508 WARN_ONLY|O_WRONLY|O_CREAT|(FLAG(overwrite)?O_TRUNC:O_EXCL),
509 ala & 07777);
510 if (fd != -1) sendfile_sparse(fd);
511 else skippy(TT.hdr.size);
512 }
513 } else if (S_ISDIR(ala)) {
514 if ((mkdir(name, 0700) == -1) && errno != EEXIST)
515 return perror_msg("%s: can't create", TT.hdr.name);
516 } else if (S_ISLNK(ala)) {
517 if (symlink(TT.hdr.link_target, TT.hdr.name))
518 return perror_msg("can't link '%s' -> '%s'", name, TT.hdr.link_target);
519 } else if (mknod(name, ala, TT.hdr.device))
520 return perror_msg("can't create '%s'", name);
521
522 // Set ownership
523 if (!FLAG(o) && !geteuid()) {
524 int u = TT.hdr.uid, g = TT.hdr.gid;
525
526 if (TT.owner) TT.hdr.uid = TT.ouid;
527 else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
528 struct passwd *pw = getpwnam(TT.hdr.uname);
529 if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
530 }
531
532 if (TT.group) TT.hdr.gid = TT.ggid;
533 else if (!FLAG(numeric_owner) && *TT.hdr.uname) {
534 struct group *gr = getgrnam(TT.hdr.gname);
535 if (gr) TT.hdr.gid = gr->gr_gid;
536 }
537
538 if (lchown(name, u, g)) perror_msg("chown %d:%d '%s'", u, g, name);;
539 }
540
541 if (!S_ISLNK(ala)) chmod(TT.hdr.name, FLAG(p) ? ala : ala&0777);
542
543 // Apply mtime.
544 if (!FLAG(m)) {
545 if (S_ISDIR(ala)) {
546 struct string_list *sl;
547
548 // Writing files into a directory changes directory timestamps, so
549 // defer mtime updates until contents written.
550
551 sl = xmalloc(sizeof(struct string_list)+sizeof(long long)+strlen(name)+1);
552 *(long long *)sl->str = TT.hdr.mtime;
553 strcpy(sl->str+sizeof(long long), name);
554 sl->next = TT.dirs;
555 TT.dirs = sl;
556 } else wsettime(TT.hdr.name, TT.hdr.mtime);
557 }
558 }
559
unpack_tar(char * first)560 static void unpack_tar(char *first)
561 {
562 struct double_list *walk, *delete;
563 struct tar_hdr tar;
564 int i, and = 0;
565 unsigned maj, min;
566 char *s;
567
568 for (;;) {
569 if (first) {
570 memcpy(&tar, first, i = 512);
571 first = 0;
572 } else {
573 // align to next block and read it
574 if (TT.hdr.size%512) skippy(512-TT.hdr.size%512);
575 i = readall(TT.fd, &tar, 512);
576 }
577
578 if (i && i!=512) error_exit("short header");
579
580 // Two consecutive empty headers ends tar even if there's more data
581 if (!i || !*tar.name) {
582 if (!i || and++) return;
583 TT.hdr.size = 0;
584 continue;
585 }
586 // ensure null temination even of pathological packets
587 tar.padd[0] = and = 0;
588
589 // Is this a valid TAR header?
590 if (!is_tar_header(&tar)) error_exit("bad header");
591 TT.hdr.size = OTOI(tar.size);
592
593 // If this header isn't writing something to the filesystem
594 if ((tar.type<'0' || tar.type>'7') && tar.type!='S'
595 && (*tar.magic && tar.type))
596 {
597 // Long name extension header?
598 if (tar.type == 'K') alloread(&TT.hdr.link_target, TT.hdr.size);
599 else if (tar.type == 'L') alloread(&TT.hdr.name, TT.hdr.size);
600 else if (tar.type == 'x') {
601 char *p, *buf = 0;
602 int i, len, n = 0;
603
604 // Posix extended record "LEN NAME=VALUE\n" format
605 alloread(&buf, TT.hdr.size);
606 for (p = buf; (p-buf)<TT.hdr.size; p += len) {
607 i = sscanf(p, "%u path=%n", &len, &n);
608 if (i<1 || len<4 || len>TT.hdr.size) {
609 error_msg("bad header");
610 break;
611 }
612 p[len-1] = 0;
613 if (n) {
614 TT.hdr.name = xstrdup(p+n);
615 break;
616 }
617 }
618 free(buf);
619
620 // Ignore everything else.
621 } else skippy(TT.hdr.size);
622
623 continue;
624 }
625
626 // Handle sparse file type
627 if (tar.type == 'S') {
628 char sparse[512];
629 int max = 8;
630
631 // Load 4 pairs of offset/len from S block, plus 21 pairs from each
632 // continuation block, list says where to seek/write sparse file contents
633 TT.sparselen = 0;
634 s = 386+(char *)&tar;
635 *sparse = i = 0;
636
637 for (;;) {
638 if (!(TT.sparselen&511))
639 TT.sparse = xrealloc(TT.sparse, (TT.sparselen+512)*sizeof(long long));
640
641 // If out of data in block check continue flag, stop or load next block
642 if (++i>max || !*s) {
643 if (!(*sparse ? sparse[504] : ((char *)&tar)[482])) break;
644 xreadall(TT.fd, s = sparse, 512);
645 max = 41;
646 i = 0;
647 }
648 // Load next entry
649 TT.sparse[TT.sparselen++] = otoi(s, 12);
650 s += 12;
651 }
652
653 // Odd number of entries (from corrupted tar) would be dropped here
654 TT.sparselen /= 2;
655 if (TT.sparselen)
656 TT.hdr.ssize = TT.sparse[2*TT.sparselen-1]+TT.sparse[2*TT.sparselen-2];
657 } else {
658 TT.sparselen = 0;
659 TT.hdr.ssize = TT.hdr.size;
660 }
661
662 // At this point, we have something to output. Convert metadata.
663 TT.hdr.mode = OTOI(tar.mode)&0xfff;
664 if (tar.type == 'S' || !tar.type) TT.hdr.mode |= 0x8000;
665 else TT.hdr.mode |= (char []){8,8,10,2,6,4,1,8}[tar.type-'0']<<12;
666 TT.hdr.uid = OTOI(tar.uid);
667 TT.hdr.gid = OTOI(tar.gid);
668 TT.hdr.mtime = OTOI(tar.mtime);
669 maj = OTOI(tar.major);
670 min = OTOI(tar.minor);
671 TT.hdr.device = dev_makedev(maj, min);
672 TT.hdr.uname = xstrndup(TT.owner ? TT.owner : tar.uname, sizeof(tar.uname));
673 TT.hdr.gname = xstrndup(TT.group ? TT.group : tar.gname, sizeof(tar.gname));
674
675 if (TT.owner) TT.hdr.uid = TT.ouid;
676 else if (!FLAG(numeric_owner)) {
677 struct passwd *pw = getpwnam(TT.hdr.uname);
678 if (pw && (TT.owner || !FLAG(numeric_owner))) TT.hdr.uid = pw->pw_uid;
679 }
680
681 if (TT.group) TT.hdr.gid = TT.ggid;
682 else if (!FLAG(numeric_owner)) {
683 struct group *gr = getgrnam(TT.hdr.gname);
684 if (gr) TT.hdr.gid = gr->gr_gid;
685 }
686
687 if (!TT.hdr.link_target && *tar.link)
688 TT.hdr.link_target = xstrndup(tar.link, sizeof(tar.link));
689 if (!TT.hdr.name) {
690 // Glue prefix and name fields together with / if necessary
691 i = (tar.type=='S') ? 0 : strnlen(tar.prefix, sizeof(tar.prefix));
692 TT.hdr.name = xmprintf("%.*s%s%.*s", i, tar.prefix,
693 (i && tar.prefix[i-1] != '/') ? "/" : "",
694 (int)sizeof(tar.name), tar.name);
695 }
696
697 // Old broken tar recorded dir as "file with trailing slash"
698 if (S_ISREG(TT.hdr.mode) && (s = strend(TT.hdr.name, "/"))) {
699 *s = 0;
700 TT.hdr.mode = (TT.hdr.mode & ~S_IFMT) | S_IFDIR;
701 }
702
703 // Non-regular files don't have contents stored in archive.
704 if ((TT.hdr.link_target && *TT.hdr.link_target)
705 || (tar.type && !S_ISREG(TT.hdr.mode)))
706 TT.hdr.size = 0;
707
708 // Files are seen even if excluded, so check them here.
709 // TT.seen points to first seen entry in TT.incl, or NULL if none yet.
710
711 if ((delete = filter(TT.incl, TT.hdr.name)) && TT.incl != TT.seen) {
712 if (!TT.seen) TT.seen = delete;
713
714 // Move seen entry to end of list.
715 if (TT.incl == delete) TT.incl = TT.incl->next;
716 else for (walk = TT.incl; walk != TT.seen; walk = walk->next) {
717 if (walk == delete) {
718 dlist_pop(&walk);
719 dlist_add_nomalloc(&TT.incl, delete);
720 }
721 }
722 }
723
724 // Skip excluded files
725 if (filter(TT.excl, TT.hdr.name) || (TT.incl && !delete))
726 skippy(TT.hdr.size);
727 else if (FLAG(t)) {
728 if (FLAG(v)) {
729 struct tm *lc = localtime(TT.mtime ? &TT.mtt : &TT.hdr.mtime);
730 char perm[12], gname[12];
731
732 mode_to_string(TT.hdr.mode, perm);
733 printf("%s", perm);
734 sprintf(perm, "%u", TT.hdr.uid);
735 sprintf(gname, "%u", TT.hdr.gid);
736 printf(" %s/%s ", *TT.hdr.uname ? TT.hdr.uname : perm,
737 *TT.hdr.gname ? TT.hdr.gname : gname);
738 if (tar.type=='3' || tar.type=='4') printf("%u,%u", maj, min);
739 else printf("%9lld", TT.hdr.ssize);
740 sprintf(perm, ":%02d", lc->tm_sec);
741 printf(" %d-%02d-%02d %02d:%02d%s ", 1900+lc->tm_year, 1+lc->tm_mon,
742 lc->tm_mday, lc->tm_hour, lc->tm_min, FLAG(full_time) ? perm : "");
743 }
744 printf("%s", TT.hdr.name);
745 if (TT.hdr.link_target) printf(" -> %s", TT.hdr.link_target);
746 xputc('\n');
747 skippy(TT.hdr.size);
748 } else {
749 if (FLAG(v)) printf("%s\n", TT.hdr.name);
750 if (FLAG(O)) sendfile_sparse(1);
751 else if (FLAG(to_command)) {
752 if (S_ISREG(TT.hdr.mode)) {
753 int fd, pid;
754
755 xsetenv("TAR_FILETYPE", "f");
756 xsetenv(xmprintf("TAR_MODE=%o", TT.hdr.mode), 0);
757 xsetenv(xmprintf("TAR_SIZE=%lld", TT.hdr.ssize), 0);
758 xsetenv("TAR_FILENAME", TT.hdr.name);
759 xsetenv("TAR_UNAME", TT.hdr.uname);
760 xsetenv("TAR_GNAME", TT.hdr.gname);
761 xsetenv(xmprintf("TAR_MTIME=%llo", (long long)TT.hdr.mtime), 0);
762 xsetenv(xmprintf("TAR_UID=%o", TT.hdr.uid), 0);
763 xsetenv(xmprintf("TAR_GID=%o", TT.hdr.gid), 0);
764
765 pid = xpopen((char *[]){"sh", "-c", TT.to_command, NULL}, &fd, 0);
766 // todo: short write exits tar here, other skips data.
767 sendfile_sparse(fd);
768 fd = xpclose_both(pid, 0);
769 if (fd) error_msg("%d: Child returned %d", pid, fd);
770 }
771 } else extract_to_disk();
772 }
773
774 free(TT.hdr.name);
775 free(TT.hdr.link_target);
776 free(TT.hdr.uname);
777 free(TT.hdr.gname);
778 TT.hdr.name = TT.hdr.link_target = 0;
779 }
780 }
781
// Append a private copy of pline to the dlist at list (really a dlist **),
// dropping one trailing newline and then any run of trailing slashes.
static void trim2list(void *list, char *pline)
{
  char *copy = xstrdup(pline), *end = copy+strlen(copy);

  dlist_add(list, copy);

  // The list keeps the pointer, so trimming the copy in place is enough.
  if (end != copy && end[-1] == '\n') end--;
  while (end != copy && end[-1] == '/') end--;
  *end = 0;
}
793
794 // do_lines callback, selects TT.incl or TT.excl based on call order
do_XT(char ** pline,long len)795 static void do_XT(char **pline, long len)
796 {
797 if (pline) trim2list(TT.X ? &TT.excl : &TT.incl, *pline);
798 }
799
tar_main(void)800 void tar_main(void)
801 {
802 char *s, **args = toys.optargs,
803 *archiver = FLAG(I) ? TT.I : (FLAG(z) ? "gzip" : (FLAG(J) ? "xz":"bzip2"));
804 int len = 0;
805
806 // Needed when extracting to command
807 signal(SIGPIPE, SIG_IGN);
808
809 // Get possible early errors out of the way
810 if (!geteuid()) toys.optflags |= FLAG_p;
811 if (TT.owner) TT.ouid = xgetuid(TT.owner);
812 if (TT.group) TT.ggid = xgetgid(TT.group);
813 if (TT.mtime) xparsedate(TT.mtime, &TT.mtt, (void *)&s, 1);
814
815 // Collect file list.
816 for (; TT.exclude; TT.exclude = TT.exclude->next)
817 trim2list(&TT.excl, TT.exclude->arg);
818 for (;TT.X; TT.X = TT.X->next) do_lines(xopenro(TT.X->arg), '\n', do_XT);
819 for (args = toys.optargs; *args; args++) trim2list(&TT.incl, *args);
820 for (;TT.T; TT.T = TT.T->next) do_lines(xopenro(TT.T->arg), '\n', do_XT);
821
822 // If include file list empty, don't create empty archive
823 if (FLAG(c)) {
824 if (!TT.incl) error_exit("empty archive");
825 TT.fd = 1;
826 }
827
828 // nommu reentry for nonseekable input skips this, parent did it for us
829 if (toys.stacktop) {
830 if (TT.f && strcmp(TT.f, "-"))
831 TT.fd = xcreate(TT.f, TT.fd*(O_WRONLY|O_CREAT|O_TRUNC), 0666);
832 // Get destination directory
833 if (TT.C) xchdir(TT.C);
834 }
835
836 // Get destination directory
837 TT.cwd = xabspath(s = xgetcwd(), 1);
838 free(s);
839
840 // Remember archive inode so we don't overwrite it or add it to itself
841 {
842 struct stat st;
843
844 if (!fstat(TT.fd, &st)) {
845 TT.aino = st.st_ino;
846 TT.adev = st.st_dev;
847 }
848 }
849
850 // Are we reading?
851 if (FLAG(x)||FLAG(t)) {
852 char *hdr = 0;
853
854 // autodetect compression type when not specified
855 if (!(FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J))) {
856 len = xread(TT.fd, hdr = toybuf+sizeof(toybuf)-512, 512);
857 if (len!=512 || !is_tar_header(hdr)) {
858 // detect gzip and bzip signatures
859 if (SWAP_BE16(*(short *)hdr)==0x1f8b) toys.optflags |= FLAG_z;
860 else if (!memcmp(hdr, "BZh", 3)) toys.optflags |= FLAG_j;
861 else if (peek_be(hdr, 7) == 0xfd377a585a0000UL) toys.optflags |= FLAG_J;
862 else error_exit("Not tar");
863
864 // if we can seek back we don't need to loop and copy data
865 if (!lseek(TT.fd, -len, SEEK_CUR)) hdr = 0;
866 }
867 }
868
869 if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)) {
870 int pipefd[2] = {hdr ? -1 : TT.fd, -1}, i, pid;
871 struct string_list *zcat = FLAG(I) ? 0 : find_in_path(getenv("PATH"),
872 FLAG(j) ? "bzcat" : FLAG(J) ? "xzcat" : "zcat");
873
874 // Toybox provides more decompressors than compressors, so try them first
875 xpopen_both(zcat ? (char *[]){zcat->str, 0} :
876 (char *[]){archiver, "-d", 0}, pipefd);
877 if (CFG_TOYBOX_FREE) llist_traverse(zcat, free);
878
879 if (!hdr) {
880 // If we could seek, child gzip inherited fd and we read its output
881 close(TT.fd);
882 TT.fd = pipefd[1];
883
884 } else {
885
886 // If we autodetected type but then couldn't lseek to put the data back
887 // we have to loop reading data from TT.fd and pass it to gzip ourselves
888 // (starting with the block of data we read to autodetect).
889
890 // dirty trick: move gzip input pipe to stdin so child closes spare copy
891 dup2(pipefd[0], 0);
892 if (pipefd[0]) close(pipefd[0]);
893
894 // Fork a copy of ourselves to handle extraction (reads from zip output
895 // pipe, writes to stdout).
896 pipefd[0] = pipefd[1];
897 pipefd[1] = 1;
898 pid = xpopen_both(0, pipefd);
899 close(pipefd[1]);
900
901 // loop writing collated data to zip proc
902 xwrite(0, hdr, len);
903 for (;;) {
904 if ((i = read(TT.fd, toybuf, sizeof(toybuf)))<1) {
905 close(0);
906 xwaitpid(pid);
907 return;
908 }
909 xwrite(0, toybuf, i);
910 }
911 }
912 }
913
914 unpack_tar(hdr);
915 dirflush(0, 0);
916
917 // Each time a TT.incl entry is seen it's moved to the end of the list,
918 // with TT.seen pointing to first seen list entry. Anything between
919 // TT.incl and TT.seen wasn't encountered in archive..
920 if (TT.seen != TT.incl) {
921 if (!TT.seen) TT.seen = TT.incl;
922 while (TT.incl != TT.seen) {
923 error_msg("'%s' not in archive", TT.incl->data);
924 TT.incl = TT.incl->next;
925 }
926 }
927
928 // are we writing? (Don't have to test flag here, one of 3 must be set)
929 } else {
930 struct double_list *dl = TT.incl;
931
932 // autodetect compression type based on -f name. (Use > to avoid.)
933 if (TT.f && !FLAG(j) && !FLAG(z) && !FLAG(I) && !FLAG(J)) {
934 char *tbz[] = {".tbz", ".tbz2", ".tar.bz", ".tar.bz2"};
935 if (strend(TT.f, ".tgz") || strend(TT.f, ".tar.gz"))
936 toys.optflags |= FLAG_z;
937 if (strend(TT.f, ".txz") || strend(TT.f, ".tar.xz"))
938 toys.optflags |= FLAG_J;
939 else for (len = 0; len<ARRAY_LEN(tbz); len++)
940 if (strend(TT.f, tbz[len])) toys.optflags |= FLAG_j;
941 }
942
943 if (FLAG(j)||FLAG(z)||FLAG(I)||FLAG(J)) {
944 int pipefd[2] = {-1, TT.fd};
945
946 xpopen_both((char *[]){archiver, 0}, pipefd);
947 close(TT.fd);
948 TT.fd = pipefd[0];
949 }
950 do {
951 TT.warn = 1;
952 dirtree_flagread(dl->data, FLAG(h)?DIRTREE_SYMFOLLOW:0, add_to_tar);
953 } while (TT.incl != (dl = dl->next));
954
955 writeall(TT.fd, toybuf, 1024);
956 }
957
958 if (CFG_TOYBOX_FREE) {
959 llist_traverse(TT.excl, llist_free_double);
960 llist_traverse(TT.incl, llist_free_double);
961 while(TT.hlc) free(TT.hlx[--TT.hlc].arg);
962 free(TT.hlx);
963 free(TT.cwd);
964 close(TT.fd);
965 }
966 }
967