1 /* diff.c - compare files line by line
2  *
3  * Copyright 2014 Sandeep Sharma <sandeep.jack2756@gmail.com>
4  * Copyright 2014 Ashwini Kumar <ak.ashwini1981@gmail.com>
5  *
6  * See: http://cm.bell-labs.com/cm/cs/cstr/41.pdf
7 
8 USE_DIFF(NEWTOY(diff, "<2>2(color)B(ignore-blank-lines)d(minimal)b(ignore-space-change)ut(expand-tabs)w(ignore-all-space)i(ignore-case)T(initial-tab)s(report-identical-files)q(brief)a(text)L(label)*S(starting-file):N(new-file)r(recursive)U(unified)#<0=3", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
9 
10 config DIFF
11   bool "diff"
12   default n
13   help
14   usage: diff [-abBdiNqrTstw] [-L LABEL] [-S FILE] [-U LINES] FILE1 FILE2
15 
16   -a  Treat all files as text
17   -b  Ignore changes in the amount of whitespace
18   -B  Ignore changes whose lines are all blank
19   -d  Try hard to find a smaller set of changes
20   -i  Ignore case differences
21   -L  Use LABEL instead of the filename in the unified header
22   -N  Treat absent files as empty
23   -q  Output only whether files differ
24   -r  Recurse
25   -S  Start with FILE when comparing directories
26   -T  Make tabs line up by prefixing a tab when necessary
27   -s  Report when two files are the same
28   -t  Expand tabs to spaces in output
29   -U  Output LINES lines of context
30   -w  Ignore all whitespace
31 
32   --color  Colored output
33 */
34 
35 #define FOR_diff
36 #include "toys.h"
37 
38 GLOBALS(
39   long ct;
40   char *start;
41   struct arg_list *L_list;
42 
43   int dir_num, size, is_binary, status, change, len[2];
44   int *offset[2];
45   struct stat st[2];
46 )
47 
48 #define MIN(x,y) ((x) < (y) ? (x) : (y))
49 #define MAX(x,y) ((x) > (y) ? (x) : (y))
50 #define IS_STDIN(s)     ((s)[0] == '-' && !(s)[1])
51 
52 struct v_vector {
53   unsigned serial:31;
54   unsigned last:1;
55   union {
56     unsigned hash;
57     unsigned p;
58   };
59 };
60 
61 struct diff {
62   long a, b, c, d, prev, suff;
63 };
64 
65 static struct dir_t {
66   char **list;
67   int nr_elm;
68 } dir[2];
69 
70 struct candidate {
71   int a, b;
72   struct candidate *prev, *next;
73 };
74 
75 static struct file_t {
76   FILE *fp;
77   int len;
78 } file[2];
79 
80 enum {
81   SAME,
82   DIFFER,
83 };
84 
85 enum {
86   empty = 1 << 9,
87   eol = 1 << 10,
88   eof = 1 << 11,
89   space = 1 << 12
90 };
91 
comp(const void * a,const void * b)92 static int comp(const void *a, const void* b)
93 {
94   int i = ((struct v_vector *)a)->hash -
95     ((struct v_vector *)b)->hash;
96 
97   if (!i) i = ((struct v_vector *)a)->serial -
98     ((struct v_vector *)b)->serial;
99   return i;
100 }
101 
search(struct candidate ** K,int r,int k,int j)102 static int search (struct candidate **K, int r, int k, int j)
103 {
104   int low = r, upper = k, mid;
105 
106   mid = (low + upper) / 2;
107   while (low <= mid) {
108     if (((struct candidate*)(K[mid]))->b < j &&
109         ((struct candidate*)(K[mid + 1]))->b > j)
110       return mid;
111 
112     if (((struct candidate*)(K[mid]))->b < j) low = mid + 1;
113     else if (((struct candidate*)(K[mid]))->b > j) upper = mid - 1;
114     else return -1;
115 
116     mid = (low + upper) / 2;
117   }
118   return -1;
119 }
120 
new_candidate(int i,int j,struct candidate * prev)121 static struct candidate * new_candidate (int i, int j, struct candidate* prev)
122 {
123   struct candidate *c = xzalloc(sizeof(struct candidate));
124 
125   c->a = i;
126   c->b = j;
127   c->prev = prev;
128   return c;
129 }
130 
131 
free_candidates(struct candidate * c)132 static void free_candidates(struct candidate *c)
133 {
134   struct candidate *t = c;
135 
136   while ((t = c)) {
137     c = c->next;
138     free(t);
139   }
140 }
141 /*
142  * 1. Search K[r: k] for an element K[s] such that K[s]-> b < j and K[s + 1]->b > j
143  * 2. if found do
144  *  2.a. If K[s + 1]->b > j do K[r] = c; r = s+1 and c = candidate(i, j, K[s]) //we have a candidate
145  *  2.b. if s = k (fence reached move it further) do K[k + 2] = K[k + 1], k++
146  * 3. if E[p].last true break i.e we have reached at the end of an equiv class
147  *    else p = p + 1 //keep traversing the equiv class.
148  * 4. K[r] = c //Save the sucessfully filled k-candidate.
149  */
do_merge(struct candidate ** K,int * k,int i,struct v_vector * E,int p)150 static void  do_merge(struct candidate **K, int *k, int i,
151     struct v_vector *E, int p)
152 {
153   int r = 0, s, j;
154   struct candidate *pr = 0, *c = K[0];
155 
156   while (1) {
157     j = E[p].serial;
158     s = search(K, r, *k, j);
159     if (s >= 0 && (((struct candidate*)(K[s]))->b < j &&
160           ((struct candidate*)(K[s + 1]))->b > j)) {
161 
162       if (((struct candidate*)(K[s + 1]))->b > j) {
163         pr = K[s];
164         if (r && K[r]) c->next = K[r];
165         K[r] = c;
166         r = s + 1;
167         c = new_candidate(i , j, pr);
168       }
169       if (s == *k) {
170         K[*k + 2] = K[*k + 1];
171         *k = *k + 1;
172         break;
173       }
174     }
175     if (E[p].last) break;
176     else p = p + 1;
177   }
178   K[r] = c;
179 }
180 
read_stdin()181 static FILE* read_stdin()
182 {
183   char tmp_name[] = "/tmp/diffXXXXXX";
184   int rd, wr, tmpfd = mkstemp(tmp_name);
185 
186   if (tmpfd == -1) perror_exit("mkstemp");
187   unlink(tmp_name);
188 
189   while (1) {
190     rd = xread(STDIN_FILENO, toybuf, sizeof(toybuf));
191 
192     if (!rd) break;
193     if (rd < 0) perror_exit("read error");
194     wr = writeall(tmpfd, toybuf, rd);
195     if (wr < 0) perror_exit("write");
196   }
197   return fdopen(tmpfd, "r");
198 }
199 
read_tok(FILE * fp,off_t * off,int tok)200 static int read_tok(FILE *fp, off_t *off, int tok)
201 {
202   int t = 0, is_space;
203 
204   tok |= empty;
205   while (!(tok & eol)) {
206 
207     t = fgetc(fp);
208     if (off && t != EOF) *off += 1;
209     is_space = isspace(t) || (t == EOF);
210     tok |= (t & (eof + eol)); //set tok eof+eol when t is eof
211 
212     if (t == '\n') tok |= eol;
213     if (toys.optflags & FLAG_i)
214       if (t >= 'A' && t <= 'Z') t = tolower(t);
215 
216     if (toys.optflags & FLAG_w && is_space) continue;
217 
218     if (toys.optflags & FLAG_b) {
219       if (tok & space) {
220         if (is_space) continue;
221         tok &= ~space;
222       } else if (is_space) t = space + ' ';
223     }
224     tok &= ~(empty + 0xff);  //remove empty and char too.
225     tok |= t; //add most recent char
226     break;
227   }
228 
229   return tok;
230 }
231 
bcomp(const void * a,const void * b)232 int bcomp(const void *a, const void *b)
233 {
234   struct v_vector *l = (struct v_vector*)a,
235                   *r = (struct v_vector*)b;
236   int ret = l->hash - r->hash;
237 
238   if (!ret) {
239     if ((r -1)->last) return 0;
240     else return -1;
241   }
242   return ret;
243 }
244 /*  file[0] corresponds file 1 and file[1] correspond file 2.
245  * 1. calc hashes for both the files and store them in vector(v[0], v[1])
246  * 2. sort file[1] with hash as primary and serial as sec. key
247  * 3. Form the equivalance class of file[1] stored in e vector. It lists all the equivalence
248  *    classes of lines in file[1], with e.last = true on the last element of each class.
249  *    The elements are ordered by serial within classes.
250  * 4. Form the p vector stored in  p_vector. p_vector[i], if non-zero, now points in e vector
251  *    to the begining of the equiv class of lines in file[1] equivalent to line
252  *    i in file[0].
253  * 5. Form the k-candidates as discribed in do_merge.
254  * 6. Create a vector J[i] = j, such that i'th line in file[0] is j'th line of
255  *    file[1], i.e J comprises LCS
256  */
create_j_vector()257 static int * create_j_vector()
258 {
259   int tok, i, j, size = 100, k;
260   off_t off;
261   long hash;
262   int *p_vector, *J;
263   struct v_vector *v[2], *e;
264   struct candidate **kcand, *pr;
265 
266   for (i = 0; i < 2; i++) {
267     tok = off = 0;
268     hash = 5831;
269     v[i] = xzalloc(size * sizeof(struct v_vector));
270     TT.offset[i] = xzalloc(size * sizeof(int));
271     file[i].len = 0;
272     fseek(file[i].fp, 0, SEEK_SET);
273 
274     while (1) {
275       tok  = read_tok(file[i].fp, &off, tok);
276       if (!(tok & empty)) {
277         hash = ((hash << 5) + hash) + (tok & 0xff);
278         continue;
279       }
280 
281       if (size == ++file[i].len) {
282         size = size * 11 / 10;
283         v[i] = xrealloc(v[i], size*sizeof(struct v_vector));
284         TT.offset[i] = xrealloc(TT.offset[i], size*sizeof(int));
285       }
286 
287       v[i][file[i].len].hash = hash & INT_MAX;
288       TT.offset[i][file[i].len] = off;
289       if ((tok & eof)) {
290         TT.offset[i][file[i].len] = ++off;
291         break;
292       }
293       hash = 5831;  //next line
294       tok = 0;
295     }
296     if (TT.offset[i][file[i].len] - TT.offset[i][file[i].len - 1] == 1)
297       file[i].len--;
298   }
299 
300   for (i = 0; i <= file[1].len; i++) v[1][i].serial = i;
301   qsort(v[1] + 1, file[1].len, sizeof(struct v_vector), comp);
302 
303   e = v[1];
304   e[0].serial = 0;
305   e[0].last = 1;
306   for ( i = 1; i <= file[1].len; i++) {
307     if ((i == file[1].len) || (v[1][i].hash != v[1][i+1].hash)) e[i].last = 1;
308     else e[i].last = 0;
309   }
310 
311   p_vector = xzalloc((file[0].len + 2) * sizeof(int));
312   for (i = 1; i <= file[0].len; i++) {
313     void *r = bsearch(&v[0][i], (e + 1), file[1].len, sizeof(e[0]), bcomp);
314     if (r) p_vector[i] = (struct v_vector*)r - e;
315   }
316 
317   for (i = 1; i <= file[0].len; i++)
318     e[i].p = p_vector[i];
319   free(p_vector);
320 
321   size = 100;
322   kcand = xzalloc(size * sizeof(struct candidate*));
323 
324   kcand[0] = new_candidate(0 , 0, NULL);
325   kcand[1] = new_candidate(file[0].len+1, file[1].len+1, NULL); //the fence
326 
327   k = 0;  //last successfully filled k candidate.
328   for (i = 1; i <= file[0].len; i++) {
329 
330     if (!e[i].p) continue;
331     if ((size - 2) == k) {
332       size = size * 11 / 10;
333       kcand = xrealloc(kcand, (size * sizeof(struct candidate*)));
334     }
335     do_merge(kcand, &k, i, e, e[i].p);
336   }
337   free(v[0]); //no need for v_vector now.
338   free(v[1]);
339 
340   J = xzalloc((file[0].len + 2) * sizeof(int));
341 
342   for (pr = kcand[k]; pr; pr = pr->prev)
343     J[pr->a] = pr->b;
344   J[file[0].len + 1] = file[1].len+1; //mark boundary
345 
346   for (i = k + 1; i >= 0; i--) free_candidates(kcand[i]);
347   free(kcand);
348 
349   for (i = 1; i <= file[0].len; i++) { // jackpot?
350     if (!J[i]) continue;
351 
352     fseek(file[0].fp, TT.offset[0][i - 1], SEEK_SET);
353     fseek(file[1].fp, TT.offset[1][J[i] - 1], SEEK_SET);
354 
355     for (j = J[i]; i <= file[0].len && J[i] == j; i++, j++) {
356       int tok0 = 0, tok1 = 0;
357 
358       do {
359         tok0 = read_tok(file[0].fp, NULL, tok0);
360         tok1 = read_tok(file[1].fp, NULL, tok1);
361         if (((tok0 ^ tok1) & empty) || ((tok0 & 0xff) != (tok1 & 0xff)))
362           J[i] = 0;
363       } while (!(tok0 & tok1 & empty));
364     }
365   }
366   return J;
367 }
368 
diff(char ** files)369 static int *diff(char **files)
370 {
371   size_t i ,j;
372   int s, t;
373   char *bufi, *bufj;
374 
375   TT.is_binary = 0; //loop calls to diff
376   TT.status = SAME;
377 
378   for (i = 0; i < 2; i++) {
379     if (IS_STDIN(files[i])) file[i].fp = read_stdin();
380     else file[i].fp = fopen(files[i], "r");
381 
382     if (!file[i].fp){
383       perror_msg("%s",files[i]);
384       TT.status = 2;
385       return NULL; //return SAME
386     }
387   }
388 
389   s = sizeof(toybuf)/2;
390   bufi = toybuf;
391   bufj = (toybuf + s);
392 
393   fseek(file[0].fp, 0, SEEK_SET);
394   fseek(file[1].fp, 0, SEEK_SET);
395 
396   if (toys.optflags & FLAG_a) return create_j_vector();
397 
398   while (1) {
399     i = fread(bufi, 1, s, file[0].fp);
400     j = fread(bufj, 1, s, file[1].fp);
401 
402     if (i != j) TT.status = DIFFER;
403 
404     for (t = 0; t < i && !TT.is_binary; t++)
405       if (!bufi[t]) TT.is_binary = 1;
406     for (t = 0; t < j && !TT.is_binary; t++)
407       if (!bufj[t]) TT.is_binary = 1;
408 
409     i = MIN(i, j);
410     for (t = 0; t < i; t++)
411       if (bufi[t] != bufj[t]) TT.status = DIFFER;
412 
413     if (!i || !j) break;
414   }
415   if (TT.is_binary || (TT.status == SAME)) return NULL;
416   return create_j_vector();
417 }
418 
print_diff(int a,int b,char c,int * off_set,FILE * fp)419 static void print_diff(int a, int b, char c, int *off_set, FILE *fp)
420 {
421   int i, j, cc, cl;
422   char *reset = NULL;
423 
424   if (c != ' ' && (toys.optflags & FLAG_color)) {
425     printf("\033[%dm", c == '+' ? 32 : 31);
426     reset = "\033[0m";
427   }
428 
429   for (i = a; i <= b; i++) {
430     fseek(fp, off_set[i - 1], SEEK_SET);
431     putchar(c);
432     if (toys.optflags & FLAG_T) putchar('\t');
433     for (j = 0, cl = 0; j <  (off_set[i] - off_set[i - 1]); j++) {
434       cc = fgetc(fp);
435       if (cc == EOF) {
436         printf("%s\n\\ No newline at end of file\n", reset ? reset : "");
437         return;
438       }
439       if ((cc == '\t') && (toys.optflags & FLAG_t))
440         do putchar(' '); while (++cl & 7);
441       else {
442         putchar(cc); //xputc has calls to fflush, it hurts performance badly.
443         cl++;
444       }
445     }
446   }
447   if (reset) printf("%s", reset);
448 }
449 
concat_file_path(char * path,char * default_path)450 static char *concat_file_path(char *path, char *default_path)
451 {
452   char *final_path;
453 
454   if ('/' == path[strlen(path) - 1]) {
455     while (*default_path == '/') ++default_path;
456     final_path = xmprintf("%s%s", path, default_path);
457   }
458   else if (*default_path != '/')
459     final_path = xmprintf("%s/%s", path, default_path);
460   else final_path = xmprintf("%s%s", path, default_path);
461   return final_path;
462 }
463 
skip(struct dirtree * node)464 static int skip(struct dirtree *node)
465 {
466   int len = strlen(toys.optargs[TT.dir_num]), ret = 0;
467   char *tmp = NULL, *ptr, *f_path = dirtree_path(node, NULL);
468   struct stat st;
469 
470   ptr = f_path;
471   ptr += len;
472   if (ptr[0]) {
473     tmp = concat_file_path(toys.optargs[1 - TT.dir_num], ptr);
474     if (tmp && !stat(tmp, &st)) ret = 0; //it is there on other side
475     else ret = 1; //not present on other side.
476   }
477   free(f_path);
478   if (tmp) free(tmp);
479   return ret; //add otherwise
480 }
481 
add_to_list(struct dirtree * node)482 static void add_to_list(struct dirtree *node)
483 {
484   char *full_path;
485 
486   dir[TT.dir_num].list = xrealloc(dir[TT.dir_num].list,
487       (TT.size + 1)*sizeof(char*));
488   TT.size++;
489   full_path = dirtree_path(node, NULL);
490   dir[TT.dir_num].list[TT.size - 1] = full_path;
491 }
492 
list_dir(struct dirtree * node)493 static int list_dir (struct dirtree *node)
494 {
495   int ret = 0;
496 
497   if (!dirtree_notdotdot(node)) return 0;
498 
499   if (S_ISDIR(node->st.st_mode) && !node->parent) { //add root dirs.
500     add_to_list(node);
501     return (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
502   }
503 
504   if (S_ISDIR(node->st.st_mode) && (toys.optflags & FLAG_r)) {
505     if (!(toys.optflags & FLAG_N)) ret = skip(node);
506     if (!ret) return (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
507     else {
508       add_to_list(node); //only at one side.
509       return 0;
510     }
511   } else {
512     add_to_list(node);
513     return S_ISDIR(node->st.st_mode) ? 0 : (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
514   }
515 }
516 
cmp(const void * p1,const void * p2)517 static int cmp(const void *p1, const void *p2)
518 {
519    return strcmp(* (char * const *)p1, * (char * const *)p2);
520 }
521 
show_label(char * prefix,char * filename,struct stat * sb)522 static void show_label(char *prefix, char *filename, struct stat *sb)
523 {
524   char date[36];
525 
526   printf("%s %s\t%s\n", prefix, filename,
527     format_iso_time(date, sizeof(date), &sb->st_mtim));
528 }
529 
do_diff(char ** files)530 static void do_diff(char **files)
531 {
532 
533   long i = 1, size = 1, x = 0, change = 0, ignore_white,
534    start1, end1, start2, end2;
535   struct diff *d;
536   struct arg_list *llist = TT.L_list;
537   int *J;
538 
539   TT.offset[0] = TT.offset[1] = NULL;
540   J = diff(files);
541 
542   if (!J) return; //No need to compare, have to status only
543 
544   d = xzalloc(size *sizeof(struct diff));
545   do {
546     ignore_white = 0;
547     for (d[x].a = i; d[x].a <= file[0].len; d[x].a++) {
548       if (J[d[x].a] != (J[d[x].a - 1] + 1)) break;
549       else continue;
550     }
551     d[x].c = (J[d[x].a - 1] + 1);
552 
553     for (d[x].b = (d[x].a - 1); d[x].b <= file[0].len; d[x].b++) {
554       if (J[d[x].b + 1]) break;
555       else continue;
556     }
557     d[x].d = (J[d[x].b + 1] - 1);
558 
559     if ((toys.optflags & FLAG_B)) {
560       if (d[x].a <= d[x].b) {
561         if ((TT.offset[0][d[x].b] - TT.offset[0][d[x].a - 1])
562             == (d[x].b - d[x].a + 1))
563           ignore_white = 1;
564       } else if (d[x].c <= d[x].d){
565         if ((TT.offset[1][d[x].d] - TT.offset[1][d[x].c - 1])
566             == (d[x].d - d[x].c + 1))
567           ignore_white = 1;
568       }
569     }
570 
571     if ((d[x].a <= d[x].b || d[x].c <= d[x].d) && !ignore_white)
572       change = 1; //is we have diff ?
573 
574     if (!ignore_white) d = xrealloc(d, (x + 2) *sizeof(struct diff));
575     i = d[x].b + 1;
576     if (i > file[0].len) break;
577     J[d[x].b] = d[x].d;
578     if (!ignore_white) x++;
579   } while (i <= file[0].len);
580 
581   i = x+1;
582   TT.status = change; //update status, may change bcoz of -w etc.
583 
584   if (!(toys.optflags & FLAG_q) && change) {  //start of !FLAG_q
585     if (toys.optflags & FLAG_color) printf("\033[1m");
586     if (toys.optflags & FLAG_L) printf("--- %s\n", llist->arg);
587     else show_label("---", files[0], &(TT).st[0]);
588     if (((toys.optflags & FLAG_L) && !llist->next) || !(toys.optflags & FLAG_L))
589       show_label("+++", files[1], &(TT).st[1]);
590     else {
591       while (llist->next) llist = llist->next;
592       printf("+++ %s\n", llist->arg);
593     }
594     if (toys.optflags & FLAG_color) printf("\033[0m");
595 
596     struct diff *t, *ptr1 = d, *ptr2 = d;
597     while (i) {
598       long a,b;
599 
600       if (TT.ct > file[0].len) TT.ct = file[0].len; //trim context to file len.
601       if (ptr1->b < ptr1->a && ptr1->d < ptr1->c) {
602         i--;
603         continue;
604       }
605       //Handle the context stuff
606       a =  ptr1->a;
607       b =  ptr1->b;
608 
609       b  = MIN(file[0].len, b);
610       if (i == x + 1) ptr1->suff = MAX(1,a - TT.ct);
611       else {
612         if ((ptr1 - 1)->prev >= (ptr1->a - TT.ct))
613           ptr1->suff = (ptr1 - 1)->prev + 1;
614         else ptr1->suff =  ptr1->a - TT.ct;
615       }
616 calc_ct:
617       if (i > 1) {
618         if ((ptr2->b + TT.ct) >= (ptr2  + 1)->a) {
619           ptr2++;
620           i--;
621           goto calc_ct;
622         } else ptr2->prev = ptr2->b + TT.ct;
623       } else ptr2->prev = ptr2->b;
624       start1 = (ptr2->prev - ptr1->suff + 1);
625       end1 = (start1 == 1) ? -1 : start1;
626       start2 = MAX(1, ptr1->c - (ptr1->a - ptr1->suff));
627       end2 = ptr2->prev - ptr2->b + ptr2->d;
628 
629       if (toys.optflags & FLAG_color) printf("\033[36m");
630       printf("@@ -%ld", start1 ? ptr1->suff: (ptr1->suff -1));
631       if (end1 != -1) printf(",%ld ", ptr2->prev-ptr1->suff + 1);
632       else putchar(' ');
633 
634       printf("+%ld", (end2 - start2 + 1) ? start2: (start2 -1));
635       if ((end2 - start2 +1) != 1) printf(",%ld ", (end2 - start2 +1));
636       else putchar(' ');
637       printf("@@");
638       if (toys.optflags & FLAG_color) printf("\033[0m");
639       putchar('\n');
640 
641       for (t = ptr1; t <= ptr2; t++) {
642         if (t== ptr1) print_diff(t->suff, t->a-1, ' ', TT.offset[0], file[0].fp);
643         print_diff(t->a, t->b, '-', TT.offset[0], file[0].fp);
644         print_diff(t->c, t->d, '+', TT.offset[1], file[1].fp);
645         if (t == ptr2)
646           print_diff(t->b+1, (t)->prev, ' ', TT.offset[0], file[0].fp);
647         else print_diff(t->b+1, (t+1)->a-1, ' ', TT.offset[0], file[0].fp);
648       }
649       ptr2++;
650       ptr1 = ptr2;
651       i--;
652     } //end of while
653   } //End of !FLAG_q
654   free(d);
655   free(J);
656   free(TT.offset[0]);
657   free(TT.offset[1]);
658 }
659 
show_status(char ** files)660 static void show_status(char **files)
661 {
662   switch (TT.status) {
663     case SAME:
664       if (toys.optflags & FLAG_s)
665         printf("Files %s and %s are identical\n",files[0], files[1]);
666       break;
667     case DIFFER:
668       if ((toys.optflags & FLAG_q) || TT.is_binary)
669         printf("Files %s and %s differ\n",files[0], files[1]);
670       break;
671   }
672 }
673 
create_empty_entry(int l,int r,int j)674 static void create_empty_entry(int l , int r, int j)
675 {
676   struct stat st[2];
677   char *f[2], *path[2];
678   int i;
679 
680   if (j > 0 && (toys.optflags & FLAG_N)) {
681     path[0] = concat_file_path(dir[0].list[0], dir[1].list[r] + TT.len[1]);
682     f[0] = "/dev/null";
683     path[1] = f[1] = dir[1].list[r];
684     stat(f[1], &st[0]);
685     st[1] = st[0];
686   }
687   else if (j < 0 && (toys.optflags & FLAG_N)) {
688     path[1] = concat_file_path(dir[1].list[0], dir[0].list[l] + TT.len[0]);
689     f[1] = "/dev/null";
690     path[0] = f[0] = dir[0].list[l];
691     stat(f[0], &st[0]);
692     st[1] = st[0];
693   }
694 
695   if (!j) {
696     for (i = 0; i < 2; i++) {
697       path[i] = f[i] = dir[i].list[!i ? l: r];
698       stat(f[i], &st[i]);
699     }
700   }
701 
702   if (S_ISDIR(st[0].st_mode) && S_ISDIR(st[1].st_mode))
703     printf("Common subdirectories: %s and %s\n", path[0], path[1]);
704   else if (!S_ISREG(st[0].st_mode) && !S_ISDIR(st[0].st_mode))
705     printf("File %s is not a regular file or directory "
706         "and was skipped\n", path[0]);
707   else if (!S_ISREG(st[1].st_mode) && !S_ISDIR(st[1].st_mode))
708     printf("File %s is not a regular file or directory "
709         "and was skipped\n", path[1]);
710   else if (S_ISDIR(st[0].st_mode) != S_ISDIR(st[1].st_mode)) {
711     if (S_ISDIR(st[0].st_mode))
712       printf("File %s is a %s while file %s is a"
713           " %s\n", path[0], "directory", path[1], "regular file");
714     else
715       printf("File %s is a %s while file %s is a"
716           " %s\n", path[0], "regular file", path[1], "directory");
717   } else {
718     do_diff(f);
719     show_status(path);
720     if (file[0].fp) fclose(file[0].fp);
721     if (file[1].fp) fclose(file[1].fp);
722   }
723 
724   if ((toys.optflags & FLAG_N) && j) {
725     if (j > 0) free(path[0]);
726     else free(path[1]);
727   }
728 }
729 
diff_dir(int * start)730 static void diff_dir(int *start)
731 {
732   int l, r, j = 0;
733 
734   l = start[0]; //left side file start
735   r = start[1]; //right side file start
736   while (l < dir[0].nr_elm && r < dir[1].nr_elm) {
737     if ((j = strcmp ((dir[0].list[l] + TT.len[0]),
738             (dir[1].list[r] + TT.len[1]))) && !(toys.optflags & FLAG_N)) {
739       if (j > 0) {
740         printf ("Only in %s: %s\n", dir[1].list[0], dir[1].list[r] + TT.len[1]);
741         free(dir[1].list[r]);
742         r++;
743       } else {
744         printf ("Only in %s: %s\n", dir[0].list[0], dir[0].list[l] + TT.len[0]);
745         free(dir[0].list[l]);
746         l++;
747       }
748       TT.status = DIFFER;
749     } else {
750       create_empty_entry(l, r, j); //create non empty dirs/files if -N.
751       if (j > 0) {
752         free(dir[1].list[r]);
753         r++;
754       } else if (j < 0) {
755         free(dir[0].list[l]);
756         l++;
757       } else {
758         free(dir[1].list[r]);
759         free(dir[0].list[l]);
760         l++;
761         r++;
762       }
763     }
764   }
765 
766   if (l == dir[0].nr_elm) {
767     while (r < dir[1].nr_elm) {
768       if (!(toys.optflags & FLAG_N)) {
769         printf ("Only in %s: %s\n", dir[1].list[0], dir[1].list[r] + TT.len[1]);
770         TT.status = DIFFER;
771       } else create_empty_entry(l, r, 1);
772       free(dir[1].list[r]);
773       r++;
774     }
775   } else if (r == dir[1].nr_elm) {
776     while (l < dir[0].nr_elm) {
777       if (!(toys.optflags & FLAG_N)) {
778         printf ("Only in %s: %s\n", dir[0].list[0], dir[0].list[l] + TT.len[0]);
779         TT.status = DIFFER;
780       } else create_empty_entry(l, r, -1);
781       free(dir[0].list[l]);
782       l++;
783     }
784   }
785   free(dir[0].list[0]); //we are done, free root nodes too
786   free(dir[1].list[0]);
787 }
788 
diff_main(void)789 void diff_main(void)
790 {
791   int j = 0, k = 1, start[2] = {1, 1};
792   char *files[2];
793 
794   toys.exitval = 2;
795 
796   if ((toys.optflags & FLAG_color) && !isatty(1)) toys.optflags ^= FLAG_color;
797 
798   for (j = 0; j < 2; j++) {
799     files[j] = toys.optargs[j];
800     if (IS_STDIN(files[j])) {
801       if (fstat(0, &TT.st[j]) == -1)
802         perror_exit("can't fstat %s", files[j]);
803     } else {
804       xstat(files[j], &TT.st[j]);
805     }
806   }
807 
808   if ((IS_STDIN(files[0]) || IS_STDIN(files[1]))
809       && (S_ISDIR(TT.st[0].st_mode) || S_ISDIR(TT.st[1].st_mode)))
810     error_exit("can't compare stdin to directory");
811 
812   if ((TT.st[0].st_ino == TT.st[1].st_ino) //physicaly same device
813       && (TT.st[0].st_dev == TT.st[1].st_dev)) {
814     toys.exitval = 0;
815     return show_status(files);
816   }
817 
818   if (S_ISDIR(TT.st[0].st_mode) && S_ISDIR(TT.st[1].st_mode)) {
819     for (j = 0; j < 2; j++) {
820       memset(&dir[j], 0, sizeof(struct dir_t));
821       dirtree_flagread(files[j], DIRTREE_SYMFOLLOW, list_dir);
822       dir[j].nr_elm = TT.size; //size updated in list_dir
823       qsort(&(dir[j].list[1]), (TT.size - 1), sizeof(char*), cmp);
824 
825       TT.len[j] = strlen(dir[j].list[0]); //calc root node len
826       TT.len[j] += (dir[j].list[0][TT.len[j] -1] != '/');
827 
828       if (toys.optflags & FLAG_S) {
829         while (k < TT.size && strcmp(dir[j].list[k] +
830               TT.len[j], TT.start) < 0) {
831           start[j] += 1;
832           k++;
833         }
834       }
835       TT.dir_num++;
836       TT.size = 0;
837       k = 1;
838     }
839     diff_dir(start);
840     free(dir[0].list); //free array
841     free(dir[1].list);
842   } else {
843     if (S_ISDIR(TT.st[0].st_mode) || S_ISDIR(TT.st[1].st_mode)) {
844       int d = S_ISDIR(TT.st[0].st_mode);
845       char *slash = strrchr(files[d], '/');
846 
847       files[1 - d] = concat_file_path(files[1 - d], slash ? slash + 1 : files[d]);
848       if ((stat(files[1 - d], &TT.st[1 - d])) == -1)
849         perror_exit("%s", files[1 - d]);
850     }
851     do_diff(files);
852     show_status(files);
853     if (file[0].fp) fclose(file[0].fp);
854     if (file[1].fp) fclose(file[1].fp);
855   }
856   toys.exitval = TT.status; //exit status will be the status
857 }
858