1 /* diff.c - compare files line by line
2  *
3  * Copyright 2014 Sandeep Sharma <sandeep.jack2756@gmail.com>
4  * Copyright 2014 Ashwini Kumar <ak.ashwini1981@gmail.com>
5  *
6  * See: http://cm.bell-labs.com/cm/cs/cstr/41.pdf
7 
8 USE_DIFF(NEWTOY(diff, "<2>2(color)(strip-trailing-cr)B(ignore-blank-lines)d(minimal)b(ignore-space-change)ut(expand-tabs)w(ignore-all-space)i(ignore-case)T(initial-tab)s(report-identical-files)q(brief)a(text)L(label)*S(starting-file):N(new-file)r(recursive)U(unified)#<0=3", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
9 
10 config DIFF
11   bool "diff"
12   default n
13   help
14   usage: diff [-abBdiNqrTstw] [-L LABEL] [-S FILE] [-U LINES] FILE1 FILE2
15 
16   -a	Treat all files as text
17   -b	Ignore changes in the amount of whitespace
18   -B	Ignore changes whose lines are all blank
19   -d	Try hard to find a smaller set of changes
20   -i	Ignore case differences
21   -L	Use LABEL instead of the filename in the unified header
22   -N	Treat absent files as empty
23   -q	Output only whether files differ
24   -r	Recurse
25   -S	Start with FILE when comparing directories
26   -T	Make tabs line up by prefixing a tab when necessary
27   -s	Report when two files are the same
28   -t	Expand tabs to spaces in output
29   -u	Unified diff
30   -U	Output LINES lines of context
31   -w	Ignore all whitespace
32 
33   --color              Colored output
34   --strip-trailing-cr  Strip trailing '\r's from input lines
35 */
36 
37 #define FOR_diff
38 #include "toys.h"
39 
40 GLOBALS(
41   long ct;
42   char *start;
43   struct arg_list *L_list;
44 
45   int dir_num, size, is_binary, status, change, len[2];
46   int *offset[2];
47   struct stat st[2];
48 )
49 
50 #define MIN(x,y) ((x) < (y) ? (x) : (y))
51 #define MAX(x,y) ((x) > (y) ? (x) : (y))
52 #define IS_STDIN(s)     ((s)[0] == '-' && !(s)[1])
53 
54 struct v_vector {
55   unsigned serial:31;
56   unsigned last:1;
57   union {
58     unsigned hash;
59     unsigned p;
60   };
61 };
62 
63 struct diff {
64   long a, b, c, d, prev, suff;
65 };
66 
67 static struct dir_t {
68   char **list;
69   int nr_elm;
70 } dir[2];
71 
72 struct candidate {
73   int a, b;
74   struct candidate *prev, *next;
75 };
76 
77 static struct file_t {
78   FILE *fp;
79   int len;
80 } file[2];
81 
82 enum {
83   SAME,
84   DIFFER,
85 };
86 
87 enum {
88   empty = 1 << 9,
89   eol = 1 << 10,
90   eof = 1 << 11,
91   space = 1 << 12
92 };
93 
comp(const void * a,const void * b)94 static int comp(const void *a, const void* b)
95 {
96   int i = ((struct v_vector *)a)->hash -
97     ((struct v_vector *)b)->hash;
98 
99   if (!i) i = ((struct v_vector *)a)->serial -
100     ((struct v_vector *)b)->serial;
101   return i;
102 }
103 
search(struct candidate ** K,int r,int k,int j)104 static int search (struct candidate **K, int r, int k, int j)
105 {
106   int low = r, upper = k, mid;
107 
108   mid = (low + upper) / 2;
109   while (low <= mid) {
110     if (((struct candidate*)(K[mid]))->b < j &&
111         ((struct candidate*)(K[mid + 1]))->b > j)
112       return mid;
113 
114     if (((struct candidate*)(K[mid]))->b < j) low = mid + 1;
115     else if (((struct candidate*)(K[mid]))->b > j) upper = mid - 1;
116     else return -1;
117 
118     mid = (low + upper) / 2;
119   }
120   return -1;
121 }
122 
new_candidate(int i,int j,struct candidate * prev)123 static struct candidate * new_candidate (int i, int j, struct candidate* prev)
124 {
125   struct candidate *c = xzalloc(sizeof(struct candidate));
126 
127   c->a = i;
128   c->b = j;
129   c->prev = prev;
130   return c;
131 }
132 
133 
free_candidates(struct candidate * c)134 static void free_candidates(struct candidate *c)
135 {
136   struct candidate *t = c;
137 
138   while ((t = c)) {
139     c = c->next;
140     free(t);
141   }
142 }
143 /*
144  * 1. Search K[r: k] for an element K[s] such that K[s]-> b < j and K[s + 1]->b > j
145  * 2. if found do
146  *  2.a. If K[s + 1]->b > j do K[r] = c; r = s+1 and c = candidate(i, j, K[s]) //we have a candidate
147  *  2.b. if s = k (fence reached move it further) do K[k + 2] = K[k + 1], k++
148  * 3. if E[p].last true break i.e we have reached at the end of an equiv class
149  *    else p = p + 1 //keep traversing the equiv class.
150  * 4. K[r] = c //Save the sucessfully filled k-candidate.
151  */
do_merge(struct candidate ** K,int * k,int i,struct v_vector * E,int p)152 static void  do_merge(struct candidate **K, int *k, int i,
153     struct v_vector *E, int p)
154 {
155   int r = 0, s, j;
156   struct candidate *pr = 0, *c = K[0];
157 
158   while (1) {
159     j = E[p].serial;
160     s = search(K, r, *k, j);
161     if (s >= 0 && (((struct candidate*)(K[s]))->b < j &&
162           ((struct candidate*)(K[s + 1]))->b > j)) {
163 
164       if (((struct candidate*)(K[s + 1]))->b > j) {
165         pr = K[s];
166         if (r && K[r]) c->next = K[r];
167         K[r] = c;
168         r = s + 1;
169         c = new_candidate(i , j, pr);
170       }
171       if (s == *k) {
172         K[*k + 2] = K[*k + 1];
173         *k = *k + 1;
174         break;
175       }
176     }
177     if (E[p].last) break;
178     else p = p + 1;
179   }
180   K[r] = c;
181 }
182 
read_stdin()183 static FILE* read_stdin()
184 {
185   char *tmp_name;
186   int tmpfd = xtempfile("stdin", &tmp_name);
187 
188   unlink(tmp_name);
189   free(tmp_name);
190 
191   xsendfile(0, tmpfd);
192   return fdopen(tmpfd, "r");
193 }
194 
read_tok(FILE * fp,off_t * off,int tok)195 static int read_tok(FILE *fp, off_t *off, int tok)
196 {
197   int t = 0, is_space;
198 
199   tok |= empty;
200   while (!(tok & eol)) {
201     t = fgetc(fp);
202 
203     if (FLAG(strip_trailing_cr) && t == '\r') {
204       int t2 = fgetc(fp);
205       if (t2 == '\n') {
206         t = t2;
207         if (off) (*off)++;
208       } else {
209         ungetc(t2, fp);
210       }
211     }
212 
213     if (off && t != EOF) *off += 1;
214     is_space = isspace(t) || (t == EOF);
215     tok |= (t & (eof + eol)); //set tok eof+eol when t is eof
216 
217     if (t == '\n') tok |= eol;
218     if (toys.optflags & FLAG_i)
219       if (t >= 'A' && t <= 'Z') t = tolower(t);
220 
221     if (toys.optflags & FLAG_w && is_space) continue;
222 
223     if (toys.optflags & FLAG_b) {
224       if (tok & space) {
225         if (is_space) continue;
226         tok &= ~space;
227       } else if (is_space) t = space + ' ';
228     }
229     tok &= ~(empty + 0xff);  //remove empty and char too.
230     tok |= t; //add most recent char
231     break;
232   }
233 
234   return tok;
235 }
236 
bcomp(const void * a,const void * b)237 int bcomp(const void *a, const void *b)
238 {
239   struct v_vector *l = (struct v_vector*)a,
240                   *r = (struct v_vector*)b;
241   int ret = l->hash - r->hash;
242 
243   if (!ret) {
244     if ((r -1)->last) return 0;
245     else return -1;
246   }
247   return ret;
248 }
249 /*  file[0] corresponds file 1 and file[1] correspond file 2.
250  * 1. calc hashes for both the files and store them in vector(v[0], v[1])
251  * 2. sort file[1] with hash as primary and serial as sec. key
252  * 3. Form the equivalance class of file[1] stored in e vector. It lists all the equivalence
253  *    classes of lines in file[1], with e.last = true on the last element of each class.
254  *    The elements are ordered by serial within classes.
255  * 4. Form the p vector stored in  p_vector. p_vector[i], if non-zero, now points in e vector
256  *    to the beginning of the equiv class of lines in file[1] equivalent to line
257  *    i in file[0].
258  * 5. Form the k-candidates as discribed in do_merge.
259  * 6. Create a vector J[i] = j, such that i'th line in file[0] is j'th line of
260  *    file[1], i.e J comprises LCS
261  */
create_j_vector()262 static int * create_j_vector()
263 {
264   int tok, i, j, size = 100, k;
265   off_t off;
266   long hash;
267   int *p_vector, *J;
268   struct v_vector *v[2], *e;
269   struct candidate **kcand, *pr;
270 
271   for (i = 0; i < 2; i++) {
272     tok = off = 0;
273     hash = 5831;
274     v[i] = xzalloc(size * sizeof(struct v_vector));
275     TT.offset[i] = xzalloc(size * sizeof(int));
276     file[i].len = 0;
277     fseek(file[i].fp, 0, SEEK_SET);
278 
279     while (1) {
280       tok  = read_tok(file[i].fp, &off, tok);
281       if (!(tok & empty)) {
282         hash = ((hash << 5) + hash) + (tok & 0xff);
283         continue;
284       }
285 
286       if (size == ++file[i].len) {
287         size = size * 11 / 10;
288         v[i] = xrealloc(v[i], size*sizeof(struct v_vector));
289         TT.offset[i] = xrealloc(TT.offset[i], size*sizeof(int));
290       }
291 
292       v[i][file[i].len].hash = hash & INT_MAX;
293       TT.offset[i][file[i].len] = off;
294       if ((tok & eof)) {
295         TT.offset[i][file[i].len] = ++off;
296         break;
297       }
298       hash = 5831;  //next line
299       tok = 0;
300     }
301     if (TT.offset[i][file[i].len] - TT.offset[i][file[i].len - 1] == 1)
302       file[i].len--;
303   }
304 
305   for (i = 0; i <= file[1].len; i++) v[1][i].serial = i;
306   qsort(v[1] + 1, file[1].len, sizeof(struct v_vector), comp);
307 
308   e = v[1];
309   e[0].serial = 0;
310   e[0].last = 1;
311   for ( i = 1; i <= file[1].len; i++) {
312     if ((i == file[1].len) || (v[1][i].hash != v[1][i+1].hash)) e[i].last = 1;
313     else e[i].last = 0;
314   }
315 
316   p_vector = xzalloc((file[0].len + 2) * sizeof(int));
317   for (i = 1; i <= file[0].len; i++) {
318     void *r = bsearch(&v[0][i], (e + 1), file[1].len, sizeof(e[0]), bcomp);
319     if (r) p_vector[i] = (struct v_vector*)r - e;
320   }
321 
322   for (i = 1; i <= file[0].len; i++)
323     e[i].p = p_vector[i];
324   free(p_vector);
325 
326   size = 100;
327   kcand = xzalloc(size * sizeof(struct candidate*));
328 
329   kcand[0] = new_candidate(0 , 0, NULL);
330   kcand[1] = new_candidate(file[0].len+1, file[1].len+1, NULL); //the fence
331 
332   k = 0;  //last successfully filled k candidate.
333   for (i = 1; i <= file[0].len; i++) {
334 
335     if (!e[i].p) continue;
336     if ((size - 2) == k) {
337       size = size * 11 / 10;
338       kcand = xrealloc(kcand, (size * sizeof(struct candidate*)));
339     }
340     do_merge(kcand, &k, i, e, e[i].p);
341   }
342   free(v[0]); //no need for v_vector now.
343   free(v[1]);
344 
345   J = xzalloc((file[0].len + 2) * sizeof(int));
346 
347   for (pr = kcand[k]; pr; pr = pr->prev)
348     J[pr->a] = pr->b;
349   J[file[0].len + 1] = file[1].len+1; //mark boundary
350 
351   for (i = k + 1; i >= 0; i--) free_candidates(kcand[i]);
352   free(kcand);
353 
354   for (i = 1; i <= file[0].len; i++) { // jackpot?
355     if (!J[i]) continue;
356 
357     fseek(file[0].fp, TT.offset[0][i - 1], SEEK_SET);
358     fseek(file[1].fp, TT.offset[1][J[i] - 1], SEEK_SET);
359 
360     for (j = J[i]; i <= file[0].len && J[i] == j; i++, j++) {
361       int tok0 = 0, tok1 = 0;
362 
363       do {
364         tok0 = read_tok(file[0].fp, NULL, tok0);
365         tok1 = read_tok(file[1].fp, NULL, tok1);
366         if (((tok0 ^ tok1) & empty) || ((tok0 & 0xff) != (tok1 & 0xff)))
367           J[i] = 0;
368       } while (!(tok0 & tok1 & empty));
369     }
370   }
371   return J;
372 }
373 
diff(char ** files)374 static int *diff(char **files)
375 {
376   size_t i ,j;
377   int s, t;
378   char *bufi, *bufj;
379 
380   TT.is_binary = 0; //loop calls to diff
381   TT.status = SAME;
382 
383   for (i = 0; i < 2; i++) {
384     if (IS_STDIN(files[i])) file[i].fp = read_stdin();
385     else file[i].fp = fopen(files[i], "r");
386 
387     if (!file[i].fp){
388       perror_msg("%s",files[i]);
389       TT.status = 2;
390       return NULL; //return SAME
391     }
392   }
393 
394   s = sizeof(toybuf)/2;
395   bufi = toybuf;
396   bufj = (toybuf + s);
397 
398   fseek(file[0].fp, 0, SEEK_SET);
399   fseek(file[1].fp, 0, SEEK_SET);
400 
401   if (toys.optflags & FLAG_a) return create_j_vector();
402 
403   while (1) {
404     i = fread(bufi, 1, s, file[0].fp);
405     j = fread(bufj, 1, s, file[1].fp);
406 
407     if (i != j) TT.status = DIFFER;
408 
409     for (t = 0; t < i && !TT.is_binary; t++)
410       if (!bufi[t]) TT.is_binary = 1;
411     for (t = 0; t < j && !TT.is_binary; t++)
412       if (!bufj[t]) TT.is_binary = 1;
413 
414     i = MIN(i, j);
415     for (t = 0; t < i; t++)
416       if (bufi[t] != bufj[t]) TT.status = DIFFER;
417 
418     if (!i || !j) break;
419   }
420   if (TT.is_binary || (TT.status == SAME)) return NULL;
421   return create_j_vector();
422 }
423 
print_diff(int a,int b,char c,int * off_set,FILE * fp)424 static void print_diff(int a, int b, char c, int *off_set, FILE *fp)
425 {
426   int i, j, cc, cl;
427   char *reset = NULL;
428 
429   if (c != ' ' && (toys.optflags & FLAG_color)) {
430     printf("\033[%dm", c == '+' ? 32 : 31);
431     reset = "\033[0m";
432   }
433 
434   for (i = a; i <= b; i++) {
435     fseek(fp, off_set[i - 1], SEEK_SET);
436     putchar(c);
437     if (toys.optflags & FLAG_T) putchar('\t');
438     for (j = 0, cl = 0; j <  (off_set[i] - off_set[i - 1]); j++) {
439       cc = fgetc(fp);
440       if (cc == EOF) {
441         printf("%s\n\\ No newline at end of file\n", reset ? reset : "");
442         return;
443       }
444       if ((cc == '\t') && (toys.optflags & FLAG_t))
445         do putchar(' '); while (++cl & 7);
446       else {
447         putchar(cc); //xputc has calls to fflush, it hurts performance badly.
448         cl++;
449       }
450     }
451   }
452   if (reset) printf("%s", reset);
453 }
454 
concat_file_path(char * path,char * default_path)455 static char *concat_file_path(char *path, char *default_path)
456 {
457   char *final_path;
458 
459   if ('/' == path[strlen(path) - 1]) {
460     while (*default_path == '/') ++default_path;
461     final_path = xmprintf("%s%s", path, default_path);
462   }
463   else if (*default_path != '/')
464     final_path = xmprintf("%s/%s", path, default_path);
465   else final_path = xmprintf("%s%s", path, default_path);
466   return final_path;
467 }
468 
skip(struct dirtree * node)469 static int skip(struct dirtree *node)
470 {
471   int len = strlen(toys.optargs[TT.dir_num]), ret = 0;
472   char *tmp = NULL, *ptr, *f_path = dirtree_path(node, NULL);
473   struct stat st;
474 
475   ptr = f_path;
476   ptr += len;
477   if (ptr[0]) {
478     tmp = concat_file_path(toys.optargs[1 - TT.dir_num], ptr);
479     if (tmp && !stat(tmp, &st)) ret = 0; //it is there on other side
480     else ret = 1; //not present on other side.
481   }
482   free(f_path);
483   if (tmp) free(tmp);
484   return ret; //add otherwise
485 }
486 
add_to_list(struct dirtree * node)487 static void add_to_list(struct dirtree *node)
488 {
489   char *full_path;
490 
491   dir[TT.dir_num].list = xrealloc(dir[TT.dir_num].list,
492       (TT.size + 1)*sizeof(char*));
493   TT.size++;
494   full_path = dirtree_path(node, NULL);
495   dir[TT.dir_num].list[TT.size - 1] = full_path;
496 }
497 
list_dir(struct dirtree * node)498 static int list_dir (struct dirtree *node)
499 {
500   int ret = 0;
501 
502   if (!dirtree_notdotdot(node)) return 0;
503 
504   if (S_ISDIR(node->st.st_mode) && !node->parent) { //add root dirs.
505     add_to_list(node);
506     return (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
507   }
508 
509   if (S_ISDIR(node->st.st_mode) && (toys.optflags & FLAG_r)) {
510     if (!(toys.optflags & FLAG_N)) ret = skip(node);
511     if (!ret) return (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
512     else {
513       add_to_list(node); //only at one side.
514       return 0;
515     }
516   } else {
517     add_to_list(node);
518     return S_ISDIR(node->st.st_mode) ? 0 : (DIRTREE_RECURSE|DIRTREE_SYMFOLLOW);
519   }
520 }
521 
cmp(const void * p1,const void * p2)522 static int cmp(const void *p1, const void *p2)
523 {
524    return strcmp(* (char * const *)p1, * (char * const *)p2);
525 }
526 
527 // quote and escape filenames that have awkward characters
quote_filename(char * filename)528 char *quote_filename(char *filename)
529 {
530   char *to = "abfnrtv\"\\", *from = "\a\b\f\n\r\t\v\"\\";
531   char *result, *s, *t;
532   size_t len = 0;
533   int quote = 0;
534 
535   // calculate memory usage and presence of quotes
536   for (s = filename; *s; s++) {
537     if (*s == '\a' || *s == '\b' || *s == '\f' || *s == '\r' || *s == '\v'
538       || *s == '\n' || *s == '\t' || *s == '"' || *s == '\\')
539     {
540       quote = 1;
541       len += 2;
542     } else if (*s == ' ') {
543       quote = 1;
544       len++;
545     } else if (*s < 0x20 || *s >= 0x80) {
546       quote = 1;
547       len += 4;
548     } else {
549       len++;
550     }
551   }
552 
553   // construct the new string
554   result = xmalloc(len + (quote ? 2 : 0) + 1);
555   t = result;
556   if (quote) *t++ = '"';
557   for (s = filename; *s; s++) {
558     if (*s == '\a' || *s == '\b' || *s == '\f' || *s == '\r' || *s == '\v'
559       || *s == '\n' || *s == '\t' || *s == '"' || *s == '\\')
560     {
561       *t = '\\';
562       t[1] = to[strchr(from, *s) - from];
563       t += 2;
564     } else if (*s < 0x20 || *s >= 0x80) {
565       sprintf(t, "\\%.3o", *s);
566       t += 4;
567     } else {
568       *t++ = *s;
569     }
570   }
571   if (quote) *t++ = '"';
572   *t = 0;
573   return result;
574 }
575 
show_label(char * prefix,char * filename,struct stat * sb)576 static void show_label(char *prefix, char *filename, struct stat *sb)
577 {
578   char date[36];
579   char *quoted_file;
580 
581   quoted_file = quote_filename(filename);
582   printf("%s %s\t%s\n", prefix, quoted_file,
583     format_iso_time(date, sizeof(date), &sb->st_mtim));
584   free(quoted_file);
585 }
586 
do_diff(char ** files)587 static void do_diff(char **files)
588 {
589 
590   long i = 1, size = 1, x = 0, change = 0, ignore_white,
591    start1, end1, start2, end2;
592   struct diff *d;
593   struct arg_list *llist = TT.L_list;
594   int *J;
595 
596   TT.offset[0] = TT.offset[1] = NULL;
597   J = diff(files);
598 
599   if (!J) return; //No need to compare, have to status only
600 
601   d = xzalloc(size *sizeof(struct diff));
602   do {
603     ignore_white = 0;
604     for (d[x].a = i; d[x].a <= file[0].len; d[x].a++) {
605       if (J[d[x].a] != (J[d[x].a - 1] + 1)) break;
606       else continue;
607     }
608     d[x].c = (J[d[x].a - 1] + 1);
609 
610     for (d[x].b = (d[x].a - 1); d[x].b <= file[0].len; d[x].b++) {
611       if (J[d[x].b + 1]) break;
612       else continue;
613     }
614     d[x].d = (J[d[x].b + 1] - 1);
615 
616     if ((toys.optflags & FLAG_B)) {
617       if (d[x].a <= d[x].b) {
618         if ((TT.offset[0][d[x].b] - TT.offset[0][d[x].a - 1])
619             == (d[x].b - d[x].a + 1))
620           ignore_white = 1;
621       } else if (d[x].c <= d[x].d){
622         if ((TT.offset[1][d[x].d] - TT.offset[1][d[x].c - 1])
623             == (d[x].d - d[x].c + 1))
624           ignore_white = 1;
625       }
626     }
627 
628     if ((d[x].a <= d[x].b || d[x].c <= d[x].d) && !ignore_white)
629       change = 1; //is we have diff ?
630 
631     if (!ignore_white) d = xrealloc(d, (x + 2) *sizeof(struct diff));
632     i = d[x].b + 1;
633     if (i > file[0].len) break;
634     J[d[x].b] = d[x].d;
635     if (!ignore_white) x++;
636   } while (i <= file[0].len);
637 
638   i = x+1;
639   TT.status = change; //update status, may change bcoz of -w etc.
640 
641   if (!(toys.optflags & FLAG_q) && change) {  //start of !FLAG_q
642     if (toys.optflags & FLAG_color) printf("\033[1m");
643     if (toys.optflags & FLAG_L) printf("--- %s\n", llist->arg);
644     else show_label("---", files[0], &(TT).st[0]);
645     if (((toys.optflags & FLAG_L) && !llist->next) || !(toys.optflags & FLAG_L))
646       show_label("+++", files[1], &(TT).st[1]);
647     else {
648       while (llist->next) llist = llist->next;
649       printf("+++ %s\n", llist->arg);
650     }
651     if (toys.optflags & FLAG_color) printf("\033[0m");
652 
653     struct diff *t, *ptr1 = d, *ptr2 = d;
654     while (i) {
655       long a,b;
656 
657       if (TT.ct > file[0].len) TT.ct = file[0].len; //trim context to file len.
658       if (ptr1->b < ptr1->a && ptr1->d < ptr1->c) {
659         i--;
660         continue;
661       }
662       //Handle the context stuff
663       a =  ptr1->a;
664       b =  ptr1->b;
665 
666       b  = MIN(file[0].len, b);
667       if (i == x + 1) ptr1->suff = MAX(1,a - TT.ct);
668       else {
669         if ((ptr1 - 1)->prev >= (ptr1->a - TT.ct))
670           ptr1->suff = (ptr1 - 1)->prev + 1;
671         else ptr1->suff =  ptr1->a - TT.ct;
672       }
673 calc_ct:
674       if (i > 1) {
675         if ((ptr2->b + TT.ct) >= (ptr2  + 1)->a) {
676           ptr2++;
677           i--;
678           goto calc_ct;
679         } else ptr2->prev = ptr2->b + TT.ct;
680       } else ptr2->prev = ptr2->b;
681       start1 = (ptr2->prev - ptr1->suff + 1);
682       end1 = (start1 == 1) ? -1 : start1;
683       start2 = MAX(1, ptr1->c - (ptr1->a - ptr1->suff));
684       end2 = ptr2->prev - ptr2->b + ptr2->d;
685 
686       if (toys.optflags & FLAG_color) printf("\033[36m");
687       printf("@@ -%ld", start1 ? ptr1->suff: (ptr1->suff -1));
688       if (end1 != -1) printf(",%ld ", ptr2->prev-ptr1->suff + 1);
689       else putchar(' ');
690 
691       printf("+%ld", (end2 - start2 + 1) ? start2: (start2 -1));
692       if ((end2 - start2 +1) != 1) printf(",%ld ", (end2 - start2 +1));
693       else putchar(' ');
694       printf("@@");
695       if (toys.optflags & FLAG_color) printf("\033[0m");
696       putchar('\n');
697 
698       for (t = ptr1; t <= ptr2; t++) {
699         if (t== ptr1) print_diff(t->suff, t->a-1, ' ', TT.offset[0], file[0].fp);
700         print_diff(t->a, t->b, '-', TT.offset[0], file[0].fp);
701         print_diff(t->c, t->d, '+', TT.offset[1], file[1].fp);
702         if (t == ptr2)
703           print_diff(t->b+1, (t)->prev, ' ', TT.offset[0], file[0].fp);
704         else print_diff(t->b+1, (t+1)->a-1, ' ', TT.offset[0], file[0].fp);
705       }
706       ptr2++;
707       ptr1 = ptr2;
708       i--;
709     } //end of while
710   } //End of !FLAG_q
711   free(d);
712   free(J);
713   free(TT.offset[0]);
714   free(TT.offset[1]);
715 }
716 
show_status(char ** files)717 static void show_status(char **files)
718 {
719   switch (TT.status) {
720     case SAME:
721       if (toys.optflags & FLAG_s)
722         printf("Files %s and %s are identical\n",files[0], files[1]);
723       break;
724     case DIFFER:
725       if ((toys.optflags & FLAG_q) || TT.is_binary)
726         printf("Files %s and %s differ\n",files[0], files[1]);
727       break;
728   }
729 }
730 
create_empty_entry(int l,int r,int j)731 static void create_empty_entry(int l , int r, int j)
732 {
733   struct stat st[2];
734   char *f[2], *path[2];
735   int i;
736 
737   if (j > 0 && (toys.optflags & FLAG_N)) {
738     path[0] = concat_file_path(dir[0].list[0], dir[1].list[r] + TT.len[1]);
739     f[0] = "/dev/null";
740     path[1] = f[1] = dir[1].list[r];
741     stat(f[1], &st[0]);
742     st[1] = st[0];
743   }
744   else if (j < 0 && (toys.optflags & FLAG_N)) {
745     path[1] = concat_file_path(dir[1].list[0], dir[0].list[l] + TT.len[0]);
746     f[1] = "/dev/null";
747     path[0] = f[0] = dir[0].list[l];
748     stat(f[0], &st[0]);
749     st[1] = st[0];
750   }
751 
752   if (!j) {
753     for (i = 0; i < 2; i++) {
754       path[i] = f[i] = dir[i].list[!i ? l: r];
755       stat(f[i], &st[i]);
756     }
757   }
758 
759   if (S_ISDIR(st[0].st_mode) && S_ISDIR(st[1].st_mode))
760     printf("Common subdirectories: %s and %s\n", path[0], path[1]);
761   else if (!S_ISREG(st[0].st_mode) && !S_ISDIR(st[0].st_mode))
762     printf("File %s is not a regular file or directory "
763         "and was skipped\n", path[0]);
764   else if (!S_ISREG(st[1].st_mode) && !S_ISDIR(st[1].st_mode))
765     printf("File %s is not a regular file or directory "
766         "and was skipped\n", path[1]);
767   else if (S_ISDIR(st[0].st_mode) != S_ISDIR(st[1].st_mode)) {
768     if (S_ISDIR(st[0].st_mode))
769       printf("File %s is a %s while file %s is a"
770           " %s\n", path[0], "directory", path[1], "regular file");
771     else
772       printf("File %s is a %s while file %s is a"
773           " %s\n", path[0], "regular file", path[1], "directory");
774   } else {
775     do_diff(f);
776     show_status(path);
777     if (file[0].fp) fclose(file[0].fp);
778     if (file[1].fp) fclose(file[1].fp);
779   }
780 
781   if ((toys.optflags & FLAG_N) && j) {
782     if (j > 0) free(path[0]);
783     else free(path[1]);
784   }
785 }
786 
diff_dir(int * start)787 static void diff_dir(int *start)
788 {
789   int l, r, j = 0;
790 
791   l = start[0]; //left side file start
792   r = start[1]; //right side file start
793   while (l < dir[0].nr_elm && r < dir[1].nr_elm) {
794     if ((j = strcmp ((dir[0].list[l] + TT.len[0]),
795             (dir[1].list[r] + TT.len[1]))) && !(toys.optflags & FLAG_N)) {
796       if (j > 0) {
797         printf ("Only in %s: %s\n", dir[1].list[0], dir[1].list[r] + TT.len[1]);
798         free(dir[1].list[r]);
799         r++;
800       } else {
801         printf ("Only in %s: %s\n", dir[0].list[0], dir[0].list[l] + TT.len[0]);
802         free(dir[0].list[l]);
803         l++;
804       }
805       TT.status = DIFFER;
806     } else {
807       create_empty_entry(l, r, j); //create non empty dirs/files if -N.
808       if (j > 0) {
809         free(dir[1].list[r]);
810         r++;
811       } else if (j < 0) {
812         free(dir[0].list[l]);
813         l++;
814       } else {
815         free(dir[1].list[r]);
816         free(dir[0].list[l]);
817         l++;
818         r++;
819       }
820     }
821   }
822 
823   if (l == dir[0].nr_elm) {
824     while (r < dir[1].nr_elm) {
825       if (!(toys.optflags & FLAG_N)) {
826         printf ("Only in %s: %s\n", dir[1].list[0], dir[1].list[r] + TT.len[1]);
827         TT.status = DIFFER;
828       } else create_empty_entry(l, r, 1);
829       free(dir[1].list[r]);
830       r++;
831     }
832   } else if (r == dir[1].nr_elm) {
833     while (l < dir[0].nr_elm) {
834       if (!(toys.optflags & FLAG_N)) {
835         printf ("Only in %s: %s\n", dir[0].list[0], dir[0].list[l] + TT.len[0]);
836         TT.status = DIFFER;
837       } else create_empty_entry(l, r, -1);
838       free(dir[0].list[l]);
839       l++;
840     }
841   }
842   free(dir[0].list[0]); //we are done, free root nodes too
843   free(dir[1].list[0]);
844 }
845 
diff_main(void)846 void diff_main(void)
847 {
848   int j = 0, k = 1, start[2] = {1, 1};
849   char *files[2];
850 
851   toys.exitval = 2;
852 
853   if ((toys.optflags & FLAG_color) && !isatty(1)) toys.optflags ^= FLAG_color;
854 
855   for (j = 0; j < 2; j++) {
856     files[j] = toys.optargs[j];
857     if (IS_STDIN(files[j])) {
858       if (fstat(0, &TT.st[j]) == -1)
859         perror_exit("can't fstat %s", files[j]);
860     } else {
861       xstat(files[j], &TT.st[j]);
862     }
863   }
864 
865   if ((IS_STDIN(files[0]) || IS_STDIN(files[1]))
866       && (S_ISDIR(TT.st[0].st_mode) || S_ISDIR(TT.st[1].st_mode)))
867     error_exit("can't compare stdin to directory");
868 
869   if ((TT.st[0].st_ino == TT.st[1].st_ino) //physicaly same device
870       && (TT.st[0].st_dev == TT.st[1].st_dev)) {
871     toys.exitval = 0;
872     return show_status(files);
873   }
874 
875   if (S_ISDIR(TT.st[0].st_mode) && S_ISDIR(TT.st[1].st_mode)) {
876     for (j = 0; j < 2; j++) {
877       memset(&dir[j], 0, sizeof(struct dir_t));
878       dirtree_flagread(files[j], DIRTREE_SYMFOLLOW, list_dir);
879       dir[j].nr_elm = TT.size; //size updated in list_dir
880       qsort(&(dir[j].list[1]), (TT.size - 1), sizeof(char*), cmp);
881 
882       TT.len[j] = strlen(dir[j].list[0]); //calc root node len
883       TT.len[j] += (dir[j].list[0][TT.len[j] -1] != '/');
884 
885       if (toys.optflags & FLAG_S) {
886         while (k < TT.size && strcmp(dir[j].list[k] +
887               TT.len[j], TT.start) < 0) {
888           start[j] += 1;
889           k++;
890         }
891       }
892       TT.dir_num++;
893       TT.size = 0;
894       k = 1;
895     }
896     diff_dir(start);
897     free(dir[0].list); //free array
898     free(dir[1].list);
899   } else {
900     if (S_ISDIR(TT.st[0].st_mode) || S_ISDIR(TT.st[1].st_mode)) {
901       int d = S_ISDIR(TT.st[0].st_mode);
902       char *slash = strrchr(files[d], '/');
903 
904       files[1 - d] = concat_file_path(files[1 - d], slash ? slash + 1 : files[d]);
905       if ((stat(files[1 - d], &TT.st[1 - d])) == -1)
906         perror_exit("%s", files[1 - d]);
907     }
908     do_diff(files);
909     show_status(files);
910     if (file[0].fp) fclose(file[0].fp);
911     if (file[1].fp) fclose(file[1].fp);
912   }
913   toys.exitval = TT.status; //exit status will be the status
914 }
915