1 /* uniq.c - report or filter out repeated lines in a file
2  *
3  * Copyright 2012 Georgi Chorbadzhiyski <georgi@unixsol.org>
4  *
5  * See http://opengroup.org/onlinepubs/9699919799/utilities/uniq.html
6 
7 USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_USR|TOYFLAG_BIN))
8 
9 config UNIQ
10   bool "uniq"
11   default y
12   help
13     usage: uniq [-cduiz] [-w maxchars] [-f fields] [-s char] [input_file [output_file]]
14 
15     Report or filter out repeated lines in a file
16 
17     -c	show counts before each line
18     -d	show only lines that are repeated
19     -u	show only lines that are unique
20     -i	ignore case when comparing lines
21     -z	lines end with \0 not \n
22     -w	compare maximum X chars per line
23     -f	ignore first X fields
24     -s	ignore first X chars
25 */
26 
27 #define FOR_uniq
28 #include "toys.h"
29 
GLOBALS(long maxchars;long nchars;long nfields;long repeats;)30 GLOBALS(
31   long maxchars;
32   long nchars;
33   long nfields;
34   long repeats;
35 )
36 
37 static char *skip(char *str)
38 {
39   long nchars = TT.nchars, nfields;
40 
41   // Skip fields first
42   for (nfields = TT.nfields; nfields; str++) {
43     while (*str && isspace(*str)) str++;
44     while (*str && !isspace(*str)) str++;
45     nfields--;
46   }
47   // Skip chars
48   while (*str && nchars--) str++;
49 
50   return str;
51 }
52 
print_line(FILE * f,char * line)53 static void print_line(FILE *f, char *line)
54 {
55   if (toys.optflags & (TT.repeats ? FLAG_u : FLAG_d)) return;
56   if (toys.optflags & FLAG_c) fprintf(f, "%7lu ", TT.repeats + 1);
57   fputs(line, f);
58   if (toys.optflags & FLAG_z) fputc(0, f);
59 }
60 
uniq_main(void)61 void uniq_main(void)
62 {
63   FILE *infile = stdin, *outfile = stdout;
64   char *thisline = NULL, *prevline = NULL, *tmpline, eol = '\n';
65   size_t thissize, prevsize = 0, tmpsize;
66 
67   if (toys.optc >= 1) infile = xfopen(toys.optargs[0], "r");
68   if (toys.optc >= 2) outfile = xfopen(toys.optargs[1], "w");
69 
70   if (toys.optflags & FLAG_z) eol = 0;
71 
72   // If first line can't be read
73   if (getdelim(&prevline, &prevsize, eol, infile) < 0)
74     return;
75 
76   while (getdelim(&thisline, &thissize, eol, infile) > 0) {
77     int diff;
78     char *t1, *t2;
79 
80     // If requested get the chosen fields + character offsets.
81     if (TT.nfields || TT.nchars) {
82       t1 = skip(thisline);
83       t2 = skip(prevline);
84     } else {
85       t1 = thisline;
86       t2 = prevline;
87     }
88 
89     if (TT.maxchars == 0) {
90       diff = !(toys.optflags & FLAG_i) ? strcmp(t1, t2) : strcasecmp(t1, t2);
91     } else {
92       diff = !(toys.optflags & FLAG_i) ? strncmp(t1, t2, TT.maxchars)
93               : strncasecmp(t1, t2, TT.maxchars);
94     }
95 
96     if (diff == 0) { // same
97       TT.repeats++;
98     } else {
99       print_line(outfile, prevline);
100 
101       TT.repeats = 0;
102 
103       tmpline = prevline;
104       prevline = thisline;
105       thisline = tmpline;
106 
107       tmpsize = prevsize;
108       prevsize = thissize;
109       thissize = tmpsize;
110     }
111   }
112 
113   print_line(outfile, prevline);
114 
115   if (CFG_TOYBOX_FREE) {
116     if (outfile != stdout) fclose(outfile);
117     if (infile != stdin) fclose(infile);
118     free(prevline);
119     free(thisline);
120   }
121 }
122