1 /* uniq.c - report or filter out repeated lines in a file
2  *
3  * Copyright 2012 Georgi Chorbadzhiyski <georgi@unixsol.org>
4  *
5  * See http://opengroup.org/onlinepubs/9699919799/utilities/uniq.html
6 
7 USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_USR|TOYFLAG_BIN))
8 
9 config UNIQ
10   bool "uniq"
11   default y
12   help
13     usage: uniq [-cduiz] [-w maxchars] [-f fields] [-s char] [input_file [output_file]]
14 
15     Report or filter out repeated lines in a file
16 
17     -c	Show counts before each line
18     -d	Show only lines that are repeated
19     -u	Show only lines that are unique
20     -i	Ignore case when comparing lines
21     -z	Lines end with \0 not \n
22     -w	Compare maximum X chars per line
23     -f	Ignore first X fields
24     -s	Ignore first X chars
25 */
26 
27 #define FOR_uniq
28 #include "toys.h"
29 
GLOBALS(long w,s,f;long repeats;)30 GLOBALS(
31   long w, s, f;
32 
33   long repeats;
34 )
35 
36 static char *skip(char *str)
37 {
38   long nchars = TT.s, nfields = TT.f;
39 
40   // Skip fields first
41   while (nfields--) {
42     while (*str && isspace(*str)) str++;
43     while (*str && !isspace(*str)) str++;
44   }
45   // Skip chars
46   while (*str && nchars--) str++;
47 
48   return str;
49 }
50 
print_line(FILE * f,char * line)51 static void print_line(FILE *f, char *line)
52 {
53   if (TT.repeats ? FLAG(u) : FLAG(d)) return;
54   if (FLAG(c)) fprintf(f, "%7lu ", TT.repeats + 1);
55   fputs(line, f);
56   if (FLAG(z)) fputc(0, f);
57 }
58 
uniq_main(void)59 void uniq_main(void)
60 {
61   FILE *infile = stdin, *outfile = stdout;
62   char *thisline = 0, *prevline = 0, *tmpline, eol = '\n';
63   size_t thissize, prevsize = 0, tmpsize;
64 
65   if (toys.optc >= 1) infile = xfopen(toys.optargs[0], "r");
66   if (toys.optc >= 2) outfile = xfopen(toys.optargs[1], "w");
67 
68   if (FLAG(z)) eol = 0;
69 
70   // If first line can't be read
71   if (getdelim(&prevline, &prevsize, eol, infile) < 0) return;
72 
73   while (getdelim(&thisline, &thissize, eol, infile) > 0) {
74     int diff;
75     char *t1, *t2;
76 
77     // If requested get the chosen fields + character offsets.
78     if (TT.f || TT.s) {
79       t1 = skip(thisline);
80       t2 = skip(prevline);
81     } else {
82       t1 = thisline;
83       t2 = prevline;
84     }
85 
86     if (!TT.w)
87       diff = !FLAG(i) ? strcmp(t1, t2) : strcasecmp(t1, t2);
88     else diff = !FLAG(i) ? strncmp(t1, t2, TT.w) : strncasecmp(t1, t2, TT.w);
89 
90     if (!diff) TT.repeats++;
91     else {
92       print_line(outfile, prevline);
93 
94       TT.repeats = 0;
95 
96       tmpline = prevline;
97       prevline = thisline;
98       thisline = tmpline;
99 
100       tmpsize = prevsize;
101       prevsize = thissize;
102       thissize = tmpsize;
103     }
104   }
105 
106   print_line(outfile, prevline);
107 
108   if (CFG_TOYBOX_FREE) {
109     if (outfile != stdout) fclose(outfile);
110     if (infile != stdin) fclose(infile);
111     free(prevline);
112     free(thisline);
113   }
114 }
115