1 /* sort.c - put input lines into order
2  *
3  * Copyright 2004, 2008 Rob Landley <rob@landley.net>
4  *
5  * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
6 
7 USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")USE_SORT_BIG("S:T:m" "o:k*t:xbMcszdfi") "run", TOYFLAG_USR|TOYFLAG_BIN))
8 
9 config SORT
10   bool "sort"
11   default y
12   help
13     usage: sort [-run] [FILE...]
14 
15     Sort all lines of text from input files (or stdin) to stdout.
16 
17     -r	reverse
18     -u	unique lines only
19     -n	numeric order (instead of alphabetical)
20 
21 config SORT_BIG
22   bool "SuSv3 options (Support -ktcsbdfiozM)"
23   default y
24   depends on SORT
25   help
26     usage: sort [-bcdfiMsz] [-k#[,#[x]] [-t X]] [-o FILE]
27 
28     -b	ignore leading blanks (or trailing blanks in second part of key)
29     -c	check whether input is sorted
30     -d	dictionary order (use alphanumeric and whitespace chars only)
31     -f	force uppercase (case insensitive sort)
32     -i	ignore nonprinting characters
33     -M	month sort (jan, feb, etc).
34     -x	Hexadecimal numerical sort
35     -s	skip fallback sort (only sort with keys)
36     -z	zero (null) terminated input
37     -k	sort by "key" (see below)
38     -t	use a key separator other than whitespace
39     -o	output to FILE instead of stdout
40 
41     Sorting by key looks at a subset of the words on each line.  -k2
42     uses the second word to the end of the line, -k2,2 looks at only
43     the second word, -k2,4 looks from the start of the second to the end
44     of the fourth word.  Specifying multiple keys uses the later keys as
45     tie breakers, in order.  A type specifier appended to a sort key
    (such as -k2,2n) applies only to sorting that key.
47 
48 config SORT_FLOAT
49   bool
50   default y
51   depends on SORT_BIG && TOYBOX_FLOAT
52   help
53     usage: sort [-g]
54 
55     -g	general numeric sort (double precision with nan and inf)
56 */
57 
58 #define FOR_sort
59 #include "toys.h"
60 
61 GLOBALS(
62   char *key_separator;
63   struct arg_list *raw_keys;
64   char *outfile;
65   char *ignore1, ignore2;   // GNU compatability NOPs for -S and -T.
66 
67   void *key_list;
68   int linecount;
69   char **lines;
70 )
71 
// The sort types are n, g, M, and x.
// u, c, s, and z apply to top level only, not to keys.
// b at top level implies bb.
// The remaining options can be applied to search keys.

#define FLAG_bb (1<<31)  // Ignore trailing blanks

// One sort key: either parsed from a -k argument or the implicit whole line
// key added when no -k was given.
struct sort_key
{
  struct sort_key *next_key;  // linked list
  unsigned range[4];          // start word, start char, end word, end char
  int flags;                  // per-key options; 0 means use toys.optflags
};
85 
86 // Copy of the part of this string corresponding to a key/flags.
87 
get_key_data(char * str,struct sort_key * key,int flags)88 static char *get_key_data(char *str, struct sort_key *key, int flags)
89 {
90   int start=0, end, len, i, j;
91 
92   // Special case whole string, so we don't have to make a copy
93 
94   if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
95     && !(flags&(FLAG_b&FLAG_d&FLAG_f&FLAG_i&FLAG_bb))) return str;
96 
97   // Find start of key on first pass, end on second pass
98 
99   len = strlen(str);
100   for (j=0; j<2; j++) {
101     if (!key->range[2*j]) end=len;
102 
103     // Loop through fields
104     else {
105       end=0;
106       for (i=1; i < key->range[2*j]+j; i++) {
107 
108         // Skip leading blanks
109         if (str[end] && !TT.key_separator)
110           while (isspace(str[end])) end++;
111 
112         // Skip body of key
113         for (; str[end]; end++) {
114           if (TT.key_separator) {
115             if (str[end]==*TT.key_separator) break;
116           } else if (isspace(str[end])) break;
117         }
118       }
119     }
120     if (!j) start=end;
121   }
122 
123   // Key with explicit separator starts after the separator
124   if (TT.key_separator && str[start]==*TT.key_separator) start++;
125 
126   // Strip leading and trailing whitespace if necessary
127   if (flags&FLAG_b) while (isspace(str[start])) start++;
128   if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
129 
130   // Handle offsets on start and end
131   if (key->range[3]) {
132     end += key->range[3]-1;
133     if (end>len) end=len;
134   }
135   if (key->range[1]) {
136     start += key->range[1]-1;
137     if (start>len) start=len;
138   }
139 
140   // Make the copy
141   if (end<start) end=start;
142   str = xstrndup(str+start, end-start);
143 
144   // Handle -d
145   if (flags&FLAG_d) {
146     for (start = end = 0; str[end]; end++)
147       if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
148     str[start] = 0;
149   }
150 
151   // Handle -i
152   if (flags&FLAG_i) {
153     for (start = end = 0; str[end]; end++)
154       if (isprint(str[end])) str[start++] = str[end];
155     str[start] = 0;
156   }
157 
158   // Handle -f
159   if (flags*FLAG_f) for(i=0; str[i]; i++) str[i] = toupper(str[i]);
160 
161   return str;
162 }
163 
164 // append a sort_key to key_list.
165 
add_key(void)166 static struct sort_key *add_key(void)
167 {
168   void **stupid_compiler = &TT.key_list;
169   struct sort_key **pkey = (struct sort_key **)stupid_compiler;
170 
171   while (*pkey) pkey = &((*pkey)->next_key);
172   return *pkey = xzalloc(sizeof(struct sort_key));
173 }
174 
175 // Perform actual comparison
compare_values(int flags,char * x,char * y)176 static int compare_values(int flags, char *x, char *y)
177 {
178   int ff = flags & (FLAG_n|FLAG_g|FLAG_M|FLAG_x);
179 
180   // Ascii sort
181   if (!ff) return strcmp(x, y);
182 
183   if (CFG_SORT_FLOAT && ff == FLAG_g) {
184     char *xx,*yy;
185     double dx = strtod(x,&xx), dy = strtod(y,&yy);
186     int xinf, yinf;
187 
188     // not numbers < NaN < -infinity < numbers < +infinity
189 
190     if (x==xx) return y==yy ? 0 : -1;
191     if (y==yy) return 1;
192 
193     // Check for isnan
194     if (dx!=dx) return (dy!=dy) ? 0 : -1;
195     if (dy!=dy) return 1;
196 
197     // Check for infinity.  (Could underflow, but avoids needing libm.)
198     xinf = (1.0/dx == 0.0);
199     yinf = (1.0/dy == 0.0);
200     if (xinf) {
201       if(dx<0) return (yinf && dy<0) ? 0 : -1;
202       return (yinf && dy>0) ? 0 : 1;
203     }
204     if (yinf) return dy<0 ? 1 : -1;
205 
206     return dx>dy ? 1 : (dx<dy ? -1 : 0);
207   } else if (CFG_SORT_BIG && ff == FLAG_M) {
208     struct tm thyme;
209     int dx;
210     char *xx,*yy;
211 
212     xx = strptime(x,"%b",&thyme);
213     dx = thyme.tm_mon;
214     yy = strptime(y,"%b",&thyme);
215     if (!xx) return !yy ? 0 : -1;
216     else if (!yy) return 1;
217     else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
218 
219   } else if (CFG_SORT_BIG && ff == FLAG_x) {
220     return strtol(x, NULL, 16)-strtol(y, NULL, 16);
221   // This has to be ff == FLAG_n
222   } else {
223     // Full floating point version of -n
224     if (CFG_SORT_FLOAT) {
225       double dx = atof(x), dy = atof(y);
226 
227       return dx>dy ? 1 : (dx<dy ? -1 : 0);
228     // Integer version of -n for tiny systems
229     } else return atoi(x)-atoi(y);
230   }
231 }
232 
233 // Callback from qsort(): Iterate through key_list and perform comparisons.
compare_keys(const void * xarg,const void * yarg)234 static int compare_keys(const void *xarg, const void *yarg)
235 {
236   int flags = toys.optflags, retval = 0;
237   char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
238   struct sort_key *key;
239 
240   if (CFG_SORT_BIG) {
241     for (key=(struct sort_key *)TT.key_list; !retval && key;
242        key = key->next_key)
243     {
244       flags = key->flags ? key->flags : toys.optflags;
245 
246       // Chop out and modify key chunks, handling -dfib
247 
248       x = get_key_data(xx, key, flags);
249       y = get_key_data(yy, key, flags);
250 
251       retval = compare_values(flags, x, y);
252 
253       // Free the copies get_key_data() made.
254 
255       if (x != xx) free(x);
256       if (y != yy) free(y);
257 
258       if (retval) break;
259     }
260   } else retval = compare_values(flags, xx, yy);
261 
262   // Perform fallback sort if necessary
263   if (!retval && !(CFG_SORT_BIG && (toys.optflags&FLAG_s))) {
264     retval = strcmp(xx, yy);
265     flags = toys.optflags;
266   }
267 
268   return retval * ((flags&FLAG_r) ? -1 : 1);
269 }
270 
271 // Callback from loopfiles to handle input files.
sort_read(int fd,char * name)272 static void sort_read(int fd, char *name)
273 {
274   // Read each line from file, appending to a big array.
275 
276   for (;;) {
277     char * line = (CFG_SORT_BIG && (toys.optflags&FLAG_z))
278              ? get_rawline(fd, NULL, 0) : get_line(fd);
279 
280     if (!line) break;
281 
282     // handle -c here so we don't allocate more memory than necessary.
283     if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) {
284       int j = (toys.optflags&FLAG_u) ? -1 : 0;
285 
286       if (TT.lines && compare_keys((void *)&TT.lines, &line)>j)
287         error_exit("%s: Check line %d\n", name, TT.linecount);
288       free(TT.lines);
289       TT.lines = (char **)line;
290     } else {
291       if (!(TT.linecount&63))
292         TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
293       TT.lines[TT.linecount] = line;
294     }
295     TT.linecount++;
296   }
297 }
298 
sort_main(void)299 void sort_main(void)
300 {
301   int idx, fd = 1;
302 
303   // Open output file if necessary.
304   if (CFG_SORT_BIG && TT.outfile)
305     fd = xcreate(TT.outfile, O_CREAT|O_TRUNC|O_WRONLY, 0666);
306 
307   // Parse -k sort keys.
308   if (CFG_SORT_BIG && TT.raw_keys) {
309     struct arg_list *arg;
310 
311     for (arg = TT.raw_keys; arg; arg = arg->next) {
312       struct sort_key *key = add_key();
313       char *temp;
314       int flag;
315 
316       idx = 0;
317       temp = arg->arg;
318       while (*temp) {
319         // Start of range
320         key->range[2*idx] = (unsigned)strtol(temp, &temp, 10);
321         if (*temp=='.')
322           key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10);
323 
324         // Handle flags appended to a key type.
325         for (;*temp;temp++) {
326           char *temp2, *optlist;
327 
328           // Note that a second comma becomes an "Unknown key" error.
329 
330           if (*temp==',' && !idx++) {
331             temp++;
332             break;
333           }
334 
335           // Which flag is this?
336 
337           optlist = toys.which->options;
338           temp2 = strchr(optlist, *temp);
339           flag = (1<<(optlist-temp2+strlen(optlist)-1));
340 
341           // Was it a flag that can apply to a key?
342 
343           if (!temp2 || flag>FLAG_b
344             || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
345           {
346             error_exit("Unknown key option.");
347           }
348           // b after , means strip _trailing_ space, not leading.
349           if (idx && flag==FLAG_b) flag = FLAG_bb;
350           key->flags |= flag;
351         }
352       }
353     }
354   }
355 
356   // global b flag strips both leading and trailing spaces
357   if (toys.optflags&FLAG_b) toys.optflags |= FLAG_bb;
358 
359   // If no keys, perform alphabetic sort over the whole line.
360   if (CFG_SORT_BIG && !TT.key_list) add_key()->range[0] = 1;
361 
362   // Open input files and read data, populating TT.lines[TT.linecount]
363   loopfiles(toys.optargs, sort_read);
364 
365   // The compare (-c) logic was handled in sort_read(),
366   // so if we got here, we're done.
367   if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) goto exit_now;
368 
369   // Perform the actual sort
370   qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
371 
372   // handle unique (-u)
373   if (toys.optflags&FLAG_u) {
374     int jdx;
375 
376     for (jdx=0, idx=1; idx<TT.linecount; idx++) {
377       if (!compare_keys(&TT.lines[jdx], &TT.lines[idx]))
378         free(TT.lines[idx]);
379       else TT.lines[++jdx] = TT.lines[idx];
380     }
381     if (TT.linecount) TT.linecount = jdx+1;
382   }
383 
384   // Output result
385   for (idx = 0; idx<TT.linecount; idx++) {
386     char *s = TT.lines[idx];
387     xwrite(fd, s, strlen(s));
388     if (CFG_TOYBOX_FREE) free(s);
389     xwrite(fd, "\n", 1);
390   }
391 
392 exit_now:
393   if (CFG_TOYBOX_FREE) {
394     if (fd != 1) close(fd);
395     free(TT.lines);
396   }
397 }
398