1 /* sort.c - put input lines into order
2  *
3  * Copyright 2004, 2008 Rob Landley <rob@landley.net>
4  *
5  * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
6  *
7  * Deviations from POSIX: Lots.
8  * We invented -x
9 
10 USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")"S:T:m" "o:k*t:" "xVbMcszdfirun", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
11 
12 config SORT
13   bool "sort"
14   default y
15   help
16     usage: sort [-runbcdfiMsz] [FILE...] [-k#[,#[x]] [-t X]] [-o FILE]
17 
18     Sort all lines of text from input files (or stdin) to stdout.
19 
20     -r	Reverse
21     -u	Unique lines only
22     -n	Numeric order (instead of alphabetical)
23     -b	Ignore leading blanks (or trailing blanks in second part of key)
24     -c	Check whether input is sorted
25     -d	Dictionary order (use alphanumeric and whitespace chars only)
26     -f	Force uppercase (case insensitive sort)
27     -i	Ignore nonprinting characters
28     -M	Month sort (jan, feb, etc)
29     -x	Hexadecimal numerical sort
30     -s	Skip fallback sort (only sort with keys)
31     -z	Zero (null) terminated lines
32     -k	Sort by "key" (see below)
33     -t	Use a key separator other than whitespace
34     -o	Output to FILE instead of stdout
35     -V	Version numbers (name-1.234-rc6.5b.tgz)
36 
37     Sorting by key looks at a subset of the words on each line. -k2 uses the
38     second word to the end of the line, -k2,2 looks at only the second word,
39     -k2,4 looks from the start of the second to the end of the fourth word.
40     -k2.4,5 starts from the fourth character of the second word, to the end
41     of the fifth word. Specifying multiple keys uses the later keys as tie
    breakers, in order. A type specifier appended to a sort key (such as
    -k2,2n) applies only to sorting that key.
44 
45 config SORT_FLOAT
46   bool
47   default y
48   depends on TOYBOX_FLOAT
49   help
50     usage: sort [-g]
51 
52     -g	General numeric sort (double precision with nan and inf)
53 */
54 
55 #define FOR_sort
56 #include "toys.h"
57 
// Command state, reached through TT.
// NOTE(review): the first five members presumably mirror the argument-taking
// options on the NEWTOY() line (t: k* o: T: S:) and are filled in by the
// option parser -- confirm against the toybox argument parsing docs before
// reordering or retyping them.
GLOBALS(
  char *t;              // -t key separator; only its first byte is compared
  struct arg_list *k;   // -k key specifications, parsed in sort_main()
  char *o, *T, S;       // -o output file. T and S are not read in this file.
                        // NOTE(review): S is a plain char although the
                        // optstring says "S:" -- confirm this is intended.

  void *key_list;       // head of the struct sort_key list built by add_key()
  int linecount;        // number of lines read so far
  char **lines;         // array of input lines (with -c sort_read() stores
                        // the previous line here instead, cast to char **)
)
67 
// The sort types are n, g, M, x, and V.
// u, c, s, and z apply to top level only, not to keys.
// b at top level implies bb.
// The remaining options can be applied to search keys.

// Internal flag with no command line letter of its own: set by a global -b,
// or by a "b" that follows the comma in a -k key specification.
#define FLAG_bb (1<<31)  // Ignore trailing blanks
74 
// One sort key: either a parsed -k argument or the implicit whole-line key.
struct sort_key
{
  struct sort_key *next_key;  // linked list
  unsigned range[4];          // start word, start char, end word, end char
                              // (as given to -k, 0 when not specified)
  int flags;                  // FLAG_* bits given on this key; 0 means use
                              // the global toys.optflags instead
};
81 
82 // Copy of the part of this string corresponding to a key/flags.
83 
get_key_data(char * str,struct sort_key * key,int flags)84 static char *get_key_data(char *str, struct sort_key *key, int flags)
85 {
86   int start=0, end, len, i, j;
87 
88   // Special case whole string, so we don't have to make a copy
89 
90   if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
91     && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
92 
93   // Find start of key on first pass, end on second pass
94 
95   len = strlen(str);
96   for (j=0; j<2; j++) {
97     if (!key->range[2*j]) end=len;
98 
99     // Loop through fields
100     else {
101       end=0;
102       for (i=1; i < key->range[2*j]+j; i++) {
103 
104         // Skip leading blanks
105         if (str[end] && !TT.t) while (isspace(str[end])) end++;
106 
107         // Skip body of key
108         for (; str[end]; end++) {
109           if (TT.t) {
110             if (str[end]==*TT.t) {
111               end++;
112               break;
113             }
114           } else if (isspace(str[end])) break;
115         }
116       }
117     }
118     if (!j) start=end;
119   }
120 
121   // Key with explicit separator starts after the separator
122   if (TT.t && str[start]==*TT.t) start++;
123 
124   // Strip leading and trailing whitespace if necessary
125   if ((flags&FLAG_b) || (!TT.t && !key->range[3]))
126     while (isspace(str[start])) start++;
127   if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
128 
129   // Handle offsets on start and end
130   if (key->range[3]) {
131     end += key->range[3]-1;
132     if (end>len) end=len;
133   }
134   if (key->range[1]) {
135     start += key->range[1]-1;
136     if (start>len) start=len;
137   }
138 
139   // Make the copy
140   if (end<start) end=start;
141   str = xstrndup(str+start, end-start);
142 
143   // Handle -d
144   if (flags&FLAG_d) {
145     for (start = end = 0; str[end]; end++)
146       if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
147     str[start] = 0;
148   }
149 
150   // Handle -i
151   if (flags&FLAG_i) {
152     for (start = end = 0; str[end]; end++)
153       if (isprint(str[end])) str[start++] = str[end];
154     str[start] = 0;
155   }
156 
157   return str;
158 }
159 
160 // append a sort_key to key_list.
161 
add_key(void)162 static struct sort_key *add_key(void)
163 {
164   void **stupid_compiler = &TT.key_list;
165   struct sort_key **pkey = (struct sort_key **)stupid_compiler;
166 
167   while (*pkey) pkey = &((*pkey)->next_key);
168   return *pkey = xzalloc(sizeof(struct sort_key));
169 }
170 
171 // Perform actual comparison
compare_values(int flags,char * x,char * y)172 static int compare_values(int flags, char *x, char *y)
173 {
174   if (CFG_SORT_FLOAT && (flags & FLAG_g)) {
175     char *xx,*yy;
176     double dx = strtod(x,&xx), dy = strtod(y,&yy);
177     int xinf, yinf;
178 
179     // not numbers < NaN < -infinity < numbers < +infinity
180 
181     if (x==xx) return y==yy ? 0 : -1;
182     if (y==yy) return 1;
183 
184     // Check for isnan
185     if (dx!=dx) return (dy!=dy) ? 0 : -1;
186     if (dy!=dy) return 1;
187 
188     // Check for infinity.  (Could underflow, but avoids needing libm.)
189     xinf = (1.0/dx == 0.0);
190     yinf = (1.0/dy == 0.0);
191     if (xinf) {
192       if(dx<0) return (yinf && dy<0) ? 0 : -1;
193       return (yinf && dy>0) ? 0 : 1;
194     }
195     if (yinf) return dy<0 ? 1 : -1;
196 
197     return dx>dy ? 1 : (dx<dy ? -1 : 0);
198   } else if (flags & FLAG_M) {
199     struct tm thyme;
200     int dx;
201     char *xx,*yy;
202 
203     xx = strptime(x,"%b",&thyme);
204     dx = thyme.tm_mon;
205     yy = strptime(y,"%b",&thyme);
206     if (!xx) return !yy ? 0 : -1;
207     else if (!yy) return 1;
208     else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
209 
210   } else if (flags & FLAG_x) return strtol(x, NULL, 16)-strtol(y, NULL, 16);
211   else if (flags & FLAG_V) {
212     while (*x && *y) {
213       while (*x && *x == *y) x++, y++;
214       if (isdigit(*x) && isdigit(*y)) {
215         long long xx = strtoll(x, &x, 10), yy = strtoll(y, &y, 10);
216 
217         if (xx<yy) return -1;
218         if (xx>yy) return 1;
219       } else {
220         char xx = *x ? *x : x[-1], yy = *y ? *y : y[-1];
221 
222         // -rc/-pre hack so abc-123 > abc-123-rc1 (other way already - < 0-9)
223         if (xx != yy) {
224           if (xx<yy && !strstart(&y, "-rc") && !strstart(&y, "-pre")) return -1;
225           else return 1;
226         }
227       }
228     }
229     return *x ? !!*y : -1;
230   } else if (flags & FLAG_n) {
231     // Full floating point version of -n
232     if (CFG_SORT_FLOAT) {
233       double dx = atof(x), dy = atof(y);
234 
235       return dx>dy ? 1 : (dx<dy ? -1 : 0);
236     // Integer version of -n for tiny systems
237     } else return atoi(x)-atoi(y);
238 
239   // Ascii sort
240   } else return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
241 }
242 
243 // Callback from qsort(): Iterate through key_list and perform comparisons.
compare_keys(const void * xarg,const void * yarg)244 static int compare_keys(const void *xarg, const void *yarg)
245 {
246   int flags = toys.optflags, retval = 0;
247   char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
248   struct sort_key *key;
249 
250   for (key=(struct sort_key *)TT.key_list; !retval && key; key = key->next_key){
251     flags = key->flags ? key->flags : toys.optflags;
252 
253     // Chop out and modify key chunks, handling -dfib
254 
255     x = get_key_data(xx, key, flags);
256     y = get_key_data(yy, key, flags);
257 
258     retval = compare_values(flags, x, y);
259 
260     // Free the copies get_key_data() made.
261 
262     if (x != xx) free(x);
263     if (y != yy) free(y);
264 
265     if (retval) break;
266   }
267 
268   // Perform fallback sort if necessary (always case insensitive, no -f,
269   // the point is to get a stable order even for -f sorts)
270   if (!retval && !FLAG(s)) {
271     flags = toys.optflags;
272     retval = strcmp(xx, yy);
273   }
274 
275   return retval * ((flags&FLAG_r) ? -1 : 1);
276 }
277 
278 // Callback from loopfiles to handle input files.
sort_read(int fd,char * name)279 static void sort_read(int fd, char *name)
280 {
281   // Read each line from file, appending to a big array.
282 
283   for (;;) {
284     char * line = FLAG(z) ? get_rawline(fd, NULL, 0) : get_line(fd);
285 
286     if (!line) break;
287 
288     // handle -c here so we don't allocate more memory than necessary.
289     if (FLAG(c)) {
290       int j = FLAG(u) ? -1 : 0;
291 
292       if (TT.lines && compare_keys((void *)&TT.lines, &line)>j)
293         error_exit("%s: Check line %d\n", name, TT.linecount);
294       free(TT.lines);
295       TT.lines = (char **)line;
296     } else {
297       if (!(TT.linecount&63))
298         TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
299       TT.lines[TT.linecount] = line;
300     }
301     TT.linecount++;
302   }
303 }
304 
sort_main(void)305 void sort_main(void)
306 {
307   int idx, fd = 1;
308 
309   // Parse -k sort keys.
310   if (TT.k) {
311     struct arg_list *arg;
312 
313     for (arg = TT.k; arg; arg = arg->next) {
314       struct sort_key *key = add_key();
315       char *temp;
316       int flag;
317 
318       idx = 0;
319       temp = arg->arg;
320       while (*temp) {
321         // Start of range
322         key->range[2*idx] = (unsigned)strtol(temp, &temp, 10);
323         if (*temp=='.')
324           key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10);
325 
326         // Handle flags appended to a key type.
327         for (;*temp;temp++) {
328           char *temp2, *optlist;
329 
330           // Note that a second comma becomes an "Unknown key" error.
331 
332           if (*temp==',' && !idx++) {
333             temp++;
334             break;
335           }
336 
337           // Which flag is this?
338 
339           optlist = toys.which->options;
340           temp2 = strchr(optlist, *temp);
341           flag = (1<<(optlist-temp2+strlen(optlist)-1));
342 
343           // Was it a flag that can apply to a key?
344 
345           if (!temp2 || flag>FLAG_x
346             || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
347           {
348             toys.exitval = 2;
349             error_exit("Unknown key option.");
350           }
351           // b after , means strip _trailing_ space, not leading.
352           if (idx && flag==FLAG_b) flag = FLAG_bb;
353           key->flags |= flag;
354         }
355       }
356     }
357   }
358 
359   // global b flag strips both leading and trailing spaces
360   if (FLAG(b)) toys.optflags |= FLAG_bb;
361 
362   // If no keys, perform alphabetic sort over the whole line.
363   if (!TT.key_list) add_key()->range[0] = 1;
364 
365   // Open input files and read data, populating TT.lines[TT.linecount]
366   loopfiles(toys.optargs, sort_read);
367 
368   // The compare (-c) logic was handled in sort_read(),
369   // so if we got here, we're done.
370   if (FLAG(c)) goto exit_now;
371 
372   // Perform the actual sort
373   qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
374 
375   // handle unique (-u)
376   if (FLAG(u)) {
377     int jdx;
378 
379     for (jdx=0, idx=1; idx<TT.linecount; idx++) {
380       if (!compare_keys(&TT.lines[jdx], &TT.lines[idx]))
381         free(TT.lines[idx]);
382       else TT.lines[++jdx] = TT.lines[idx];
383     }
384     if (TT.linecount) TT.linecount = jdx+1;
385   }
386 
387   // Open output file if necessary. We can't do this until we've finished
388   // reading in case the output file is one of the input files.
389   if (TT.o) fd = xcreate(TT.o, O_CREAT|O_TRUNC|O_WRONLY, 0666);
390 
391   // Output result
392   for (idx = 0; idx<TT.linecount; idx++) {
393     char *s = TT.lines[idx];
394     unsigned i = strlen(s);
395 
396     if (!FLAG(z)) s[i] = '\n';
397     xwrite(fd, s, i+1);
398     if (CFG_TOYBOX_FREE) free(s);
399   }
400 
401 exit_now:
402   if (CFG_TOYBOX_FREE) {
403     if (fd != 1) close(fd);
404     free(TT.lines);
405   }
406 }
407