1 /* sort.c - put input lines into order
2 *
3 * Copyright 2004, 2008 Rob Landley <rob@landley.net>
4 *
5 * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
6 *
7 * Deviations from POSIX: Lots.
8 * We invented -x
9
10 USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")USE_SORT_BIG("S:T:m" "o:k*t:xbMcszdfi") "run", TOYFLAG_USR|TOYFLAG_BIN))
11
12 config SORT
13 bool "sort"
14 default y
15 help
16 usage: sort [-run] [FILE...]
17
18 Sort all lines of text from input files (or stdin) to stdout.
19
20 -r reverse
21 -u unique lines only
22 -n numeric order (instead of alphabetical)
23
24 config SORT_BIG
25 bool "SuSv3 options (Support -ktcsbdfiozM)"
26 default y
27 depends on SORT
28 help
29 usage: sort [-bcdfiMsz] [-k#[,#[x]] [-t X]] [-o FILE]
30
31 -b ignore leading blanks (or trailing blanks in second part of key)
32 -c check whether input is sorted
33 -d dictionary order (use alphanumeric and whitespace chars only)
34 -f force uppercase (case insensitive sort)
35 -i ignore nonprinting characters
36 -M month sort (jan, feb, etc).
37 -x Hexadecimal numerical sort
38 -s skip fallback sort (only sort with keys)
39 -z zero (null) terminated lines
40 -k sort by "key" (see below)
41 -t use a key separator other than whitespace
42 -o output to FILE instead of stdout
43
44 Sorting by key looks at a subset of the words on each line. -k2
45 uses the second word to the end of the line, -k2,2 looks at only
46 the second word, -k2,4 looks from the start of the second to the end
47 of the fourth word. Specifying multiple keys uses the later keys as
48 tie breakers, in order. A type specifier appended to a sort key
    (such as -k2,2n) applies only to sorting that key.
50
51 config SORT_FLOAT
52 bool
53 default y
54 depends on SORT_BIG && TOYBOX_FLOAT
55 help
56 usage: sort [-g]
57
58 -g general numeric sort (double precision with nan and inf)
59 */
60
61 #define FOR_sort
62 #include "toys.h"
63
64 GLOBALS(
65 char *key_separator;
66 struct arg_list *raw_keys;
67 char *outfile;
68 char *ignore1, ignore2; // GNU compatability NOPs for -S and -T.
69
70 void *key_list;
71 int linecount;
72 char **lines;
73 )
74
// The sort types are n, g, M, and x.
// u, c, s, and z apply to top level only, not to keys.
// b at top level implies bb.
// The remaining options can be applied to search keys.
79
// Extra flag bit (set for -b at top level or a 'b' after the comma in a key):
// ignore trailing blanks. Built from an unsigned shift because 1<<31
// overflows a signed int; the cast keeps the macro's type as int.
#define FLAG_bb ((int)(1U<<31))

// One sort key, from a -k argument or the implicit whole-line key.
struct sort_key
{
  struct sort_key *next_key;  // linked list
  unsigned range[4];          // start word, start char, end word, end char
  int flags;                  // option bits for this key; 0 means use global flags
};
88
89 // Copy of the part of this string corresponding to a key/flags.
90
get_key_data(char * str,struct sort_key * key,int flags)91 static char *get_key_data(char *str, struct sort_key *key, int flags)
92 {
93 int start=0, end, len, i, j;
94
95 // Special case whole string, so we don't have to make a copy
96
97 if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
98 && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
99
100 // Find start of key on first pass, end on second pass
101
102 len = strlen(str);
103 for (j=0; j<2; j++) {
104 if (!key->range[2*j]) end=len;
105
106 // Loop through fields
107 else {
108 end=0;
109 for (i=1; i < key->range[2*j]+j; i++) {
110
111 // Skip leading blanks
112 if (str[end] && !TT.key_separator)
113 while (isspace(str[end])) end++;
114
115 // Skip body of key
116 for (; str[end]; end++) {
117 if (TT.key_separator) {
118 if (str[end]==*TT.key_separator) break;
119 } else if (isspace(str[end])) break;
120 }
121 }
122 }
123 if (!j) start=end;
124 }
125
126 // Key with explicit separator starts after the separator
127 if (TT.key_separator && str[start]==*TT.key_separator) start++;
128
129 // Strip leading and trailing whitespace if necessary
130 if (flags&FLAG_b) while (isspace(str[start])) start++;
131 if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
132
133 // Handle offsets on start and end
134 if (key->range[3]) {
135 end += key->range[3]-1;
136 if (end>len) end=len;
137 }
138 if (key->range[1]) {
139 start += key->range[1]-1;
140 if (start>len) start=len;
141 }
142
143 // Make the copy
144 if (end<start) end=start;
145 str = xstrndup(str+start, end-start);
146
147 // Handle -d
148 if (flags&FLAG_d) {
149 for (start = end = 0; str[end]; end++)
150 if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
151 str[start] = 0;
152 }
153
154 // Handle -i
155 if (flags&FLAG_i) {
156 for (start = end = 0; str[end]; end++)
157 if (isprint(str[end])) str[start++] = str[end];
158 str[start] = 0;
159 }
160
161 return str;
162 }
163
164 // append a sort_key to key_list.
165
add_key(void)166 static struct sort_key *add_key(void)
167 {
168 void **stupid_compiler = &TT.key_list;
169 struct sort_key **pkey = (struct sort_key **)stupid_compiler;
170
171 while (*pkey) pkey = &((*pkey)->next_key);
172 return *pkey = xzalloc(sizeof(struct sort_key));
173 }
174
175 // Perform actual comparison
compare_values(int flags,char * x,char * y)176 static int compare_values(int flags, char *x, char *y)
177 {
178 int ff = flags & (FLAG_n|FLAG_g|FLAG_M|FLAG_x);
179
180 // Ascii sort
181 if (!ff) return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
182
183 if (CFG_SORT_FLOAT && ff == FLAG_g) {
184 char *xx,*yy;
185 double dx = strtod(x,&xx), dy = strtod(y,&yy);
186 int xinf, yinf;
187
188 // not numbers < NaN < -infinity < numbers < +infinity
189
190 if (x==xx) return y==yy ? 0 : -1;
191 if (y==yy) return 1;
192
193 // Check for isnan
194 if (dx!=dx) return (dy!=dy) ? 0 : -1;
195 if (dy!=dy) return 1;
196
197 // Check for infinity. (Could underflow, but avoids needing libm.)
198 xinf = (1.0/dx == 0.0);
199 yinf = (1.0/dy == 0.0);
200 if (xinf) {
201 if(dx<0) return (yinf && dy<0) ? 0 : -1;
202 return (yinf && dy>0) ? 0 : 1;
203 }
204 if (yinf) return dy<0 ? 1 : -1;
205
206 return dx>dy ? 1 : (dx<dy ? -1 : 0);
207 } else if (CFG_SORT_BIG && ff == FLAG_M) {
208 struct tm thyme;
209 int dx;
210 char *xx,*yy;
211
212 xx = strptime(x,"%b",&thyme);
213 dx = thyme.tm_mon;
214 yy = strptime(y,"%b",&thyme);
215 if (!xx) return !yy ? 0 : -1;
216 else if (!yy) return 1;
217 else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
218
219 } else if (CFG_SORT_BIG && ff == FLAG_x) {
220 return strtol(x, NULL, 16)-strtol(y, NULL, 16);
221 // This has to be ff == FLAG_n
222 } else {
223 // Full floating point version of -n
224 if (CFG_SORT_FLOAT) {
225 double dx = atof(x), dy = atof(y);
226
227 return dx>dy ? 1 : (dx<dy ? -1 : 0);
228 // Integer version of -n for tiny systems
229 } else return atoi(x)-atoi(y);
230 }
231 }
232
233 // Callback from qsort(): Iterate through key_list and perform comparisons.
compare_keys(const void * xarg,const void * yarg)234 static int compare_keys(const void *xarg, const void *yarg)
235 {
236 int flags = toys.optflags, retval = 0;
237 char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
238 struct sort_key *key;
239
240 if (CFG_SORT_BIG) {
241 for (key=(struct sort_key *)TT.key_list; !retval && key;
242 key = key->next_key)
243 {
244 flags = key->flags ? key->flags : toys.optflags;
245
246 // Chop out and modify key chunks, handling -dfib
247
248 x = get_key_data(xx, key, flags);
249 y = get_key_data(yy, key, flags);
250
251 retval = compare_values(flags, x, y);
252
253 // Free the copies get_key_data() made.
254
255 if (x != xx) free(x);
256 if (y != yy) free(y);
257
258 if (retval) break;
259 }
260 } else retval = compare_values(flags, xx, yy);
261
262 // Perform fallback sort if necessary (always case insensitive, no -f,
263 // the point is to get a stable order even for -f sorts)
264 if (!retval && !(CFG_SORT_BIG && (toys.optflags&FLAG_s))) {
265 flags = toys.optflags;
266 retval = strcmp(xx, yy);
267 }
268
269 return retval * ((flags&FLAG_r) ? -1 : 1);
270 }
271
272 // Callback from loopfiles to handle input files.
sort_read(int fd,char * name)273 static void sort_read(int fd, char *name)
274 {
275 // Read each line from file, appending to a big array.
276
277 for (;;) {
278 char * line = (CFG_SORT_BIG && (toys.optflags&FLAG_z))
279 ? get_rawline(fd, NULL, 0) : get_line(fd);
280
281 if (!line) break;
282
283 // handle -c here so we don't allocate more memory than necessary.
284 if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) {
285 int j = (toys.optflags&FLAG_u) ? -1 : 0;
286
287 if (TT.lines && compare_keys((void *)&TT.lines, &line)>j)
288 error_exit("%s: Check line %d\n", name, TT.linecount);
289 free(TT.lines);
290 TT.lines = (char **)line;
291 } else {
292 if (!(TT.linecount&63))
293 TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
294 TT.lines[TT.linecount] = line;
295 }
296 TT.linecount++;
297 }
298 }
299
sort_main(void)300 void sort_main(void)
301 {
302 int idx, fd = 1;
303
304 // Open output file if necessary.
305 if (CFG_SORT_BIG && TT.outfile)
306 fd = xcreate(TT.outfile, O_CREAT|O_TRUNC|O_WRONLY, 0666);
307
308 // Parse -k sort keys.
309 if (CFG_SORT_BIG && TT.raw_keys) {
310 struct arg_list *arg;
311
312 for (arg = TT.raw_keys; arg; arg = arg->next) {
313 struct sort_key *key = add_key();
314 char *temp;
315 int flag;
316
317 idx = 0;
318 temp = arg->arg;
319 while (*temp) {
320 // Start of range
321 key->range[2*idx] = (unsigned)strtol(temp, &temp, 10);
322 if (*temp=='.')
323 key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10);
324
325 // Handle flags appended to a key type.
326 for (;*temp;temp++) {
327 char *temp2, *optlist;
328
329 // Note that a second comma becomes an "Unknown key" error.
330
331 if (*temp==',' && !idx++) {
332 temp++;
333 break;
334 }
335
336 // Which flag is this?
337
338 optlist = toys.which->options;
339 temp2 = strchr(optlist, *temp);
340 flag = (1<<(optlist-temp2+strlen(optlist)-1));
341
342 // Was it a flag that can apply to a key?
343
344 if (!temp2 || flag>FLAG_b
345 || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
346 {
347 error_exit("Unknown key option.");
348 }
349 // b after , means strip _trailing_ space, not leading.
350 if (idx && flag==FLAG_b) flag = FLAG_bb;
351 key->flags |= flag;
352 }
353 }
354 }
355 }
356
357 // global b flag strips both leading and trailing spaces
358 if (toys.optflags&FLAG_b) toys.optflags |= FLAG_bb;
359
360 // If no keys, perform alphabetic sort over the whole line.
361 if (CFG_SORT_BIG && !TT.key_list) add_key()->range[0] = 1;
362
363 // Open input files and read data, populating TT.lines[TT.linecount]
364 loopfiles(toys.optargs, sort_read);
365
366 // The compare (-c) logic was handled in sort_read(),
367 // so if we got here, we're done.
368 if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) goto exit_now;
369
370 // Perform the actual sort
371 qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
372
373 // handle unique (-u)
374 if (toys.optflags&FLAG_u) {
375 int jdx;
376
377 for (jdx=0, idx=1; idx<TT.linecount; idx++) {
378 if (!compare_keys(&TT.lines[jdx], &TT.lines[idx]))
379 free(TT.lines[idx]);
380 else TT.lines[++jdx] = TT.lines[idx];
381 }
382 if (TT.linecount) TT.linecount = jdx+1;
383 }
384
385 // Output result
386 for (idx = 0; idx<TT.linecount; idx++) {
387 char *s = TT.lines[idx];
388 unsigned i = strlen(s);
389
390 if (!(toys.optflags&FLAG_z)) s[i] = '\n';
391 xwrite(fd, s, i+1);
392 if (CFG_TOYBOX_FREE) free(s);
393 }
394
395 exit_now:
396 if (CFG_TOYBOX_FREE) {
397 if (fd != 1) close(fd);
398 free(TT.lines);
399 }
400 }
401