1 /* cut.c - print selected ranges from a file
2 *
3 * Copyright 2016 Rob Landley <rob@landley.net>
4 *
5 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html
6 *
7 * Deviations from posix: added -DF. We can only accept 512 selections, and
8 * "-" counts as start to end. Using spaces to separate a comma-separated list
9 * is silly and inconsistent with dd, ps, cp, and mount.
10 *
11 * todo: -n, -s with -c
12
13 USE_CUT(NEWTOY(cut, "b*|c*|f*|F*|C*|O(output-delimiter):d:sDn[!cbf]", TOYFLAG_USR|TOYFLAG_BIN))
14
15 config CUT
16 bool "cut"
17 default y
18 help
19 usage: cut [-Ds] [-bcfF LIST] [-dO DELIM] [FILE...]
20
21 Print selected parts of lines from each FILE to standard output.
22
23 Each selection LIST is comma separated, either numbers (counting from 1)
24 or dash separated ranges (inclusive, with X- meaning to end of line and -X
25 from start). By default selection ranges are sorted and collated, use -D
26 to prevent that.
27
28 -b Select bytes
29 -c Select UTF-8 characters
30 -C Select unicode columns
31 -d Use DELIM (default is TAB for -f, run of whitespace for -F)
32 -D Don't sort/collate selections or match -fF lines without delimiter
33 -f Select fields (words) separated by single DELIM character
34 -F Select fields separated by DELIM regex
35 -O Output delimiter (default one space for -F, input delim for -f)
36 -s Skip lines without delimiters
37 */
38 #define FOR_cut
39 #include "toys.h"
40
// Per-command state for cut, accessed through TT elsewhere in this file.
// NOTE(review): the leading option-argument fields presumably have to stay in
// the order the option parser fills them (cut_main notes select[] is in
// reverse order from the NEWTOY optstr) -- confirm before reordering anything.
41 GLOBALS(
// d: input delimiter (-d); cut_main() supplies a default when it is unset.
// O: output delimiter (-O); written between selections by cut_line().
42 char *d, *O;
// One argument list per selection option; cut_main() indexes these in the
// order "CFfcb" and parses all of them with the same get_range() callback.
43 struct arg_list *select[5]; // we treat them the same, so loop through
44
// Number of (start, end) pairs currently stored in toybuf.
45 int pairs;
// Compiled form of d, only set up by cut_main() when -F is given.
46 regex_t reg;
47 )
48
49
50 // Apply selections to an input line, producing output
//
// Passed to loopfiles_lines() by cut_main() as the per-line callback.
//   pline: pointer to the line buffer pointer; a NULL pline is ignored.
//   len:   number of bytes in *pline.
// A trailing '\n' in the buffer is overwritten with NUL (so the caller's
// buffer is modified) and excluded from len. Each of the TT.pairs selections
// stored in toybuf as (start, end) unsigned pairs is then applied in order,
// writing the selected bytes to stdout, followed by a single newline.
cut_line(char ** pline,long len)51 static void cut_line(char **pline, long len)
52 {
53 unsigned *pairs = (void *)toybuf;
54 char *line;
55 int i, j;
56
57 if (!pline) return;
58 line = *pline;
59 if (len && line[len-1]=='\n') line[--len] = 0;
60
61 // Loop through selections
62 for (i=0; i<TT.pairs; i++) {
63 unsigned start = pairs[2*i], end = pairs[(2*i)+1], count;
64 char *s = line, *ss;
65
66 // input: start/end position, count=difference between them
67 // output: s = start of string, count = bytes to output
68
// Nonzero start values are 1-based, so convert to a 0-based offset; a start
// of 0 is left alone and means "from the beginning".
69 if (start) start--;
// These two checks compare against the byte length of the line for every
// selection type, so they are only a coarse first bound for -c/-C/-f/-F.
70 if (start>=len) continue;
71 if (!end || end>len) end = len;
// Number of units (bytes, characters, columns or fields) in the selection.
72 count = end-start;
73
74 // Find start and end of output string for the relevant selection type
// -b: units are bytes, so count is already the byte count to write.
75 if (toys.optflags&FLAG_b) s += start;
76 else if (toys.optflags&FLAG_C) {
77 // crunch_str() currently assumes that combining characters get
78 // escaped, to provide an unambiguous visual representation.
79 // This assumes the input string is null terminated.
// NOTE(review): presumably crunch_str() advances the pointer it is given by
// up to the requested number of columns -- confirm against its definition.
80 if (start) crunch_str(&s, start, 0, 0, 0);
81 if (!*s) continue;
82 start = s-line;
83 ss = s;
84 crunch_str(&ss, count, 0, 0, 0);
85 count = ss-s;
86
87 } else if (toys.optflags&FLAG_c) {
88 wchar_t wc;
89 char *sss;
90
91 // Find start
// Step over "start" characters. A negative utf8towc() result advances one
// byte without counting as a character; a result of 0 counts as a character
// and also advances one byte.
92 ss = line+len;
93 while (start && s<ss) {
94 if (0<=(j = utf8towc(&wc, s, len))) start--;
95 s += (j<1) ? 1 : j;
96 }
97 if (s == ss) continue;
98
99 // Find end
// Same walk for "count" more characters; "end" is reused as the countdown.
100 end = count;
101 sss = s;
102 while (end && sss<ss) {
103 if (0<=(j = utf8towc(&wc, sss, len))) end--;
104 sss += (j<1) ? 1 : j;
105 }
106 count = sss-s;
107 } else {
108 regmatch_t match;
109
110 // Loop through skipping appropriate number of fields
// Two passes over the line: j==0 skips "start" delimiters to find where the
// selection begins (s), j==1 then skips "count" delimiters from there to
// find where it ends (ss). "end" remembers the original start so the code
// after the loop can tell whether any delimiter was consumed in pass 0.
111 for (j = 0; j<2; j++) {
112 ss = s;
113 if (j) start = count;
114 else end = start;
115 while (*ss && start) {
116 if (toys.optflags&FLAG_f) {
// -f: every byte that appears in TT.d acts as a delimiter. On the last
// delimiter of pass 1, back up so the delimiter itself is not output.
117 if (!strchr(TT.d, *ss++)) continue;
118 if (!--start && j) ss--;
119 } else {
// -F: no further regex match means the rest of the line is one field.
120 if (regexec(&TT.reg, ss, 1, &match, REG_NOTBOL|REG_NOTEOL)) {
121 ss = line+len;
122 continue;
123 }
124 if (!match.rm_eo) break; // zero length match == no delimiter
// Advance past the delimiter, except for the last delimiter of pass 1 where
// we stop at its start so it is not part of the output.
125 ss += (!--start && j) ? match.rm_so : match.rm_eo;
126 }
127 }
// Pass 0 ran off the end of the line: leave the loop with j still 0.
128 if (!j && !*(s = ss)) break;
129 }
130
131 // If we never encountered even one separator, print whole line (posix!)
// j is 0 here only when pass 0 hit end of line; end == start additionally
// means start was never decremented. -D stops processing selections for
// this line (the newline is still written below), -s returns without
// writing anything further, otherwise the whole line is written.
132 if (!j && end == start) {
133 if (toys.optflags&FLAG_D) break;
134 if (toys.optflags&FLAG_s) return;
135 fwrite(line, len, 1, stdout);
136 break;
137 } else if (!*s) continue;
138 count = ss-s;
139 }
// The output delimiter goes before every selection except index 0.
140 if (i && TT.O) fputs(TT.O, stdout);
141 fwrite(s, count, 1, stdout);
142 }
143 xputc('\n');
144 }
145
// Ordering function for (start, end) pairs: compares the start values first
// and falls back to the end values when the starts are equal.
// Returns -1, 0 or 1. cut_main() hands this to qsort() through a cast.
static int compar(unsigned *a, unsigned *b)
{
  int slot;

  // Slot 0 is the start of the range, slot 1 is its end.
  for (slot = 0; slot<2; slot++)
    if (a[slot] != b[slot]) return (a[slot]<b[slot]) ? -1 : 1;

  return 0;
}
155
156 // parse A or A-B or A- or -B
get_range(void * data,char * str,int len)157 static char *get_range(void *data, char *str, int len)
158 {
159 char *end = str;
160 unsigned *pairs = (void *)toybuf, i;
161
162 // Using toybuf[] to store ranges means we can have 512 selections max.
163 if (TT.pairs == sizeof(toybuf)/sizeof(int)) perror_exit("select limit");
164 pairs += 2*TT.pairs++;
165
166 pairs[1] = UINT_MAX;
167 for (i = 0; ;i++) {
168 if (i==2) return end;
169 if (isdigit(*end)) {
170 long long ll = estrtol(end, &end, 10);
171
172 if (ll<1 || ll>UINT_MAX || errno) return end;
173 pairs[i] = ll;
174 }
175 if (*end++ != '-') break;
176 }
177 if (!i) pairs[1] = pairs[0];
178 if ((end-str)<len) return end;
179 if (pairs[0]>pairs[1]) return str;
180
181 // No error
182 return 0;
183 }
184
cut_main(void)185 void cut_main(void)
186 {
187 int i;
188 char buf[8];
189
190 // Parse command line arguments
191 if ((toys.optflags&(FLAG_s|FLAG_f|FLAG_F))==FLAG_s)
192 error_exit("-s needs -Ff");
193 if ((toys.optflags&(FLAG_d|FLAG_f|FLAG_F))==FLAG_d)
194 error_exit("-d needs -Ff");
195 if (!TT.d) TT.d = (toys.optflags&FLAG_F) ? "[[:space:]][[:space:]]*" : "\t";
196 if (toys.optflags&FLAG_F) xregcomp(&TT.reg, TT.d, REG_EXTENDED);
197 if (!TT.O) {
198 if (toys.optflags&FLAG_F) TT.O = " ";
199 else if (toys.optflags&FLAG_f) TT.O = TT.d;
200 }
201
202 // Parse ranges, which are attached to a selection type (only one can be set)
203 for (i = 0; i<ARRAY_LEN(TT.select); i++) {
204 sprintf(buf, "bad -%c", "CFfcb"[i]); // reverse order from newtoy optstr
205 if (TT.select[i]) comma_args(TT.select[i], 0, buf, get_range);
206 }
207 if (!TT.pairs) error_exit("no selections");
208
209 // Sort and collate selections
210 if (!(toys.optflags&FLAG_D)) {
211 int from, to;
212 unsigned *pairs = (void *)toybuf;
213
214 qsort(toybuf, TT.pairs, 8, (void *)compar);
215 for (to = 0, from = 2; from/2 < TT.pairs; from += 2) {
216 if (pairs[from] > pairs[to+1]) {
217 to += 2;
218 memcpy(pairs+to, pairs+from, 2*sizeof(unsigned));
219 } else if (pairs[from+1] > pairs[to+1]) pairs[to+1] = pairs[from+1];
220 }
221 TT.pairs = (to/2)+1;
222 }
223
224 // For each argument, loop through lines of file and call cut_line() on each
225 loopfiles_lines(toys.optargs, cut_line);
226 }
227