1 /* cut.c - Cut from a file.
2 *
3 * Copyright 2012 Ranjan Kumar <ranjankumar.bth@gmail.com>
4 * Copyright 2012 Kyungwan Han <asura321@gmail.com>
5 *
6 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html
7
8 USE_CUT(NEWTOY(cut, "b:|c:|f:|d:sn[!cbf]", TOYFLAG_USR|TOYFLAG_BIN))
9
10 config CUT
11 bool "cut"
12 default y
13 help
14 usage: cut OPTION... [FILE]...
15
16 Print selected parts of lines from each FILE to standard output.
17
18 -b LIST select only these bytes from LIST.
19 -c LIST select only these characters from LIST.
20 -f LIST select only these fields.
21 -d DELIM use DELIM instead of TAB for field delimiter.
22 -s do not print lines not containing delimiters.
23 -n don't split multibyte characters (Ignored).
24 */
25 #define FOR_cut
26 #include "toys.h"
27
28 GLOBALS(
29 char *delim;
30 char *flist;
31 char *clist;
32 char *blist;
33
34 void *slist_head;
35 unsigned nelem;
36 void (*do_cut)(int fd);
37 )
38
// One selected range from the position list, kept in a singly linked list.
struct slist {
  struct slist *next;  // following range, or NULL at the tail
  int start;           // zero-based first position of the range
  int end;             // zero-based last position; negative means "single position"
};
43
add_to_list(int start,int end)44 static void add_to_list(int start, int end)
45 {
46 struct slist *current, *head_ref, *temp1_node;
47
48 head_ref = TT.slist_head;
49 temp1_node = xzalloc(sizeof(struct slist));
50 temp1_node->start = start;
51 temp1_node->end = end;
52
53 /* Special case for the head end */
54 if (!head_ref || head_ref->start >= start) {
55 temp1_node->next = head_ref;
56 head_ref = temp1_node;
57 } else {
58 /* Locate the node before the point of insertion */
59 current = head_ref;
60 while (current->next && current->next->start < temp1_node->start)
61 current = current->next;
62 temp1_node->next = current->next;
63 current->next = temp1_node;
64 }
65 TT.slist_head = head_ref;
66 }
67
68 // parse list and add to slist.
parse_list(char * list)69 static void parse_list(char *list)
70 {
71 for (;;) {
72 char *ctoken = strsep(&list, ","), *dtoken;
73 int start = 0, end = INT_MAX;
74
75 if (!ctoken) break;
76 if (!*ctoken) continue;
77
78 //Get start position.
79 if (*(dtoken = strsep(&ctoken, "-"))) {
80 start = atolx_range(dtoken, 0, INT_MAX);
81 start = (start?(start-1):start);
82 }
83
84 //Get end position.
85 if (!ctoken) end = -1; //case e.g. 1,2,3
86 else if (*ctoken) {//case e.g. N-M
87 end = atolx_range(ctoken, 0, INT_MAX);
88 if (!end) end = INT_MAX;
89 end--;
90 if(end == start) end = -1;
91 }
92 add_to_list(start, end);
93 TT.nelem++;
94 }
95 //if list is missing in command line.
96 if (!TT.nelem) error_exit("missing positions list");
97 }
98
99 /*
100 * retrive data from the file/s.
101 */
get_data(void)102 static void get_data(void)
103 {
104 char **argv = toys.optargs; //file name.
105 toys.exitval = EXIT_SUCCESS;
106
107 if(!*argv) TT.do_cut(0); //for stdin
108 else {
109 for(; *argv; ++argv) {
110 if(strcmp(*argv, "-") == 0) TT.do_cut(0); //for stdin
111 else {
112 int fd = open(*argv, O_RDONLY, 0);
113 if(fd < 0) {//if file not present then continue with other files.
114 perror_msg("%s", *argv);
115 continue;
116 }
117 TT.do_cut(fd);
118 xclose(fd);
119 }
120 }
121 }
122 }
123
124 // perform cut operation on the given delimiter.
do_fcut(int fd)125 static void do_fcut(int fd)
126 {
127 char *buff, *pfield = 0, *delimiter = TT.delim;
128
129 for (;;) {
130 unsigned cpos = 0;
131 int start, ndelimiters = -1;
132 int nprinted_fields = 0;
133 struct slist *temp_node = TT.slist_head;
134
135 free(pfield);
136 pfield = 0;
137
138 if (!(buff = get_line(fd))) break;
139
140 //does line have any delimiter?.
141 if (strrchr(buff, (int)delimiter[0]) == NULL) {
142 //if not then print whole line and move to next line.
143 if (!(toys.optflags & FLAG_s)) xputs(buff);
144 continue;
145 }
146
147 pfield = xzalloc(strlen(buff) + 1);
148
149 if (temp_node) {
150 //process list on each line.
151 while (cpos < TT.nelem && buff) {
152 if (!temp_node) break;
153 start = temp_node->start;
154 do {
155 char *field = 0;
156
157 //count number of delimeters per line.
158 while (buff) {
159 if (ndelimiters < start) {
160 ndelimiters++;
161 field = strsep(&buff, delimiter);
162 } else break;
163 }
164 //print field (if not yet printed).
165 if (!pfield[ndelimiters]) {
166 if (ndelimiters == start) {
167 //put delimiter.
168 if (nprinted_fields++ > 0) xputc(delimiter[0]);
169 if (field) fputs(field, stdout);
170 //make sure this field won't print again.
171 pfield[ndelimiters] = (char) 0x23; //put some char at this position.
172 }
173 }
174 start++;
175 if ((temp_node->end < 0) || !buff) break;
176 } while(start <= temp_node->end);
177 temp_node = temp_node->next;
178 cpos++;
179 }
180 }
181 xputc('\n');
182 }
183 }
184
185 // perform cut operation char or byte.
do_bccut(int fd)186 static void do_bccut(int fd)
187 {
188 char *buff;
189
190 while ((buff = get_line(fd)) != NULL) {
191 unsigned cpos = 0;
192 int buffln = strlen(buff);
193 char *pfield = xzalloc(buffln + 1);
194 struct slist *temp_node = TT.slist_head;
195
196 if (temp_node != NULL) {
197 while (cpos < TT.nelem) {
198 int start;
199
200 if (!temp_node) break;
201 start = temp_node->start;
202 while (start < buffln) {
203 //to avoid duplicate field printing.
204 if (pfield[start]) {
205 if (++start <= temp_node->end) continue;
206 temp_node = temp_node->next;
207 break;
208 } else {
209 //make sure this field won't print again.
210 pfield[start] = (char) 0x23; //put some char at this position.
211 xputc(buff[start]);
212 }
213 if (++start > temp_node->end) {
214 temp_node = temp_node->next;
215 break;
216 }
217 }
218 cpos++;
219 }
220 xputc('\n');
221 }
222 free(pfield);
223 pfield = NULL;
224 }
225 }
226
cut_main(void)227 void cut_main(void)
228 {
229 char delimiter = '\t'; //default delimiter.
230 char *list;
231
232 TT.nelem = 0;
233 TT.slist_head = NULL;
234
235 //Get list and assign the function.
236 if (toys.optflags & FLAG_f) {
237 list = TT.flist;
238 TT.do_cut = do_fcut;
239 } else if (toys.optflags & FLAG_c) {
240 list = TT.clist;
241 TT.do_cut = do_bccut;
242 } else {
243 list = TT.blist;
244 TT.do_cut = do_bccut;
245 }
246
247 if (toys.optflags & FLAG_d) {
248 //delimiter must be 1 char.
249 if(TT.delim[0] && TT.delim[1])
250 perror_exit("the delimiter must be a single character");
251 delimiter = TT.delim[0];
252 }
253
254 if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
255 TT.delim = xzalloc(2);
256 TT.delim[0] = delimiter;
257 }
258
259 //when field is not specified, cutting has some special handling.
260 if (!(toys.optflags & FLAG_f)) {
261 if (toys.optflags & FLAG_s)
262 perror_exit("suppressing non-delimited lines operating on fields");
263 if (delimiter != '\t')
264 perror_exit("an input delimiter may be specified only when operating on fields");
265 }
266
267 parse_list(list);
268 get_data();
269 if (!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
270 free(TT.delim);
271 TT.delim = NULL;
272 }
273 llist_traverse(TT.slist_head, free);
274 }
275