1 /* cut.c - Cut from a file.
2 *
3 * Copyright 2012 Ranjan Kumar <ranjankumar.bth@gmail.com>
4 * Copyright 2012 Kyungwan Han <asura321@gmail.com>
5 *
6 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html
7 *
8 * TODO: cleanup
9
10 USE_CUT(NEWTOY(cut, "b:|c:|f:|d:sn[!cbf]", TOYFLAG_USR|TOYFLAG_BIN))
11
12 config CUT
13 bool "cut"
14 default y
15 help
16 usage: cut OPTION... [FILE]...
17
18 Print selected parts of lines from each FILE to standard output.
19
20 -b LIST select only these bytes from LIST.
21 -c LIST select only these characters from LIST.
22 -f LIST select only these fields.
23 -d DELIM use DELIM instead of TAB for field delimiter.
24 -s do not print lines not containing delimiters.
25 -n don't split multibyte characters (Ignored).
26 */
27 #define FOR_cut
28 #include "toys.h"
29
30 GLOBALS(
31 char *delim;
32 char *flist;
33 char *clist;
34 char *blist;
35
36 void *slist_head;
37 unsigned nelem;
38 void (*do_cut)(int fd);
39 )
40
// One entry of the selection list, stored with 0-based positions.
struct slist {
  struct slist *next;   // following entry, or NULL at the tail
  int start;            // first selected position
  int end;              // last selected position; negative means "start only"
};
45
add_to_list(int start,int end)46 static void add_to_list(int start, int end)
47 {
48 struct slist *current, *head_ref, *temp1_node;
49
50 head_ref = TT.slist_head;
51 temp1_node = xzalloc(sizeof(struct slist));
52 temp1_node->start = start;
53 temp1_node->end = end;
54
55 /* Special case for the head end */
56 if (!head_ref || head_ref->start >= start) {
57 temp1_node->next = head_ref;
58 head_ref = temp1_node;
59 } else {
60 /* Locate the node before the point of insertion */
61 current = head_ref;
62 while (current->next && current->next->start < temp1_node->start)
63 current = current->next;
64 temp1_node->next = current->next;
65 current->next = temp1_node;
66 }
67 TT.slist_head = head_ref;
68 }
69
70 // parse list and add to slist.
parse_list(char * list)71 static void parse_list(char *list)
72 {
73 for (;;) {
74 char *ctoken = strsep(&list, ","), *dtoken;
75 int start = 0, end = INT_MAX;
76
77 if (!ctoken) break;
78 if (!*ctoken) continue;
79
80 // Get start position.
81 if (*(dtoken = strsep(&ctoken, "-"))) {
82 start = atolx_range(dtoken, 0, INT_MAX);
83 start = (start?(start-1):start);
84 }
85
86 // Get end position.
87 if (!ctoken) end = -1; //case e.g. 1,2,3
88 else if (*ctoken) {//case e.g. N-M
89 end = atolx_range(ctoken, 0, INT_MAX);
90 if (!end) end = INT_MAX;
91 end--;
92 if(end == start) end = -1;
93 }
94 add_to_list(start, end);
95 TT.nelem++;
96 }
97 // if list is missing in command line.
98 if (!TT.nelem) error_exit("missing positions list");
99 }
100
101 /*
102 * retrive data from the file/s.
103 */
get_data(void)104 static void get_data(void)
105 {
106 char **argv = toys.optargs; //file name.
107 toys.exitval = EXIT_SUCCESS;
108
109 if(!*argv) TT.do_cut(0); //for stdin
110 else {
111 for(; *argv; ++argv) {
112 if(strcmp(*argv, "-") == 0) TT.do_cut(0); //for stdin
113 else {
114 int fd = open(*argv, O_RDONLY, 0);
115 if (fd < 0) {//if file not present then continue with other files.
116 perror_msg_raw(*argv);
117 continue;
118 }
119 TT.do_cut(fd);
120 xclose(fd);
121 }
122 }
123 }
124 }
125
126 // perform cut operation on the given delimiter.
do_fcut(int fd)127 static void do_fcut(int fd)
128 {
129 char *buff, *pfield = 0, *delimiter = TT.delim;
130
131 for (;;) {
132 unsigned cpos = 0;
133 int start, ndelimiters = -1;
134 int nprinted_fields = 0;
135 struct slist *temp_node = TT.slist_head;
136
137 free(pfield);
138 pfield = 0;
139
140 if (!(buff = get_line(fd))) break;
141
142 //does line have any delimiter?.
143 if (strrchr(buff, (int)delimiter[0]) == NULL) {
144 //if not then print whole line and move to next line.
145 if (!(toys.optflags & FLAG_s)) xputs(buff);
146 continue;
147 }
148
149 pfield = xzalloc(strlen(buff) + 1);
150
151 if (temp_node) {
152 //process list on each line.
153 while (cpos < TT.nelem && buff) {
154 if (!temp_node) break;
155 start = temp_node->start;
156 do {
157 char *field = 0;
158
159 //count number of delimeters per line.
160 while (buff) {
161 if (ndelimiters < start) {
162 ndelimiters++;
163 field = strsep(&buff, delimiter);
164 } else break;
165 }
166 //print field (if not yet printed).
167 if (!pfield[ndelimiters]) {
168 if (ndelimiters == start) {
169 //put delimiter.
170 if (nprinted_fields++ > 0) xputc(delimiter[0]);
171 if (field) fputs(field, stdout);
172 //make sure this field won't print again.
173 pfield[ndelimiters] = (char) 0x23; //put some char at this position.
174 }
175 }
176 start++;
177 if ((temp_node->end < 0) || !buff) break;
178 } while(start <= temp_node->end);
179 temp_node = temp_node->next;
180 cpos++;
181 }
182 }
183 xputc('\n');
184 }
185 }
186
187 // perform cut operation char or byte.
do_bccut(int fd)188 static void do_bccut(int fd)
189 {
190 char *buff;
191
192 while ((buff = get_line(fd)) != NULL) {
193 unsigned cpos = 0;
194 int buffln = strlen(buff);
195 char *pfield = xzalloc(buffln + 1);
196 struct slist *temp_node = TT.slist_head;
197
198 if (temp_node != NULL) {
199 while (cpos < TT.nelem) {
200 int start;
201
202 if (!temp_node) break;
203 start = temp_node->start;
204 while (start < buffln) {
205 //to avoid duplicate field printing.
206 if (pfield[start]) {
207 if (++start <= temp_node->end) continue;
208 temp_node = temp_node->next;
209 break;
210 } else {
211 //make sure this field won't print again.
212 pfield[start] = (char) 0x23; //put some char at this position.
213 xputc(buff[start]);
214 }
215 if (++start > temp_node->end) {
216 temp_node = temp_node->next;
217 break;
218 }
219 }
220 cpos++;
221 }
222 xputc('\n');
223 }
224 free(pfield);
225 pfield = NULL;
226 }
227 }
228
cut_main(void)229 void cut_main(void)
230 {
231 char delimiter = '\t'; //default delimiter.
232 char *list;
233
234 TT.nelem = 0;
235 TT.slist_head = NULL;
236
237 //Get list and assign the function.
238 if (toys.optflags & FLAG_f) {
239 list = TT.flist;
240 TT.do_cut = do_fcut;
241 } else if (toys.optflags & FLAG_c) {
242 list = TT.clist;
243 TT.do_cut = do_bccut;
244 } else {
245 list = TT.blist;
246 TT.do_cut = do_bccut;
247 }
248
249 if (toys.optflags & FLAG_d) {
250 //delimiter must be 1 char.
251 if(TT.delim[0] && TT.delim[1])
252 perror_exit("the delimiter must be a single character");
253 delimiter = TT.delim[0];
254 }
255
256 if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
257 TT.delim = xzalloc(2);
258 TT.delim[0] = delimiter;
259 }
260
261 //when field is not specified, cutting has some special handling.
262 if (!(toys.optflags & FLAG_f)) {
263 if (toys.optflags & FLAG_s)
264 perror_exit("suppressing non-delimited lines operating on fields");
265 if (delimiter != '\t')
266 perror_exit("an input delimiter may be specified only when operating on fields");
267 }
268
269 parse_list(list);
270 get_data();
271 if (!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
272 free(TT.delim);
273 TT.delim = NULL;
274 }
275 llist_traverse(TT.slist_head, free);
276 }
277