1 /* cut.c - Cut from a file.
2  *
3  * Copyright 2012 Ranjan Kumar <ranjankumar.bth@gmail.com>
4  * Copyright 2012 Kyungwan Han <asura321@gmail.com>
5  *
6  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html
7 
8 USE_CUT(NEWTOY(cut, "b:|c:|f:|d:sn[!cbf]", TOYFLAG_USR|TOYFLAG_BIN))
9 
10 config CUT
11   bool "cut"
12   default y
13   help
14     usage: cut OPTION... [FILE]...
15 
16     Print selected parts of lines from each FILE to standard output.
17 
18     -b LIST	select only these bytes from LIST.
19     -c LIST	select only these characters from LIST.
20     -f LIST	select only these fields.
21     -d DELIM	use DELIM instead of TAB for field delimiter.
22     -s	do not print lines not containing delimiters.
23     -n	don't split multibyte characters (Ignored).
24 */
25 #define FOR_cut
26 #include "toys.h"
27 
GLOBALS(
  char *delim;  // -d argument; cut_main() allocates a one-char TAB string for -f without -d
  char *flist;  // list string used when FLAG_f is set
  char *clist;  // list string used when FLAG_c is set
  char *blist;  // list string used otherwise (byte mode)

  void *slist_head;       // head of the struct slist chain built by add_to_list()
  unsigned nelem;         // number of entries parse_list() added to the chain
  void (*do_cut)(int fd); // per-input handler chosen in cut_main(): do_fcut or do_bccut
)
38 
// One selector from the position list, kept in a singly linked chain that
// add_to_list() orders by ascending start.
struct slist {
  struct slist *next; // following selector, or NULL at the tail
  // start: zero-based first position (parse_list() stores N-1 for input N).
  // end: zero-based last position; parse_list() stores -1 for a single
  //      position and leaves INT_MAX for an open-ended "N-" range.
  int start, end;
};
43 
add_to_list(int start,int end)44 static void add_to_list(int start, int end)
45 {
46   struct slist *current, *head_ref, *temp1_node;
47 
48   head_ref = TT.slist_head;
49   temp1_node = xzalloc(sizeof(struct slist));
50   temp1_node->start = start;
51   temp1_node->end = end;
52 
53   /* Special case for the head end */
54   if (!head_ref || head_ref->start >= start) {
55       temp1_node->next = head_ref;
56       head_ref = temp1_node;
57   } else {
58     /* Locate the node before the point of insertion */
59     current = head_ref;
60     while (current->next && current->next->start < temp1_node->start)
61         current = current->next;
62     temp1_node->next = current->next;
63     current->next = temp1_node;
64   }
65   TT.slist_head = head_ref;
66 }
67 
68 // parse list and add to slist.
parse_list(char * list)69 static void parse_list(char *list)
70 {
71   for (;;) {
72     char *ctoken = strsep(&list, ","), *dtoken;
73     int start = 0, end = INT_MAX;
74 
75     if (!ctoken) break;
76     if (!*ctoken) continue;
77 
78     //Get start position.
79     if (*(dtoken = strsep(&ctoken, "-"))) {
80       start = atolx_range(dtoken, 0, INT_MAX);
81       start = (start?(start-1):start);
82     }
83 
84     //Get end position.
85     if (!ctoken) end = -1; //case e.g. 1,2,3
86     else if (*ctoken) {//case e.g. N-M
87       end = atolx_range(ctoken, 0, INT_MAX);
88       if (!end) end = INT_MAX;
89       end--;
90       if(end == start) end = -1;
91     }
92     add_to_list(start, end);
93     TT.nelem++;
94   }
95   //if list is missing in command line.
96   if (!TT.nelem) error_exit("missing positions list");
97 }
98 
99 /*
100  * retrive data from the file/s.
101  */
get_data(void)102 static void get_data(void)
103 {
104   char **argv = toys.optargs; //file name.
105   toys.exitval = EXIT_SUCCESS;
106 
107   if(!*argv) TT.do_cut(0); //for stdin
108   else {
109     for(; *argv; ++argv) {
110       if(strcmp(*argv, "-") == 0) TT.do_cut(0); //for stdin
111       else {
112         int fd = open(*argv, O_RDONLY, 0);
113         if(fd < 0) {//if file not present then continue with other files.
114           perror_msg("%s", *argv);
115           continue;
116         }
117         TT.do_cut(fd);
118         xclose(fd);
119       }
120     }
121   }
122 }
123 
124 // perform cut operation on the given delimiter.
do_fcut(int fd)125 static void do_fcut(int fd)
126 {
127   char *buff, *pfield = 0, *delimiter = TT.delim;
128 
129   for (;;) {
130     unsigned cpos = 0;
131     int start, ndelimiters = -1;
132     int  nprinted_fields = 0;
133     struct slist *temp_node = TT.slist_head;
134 
135     free(pfield);
136     pfield = 0;
137 
138     if (!(buff = get_line(fd))) break;
139 
140     //does line have any delimiter?.
141     if (strrchr(buff, (int)delimiter[0]) == NULL) {
142       //if not then print whole line and move to next line.
143       if (!(toys.optflags & FLAG_s)) xputs(buff);
144       continue;
145     }
146 
147     pfield = xzalloc(strlen(buff) + 1);
148 
149     if (temp_node) {
150       //process list on each line.
151       while (cpos < TT.nelem && buff) {
152         if (!temp_node) break;
153         start = temp_node->start;
154         do {
155           char *field = 0;
156 
157           //count number of delimeters per line.
158           while (buff) {
159             if (ndelimiters < start) {
160               ndelimiters++;
161               field = strsep(&buff, delimiter);
162             } else break;
163           }
164           //print field (if not yet printed).
165           if (!pfield[ndelimiters]) {
166             if (ndelimiters == start) {
167               //put delimiter.
168               if (nprinted_fields++ > 0) xputc(delimiter[0]);
169               if (field) fputs(field, stdout);
170               //make sure this field won't print again.
171               pfield[ndelimiters] = (char) 0x23; //put some char at this position.
172             }
173           }
174           start++;
175           if ((temp_node->end < 0) || !buff) break;
176         } while(start <= temp_node->end);
177         temp_node = temp_node->next;
178         cpos++;
179       }
180     }
181     xputc('\n');
182   }
183 }
184 
185 // perform cut operation char or byte.
do_bccut(int fd)186 static void do_bccut(int fd)
187 {
188   char *buff;
189 
190   while ((buff = get_line(fd)) != NULL) {
191     unsigned cpos = 0;
192     int buffln = strlen(buff);
193     char *pfield = xzalloc(buffln + 1);
194     struct slist *temp_node = TT.slist_head;
195 
196     if (temp_node != NULL) {
197       while (cpos < TT.nelem) {
198         int start;
199 
200         if (!temp_node) break;
201         start = temp_node->start;
202         while (start < buffln) {
203           //to avoid duplicate field printing.
204           if (pfield[start]) {
205               if (++start <= temp_node->end) continue;
206               temp_node = temp_node->next;
207               break;
208           } else {
209             //make sure this field won't print again.
210             pfield[start] = (char) 0x23; //put some char at this position.
211             xputc(buff[start]);
212           }
213           if (++start > temp_node->end) {
214             temp_node = temp_node->next;
215             break;
216           }
217         }
218         cpos++;
219       }
220       xputc('\n');
221     }
222     free(pfield);
223     pfield = NULL;
224   }
225 }
226 
cut_main(void)227 void cut_main(void)
228 {
229   char delimiter = '\t'; //default delimiter.
230   char *list;
231 
232   TT.nelem = 0;
233   TT.slist_head = NULL;
234 
235   //Get list and assign the function.
236   if (toys.optflags & FLAG_f) {
237     list = TT.flist;
238     TT.do_cut = do_fcut;
239   } else if (toys.optflags & FLAG_c) {
240     list = TT.clist;
241     TT.do_cut = do_bccut;
242   } else {
243     list = TT.blist;
244     TT.do_cut = do_bccut;
245   }
246 
247   if (toys.optflags & FLAG_d) {
248     //delimiter must be 1 char.
249     if(TT.delim[0] && TT.delim[1])
250       perror_exit("the delimiter must be a single character");
251     delimiter = TT.delim[0];
252   }
253 
254   if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
255     TT.delim = xzalloc(2);
256     TT.delim[0] = delimiter;
257   }
258 
259   //when field is not specified, cutting has some special handling.
260   if (!(toys.optflags & FLAG_f)) {
261     if (toys.optflags & FLAG_s)
262       perror_exit("suppressing non-delimited lines operating on fields");
263     if (delimiter != '\t')
264       perror_exit("an input delimiter may be specified only when operating on fields");
265   }
266 
267   parse_list(list);
268   get_data();
269   if (!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
270     free(TT.delim);
271     TT.delim = NULL;
272   }
273   llist_traverse(TT.slist_head, free);
274 }
275