1 /* cut.c - Cut from a file.
2  *
3  * Copyright 2012 Ranjan Kumar <ranjankumar.bth@gmail.com>
4  * Copyright 2012 Kyungwan Han <asura321@gmail.com>
5  *
6  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html
7  *
8  * TODO: cleanup
9 
10 USE_CUT(NEWTOY(cut, "b:|c:|f:|d:sn[!cbf]", TOYFLAG_USR|TOYFLAG_BIN))
11 
12 config CUT
13   bool "cut"
14   default y
15   help
16     usage: cut OPTION... [FILE]...
17 
18     Print selected parts of lines from each FILE to standard output.
19 
20     -b LIST	select only these bytes from LIST.
21     -c LIST	select only these characters from LIST.
22     -f LIST	select only these fields.
23     -d DELIM	use DELIM instead of TAB for field delimiter.
24     -s	do not print lines not containing delimiters.
25     -n	don't split multibyte characters (Ignored).
26 */
27 #define FOR_cut
28 #include "toys.h"
29 
30 GLOBALS(
31   char *delim;
32   char *flist;
33   char *clist;
34   char *blist;
35 
36   void *slist_head;
37   unsigned nelem;
38   void (*do_cut)(int fd);
39 )
40 
// One selected range, kept in a singly linked list ordered by start.
struct slist {
  struct slist *next; // following range, or NULL at the tail
  int start;          // zero-based first position
  int end;            // zero-based last position; negative means "start only"
};
45 
add_to_list(int start,int end)46 static void add_to_list(int start, int end)
47 {
48   struct slist *current, *head_ref, *temp1_node;
49 
50   head_ref = TT.slist_head;
51   temp1_node = xzalloc(sizeof(struct slist));
52   temp1_node->start = start;
53   temp1_node->end = end;
54 
55   /* Special case for the head end */
56   if (!head_ref || head_ref->start >= start) {
57       temp1_node->next = head_ref;
58       head_ref = temp1_node;
59   } else {
60     /* Locate the node before the point of insertion */
61     current = head_ref;
62     while (current->next && current->next->start < temp1_node->start)
63         current = current->next;
64     temp1_node->next = current->next;
65     current->next = temp1_node;
66   }
67   TT.slist_head = head_ref;
68 }
69 
70 // parse list and add to slist.
parse_list(char * list)71 static void parse_list(char *list)
72 {
73   for (;;) {
74     char *ctoken = strsep(&list, ","), *dtoken;
75     int start = 0, end = INT_MAX;
76 
77     if (!ctoken) break;
78     if (!*ctoken) continue;
79 
80     // Get start position.
81     if (*(dtoken = strsep(&ctoken, "-"))) {
82       start = atolx_range(dtoken, 0, INT_MAX);
83       start = (start?(start-1):start);
84     }
85 
86     // Get end position.
87     if (!ctoken) end = -1; //case e.g. 1,2,3
88     else if (*ctoken) {//case e.g. N-M
89       end = atolx_range(ctoken, 0, INT_MAX);
90       if (!end) end = INT_MAX;
91       end--;
92       if(end == start) end = -1;
93     }
94     add_to_list(start, end);
95     TT.nelem++;
96   }
97   // if list is missing in command line.
98   if (!TT.nelem) error_exit("missing positions list");
99 }
100 
101 /*
102  * retrive data from the file/s.
103  */
get_data(void)104 static void get_data(void)
105 {
106   char **argv = toys.optargs; //file name.
107   toys.exitval = EXIT_SUCCESS;
108 
109   if(!*argv) TT.do_cut(0); //for stdin
110   else {
111     for(; *argv; ++argv) {
112       if(strcmp(*argv, "-") == 0) TT.do_cut(0); //for stdin
113       else {
114         int fd = open(*argv, O_RDONLY, 0);
115         if (fd < 0) {//if file not present then continue with other files.
116           perror_msg_raw(*argv);
117           continue;
118         }
119         TT.do_cut(fd);
120         xclose(fd);
121       }
122     }
123   }
124 }
125 
126 // perform cut operation on the given delimiter.
do_fcut(int fd)127 static void do_fcut(int fd)
128 {
129   char *buff, *pfield = 0, *delimiter = TT.delim;
130 
131   for (;;) {
132     unsigned cpos = 0;
133     int start, ndelimiters = -1;
134     int  nprinted_fields = 0;
135     struct slist *temp_node = TT.slist_head;
136 
137     free(pfield);
138     pfield = 0;
139 
140     if (!(buff = get_line(fd))) break;
141 
142     //does line have any delimiter?.
143     if (strrchr(buff, (int)delimiter[0]) == NULL) {
144       //if not then print whole line and move to next line.
145       if (!(toys.optflags & FLAG_s)) xputs(buff);
146       continue;
147     }
148 
149     pfield = xzalloc(strlen(buff) + 1);
150 
151     if (temp_node) {
152       //process list on each line.
153       while (cpos < TT.nelem && buff) {
154         if (!temp_node) break;
155         start = temp_node->start;
156         do {
157           char *field = 0;
158 
159           //count number of delimeters per line.
160           while (buff) {
161             if (ndelimiters < start) {
162               ndelimiters++;
163               field = strsep(&buff, delimiter);
164             } else break;
165           }
166           //print field (if not yet printed).
167           if (!pfield[ndelimiters]) {
168             if (ndelimiters == start) {
169               //put delimiter.
170               if (nprinted_fields++ > 0) xputc(delimiter[0]);
171               if (field) fputs(field, stdout);
172               //make sure this field won't print again.
173               pfield[ndelimiters] = (char) 0x23; //put some char at this position.
174             }
175           }
176           start++;
177           if ((temp_node->end < 0) || !buff) break;
178         } while(start <= temp_node->end);
179         temp_node = temp_node->next;
180         cpos++;
181       }
182     }
183     xputc('\n');
184   }
185 }
186 
187 // perform cut operation char or byte.
do_bccut(int fd)188 static void do_bccut(int fd)
189 {
190   char *buff;
191 
192   while ((buff = get_line(fd)) != NULL) {
193     unsigned cpos = 0;
194     int buffln = strlen(buff);
195     char *pfield = xzalloc(buffln + 1);
196     struct slist *temp_node = TT.slist_head;
197 
198     if (temp_node != NULL) {
199       while (cpos < TT.nelem) {
200         int start;
201 
202         if (!temp_node) break;
203         start = temp_node->start;
204         while (start < buffln) {
205           //to avoid duplicate field printing.
206           if (pfield[start]) {
207               if (++start <= temp_node->end) continue;
208               temp_node = temp_node->next;
209               break;
210           } else {
211             //make sure this field won't print again.
212             pfield[start] = (char) 0x23; //put some char at this position.
213             xputc(buff[start]);
214           }
215           if (++start > temp_node->end) {
216             temp_node = temp_node->next;
217             break;
218           }
219         }
220         cpos++;
221       }
222       xputc('\n');
223     }
224     free(pfield);
225     pfield = NULL;
226   }
227 }
228 
cut_main(void)229 void cut_main(void)
230 {
231   char delimiter = '\t'; //default delimiter.
232   char *list;
233 
234   TT.nelem = 0;
235   TT.slist_head = NULL;
236 
237   //Get list and assign the function.
238   if (toys.optflags & FLAG_f) {
239     list = TT.flist;
240     TT.do_cut = do_fcut;
241   } else if (toys.optflags & FLAG_c) {
242     list = TT.clist;
243     TT.do_cut = do_bccut;
244   } else {
245     list = TT.blist;
246     TT.do_cut = do_bccut;
247   }
248 
249   if (toys.optflags & FLAG_d) {
250     //delimiter must be 1 char.
251     if(TT.delim[0] && TT.delim[1])
252       perror_exit("the delimiter must be a single character");
253     delimiter = TT.delim[0];
254   }
255 
256   if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
257     TT.delim = xzalloc(2);
258     TT.delim[0] = delimiter;
259   }
260 
261   //when field is not specified, cutting has some special handling.
262   if (!(toys.optflags & FLAG_f)) {
263     if (toys.optflags & FLAG_s)
264       perror_exit("suppressing non-delimited lines operating on fields");
265     if (delimiter != '\t')
266       perror_exit("an input delimiter may be specified only when operating on fields");
267   }
268 
269   parse_list(list);
270   get_data();
271   if (!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
272     free(TT.delim);
273     TT.delim = NULL;
274   }
275   llist_traverse(TT.slist_head, free);
276 }
277