1 /* tr.c - translate or delete characters
2  *
3  * Copyright 2014 Sandeep Sharma <sandeep.jack2756@gmail.com>
4  *
5  * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/tr.html
6 
7 USE_TR(NEWTOY(tr, "^>2<1Ccsd[+cC]", TOYFLAG_USR|TOYFLAG_BIN))
8 
9 config TR
10   bool "tr"
11   default n
12   help
13     usage: tr [-cds] SET1 [SET2]
14 
15     Translate, squeeze, or delete characters from stdin, writing to stdout
16 
17     -c/-C  Take complement of SET1
    -d     Delete input characters found in SET1
19     -s     Squeeze multiple output characters of SET2 into one character
20 */
21 
22 #define FOR_tr
23 #include "toys.h"
24 
GLOBALS(
  short map[256]; // per input byte: low 8 bits = output char, 0x100 = delete, 0x200 = squeeze
  int len1, len2; // number of characters in the expanded SET1 and SET2
)
29 
// Character class identifiers. find_class() returns one of these values; the
// enumerators before class_invalid double as indexes into its name table, so
// their order must match that table. class_invalid means "no class matched".
enum {
  class_alpha,
  class_alnum,
  class_digit,
  class_lower,
  class_upper,
  class_space,
  class_blank,
  class_punct,
  class_cntrl,
  class_xdigit,
  class_invalid
};
35 
36 static void map_translation(char *set1 , char *set2)
37 {
38   int i = TT.len1, k = 0;
39 
40   if (toys.optflags & FLAG_d)
41     for (; i; i--, k++) TT.map[set1[k]] = set1[k]|0x100; //set delete bit
42 
43   if (toys.optflags & FLAG_s) {
44     for (i = TT.len1, k = 0; i; i--, k++)
45       TT.map[set1[k]] = TT.map[set1[k]]|0x200;
46     for (i = TT.len2, k = 0; i; i--, k++)
47       TT.map[set2[k]] = TT.map[set2[k]]|0x200;
48   }
49   i = k = 0;
50   while (!(toys.optflags & FLAG_d) && set2 && TT.len1--) { //ignore set2 if -d present
51     TT.map[set1[i]] = ((TT.map[set1[i]] & 0xFF00) | set2[k]);
52     if (set2[k + 1]) k++;
53     i++;
54   }
55 }
56 
// Decode one backslash escape. (taken from printf)
//
// esc_val: in: points at the character following the backslash.
//          out: points at the last character consumed, so the caller can
//          step past it with a single increment. For an unrecognized escape
//          (or \x without a hex digit) this is the backslash itself, i.e.
//          one position before the incoming pointer.
//
// Accepts \xH and \xHH (hex), up to three octal digits, and the single
// letter escapes n t e b a f v r and backslash.
//
// Returns the decoded value converted to char, or '\\' for an unrecognized
// escape so the sequence is emitted literally.
static int handle_escape_char(char **esc_val)
{
  char *ptr = *esc_val;
  int esc_length = 0;
  unsigned base = 0, digit, result = 0;

  if (*ptr == 'x') {
    ptr++;
    esc_length++;
    base = 16;
  } else if (isdigit((unsigned char)*ptr)) base = 8;

  if (!base) {
    switch (*ptr) {
      case 'n':  result = '\n'; break;
      case 't':  result = '\t'; break;
      case 'e':  result = (char)27; break;
      case 'b':  result = '\b'; break;
      case 'a':  result = '\a'; break;
      case 'f':  result = '\f'; break;
      case 'v':  result = '\v'; break;
      case 'r':  result = '\r'; break;
      case '\\': result = '\\'; break;
      default :
        result = '\\';
        ptr--; // Point back at the backslash; the caller increments past it.
        break;
    }
    *esc_val = ptr;
    return (char)result;
  }

  // Hex starts with esc_length 1 (the 'x'), so it takes at most two digits;
  // octal starts at 0 and takes at most three.
  while (esc_length < 3) {
    int c = tolower((unsigned char)*ptr);

    // Explicit range tests: only 0-9 and a-f are digits. Arithmetic on the
    // character code would accept ':' (the code right after '9') as 10.
    if (c >= '0' && c <= '9') digit = c - '0';
    else if (c >= 'a' && c <= 'f') digit = c - 'a' + 10;
    else digit = base;

    if (digit >= base) {
      // Invalid hex value eg. \xvd, print as it is \xvd
      if (base == 16 && !--esc_length) {
        result = '\\';
        ptr--;
      }
      break;
    }
    esc_length++;
    result = (result * base) + digit;
    ptr++;
  }

  *esc_val = ptr - 1;
  return (char)result;
}
110 
111 static int find_class(char *class_name)
112 {
113   int i;
114   static char *class[] = {
115     "[:alpha:]","[:alnum:]","[:digit:]",
116     "[:lower:]","[:upper:]","[:space:]",
117     "[:blank:]","[:punct:]","[:cntrl:]",
118     "[:xdigit:]","NULL"
119   };
120 
121   for (i = 0; i != class_invalid; i++) {
122     if (!memcmp(class_name, class[i], (class_name[0] == 'x')?10:9)) break;
123   }
124   return i;
125 }
126 
127 static char *expand_set(char *arg, int *len)
128 {
129   int i = 0, j, k, size = 256;
130   char *set = xzalloc(size*sizeof(char));
131 
132   while (*arg) {
133 
134     if (i >= size) {
135       size += 256;
136       set = xrealloc(set, size);
137     }
138     if (*arg == '\\') {
139       arg++;
140       set[i++] = (int)handle_escape_char(&arg);
141       arg++;
142       continue;
143     }
144     if (arg[1] == '-') {
145       if (arg[2] == '\0') goto save;
146       j = arg[0];
147       k = arg[2];
148       if (j > k) perror_exit("reverse colating order");
149       while (j <= k) set[i++] = j++;
150       arg += 3;
151       continue;
152     }
153     if (arg[0] == '[' && arg[1] == ':') {
154 
155       if ((j = find_class(arg)) == class_invalid) goto save;
156 
157       if ((j == class_alpha) || (j == class_upper) || (j == class_alnum)) {
158       for (k = 'A'; k <= 'Z'; k++) set[i++] = k;
159       }
160       if ((j == class_alpha) || (j == class_lower) || (j == class_alnum)) {
161         for (k = 'a'; k <= 'z'; k++) set[i++] = k;
162       }
163       if ((j == class_alnum) || (j == class_digit) || (j == class_xdigit)) {
164         for (k = '0'; k <= '9'; k++) set[i++] = k;
165       }
166       if (j == class_space || j == class_blank) {
167         set[i++] = '\t';
168         if (j == class_space) {
169           set[i++] = '\n';
170           set[i++] = '\f';
171           set[i++] = '\r';
172           set[i++] = '\v';
173         }
174         set[i++] = ' ';
175       }
176       if (j == class_punct) {
177         for (k = 0; k <= 255; k++)
178           if (ispunct(k)) set[i++] = k;
179       }
180       if (j == class_cntrl) {
181         for (k = 0; k <= 255; k++)
182           if (iscntrl(k)) set[i++] = k;
183       }
184       if (j == class_xdigit) {
185         for (k = 'A'; k <= 'F'; k++) {
186           set[i + 6] = k | 0x20;
187           set[i++] = k;
188         }
189         i += 6;
190         arg += 10;
191         continue;
192       }
193 
194       arg += 9; //never here for class_xdigit.
195       continue;
196     }
197     if (arg[0] == '[' && arg[1] == '=') { //[=char=] only
198       arg += 2;
199       if (*arg) set[i++] = *arg;
200       if (!arg[1] || arg[1] != '=' || arg[2] != ']')
201         error_exit("bad equiv class");
202       continue;
203     }
204 save:
205     set[i++] = *arg++;
206   }
207   *len = i;
208   return set;
209 }
210 
211 static void print_map(char *set1, char *set2)
212 {
213   int r = 0, i, prev_char = -1;
214 
215   while (1)
216   {
217     i = 0;
218     r = read(STDIN_FILENO, (toybuf), sizeof(toybuf));
219     if (!r) break;
220     for (;r > i;i++) {
221 
222       if ((toys.optflags & FLAG_d) && (TT.map[(int)toybuf[i]] & 0x100)) continue;
223       if (toys.optflags & FLAG_s) {
224         if ((TT.map[(int)toybuf[i]] & 0x200) &&
225             (prev_char == TT.map[(int)toybuf[i]])) {
226           continue;
227         }
228       }
229       xputc(TT.map[(int)toybuf[i]] & 0xFF);
230       prev_char = TT.map[(int)toybuf[i]];
231       fflush(stdout);
232     }
233   }
234 }
235 
236 static void do_complement(char **set)
237 {
238   int i, j;
239   char *comp = xmalloc(256);
240 
241   for (i = 0, j = 0;i < 256; i++) {
242     if (memchr(*set, i, TT.len1)) continue;
243     else comp[j++] = (char)i;
244   }
245   free(*set);
246   TT.len1 = j;
247   *set = comp;
248 }
249 
250 void tr_main(void)
251 {
252   char *set1, *set2 = NULL;
253   int i;
254 
255   for (i = 0; i < 256; i++) TT.map[i] = i; //init map
256 
257   set1 = expand_set(toys.optargs[0], &TT.len1);
258   if (toys.optflags & FLAG_c) do_complement(&set1);
259   if (toys.optargs[1]) {
260     if (toys.optargs[1][0] == '\0') error_exit("set2 can't be empty string");
261     set2 = expand_set(toys.optargs[1], &TT.len2);
262   }
263   map_translation(set1, set2);
264 
265   print_map(set1, set2);
266   free(set1);
267   free(set2);
268 }
269