1 /*
2 * Copyright 2011 - 2014
 * André Malo or his licensors, as applicable
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 #include "cext.h"
/*
 * Forward declaration of the module init function whose body is given at
 * the end of this file. EXT_INIT_FUNC is not defined here (presumably it
 * comes from cext.h -- confirm).
 */
EXT_INIT_FUNC;
20
/*
 * Per-character property flags stored in rjsmin_charmask[]. Each flag is
 * tested by the RJSMIN_IS_* macro of the matching name below.
 */
#define RJSMIN_DULL_BIT           0x001  /* copied verbatim by the main loop */
#define RJSMIN_PRE_REGEX_BIT      0x002  /* may directly precede a regex     */
#define RJSMIN_REGEX_DULL_BIT     0x004  /* nothing special inside a regex   */
#define RJSMIN_REGEX_CC_DULL_BIT  0x008  /* nothing special inside [...]     */
#define RJSMIN_ID_LIT_BIT         0x010  /* identifier / literal character   */
#define RJSMIN_ID_LIT_O_BIT       0x020  /* may follow a kept newline        */
#define RJSMIN_ID_LIT_C_BIT       0x040  /* may precede a kept newline       */
#define RJSMIN_STRING_DULL_BIT    0x080  /* nothing special inside a string  */
#define RJSMIN_SPACE_BIT          0x100  /* plain (non-newline) whitespace   */

/* Character unit the minifier works on: Py_UNICODE under EXT3, bytes else */
#ifdef EXT3
typedef Py_UNICODE rchar;
#else
typedef unsigned char rchar;
#endif
#define U(c) ((rchar)(c))

/* Flag word of a character; the index is masked so it always stays in range */
#define RJSMIN_FLAGS_OF(c) (rjsmin_charmask[U(c) & 0x7F])

/* True for characters covered by the 128 entry table */
#define RJSMIN_IN_TABLE(c) (U(c) <= 127)

/* Characters beyond the table always qualify, table characters need `bit` */
#define RJSMIN_HIGH_OR_FLAG(c, bit) \
    (!RJSMIN_IN_TABLE(c) || (RJSMIN_FLAGS_OF(c) & (bit)))

/* Only table characters carrying `bit` qualify */
#define RJSMIN_LOW_AND_FLAG(c, bit) \
    (RJSMIN_IN_TABLE(c) && (RJSMIN_FLAGS_OF(c) & (bit)))

#define RJSMIN_IS_DULL(c) \
    RJSMIN_HIGH_OR_FLAG(c, RJSMIN_DULL_BIT)

#define RJSMIN_IS_REGEX_DULL(c) \
    RJSMIN_HIGH_OR_FLAG(c, RJSMIN_REGEX_DULL_BIT)

#define RJSMIN_IS_REGEX_CC_DULL(c) \
    RJSMIN_HIGH_OR_FLAG(c, RJSMIN_REGEX_CC_DULL_BIT)

#define RJSMIN_IS_STRING_DULL(c) \
    RJSMIN_HIGH_OR_FLAG(c, RJSMIN_STRING_DULL_BIT)

#define RJSMIN_IS_ID_LITERAL(c) \
    RJSMIN_HIGH_OR_FLAG(c, RJSMIN_ID_LIT_BIT)

#define RJSMIN_IS_ID_LITERAL_OPEN(c) \
    RJSMIN_HIGH_OR_FLAG(c, RJSMIN_ID_LIT_O_BIT)

#define RJSMIN_IS_ID_LITERAL_CLOSE(c) \
    RJSMIN_HIGH_OR_FLAG(c, RJSMIN_ID_LIT_C_BIT)

#define RJSMIN_IS_SPACE(c) \
    RJSMIN_LOW_AND_FLAG(c, RJSMIN_SPACE_BIT)

#define RJSMIN_IS_PRE_REGEX_1(c) \
    RJSMIN_LOW_AND_FLAG(c, RJSMIN_PRE_REGEX_BIT)

64
65
/*
 * Property flags (RJSMIN_*_BIT combinations) of the 128 characters the
 * RJSMIN_IS_* macros look up. Sixteen entries per row, indexed by character
 * code.
 */
static const unsigned short rjsmin_charmask[128] = {
    /* 0x00 - 0x0F (0x0A = LF, 0x0D = CR) */
    0x18C, 0x18C, 0x18C, 0x18C, 0x18C, 0x18C, 0x18C, 0x18C,
    0x18C, 0x18C, 0x002, 0x18C, 0x18C, 0x002, 0x18C, 0x18C,
    /* 0x10 - 0x1F */
    0x18C, 0x18C, 0x18C, 0x18C, 0x18C, 0x18C, 0x18C, 0x18C,
    0x18C, 0x18C, 0x18C, 0x18C, 0x18C, 0x18C, 0x18C, 0x18C,
    /* 0x20 - 0x2F: space ! " # $ % & ' ( ) * + , - . / */
    0x18C, 0x0AF, 0x04C, 0x08D, 0x0FD, 0x08D, 0x08F, 0x04C,
    0x0AF, 0x0CD, 0x08D, 0x0ED, 0x08F, 0x0ED, 0x08D, 0x088,
    /* 0x30 - 0x3F: 0-9 : ; < = > ? */
    0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD,
    0x0FD, 0x0FD, 0x08F, 0x08F, 0x08D, 0x08F, 0x08D, 0x08F,
    /* 0x40 - 0x4F: @ A-O */
    0x08D, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD,
    0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD,
    /* 0x50 - 0x5F: P-Z [ backslash ] ^ _ */
    0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD,
    0x0FD, 0x0FD, 0x0FD, 0x0AB, 0x001, 0x0C5, 0x08D, 0x0FD,
    /* 0x60 - 0x6F: ` a-o */
    0x08D, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD,
    0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD,
    /* 0x70 - 0x7F: p-z { | } ~ DEL */
    0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD, 0x0FD,
    0x0FD, 0x0FD, 0x0FD, 0x0AF, 0x08F, 0x0CF, 0x08D, 0x0FD
};
84
85 static Py_ssize_t
rjsmin(const rchar * source,rchar * target,Py_ssize_t length,int keep_bang_comments)86 rjsmin(const rchar *source, rchar *target, Py_ssize_t length,
87 int keep_bang_comments)
88 {
89 const rchar *reset, *sentinel = source + length;
90 rchar *tstart = target;
91 rchar c, quote;
92
93 while (source < sentinel) {
94 c = *source++;
95 if (RJSMIN_IS_DULL(c)) {
96 *target++ = c;
97 continue;
98 }
99 switch (c) {
100
101 /* String */
102 case U('\''): case U('"'):
103 reset = source;
104 *target++ = quote = c;
105 while (source < sentinel) {
106 c = *source++;
107 *target++ = c;
108 if (RJSMIN_IS_STRING_DULL(c))
109 continue;
110 switch (c) {
111 case U('\''): case U('"'):
112 if (c == quote)
113 goto cont;
114 continue;
115 case U('\\'):
116 if (source < sentinel) {
117 c = *source++;
118 *target++ = c;
119 if (c == U('\r') && source < sentinel
120 && *source == U('\n'))
121 *target++ = *source++;
122 }
123 continue;
124 }
125 break;
126 }
127 target -= source - reset;
128 source = reset;
129 continue;
130
131 /* Comment or Regex or something else entirely */
132 case U('/'):
133 if (!(source < sentinel)) {
134 *target++ = c;
135 }
136 else {
137 switch (*source) {
138 /* Comment */
139 case U('*'): case U('/'):
140 goto skip_or_copy_ws;
141
142 default:
143 if ( target == tstart
144 || RJSMIN_IS_PRE_REGEX_1(*(target - 1))
145 || (
146 (target - tstart >= 6)
147 && *(target - 1) == U('n')
148 && *(target - 2) == U('r')
149 && *(target - 3) == U('u')
150 && *(target - 4) == U('t')
151 && *(target - 5) == U('e')
152 && *(target - 6) == U('r')
153 && (
154 target - tstart == 6
155 || !RJSMIN_IS_ID_LITERAL(*(target - 7))
156 )
157 )) {
158
159 /* Regex */
160 reset = source;
161 *target++ = U('/');
162 while (source < sentinel) {
163 c = *source++;
164 *target++ = c;
165 if (RJSMIN_IS_REGEX_DULL(c))
166 continue;
167 switch (c) {
168 case U('/'):
169 goto cont;
170 case U('\\'):
171 if (source < sentinel) {
172 c = *source++;
173 *target++ = c;
174 if (c == U('\r') || c == U('\n'))
175 break;
176 }
177 continue;
178 case U('['):
179 while (source < sentinel) {
180 c = *source++;
181 *target++ = c;
182 if (RJSMIN_IS_REGEX_CC_DULL(c))
183 continue;
184 switch (c) {
185 case U('\\'):
186 if (source < sentinel) {
187 c = *source++;
188 *target++ = c;
189 if (c == U('\r') || c == U('\n'))
190 break;
191 }
192 continue;
193 case U(']'):
194 goto cont_regex;
195 }
196 }
197 break;
198 }
199 break;
200 cont_regex:
201 continue;
202 }
203 target -= source - reset;
204 source = reset;
205 }
206 else {
207 /* Just a slash */
208 *target++ = c;
209 }
210 continue;
211 }
212 }
213 continue;
214
215 /* Whitespace */
216 default:
217 skip_or_copy_ws:
218 quote = U(' ');
219 --source;
220 while (source < sentinel) {
221 c = *source++;
222 if (RJSMIN_IS_SPACE(c))
223 continue;
224 switch (c) {
225 case U('\r'): case U('\n'):
226 quote = U('\n');
227 continue;
228 case U('/'):
229 if (source < sentinel) {
230 switch (*source) {
231 case U('*'):
232 reset = source++;
233 /* copy bang comment, if requested */
234 if ( keep_bang_comments && source < sentinel
235 && *source == U('!')) {
236 *target++ = U('/');
237 *target++ = U('*');
238 *target++ = *source++;
239 while (source < sentinel) {
240 c = *source++;
241 *target++ = c;
242 if (c == U('*') && source < sentinel
243 && *source == U('/')) {
244 *target++ = *source++;
245 reset = NULL;
246 break;
247 }
248 }
249 if (!reset)
250 continue;
251 target -= source - reset;
252 source = reset;
253 }
254 /* strip regular comment */
255 else {
256 while (source < sentinel) {
257 c = *source++;
258 if (c == U('*') && source < sentinel
259 && *source == U('/')) {
260 ++source;
261 reset = NULL;
262 break;
263 }
264 }
265 if (!reset)
266 continue;
267 source = reset;
268 *target++ = U('/');
269 }
270 goto cont;
271 case U('/'):
272 ++source;
273 while (source < sentinel) {
274 c = *source++;
275 switch (c) {
276 case U('\n'):
277 break;
278 case U('\r'):
279 if (source < sentinel
280 && *source == U('\n'))
281 ++source;
282 break;
283 default:
284 continue;
285 }
286 break;
287 }
288 quote = U('\n');
289 continue;
290 }
291 }
292 }
293 --source;
294 break;
295 }
296
297 if ((tstart < target && source < sentinel)
298 && ((quote == U('\n')
299 && RJSMIN_IS_ID_LITERAL_CLOSE(*(target - 1))
300 && RJSMIN_IS_ID_LITERAL_OPEN(*source))
301 ||
302 (quote == U(' ')
303 && ((RJSMIN_IS_ID_LITERAL(*(target - 1))
304 && RJSMIN_IS_ID_LITERAL(*source))
305 || (source < sentinel
306 && ((*(target - 1) == U('+')
307 && *source == U('+'))
308 || (*(target - 1) == U('-')
309 && *source == U('-'))))))))
310 *target++ = quote;
311 }
312 cont:
313 continue;
314 }
315 return (Py_ssize_t)(target - tstart);
316 }
317
318
319 PyDoc_STRVAR(rjsmin_jsmin__doc__,
320 "jsmin(script, keep_bang_comments=False)\n\
321 \n\
322 Minify javascript based on `jsmin.c by Douglas Crockford`_\\.\n\
323 \n\
324 Instead of parsing the stream char by char, it uses a regular\n\
325 expression approach which minifies the whole script with one big\n\
326 substitution regex.\n\
327 \n\
328 .. _jsmin.c by Douglas Crockford:\n\
329 http://www.crockford.com/javascript/jsmin.c\n\
330 \n\
331 :Note: This is a hand crafted C implementation built on the regex\n\
332 semantics.\n\
333 \n\
334 :Parameters:\n\
335 `script` : ``str``\n\
336 Script to minify\n\
337 \n\
338 `keep_bang_comments` : ``bool``\n\
339 Keep comments starting with an exclamation mark? (``/*!...*/``)\n\
340 \n\
341 :Return: Minified script\n\
342 :Rtype: ``str``");
343
344 static PyObject *
rjsmin_jsmin(PyObject * self,PyObject * args,PyObject * kwds)345 rjsmin_jsmin(PyObject *self, PyObject *args, PyObject *kwds)
346 {
347 PyObject *script, *keep_bang_comments_ = NULL, *result;
348 static char *kwlist[] = {"script", "keep_bang_comments", NULL};
349 Py_ssize_t slength, length;
350 int keep_bang_comments;
351 #ifdef EXT2
352 int uni;
353 #define UOBJ "O"
354 #endif
355 #ifdef EXT3
356 #define UOBJ "U"
357 #endif
358
359 if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist,
360 &script, &keep_bang_comments_))
361 return NULL;
362
363 if (!keep_bang_comments_)
364 keep_bang_comments = 0;
365 else {
366 keep_bang_comments = PyObject_IsTrue(keep_bang_comments_);
367 if (keep_bang_comments == -1)
368 return NULL;
369 }
370
371 #ifdef EXT2
372 if (PyUnicode_Check(script)) {
373 if (!(script = PyUnicode_AsUTF8String(script)))
374 return NULL;
375 uni = 1;
376 }
377 else {
378 if (!(script = PyObject_Str(script)))
379 return NULL;
380 uni = 0;
381 }
382 #endif
383
384 #ifdef EXT3
385 Py_INCREF(script);
386 #define PyString_GET_SIZE PyUnicode_GET_SIZE
387 #define PyString_AS_STRING PyUnicode_AS_UNICODE
388 #define _PyString_Resize PyUnicode_Resize
389 #define PyString_FromStringAndSize PyUnicode_FromUnicode
390 #endif
391
392 slength = PyString_GET_SIZE(script);
393 if (!(result = PyString_FromStringAndSize(NULL, slength))) {
394 Py_DECREF(script);
395 return NULL;
396 }
397 Py_BEGIN_ALLOW_THREADS
398 length = rjsmin((rchar *)PyString_AS_STRING(script),
399 (rchar *)PyString_AS_STRING(result),
400 slength, keep_bang_comments);
401 Py_END_ALLOW_THREADS
402
403 Py_DECREF(script);
404 if (length < 0) {
405 Py_DECREF(result);
406 return NULL;
407 }
408 if (length != slength && _PyString_Resize(&result, length) == -1)
409 return NULL;
410
411 #ifdef EXT2
412 if (uni) {
413 script = PyUnicode_DecodeUTF8(PyString_AS_STRING(result),
414 PyString_GET_SIZE(result), "strict");
415 Py_DECREF(result);
416 if (!script)
417 return NULL;
418 result = script;
419 }
420 #endif
421 return result;
422 }
423
424 /* ------------------------ BEGIN MODULE DEFINITION ------------------------ */
425
426 EXT_METHODS = {
427 {"jsmin",
428 (PyCFunction)rjsmin_jsmin, METH_VARARGS | METH_KEYWORDS,
429 rjsmin_jsmin__doc__},
430
431 {NULL} /* Sentinel */
432 };
433
434 PyDoc_STRVAR(EXT_DOCS_VAR,
435 "C implementation of rjsmin\n\
436 ==========================\n\
437 \n\
438 C implementation of rjsmin.");
439
440
441 EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR);
442
/*
 * Module init function: creates the module object and attaches the
 * `__author__` and `__docformat__` attributes to it.
 *
 * All EXT_* names used here are macros which are not defined in this file
 * (presumably they come from cext.h -- confirm); what exactly they expand
 * to, including how errors are reported, cannot be told from here.
 */
EXT_INIT_FUNC {
    PyObject *m;

    /* Create the module and populate stuff */
    if (!(m = EXT_CREATE(&EXT_DEFINE_VAR)))
        EXT_INIT_ERROR(NULL);

    /* The author name is given as latin-1 bytes, \xe9 being the e-acute */
    EXT_ADD_UNICODE(m, "__author__", "Andr\xe9 Malo", "latin-1");
    EXT_ADD_STRING(m, "__docformat__", "restructuredtext en");

    EXT_INIT_RETURN(m);
}
455
456 /* ------------------------- END MODULE DEFINITION ------------------------- */
457