/*
 * Copyright 2011 - 2014
 * Andr\xe9 Malo or his licensors, as applicable
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "cext.h"
EXT_INIT_FUNC;

/*
 * Per-character property flags stored in rjsmin_charmask (one entry per
 * ASCII code point). Each flag is tested by the RJSMIN_IS_* macro of the
 * matching name below.
 */

/* Copied to the output verbatim by the main loop */
#define RJSMIN_DULL_BIT           (1 << 0)
/* A '/' directly following this character may start a regex literal */
#define RJSMIN_PRE_REGEX_BIT      (1 << 1)
/* Needs no special handling inside a regex literal */
#define RJSMIN_REGEX_DULL_BIT     (1 << 2)
/* Needs no special handling inside a regex character class [...] */
#define RJSMIN_REGEX_CC_DULL_BIT  (1 << 3)
/* Two of these separated by blanks keep one blank between them */
#define RJSMIN_ID_LIT_BIT         (1 << 4)
/* A newline is kept if this character follows it (see ..._C_BIT) ... */
#define RJSMIN_ID_LIT_O_BIT       (1 << 5)
/* ... and this character precedes it */
#define RJSMIN_ID_LIT_C_BIT       (1 << 6)
/* Needs no special handling inside a string literal */
#define RJSMIN_STRING_DULL_BIT    (1 << 7)
/* Whitespace other than \r and \n */
#define RJSMIN_SPACE_BIT          (1 << 8)

/* Character unit processed by the minifier; depends on the build flavour */
#ifdef EXT3
typedef Py_UNICODE rchar;
#else
typedef unsigned char rchar;
#endif
#define U(c) ((rchar)(c))

/*
 * Classification macros. Values above 127 are outside the table: they are
 * always reported as "dull"/identifier-like, and never as space or
 * pre-regex characters.
 */
#define RJSMIN_IS_DULL(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_DULL_BIT))

#define RJSMIN_IS_REGEX_DULL(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_REGEX_DULL_BIT))

#define RJSMIN_IS_REGEX_CC_DULL(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_REGEX_CC_DULL_BIT))

#define RJSMIN_IS_STRING_DULL(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_STRING_DULL_BIT))

#define RJSMIN_IS_ID_LITERAL(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_BIT))

#define RJSMIN_IS_ID_LITERAL_OPEN(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_O_BIT))

#define RJSMIN_IS_ID_LITERAL_CLOSE(c) ((U(c) > 127) || \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_C_BIT))

#define RJSMIN_IS_SPACE(c) ((U(c) <= 127) && \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_SPACE_BIT))

#define RJSMIN_IS_PRE_REGEX_1(c) ((U(c) <= 127) && \
    (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_PRE_REGEX_BIT))

/* Flag table indexed by ASCII code point (0..127), 8 entries per row */
static const unsigned short rjsmin_charmask[128] = {
    396, 396, 396, 396, 396, 396, 396, 396,
    396, 396,   2, 396, 396,   2, 396, 396,
    396, 396, 396, 396, 396, 396, 396, 396,
    396, 396, 396, 396, 396, 396, 396, 396,
    396, 175,  76, 141, 253, 141, 143,  76,
    175, 205, 141, 237, 143, 237, 141, 136,
    253, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 143, 143, 141, 143, 141, 143,
    141, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 171,   1, 197, 141, 253,
    141, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 253, 253, 253, 253, 253,
    253, 253, 253, 175, 143, 207, 141, 253
};

/*
 * Minify `length` characters of javascript from `source` into `target`.
 *
 * The input is scanned once. String literals, regex literals and (if
 * `keep_bang_comments` is non-zero) comments starting with "/" "*!" are
 * copied verbatim; other comments and runs of whitespace are dropped or
 * collapsed into a single blank or newline where one is needed to keep
 * adjacent tokens apart.
 *
 * No bounds checks are done on `target`. The caller in this file passes a
 * buffer holding as many characters as `source`.
 *
 * Returns the number of characters written to `target`.
 */
static Py_ssize_t
rjsmin(const rchar *source, rchar *target, Py_ssize_t length,
       int keep_bang_comments)
{
    const rchar *reset, *sentinel = source + length;
    rchar *tstart = target;
    rchar c, quote;

    while (source < sentinel) {
        c = *source++;
        if (RJSMIN_IS_DULL(c)) {
            *target++ = c;
            continue;
        }
        switch (c) {

        /* String */
        case U('\''): case U('"'):
            reset = source;
            *target++ = quote = c;
            while (source < sentinel) {
                c = *source++;
                *target++ = c;
                if (RJSMIN_IS_STRING_DULL(c))
                    continue;
                switch (c) {
                case U('\''): case U('"'):
                    if (c == quote)
                        goto cont;
                    continue;
                case U('\\'):
                    if (source < sentinel) {
                        c = *source++;
                        *target++ = c;
                        /* An escaped \r\n pair is one line continuation */
                        if (c == U('\r') && source < sentinel
                            && *source == U('\n'))
                            *target++ = *source++;
                    }
                    continue;
                }
                break;
            }
            /*
             * Not a complete string: undo everything copied after the
             * opening quote and rescan from there.
             */
            target -= source - reset;
            source = reset;
            continue;

        /* Comment or Regex or something else entirely */
        case U('/'):
            if (!(source < sentinel)) {
                *target++ = c;
            }
            else {
                switch (*source) {
                /* Comment */
                case U('*'): case U('/'):
                    goto skip_or_copy_ws;

                default:
                    /*
                     * A regex is attempted at the start of the output,
                     * after a pre-regex character, or after the word
                     * "return" (not preceded by an identifier character).
                     */
                    if (   target == tstart
                        || RJSMIN_IS_PRE_REGEX_1(*(target - 1))
                        || (
                            (target - tstart >= 6)
                            && *(target - 1) == U('n')
                            && *(target - 2) == U('r')
                            && *(target - 3) == U('u')
                            && *(target - 4) == U('t')
                            && *(target - 5) == U('e')
                            && *(target - 6) == U('r')
                            && (
                                   target - tstart == 6
                                || !RJSMIN_IS_ID_LITERAL(*(target - 7))
                            )
                        )) {

                        /* Regex */
                        reset = source;
                        *target++ = U('/');
                        while (source < sentinel) {
                            c = *source++;
                            *target++ = c;
                            if (RJSMIN_IS_REGEX_DULL(c))
                                continue;
                            switch (c) {
                            case U('/'):
                                goto cont;
                            case U('\\'):
                                if (source < sentinel) {
                                    c = *source++;
                                    *target++ = c;
                                    if (c == U('\r') || c == U('\n'))
                                        break;
                                }
                                continue;
                            case U('['):
                                while (source < sentinel) {
                                    c = *source++;
                                    *target++ = c;
                                    if (RJSMIN_IS_REGEX_CC_DULL(c))
                                        continue;
                                    switch (c) {
                                    case U('\\'):
                                        if (source < sentinel) {
                                            c = *source++;
                                            *target++ = c;
                                            if (c == U('\r') || c == U('\n'))
                                                break;
                                        }
                                        continue;
                                    case U(']'):
                                        goto cont_regex;
                                    }
                                    /*
                                     * Raw or escaped line terminator inside
                                     * the character class: leave the loop so
                                     * the regex attempt is abandoned below,
                                     * exactly as outside of a class.
                                     */
                                    break;
                                }
                                break;
                            }
                            break;
                        cont_regex:
                            continue;
                        }
                        /*
                         * Not a complete regex: keep only the slash and
                         * rescan what follows it.
                         */
                        target -= source - reset;
                        source = reset;
                    }
                    else {
                        /* Just a slash */
                        *target++ = c;
                    }
                    continue;
                }
            }
            continue;

        /* Whitespace */
        default:
        skip_or_copy_ws:
            /*
             * `quote` is reused here as the separator to emit: a blank,
             * upgraded to a newline once a line break or a line comment
             * has been skipped.
             */
            quote = U(' ');
            --source;
            while (source < sentinel) {
                c = *source++;
                if (RJSMIN_IS_SPACE(c))
                    continue;
                switch (c) {
                case U('\r'): case U('\n'):
                    quote = U('\n');
                    continue;
                case U('/'):
                    if (source < sentinel) {
                        switch (*source) {
                        case U('*'):
                            reset = source++;
                            /* copy bang comment, if requested */
                            if (   keep_bang_comments && source < sentinel
                                && *source == U('!')) {
                                *target++ = U('/');
                                *target++ = U('*');
                                *target++ = *source++;
                                while (source < sentinel) {
                                    c = *source++;
                                    *target++ = c;
                                    if (c == U('*') && source < sentinel
                                        && *source == U('/')) {
                                        *target++ = *source++;
                                        /* NULL marks "comment was closed" */
                                        reset = NULL;
                                        break;
                                    }
                                }
                                if (!reset)
                                    continue;
                                /* Unterminated: keep the slash only */
                                target -= source - reset;
                                source = reset;
                            }
                            /* strip regular comment */
                            else {
                                while (source < sentinel) {
                                    c = *source++;
                                    if (c == U('*') && source < sentinel
                                        && *source == U('/')) {
                                        ++source;
                                        reset = NULL;
                                        break;
                                    }
                                }
                                if (!reset)
                                    continue;
                                /* Unterminated: emit the slash, rescan */
                                source = reset;
                                *target++ = U('/');
                            }
                            goto cont;
                        case U('/'):
                            /* Line comment: skip up to and including EOL */
                            ++source;
                            while (source < sentinel) {
                                c = *source++;
                                switch (c) {
                                case U('\n'):
                                    break;
                                case U('\r'):
                                    if (source < sentinel
                                        && *source == U('\n'))
                                        ++source;
                                    break;
                                default:
                                    continue;
                                }
                                break;
                            }
                            quote = U('\n');
                            continue;
                        }
                    }
                }
                /* Not whitespace or a comment: push it back and stop */
                --source;
                break;
            }

            /* Emit the separator only where dropping it would join tokens */
            if ((tstart < target && source < sentinel)
                && ((quote == U('\n')
                     && RJSMIN_IS_ID_LITERAL_CLOSE(*(target - 1))
                     && RJSMIN_IS_ID_LITERAL_OPEN(*source))
                    ||
                    (quote == U(' ')
                     && ((RJSMIN_IS_ID_LITERAL(*(target - 1))
                          && RJSMIN_IS_ID_LITERAL(*source))
                         ||
                         (source < sentinel
                          && ((*(target - 1) == U('+')
                               && *source == U('+'))
                              ||
                              (*(target - 1) == U('-')
                               && *source == U('-'))))))))
                *target++ = quote;
        }
    cont:
        continue;
    }
    return (Py_ssize_t)(target - tstart);
}


PyDoc_STRVAR(rjsmin_jsmin__doc__,
"jsmin(script, keep_bang_comments=False)\n\
\n\
Minify javascript based on `jsmin.c by Douglas Crockford`_\\.\n\
\n\
Instead of parsing the stream char by char, it uses a regular\n\
expression approach which minifies the whole script with one big\n\
substitution regex.\n\
\n\
.. _jsmin.c by Douglas Crockford:\n\
http://www.crockford.com/javascript/jsmin.c\n\
\n\
:Note: This is a hand crafted C implementation built on the regex\n\
semantics.\n\
\n\
:Parameters:\n\
`script` : ``str``\n\
Script to minify\n\
\n\
`keep_bang_comments` : ``bool``\n\
Keep comments starting with an exclamation mark? (``/*!...*/``)\n\
\n\
:Return: Minified script\n\
:Rtype: ``str``");

/*
 * Python entry point: jsmin(script, keep_bang_comments=False).
 *
 * Parses the arguments, allocates a result object of the same length as
 * the input, runs rjsmin() on the raw buffers with the GIL released and
 * shrinks the result to the minified length.
 *
 * In the EXT2 build a unicode argument is minified as UTF-8 and decoded
 * back afterwards; any other object is converted with PyObject_Str().
 *
 * Returns a new reference, or NULL on failure.
 */
static PyObject *
rjsmin_jsmin(PyObject *self, PyObject *args, PyObject *kwds)
{
    PyObject *script, *keep_bang_comments_ = NULL, *result;
    static char *kwlist[] = {"script", "keep_bang_comments", NULL};
    Py_ssize_t slength, length;
    int keep_bang_comments;
#ifdef EXT2
    int uni;
#define UOBJ "O"
#endif
#ifdef EXT3
#define UOBJ "U"
#endif

    if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist,
                                     &script, &keep_bang_comments_))
        return NULL;

    if (!keep_bang_comments_)
        keep_bang_comments = 0;
    else {
        keep_bang_comments = PyObject_IsTrue(keep_bang_comments_);
        if (keep_bang_comments == -1)
            return NULL;
    }

#ifdef EXT2
    /* Either branch leaves `script` as a new reference owned here */
    if (PyUnicode_Check(script)) {
        if (!(script = PyUnicode_AsUTF8String(script)))
            return NULL;
        uni = 1;
    }
    else {
        if (!(script = PyObject_Str(script)))
            return NULL;
        uni = 0;
    }
#endif

#ifdef EXT3
    /* Own a reference, so the Py_DECREF below is valid in both builds */
    Py_INCREF(script);
#define PyString_GET_SIZE PyUnicode_GET_SIZE
#define PyString_AS_STRING PyUnicode_AS_UNICODE
#define _PyString_Resize PyUnicode_Resize
#define PyString_FromStringAndSize PyUnicode_FromUnicode
#endif
    slength = PyString_GET_SIZE(script);
    if (!(result = PyString_FromStringAndSize(NULL, slength))) {
        Py_DECREF(script);
        return NULL;
    }
    Py_BEGIN_ALLOW_THREADS
    length = rjsmin((rchar *)PyString_AS_STRING(script),
                    (rchar *)PyString_AS_STRING(result),
                    slength, keep_bang_comments);
    Py_END_ALLOW_THREADS

    Py_DECREF(script);
    /* Defensive only: rjsmin() as written never returns a negative value */
    if (length < 0) {
        Py_DECREF(result);
        return NULL;
    }
    if (length != slength && _PyString_Resize(&result, length) == -1)
        return NULL;

#ifdef EXT2
    if (uni) {
        script = PyUnicode_DecodeUTF8(PyString_AS_STRING(result),
                                      PyString_GET_SIZE(result), "strict");
        Py_DECREF(result);
        if (!script)
            return NULL;
        result = script;
    }
#endif
    return result;
}

/* ------------------------ BEGIN MODULE DEFINITION ------------------------ */

EXT_METHODS = {
    {"jsmin",
        (PyCFunction)rjsmin_jsmin, METH_VARARGS | METH_KEYWORDS,
        rjsmin_jsmin__doc__},

    {NULL}  /* Sentinel */
};

PyDoc_STRVAR(EXT_DOCS_VAR,
"C implementation of rjsmin\n\
==========================\n\
\n\
C implementation of rjsmin.");

EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR);

EXT_INIT_FUNC {
    PyObject *m;

    /* Create the module and populate stuff */
    if (!(m = EXT_CREATE(&EXT_DEFINE_VAR)))
        EXT_INIT_ERROR(NULL);

    EXT_ADD_UNICODE(m, "__author__", "Andr\xe9 Malo", "latin-1");
    EXT_ADD_STRING(m, "__docformat__", "restructuredtext en");

    EXT_INIT_RETURN(m);
}

/* ------------------------- END MODULE DEFINITION ------------------------- */