1 /*
2  * Copyright 2011 - 2014
3  * Andr\xe9 Malo or his licensors, as applicable
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 #include "cext.h"
19 EXT_INIT_FUNC;
20 
/*
 * Working character type of the minifier and a cast helper for it.
 *
 * EXT3 builds operate on Py_UNICODE buffers, every other build on raw bytes.
 */
#if defined(EXT3)
typedef Py_UNICODE rchar;
#else
typedef unsigned char rchar;
#endif

/* Convert a character constant (or any value) to the working type */
#define U(c) ((rchar)(c))
27 
28 typedef struct {
29     const rchar *start;
30     const rchar *sentinel;
31     const rchar *tsentinel;
32     Py_ssize_t at_group;
33     int in_macie5;
34     int in_rule;
35     int keep_bang_comments;
36 } rcssmin_ctx_t;
37 
/*
 * Tells copy_space whether it may have to emit a separating blank
 */
typedef enum {
    /* Emit one blank if the surrounding characters require it */
    NEED_SPACE_MAYBE = 0,

    /* Never emit a blank, only swallow space and comments */
    NEED_SPACE_NEVER = 1
} need_space_flag;
42 
43 
/*
 * Character class bits stored in rcssmin_charmask
 */
#define RCSSMIN_DULL_BIT         0x001  /* main loop copies it verbatim */
#define RCSSMIN_HEX_BIT          0x002  /* hex digit (escape sequences) */
#define RCSSMIN_ESC_BIT          0x004  /* may follow a backslash directly */
#define RCSSMIN_SPACE_BIT        0x008  /* white space */
#define RCSSMIN_STRING_DULL_BIT  0x010  /* needs no handling inside strings */
#define RCSSMIN_NMCHAR_BIT       0x020  /* identifier character */
#define RCSSMIN_URI_DULL_BIT     0x040  /* needs no handling in bare URIs */
#define RCSSMIN_PRE_CHAR_BIT     0x080  /* no blank needed after it */
#define RCSSMIN_POST_CHAR_BIT    0x100  /* no blank needed before it */
53 
/*
 * Character class table: one entry per ASCII code point, each entry being
 * a combination of the RCSSMIN_*_BIT flags.
 */
static const unsigned short rcssmin_charmask[128] = {
    /* 0x00 - 0x07 */  21,  21,  21,  21,  21,  21,  21,  21,
    /* 0x08 - 0x0F */  21,  28,   8,  21,   8,   8,  21,  21,
    /* 0x10 - 0x17 */  21,  21,  21,  21,  21,  21,  21,  21,
    /* 0x18 - 0x1F */  21,  21,  21,  21,  21,  21,  21,  21,
    /* 0x20 - 0x27 */  28, 469,   4,  85,  85,  85,  85,   4,
    /* 0x28 - 0x2F */ 149, 277,  85, 469, 469, 117,  85,  84,
    /* 0x30 - 0x37 */ 115, 115, 115, 115, 115, 115, 115, 115,
    /* 0x38 - 0x3F */ 115, 115, 468, 340,  85, 469, 468,  85,
    /* 0x40 - 0x47 */  84, 115, 115, 115, 115, 115, 115, 117,
    /* 0x48 - 0x4F */ 117, 117, 117, 117, 117, 117, 117, 117,
    /* 0x50 - 0x57 */ 117, 117, 117, 117, 117, 117, 117, 117,
    /* 0x58 - 0x5F */ 117, 117, 117, 213,   4, 341,  85, 117,
    /* 0x60 - 0x67 */  85, 115, 115, 115, 115, 115, 115, 117,
    /* 0x68 - 0x6F */ 117, 117, 117, 117, 117, 117, 117, 117,
    /* 0x70 - 0x77 */ 117, 117, 117, 117, 117, 116, 117, 117,
    /* 0x78 - 0x7F */ 117, 117, 117, 468,  85, 468,  85,  21
};
72 
/*
 * Character class predicates.
 *
 * Non-ASCII characters (> 127) are never looked up; each predicate
 * hard-wires its answer for them. The argument is evaluated more than once,
 * so it must be free of side effects.
 */
#define RCSSMIN_HIGH_(c)  (U(c) > 127)
#define RCSSMIN_MASK_(c)  (rcssmin_charmask[U(c) & 0x7F])

/* Classes that include every non-ASCII character */
#define RCSSMIN_IS_DULL(c) \
    (RCSSMIN_HIGH_(c) || (RCSSMIN_MASK_(c) & RCSSMIN_DULL_BIT))

#define RCSSMIN_IS_ESC(c) \
    (RCSSMIN_HIGH_(c) || (RCSSMIN_MASK_(c) & RCSSMIN_ESC_BIT))

#define RCSSMIN_IS_STRING_DULL(c) \
    (RCSSMIN_HIGH_(c) || (RCSSMIN_MASK_(c) & RCSSMIN_STRING_DULL_BIT))

#define RCSSMIN_IS_NMCHAR(c) \
    (RCSSMIN_HIGH_(c) || (RCSSMIN_MASK_(c) & RCSSMIN_NMCHAR_BIT))

#define RCSSMIN_IS_URI_DULL(c) \
    (RCSSMIN_HIGH_(c) || (RCSSMIN_MASK_(c) & RCSSMIN_URI_DULL_BIT))

/* Classes that exclude every non-ASCII character */
#define RCSSMIN_IS_HEX(c) \
    (!RCSSMIN_HIGH_(c) && (RCSSMIN_MASK_(c) & RCSSMIN_HEX_BIT))

#define RCSSMIN_IS_SPACE(c) \
    (!RCSSMIN_HIGH_(c) && (RCSSMIN_MASK_(c) & RCSSMIN_SPACE_BIT))

#define RCSSMIN_IS_PRE_CHAR(c) \
    (!RCSSMIN_HIGH_(c) && (RCSSMIN_MASK_(c) & RCSSMIN_PRE_CHAR_BIT))

#define RCSSMIN_IS_POST_CHAR(c) \
    (!RCSSMIN_HIGH_(c) && (RCSSMIN_MASK_(c) & RCSSMIN_POST_CHAR_BIT))
99 
100 
101 static const rchar pattern_url[] = {
102     /*U('u'),*/ U('r'), U('l'), U('(')
103 };
104 
105 static const rchar pattern_ie7[] = {
106     /*U('>'),*/ U('/'), U('*'), U('*'), U('/')
107 };
108 
109 static const rchar pattern_media[] = {
110     U('m'), U('e'), U('d'), U('i'), U('a'),
111     U('M'), U('E'), U('D'), U('I'), U('A')
112 };
113 
114 static const rchar pattern_document[] = {
115     U('d'), U('o'), U('c'), U('u'), U('m'), U('e'), U('n'), U('t'),
116     U('D'), U('O'), U('C'), U('U'), U('M'), U('E'), U('N'), U('T')
117 };
118 
119 static const rchar pattern_supports[] = {
120     U('s'), U('u'), U('p'), U('p'), U('o'), U('r'), U('t'), U('s'),
121     U('S'), U('U'), U('P'), U('P'), U('O'), U('R'), U('T'), U('S')
122 };
123 
124 static const rchar pattern_keyframes[] = {
125     U('k'), U('e'), U('y'), U('f'), U('r'), U('a'), U('m'), U('e'), U('s'),
126     U('K'), U('E'), U('Y'), U('F'), U('R'), U('A'), U('M'), U('E'), U('S')
127 };
128 
129 static const rchar pattern_vendor_o[] = {
130     U('-'), U('o'), U('-'),
131     U('-'), U('O'), U('-')
132 };
133 
134 static const rchar pattern_vendor_moz[] = {
135     U('-'), U('m'), U('o'), U('z'), U('-'),
136     U('-'), U('M'), U('O'), U('Z'), U('-')
137 };
138 
139 static const rchar pattern_vendor_webkit[] = {
140     U('-'), U('w'), U('e'), U('b'), U('k'), U('i'), U('t'), U('-'),
141     U('-'), U('W'), U('E'), U('B'), U('K'), U('I'), U('T'), U('-')
142 };
143 
144 static const rchar pattern_vendor_ms[] = {
145     U('-'), U('m'), U('s'), U('-'),
146     U('-'), U('M'), U('S'), U('-')
147 };
148 
149 static const rchar pattern_first[] = {
150     U('f'), U('i'), U('r'), U('s'), U('t'), U('-'), U('l'),
151     U('F'), U('I'), U('R'), U('S'), U('T'), U('-'), U('L')
152 };
153 
154 static const rchar pattern_line[] = {
155     U('i'), U('n'), U('e'),
156     U('I'), U('N'), U('E'),
157 };
158 
159 static const rchar pattern_letter[] = {
160     U('e'), U('t'), U('t'), U('e'), U('r'),
161     U('E'), U('T'), U('T'), U('E'), U('R')
162 };
163 
164 static const rchar pattern_macie5_init[] = {
165     U('/'), U('*'), U('\\'), U('*'), U('/')
166 };
167 
168 static const rchar pattern_macie5_exit[] = {
169     U('/'), U('*'), U('*'), U('/')
170 };
171 
172 /*
173  * Match a pattern (and copy immediately to target)
174  */
175 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
176 #pragma GCC diagnostic push
177 #pragma GCC diagnostic ignored "-Wstrict-overflow"
178 #endif
179 static int
copy_match(const rchar * pattern,const rchar * psentinel,const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)180 copy_match(const rchar *pattern, const rchar *psentinel,
181            const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
182 {
183     const rchar *source = *source_;
184     rchar *target = *target_;
185     rchar c;
186 
187     while (pattern < psentinel
188            && source < ctx->sentinel && target < ctx->tsentinel
189            && ((c = *source++) == *pattern++))
190         *target++ = c;
191 
192     *source_ = source;
193     *target_ = target;
194 
195     return (pattern == psentinel);
196 }
197 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
198 #pragma GCC diagnostic pop
199 #endif
200 
/* Run copy_match against the complete pattern_<PAT> array */
#define MATCH(PAT, source, target, ctx) (                                   \
    copy_match(pattern_##PAT,                                               \
               pattern_##PAT                                                \
                   + sizeof(pattern_##PAT) / sizeof(pattern_##PAT[0]),      \
               source, target, ctx)                                         \
)
206 
207 
208 /*
209  * Match a pattern (and copy immediately to target) - CI version
210  */
211 static int
copy_imatch(const rchar * pattern,const rchar * psentinel,const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)212 copy_imatch(const rchar *pattern, const rchar *psentinel,
213             const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
214 {
215     const rchar *source = *source_, *pstart = pattern;
216     rchar *target = *target_;
217     rchar c;
218 
219     while (pattern < psentinel
220            && source < ctx->sentinel && target < ctx->tsentinel
221            && ((c = *source++) == *pattern
222                || c == pstart[(pattern - pstart) + (psentinel - pstart)])) {
223         ++pattern;
224         *target++ = c;
225     }
226 
227     *source_ = source;
228     *target_ = target;
229 
230     return (pattern == psentinel);
231 }
232 
/* Run copy_imatch against pattern_<PAT>; only the first half of the array
 * (the lowercase spelling) is handed over as the pattern range */
#define IMATCH(PAT, source, target, ctx) (                                   \
    copy_imatch(pattern_##PAT,                                               \
                pattern_##PAT                                                \
                    + sizeof(pattern_##PAT) / sizeof(pattern_##PAT[0]) / 2,  \
                source, target, ctx)                                         \
)
238 
239 
240 /*
241  * Copy characters
242  */
243 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
244 #pragma GCC diagnostic push
245 #pragma GCC diagnostic ignored "-Wstrict-overflow"
246 #endif
247 static int
copy(const rchar * source,const rchar * sentinel,rchar ** target_,rcssmin_ctx_t * ctx)248 copy(const rchar *source, const rchar *sentinel, rchar **target_,
249      rcssmin_ctx_t *ctx)
250 {
251     rchar *target = *target_;
252 
253     while (source < sentinel && target < ctx->tsentinel)
254         *target++ = *source++;
255 
256     *target_ = target;
257 
258     return (source == sentinel);
259 }
260 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
261 #pragma GCC diagnostic pop
262 #endif
263 
/* Emit the complete pattern_<PAT> array through copy() */
#define COPY_PAT(PAT, target, ctx) (                                  \
    copy(pattern_##PAT,                                               \
         pattern_##PAT                                                \
             + sizeof(pattern_##PAT) / sizeof(pattern_##PAT[0]),      \
         target, ctx)                                                 \
)
269 
270 
/*
 * The ABORT macros work with known local variables!
 *
 * They expect `source`, `target`, `source_`, `target_` and `ctx` to exist in
 * the calling function. The local cursors are published to the caller only
 * if the target buffer ran full while source characters are still left;
 * in every other case the caller's cursors stay as they are. Afterwards the
 * function returns.
 */
#define ABORT_(RET) do {                                        \
    if (source < ctx->sentinel && target >= ctx->tsentinel) {   \
        *source_ = source;                                      \
        *target_ = target;                                      \
    }                                                           \
    return RET;                                                 \
} while (0)


/* ABORT leaves a void function, RABORT(RET) returns RET */
#define CRAPPY_C90_COMPATIBLE_EMPTY
#define ABORT ABORT_(CRAPPY_C90_COMPATIBLE_EMPTY)
#define RABORT(RET) ABORT_((RET))
286 
287 
288 /*
289  * Copy escape
290  */
291 static void
copy_escape(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)292 copy_escape(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
293 {
294     const rchar *source = *source_, *hsentinel;
295     rchar *target = *target_;
296     rchar c;
297 
298     *target++ = U('\\');
299     *target_ = target;
300 
301     if (source < ctx->sentinel && target < ctx->tsentinel) {
302         c = *source++;
303         if (RCSSMIN_IS_ESC(c)) {
304             *target++ = c;
305         }
306         else if (RCSSMIN_IS_HEX(c)) {
307             *target++ = c;
308 
309             /* 6 hex chars max, one we got already */
310             if (ctx->sentinel - source > 5)
311                 hsentinel = source + 5;
312             else
313                 hsentinel = ctx->sentinel;
314 
315             while (source < hsentinel && target < ctx->tsentinel
316                    && (c = *source, RCSSMIN_IS_HEX(c))) {
317                 ++source;
318                 *target++ = c;
319             }
320 
321             /* One optional space after */
322             if (source < ctx->sentinel && target < ctx->tsentinel) {
323                 if (source == hsentinel)
324                     c = *source;
325                 if (RCSSMIN_IS_SPACE(c)) {
326                     ++source;
327                     *target++ = U(' ');
328                     if (c == U('\r') && source < ctx->sentinel
329                         && *source == U('\n'))
330                         ++source;
331                 }
332             }
333         }
334     }
335 
336     *target_ = target;
337     *source_ = source;
338 }
339 
340 
341 /*
342  * Copy string
343  */
344 static void
copy_string(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)345 copy_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
346 {
347     const rchar *source = *source_;
348     rchar *target = *target_;
349     rchar c, quote = source[-1];
350 
351     *target++ = quote;
352     *target_ = target;
353 
354     while (source < ctx->sentinel && target < ctx->tsentinel) {
355         c = *target++ = *source++;
356         if (RCSSMIN_IS_STRING_DULL(c))
357             continue;
358 
359         switch (c) {
360         case U('\''): case U('"'):
361             if (c == quote) {
362                 *target_ = target;
363                 *source_ = source;
364                 return;
365             }
366             continue;
367 
368         case U('\\'):
369             if (source < ctx->sentinel && target < ctx->tsentinel) {
370                 c = *source++;
371                 switch (c) {
372                 case U('\r'):
373                     if (source < ctx->sentinel && *source == U('\n'))
374                         ++source;
375                     /* fall through */
376 
377                 case U('\n'): case U('\f'):
378                     --target;
379                     break;
380 
381                 default:
382                     *target++ = c;
383                 }
384             }
385             continue;
386         }
387         break; /* forbidden characters */
388     }
389 
390     ABORT;
391 }
392 
393 
394 /*
395  * Copy URI string
396  */
397 static int
copy_uri_string(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)398 copy_uri_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
399 {
400     const rchar *source = *source_;
401     rchar *target = *target_;
402     rchar c, quote = source[-1];
403 
404     *target++ = quote;
405     *target_ = target;
406 
407     while (source < ctx->sentinel && target < ctx->tsentinel) {
408         c = *source++;
409         if (RCSSMIN_IS_SPACE(c))
410             continue;
411         *target++ = c;
412         if (RCSSMIN_IS_STRING_DULL(c))
413             continue;
414 
415         switch (c) {
416         case U('\''): case U('"'):
417             if (c == quote) {
418                 *target_ = target;
419                 *source_ = source;
420                 return 0;
421             }
422             continue;
423 
424         case U('\\'):
425             if (source < ctx->sentinel && target < ctx->tsentinel) {
426                 c = *source;
427                 switch (c) {
428                 case U('\r'):
429                     if ((source + 1) < ctx->sentinel && source[1] == U('\n'))
430                         ++source;
431                     /* fall through */
432 
433                 case U('\n'): case U('\f'):
434                     --target;
435                     ++source;
436                     break;
437 
438                 default:
439                     --target;
440                     copy_escape(&source, &target, ctx);
441                 }
442             }
443             continue;
444         }
445 
446         break; /* forbidden characters */
447     }
448 
449     RABORT(-1);
450 }
451 
452 
453 /*
454  * Copy URI (unquoted)
455  */
456 static int
copy_uri_unquoted(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)457 copy_uri_unquoted(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
458 {
459     const rchar *source = *source_;
460     rchar *target = *target_;
461     rchar c;
462 
463     *target++ = source[-1];
464     *target_ = target;
465 
466     while (source < ctx->sentinel && target < ctx->tsentinel) {
467         c = *source++;
468         if (RCSSMIN_IS_SPACE(c))
469             continue;
470         *target++ = c;
471         if (RCSSMIN_IS_URI_DULL(c))
472             continue;
473 
474         switch (c) {
475 
476         case U(')'):
477             *target_ = target - 1;
478             *source_ = source - 1;
479             return 0;
480 
481         case U('\\'):
482             if (source < ctx->sentinel && target < ctx->tsentinel) {
483                 c = *source;
484                 switch (c) {
485                 case U('\r'):
486                     if ((source + 1) < ctx->sentinel && source[1] == U('\n'))
487                         ++source;
488                     /* fall through */
489 
490                 case U('\n'): case U('\f'):
491                     --target;
492                     ++source;
493                     break;
494 
495                 default:
496                     --target;
497                     copy_escape(&source, &target, ctx);
498                 }
499             }
500             continue;
501         }
502 
503         break; /* forbidden characters */
504     }
505 
506     RABORT(-1);
507 }
508 
509 
510 /*
511  * Copy url
512  */
513 static void
copy_url(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)514 copy_url(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
515 {
516     const rchar *source = *source_;
517     rchar *target = *target_;
518     rchar c;
519 
520     *target++ = U('u');
521     *target_ = target;
522 
523     /* Must not be inside an identifier */
524     if ((source != ctx->start + 1) && RCSSMIN_IS_NMCHAR(source[-2]))
525         return;
526 
527     if (!MATCH(url, &source, &target, ctx)
528         || !(source < ctx->sentinel && target < ctx->tsentinel))
529         ABORT;
530 
531     while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source))
532         ++source;
533 
534     if (!(source < ctx->sentinel))
535         ABORT;
536 
537     c = *source++;
538     switch (c) {
539     case U('"'): case U('\''):
540         if (copy_uri_string(&source, &target, ctx) == -1)
541             ABORT;
542 
543         while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source))
544             ++source;
545         break;
546 
547     default:
548         if (copy_uri_unquoted(&source, &target, ctx) == -1)
549             ABORT;
550     }
551 
552     if (!(source < ctx->sentinel && target < ctx->tsentinel))
553         ABORT;
554 
555     if ((*target++ = *source++) != U(')'))
556         ABORT;
557 
558     *target_ = target;
559     *source_ = source;
560 }
561 
562 
563 /*
564  * Copy @-group
565  */
566 static void
copy_at_group(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)567 copy_at_group(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
568 {
569     const rchar *source = *source_;
570     rchar *target = *target_;
571 
572     *target++ = U('@');
573     *target_ = target;
574 
575 #define REMATCH(what) ( \
576     source = *source_, \
577     target = *target_, \
578     IMATCH(what, &source, &target, ctx) \
579 )
580 #define CMATCH(what) IMATCH(what, &source, &target, ctx)
581 
582     if ((  !CMATCH(media)
583         && !REMATCH(supports)
584         && !REMATCH(document)
585         && !REMATCH(keyframes)
586         && !(REMATCH(vendor_webkit) && CMATCH(keyframes))
587         && !(REMATCH(vendor_moz) && CMATCH(keyframes))
588         && !(REMATCH(vendor_o) && CMATCH(keyframes))
589         && !(REMATCH(vendor_ms) && CMATCH(keyframes)))
590         || !(source < ctx->sentinel && target < ctx->tsentinel)
591         || RCSSMIN_IS_NMCHAR(*source))
592         ABORT;
593 
594 #undef CMATCH
595 #undef REMATCH
596 
597     ++ctx->at_group;
598 
599     *target_ = target;
600     *source_ = source;
601 }
602 
603 
604 /*
605  * Skip space
606  */
607 static const rchar *
skip_space(const rchar * source,rcssmin_ctx_t * ctx)608 skip_space(const rchar *source, rcssmin_ctx_t *ctx)
609 {
610     const rchar *begin = source;
611     int res;
612     rchar c;
613 
614     while (source < ctx->sentinel) {
615         c = *source;
616         if (RCSSMIN_IS_SPACE(c)) {
617             ++source;
618             continue;
619         }
620         else if (c == U('/')) {
621             ++source;
622             if (!(source < ctx->sentinel && *source == U('*'))) {
623                 --source;
624                 break;
625             }
626             ++source;
627             res = 0;
628             while (source < ctx->sentinel) {
629                 c = *source++;
630                 if (c != U('*'))
631                     continue;
632                 if (!(source < ctx->sentinel))
633                     return begin;
634                 if (*source != U('/'))
635                     continue;
636 
637                 /* Comment complete */
638                 ++source;
639                 res = 1;
640                 break;
641             }
642             if (!res)
643                 return begin;
644 
645             continue;
646         }
647 
648         break;
649     }
650 
651     return source;
652 }
653 
654 
655 /*
656  * Copy space
657  */
658 static void
copy_space(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx,need_space_flag need_space)659 copy_space(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx,
660            need_space_flag need_space)
661 {
662     const rchar *source = *source_, *end, *comment;
663     rchar *target = *target_;
664     int res;
665     rchar c;
666 
667     --source;
668     if (need_space == NEED_SPACE_MAYBE
669         && source > ctx->start
670         && !RCSSMIN_IS_PRE_CHAR(source[-1])
671         && (end = skip_space(source, ctx)) < ctx->sentinel
672         && (!RCSSMIN_IS_POST_CHAR(*end)
673             || (*end == U(':') && !ctx->in_rule && !ctx->at_group))) {
674 
675         if (!(target < ctx->tsentinel))
676             ABORT;
677         *target++ = U(' ');
678     }
679 
680     while (source < ctx->sentinel) {
681         switch (c = *source) {
682 
683         /* comment */
684         case U('/'):
685             comment = source++;
686             if (!((source < ctx->sentinel && *source == U('*')))) {
687                 --source;
688                 break;
689             }
690             ++source;
691             res = 0;
692             while (source < ctx->sentinel) {
693                 c = *source++;
694                 if (c != U('*'))
695                     continue;
696                 if (!(source < ctx->sentinel))
697                     ABORT;
698                 if (*source != U('/'))
699                     continue;
700 
701                 /* Comment complete */
702                 ++source;
703                 res = 1;
704 
705                 if (ctx->keep_bang_comments && comment[2] == U('!')) {
706                     ctx->in_macie5 = (source[-3] == U('\\'));
707                     if (!copy(comment, source, &target, ctx))
708                         ABORT;
709                 }
710                 else if (source[-3] == U('\\')) {
711                     if (!ctx->in_macie5) {
712                         if (!COPY_PAT(macie5_init, &target, ctx))
713                             ABORT;
714                     }
715                     ctx->in_macie5 = 1;
716                 }
717                 else if (ctx->in_macie5) {
718                     if (!COPY_PAT(macie5_exit, &target, ctx))
719                         ABORT;
720                     ctx->in_macie5 = 0;
721                 }
722                 /* else don't copy anything */
723                 break;
724             }
725             if (!res)
726                 ABORT;
727             continue;
728 
729         /* space */
730         case U(' '): case U('\t'): case U('\r'): case U('\n'): case U('\f'):
731             ++source;
732             continue;
733         }
734 
735         break;
736     }
737 
738     *source_ = source;
739     *target_ = target;
740 }
741 
742 
743 /*
744  * Copy space if comment
745  */
746 static int
copy_space_comment(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx,need_space_flag need_space)747 copy_space_comment(const rchar **source_, rchar **target_,
748                    rcssmin_ctx_t *ctx, need_space_flag need_space)
749 {
750     const rchar *source = *source_;
751     rchar *target = *target_;
752 
753     if (source < ctx->sentinel && *source == U('*')) {
754         copy_space(source_, target_, ctx, need_space);
755         if (*source_ > source)
756             return 0;
757     }
758     if (!(target < ctx->tsentinel))
759         RABORT(-1);
760 
761     *target++ = source[-1];
762 
763     /* *source_ = source; <-- unchanged */
764     *target_ = target;
765 
766     return -1;
767 }
768 
769 
770 /*
771  * Copy space if exists
772  */
773 static int
copy_space_optional(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)774 copy_space_optional(const rchar **source_, rchar **target_,
775                     rcssmin_ctx_t *ctx)
776 {
777     const rchar *source = *source_;
778 
779     if (!(source < ctx->sentinel))
780         return -1;
781 
782     if (*source == U('/')) {
783         *source_ = source + 1;
784         return copy_space_comment(source_, target_, ctx, NEED_SPACE_NEVER);
785     }
786     else if (RCSSMIN_IS_SPACE(*source)) {
787         *source_ = source + 1;
788         copy_space(source_, target_, ctx, NEED_SPACE_NEVER);
789         return 0;
790     }
791 
792     return -1;
793 }
794 
795 
796 /*
797  * Copy :first-line|letter
798  */
799 static void
copy_first(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)800 copy_first(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
801 {
802     const rchar *source = *source_, *next, *source_fork;
803     rchar *target = *target_, *target_fork;
804 
805     *target++ = U(':');
806     *target_ = target;
807 
808     if (!IMATCH(first, &source, &target, ctx)
809         || !(source < ctx->sentinel && target < ctx->tsentinel))
810         ABORT;
811 
812     source_fork = source;
813     target_fork = target;
814 
815     if (!IMATCH(line, &source, &target, ctx)) {
816         source = source_fork;
817         target = target_fork;
818 
819         if (!IMATCH(letter, &source, &target, ctx)
820             || !(source < ctx->sentinel && target < ctx->tsentinel))
821             ABORT;
822     }
823 
824     next = skip_space(source, ctx);
825     if (!(next < ctx->sentinel && target < ctx->tsentinel
826         && (*next == U('{') || *next == U(','))))
827         ABORT;
828 
829     *target++ = U(' ');
830     *target_ = target;
831     *source_ = source;
832     (void)copy_space_optional(source_, target_, ctx);
833 }
834 
835 
836 /*
837  * Copy IE7 hack
838  */
839 static void
copy_ie7hack(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)840 copy_ie7hack(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
841 {
842     const rchar *source = *source_;
843     rchar *target = *target_;
844 
845     *target++ = U('>');
846     *target_ = target;
847 
848     if (ctx->in_rule || ctx->at_group)
849         return; /* abort */
850 
851     if (!MATCH(ie7, &source, &target, ctx))
852         ABORT;
853 
854     ctx->in_macie5 = 0;
855 
856     *target_ = target;
857     *source_ = source;
858 
859     (void)copy_space_optional(source_, target_, ctx);
860 }
861 
862 
863 /*
864  * Copy semicolon; miss out duplicates or even this one (before '}')
865  */
866 static void
copy_semicolon(const rchar ** source_,rchar ** target_,rcssmin_ctx_t * ctx)867 copy_semicolon(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx)
868 {
869     const rchar *source = *source_, *begin, *end;
870     rchar *target = *target_;
871 
872     begin = source;
873     while (source < ctx->sentinel) {
874         end = skip_space(source, ctx);
875         if (!(end < ctx->sentinel)) {
876             if (!(target < ctx->tsentinel))
877                 ABORT;
878             *target++ = U(';');
879             break;
880         }
881         switch (*end) {
882         case U(';'):
883             source = end + 1;
884             continue;
885 
886         case U('}'):
887             if (ctx->in_rule)
888                 break;
889 
890             /* fall through */
891         default:
892             if (!(target < ctx->tsentinel))
893                 ABORT;
894             *target++ = U(';');
895             break;
896         }
897 
898         break;
899     }
900 
901     source = begin;
902     *target_ = target;
903     while (source < ctx->sentinel) {
904         if (*source == U(';')) {
905             ++source;
906             continue;
907         }
908 
909         if (copy_space_optional(&source, target_, ctx) == 0)
910             continue;
911 
912         break;
913     }
914 
915     *source_ = source;
916 }
917 
918 
919 /*
920  * Main function
921  *
922  * The return value determines the result length (kept in the target buffer).
923  * However, if the target buffer is too small, the return value is greater
924  * than tlength. The difference to tlength is the number of unconsumed source
925  * characters at the time the buffer was full. In this case you should resize
926  * the target buffer to the return value and call rcssmin again. Repeat as
927  * often as needed.
928  */
929 static Py_ssize_t
rcssmin(const rchar * source,rchar * target,Py_ssize_t slength,Py_ssize_t tlength,int keep_bang_comments)930 rcssmin(const rchar *source, rchar *target, Py_ssize_t slength,
931         Py_ssize_t tlength, int keep_bang_comments)
932 {
933     rcssmin_ctx_t ctx_, *ctx = &ctx_;
934     const rchar *tstart = target;
935     rchar c;
936 
937     ctx->start = source;
938     ctx->sentinel = source + slength;
939     ctx->tsentinel = target + tlength;
940     ctx->at_group = 0;
941     ctx->in_macie5 = 0;
942     ctx->in_rule = 0;
943     ctx->keep_bang_comments = keep_bang_comments;
944 
945     while (source < ctx->sentinel && target < ctx->tsentinel) {
946         c = *source++;
947         if (RCSSMIN_IS_DULL(c)) {
948             *target++ = c;
949             continue;
950         }
951         else if (RCSSMIN_IS_SPACE(c)) {
952             copy_space(&source, &target, ctx, NEED_SPACE_MAYBE);
953             continue;
954         }
955 
956         switch (c) {
957 
958         /* Escape */
959         case U('\\'):
960             copy_escape(&source, &target, ctx);
961             continue;
962 
963         /* String */
964         case U('"'): case U('\''):
965             copy_string(&source, &target, ctx);
966             continue;
967 
968         /* URL */
969         case U('u'):
970             copy_url(&source, &target, ctx);
971             continue;
972 
973         /* IE7hack */
974         case U('>'):
975             copy_ie7hack(&source, &target, ctx);
976             continue;
977 
978         /* @-group */
979         case U('@'):
980             copy_at_group(&source, &target, ctx);
981             continue;
982 
983         /* ; */
984         case U(';'):
985             copy_semicolon(&source, &target, ctx);
986             continue;
987 
988         /* :first-line|letter followed by [{,] */
989         /* (apparently needed for IE6) */
990         case U(':'):
991             copy_first(&source, &target, ctx);
992             continue;
993 
994         /* { */
995         case U('{'):
996             if (ctx->at_group)
997                 --ctx->at_group;
998             else
999                 ++ctx->in_rule;
1000             *target++ = c;
1001             continue;
1002 
1003         /* } */
1004         case U('}'):
1005             if (ctx->in_rule)
1006                 --ctx->in_rule;
1007             *target++ = c;
1008             continue;
1009 
1010         /* space starting with comment */
1011         case U('/'):
1012             (void)copy_space_comment(&source, &target, ctx, NEED_SPACE_MAYBE);
1013             continue;
1014 
1015         /* Fallback: copy character. Better safe than sorry. Should not be
1016          * reached, though */
1017         default:
1018             *target++ = c;
1019             continue;
1020         }
1021     }
1022 
1023     return
1024         (Py_ssize_t)(target - tstart) + (Py_ssize_t)(ctx->sentinel - source);
1025 }
1026 
1027 
1028 PyDoc_STRVAR(rcssmin_cssmin__doc__,
1029 "cssmin(style, keep_bang_comments=False)\n\
1030 \n\
1031 Minify CSS.\n\
1032 \n\
1033 :Note: This is a hand crafted C implementation built on the regex\n\
1034        semantics.\n\
1035 \n\
1036 :Parameters:\n\
1037   `style` : ``str``\n\
1038     CSS to minify\n\
1039 \n\
1040 :Return: Minified style\n\
1041 :Rtype: ``str``");
1042 
1043 static PyObject *
rcssmin_cssmin(PyObject * self,PyObject * args,PyObject * kwds)1044 rcssmin_cssmin(PyObject *self, PyObject *args, PyObject *kwds)
1045 {
1046     PyObject *style, *keep_bang_comments_ = NULL, *result;
1047     static char *kwlist[] = {"style", "keep_bang_comments", NULL};
1048     Py_ssize_t rlength, slength, length;
1049     int keep_bang_comments;
1050 #ifdef EXT2
1051     int uni;
1052 #define UOBJ "O"
1053 #endif
1054 #ifdef EXT3
1055 #define UOBJ "U"
1056 #endif
1057 
1058     if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist,
1059                                      &style, &keep_bang_comments_))
1060         return NULL;
1061 
1062     if (!keep_bang_comments_)
1063         keep_bang_comments = 0;
1064     else {
1065         keep_bang_comments = PyObject_IsTrue(keep_bang_comments_);
1066         if (keep_bang_comments == -1)
1067             return NULL;
1068     }
1069 
1070 #ifdef EXT2
1071     if (PyUnicode_Check(style)) {
1072         if (!(style = PyUnicode_AsUTF8String(style)))
1073             return NULL;
1074         uni = 1;
1075     }
1076     else {
1077         if (!(style = PyObject_Str(style)))
1078             return NULL;
1079         uni = 0;
1080     }
1081 #endif
1082 
1083 #ifdef EXT3
1084     Py_INCREF(style);
1085 #define PyString_GET_SIZE PyUnicode_GET_SIZE
1086 #define PyString_AS_STRING PyUnicode_AS_UNICODE
1087 #define _PyString_Resize PyUnicode_Resize
1088 #define PyString_FromStringAndSize PyUnicode_FromUnicode
1089 #endif
1090 
1091     rlength = slength = PyString_GET_SIZE(style);
1092 
1093 again:
1094     if (!(result = PyString_FromStringAndSize(NULL, rlength))) {
1095         Py_DECREF(style);
1096         return NULL;
1097     }
1098     Py_BEGIN_ALLOW_THREADS
1099     length = rcssmin((rchar *)PyString_AS_STRING(style),
1100                      (rchar *)PyString_AS_STRING(result),
1101                      slength, rlength, keep_bang_comments);
1102     Py_END_ALLOW_THREADS
1103 
1104     if (length > rlength) {
1105         Py_DECREF(result);
1106         rlength = length;
1107         goto again;
1108     }
1109 
1110     Py_DECREF(style);
1111     if (length < 0) {
1112         Py_DECREF(result);
1113         return NULL;
1114     }
1115     if (length != rlength && _PyString_Resize(&result, length) == -1)
1116         return NULL;
1117 
1118 #ifdef EXT2
1119     if (uni) {
1120         style = PyUnicode_DecodeUTF8(PyString_AS_STRING(result),
1121                                      PyString_GET_SIZE(result), "strict");
1122         Py_DECREF(result);
1123         if (!style)
1124             return NULL;
1125         result = style;
1126     }
1127 #endif
1128     return result;
1129 }
1130 
1131 /* ------------------------ BEGIN MODULE DEFINITION ------------------------ */
1132 
1133 EXT_METHODS = {
1134     {"cssmin",
1135         (PyCFunction)rcssmin_cssmin, METH_VARARGS | METH_KEYWORDS,
1136         rcssmin_cssmin__doc__},
1137 
1138     {NULL}  /* Sentinel */
1139 };
1140 
1141 PyDoc_STRVAR(EXT_DOCS_VAR,
1142 "C implementation of rcssmin\n\
1143 ===========================\n\
1144 \n\
1145 C implementation of rcssmin.");
1146 
1147 
1148 EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR);
1149 
1150 EXT_INIT_FUNC {
1151     PyObject *m;
1152 
1153     /* Create the module and populate stuff */
1154     if (!(m = EXT_CREATE(&EXT_DEFINE_VAR)))
1155         EXT_INIT_ERROR(NULL);
1156 
1157     EXT_ADD_UNICODE(m, "__author__", "Andr\xe9 Malo", "latin-1");
1158     EXT_ADD_STRING(m, "__docformat__", "restructuredtext en");
1159 
1160     EXT_INIT_RETURN(m);
1161 }
1162 
1163 /* ------------------------- END MODULE DEFINITION ------------------------- */
1164