#
# Secret Labs' Regular Expression Engine
#
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#

"""Internal support module for sre"""

# update when constants are added or removed

MAGIC = 20031017

# max code word in this release

MAXREPEAT = 65535

# SRE standard exception (access as sre.error)
# should this really be here?

class error(Exception):
    """SRE standard exception (access as sre.error)."""

# operators

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ANY_ALL = "any_all"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BIGCHARSET = "bigcharset"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
GROUPREF_EXISTS = "groupref_exists"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"
MIN_REPEAT_ONE = "min_repeat_one"

# positions
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"

# categories
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"

# The position of a name in each of the three lists below becomes its
# numeric code (see makedict), so the order is significant.

OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET, BIGCHARSET,
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN,
    MIN_REPEAT_ONE

]

ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
    AT_UNI_NON_BOUNDARY
]

CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]

def makedict(list):
    """Return a dict mapping each item of *list* to its 0-based position.

    If an item occurs more than once, the position of its last
    occurrence wins.
    """
    # The parameter name shadows the builtin; it is kept unchanged so
    # that existing keyword callers keep working.
    return {item: index for index, item in enumerate(list)}

# From here on the three names refer to name -> code dictionaries
# rather than to the ordered lists defined above.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)

# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}

AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}

CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}

# flags
SRE_FLAG_TEMPLATE = 1  # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2  # case insensitive
SRE_FLAG_LOCALE = 4  # honour system locale
SRE_FLAG_MULTILINE = 8  # treat target as multiline string
SRE_FLAG_DOTALL = 16  # treat target as a single string
SRE_FLAG_UNICODE = 32  # use unicode locale
SRE_FLAG_VERBOSE = 64  # ignore whitespace and comments
SRE_FLAG_DEBUG = 128  # debugging

# flags for INFO primitive
SRE_INFO_PREFIX = 1  # has prefix
SRE_INFO_LITERAL = 2  # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4  # pattern starts with character from given set

if __name__ == "__main__":
    def dump(f, d, prefix):
        """Write one "#define" line per entry of d to f, ordered by value."""
        # sorted() accepts both a Python 2 list and a Python 3 dict view;
        # calling .sort() on d.items() only works on Python 2.
        for k, v in sorted(d.items(), key=lambda a: a[1]):
            f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))

    # The with-block closes the header file even if one of the writes fails.
    with open("sre_constants.h", "w") as f:
        f.write("""\
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * NOTE: This file is generated by sre_constants.py. If you need
 * to change anything in here, edit sre_constants.py and run it.
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

""")

        f.write("#define SRE_MAGIC %d\n" % MAGIC)

        dump(f, OPCODES, "SRE_OP")
        dump(f, ATCODES, "SRE")
        dump(f, CHCODES, "SRE")

        f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
        f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
        f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
        f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
        f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
        f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
        f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)

        f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
        f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
        f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)

    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("done")