""" Encoding Aliases Support

    This module is used by the encodings package search function to
    map encodings names to module names.

    Note that the search function normalizes the encoding names before
    doing the lookup, so the mapping will have to map normalized
    encoding names to module names.  In practice this means every key
    must be written in lower case: a key containing upper-case letters
    can never be matched by a normalized lookup.

    Contents:

        The following aliases dictionary contains mappings of all IANA
        character set names for which the Python core library provides
        codecs. In addition to these, a few Python specific codec
        aliases have also been added.

"""
aliases = {

    # Please keep this list sorted alphabetically by value !

    # ascii codec
    '646'                : 'ascii',
    'ansi_x3.4_1968'     : 'ascii',
    'ansi_x3_4_1968'     : 'ascii', # some email headers use this non-standard name
    'ansi_x3.4_1986'     : 'ascii',
    'cp367'              : 'ascii',
    'csascii'            : 'ascii',
    'ibm367'             : 'ascii',
    'iso646_us'          : 'ascii',
    'iso_646.irv_1991'   : 'ascii',
    'iso_ir_6'           : 'ascii',
    'us'                 : 'ascii',
    'us_ascii'           : 'ascii',

    # base64_codec codec
    'base64'             : 'base64_codec',
    'base_64'            : 'base64_codec',

    # big5 codec
    'big5_tw'            : 'big5',
    'csbig5'             : 'big5',

    # big5hkscs codec
    'big5_hkscs'         : 'big5hkscs',
    'hkscs'              : 'big5hkscs',

    # bz2_codec codec
    'bz2'                : 'bz2_codec',

    # cp037 codec
    '037'                : 'cp037',
    'csibm037'           : 'cp037',
    'ebcdic_cp_ca'       : 'cp037',
    'ebcdic_cp_nl'       : 'cp037',
    'ebcdic_cp_us'       : 'cp037',
    'ebcdic_cp_wt'       : 'cp037',
    'ibm037'             : 'cp037',
    'ibm039'             : 'cp037',

    # cp1026 codec
    '1026'               : 'cp1026',
    'csibm1026'          : 'cp1026',
    'ibm1026'            : 'cp1026',

    # cp1140 codec
    '1140'               : 'cp1140',
    'ibm1140'            : 'cp1140',

    # cp1250 codec
    '1250'               : 'cp1250',
    'windows_1250'       : 'cp1250',

    # cp1251 codec
    '1251'               : 'cp1251',
    'windows_1251'       : 'cp1251',

    # cp1252 codec
    '1252'               : 'cp1252',
    'windows_1252'       : 'cp1252',

    # cp1253 codec
    '1253'               : 'cp1253',
    'windows_1253'       : 'cp1253',

    # cp1254 codec
    '1254'               : 'cp1254',
    'windows_1254'       : 'cp1254',

    # cp1255 codec
    '1255'               : 'cp1255',
    'windows_1255'       : 'cp1255',

    # cp1256 codec
    '1256'               : 'cp1256',
    'windows_1256'       : 'cp1256',

    # cp1257 codec
    '1257'               : 'cp1257',
    'windows_1257'       : 'cp1257',

    # cp1258 codec
    '1258'               : 'cp1258',
    'windows_1258'       : 'cp1258',

    # cp424 codec
    '424'                : 'cp424',
    'csibm424'           : 'cp424',
    'ebcdic_cp_he'       : 'cp424',
    'ibm424'             : 'cp424',

    # cp437 codec
    '437'                : 'cp437',
    'cspc8codepage437'   : 'cp437',
    'ibm437'             : 'cp437',

    # cp500 codec
    '500'                : 'cp500',
    'csibm500'           : 'cp500',
    'ebcdic_cp_be'       : 'cp500',
    'ebcdic_cp_ch'       : 'cp500',
    'ibm500'             : 'cp500',

    # cp775 codec
    '775'                : 'cp775',
    'cspc775baltic'      : 'cp775',
    'ibm775'             : 'cp775',

    # cp850 codec
    '850'                : 'cp850',
    'cspc850multilingual' : 'cp850',
    'ibm850'             : 'cp850',

    # cp852 codec
    '852'                : 'cp852',
    'cspcp852'           : 'cp852',
    'ibm852'             : 'cp852',

    # cp855 codec
    '855'                : 'cp855',
    'csibm855'           : 'cp855',
    'ibm855'             : 'cp855',

    # cp857 codec
    '857'                : 'cp857',
    'csibm857'           : 'cp857',
    'ibm857'             : 'cp857',

    # cp858 codec
    '858'                : 'cp858',
    'csibm858'           : 'cp858',
    'ibm858'             : 'cp858',

    # cp860 codec
    '860'                : 'cp860',
    'csibm860'           : 'cp860',
    'ibm860'             : 'cp860',

    # cp861 codec
    '861'                : 'cp861',
    'cp_is'              : 'cp861',
    'csibm861'           : 'cp861',
    'ibm861'             : 'cp861',

    # cp862 codec
    '862'                : 'cp862',
    'cspc862latinhebrew' : 'cp862',
    'ibm862'             : 'cp862',

    # cp863 codec
    '863'                : 'cp863',
    'csibm863'           : 'cp863',
    'ibm863'             : 'cp863',

    # cp864 codec
    '864'                : 'cp864',
    'csibm864'           : 'cp864',
    'ibm864'             : 'cp864',

    # cp865 codec
    '865'                : 'cp865',
    'csibm865'           : 'cp865',
    'ibm865'             : 'cp865',

    # cp866 codec
    '866'                : 'cp866',
    'csibm866'           : 'cp866',
    'ibm866'             : 'cp866',

    # cp869 codec
    '869'                : 'cp869',
    'cp_gr'              : 'cp869',
    'csibm869'           : 'cp869',
    'ibm869'             : 'cp869',

    # cp932 codec
    '932'                : 'cp932',
    'ms932'              : 'cp932',
    'mskanji'            : 'cp932',
    'ms_kanji'           : 'cp932',

    # cp949 codec
    '949'                : 'cp949',
    'ms949'              : 'cp949',
    'uhc'                : 'cp949',

    # cp950 codec
    '950'                : 'cp950',
    'ms950'              : 'cp950',

    # euc_jis_2004 codec
    'jisx0213'           : 'euc_jis_2004',
    'eucjis2004'         : 'euc_jis_2004',
    'euc_jis2004'        : 'euc_jis_2004',

    # euc_jisx0213 codec
    'eucjisx0213'        : 'euc_jisx0213',

    # euc_jp codec
    'eucjp'              : 'euc_jp',
    'ujis'               : 'euc_jp',
    'u_jis'              : 'euc_jp',

    # euc_kr codec
    'euckr'              : 'euc_kr',
    'korean'             : 'euc_kr',
    'ksc5601'            : 'euc_kr',
    'ks_c_5601'          : 'euc_kr',
    'ks_c_5601_1987'     : 'euc_kr',
    'ksx1001'            : 'euc_kr',
    'ks_x_1001'          : 'euc_kr',

    # gb18030 codec
    'gb18030_2000'       : 'gb18030',

    # gb2312 codec
    'chinese'            : 'gb2312',
    'csiso58gb231280'    : 'gb2312',
    'euc_cn'             : 'gb2312',
    'euccn'              : 'gb2312',
    'eucgb2312_cn'       : 'gb2312',
    'gb2312_1980'        : 'gb2312',
    'gb2312_80'          : 'gb2312',
    'iso_ir_58'          : 'gb2312',

    # gbk codec
    '936'                : 'gbk',
    'cp936'              : 'gbk',
    'ms936'              : 'gbk',

    # hex_codec codec
    'hex'                : 'hex_codec',

    # hp_roman8 codec
    'roman8'             : 'hp_roman8',
    'r8'                 : 'hp_roman8',
    'cshproman8'         : 'hp_roman8',
    # Legacy mixed-case spelling, kept only for code that reads this
    # dictionary directly; a normalized (lower-case) lookup cannot reach it.
    'csHPRoman8'         : 'hp_roman8',

    # hz codec
    'hzgb'               : 'hz',
    'hz_gb'              : 'hz',
    'hz_gb_2312'         : 'hz',

    # iso2022_jp codec
    'csiso2022jp'        : 'iso2022_jp',
    'iso2022jp'          : 'iso2022_jp',
    'iso_2022_jp'        : 'iso2022_jp',

    # iso2022_jp_1 codec
    'iso2022jp_1'        : 'iso2022_jp_1',
    'iso_2022_jp_1'      : 'iso2022_jp_1',

    # iso2022_jp_2 codec
    'iso2022jp_2'        : 'iso2022_jp_2',
    'iso_2022_jp_2'      : 'iso2022_jp_2',

    # iso2022_jp_2004 codec
    'iso_2022_jp_2004'   : 'iso2022_jp_2004',
    'iso2022jp_2004'     : 'iso2022_jp_2004',

    # iso2022_jp_3 codec
    'iso2022jp_3'        : 'iso2022_jp_3',
    'iso_2022_jp_3'      : 'iso2022_jp_3',

    # iso2022_jp_ext codec
    'iso2022jp_ext'      : 'iso2022_jp_ext',
    'iso_2022_jp_ext'    : 'iso2022_jp_ext',

    # iso2022_kr codec
    'csiso2022kr'        : 'iso2022_kr',
    'iso2022kr'          : 'iso2022_kr',
    'iso_2022_kr'        : 'iso2022_kr',

    # iso8859_10 codec
    'csisolatin6'        : 'iso8859_10',
    'iso_8859_10'        : 'iso8859_10',
    'iso_8859_10_1992'   : 'iso8859_10',
    'iso_ir_157'         : 'iso8859_10',
    'l6'                 : 'iso8859_10',
    'latin6'             : 'iso8859_10',

    # iso8859_11 codec
    'thai'               : 'iso8859_11',
    'iso_8859_11'        : 'iso8859_11',
    'iso_8859_11_2001'   : 'iso8859_11',

    # iso8859_13 codec
    'iso_8859_13'        : 'iso8859_13',
    'l7'                 : 'iso8859_13',
    'latin7'             : 'iso8859_13',

    # iso8859_14 codec
    'iso_8859_14'        : 'iso8859_14',
    'iso_8859_14_1998'   : 'iso8859_14',
    'iso_celtic'         : 'iso8859_14',
    'iso_ir_199'         : 'iso8859_14',
    'l8'                 : 'iso8859_14',
    'latin8'             : 'iso8859_14',

    # iso8859_15 codec
    'iso_8859_15'        : 'iso8859_15',
    'l9'                 : 'iso8859_15',
    'latin9'             : 'iso8859_15',

    # iso8859_16 codec
    'iso_8859_16'        : 'iso8859_16',
    'iso_8859_16_2001'   : 'iso8859_16',
    'iso_ir_226'         : 'iso8859_16',
    'l10'                : 'iso8859_16',
    'latin10'            : 'iso8859_16',

    # iso8859_2 codec
    'csisolatin2'        : 'iso8859_2',
    'iso_8859_2'         : 'iso8859_2',
    'iso_8859_2_1987'    : 'iso8859_2',
    'iso_ir_101'         : 'iso8859_2',
    'l2'                 : 'iso8859_2',
    'latin2'             : 'iso8859_2',

    # iso8859_3 codec
    'csisolatin3'        : 'iso8859_3',
    'iso_8859_3'         : 'iso8859_3',
    'iso_8859_3_1988'    : 'iso8859_3',
    'iso_ir_109'         : 'iso8859_3',
    'l3'                 : 'iso8859_3',
    'latin3'             : 'iso8859_3',

    # iso8859_4 codec
    'csisolatin4'        : 'iso8859_4',
    'iso_8859_4'         : 'iso8859_4',
    'iso_8859_4_1988'    : 'iso8859_4',
    'iso_ir_110'         : 'iso8859_4',
    'l4'                 : 'iso8859_4',
    'latin4'             : 'iso8859_4',

    # iso8859_5 codec
    'csisolatincyrillic' : 'iso8859_5',
    'cyrillic'           : 'iso8859_5',
    'iso_8859_5'         : 'iso8859_5',
    'iso_8859_5_1988'    : 'iso8859_5',
    'iso_ir_144'         : 'iso8859_5',

    # iso8859_6 codec
    'arabic'             : 'iso8859_6',
    'asmo_708'           : 'iso8859_6',
    'csisolatinarabic'   : 'iso8859_6',
    'ecma_114'           : 'iso8859_6',
    'iso_8859_6'         : 'iso8859_6',
    'iso_8859_6_1987'    : 'iso8859_6',
    'iso_ir_127'         : 'iso8859_6',

    # iso8859_7 codec
    'csisolatingreek'    : 'iso8859_7',
    'ecma_118'           : 'iso8859_7',
    'elot_928'           : 'iso8859_7',
    'greek'              : 'iso8859_7',
    'greek8'             : 'iso8859_7',
    'iso_8859_7'         : 'iso8859_7',
    'iso_8859_7_1987'    : 'iso8859_7',
    'iso_ir_126'         : 'iso8859_7',

    # iso8859_8 codec
    'csisolatinhebrew'   : 'iso8859_8',
    'hebrew'             : 'iso8859_8',
    'iso_8859_8'         : 'iso8859_8',
    'iso_8859_8_1988'    : 'iso8859_8',
    'iso_ir_138'         : 'iso8859_8',

    # iso8859_9 codec
    'csisolatin5'        : 'iso8859_9',
    'iso_8859_9'         : 'iso8859_9',
    'iso_8859_9_1989'    : 'iso8859_9',
    'iso_ir_148'         : 'iso8859_9',
    'l5'                 : 'iso8859_9',
    'latin5'             : 'iso8859_9',

    # johab codec
    'cp1361'             : 'johab',
    'ms1361'             : 'johab',

    # koi8_r codec
    'cskoi8r'            : 'koi8_r',

    # latin_1 codec
    #
    # Note that the latin_1 codec is implemented internally in C and a
    # lot faster than the charmap codec iso8859_1 which uses the same
    # encoding. This is why we discourage the use of the iso8859_1
    # codec and alias it to latin_1 instead.
    #
    '8859'               : 'latin_1',
    'cp819'              : 'latin_1',
    'csisolatin1'        : 'latin_1',
    'ibm819'             : 'latin_1',
    'iso8859'            : 'latin_1',
    'iso8859_1'          : 'latin_1',
    'iso_8859_1'         : 'latin_1',
    'iso_8859_1_1987'    : 'latin_1',
    'iso_ir_100'         : 'latin_1',
    'l1'                 : 'latin_1',
    'latin'              : 'latin_1',
    'latin1'             : 'latin_1',

    # mac_cyrillic codec
    'maccyrillic'        : 'mac_cyrillic',

    # mac_greek codec
    'macgreek'           : 'mac_greek',

    # mac_iceland codec
    'maciceland'         : 'mac_iceland',

    # mac_latin2 codec
    'maccentraleurope'   : 'mac_latin2',
    'maclatin2'          : 'mac_latin2',

    # mac_roman codec
    'macroman'           : 'mac_roman',

    # mac_turkish codec
    'macturkish'         : 'mac_turkish',

    # mbcs codec
    'dbcs'               : 'mbcs',

    # ptcp154 codec
    'csptcp154'          : 'ptcp154',
    'pt154'              : 'ptcp154',
    'cp154'              : 'ptcp154',
    'cyrillic_asian'     : 'ptcp154',

    # quopri_codec codec
    'quopri'             : 'quopri_codec',
    'quoted_printable'   : 'quopri_codec',
    'quotedprintable'    : 'quopri_codec',

    # rot_13 codec
    'rot13'              : 'rot_13',

    # shift_jis codec
    'csshiftjis'         : 'shift_jis',
    'shiftjis'           : 'shift_jis',
    'sjis'               : 'shift_jis',
    's_jis'              : 'shift_jis',

    # shift_jis_2004 codec
    'shiftjis2004'       : 'shift_jis_2004',
    'sjis_2004'          : 'shift_jis_2004',
    's_jis_2004'         : 'shift_jis_2004',

    # shift_jisx0213 codec
    'shiftjisx0213'      : 'shift_jisx0213',
    'sjisx0213'          : 'shift_jisx0213',
    's_jisx0213'         : 'shift_jisx0213',

    # tactis codec
    # NOTE(review): the 'tactis' target module is not visible from this
    # file -- confirm it exists, otherwise this alias can never resolve.
    'tis260'             : 'tactis',

    # tis_620 codec
    'tis620'             : 'tis_620',
    'tis_620_0'          : 'tis_620',
    'tis_620_2529_0'     : 'tis_620',
    'tis_620_2529_1'     : 'tis_620',
    'iso_ir_166'         : 'tis_620',

    # utf_16 codec
    'u16'                : 'utf_16',
    'utf16'              : 'utf_16',

    # utf_16_be codec
    'unicodebigunmarked' : 'utf_16_be',
    'utf_16be'           : 'utf_16_be',

    # utf_16_le codec
    'unicodelittleunmarked' : 'utf_16_le',
    'utf_16le'           : 'utf_16_le',

    # utf_32 codec
    'u32'                : 'utf_32',
    'utf32'              : 'utf_32',

    # utf_32_be codec
    'utf_32be'           : 'utf_32_be',

    # utf_32_le codec
    'utf_32le'           : 'utf_32_le',

    # utf_7 codec
    'u7'                 : 'utf_7',
    'utf7'               : 'utf_7',
    'unicode_1_1_utf_7'  : 'utf_7',

    # utf_8 codec
    'u8'                 : 'utf_8',
    'utf'                : 'utf_8',
    'utf8'               : 'utf_8',
    'utf8_ucs2'          : 'utf_8',
    'utf8_ucs4'          : 'utf_8',

    # uu_codec codec
    'uu'                 : 'uu_codec',

    # zlib_codec codec
    'zip'                : 'zlib_codec',
    'zlib'               : 'zlib_codec',

}