#! /usr/bin/env python

"""world -- Print mappings between country names and DNS country codes.

Contact: Barry Warsaw
Email:   barry@python.org
Version: %(__version__)s

This script will take a list of Internet addresses and print out where in the
world those addresses originate from, based on the top-level domain country
code found in the address.  Addresses can be in any of the following forms:

    xx                -- just the country code or top-level domain identifier
    host.domain.xx    -- any Internet host or network name
    somebody@where.xx -- an Internet email address

The top-level domain is matched without regard to case.

If no match is found, the address is interpreted as a regular expression and a
reverse lookup is attempted.  This script will search the country names and
print a list of matching entries.  You can force reverse mappings with the
`-r' flag (see below).

For example:

    %% world tz us
    tz originated from Tanzania, United Republic of
    us originated from United States

    %% world united
    united matches 6 countries:
     ae: United Arab Emirates
     gb: United Kingdom
     tz: Tanzania, United Republic of
     uk: United Kingdom (common practice)
     um: United States Minor Outlying Islands
     us: United States

Country codes are maintained by the RIPE Network Coordination Centre,
in coordination with the ISO 3166 Maintenance Agency at DIN Berlin.  The
authoritative source of country code mappings is:

    <url:ftp://ftp.ripe.net/iso3166-countrycodes.txt>

The latest known change to this information was:

    Friday, 5 April 2002, 12.00 CET 2002

This script also knows about non-geographic top-level domains, and the
additional ccTLDs reserved by IANA.

Usage: %(PROGRAM)s [-d] [-p file] [-o] [-r] [-h] addr [addr ...]

    --dump
    -d
        Print mapping of all top-level domains.

    --parse file
    -p file
        Parse an iso3166-countrycodes file extracting the two letter country
        code followed by the country name.  Note that the three letter country
        codes and numbers, which are also provided in the standard format
        file, are ignored.

    --outputdict
    -o
        When used in conjunction with the `-p' option, output is in the form
        of a Python dictionary, and country names are normalized
        w.r.t. capitalization.  This makes it appropriate for cutting and
        pasting back into this file.  Output is always to standard out.

    --reverse
    -r
        Force reverse lookup.  In this mode the address can be any Python
        regular expression; this is matched against all country names and a
        list of matching mappings is printed.  In normal mode (e.g. without
        this flag), reverse lookup is performed on addresses if no matching
        country code is found.

    -h
    --help
        Print this message.
"""
__version__ = '$Revision$'


import sys
import getopt
import re

PROGRAM = sys.argv[0]

# NOTE: every print() call below is given exactly one pre-formatted string so
# that the output is the same whether the script runs on Python 2 (where
# print is a statement) or on Python 3 (where it is a function).



def usage(code, msg=''):
    """Print the module docstring as help text, then exit.

    The docstring is interpolated with the module globals (it refers to
    PROGRAM and __version__).  If `msg' is non-empty it is printed after the
    help text.  The process then exits with status `code'.
    """
    print(__doc__ % globals())
    if msg:
        print(msg)
    sys.exit(code)



def resolve(rawaddr):
    """Look up the top-level domain of `rawaddr' and print where it is from.

    The text after the last `.' (or the whole string when there is no dot)
    is lower-cased and looked up first in `nameorgs', then in `countries'.

    Returns None when the address was resolved (a line has been printed),
    otherwise returns `rawaddr' unchanged so it can be bounced to the next
    step.
    """
    # str.split() always returns at least one element, so [-1] is safe even
    # for an address without any dot.
    addr = rawaddr.split('.')[-1].lower()
    if addr in nameorgs:
        print('%s is in the %s top level domain' % (rawaddr, nameorgs[addr]))
        return None
    elif addr in countries:
        print('%s originated from %s' % (rawaddr, countries[addr]))
        return None
    else:
        # Not resolved, bounce it to the next step
        return rawaddr



def reverse(regexp):
    """Search all known domain descriptions for `regexp' and print matches.

    `regexp' is compiled case-insensitively and searched for in every value
    of the combined `all' mapping.  Matches are printed sorted by code.

    Returns None when at least one match was printed.  Returns `regexp'
    unchanged when nothing matched or when it is not a valid regular
    expression, so it can be bounced to the next step.
    """
    try:
        cre = re.compile(regexp, re.IGNORECASE)
    except re.error:
        # An unresolved address need not be a valid pattern; treat it as
        # "no match" instead of dying with a traceback.
        return regexp
    # Sorted so the listing does not depend on dictionary ordering.
    matches = [code for code, country in sorted(all.items())
               if cre.search(country)]
    # print results
    if not matches:
        # not resolved, bounce it to the next step
        return regexp
    if len(matches) == 1:
        code = matches[0]
        print("%s matches code `%s', %s" % (regexp, code, all[code]))
    else:
        print('%s matches %d countries:' % (regexp, len(matches)))
        for code in matches:
            print(" %s: %s" % (code, all[code]))
    return None



def _normalize_country(country):
    """Return the upper-case country name `country' with normalized case."""
    words = country.split()
    for i, w in enumerate(words):
        # XXX special cases
        if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
            words[i] = w.lower()
        elif w == 'THE' and i != 1:
            # A `THE' in second position falls through to capitalize()
            # below, giving e.g. "Congo, The Democratic Republic of the".
            words[i] = w.lower()
        elif len(w) > 3 and w[1] == "'":
            # Keep the letter after the apostrophe upper-case, as in
            # "D'Ivoire".
            words[i] = w[0:3].upper() + w[3:].lower()
        elif w in ('(U.S.)', 'U.S.'):
            pass
        elif w[0] == '(' and w != '(local':
            words[i] = '(' + w[1:].capitalize()
        elif w.find('-') != -1:
            words[i] = '-'.join(
                [s.capitalize() for s in w.split('-')])
        else:
            words[i] = w.capitalize()
    return ' '.join(words)


def parse(file, normalize):
    """Print the code/country pairs found in an iso3166-countrycodes file.

    Scanning starts after the first line that begins with `-' and stops at
    the next such line or at end of file.  Entry lines must look like
    "NAME  XX  XXX  999"; only the name and the two letter code are used.
    Blank lines are skipped and other unparseable lines are reported.

    If `normalize' is true the output is a Python dictionary literal named
    `countries' with lower-cased codes and case-normalized names; otherwise
    each entry is printed as "code country".

    If `file' cannot be opened, the error is printed and nothing else is
    done.
    """
    try:
        fp = open(file)
    except IOError as e:
        print('%s : %s' % (e.strerror, file))
        # Without a file there is nothing to scan.
        return

    cre = re.compile(r'(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
    scanning = 0

    if normalize:
        print('countries = {')

    with fp:
        for line in fp:
            if not scanning:
                # Lines yielded by the file iterator are never empty, so
                # indexing the first character is safe.
                if line[0] == '-':
                    scanning = 1
                continue
            mo = cre.match(line)
            if not mo:
                line = line.strip()
                if not line:
                    continue
                elif line[0] == '-':
                    # Closing separator: end of the table.
                    break
                else:
                    print('Could not parse line: %s' % line)
                    continue
            country, code = mo.group(1, 2)
            if normalize:
                print(' "%s": "%s",' % (code.lower(),
                                        _normalize_country(country)))
            else:
                print('%s %s' % (code, country))

    if normalize:
        print(' }')


def main():
    """Command line entry point; see the module docstring for the options."""
    help = 0
    status = 0
    dump = 0
    parsefile = None
    normalize = 0
    forcerev = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'p:rohd',
            ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
    except getopt.error as msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            help = 1
        elif opt in ('-d', '--dump'):
            dump = 1
        elif opt in ('-p', '--parse'):
            parsefile = arg
        elif opt in ('-o', '--outputdict'):
            normalize = 1
        elif opt in ('-r', '--reverse'):
            forcerev = 1

    if help:
        usage(status)

    if dump:
        print('Non-geographic domains:')
        for code in sorted(nameorgs):
            print(' %4s: %s' % (code, nameorgs[code]))

        print('\nCountry coded domains:')
        for code in sorted(countries):
            print(' %2s: %s' % (code, countries[code]))
    elif parsefile:
        parse(parsefile, normalize)
    else:
        # resolve() and reverse() return a false value for every address
        # they handled, so each pass keeps only what is still unresolved.
        # All forward lookups are done before any reverse lookup.
        if not forcerev:
            args = [addr for addr in args if resolve(addr)]
        args = [addr for addr in args if reverse(addr)]
        for arg in args:
            print('Where in the world is %s?' % arg)



# The mappings
nameorgs = {
    # New top level domains as described by ICANN
    # http://www.icann.org/tlds/
    "aero": "air-transport industry",
    "arpa": "Arpanet",
    "biz": "business",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "mil": "military",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
    "pro": "professionals",
    # These additional ccTLDs are included here even though they are not part
    # of ISO 3166.  IANA has 5 reserved ccTLDs as described here:
    #
    # http://www.iso.org/iso/en/prods-services/iso3166ma/04background-on-iso-3166/iso3166-1-and-ccTLDs.html
    #
    # but I can't find an official list anywhere.
    #
    # Note that `uk' is the common practice country code for the United
    # Kingdom.  AFAICT, the official `gb' code is routinely ignored!
    #
    # <D.M.Pick@qmw.ac.uk> tells me that `uk' was long in use before ISO3166
    # was adopted for top-level DNS zone names (although in the reverse order
    # like uk.ac.qmw) and was carried forward (with the reversal) to avoid a
    # large-scale renaming process as the UK switched from their old `Coloured
    # Book' protocols over X.25 to Internet protocols over IP.
    #
    # See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
    #
    # Also, `su', while obsolete is still in limited use.
    "ac": "Ascension Island",
    "gg": "Guernsey",
    "im": "Isle of Man",
    "je": "Jersey",
    "uk": "United Kingdom (common practice)",
    "su": "Soviet Union (still in limited use)",
    }



countries = {
    "af": "Afghanistan",
    "al": "Albania",
    "dz": "Algeria",
    "as": "American Samoa",
    "ad": "Andorra",
    "ao": "Angola",
    "ai": "Anguilla",
    "aq": "Antarctica",
    "ag": "Antigua and Barbuda",
    "ar": "Argentina",
    "am": "Armenia",
    "aw": "Aruba",
    "au": "Australia",
    "at": "Austria",
    "az": "Azerbaijan",
    "bs": "Bahamas",
    "bh": "Bahrain",
    "bd": "Bangladesh",
    "bb": "Barbados",
    "by": "Belarus",
    "be": "Belgium",
    "bz": "Belize",
    "bj": "Benin",
    "bm": "Bermuda",
    "bt": "Bhutan",
    "bo": "Bolivia",
    "ba": "Bosnia and Herzegowina",
    "bw": "Botswana",
    "bv": "Bouvet Island",
    "br": "Brazil",
    "io": "British Indian Ocean Territory",
    "bn": "Brunei Darussalam",
    "bg": "Bulgaria",
    "bf": "Burkina Faso",
    "bi": "Burundi",
    "kh": "Cambodia",
    "cm": "Cameroon",
    "ca": "Canada",
    "cv": "Cape Verde",
    "ky": "Cayman Islands",
    "cf": "Central African Republic",
    "td": "Chad",
    "cl": "Chile",
    "cn": "China",
    "cx": "Christmas Island",
    "cc": "Cocos (Keeling) Islands",
    "co": "Colombia",
    "km": "Comoros",
    "cg": "Congo",
    "cd": "Congo, The Democratic Republic of the",
    "ck": "Cook Islands",
    "cr": "Costa Rica",
    "ci": "Cote D'Ivoire",
    "hr": "Croatia",
    "cu": "Cuba",
    "cy": "Cyprus",
    "cz": "Czech Republic",
    "dk": "Denmark",
    "dj": "Djibouti",
    "dm": "Dominica",
    "do": "Dominican Republic",
    "tp": "East Timor",
    "ec": "Ecuador",
    "eg": "Egypt",
    "sv": "El Salvador",
    "gq": "Equatorial Guinea",
    "er": "Eritrea",
    "ee": "Estonia",
    "et": "Ethiopia",
    "fk": "Falkland Islands (Malvinas)",
    "fo": "Faroe Islands",
    "fj": "Fiji",
    "fi": "Finland",
    "fr": "France",
    "gf": "French Guiana",
    "pf": "French Polynesia",
    "tf": "French Southern Territories",
    "ga": "Gabon",
    "gm": "Gambia",
    "ge": "Georgia",
    "de": "Germany",
    "gh": "Ghana",
    "gi": "Gibraltar",
    "gr": "Greece",
    "gl": "Greenland",
    "gd": "Grenada",
    "gp": "Guadeloupe",
    "gu": "Guam",
    "gt": "Guatemala",
    "gn": "Guinea",
    "gw": "Guinea-Bissau",
    "gy": "Guyana",
    "ht": "Haiti",
    "hm": "Heard Island and Mcdonald Islands",
    "va": "Holy See (Vatican City State)",
    "hn": "Honduras",
    "hk": "Hong Kong",
    "hu": "Hungary",
    "is": "Iceland",
    "in": "India",
    "id": "Indonesia",
    "ir": "Iran, Islamic Republic of",
    "iq": "Iraq",
    "ie": "Ireland",
    "il": "Israel",
    "it": "Italy",
    "jm": "Jamaica",
    "jp": "Japan",
    "jo": "Jordan",
    "kz": "Kazakstan",
    "ke": "Kenya",
    "ki": "Kiribati",
    "kp": "Korea, Democratic People's Republic of",
    "kr": "Korea, Republic of",
    "kw": "Kuwait",
    "kg": "Kyrgyzstan",
    "la": "Lao People's Democratic Republic",
    "lv": "Latvia",
    "lb": "Lebanon",
    "ls": "Lesotho",
    "lr": "Liberia",
    "ly": "Libyan Arab Jamahiriya",
    "li": "Liechtenstein",
    "lt": "Lithuania",
    "lu": "Luxembourg",
    "mo": "Macau",
    "mk": "Macedonia, The Former Yugoslav Republic of",
    "mg": "Madagascar",
    "mw": "Malawi",
    "my": "Malaysia",
    "mv": "Maldives",
    "ml": "Mali",
    "mt": "Malta",
    "mh": "Marshall Islands",
    "mq": "Martinique",
    "mr": "Mauritania",
    "mu": "Mauritius",
    "yt": "Mayotte",
    "mx": "Mexico",
    "fm": "Micronesia, Federated States of",
    "md": "Moldova, Republic of",
    "mc": "Monaco",
    "mn": "Mongolia",
    "ms": "Montserrat",
    "ma": "Morocco",
    "mz": "Mozambique",
    "mm": "Myanmar",
    "na": "Namibia",
    "nr": "Nauru",
    "np": "Nepal",
    "nl": "Netherlands",
    "an": "Netherlands Antilles",
    "nc": "New Caledonia",
    "nz": "New Zealand",
    "ni": "Nicaragua",
    "ne": "Niger",
    "ng": "Nigeria",
    "nu": "Niue",
    "nf": "Norfolk Island",
    "mp": "Northern Mariana Islands",
    "no": "Norway",
    "om": "Oman",
    "pk": "Pakistan",
    "pw": "Palau",
    "ps": "Palestinian Territory, Occupied",
    "pa": "Panama",
    "pg": "Papua New Guinea",
    "py": "Paraguay",
    "pe": "Peru",
    "ph": "Philippines",
    "pn": "Pitcairn",
    "pl": "Poland",
    "pt": "Portugal",
    "pr": "Puerto Rico",
    "qa": "Qatar",
    "re": "Reunion",
    "ro": "Romania",
    "ru": "Russian Federation",
    "rw": "Rwanda",
    "sh": "Saint Helena",
    "kn": "Saint Kitts and Nevis",
    "lc": "Saint Lucia",
    "pm": "Saint Pierre and Miquelon",
    "vc": "Saint Vincent and the Grenadines",
    "ws": "Samoa",
    "sm": "San Marino",
    "st": "Sao Tome and Principe",
    "sa": "Saudi Arabia",
    "sn": "Senegal",
    "sc": "Seychelles",
    "sl": "Sierra Leone",
    "sg": "Singapore",
    "sk": "Slovakia",
    "si": "Slovenia",
    "sb": "Solomon Islands",
    "so": "Somalia",
    "za": "South Africa",
    "gs": "South Georgia and the South Sandwich Islands",
    "es": "Spain",
    "lk": "Sri Lanka",
    "sd": "Sudan",
    "sr": "Suriname",
    "sj": "Svalbard and Jan Mayen",
    "sz": "Swaziland",
    "se": "Sweden",
    "ch": "Switzerland",
    "sy": "Syrian Arab Republic",
    "tw": "Taiwan, Province of China",
    "tj": "Tajikistan",
    "tz": "Tanzania, United Republic of",
    "th": "Thailand",
    "tg": "Togo",
    "tk": "Tokelau",
    "to": "Tonga",
    "tt": "Trinidad and Tobago",
    "tn": "Tunisia",
    "tr": "Turkey",
    "tm": "Turkmenistan",
    "tc": "Turks and Caicos Islands",
    "tv": "Tuvalu",
    "ug": "Uganda",
    "ua": "Ukraine",
    "ae": "United Arab Emirates",
    "gb": "United Kingdom",
    "us": "United States",
    "um": "United States Minor Outlying Islands",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "vu": "Vanuatu",
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands, British",
    "vi": "Virgin Islands, U.S.",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",
    "yu": "Yugoslavia",
    "zm": "Zambia",
    "zw": "Zimbabwe",
    }

# Combined lookup table used by reverse().  The name shadows the builtin
# all(); it is kept because it is part of this module's existing namespace.
all = nameorgs.copy()
all.update(countries)


if __name__ == '__main__':
    main()