#!/usr/bin/python
"""Check that local Markdown links in documentation files point at existing files.

Usage: <root_dir> <doc_files>...

Every inline ``[text](target)`` link found in the given documents is
collected.  Targets starting with ``http://``, ``https://`` or ``#`` are
skipped; for the rest, any ``#fragment`` / ``?query`` suffix is removed and
the remaining path is looked up on disk:

* a target starting with ``/`` is resolved against ``root_dir``;
* any other target is resolved against ``root_dir`` joined with the
  directory of the document that contains the link.

Exit status: 0 when every checked target exists, 1 on a usage error,
2 when at least one target is missing.
"""

import os
import re
import sys

# Inline Markdown link ``[text](target)``; the single group captures ``target``.
# Written as a raw string so the backslashes reach the regex engine without
# relying on Python passing unknown string escapes through unchanged.
link_re = re.compile(r'\[[^\[\]]+\]\(([^\(\)]+)\)')


def find_links(doc):
    """Return a ``(doc, target)`` pair for every link found in the file *doc*."""
    with open(doc) as f:
        data = f.read()
    return [(doc, link) for link in link_re.findall(data)]


def filter_link(doc_link):
    """Return True when the ``(doc, link)`` pair should be checked on disk.

    Web links and pure in-page anchors are skipped.  The full URL scheme is
    matched (not just the ``http`` prefix) so that relative targets such as
    ``http-api.md`` are still checked.
    """
    _doc, link = doc_link
    return not link.startswith(('http://', 'https://', '#'))


def fix_link(doc_link):
    """Return the ``(doc, link)`` pair with ``#fragment`` and ``?query`` removed."""
    doc, link = doc_link
    link = link.split('#')[0]
    link = link.split('?')[0]
    return (doc, link)


def check_link(doc_link, root=None):
    """Return True when the target of the ``(doc, link)`` pair exists.

    *root* is the directory that links are resolved against; when omitted it
    defaults to the root directory given on the command line
    (``sys.argv[1]``).
    """
    doc, link = doc_link
    if root is None:
        root = sys.argv[1]
    # startswith() instead of link[0]: the target may be empty once
    # fix_link() has removed a query string (e.g. "?x=1"), and indexing an
    # empty string raises IndexError.
    if link.startswith('/'):
        full_link = os.path.join(root, link[1:])
    else:
        full_link = os.path.join(root, os.path.dirname(doc), link)
    return os.path.exists(full_link)


def main(argv=None):
    """Run the link check for *argv* (default ``sys.argv``); return the exit status."""
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        print('Usage: <root_dir> <doc_files>...')
        return 1

    root = argv[1]
    docs = argv[2:]

    links = []
    for doc in docs:
        links.extend(find_links(doc))

    # Real lists (not filter()/map() objects) so len() works on Python 3 too.
    links = [fix_link(item) for item in links if filter_link(item)]
    errors = [item for item in links if not check_link(item, root)]

    if not errors:
        print('%d links checked: OK' % (len(links),))
        return 0

    for (doc, link) in errors:
        print('File %s linked from %s not found' % (link, doc))
    return 2


if __name__ == '__main__':
    sys.exit(main())