1#!/usr/bin/python
2
3import os
4import re
5import sys
6
# Matches an inline markdown link "[text](target)" and captures the target.
# Raw strings keep the regex backslashes from being read as (invalid)
# string escape sequences.
link_re = re.compile(
	r'\[[^\[\]]+\]'    # [text]: one or more characters that are not brackets
	r'\(([^\(\)]+)\)'  # (target): captured, one or more non-parenthesis characters
)
8
# Require a root directory plus at least one document to scan.
if len(sys.argv) < 3:
	# Parenthesised single-argument print: valid on Python 2 and Python 3.
	print('Usage: <root_dir> <doc_files>...')
	sys.exit(1)

# First argument: directory that link targets are resolved against.
# Remaining arguments: the documents whose links are checked.
root = sys.argv[1]
docs = sys.argv[2:]
15
# Every link target found in the documents, stored as (doc, link) pairs so
# each target stays associated with the document it came from.
links = []

for doc in docs:
	with open(doc) as handle:
		text = handle.read()
	links.extend((doc, target) for target in link_re.findall(text))
24
def filter_link(doc_link):
	"""Return True if the link should be checked on disk.

	doc_link is a (doc, link) pair. Web URLs (http:// or https://) and
	same-page anchors (links starting with '#') are skipped.
	"""
	# Unpacked here rather than in the signature: tuple parameters are a
	# syntax error on Python 3.
	_doc, link = doc_link
	# Match the full URL scheme, not just 'http', so that local files such
	# as 'httpd.md' are still checked.
	return not link.startswith(('http://', 'https://', '#'))
31
# Keep only the (doc, link) pairs that filter_link accepts.
links = [entry for entry in links if filter_link(entry)]
33
def fix_link(doc_link):
	"""Strip the '#fragment' and '?query' parts from a link target.

	doc_link is a (doc, link) pair. Returns a (doc, link) pair in which
	link is cut at the first '#' and then at the first '?'.
	"""
	# Unpacked here rather than in the signature: tuple parameters are a
	# syntax error on Python 3.
	doc, link = doc_link
	link = link.split('#', 1)[0]
	link = link.split('?', 1)[0]
	return (doc, link)
38
# Build a real list rather than relying on map(): on Python 3 map() returns
# a one-shot iterator, which cannot be measured with len() or traversed twice.
links = [fix_link(entry) for entry in links]

# (doc, link) pairs whose target could not be found.
errors = []
42
def check_link(doc_link, root_dir=None):
	"""Return True if the link target exists on disk.

	doc_link is a (doc, link) pair. A link starting with '/' is resolved
	against the root directory; any other link is resolved against the
	directory part of doc, itself joined onto the root directory.

	root_dir overrides the root directory; when omitted, the module-level
	root is used.
	"""
	# Unpacked here rather than in the signature: tuple parameters are a
	# syntax error on Python 3.
	doc, link = doc_link
	if root_dir is None:
		root_dir = root
	# startswith() instead of link[0]: the link is empty when it consisted
	# only of a query string (e.g. '?a=1') before fix_link stripped it, and
	# indexing an empty string raises IndexError. An empty link then
	# resolves to the directory of doc.
	if link.startswith('/'):
		# Drop the leading slash so os.path.join() does not discard root_dir.
		full_link = os.path.join(root_dir, link[1:])
	else:
		full_link = os.path.join(root_dir, os.path.dirname(doc), link)
	return os.path.exists(full_link)
54
# Check every link, counting as we go so the summary does not depend on
# links supporting len().
checked = 0
for entry in links:
	checked += 1
	if not check_link(entry):
		errors.append(entry)

# Exit status: 0 when every link resolves, 2 when at least one is missing.
# The prints are parenthesised single-argument calls, valid on Python 2 and 3.
if not errors:
	print('%d links checked: OK' % (checked,))
	sys.exit(0)

for (doc, link) in errors:
	print('File %s linked from %s not found' % (link, doc))

sys.exit(2)
67