#! /usr/bin/env python

"""GUI interface to webchecker.

This works as a Grail applet too! E.g.

    <APPLET CODE=wcgui.py NAME=CheckerWindow></APPLET>

Checkpoints are not (yet??? ever???) supported.

User interface:

Enter a root to check in the text entry box. To enter more than one root,
enter them one at a time and press <Return> for each one.

Command buttons Run, Stop and "Check one" govern the checking process in
the obvious way. Run and "Check one" also enter the root from the text
entry box if one is present. There's also a check box (enabled by default)
to decide whether actually to follow external links (since this can slow
the checking down considerably). The "Start over" button resets the checker
and clears the list panels. Finally there's a Quit button.

A series of checkbuttons determines whether the corresponding output panel
is shown. List panels are also automatically shown or hidden when their
status changes between empty and non-empty. There are six panels:

Log                 -- raw output from the checker (-v, -q affect this)
To check            -- links discovered but not yet checked
Checked             -- links that have been checked
Bad links           -- links that failed upon checking
Pages w/ bad links  -- pages containing at least one bad link
Details             -- details about one URL; double click on a URL in any
                       of the above list panels (not in Log) will show
                       details for that URL

Use your window manager's Close command to quit.

Command line options:

-m bytes  -- skip HTML pages larger than this size (default %(MAXPAGE)d)
-q        -- quiet operation (also suppresses external links report)
-v        -- verbose operation; repeating -v will increase verbosity
-t root   -- specify root dir which should be treated as internal (can repeat)
-a        -- don't check name anchors

Command line arguments:

rooturl   -- URL to start checking
             (default %(DEFROOT)s)

XXX The command line options (-m, -q, -v) should be GUI accessible.

XXX The roots should be visible as a list (?).

XXX The multipanel user interface is clumsy.

"""

# ' Emacs bait


import sys
import getopt
from Tkinter import *
import tktools
import webchecker


def main():
    """Parse the command line, build the checker window and run the Tk loop.

    On a bad option the usage text (this module's docstring) is printed to
    stderr and the process exits with status 2.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 't:m:qva')
    except getopt.error as msg:
        # Send the usage message to stderr rather than stdout.
        sys.stdout = sys.stderr
        print(msg)
        print(__doc__ % vars(webchecker))
        sys.exit(2)
    # Start from the webchecker defaults, then apply the options.
    webchecker.verbose = webchecker.VERBOSE
    webchecker.nonames = webchecker.NONAMES
    webchecker.maxpage = webchecker.MAXPAGE
    extra_roots = []
    for o, a in opts:
        if o == '-m':
            webchecker.maxpage = int(a)
        if o == '-q':
            webchecker.verbose = 0
        if o == '-v':
            webchecker.verbose = webchecker.verbose + 1
        if o == '-t':
            extra_roots.append(a)
        if o == '-a':
            webchecker.nonames = not webchecker.nonames
    root = Tk(className='Webchecker')
    root.protocol("WM_DELETE_WINDOW", root.quit)
    c = CheckerWindow(root)
    c.setflags(verbose=webchecker.verbose, maxpage=webchecker.maxpage,
               nonames=webchecker.nonames)
    if args:
        # All but the last argument are added right away; the last one is
        # only placed in the entry box so the user can confirm or edit it.
        for arg in args[:-1]:
            c.addroot(arg)
        c.suggestroot(args[-1])
    # Usually conditioned on whether external links
    # will be checked, but since that's not a command
    # line option, just toss them in.
    for url_root in extra_roots:
        if not url_root:
            # An empty "-t ''" names no root; skip it instead of crashing
            # on url_root[-1].
            continue
        # Make sure it's terminated by a slash,
        # so that addroot doesn't discard the last
        # directory component.
        if not url_root.endswith("/"):
            url_root = url_root + "/"
        c.addroot(url_root, add_to_do = 0)
    root.mainloop()


class CheckerWindow(webchecker.Checker):
    """Tk front end for webchecker.Checker.

    Builds the control widgets and the output panels inside *parent* and
    overrides the Checker bookkeeping hooks (setbad, setgood, newlink,
    markdone, seterror) so that the list panels follow the checker's state.
    """

    def __init__(self, parent, root=webchecker.DEFROOT):
        """Create all widgets in *parent* and suggest *root* as first URL.

        Side effect: sys.stdout is replaced by the "Log" panel, so anything
        printed afterwards ends up in that panel.
        """
        self.__parent = parent

        # Row 1: the root URL entry.
        self.__topcontrols = Frame(parent)
        self.__topcontrols.pack(side=TOP, fill=X)
        self.__label = Label(self.__topcontrols, text="Root URL:")
        self.__label.pack(side=LEFT)
        self.__rootentry = Entry(self.__topcontrols, width=60)
        self.__rootentry.pack(side=LEFT)
        self.__rootentry.bind('<Return>', self.enterroot)
        self.__rootentry.focus_set()

        # Row 2: the command buttons.
        self.__controls = Frame(parent)
        self.__controls.pack(side=TOP, fill=X)
        self.__running = 0
        self.__start = Button(self.__controls, text="Run", command=self.start)
        self.__start.pack(side=LEFT)
        self.__stop = Button(self.__controls, text="Stop", command=self.stop,
                             state=DISABLED)
        self.__stop.pack(side=LEFT)
        self.__step = Button(self.__controls, text="Check one",
                             command=self.step)
        self.__step.pack(side=LEFT)
        self.__cv = BooleanVar(parent)
        # NOTE(review): checkext is read before Checker.__init__ has run, so
        # it is presumably a class-level default on Checker -- confirm.
        self.__cv.set(self.checkext)
        self.__checkext = Checkbutton(self.__controls, variable=self.__cv,
                                      command=self.update_checkext,
                                      text="Check nonlocal links",)
        self.__checkext.pack(side=LEFT)
        self.__reset = Button(self.__controls, text="Start over",
                              command=self.reset)
        self.__reset.pack(side=LEFT)
        if __name__ == '__main__': # No Quit button under Grail!
            self.__quit = Button(self.__controls, text="Quit",
                                 command=self.__parent.quit)
            self.__quit.pack(side=RIGHT)

        # Status lines and the output panels.
        self.__status = Label(parent, text="Status: initial", anchor=W)
        self.__status.pack(side=TOP, fill=X)
        self.__checking = Label(parent, text="Idle", anchor=W)
        self.__checking.pack(side=TOP, fill=X)
        self.__mp = mp = MultiPanel(parent)
        sys.stdout = self.__log = LogPanel(mp, "Log")
        self.__todo = ListPanel(mp, "To check", self, self.showinfo)
        self.__done = ListPanel(mp, "Checked", self, self.showinfo)
        self.__bad = ListPanel(mp, "Bad links", self, self.showinfo)
        self.__errors = ListPanel(mp, "Pages w/ bad links", self, self.showinfo)
        self.__details = LogPanel(mp, "Details")
        self.root_seed = None
        # The panels and root_seed are set up before the base class is
        # initialised; presumably Checker.__init__ calls reset(), which
        # needs them -- keep this ordering.
        webchecker.Checker.__init__(self)
        if root:
            root = str(root).strip()
            if root:
                self.suggestroot(root)
        self.newstatus()

    def reset(self):
        """Reset the checker, empty the list panels, re-suggest the root."""
        webchecker.Checker.reset(self)
        for p in self.__todo, self.__done, self.__bad, self.__errors:
            p.clear()
        if self.root_seed:
            self.suggestroot(self.root_seed)

    def suggestroot(self, root):
        """Put *root* in the entry box (selected) and remember it as seed."""
        self.__rootentry.delete(0, END)
        self.__rootentry.insert(END, root)
        self.__rootentry.select_range(0, END)
        self.root_seed = root

    def enterroot(self, event=None):
        """Add the URL typed in the entry box as a root, then clear the box.

        Also used as the <Return> binding of the entry, hence *event*.
        """
        root = self.__rootentry.get()
        root = root.strip()
        if root:
            self.__checking.config(text="Adding root "+root)
            self.__checking.update_idletasks()
            self.addroot(root)
            self.__checking.config(text="Idle")
            try:
                # The list panels hold (url, fragment) pairs, not bare
                # strings, so look for the page-level entry of the root.
                # Assumes addroot queues the root as (root, "").
                i = self.__todo.items.index((root, ""))
            except (ValueError, IndexError):
                pass
            else:
                # Make the freshly added root the next page to be checked.
                self.__todo.list.select_clear(0, END)
                self.__todo.list.select_set(i)
                self.__todo.list.yview(i)
        self.__rootentry.delete(0, END)

    def start(self):
        """Handler for "Run": check continuously until stopped or done."""
        self.__start.config(state=DISABLED, relief=SUNKEN)
        self.__stop.config(state=NORMAL)
        self.__step.config(state=DISABLED)
        self.enterroot()
        self.__running = 1
        self.go()

    def stop(self):
        """Handler for "Stop": finish the current page, then go idle."""
        self.__stop.config(state=DISABLED, relief=SUNKEN)
        self.__running = 0

    def step(self):
        """Handler for "Check one": check a single page."""
        self.__start.config(state=DISABLED)
        self.__step.config(state=DISABLED, relief=SUNKEN)
        self.enterroot()
        self.__running = 0
        self.dosomething()

    def go(self):
        """Schedule the next page if running, else restore the idle UI."""
        if self.__running:
            self.__parent.after_idle(self.dosomething)
        else:
            self.__checking.config(text="Idle")
            self.__start.config(state=NORMAL, relief=RAISED)
            self.__stop.config(state=DISABLED, relief=RAISED)
            self.__step.config(state=NORMAL, relief=RAISED)

    # Guards dosomething() against re-entry; the update() calls made while
    # a page is being processed can dispatch further callbacks.
    __busy = 0

    def dosomething(self):
        """Check one page: the selected "To check" entry, or the first one.

        When nothing is left to check the run is stopped.  If checking the
        page raises, the busy flag is still cleared and the buttons are
        returned to their idle state before the exception propagates, so
        the window stays usable.
        """
        if self.__busy: return
        self.__busy = 1
        completed = 0
        try:
            if self.todo:
                l = self.__todo.selectedindices()
                if l:
                    i = l[0]
                else:
                    i = 0
                    self.__todo.list.select_set(i)
                self.__todo.list.yview(i)
                url = self.__todo.items[i]
                self.__checking.config(text="Checking "+self.format_url(url))
                self.__parent.update()
                self.dopage(url)
            else:
                self.stop()
            completed = 1
        finally:
            self.__busy = 0
            if not completed:
                # Don't keep rescheduling after a failure.
                self.__running = 0
            self.go()

    def showinfo(self, url):
        """Fill the "Details" panel with what is known about *url*.

        *url* is an item of one of the list panels, i.e. a
        (url, fragment) pair.
        """
        d = self.__details
        d.clear()
        d.put("URL: %s\n" % self.format_url(url))
        if url in self.bad:
            d.put("Error: %s\n" % str(self.bad[url]))
        # NOTE(review): roots are added as plain strings while *url* is a
        # pair, so the page part is compared as well -- confirm how
        # Checker stores self.roots.
        if url in self.roots or url[0] in self.roots:
            d.put("Note: This is a root URL\n")
        if url in self.done:
            d.put("Status: checked\n")
            o = self.done[url]
        elif url in self.todo:
            d.put("Status: to check\n")
            o = self.todo[url]
        else:
            d.put("Status: unknown (!)\n")
            o = []
        # Bad links are recorded per page, so only report them for the
        # entry without a fragment.
        if (not url[1]) and url[0] in self.errors:
            d.put("Bad links from this page:\n")
            for triple in self.errors[url[0]]:
                link, rawlink, msg = triple
                d.put(" HREF %s" % self.format_url(link))
                if self.format_url(link) != rawlink: d.put(" (%s)" %rawlink)
                d.put("\n")
                d.put(" error %s\n" % str(msg))
        self.__mp.showpanel("Details")
        for source, rawlink in o:
            d.put("Origin: %s" % source)
            if rawlink != self.format_url(url):
                d.put(" (%s)" % rawlink)
            d.put("\n")
        d.text.yview("1.0")

    # The methods below extend the Checker hooks: do the base class
    # bookkeeping first, then mirror the change in the panels.

    def setbad(self, url, msg):
        """Record *url* as bad and list it in the "Bad links" panel."""
        webchecker.Checker.setbad(self, url, msg)
        self.__bad.insert(url)
        self.newstatus()

    def setgood(self, url):
        """Record *url* as good and drop it from the "Bad links" panel."""
        webchecker.Checker.setgood(self, url)
        self.__bad.remove(url)
        self.newstatus()

    def newlink(self, url, origin):
        """Record a link and list it under "Checked" or "To check"."""
        webchecker.Checker.newlink(self, url, origin)
        if url in self.done:
            self.__done.insert(url)
        elif url in self.todo:
            self.__todo.insert(url)
        self.newstatus()

    def markdone(self, url):
        """Move *url* from the "To check" panel to the "Checked" panel."""
        webchecker.Checker.markdone(self, url)
        self.__done.insert(url)
        self.__todo.remove(url)
        self.newstatus()

    def seterror(self, url, triple):
        """Record a bad link found on page *url* and list the page."""
        webchecker.Checker.seterror(self, url, triple)
        # List panels hold (url, fragment) pairs; a page has no fragment.
        self.__errors.insert((url, ''))
        self.newstatus()

    def newstatus(self):
        """Refresh the status line and let Tk process pending events."""
        self.__status.config(text="Status: "+self.status())
        self.__parent.update()

    def update_checkext(self):
        """Copy the "Check nonlocal links" check box into self.checkext."""
        self.checkext = self.__cv.get()


class ListPanel:
    """A MultiPanel panel showing a list of URLs.

    self.items holds the URL objects in display order; the listbox shows
    checker.format_url(url) for each of them.  The panel is shown when it
    becomes non-empty and hidden when it becomes empty.
    """

    def __init__(self, mp, name, checker, showinfo=None):
        """Add a panel called *name* to MultiPanel *mp*.

        *checker* supplies format_url(); *showinfo*, if given, is called
        with the item that was double-clicked.
        """
        self.mp = mp
        self.name = name
        self.showinfo = showinfo
        self.checker = checker
        self.panel = mp.addpanel(name)
        self.list, self.frame = tktools.make_list_box(
            self.panel, width=60, height=5)
        self.list.config(exportselection=0)
        if showinfo:
            self.list.bind('<Double-Button-1>', self.doubleclick)
        self.items = []

    def clear(self):
        """Remove all items and hide the panel."""
        self.items = []
        self.list.delete(0, END)
        self.mp.hidepanel(self.name)

    def doubleclick(self, event):
        """Call showinfo for the first selected item, if any."""
        l = self.selectedindices()
        if l:
            self.showinfo(self.items[l[0]])

    def selectedindices(self):
        """Return the selected listbox indices as a list of ints."""
        l = self.list.curselection()
        if not l: return []
        return [int(index) for index in l]

    def insert(self, url):
        """Append *url* unless it is already listed; show the panel."""
        if url not in self.items:
            if not self.items:
                self.mp.showpanel(self.name)
            # (I tried sorting alphabetically, but the display is too jumpy)
            i = len(self.items)
            self.list.insert(i, self.checker.format_url(url))
            self.list.yview(i)
            self.items.insert(i, url)

    def remove(self, url):
        """Remove *url* if listed; hide the panel when it becomes empty.

        If the removed item was selected, the selection moves to the item
        that took its place (or to the new last item).
        """
        try:
            i = self.items.index(url)
        except (ValueError, IndexError):
            pass
        else:
            was_selected = i in self.selectedindices()
            self.list.delete(i)
            del self.items[i]
            if not self.items:
                self.mp.hidepanel(self.name)
            elif was_selected:
                if i >= len(self.items):
                    i = len(self.items) - 1
                self.list.select_set(i)


class LogPanel:
    """A MultiPanel panel holding a text box.

    Has write() and flush(), so an instance can stand in for sys.stdout.
    """

    def __init__(self, mp, name):
        """Add a text panel called *name* to MultiPanel *mp*."""
        self.mp = mp
        self.name = name
        self.panel = mp.addpanel(name)
        self.text, self.frame = tktools.make_text_box(self.panel, height=10)
        self.text.config(wrap=NONE)

    def clear(self):
        """Delete all text and scroll back to the top."""
        self.text.delete("1.0", END)
        self.text.yview("1.0")

    def put(self, s):
        """Append *s*; scroll to the end once a line has been completed."""
        self.text.insert(END, s)
        if '\n' in s:
            self.text.yview(END)

    def write(self, s):
        """File-like write: append *s* and redraw after a complete line."""
        self.put(s)
        if '\n' in s:
            self.panel.update()

    def flush(self):
        """No-op; present because file-like stdout objects are flushed."""
        pass


class MultiPanel:
    """A row of check buttons that show or hide a stack of named panels."""

    def __init__(self, parent):
        """Create the button row and the panel area inside *parent*."""
        self.parent = parent
        self.frame = Frame(self.parent)
        self.frame.pack(expand=1, fill=BOTH)
        self.topframe = Frame(self.frame, borderwidth=2, relief=RAISED)
        self.topframe.pack(fill=X)
        self.botframe = Frame(self.frame)
        self.botframe.pack(expand=1, fill=BOTH)
        self.panelnames = []
        self.panels = {}

    def addpanel(self, name, on=0):
        """Create a panel plus its check button and return the panel frame.

        The panel is shown right away only if *on* is true.
        """
        # The variable holds the panel name while the panel is shown and
        # the empty string while it is hidden.
        v = StringVar(self.parent)
        if on:
            v.set(name)
        else:
            v.set("")
        check = Checkbutton(self.topframe, text=name,
                            offvalue="", onvalue=name, variable=v,
                            command=self.checkpanel)
        check.pack(side=LEFT)
        panel = Frame(self.botframe)
        label = Label(panel, text=name, borderwidth=2, relief=RAISED, anchor=W)
        label.pack(side=TOP, fill=X)
        t = v, check, panel
        self.panelnames.append(name)
        self.panels[name] = t
        if on:
            panel.pack(expand=1, fill=BOTH)
        return panel

    def showpanel(self, name):
        """Show the panel called *name* and tick its check button."""
        v, check, panel = self.panels[name]
        v.set(name)
        panel.pack(expand=1, fill=BOTH)

    def hidepanel(self, name):
        """Hide the panel called *name* and untick its check button."""
        v, check, panel = self.panels[name]
        v.set("")
        panel.pack_forget()

    def checkpanel(self):
        """Check button callback: repack the ticked panels.

        All panels are unpacked first so that the visible ones end up in
        the order in which they were added.
        """
        for name in self.panelnames:
            v, check, panel = self.panels[name]
            panel.pack_forget()
        for name in self.panelnames:
            v, check, panel = self.panels[name]
            if v.get():
                panel.pack(expand=1, fill=BOTH)


if __name__ == '__main__':
    main()