#!/usr/bin/perl
# Portability idiom described in perlrun: if a Bourne-style shell ends up
# interpreting this file instead of perl, the eval line below re-executes
# the script under perl.  When perl itself parses these two lines, the
# trailing "if" guards the eval; the guard variable is not assigned anywhere
# in this file as shown, so perl skips the exec.  (The variable name is
# spelled with three n's here.)
    eval 'exec perl -S $0 "$@"'
    if $runnning_under_some_shell;
4#
5# txt2html.pl
6# Convert raw text to something with a little HTML formatting
7#
8# Written by Seth Golub <seth@cs.wustl.edu>
9#            http://www.cs.wustl.edu/~seth/txt2html/
10#
11# $Revision: 1.15 $
12# $Date: 2004/10/05 20:30:33 $
13# $Author: cristy $
14#
15#
16# $Log: txt2html,v $
17# Revision 1.15  2004/10/05 20:30:33  cristy
18# *** empty log message ***
19#
20# Revision 1.14  2004/04/26 19:53:42  cristy
21# *** empty log message ***
22#
23# Revision 1.13  2004/04/26 15:03:00  cristy
24# *** empty log message ***
25#
26# Revision 1.12  2004/04/24 13:48:50  cristy
27# *** empty log message ***
28#
29# Revision 1.11  2003/10/28 18:40:59  cristy
30# *** empty log message ***
31#
32# Revision 1.10  2003/10/28 03:44:38  cristy
33# *** empty log message ***
34#
35# Revision 1.9  2003/10/17 13:59:12  cristy
36# *** empty log message ***
37#
38# Revision 1.8  2003/10/16 22:26:06  cristy
39# *** empty log message ***
40#
41# Revision 1.7  2003/10/12 04:10:15  cristy
42# *** empty log message ***
43#
44# Revision 1.6  2003/07/20 03:39:50  cristy
45# *** empty log message ***
46#
47# Revision 1.5  2003/07/19 19:44:20  cristy
48# *** empty log message ***
49#
50# Revision 1.4  2003/04/07 23:35:40  cristy
51# *** empty log message ***
52#
53# Revision 1.3  2003/04/05 02:52:42  cristy
54# *** empty log message ***
55#
56# Revision 1.2  2003/04/04 20:50:50  cristy
57# *** empty log message ***
58#
59# Revision 1.1  2003/03/25 15:10:23  cristy
60# genesis
61#
62# Revision 1.1  2003/03/22 17:02:00  cristy
63# *** empty log message ***
64#
65# Revision 1.10  1994/12/28  20:10:25  seth
66#  * Added --extract, etc.
67#
68# Revision 1.9  94/12/13  15:16:23  15:16:23  seth (Seth Golub)
69#  * Changed from #!/usr/local/bin/perl to the more clever version in
70#    the man page.  (How did I manage not to read this for so long?)
71#  * Swapped hrule & header back to handle double lines.  Why should
72#    this order screw up headers?
73#
74# Revision 1.8  1994/11/30  21:07:03  seth
75#  * put mail_anchor back in.  (Why did I take this out?)
76#  * Finally added handling of lettered lists (ordered lists marked with
77#    letters)
78#  * Added title option (--title, -t)
79#  * Shortline now looks at how long the line was before txt2html
80#    started adding tags.   ($line_length)
81#  * Changed list references to scalars where appropriate.  (@foo[0] -> $foo[0])
82#  * Added untabify() to homogenize leading indentation for list
83#    prefixes and functions that use line length
84#  * Added "underline tolerance" for when underlines are not exactly the
85#    same length as what they underline.
86#  * Added error message for unrecognized options
87#  * removed \w matching on --capstag
88#  * Tagline now removes leading & trailing whitespace before tagging
89#  * swapped order of caps & heading in main loop
90#  * Cleaned up code for speed and to get rid of warnings
91#  * Added more restrictions to something being a mail header
92#  * Added indentation for lists, just to make the output more readable.
93#  * Fixed major bug in lists: $OL and $UL were never set, so when a
94#    list was ended "</UL>" was *always* used!
95#  * swapped order of hrule & header to properly handle long underlines
96#
97# Revision 1.7  94/10/28  13:16:11  13:16:11  seth (Seth Golub)
98#  * Added to comments in options section
99#  * renamed blank to is_blank
100#  * Page break is converted to horizontal rule <HR>
101#  * moved usage subroutine up top so people who look through code see
102#    it sooner
103#
104# Revision 1.6  94/10/28  12:43:46  12:43:46  seth (Seth Golub)
105#  * Creates anchors at each heading
106#
107# Revision 1.5  94/07/14  17:43:59  17:43:59  seth (Seth Golub)
108#  * Fixed minor bug in Headers
109#  * Preformatting can be set to only start/stop when TWO lines of
110#    [non]formatted-looking-text are encountered.  Old behavior is still
111#    possible through command line options (-pb 1 -pe 1).
112#  * Can preformat entire document (-pb 0) or disable preformatting
113#    completely (-pe 0).
114#  * Fixed minor bug in CAPS handling (paragraph breaks broke)
115#  * Puts paragraph tags *before* paragraphs, not just between them.
116#
117# Revision 1.4  94/06/20  16:42:55  16:42:55  seth (Seth Golub)
118#  * Allow ':' for numbered lists (e.g. "1: Figs")
119#  * Whitespace at end of line will not start or end preformatting
120#  * Mailmode is now off by default
121#  * Doesn't break short lines if they are the first line in a list
122#    item.  It *should* break them anyway if the next line is a
123#    continuation of the list item, but I haven't dealt with this yet.
#  * Added action on lines that are all capital letters.  You can change
#    how these lines get tagged, as well as the minimum number of
#    consecutive capital letters required to fire off this action.
127#
128# Revision 1.3  94/05/17  15:58:58  15:58:58  seth (Seth Golub)
129# * Tiny bugfix in unhyphenation
130#
131# Revision 1.2  94/05/16  18:15:16  18:15:16  seth (Seth Golub)
132#  * Added unhyphenation
133#
134# Revision 1.1  94/05/16  16:19:03  16:19:03  seth (Seth Golub)
135# Initial revision
136#
137#
138# 1.02  Allow '-' in mail headers
139#       Added handling for multiline mail headers
140#
141#
142#
143# Oscar Nierstrasz has a nice script for hypertextifying URLs.
144# It is available at:
145#   http://cui_www.unige.ch/ftp/PUBLIC/oscar/scripts/html.pl
146#
147
#########################
# Configurable options
#
# Default values for the command-line options.  The bracketed text above
# each variable shows the short and long option spellings that
# deal_with_options compares against when it overrides the default.
#

# [-s <n>    ] | [--shortline <n>                 ]
$short_line_length = 40;        # Lines shorter than this (shortline
                                # uses a strict "<" comparison) are
                                # assumed to be intentionally broken
                                # and are kept that short. <BR>

# [-p <n>    ] | [--prewhite <n>                  ]
$preformat_whitespace_min = 5;  # Minimum number of consecutive leading
                                # whitespace characters to trigger
                                # preformatting.
                                # NOTE: Tabs are now expanded to
                                # spaces before this check is made.
                                # That means if $tab_width is 8 and
                                # this is 5, then one tab is expanded
                                # to 8 spaces, which is enough to
                                # trigger preformatting.

# [-pb <n>   ] | [--prebegin <n>                  ]
$preformat_trigger_lines = 2;   # How many lines of preformatted-looking
                                # text are needed to switch to <PRE>
                                # <= 0 : Preformat entire document
                                #    1 : one line triggers
                                # >= 2 : two lines trigger

# [-pe <n>   ] | [--preend <n>                    ]
$endpreformat_trigger_lines = 2; # How many lines of unpreformatted-looking
                                 # text are needed to switch from <PRE>
                                 # <= 0 : Never preformat within document
                                 #    1 : one line triggers
                                 # >= 2 : two lines trigger
# NOTE for --prebegin and --preend:
# A zero takes precedence.  If one is zero, the other is ignored.
# If both are zero, entire document is preformatted.


# [-r <n>    ] | [--hrule <n>                     ]
$hrule_min = 4;                 # Min number of ---s for an HRule.

# [-c <n>    ] | [--caps <n>                      ]
$min_caps_length = 3;           # min sequential CAPS for an all-caps line

# [-ct <tag> ] | [--capstag <tag>                 ]
$caps_tag = "STRONG";           # Tag to put around all-caps lines

# [-m/+m     ] | [--mail        / --nomail        ]
$mailmode = 0;                  # Deal with mail headers & quoted text

# [-u/+u     ] | [--unhyphen    / --nounhyphen    ]
$unhyphenation = 1;             # Enables unhyphenation of text.

# [-a <file> ] | [--append <file>                 ]
# [+a        ] | [--noappend                      ]
$append_file = 0;               # If you want something appended by
                                # default, put the filename here.
                                # The appended text will not be
                                # processed at all, so make sure it's
                                # plain text or decent HTML.  i.e. do
                                # not have things like:
                                #   Seth Golub <seth@cs.wustl.edu>
                                # but instead, have:
                                #   Seth Golub &lt;seth@cs.wustl.edu&gt;

# [-t <title>] | [--title <title>                 ]
$title = 0;                     # You can specify a title.
                                # Otherwise it won't put one in.

# [-ul <n>   ] | [--underlinelong <n>             ]
$underline_tolerance_long = 1;  # How much longer can underlines
                                # be and still be underlines?

# [-us <n>   ] | [--underlineshort <n>            ]
$underline_tolerance_short = 1; # How much shorter can underlines
                                # be and still be underlines?

# [-tw <n>   ] | [--tabwidth <n>                  ]
$tab_width = 8;                 # How many spaces equal a tab?


# [-iw <n>   ] | [--indentwidth <n>               ]
$indent_width = 2;              # Indents this many spaces for each
                                # level of a list

# [-/+e      ] | [--extract / --noextract         ]
$extract = 0;                   # Extract Mode (suitable for inserting)

# END OF CONFIGURABLE OPTIONS
########################################
238
239
########################################
# Definitions  (Don't change these)
#
# Bit flags that the subs below OR into $mode and $line_action (and that
# main copies into $previous_action).  Each flag owns exactly one bit so
# several can be combined and tested with "&".
$NONE       = 0;
$LIST       = 1 << 0;
$HRULE      = 1 << 1;
$PAR        = 1 << 2;
$PRE        = 1 << 3;
$END        = 1 << 4;
$BREAK      = 1 << 5;
$HEADER     = 1 << 6;
$MAILHEADER = 1 << 7;
$MAILQUOTE  = 1 << 8;
$CAPS       = 1 << 9;

# Kinds of list recorded per nesting level in @list by startlist.
($OL, $UL) = (1, 2);
257
# usage
#   Print the command-line synopsis to STDERR.
#   Side effect: strips any leading directory from $0 so the synopsis shows
#   only the program's base name.
#   The long option names listed here are the ones deal_with_options
#   compares against (the indent option is parsed as --indentwidth).
sub usage
{
    $0 =~ s#.*/##;
    my $s = " " x length($0);   # padding as wide as the program name
    print STDERR <<EOF;

Usage: $0 [options]

where options are:
       $s [-v        ] | [--version                       ]
       $s [-h        ] | [--help                          ]
       $s [-s <n>    ] | [--shortline <n>                 ]
       $s [-p <n>    ] | [--prewhite <n>                  ]
       $s [-pb <n>   ] | [--prebegin <n>                  ]
       $s [-pe <n>   ] | [--preend <n>                    ]
       $s [-e/+e     ] | [--extract / --noextract         ]
       $s [-r <n>    ] | [--hrule <n>                     ]
       $s [-c <n>    ] | [--caps <n>                      ]
       $s [-ct <tag> ] | [--capstag <tag>                 ]
       $s [-m/+m     ] | [--mail     / --nomail           ]
       $s [-u/+u     ] | [--unhyphen / --nounhyphen       ]
       $s [-a <file> ] | [--append <file>                 ]
       $s [+a        ] | [--noappend                      ]
       $s [-t <title>] | [--title <title>                 ]
       $s [-tw <n>   ] | [--tabwidth <n>                  ]
       $s [-iw <n>   ] | [--indentwidth <n>               ]
       $s [-ul <n>   ] | [--underlinelong <n>             ]
       $s [-us <n>   ] | [--underlineshort <n>            ]

  More complete explanations of these options can be found in
  comments near the beginning of the script.

EOF
}
292
293
# deal_with_options
#   Consume leading option arguments from @ARGV and store their values in
#   the configuration globals.  Parsing stops at the first argument that
#   does not look like an option (a "-" or "+" followed by at least one more
#   character); that argument and everything after it are left in @ARGV.
#
#   An unknown option, or a known option whose value fails validation,
#   prints a message plus the usage text to STDERR and exits with status 1.
#   -v/--version and -h/--help print their text and exit.
#
#   After parsing, the preformat trigger counts are clamped to 0..2 and the
#   underline tolerances to >= 0.
#
#   --indent is accepted as an alias of --indentwidth, and --unhyphenate /
#   --nounhyphenate as aliases of --unhyphen / --nounhyphen.
sub deal_with_options
{
    # Options that require a non-negative integer value.
    my %number_option = (
        "-r"  => \$hrule_min,                  "--hrule"          => \$hrule_min,
        "-s"  => \$short_line_length,          "--shortline"      => \$short_line_length,
        "-p"  => \$preformat_whitespace_min,   "--prewhite"       => \$preformat_whitespace_min,
        "-pb" => \$preformat_trigger_lines,    "--prebegin"       => \$preformat_trigger_lines,
        "-pe" => \$endpreformat_trigger_lines, "--preend"         => \$endpreformat_trigger_lines,
        "-c"  => \$min_caps_length,            "--caps"           => \$min_caps_length,
        "-ul" => \$underline_tolerance_long,   "--underlinelong"  => \$underline_tolerance_long,
        "-us" => \$underline_tolerance_short,  "--underlineshort" => \$underline_tolerance_short,
        "-tw" => \$tab_width,                  "--tabwidth"       => \$tab_width,
        "-iw" => \$indent_width,               "--indentwidth"    => \$indent_width,
        "--indent" => \$indent_width,
    );

    # Options that require a (true) string value.
    my %string_option = (
        "-ct" => \$caps_tag, "--capstag" => \$caps_tag,
        "-t"  => \$title,    "--title"   => \$title,
    );

    # Options that take no value: [variable, value to store].
    my %flag_option = (
        "-e" => [\$extract, 1],        "--extract"       => [\$extract, 1],
        "+e" => [\$extract, 0],        "--noextract"     => [\$extract, 0],
        "-m" => [\$mailmode, 1],       "--mail"          => [\$mailmode, 1],
        "+m" => [\$mailmode, 0],       "--nomail"        => [\$mailmode, 0],
        "-u" => [\$unhyphenation, 1],  "--unhyphen"      => [\$unhyphenation, 1],
        "+u" => [\$unhyphenation, 0],  "--nounhyphen"    => [\$unhyphenation, 0],
        "--unhyphenate"   => [\$unhyphenation, 1],
        "--nounhyphenate" => [\$unhyphenation, 0],
        "+a" => [\$append_file, 0],    "--noappend"      => [\$append_file, 0],
    );

    while (@ARGV && $ARGV[0] =~ /^[-+].+/)
    {
        my ($option, $value) = @ARGV[0, 1];

        # For options with a value, one shift happens here and the
        # "continue" block below performs the second, so both the option
        # and its value are removed.  Flag options must NOT shift here or
        # they would swallow the following argument.
        if ($number_option{$option} && defined($value) && $value =~ /^\d+$/)
        {
            ${ $number_option{$option} } = $value;
            shift @ARGV;
            next;
        }

        if ($string_option{$option} && $value)
        {
            ${ $string_option{$option} } = $value;
            shift @ARGV;
            next;
        }

        if ($flag_option{$option})
        {
            my ($variable, $setting) = @{ $flag_option{$option} };
            $$variable = $setting;
            next;
        }

        if (($option eq "-a" || $option eq "--append") && $value)
        {
            if (-r $value) {
                $append_file = $value;
            } else {
                print STDERR "Can't find or read $value.\n";
            }
            shift @ARGV;
            next;
        }

        if ($option eq "-v" || $option eq "--version")
        {
            print '$Header: /users/hilco/seth/projects/txt2html/txt2html.pl,v 1.10 1994/12/28 20:10:25 seth Exp seth $ ';
            print "\n";
            exit;
        }

        if ($option eq "-h" || $option eq "--help")
        {
            &usage;
            exit;
        }

        print STDERR "Unrecognized option: $option\n";
        print STDERR " or bad parameter: $value\n" if ($value);

        &usage;
        exit(1);

    } continue {

        shift @ARGV;
    }

    $preformat_trigger_lines = 0 if ($preformat_trigger_lines < 0);
    $preformat_trigger_lines = 2 if ($preformat_trigger_lines > 2);

    # Preformatting the whole document (prebegin 0) overrides any preend
    # setting.
    $endpreformat_trigger_lines = 1 if ($preformat_trigger_lines == 0);
    $endpreformat_trigger_lines = 0 if ($endpreformat_trigger_lines < 0);
    $endpreformat_trigger_lines = 2 if ($endpreformat_trigger_lines > 2);

    $underline_tolerance_long  = 0 if $underline_tolerance_long < 0;
    $underline_tolerance_short = 0 if $underline_tolerance_short < 0;
}
485
# is_blank
#   True when the first argument consists solely of whitespace (an empty
#   string counts as blank).
sub is_blank
{
    my ($text) = @_;
    return $text =~ /^\s*$/;
}
490
# escape
#   Rewrite the global $line in place so it is safe as HTML text:
#   "&", ">" and "<" become character entities, and each form feed
#   (octal 014) becomes a horizontal rule on its own line.
sub escape
{
    my %entity_for = ('&' => '&amp;', '>' => '&gt;', '<' => '&lt;');

    # One pass is enough: the text produced for "&" is never rescanned.
    $line =~ s/([&<>])/$entity_for{$1}/g;
    $line =~ s/\014/\n<HR>\n/g;
}
498
# hrule
#   If $line is made up only of at least $hrule_min rule characters
#   (- _ ~ = *), optionally separated by whitespace, replace it with an
#   <HR>, remove a "<p>" from $prev and flag the line as $HRULE.
sub hrule
{
    return unless $line =~ /^\s*([-_~=\*]\s*){$hrule_min,}$/;

    $line_action |= $HRULE;
    $prev =~ s/<p>//;
    $line = "<HR>\n";
}
508
# shortline
#   Append <BR> to $line and flag it as $BREAK when the line looks like a
#   deliberate short line: not in <PRE> mode, not blank, original length
#   ($line_length) below $short_line_length, followed by a non-blank line,
#   and not already a header, rule, break or list item.
sub shortline
{
    return if $mode & $PRE;
    return if &is_blank($line);
    return unless $line_length < $short_line_length;
    return if &is_blank($nextline);
    return if $line_action & ($HEADER | $HRULE | $BREAK | $LIST);

    $line_action |= $BREAK;
    $line =~ s/$/<BR>/;
}
521
# mailstuff
#   Mail-mode handling for $line.  Nothing happens when the next line is
#   blank.  Otherwise a <BR> is appended and the line is flagged for:
#     * quoted text ("FF> ..." or "Igor| ...")         -> $MAILQUOTE
#     * a "Some-Header: value" line that follows a blank line or another
#       mail header; the first header of a run also gets an anchor
#                                                       -> $MAILHEADER
#     * an indented continuation of a mail header       -> $MAILHEADER
sub mailstuff
{
    # Every case below requires a non-blank following line.
    return if &is_blank($nextline);

    my $after_header = $previous_action & $MAILHEADER;
    my $kind;

    if (($line =~ /^\w*&gt/) ||         # Handle "FF> Werewolves."
        ($line =~ /^\w*\|/))            # Handle "Igor| There wolves."
    {
        $kind = $MAILQUOTE;
    }
    elsif (($line =~ /^[\w\-]*:/) &&    # Handle "Some-Header: blah"
           ($after_header || &is_blank($prev)))
    {
        &anchor_mail if !$after_header;
        $kind = $MAILHEADER;
    }
    elsif (($line =~ /^\s+\S/) &&       # Handle multi-line mail headers
           $after_header)
    {
        $kind = $MAILHEADER;
    }
    else
    {
        return;
    }

    $line =~ s/$/<BR>/;
    $line_action |= $BREAK | $kind;
}
545
# paragraph
#   Mark a paragraph start: flag the current line as $PAR and append a
#   "<p>" line to $prev.
sub paragraph
{
    $line_action |= $PAR;
    $prev = $prev . "<p>\n";
}
551
# listprefix
#   Examine one line of text and decide whether it starts a list item.
#   Returns (0,0,0) when it does not.  Otherwise returns
#   ($prefix, $number, $rawprefix):
#     $rawprefix  leading whitespace, the marker, and the character after it
#     $number     the leading digits or letter of an ordered item; false
#                 for bullets (a lone "o" is treated as a bullet)
#     $prefix     $rawprefix with the number/letter removed
sub listprefix
{
    my ($text) = @_;

    my $is_bullet  = ($text =~ /^\s*[-=\*o]\s+\S/);
    my $is_ordered = ($text =~ /^\s*(\d+|[a-zA-Z])[\.\)\]:]\s+\S/);
    return (0,0,0) unless $is_bullet || $is_ordered;

    my ($number) = $text =~ /^\s*(\d+|[a-zA-Z])/;

    # That slippery exception of "o" as a bullet
    $number = 0 if $text =~ /^\s*o\s/;

    my ($prefix, $rawprefix);
    if ($number)
    {
        ($rawprefix) = $text =~ /^(\s*(\d+|[a-zA-Z]).)/;
        ($prefix = $rawprefix) =~ s/(\d+|[a-zA-Z])//;  # Take the number out
    }
    else
    {
        ($rawprefix) = $text =~ /^(\s*[-=o\*].)/;
        $prefix = $rawprefix;
    }

    return ($prefix, $number, $rawprefix);
}
581
# startlist
#   Open a new list level.  Arguments are the ($prefix, $number,
#   $rawprefix) triple produced by listprefix.  Records the prefix for the
#   current level, appends the opening tag to $prev, and updates @list,
#   $listnum, $list_indent, $line_action and $mode.
#   An ordered item that does not start at 1, "a" or "A" only records the
#   prefix and opens nothing.
sub startlist
{
    my ($prefix, $number, $rawprefix) = @_;

    $listprefix[$listnum] = $prefix;

    my ($kind, $open_tag);
    if ($number)
    {
        # It doesn't start with 1,a,A.  Let's not screw with it.
        return if ($number != 1) && ($number ne "a") && ($number ne "A");
        ($kind, $open_tag) = ($OL, "<OL>");
    }
    else
    {
        ($kind, $open_tag) = ($UL, "<font size=-2><UL>");
    }

    $prev .= "$list_indent$open_tag\n";
    $list[$listnum] = $kind;

    $listnum++;
    $list_indent = " " x $listnum x $indent_width;
    $mode |= $LIST;
    $line_action |= $LIST;
}
605
606
# endlist
#   Close the N innermost open lists (N is the only argument), appending
#   the matching closing tags to $prev, innermost first.  Flags the line
#   as $END and clears the $LIST bit of $mode once no list remains open.
sub endlist                     # End N lists
{
    my ($remaining) = @_;

    while ($remaining > 0)
    {
        my $top = $listnum - 1;         # index of the innermost list
        $list_indent = " " x $top x $indent_width;

        if ($list[$top] == $UL)
        {
            $prev .= "$list_indent</UL></font>\n";
        }
        elsif ($list[$top] == $OL)
        {
            $prev .= "$list_indent</OL>\n";
        }
        else
        {
            print STDERR "Encountered list of unknown type\n";
        }

        $remaining--;
        $listnum--;
    }

    $line_action |= $END;
    $mode ^= ($LIST & $mode) if (!$listnum);
}
627
# continuelist
#   Turn the list marker at the start of $line into an indented <LI>,
#   using the pattern that matches the kind of the innermost open list,
#   and flag the line as $LIST.
sub continuelist
{
    my $kind = $list[$listnum - 1];     # kind of the innermost open list

    if ($kind == $UL)
    {
        $line =~ s/^\s*[-=o\*]\s*/$list_indent<LI> /;
    }
    if ($kind == $OL)
    {
        $line =~ s/^\s*(\d+|[a-zA-Z]).\s*/$list_indent<LI> /;
    }

    $line_action |= $LIST;
}
635
# liststuff
#   List handling for the current $line.  Uses listprefix to classify the
#   line, then:
#     * no list prefix: if the previous line is blank and a list is open,
#       close every open list; otherwise do nothing;
#     * prefix equal to that of an open list level: close any deeper
#       levels so the item continues that list;
#     * prefix not seen among the open levels: open a new (nested) list.
#   Finally, while $mode still has $LIST set, the line is rewritten as an
#   <LI> item by continuelist.
sub liststuff
{
    my ($prefix, $number, $rawprefix) = &listprefix($line);

    # $i becomes the number of list levels that should remain open, or -1
    # when the prefix matches none of the open levels.
    my $i = $listnum;
    if (!$prefix)
    {
        return if !&is_blank($prev); # inside a list item

        # This ain't no list.  We'll want to end all of them.
        return if !($mode & $LIST);     # This just speeds up the inevitable
        $i = 0;
    } else
    {
        # Maybe we're going back up to a previous list.  Only levels that
        # are currently open (indices 0 .. $listnum-1) are examined; the
        # index is bounds-checked first so $listprefix[-1], which would be
        # the last and possibly stale entry of the array, is never
        # consulted.
        $i-- while (($i > 0) && ($prefix ne $listprefix[$i-1]));
        $i = -1 if $i == 0;
    }

    if (($i >= 0) && ($i != $listnum))
    {
        &endlist($listnum - $i);
    } elsif (!$listnum || $i != $listnum)
    {
        &startlist($prefix, $number, $rawprefix);
    }

    &continuelist($prefix, $number, $rawprefix) if ($mode & $LIST);
}
666
# endpreformat
#   Leave <PRE> mode when $line no longer looks preformatted (no run of at
#   least $preformat_whitespace_min whitespace characters followed by
#   text) and, unless a single line is enough to trigger, $nextline does
#   not either.  The closing tags are added to $prev and the line is
#   flagged as $END.
sub endpreformat
{
    my $looks_pre = qr/\s{$preformat_whitespace_min,}\S+/;

    return if $line =~ $looks_pre;
    return if $endpreformat_trigger_lines != 1 && $nextline =~ $looks_pre;

    $prev =~ s#$#\n</PRE></font>#;
    $mode ^= ($PRE & $mode);
    $line_action |= $END;
}
678
# preformat
#   Enter <PRE> mode.  With $preformat_trigger_lines == 0 this always
#   happens; otherwise $line must contain a run of at least
#   $preformat_whitespace_min whitespace characters followed by text, and
#   (unless one line is enough to trigger) so must $nextline.
#   The opening tags are put in front of $line, a "<p>" is removed from
#   $prev, and both $mode and $line_action get the $PRE flag.
sub preformat
{
    if ($preformat_trigger_lines != 0)
    {
        my $looks_pre = qr/\s{$preformat_whitespace_min,}\S+/;

        return unless $line =~ $looks_pre;
        return unless $preformat_trigger_lines == 1 ||
                      $nextline =~ $looks_pre;
    }

    $line =~ s/^/<font size=-1><PRE>\n/;
    $prev =~ s/<p>//;
    $line_action |= $PRE;
    $mode |= $PRE;
}
692
# make_new_anchor
#   Advance the global anchor counter $anchor and return its new value.
#   Arguments are ignored.
sub make_new_anchor
{
    return ++$anchor;
}
698
# anchor_mail
#   Wrap the text of $line (up to its newline) in an <A NAME="..."> anchor
#   whose name comes from make_new_anchor.
sub anchor_mail
{
    my ($header_value) = $line =~ /\S+: *(.*) *$/;
    my $name = &make_new_anchor($header_value);
    $line =~ s/(.*)/<A NAME="$name">$1<\/A>/;
}
705
# anchor_heading
#   Wrap the <Hn>...</Hn> element found in $line in an <A NAME="...">
#   anchor whose name comes from make_new_anchor.  The heading text is
#   passed as the only argument.
sub anchor_heading
{
    my ($heading_text) = @_;
    my $name = &make_new_anchor($heading_text);
    $line =~ s/(<H.>.*<\/H.>)/<A NAME="$name">$1<\/A>/;
}
712
# heading
#   Treat $line as a heading when $nextline underlines it.  The underline
#   may be up to $underline_tolerance_short characters shorter or
#   $underline_tolerance_long characters longer than the heading text.
#   The first underline character selects the level:
#       *  H1    =  H2    +  H3    -  H4    ~  H5    .  H6
#   On success the underline is consumed from STDIN, $line is wrapped in
#   the heading tag and an anchor, and the line is flagged as $HEADER.
sub heading
{
    # Heading text without its leading indentation.  Trailing whitespace
    # is taken care of in main().
    my $heading = ($line =~ /^(\s*)(.+)$/)[1];

    my ($underline) = $nextline =~ /^\s*(\S+)\s*$/;

    my $heading_len   = length($heading);
    my $underline_len = length($underline);
    return if $heading_len > ($underline_len + $underline_tolerance_short);
    return if $heading_len < ($underline_len - $underline_tolerance_long);

    my %level_for = (
        "*" => 1,
        "=" => 2,
        "+" => 3,
        "-" => 4,
        "~" => 5,
        "." => 6,
    );
    my $hlevel = $level_for{substr($underline,0,1)};
    return if !$hlevel;

    $nextline = <STDIN>;        # Eat the underline
    &tagline("H${hlevel}");
    &anchor_heading($heading);
    $line_action |= $HEADER;
}
748
# unhyphenate
#   Join a word that was hyphenated across $line and $nextline: the first
#   word of $nextline (with any punctuation and whitespace that follows
#   it) is moved to replace the trailing hyphen of $line.  The leading
#   whitespace of $nextline is kept, because other heuristics depend on
#   indentation.
sub unhyphenate
{
    # Both patterns describe the same thing: optional indentation, a run
    # of letters, optional closing punctuation, optional whitespace.
    # The first captures the word, the second removes it.
    my ($moved_word) = $nextline =~ /^\s*([a-zA-Z]+[\)\}\]\.,:;\'\"\>]*\s*)/; # "
    $nextline =~ s/^(\s*)[a-zA-Z]+[\)\}\]\.,:;\'\"\>]*\s*/$1/; # "
    # (The trailing quote comments keep simple syntax highlighters happy.)

    # The hyphen and everything after it (including the newline) goes;
    # a fresh newline is put back afterwards.
    $line =~ s/\-\s*$/$moved_word/;
    $line .= "\n";
}
765
# untabify
#   Replace the leading run of spaces and tabs in $line with the
#   equivalent number of spaces, where each tab advances to the next
#   multiple of $tab_width.
sub untabify
{
    my ($lead) = $line =~ /^([ \011]+)/;
    my $lead_len = length($lead);

    my $column = 0;
    foreach my $ch (split(//, defined($lead) ? $lead : ""))
    {
        if ($ch eq " ")
        {
            $column += 1;                                       # Space
        }
        else
        {
            $column += $tab_width - ($column % $tab_width);     # Tab
        }
    }

    $line = (" " x $column) . substr($line, $lead_len);
}
783
# tagline
#   Wrap $line in <TAG>...</TAG>, where TAG is the only argument.  Leading
#   whitespace is dropped and the result ends with a single newline.
sub tagline
{
    my ($tag) = @_;
    my ($open, $close) = ("<$tag>", "</$tag>");
    $line =~ s/^\s*(.*)\s*$/$open$1$close\n/;
}
789
# caps
#   When $line has no lowercase letters or "<" and contains a run of at
#   least $min_caps_length capital letters, wrap it in $caps_tag and flag
#   it as $CAPS.
sub caps
{
    return unless $line =~ /^[^a-z<]*[A-Z]{$min_caps_length,}[^a-z<]*$/;

    $line_action |= $CAPS;
    &tagline($caps_tag);
}
798
799
800
# main
#   Program entry point.  Parses the options, prints the page header,
#   converts the text read from STDIN to HTML on STDOUT, then prints either
#   the contents of $append_file or the built-in footer, followed by the
#   closing tags unless $extract is set.
#
#   The conversion works on a sliding window of three lines ($prev, $line,
#   $nextline).  Each pass runs the heuristics on $line, which may also
#   edit $prev and $nextline, and then prints $prev, so output lags the
#   input by one line.  The order in which the heuristics run matters.
sub main
{
    &deal_with_options;

    # NOTE(review): the header is printed unconditionally, even in extract
    # mode; only the closing </BODY></HTML> at the end checks $extract.
    if(1)
    {
        print q(
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
  "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en-US">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Description" content="ImageMagick - a robust collection of tools and libraries to read, write and manipulate an image in any of the popular image formats. ImageMagick allows dynamic creation of GIFs, making it suitable for Web applications.">
<meta name="Keywords" content="ImageMagick,Image Magick,Image Magic,PerlMagick,Perl Magick,Perl Magic,WebMagick,Web Magic,image processing,software development,simulation,image software,AniMagick,Animagic,Magick++">
<meta name="Resource-type" content="document">
<meta name="Robots" content="ALL">
<link rel="stylesheet" type="text/css" href="../www/magick.css">
</head>

<body marginheight=1 marginwidth=1 topmargin=1 leftmargin=1>
<a name="top"></a>
<table border="0" cellpadding="0" cellspacing="0" summary="Masthead" width="100%">
<tbody>
<tr>
<td bgcolor="#003399" width="25%" height="118" background="../images/background.gif"><a href="http://www.imagemagick.org/"><img src="../images/script.gif" width="278" height="118" border="0" alt="" /></a></td>
<td bgcolor="#003399" width="60%" height="118" background="../images/background.gif"><a href="http://www.networkeleven.com/direct.php?magick_all"><img src="../images/promote.png" border="0" width="186" height="52" vspace="29" alt="Powered by NetworkEleven" /></a></td>
<td bgcolor="#003399" width="114" height="118" align="right"><img src="../images/sprite.png" width="114" height="118" alt="" /></td>
<td bgcolor="#003399" width="114" height="118" align="right"><a href="http://www.imagemagick.net"><img src="../images/logo.png" width="114" height="118" border="0" alt="ImageMagick logo" /></a></td>
</tr></tbody></table>
</table><table align="left" border=0 cellpadding=2 cellspacing=2 summary="Navigation buttons" width="20%">
 <tr>
  <td>
   <form target="_self" action="../index.html"><input type="submit" title="ImageMagick Home" value=" Home " style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
  </td><td>
   <form target="_self" action="../www/apis.html"><input type="submit" title="ImageMagick API" value=" API " style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
  </td><td>
   <form target="_self" action="../www/archives.html"><input type="submit" title="ImageMagick Download" value="Download" style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
  </td>
 </tr>
</table>
<div align="right" style="margin-top:3px; padding-right:4px">
  <form action="http://studio.imagemagick.org/Sage/scripts/Sage.cgi">
	<input type="TEXT" name="query" size=32 maxlength=255>
	<input type="SUBMIT" name="sa" value="Search" style="background-image:url('../images/background.gif'); bgcolor:#003399; color:#fbc713; font-weight:bold">
 </form><br>
</div>
<table align="left" border=0 cellpadding=10 cellspacing=0 style="margin-top:-17px" width="100%"><tr><td>

<br>&nbsp;<br>
) . "\n";
        print "<HTML>\n";
        print "<HEAD>\n";

        # It'd be nice if we could guess a title from the first header,
        # but even that would be too late if we're doing this in one pass.
        # NOTE(review): $title is inserted as given, without HTML escaping.
        print "<TITLE>$title</TITLE>\n" if($title);

        print "</HEAD>\n";

        print q(<body text="#000000" bgcolor="#fbc713" link="#1F00FF" vlink="#9900DD" alink="#FF0000">) . "\n";

        if ($title) {
            print "<h3>$title</h3>\n";
        }
    }

    $prev     = "";
    $line     = <STDIN>;
    $nextline = <STDIN>;
    do
    {
        $line =~ s/[ \011]*$//; # Chop trailing whitespace

        &untabify;              # Change leading whitespace into spaces

        $line_length = length($line); # Do this before tags go in

        &escape;

        &endpreformat if (($mode & $PRE) && ($preformat_trigger_lines != 0));

        &hrule if !($mode & $PRE);

        &heading   if (!($mode & $PRE) &&
                       $nextline =~ /^\s*[=\-\*\.~\+]+$/);

        &caps if  !($mode & $PRE);

        &liststuff if (!($mode & $PRE) &&
                       !&is_blank($line));

        &mailstuff if ($mailmode &&
                       !($mode & $PRE) &&
                       !($line_action & $HEADER));

        &preformat if (!($line_action & ($HEADER | $LIST | $MAILHEADER)) &&
                       !($mode & ($LIST | $PRE)) &&
                       ($endpreformat_trigger_lines != 0));

        &paragraph if ((&is_blank($prev) || ($line_action & $END)) &&
                       !&is_blank($line) &&
                       !($mode & ($LIST | $PRE)) && # paragraphs in lists
                                                    # *should* be allowed.
                       (!$line_action ||
                        ($line_action & ($CAPS | $END | $MAILQUOTE))));

        &shortline;

        &unhyphenate if ($unhyphenation &&
                         ($line =~ /[a-zA-Z]\-$/) && # ends in hyphen
                         # next line starts w/letters
                         ($nextline =~ /^\s*[a-zA-Z]/) &&
                         !($mode & ($PRE | $HEADER | $MAILHEADER | $BREAK)));


        # Print it out and move on.

        print $prev;

        # The per-line flags are carried across a blank following line,
        # so they are only rolled over when the next line has content.
        if (!&is_blank($nextline))
        {
            $previous_action = $line_action;
            $line_action     = $NONE;
        }

        $prev = $line;
        $line = $nextline;
        $nextline = <STDIN>;
    } until (!$nextline && !$line && !$prev);

    $prev = "";
    &endlist($listnum) if ($mode & $LIST); # End all lists
    print $prev;

    print "\n";

    print "</PRE></font>\n" if ($mode & $PRE);

    if ($append_file)
    {
        # Three-argument open with a lexical handle: the file name is never
        # interpreted as a mode or a command, and a failed open is reported
        # instead of being silently ignored.
        if (open(my $append_fh, '<', $append_file))
        {
            print while <$append_fh>;
            close($append_fh);
        } else {
            print STDERR "Can't find or read file $append_file to append.\n";
        }
    } else {
        print q(<hr>) . "\n";
        print q(
<a href="#top"><img src="../images/top.gif" border=0 width="35" height="46" align="right" alt="Top of page"></a>
<form action="http://studio.imagemagick.org/magick/" style="margin-top:5px">
 <input type="submit" title="Help!" value="Help!" style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold">
  <small>&quot;Image manipulation software that works like magick&quot;</small>
	</form></td>
	</tr></table>
) . "\n";
    }

    if(!$extract)
    {
        print "</BODY>\n";
        print "</HTML>\n";
    }
}
966
# Run the converter.
main();
968
969
970