• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/perl -w
2#--*-Perl-*--
3
4# NOTES:
5#
6# 'tagscan' refers to the procedure of examining the CVS data (rlog output
7# for each file) and determining what bug IDs exist between two tags.
8#
9# 'dcuthelp' refers to the procedures of examining the CVS rlog cache
10# given a tag and a list of bugs, and helping to incorporate those bug
11# fixes into the tag.  For this to occur, in each file, any changes after
12# tag within the bug list must be contiguous and must begin in the tag's
13# revision.
14#
15# Params:
16#  debug - if set, output debugging info
17#  user - user name
18#  path_info - override actual path info, for debugging, e.g., "/form"
19#  mod - module(s) list
20#  include_attic - if set, include Attic during search (ignored by default)
21
22use strict;
23use CGI;
24#use CGI::Carp qw(fatalsToBrowser); # Do NOT use this -- doesn't work
25use File::Path;
26use IO::Handle;
27use Time::Local 'timelocal_nocheck';
28use Carp;
29#use Data::Dumper;
30
31use vars qw($QUERY $DEBUG $USER $TITLE $CLDR
32	    $DIFF_URL $DIFF_URL_SUFFIX $CVSWEB_REP_ID $CVSWEB_REP_SUFF $LOG_URL_SUFFIX $SHOW_URL $SHOW_URL_SUFFIX $LOG_URL
33	    $CVSROOT $BASE_REV %MOD_ABBREV $DEFAULT_MOD $NO_JITTERBUG
34	    $CACHE $INSTA $INSTA_ATTIC
35	    $UPDATE_COUNT $UPDATE_ATTIC_COUNT $UPDATE_NONATTIC_COUNT
36	    $TAGSCAN_TAG_LO $TAGSCAN_TAG_HI %TAGSCAN_IDS $TAGSCAN_COUNT
37	    $TAGSCAN_TAG_HI_DATE
38	    %TAGSCAN_ALLTAGS %TAGSCAN_WHY
39	    $DCUTHELP_TAG %DCUTHELP_IDS
40	    @DCUTHELP_BADFILES $DCUTHELP_COUNT @DCUTHELP_RETAGS
41	    @TAGLESS_FILES @BRANCHED_FILES @NO_JITTERBUG_FILES
42	    %MODE_MAP $NOW $YEAR $CVS_MSG_KW
43	    );
44
45&initGlobals;
46&main;
47exit(0);
48
49#---------------------------------------------------------------------
50sub initGlobals() {
51    $QUERY = new CGI;
52
53    $DEBUG = $QUERY->param('debug');
54    $CLDR=1;
55
56    # User name, if any.  We try to propagate the user name so a logged-in
57    # jitterbug user can stay that way.
58    $USER = $QUERY->param('user');
59
60    $CVSWEB_REP_ID = "ICU";
61
62    if ($CLDR == 0) {
63    	$TITLE="ICU Jitterbug Diffs";
64    } else {
65	$TITLE="CLDR Jitterbug Diffs";
66    }
67    #$CVSWEB_REP_SUFF = "&cvsroot=" . $CVSWEB_REP_ID;
68    $CVSWEB_REP_SUFF = "";
69
70    # The following URLs should be suffixed with a module name
71    # such as "icu/icu".
72
73    # Display the diffs between two revisions of a file
74    # E.g., suffix with "/icu/icu/license.html.diff?r1=1.2&r2=1.3"
75    $DIFF_URL = "http://www.unicode.org/cgi-bin/viewcvs.cgi"; # No trailing "/"
76    $DIFF_URL_SUFFIX = $CVSWEB_REP_SUFF;
77
78    # Display a specific file revision
79    # E.g., suffix with "/icu/icu/license.html?rev=1.1$SHOW_URL_SUFFIX"
80    $SHOW_URL = $DIFF_URL; # No trailing "/"
81    $SHOW_URL_SUFFIX = "&content-type=text/x-cvsweb-markup" . $CVSWEB_REP_SUFF;
82
83    # Display the CVS log for a file
84    # E.g., suffix with "/icu/icu/license.html"
85    $LOG_URL = $DIFF_URL; # No trailing "/"
86    $LOG_URL_SUFFIX = $CVSWEB_REP_SUFF;
87
88    # CVS root
89    if ( $CLDR == 0 ) {
90	$CVSROOT = "/data/mirrors/icu"; # Must NOT end with "/"
91    } else {
92	$CVSROOT = "/home/cvsroot";
93    }
94
95    # A fake revision number indicating the slot before the oldest revision in
96    # the rlog history.  Not user visible.
97    $BASE_REV = "0";
98
99    if ($CLDR == 0) {
100    # Recognized abbreviated module names.
101    %MOD_ABBREV = (
102        icu          => 'icu',
103        icuapps      => 'icuapps',
104        icu4j        => 'icu4j',
105        icu4jni      => 'icu4jni',
106        unicodetools => 'unicodetools',
107        charset      => 'charset',
108    );
109
110    # Default modules to search
111    $DEFAULT_MOD = 'icu icu4j';
112    } else {
113    # Recognized abbreviated module names.
114    %MOD_ABBREV = (
115        cldr      => 'cldr',
116        common      => 'cldr/common',
117    );
118
119    # Default modules to search
120    $DEFAULT_MOD = 'common';
121    }
122
123
124    # Magic Jitterbug ID used when a CVS checkin does not include a
125    # Jitterbug ID.  Should be unlikely (or impossible) to be a real
126    # Jitterbug ID.
127    $NO_JITTERBUG = 9999987;
128
129    # Root of our cache of CVS meta-information.  Right now this cache
130    # takes the form of a mirror of /usr/cvs.  We only mirror
131    # /usr/cvs/icu/icu and /usr/cvs/icu4j/icu4j at this point.  All CVS
132    # files (*,v) have an identically named file in the same location in
133    # the cache.  Currently the cache file is the output of rlog.  In the
134    # future a more compressed form could be used (although there isn't
135    # much to be gained, maybe 10%).  Instead of grepping over the CVS
136    # repository, we grep over the cache.  This cuts the grep time by
137    # about 90%.  Before using the cache, we update it by walking through
138    # the CVS repository and checking file mod dates.  Any file that's
139    # been changed gets updated in the cache.
140    # Use real path; link causes problems.
141    #$CACHE = "/www/software10/cgi-bin/icu/grepj.cache";
142    if($CLDR==0) {
143    	$CACHE = "/tmp/icu-grepj.cache"; # No trailing "/"
144    } else {
145    	$CACHE = "/tmp/icu-grepj-cldr.cache"; # No trailing "/"
146    }
147
148    # Another cache that holds the results of the last searches.
149    # Invalidate this cache whenever the main cache needs updating.
150    # This cache consists of files named "1234".  Each file
151    # contains the final HTML for that bug ID.  Searches that include
152    # the attic are kept in a subdirectory 'Attic'.
153    $INSTA = "$CACHE/insta";
154    $INSTA_ATTIC = "$INSTA/Attic";
155
156    # Count of updated cache files
157    $UPDATE_COUNT = 0;
158    $UPDATE_ATTIC_COUNT = 0;
159    $UPDATE_NONATTIC_COUNT = 0;
160
161    # Dispatch table mapping path_info to sub
162    %MODE_MAP = (
163	'/top'         =>  \&emit_top,
164	'/form'        =>  \&emit_form,
165	'/difflist'    =>  \&emit_difflist,
166	'/nav'         =>  \&emit_nav,
167	'/result'      =>  \&emit_result,
168	'/help'        =>  \&emit_help,
169	'/admintop'    =>  \&emit_admintop,
170	'/adminform'   =>  \&emit_adminform,
171	'/adminresult' =>  \&emit_adminresult,
172        '/localdiff'   =>  \&emit_localdiff,
173    );
174
175    $NOW = time();
176    $YEAR = 1900+@{[localtime]}[5]; # Get the current year
177
178    # Regex for grepping for jitterbug checkin comments
179    # Will be surrounded by parens
180    if($CLDR == 0) {
181	$CVS_MSG_KW = "jitterbug|fixed";
182    } else {
183    	$CVS_MSG_KW = "cldrbug";
184    }
185}
186
187#---------------------------------------------------------------------
188# This script generates various frames within framesets.  The 'mode'
189# parameter determines which frame is generated.
190sub main() {
191
192    STDOUT->autoflush(1); # Make progress output appear progressively...
193
194    my $needed = 'h'; # next up: 'h'eader or 'e'nd_html
195
196    eval {
197	local $SIG{'__DIE__'}; # disable installed DIE hooks
198	local $SIG{'__WARN__'} = sub {  die $_[0]; }; # transmute warnings
199
200	# The path info specifies what we are being called to emit.
201	# This script emits the frameset and the frames within it
202	# depending on this param.  For the URL
203	# "http://oss.software.ibm.com/cvs/icu-jinfo/foo", the path
204	# info is "/foo".  The path info can be overridden (for debugging)
205	# with a CGI param of "path_info=/bar".
206	my $path_info = $QUERY->path_info;
207	if ($QUERY->param('path_info')) {
208	    $path_info = $QUERY->param('path_info');
209	}
210
211	# Simplify it:  "/foo/..." or "/foo&..." => "/foo"
212	$path_info =~ s|(\w)\W.*|$1|;
213	$path_info ||= '/top'; # default
214
215	my $fn = $MODE_MAP{$path_info};
216	die "unknown path_info \"$path_info\"" unless ($fn);
217
218	if ($path_info ne '/localdiff') {
219	    print $QUERY->header;
220	    $needed = 'e';
221	}
222
223	$fn->();
224    };
225
226    if ($@) {
227	if ($needed eq 'h') {
228	    print $QUERY->header;
229	    $needed = 'e';
230	}
231	print "<hr><b>Internal error: ", $@,
232              "<br>Please contact <a href=\"mailto:alanliu\@us.ibm.com\">Alan</a></b>";
233    }
234
235    if ($needed eq 'e') {
236	print $QUERY->end_html;
237    }
238}
239
240#---------------------------------------------------------------------
241# Create URL for the reviewer index
242# @param user (or empty string if none)
243sub reviewersURL {
244    my $user = shift || '';
245    $user = "?user=$user" if ($user);
246    return "http://bugs.icu-project.org/cgibin/private/byname/review$user";
247}
248
249#---------------------------------------------------------------------
250# Create URL for jitterbug
251# @param user (or empty string if none)
252# @param ID (or empty if none);
253sub jitterbugURL {
254    my $user = shift || '';
255    my $id = shift || '';
256
257   if($CLDR == 0) {
258    if ($id ne '') {
259	if ($user) {
260	    return "http://bugs.icu-project.org/cgibin/private/icu-bugs-private?;user=$user;findid=$id";
261	} else {
262	    return "http://bugs.icu-project.org/cgibin/icu-bugs?findid=$id";
263	}
264    } else {
265	if ($user) {
266	    return "http://bugs.icu-project.org/cgibin/private/icu-bugs-private?;user=$user;";
267	} else {
268	    return "http://bugs.icu-project.org/cgibin/icu-bugs";
269	}
270    }
271  } else {
272    if ($id ne '') {
273	if ($user) {
274	    return "http://bugs.icu-project.org/cgibin/cldr/locale-bugs-private?;user=$user;findid=$id";
275	} else {
276	    return "http://bugs.icu-project.org/cgibin/locale-bugs?findid=$id";
277	}
278    } else {
279	if ($user) {
280	    return "http://bugs.icu-project.org/cgibin/cldr/locale-bugs-private?;user=$user;";
281	} else {
282	    return "http://bugs.icu-project.org/cgibin/locale-bugs";
283	}
284    }
285   }
286}
287
288######################################################################
289# HTML GUI
290######################################################################
291
292# Emit the HTML for the top frameset in normal (bug diffs) mode
293sub emit_top {
294    # Propagate url parameters down to the frames within the frameset
295
296    my $self = $QUERY->url(-full=>1, -query=>1);
297    my $f  = urlPathInfo($self, '/form');
298    my $dl = urlPathInfo($self, '/difflist');
299    my $n  = urlPathInfo($self, '/nav');
300    my $r  = urlPathInfo($self, '/result');
301
302    print <<END;
303<html><head><title>$TITLE</title></head>
304<!--$self-->
305<frameset cols="300,*">
306 <frameset rows="135,*">
307  <frame src="$f" name="form" scrolling=no>
308  <frame src="$dl" name="difflist">
309 </frameset>
310 <frame src="$r" name="result">
311</frameset>
312END
313
314# <frameset rows="30,*">
315#  <frame src="$n" name="nav" scrolling=no>
316#  <frame src="$r" name="result">
317# </frameset>
318}
319
320sub emit_form {
321    print $QUERY->start_html(-title=>$TITLE,
322                             -target=>'difflist');
323
324    my $script_name = $QUERY->script_name;
325
326    print $QUERY->startform(-action=>urlPathInfo($script_name, '/difflist'),
327                            -target=>'difflist',
328			    -method=>'GET');
329
330    my $user = $QUERY->param('user') || '';
331
332    print "<H2>$TITLE"; # h1 too big
333    print " <FONT SIZE=-1>($user)</FONT>" if ($user);
334    print "</H2>";
335
336    print "ID? ",$QUERY->textfield(-name=>'id',-size=>5)
337	, $QUERY->submit(-name=>'Search')
338	, " <FONT SIZE=-1><A href=\""
339	, urlPathInfo($script_name, '/help')
340	, "\">Help</A></FONT>";
341
342    print "\&nbsp;<FONT SIZE=-1>"
343	, "<A href=\"", urlPathInfo($script_name, '/admintop')
344	, "?user=$user\" target=\"_top\">Admin</A></FONT>";
345
346    print "<BR>\nModules:&nbsp;";
347    print $QUERY->textfield(-name=>'mod',
348			    -default=>$DEFAULT_MOD,
349			    -size=>30);
350
351    print "<BR>\n";
352
353    print "<FONT SIZE=-1>";
354    print $QUERY->checkbox(-name=>"include_attic",
355                           -label=>"Incl. Attic");
356    print $QUERY->checkbox(-name=>"localdiff",
357			   -label=>"Local Diff");
358    print "</FONT>";
359
360    print "\&nbsp;<A href=\"", reviewersURL($user), "\" target=\"_top\" title=\"List bugs by reviewer\">Reviewers</A>";
361
362    print "\&nbsp;<A href=\"", jitterbugURL($user), "\" target=\"_top\" title=\"Go to main Jitterbug page\">Jitterbug</A>";
363
364    # Propagate params that don't have corresponding form elements
365    print $QUERY->hidden('user');
366    print $QUERY->hidden('debug');
367    if($CLDR==1) {
368      print $QUERY->hidden('cldr');
369    }
370
371    print $QUERY->end_form;
372}
373
374sub emit_nav {
375    print $QUERY->start_html(-title=>$TITLE,
376                             -target=>'result');
377    print "Under construction: Navigation bar goes here";
378}
379
380sub emit_difflist {
381    print $QUERY->start_html(-title=>$TITLE,
382                             -target=>'result');
383
384    ############################################################
385    # ID
386
387    my $ID = $QUERY->param('id') || '';
388    $ID =~ s/\s//g;
389
390    #print "<br/><b>query:</b>";
391    #print $QUERY->Dump;
392    #print "<br/>";
393
394    if ($ID eq '') {
395	print "(Warning: search, but No ID given.)<br/> \n";
396        &emit_help;
397        return;
398    }
399
400    if ($ID =~ /^0*(\d+)$/) {
401	$ID = $1;
402    } else {
403        print "\"$ID\" is not a valid Jitterbug ID.  Please ";
404        print "enter one or more decimal digits.";
405        return;
406    }
407
408    ############################################################
409    # User
410
411    my $user = $QUERY->param('user');
412
413    ############################################################
414    # Modules
415
416    my @m;
417    return if (!parseMod(\@m)); # what modules are we searching?
418
419    my $localDiff = $QUERY->param('localdiff');
420
421    # Only use the INSTA cache for standard module searches.
422    my $isStd = (join(' ', sort @m) eq 'icu/icu icu4j/icu4j')
423	&& !$localDiff;
424
425    ############################################################
426    # Output
427
428    print "What is Jitterbug ", jitterbugLink($user, $ID), "?";
429
430    foreach (@m) {
431	updateCacheDir($_);
432    }
433
434    # If the cache has been updated then the instaCache entries
435    # are all invalid and must be deleted.  Otherwise try to
436    # look up the diffs from the instaCache.
437    mkpath($INSTA_ATTIC, 0, 0777);
438    if ($UPDATE_COUNT) {
439	print "done ($UPDATE_NONATTIC_COUNT,$UPDATE_ATTIC_COUNT).";
440	resetInstaCache(0);
441    } elsif ($isStd) {
442	my $diffs = instaGet($ID);
443	if ($diffs) {
444	    print $diffs;
445	    print "<BR><EM><FONT SIZE=-1>(Results from cache)</FONT></EM>";
446	    return;
447	}
448    }
449
450    # If we don't find the ID in the instaCache, then generate
451    # the diffs the hard way and store the result in the
452    # instaCache.
453    my $diffs;
454    foreach my $module (@m) {
455	debugOut("module $module") if ($DEBUG);
456	my $m = $module;
457	$m =~ s|^.+/||;
458        $diffs .= out("<HR><CENTER><B><FONT SIZE=+1>", uc($m),
459		      "</FONT></B></CENTER><HR>");
460        debugOut("+generateDiffsList($ID, $module)") if ($DEBUG);
461        $diffs .= generateDiffsList($ID, $module);
462        debugOut("-generateDiffsList($ID, $module)") if ($DEBUG);
463    }
464    instaPut($ID, $diffs) if ($isStd);
465}
466
467sub emit_localdiff {
468    print $QUERY->header(-type=>'application/octet-stream',
469			 -attachment=>'localdiff.bat');
470    my $file = $QUERY->param('file');
471    my $r1 = $QUERY->param('r1');
472    my $r2 = $QUERY->param('r2');
473    my $mod = $QUERY->param('m');
474    my $leaf = $file;
475    $leaf =~ s|.*[/\\]([^/\\]+)+$|$1|;
476    $file = "$mod/$file";
477    my $eol = "\015\012"; # DOS eol
478    print "cd %TEMP%$eol";
479    print "mkdir grepj$eol";
480    print "cd grepj$eol";
481    print "set CVSROOT=:pserver:$USER\@oss.software.ibm.com:/usr/cvs/$mod$eol";
482    print "cvs checkout -p -r $r1 $file > $leaf-$r1$eol";
483    print "cvs checkout -p -r $r2 $file > $leaf-$r2$eol";
484    print "start wincmp $leaf-$r1 $leaf-$r2$eol";
485    print "del \%0$eol";
486}
487
488sub emit_result {
489    print $QUERY->start_html(-title=>$TITLE);
490}
491
492sub emit_help {
493    my $x = join(" ", sort keys(%MOD_ABBREV));
494    print <<END;
495Search the ICU and ICU4J CVS repositories for changes committed against
496a specific Jitterbug.
497
498<P>For a change to be recognized,
499its commit comment must start with "<CODE>Jitterbug <B>n</B></CODE>",
500where <CODE><B>n</B></CODE> is the bug ID.
501
502<P>The search generates a list of all files changes for this bug,
503together with the specific revisions in each
504file that are relevant (there may be more than one).
505
506<P>In the diff list,
507select a <B>file name link</B> to see the CVS log
508for that file.
509
510<P>Select a <B>revision link</B> to see changes
511checked in against that revision.  "Diff" revision links
512show diffs against the previous revision.  "View" links
513show initial check in revisions.
514
515<P>If a file contains more than one revision relevant to this
516Jitterbug ID, then an <B>overall revision link</B> will be available.
517Use this to see the effect of all changes at once.  <I>If the revisions
518are not contiguous, then this diff will contain changes
519not related to this Jitterbug.</I>  In that case you may
520prefer to view the individual diffs instead.
521
522<P><B>Incl. Attic</B> causes files under any directory named
523"Attic" to be included.
524
525<P><B>Local Diff</B> enables special links that look like this [*]
526which cause your browser to download a Windows batch file.  The
527batch file, when executed, will bring up the relevant diffs in
528Compare It!.  For this to work, you need the following:
529
530<UL><LI><B>cvs</B> must be on your PATH.  For example, you may
531add <CODE>C:\\Program Files\\GNU\\WinCVS 1.2</CODE> to your PATH.
532<LI><B>wincmp</B> must be on your PATH.  This is the Compare It!
533executable.  For example, you may add <CODE>C:\\Program Files\\Compare
534It!</CODE> to your PATH.
535<LI>You must be "logged in" for the cvs checkouts to work.  If your
536name is present in parentheses next to "ICU Jitterbug Diffs" in the
537upper left frame, you are logged in.
538</UL>
539
540<P><B>Modules</B> lists the modules to be searched.  By default
541this is "icu icu4j" but any modules (under /usr/cvs) may be listed.
542Full module names (e.g., "icu/icuapps") may be used.  The following
543abbreviations are recognized:  <CODE>$x</CODE>.
544END
545}
546
547######################################################################
548# Admin GUI
549######################################################################
550
551# Emit the HTML for the top frameset in admin mode
552sub emit_admintop {
553    # Propagate url parameters down to the frames within the frameset
554
555    my $self = $QUERY->url(-full=>1, -query=>1);
556    my $f = urlPathInfo($self, '/adminform');
557    my $r = urlPathInfo($self, '/adminresult');
558    my $TITLETXT = $TITLE;
559
560    #if ($id ne '') {
561#`h	TITLETXT = "$id - $TITLETXT";
562  #  }
563
564    print <<END;
565<html><head><title>$TITLE</title></head>
566<frameset cols="300,*">
567  <frame src="$f" name="adminform" scrolling=yes>
568  <frame src="$r" name="adminresult">
569</frameset>
570END
571}
572
573# Print the admin input form.
574sub emit_adminform {
575
576    print $QUERY->start_html(-title=>$TITLE,
577                             -target=>'adminresult');
578
579    my $script_name = $QUERY->script_name;
580
581    print $QUERY->startform(-action=>urlPathInfo($script_name, '/adminresult'),
582                            -TARGET=>'adminresult');
583
584    print "<FONT SIZE=+2><B>Administrative Tools</B></FONT>";
585
586    my $user = $QUERY->param('user');
587    my $u = $user ? "?user=$user" : '';
588    print "\&nbsp;<FONT SIZE=-1>"
589	, "<A href=\"$script_name$u\" target=\"_top\">Back</A></FONT><BR>";
590
591    print '<FONT SIZE=-1>Tags may be specified in full, e.g. '
592	, '"release-2-4", or as release numbers, such as "2.4".  ',
593	'Specify module(s) here for commands below.',
594	'</FONT><BR>';
595
596    print "Modules:&nbsp;";
597    print $QUERY->textfield(-name=>'mod',
598			    -default=>$DEFAULT_MOD,
599			    -size=>30);
600    print "<HR>";
601
602    print "<B>List Bugs Between CVS Tags</B><BR>";
603    print "<TABLE><TR><TD nowrap>Start Tag:</TD><TD>";
604    print $QUERY->textfield(-name=>'tag_lo',-size=>30);
605    print "</TD></TR><TR><TD nowrap>End Tag:</TD><TD>";
606    print $QUERY->textfield(-name=>'tag_hi',-size=>30);
607    print "</TD></TR><TR><TD></TD><TD>";
608    print $QUERY->submit(-name=>'Find Bugs');
609    print "</TD></TR></TABLE>";
610    print '<FONT SIZE=-1>Bugs are listed that occur after the start tag, up to and including the end tag.  Specify module(s) above.</FONT>';
611
612    print "<HR>\n";
613
614    print "<B>DCUT Helper</B><BR>";
615    print "<TABLE><TR><TD>Tag:</TD><TD>";
616    print $QUERY->textfield(-name=>'dcut_tag',-size=>33);
617    print "</TD></TR><TR VALIGN=TOP><TD>Bug IDs:</TD><TD>";
618    print $QUERY->textarea(-name=>'dcut_ids',-rows=>8,-columns=>26);
619    print "</TD></TR><TR><TD></TD><TD>";
620    print $QUERY->submit(-name=>'Check');
621    print "</TD></TR></TABLE>";
622    print '<FONT SIZE=-1>Enter a CVS tag and list of bugs to incorporate '
623	, 'those bugs into the tag.  '
624	, 'Specify module(s) above.</FONT>';
625
626    print "<HR>\n";
627
628    print $QUERY->submit(-name=>'Reset Insta Cache'), "<BR>";
629    print '<FONT SIZE=-1>The insta cache contains the HTML output for previous'
630        , ' bug diff search results.  In some cases (typically during script'
631        , ' development), it can get out of sync.</FONT>';
632
633    print "<HR>\n";
634
635    print $QUERY->submit(-name=>'Delete Cache File:'), "&nbsp;";
636    print $QUERY->textfield(-name=>'del_cache',-size=>17), "<BR>";
637    print '<FONT SIZE=-1 >Delete a file from the cache.  Path is relative'
638	, ' to cache root and must begin with the module path'
639	, ' (e.g. "icu/icu").</FONT>';
640
641    # Propagate params that don't have corresponding form elements
642    print $QUERY->hidden('user');
643    print $QUERY->hidden('debug');
644
645    print $QUERY->end_form;
646}
647
648# Implement the admin functions.
649sub emit_adminresult {
650    print $QUERY->start_html(-title=>$TITLE);
651
652    if ($QUERY->param('Find Bugs')) {
653	&do_tagscan;
654	return;
655    }
656
657    if ($QUERY->param('Check')) {
658	&do_dcuthelp;
659	return;
660    }
661
662    if ($QUERY->param('Reset Insta Cache')) {
663	resetInstaCache(1);
664	print "Cache at $INSTA has been erased.";
665	return;
666    }
667
668    if ($QUERY->param('Delete Cache File:')) {
669	my $f = $QUERY->param('del_cache');
670	# Careful here -- don't let the user delete anything but a
671	# legitimate cache file.  Watch out for "..", "~", "$", etc.
672	if ($f !~ m|^[a-z0-9_]+(/[a-z0-9_]+)+\.[a-z0-9]+$|i) {
673	    print "\"$f\" does not look like a valid path.";
674	    return;
675	}
676	$f = $CACHE . '/' . $f . ',v';
677 	if (! -e $f) {
678	    print "\"$f\" does not exist.";
679	    return;
680	}
681 	if (! -f $f) {
682	    print "\"$f\" is not a file.";
683	    return;
684	}
685	unlink($f);
686	# This check doesn't seem to work.
687 	#if (! -e $f) {
688	#    print "Error: Could not delete \"$f\".";
689	#    return;
690	#} else {
691	    print "Cache file \"$f\" deleted.";
692	#}
693	return;
694    }
695}
696
697######################################################################
698# Jitterbug diffs
699######################################################################
700
701#---------------------------------------------------------------------
702# Find the diffs for a jitterbug and display them.
703# Also display other useful links for this bug.
704# Param: ID number
705# Param: module name ("icu/icu" or "icu4j/icu4j" or other)
706# Return: The generated HTML.  Also print it to STDOUT
707# on the fly.
708sub generateDiffsList {
709    my $ID = shift;
710    my $module = shift;
711    my $result;
712
713    my $greproot = "$CACHE/$module";
714    my $log_url  = "$LOG_URL/$module/";
715    my $show_url = "$SHOW_URL/$module/";
716    my $diff_url = "$DIFF_URL/$module/";
717
718    # ID matching pattern
719    my $pat = "0*$ID";
720
721    # During merging, the bug IDs 1-98 for icu4j were migrated to
722    # 1301-1398.  Therefore, when the user requests a bug in the range
723    # 1301-1398, we search under both n and n-1300 in icu4j
724    # repository.
725    if ($module =~ /^icu4j/ && $ID >= 1301 && $ID <= 1398) {
726        my $ID2 = $ID - 1300;
727        $pat = "($pat|0*$ID2)";
728    }
729
730    # -E use extended regexp
731    # -i ignore case
732    # -I ignore binary files
733    # -l stop at first match and list file name
734    # -r recurse
735    # N/A now that we cache the rlog output
736    #my $flags = $ignoreBinaries ? "-EiIlr" : "-Eilr";
737
738    # (1 of 3 REGEXPS) SEE ALSO other regexps; keep them in sync
739    # TODO improve error handling in following line
740    my @files = `grep -Eilr "($CVS_MSG_KW)[ \\t]*$pat\\b" $greproot`;
741
742    if (!$QUERY->param('include_attic')) {
743        @files = grep(!m|/attic/|i, @files);
744    }
745
746    if (@files < 1) {
747        $result .= out("No changes found for Jitterbug $ID.\n");
748        return $result;
749    }
750
751    $result .= out("<FONT SIZE=-1>");
752
753    my $first = 1;
754
755    foreach my $f (sort cmpfiles @files) {
756        my @r = findRevisions($f, $pat);
757
758        if ($first) {
759            $first = 0;
760        } else {
761            $result .= out("<HR>\n");
762        }
763
764	my $localDiff = $QUERY->param('localdiff');
765
766        my $relFile = $f;
767        $relFile =~ s/^$greproot\///;
768        $relFile =~ s/,v//;
769        my $a = '';
770        my $b = $relFile;
771        if ($b =~ m|(.*/)(.+)|) {
772            ($a ,$b) = ($1, $2);
773        }
774        $result .= out("$a<A href=\"$log_url$relFile?$LOG_URL_SUFFIX\" title=\"View CVS log for $b\"><B>$b</B></A><BR>");
775        if (@r > 1) {
776            # Show diff of earliest to latest.
777            my $discontiguous = 0;
778            for (my $i=0; $i<$#r; $i++) { # [sic] from first to last-1
779                if ($r[$i]->{old} ne $r[$i+1]->{new}) {
780                    $discontiguous = 1;
781                    last;
782                }
783            }
784            my $new = $r[0]->{new};
785            my $old = $r[$#r]->{old};
786            $result .= out("<CENTER>");
787	    if ($discontiguous) {
788		$result .= out("<B>Contains other changes: </B>");
789	    }
790            if ($old eq $BASE_REV) {
791                $result .= out("<A href=\"$show_url$relFile?rev=$new$SHOW_URL_SUFFIX\">");
792                $result .= out("<B>View $new</B></A>");
793            } else {
794                $result .= out("<A href=\"$diff_url$relFile?r1=$old&r2=$new$DIFF_URL_SUFFIX\">");
795                $result .= out("<B>Diff $new vs $old</B></A>");
796		if ($localDiff) {
797		    my $self = $QUERY->url(-full=>1, -query=>1);
798		    my $url = urlPathInfo($self, '/localdiff');
799		    my $mod = $module;
800		    $mod =~ s|/.+||;
801		    out(" [<A href=\"$url;m=$mod;file=$relFile;r1=$old;r2=$new$DIFF_URL_SUFFIX\">*</A>]");
802		}
803            }
804
805            # Construct contiguous ranges if the overall diff is
806            # discontiguous.
807            if ($discontiguous) {
808                my @ranges;
809                my $start = 0;
810                for (my $i=0; $i<$#r; $i++) { # [sic] from first to last-1
811                    if ($r[$i]->{old} ne $r[$i+1]->{new}) {
812                        push @ranges, [$start, $i];
813                        $start = $i+1;
814                    }
815                }
816                push @ranges, [$start, $#r];
817                my $first = 1;
818                foreach my $range (@ranges) {
819                    my $new = $r[$range->[0]]->{new};
820                    my $old = $r[$range->[1]]->{old};
821                    if ($first) {
822                        $result .= out("<BR>\n(");
823                        $first = 0;
824                    } else {
825                        $result .= out("<BR>\n");
826                    }
827                    if ($old eq $BASE_REV) {
828                        $result .= out("<A href=\"$show_url$relFile?rev=$new$SHOW_URL_SUFFIX\">");
829                        $result .= out("View $new</A>");
830                    } else {
831                        $result .= out("<A href=\"$diff_url$relFile?r1=$old&r2=$new$DIFF_URL_SUFFIX\">");
832                        $result .= out("Diff $new vs $old</A>");
833			if ($localDiff) {
834			    my $self = $QUERY->url(-full=>1, -query=>1);
835			    my $url = urlPathInfo($self, '/localdiff');
836			    my $mod = $module;
837			    $mod =~ s|/.+||;
838			    out(" [<A href=\"$url;m=$mod;file=$relFile;r1=$old;r2=$new$DIFF_URL_SUFFIX\">*</A>]");
839			}
840                    }
841                }
842                $result .= out(")");
843            }
844
845            $result .= out("</CENTER>");
846        }
847
848        for (my $i=0; $i<@r; $i++) {
849            my $h = $r[$i];
850            my $new = $h->{new};
851            my $old = $h->{old};
852            if ($old eq $BASE_REV) {
853                $result .= out("<A href=\"$show_url$relFile?rev=$new$SHOW_URL_SUFFIX\">");
854                $result .= out("<B>View $new</B></A>");
855            } else {
856                $result .= out("<A href=\"$diff_url$relFile?r1=$old&r2=$new$DIFF_URL_SUFFIX\">");
857                $result .= out("<B>Diff $new</B></A>");
858		if ($localDiff) {
859		    my $self = $QUERY->url(-full=>1, -query=>1);
860		    my $url = urlPathInfo($self, '/localdiff');
861		    my $mod = $module;
862		    $mod =~ s|/.+||;
863		    out(" [<A href=\"$url;m=$mod;file=$relFile;r1=$old;r2=$new$DIFF_URL_SUFFIX\">*</A>]");
864		}
865            }
866            $result .= out(" <EM>", $h->{date}, "</EM> by <EM>", $h->{author}, "</EM><BR>");
867            $result .= out($h->{comment});
868            $result .= out("<BR>\n");
869        }
870    }
871
872    $result .= out("</FONT>");
873    $result;
874}
875
876# Sort criterion for file diffs
877sub cmpfiles {
878    my $aa = $a;
879    my $bb = $b;
880    $aa =~ s|/unicode(/[^/]+)$|$1|;
881    $bb =~ s|/unicode(/[^/]+)$|$1|;
882    $aa =~ s|\.h,|.1h,|;
883    $bb =~ s|\.h,|.1h,|;
884    return $aa cmp $bb;
885}
886
887# Sort criterion for revision numbers, e.g. "1.9" vs "1.10"
888sub cmprevs {
889    my @a = split('\.', $a);
890    my @b = split('\.', $b);
891    for (my $i=0; $i<=$#a && $i<=$#b; ++$i) {
892        my $c = $b[$i] - $a[$i];
893        return $c if ($c);
894    }
895    return $#b - $#a;
896}
897
898######################################################################
899# tagscan
900######################################################################
901
902# Perform a "tagscan" and emit the results.  A tagscan is a scan of
903# the CVS rlog cache in which bug IDs between two tags are compiled.
904# If a file is marked 'dead' it is ignored.  If it was created after
905# the latest date of the HI tag (as determined by checking _every_
906# file's date for that tag) then it is ignored.
907sub do_tagscan {
908    $TAGSCAN_TAG_LO = expandTag($QUERY->param('tag_lo'));
909    $TAGSCAN_TAG_HI = expandTag($QUERY->param('tag_hi'));
910
911    $TAGSCAN_TAG_HI_DATE = '';
912
913    if (!$TAGSCAN_TAG_LO || !$TAGSCAN_TAG_HI) {
914	print "Please enter two CVS tags and try again.";
915	return;
916    }
917
918    my $user = $QUERY->param('user');
919
920    my @m;
921    return if (!parseMod(\@m)); # what modules are we searching?
922
923    # Slight limitation -- our tagLink will only refer to the first module
924    print "Searching module(s) <B>", join(", ", @m)
925	, "</B> for bugs after tag <B>",
926	tagLink($TAGSCAN_TAG_LO,$m[0],'grepj_2'),
927	"</B> up to and including tag <B>",
928	tagLink($TAGSCAN_TAG_HI,$m[0],'grepj_2'),
929	"</B>.  <EM>Note: Dead files and Attic files will be ignored.</EM><BR>\n";
930
931    foreach (@m) {
932	updateCacheDir($_);
933    }
934
935    if ($UPDATE_COUNT) {
936	print "done ($UPDATE_NONATTIC_COUNT,$UPDATE_ATTIC_COUNT).";
937    }
938
939    %TAGSCAN_IDS = ();
940#at	%TAGSCAN_ALLTAGS = ();
941    %TAGSCAN_WHY = ();
942    $TAGSCAN_COUNT = 0;
943    print "<HR>Scanning CVS tree for bug IDs...";
944    foreach (@m) {
945	tagscanDir($_);
946    }
947    print "done.<HR>";
948
949    # Filter out tagless files that were created after the HI tag
950    # date.
951    my @a;
952    foreach my $f (@TAGLESS_FILES) {
953	my $d = getRev11Date("$CACHE/$f");
954	if ($d && $d le $TAGSCAN_TAG_HI_DATE) {
955	    push @a, $f;
956	}
957    }
958    @TAGLESS_FILES = @a;
959
960    if (@NO_JITTERBUG_FILES) {
961	print "The following revisions have no associated Jitterbug, or the bug number could not be parsed from the checkin comment.\n";
962	print "Checkins older than a year are not listed.\n";
963	print "<BLOCKQUOTE>";
964	print join("<BR>\n",
965		   map {logLink($_->[0],'grepj_2') .
966			", " . $_->[1] . "<BR><CODE>" .
967			$_->[2] . "</CODE>"}
968		   @NO_JITTERBUG_FILES);
969	print "</BLOCKQUOTE><HR>\n";
970    }
971
972    if (@TAGLESS_FILES) {
973	print "<EM>The following ", scalar @TAGLESS_FILES
974	    , " files were ignored because they are missing one or both tags."
975	    , " </EM>Files created after <B>$TAGSCAN_TAG_HI</B> should not be listed"
976	    , " here.\n<BLOCKQUOTE>";
977	print join("<BR>\n",
978		   map {logLink($_,'grepj_2')}
979		   @TAGLESS_FILES)
980	    , "</BLOCKQUOTE><HR>\n";
981    }
982
983    if (@BRANCHED_FILES) {
984	print "<EM>The following ", scalar @BRANCHED_FILES
985	    , " files were ignored because the tags occur on different"
986	    , " branches.\n</EM><BLOCKQUOTE>";
987	print join("<BR>\n",
988		   map {logLink($_->[0],'grepj_2') .
989			": " . $_->[1] . " => " . $_->[2]}
990		   @BRANCHED_FILES)
991	    , "</BLOCKQUOTE><HR>\n";
992    }
993
994#at	print "Other tags seen: ",
995#at	      join(" ",
996#at		   map {my $a=tagToRelease($_); $a?"$_($a)":"$_*"}
997#at                sort keys %TAGSCAN_ALLTAGS), "\n<HR>";
998
999    print "Details: "
1000	, join("; ",
1001	       map {"(" . jitterbugLink($user, $_, 'grepj_2') .
1002                    ": " . join(", ",
1003                 map {s|^.+?/||; s|,v$||; $_} sort keys %{$TAGSCAN_WHY{$_}}) . ")"}
1004	       sort {$a<=>$b} keys %TAGSCAN_WHY)
1005	, "<HR>\n";
1006
1007    print "Jitterbug IDs found (",scalar keys %TAGSCAN_IDS,"): "
1008	, join(", ",
1009	       map {jitterbugLink($user, $_, 'grepj_2')}
1010	       sort {$a<=>$b} keys %TAGSCAN_IDS);
1011
1012    my $bugs = join(',', sort {$a<=>$b} keys %TAGSCAN_IDS);
1013    print <<END;
1014  <form method=post action=http://bugs.icu-project.org/cgibin/private/tasklist/buglist.html>
1015    <input type=hidden name=tag1 value=$TAGSCAN_TAG_LO>
1016    <input type=hidden name=tag2 value=$TAGSCAN_TAG_HI>
1017    <input type=hidden name=bugs value="$bugs">
1018   <input type=submit value="Bug List Report">
1019  </form>
1020END
1021    my $bugs2 = join(' ', sort {$a<=>$b} keys %TAGSCAN_IDS);
1022    print <<END;
1023  <form method=GET action=http://bugs.icu-project.org/cgibin/private/byname/review>
1024    <input type=hidden name=user value=$user>
1025    <input type=hidden name=bugs value="$bugs2">
1026    <input type=hidden name=showclosed value=>
1027   <input type=submit value="Reviewer Report">
1028  </form>
1029END
1030    print <<END;
1031  <form method=GET action=http://bugs.icu-project.org/cgibin/private/byname/assign>
1032    <input type=hidden name=user value=$user>
1033    <input type=hidden name=bugs value="$bugs2">
1034    <input type=hidden name=showclosed value=>
1035   <input type=submit value="Assignee Report">
1036  </form>
1037END
1038}
1039
1040# Given a relative path to $CVSROOT, tagscan the
1041# corresponding item under $CACHE.  Path may point to a
1042# file or a directory.
1043# @param relative directory, not ending in "/", e.g. "icu/icu"
1044# @param item name in that directory
1045sub tagscanEntry {
1046    my $relDir = shift;
1047    my $item = shift; # A file or dir in $CVSROOT/$relDir
1048
1049    if (-d "$CACHE/$relDir/$item") {
1050        tagscanDir("$relDir/$item");
1051    } elsif ($item =~ /,v$/) {
1052        tagscanFile("$relDir/$item");
1053    }
1054}
1055
1056# Given a relative directory path to $CACHE, tagscan the
1057# underlying files.
1058# @param relative directory, not ending in "/", e.g. "icu/icu"
1059sub tagscanDir {
1060    my $relDir = shift;
1061
1062    # Ignore stuff in the Attic
1063    return if ($relDir eq 'Attic');
1064
1065    debugOut("+tagscanDir($relDir)") if ($DEBUG);
1066
1067    my $cacheDir = "$CACHE/$relDir";
1068
1069    # First tagscan files in this directory
1070    opendir(DIR, $cacheDir);
1071    my @cacheList = grep !/^\.\.?$/, readdir(DIR);
1072    closedir(DIR);
1073
1074    # Tagscan each individual entry
1075    foreach (@cacheList) {
1076        tagscanEntry($relDir, $_);
1077    }
1078
1079    debugOut("-tagscanDir($relDir)") if ($DEBUG);
1080}
1081
1082# Given a relative file path to $CVSROOT, tagscan the
1083# corresponding file under $CACHE, if necessary.
1084# @param relative file path
1085sub tagscanFile {
1086    my $relFile = shift;
1087
1088    # Display progress; it takes awhile
1089    if (++$TAGSCAN_COUNT % 100 == 0) {
1090	print " $TAGSCAN_COUNT...";
1091    }
1092
1093    # This file contains the output of rlog.
1094    my $file = "$CACHE/$relFile";
1095
1096    # Parse the rlog file.  Start by extracting the tag names.  Look
1097    # for the TAGSCAN_TAG_LO and TAGSCAN_TAG_HI's associated revision
1098    # numbers.
1099    open(IN, $file);
1100    while (<IN>) {
1101	last if (/^symbolic names:\s*$/);
1102    }
1103    my $rev_lo;
1104    my $rev_hi;
1105    my $rel_min; # lowest release number seen
1106    my @odd_tags;
1107    if ($TAGSCAN_TAG_HI eq 'HEAD') {
1108	$rev_hi = 'HEAD';
1109    }
1110    while (<IN>) {
1111	last if (/^\S/);
1112	if (!$rev_lo && /^\s+$TAGSCAN_TAG_LO:\s*(\S+)/) {
1113	    $rev_lo = $1;
1114	}
1115	elsif (!$rev_hi && /^\s+$TAGSCAN_TAG_HI:\s*(\S+)/) {
1116	    $rev_hi = $1;
1117	}
1118	elsif (/^\s+(\S+?):/) {
1119	    my $tag = $1;
1120#at	    $TAGSCAN_ALLTAGS{$tag} = 1;
1121	    my $r = tagToRelease($tag);
1122	    if ($r) {
1123		if (!$rel_min) {
1124		    $rel_min = $r;
1125		} elsif ($r < $rel_min) {
1126		    $rel_min = $r;
1127		}
1128	    } else {
1129		push @odd_tags, $tag;
1130	    }
1131	}
1132    }
1133
1134    # Check for dead files.  Look ahead and find the state of the head
1135    # revision.
1136    my $pos = tell(IN);
1137    my $state = '';
1138    while (<IN>) {
1139	if (/^date:.+state: ([A-Za-z]+)/) {
1140	    $state = $1;
1141	    last;
1142	}
1143    }
1144    seek(IN,$pos,0);
1145
1146    # If this file is 'dead', we're done.
1147    return if ($state eq 'dead');
1148
1149    # Usually we find both tags.  However, in several special cases one
1150    # or both tags will be missing.
1151    if (!$rev_lo || !$rev_hi) {
1152	my $ok = 0;
1153
1154	# If we see the high tag, but not the low, then this may be a
1155	# new file (created after the low tag).  To check for this, examine
1156	# the other tags.  If this is a new file; we can just scan
1157	# from rev_hi all the end of the log (with rev_lo set to '1.1').
1158	if ($rev_hi) {
1159	    if (!$rel_min) {
1160		# The only tag seen was the HI tag.
1161		$ok = 1;
1162	    } else {
1163		my $lo = tagToRelease($TAGSCAN_TAG_LO);
1164		if ($lo && $rel_min > $lo && (scalar @odd_tags)==0) {
1165		    # Other tags were seen, but all were above the LO tag.
1166		    $ok = 1;
1167		}
1168	    }
1169	    $rev_lo = '1.1';
1170	}
1171
1172	if (!$ok) {
1173	    push @TAGLESS_FILES, $relFile;
1174	    return;
1175	}
1176    }
1177
1178    # If the low and high revisions are the same then there are no bugs
1179    # to record from this file.
1180    if ($rev_lo eq $rev_hi) {
1181	# Scan down to get the date of the rev_hi
1182	while (<IN>) {
1183	    if (/^revision $rev_hi\s*$/) {
1184		$_ = <IN>; # Read date line
1185		if (/^date: (.+?);/) {
1186		    $TAGSCAN_TAG_HI_DATE = $1
1187			if ($TAGSCAN_TAG_HI_DATE lt $1);
1188		} else {
1189		    cantParse('date', $relFile, $_, $rev_hi);
1190		}
1191	    }
1192	}
1193	return;
1194    }
1195
1196    my $inRange;
1197
1198    my @result;
1199
1200    # The rlog output (the CACHE file) contains a series
1201    # of groups of lines, like so:
1202    #|----------------------------
1203    #|revision 1.40
1204    #|date: 2001/08/02 18:24:58;  author: grhoten;  state: Exp;  lines: +82 -73
1205    #|jitterbug 1080: general readme.html updates
1206    # That is, the first line has the revision #.
1207    # The third line has the bug ID.
1208
1209    # Are revisions on the same branch?
1210    my $branch_lo = revToBranch($rev_lo);
1211    my $branch_hi = revToBranch($rev_hi);
1212    if ($branch_lo eq $branch_hi) {
1213
1214	while (<IN>) {
1215	    if (/^-{20,}$/) {
1216		$_ = <IN>; # Read revision line
1217		if (/revision (\S+)/) {
1218		    my $rev = $1;
1219		    last if ($rev eq $rev_lo);
1220		    if (!$inRange) {
1221			if ($rev eq $rev_hi || $rev_hi eq 'HEAD') {
1222			    $inRange = 1;
1223			}
1224		    }
1225		    if ($inRange) {
1226			my $date = <IN>; # Read date line
1227			$_ = <IN>; # Read comment or branches: line
1228			$_ = <IN> if (/^branches:/); # Read line after branches:
1229			my $id;
1230			if (/^\s*jitterbug\s+0*(\d+)/i) {
1231			    $id = $1;
1232			} else {
1233			    push @NO_JITTERBUG_FILES, [$relFile, $rev, $_]
1234				if (noJitterbugFilter($rev, $date));
1235			    $id = $NO_JITTERBUG;
1236			}
1237			push @result, [$rev, $id, $date];
1238		    }
1239		} else {
1240		    cantParse('revision', $relFile, $_);
1241		    last; # This is very bad - bail out
1242		}
1243	    }
1244	}
1245    }
1246
1247    elsif ($branch_hi =~ /^\Q$branch_lo\E\./) {
1248	# Special case:  E.g., going from 1.25 => 1.25.2.1 means
1249	# going from branch 1 to 1.25.2.  We can handle this.
1250
1251	my @revs = traverseRevisions($rev_lo, $rev_hi);
1252
1253	#print "[$relFile: ", join(",",@revs), "]";
1254
1255	shift(@revs); # discard rev_lo
1256	my %revs;
1257	foreach (@revs) { $revs{$_} = 1; } # convert to hash
1258
1259	while (<IN>) {
1260	    if (/^-{20,}$/) {
1261		$_ = <IN>; # Read revision line
1262		if (/revision (\S+)/) {
1263		    my $rev = $1;
1264		    if (exists $revs{$rev}) {
1265			delete $revs{$rev};
1266			my $date = <IN>; # Read date line
1267			if ($rev eq $rev_hi) {
1268			    # Record latest date corresponding to HI tag
1269			    if ($date =~ /^date: (.+?);/) {
1270				$TAGSCAN_TAG_HI_DATE = $1
1271				    if ($TAGSCAN_TAG_HI_DATE lt $1);
1272			    } else {
1273				cantParse('date', $relFile, $date, $rev);
1274			    }
1275			}
1276			$_ = <IN>; # Read comment or branches: line
1277			$_ = <IN> if (/^branches:/); # Read line after branches:
1278			my $id;
1279			if (/^\s*jitterbug\s+0*(\d+)/i) {
1280			    $id = $1;
1281			    $TAGSCAN_WHY{$id}->{$relFile} = 1;
1282			} else {
1283			    push @NO_JITTERBUG_FILES, [$relFile, $rev, $_]
1284				if (noJitterbugFilter($rev, $date));
1285			    $id = $NO_JITTERBUG;
1286			}
1287			$TAGSCAN_IDS{$id} = 1;
1288			last unless (%revs);
1289		    }
1290		} else {
1291		    cantParse('revision', $relFile, $_);
1292		    last; # This is very bad - bail out
1293		}
1294	    }
1295	}
1296    }
1297
1298    else {
1299	# Tags on different branches
1300	push @BRANCHED_FILES, [$relFile, $rev_lo, $rev_hi];
1301    }
1302
1303    close(IN);
1304    my $a = \@result;
1305
1306    foreach my $revision (@$a) {
1307	# $revision->[ revision, jitterbug ID, date: line ]
1308	$TAGSCAN_IDS{$revision->[1]} = 1;
1309	$TAGSCAN_WHY{$revision->[1]}->{$relFile} = 1;
1310    }
1311
1312    if (@$a) {
1313	# Record latest date corresponding to HI tag
1314	if ($a->[0]->[2] =~ /^date: (.+?);/) {
1315	    $TAGSCAN_TAG_HI_DATE = $1
1316		if ($TAGSCAN_TAG_HI_DATE lt $1);
1317	} else {
1318	    cantParse('date', $relFile, $a->[0]->[2], $a->[0]->[0]);
1319	}
1320    }
1321}
1322
1323######################################################################
1324# dcuthelp
1325######################################################################
1326
1327# Perform a "dcuthelp" and emit the results.
1328sub do_dcuthelp {
1329    $DCUTHELP_TAG = expandTag($QUERY->param('dcut_tag'));
1330    my $ids = $QUERY->param('dcut_ids');
1331    my $user = $QUERY->param('user');
1332
1333    # Process the ID list; create a hash of IDs in %DCUTHELP_IDS
1334    $ids =~ s/,/ /g;
1335    my @ids = grep { /\S/ } split(/\s+/, $ids);
1336    my @bogus = grep { !/^\d+$/ } @ids;
1337    if (@bogus) {
1338	print "These are not valid Jitterbug IDs: ", join(", ", @bogus);
1339	return;
1340    }
1341    foreach my $id (@ids) {
1342	local $_ = $id;
1343	s/^0+//;
1344	if (!$_) { print "0 is not a valid Jitterbug ID."; return; }
1345	if (exists $DCUTHELP_IDS{$_}) { print "$id is duplicated in the Jitterbug ID list."; return; }
1346	$DCUTHELP_IDS{$_} = 1;
1347    }
1348
1349    if ($DCUTHELP_TAG!~/\S/ || 0==scalar keys %DCUTHELP_IDS) {
1350	print "Please enter a CVS tag and list of Jitterbug IDs and try again.";
1351	return;
1352    }
1353
1354    my @m;
1355    return if (!parseMod(\@m)); # what modules are we searching?
1356
1357    # Announce our intentions
1358    print "Performing a DCUT check in module(s) <B>", join(", ", @m)
1359        , "</B> against tag <B>", tagLink($DCUTHELP_TAG,$m[0],'grepj_2'),
1360	"</B>";
1361    print " with Jitterbug IDs <B>";
1362    print join(", ",
1363	       map {jitterbugLink($user, $_, 'grepj_2')}
1364	       sort {$a<=>$b} keys %DCUTHELP_IDS)
1365	, "</B>";
1366    print ".\n";
1367
1368    foreach (@m) {
1369	updateCacheDir($_);
1370    }
1371
1372    if ($UPDATE_COUNT) {
1373	print "done ($UPDATE_NONATTIC_COUNT,$UPDATE_ATTIC_COUNT).";
1374    }
1375
1376    $DCUTHELP_COUNT = 0;
1377    print "<HR>Scanning CVS tree...";
1378    foreach (@m) {
1379	dcuthelpDir($_);
1380    }
1381    print "done.";
1382
1383    if (@NO_JITTERBUG_FILES) {
1384	print "<HR>The following revisions have no associated Jitterbug, or the bug number could not be parsed from the checkin comment.\n";
1385	print "Checkins older than a year are not listed.\n";
1386	print "<BLOCKQUOTE>";
1387	print join("<BR>\n",
1388		   map {logLink($_->[0],'grepj_2') .
1389			", " . $_->[1] . "<BR><CODE>" .
1390			$_->[2] . "</CODE>"}
1391		   @NO_JITTERBUG_FILES);
1392	print "</BLOCKQUOTE>\n";
1393    }
1394
1395    my %tagless;
1396    if (@TAGLESS_FILES) {
1397	print "<HR><EM>The following ", scalar @TAGLESS_FILES
1398	    , " files are missing the tag <B>"
1399	    , $DCUTHELP_TAG, "</B>.  They were treated as if the tag existed "
1400	    , "on the initial revision.</EM>\n<BLOCKQUOTE>";
1401	print join("<BR>\n",
1402		   map {logLink($_, 'grepj_2')}
1403		   @TAGLESS_FILES);
1404	print "</BLOCKQUOTE>\n";
1405	for my $f (@TAGLESS_FILES) { $tagless{$f} = 1; }
1406    }
1407
1408    if (@BRANCHED_FILES) {
1409	print "<HR><EM><B>Error: The following ", scalar @BRANCHED_FILES
1410	    , " files contain the listed bug changes on different "
1411	    , " branches.\n</B></EM><BLOCKQUOTE>";
1412	print join("<BR>\n",
1413		   map {logLink($_->[0],'grepj_2') .
1414			": " . $_->[1] . ", " . $_->[2]}
1415		   @BRANCHED_FILES)
1416	    , "</BLOCKQUOTE>\n";
1417    }
1418
1419    if (@DCUTHELP_BADFILES) {
1420	print "<HR><EM><B>Error: The following "
1421	    , scalar @DCUTHELP_BADFILES,
1422	    " files contain intermingled bug fixes not specified in the list.",
1423	    "</B></EM>\n<BLOCKQUOTE>";
1424	my %badids;
1425	foreach (@DCUTHELP_BADFILES) {
1426	    my $relFile = $_->[0];
1427	    my $ids = $_->[1];
1428	    print logLink($relFile, 'grepj_2'), ": "
1429		, join(", ",
1430		       map {jitterbugLink($user, $_, 'grepj_2')}
1431		       @$ids)
1432		, "<BR>\n";
1433	    foreach my $i (@$ids) { $badids{$i} = 1; }
1434	}
1435	print "</BLOCKQUOTE>\n";
1436	print "Jitterbug changes not in the list: "
1437            , join(", ",
1438                   map {jitterbugLink($user, $_, 'grepj_2')}
1439                   sort {$a<=>$b} keys %badids)
1440            , "\n";
1441    }
1442
1443    if (@DCUTHELP_RETAGS) {
1444	print "<HR>CVS commands to update the tags in files containing "
1445	    ,"only the listed bugs (copy & paste into a shell window).";
1446	if (@DCUTHELP_BADFILES || @BRANCHED_FILES) {
1447	    print "<B>WARNING!  Some files (see above) contain other bug changes!  Files below are all \"legal\" but you may wish to address above problems before retagging.</B>";
1448	}
1449	print "<BR><BR><CODE><FONT SIZE=-1>";
1450	print "cd $CVSROOT<BR>\n";
1451	# Two passes, one for normal files, another for tagless
1452	my $tagless_count = 0;
1453	for (my $pass=0; $pass<2; ++$pass) {
1454	    print "<FONT COLOR=\"#0000FF\"># The following files do not contain the tag $DCUTHELP_TAG<BR>\n" if ($pass);
1455	    foreach (@DCUTHELP_RETAGS) {
1456		my $relFile = $_->[0];
1457		if ($pass == 0) {
1458		    if ($tagless{$relFile}) {
1459			++$tagless_count;
1460			next;
1461		    }
1462		} else {
1463		    next unless ($tagless{$relFile});
1464		}
1465		my $rev_hi = $_->[1];
1466		$relFile =~ s/,v$//;
1467		my $onBranch = ($rev_hi =~ /\d+\.\d+\.\d+/);
1468		print "<FONT COLOR=\"#FF0000\">" if ($onBranch);
1469		print "cvs tag -F -r $rev_hi $DCUTHELP_TAG $relFile";
1470		print "</FONT>" if ($onBranch);
1471		print "<BR>\n";
1472	    }
1473	    last unless ($tagless_count);
1474	    print "</FONT>\n" if ($pass);
1475	}
1476	print "</FONT></CODE>";
1477    } else {
1478	print "<HR>Nothing to do; no clean checkins for bugs "
1479	    , join(", ",
1480		   map {jitterbugLink($user, $_, 'grepj_2')}
1481		   sort {$a<=>$b} keys %DCUTHELP_IDS)
1482	    , " after "
1483	    , tagLink($DCUTHELP_TAG,$m[0],'grepj_2')
1484	    , " in module(s) <B>"
1485	    , join(", ", @m), "</B>.\n"
1486	    ;
1487    }
1488}
1489
1490# Given a relative path to $CVSROOT, dcuthelp the
1491# corresponding item under $CACHE.  Path may point to a
1492# file or a directory.
1493# @param relative directory, not ending in "/", e.g. "icu/icu"
1494# @param item name in that directory
1495sub dcuthelpEntry {
1496    my $relDir = shift;
1497    my $item = shift; # A file or dir in $CVSROOT/$relDir
1498
1499    # Ignore stuff in the Attic
1500    return if ($item eq 'Attic');
1501
1502    if (-d "$CACHE/$relDir/$item") {
1503        dcuthelpDir("$relDir/$item");
1504    } elsif ($item =~ /,v$/) {
1505        dcuthelpFile("$relDir/$item");
1506    }
1507}
1508
1509# Given a relative directory path to $CACHE, dcuthelp the
1510# underlying files.
1511# @param relative directory, not ending in "/", e.g. "icu/icu"
1512sub dcuthelpDir {
1513    my $relDir = shift;
1514
1515    debugOut("dcuthelpDir($relDir)") if ($DEBUG);
1516
1517    my $cacheDir = "$CACHE/$relDir";
1518
1519    # First dcuthelp files in this directory
1520    opendir(DIR, $cacheDir);
1521    my @cacheList = grep !/^\.\.?$/, readdir(DIR);
1522    closedir(DIR);
1523
1524    # Dcuthelp each individual entry
1525    foreach (@cacheList) {
1526        dcuthelpEntry($relDir, $_);
1527    }
1528}
1529
1530# Given a relative file path to $CVSROOT, dcuthelp the
1531# corresponding file under $CACHE.
1532# @param relative file path
1533sub dcuthelpFile {
1534    my $relFile = shift;
1535
1536    # Display progress; it takes awhile
1537    if (++$DCUTHELP_COUNT % 100 == 0) {
1538	print " $DCUTHELP_COUNT...";
1539    }
1540
1541    # This file contains the output of rlog.
1542    my $file = "$CACHE/$relFile";
1543
1544    # Parse the rlog file.  Start by extracting the tag names.  Look
1545    # for the DCUTHELP_TAG and its associated revision
1546    # number.
1547    open(IN, $file);
1548    while (<IN>) {
1549	last if (/^symbolic names:\s*$/);
1550    }
1551    my $rev_tag = '';
1552    while (<IN>) {
1553	last if (/^\S/);
1554	if (/^\s+$DCUTHELP_TAG:\s*(\S+)/) {
1555	    $rev_tag = $1;
1556	    last;
1557	}
1558    }
1559
1560    # Check for dead files.  Look ahead and find the state of the head
1561    # revision.
1562    my $pos = tell(IN);
1563    my $state = '';
1564    while (<IN>) {
1565	if (/^date:.+state: ([A-Za-z]+)/) {
1566	    $state = $1;
1567	    last;
1568	}
1569    }
1570    seek(IN,$pos,0);
1571
1572    # If this file is 'dead', we're done.
1573    return if ($state eq 'dead');
1574
1575    # If the tag is missing, record the fact.  Continue to process
1576    # the file as if the tag existed on the earliest revision.
1577    # This allows the tagging of newly added files.
1578    if (!$rev_tag) {
1579	push @TAGLESS_FILES, $relFile;
1580    }
1581
1582    # I'm going to assume the rlog output (the CACHE file) contains a series
1583    # of groups of lines, like so:
1584    #|----------------------------
1585    #|revision 1.40
1586    #|date: 2001/08/02 18:24:58;  author: grhoten;  state: Exp;  lines: +82 -73
1587    #|jitterbug 1080: general readme.html updates
1588    # That is, the first line has the revision #.
1589    # The third line has the bug ID.  Sometimes the third line has a
1590    # branch field.
1591
1592    # Find bug IDs later than the given tag, and record any that aren't
1593    # on the allowed list.  Locate $rev_hi - the high
1594    # revision of any bug found in the list.
1595    my @problem_ids; # Bug IDs between $rev_tag and $rev_hi not in the list
1596    my $rev_hi;
1597    my $bottom_rev = ''; # Last revision in the file
1598    while (<IN>) {
1599        if (/^-{20,}$/) {
1600	    $_ = <IN>; # Read revision line
1601	    if (/revision (\S+)/) {
1602		my $rev = $1;
1603		$bottom_rev = $rev;
1604		if ($rev eq $rev_tag) {
1605		    # Scan remainder of file to record last rev
1606		    while (<IN>) {
1607			if (/^-{20,}$/) {
1608			    $_ = <IN>; # Read revision line
1609			    $bottom_rev = $1 if (/revision (\S+)/);
1610			}
1611		    }
1612		    last;
1613		}
1614		my $date = <IN>; # Read date line
1615		$_ = <IN>; # Read comment or branches: line
1616		$_ = <IN> if (/^branches:/); # Read line after branches:
1617		my $id;
1618		if (/^\s*jitterbug\s+0*(\d+)/i) {
1619		    $id = $1;
1620		} else {
1621		    push @NO_JITTERBUG_FILES, [$relFile, $rev, $_]
1622			if (noJitterbugFilter($rev, $date));
1623		    $id = $NO_JITTERBUG;
1624		}
1625		my $in_list = (exists $DCUTHELP_IDS{$id});
1626#		# Handle tagless files a little differently
1627#		if (!$rev_tag) {
1628#		    if (!$rev_hi) {
1629#			if ($in_list) {
1630#			    $rev_hi = $rev;
1631#			} else {
1632#			}
1633#		    }
1634#
1635#		}
1636		if (!$rev_hi) {
1637		    if ($in_list) {
1638			$rev_hi = $rev;
1639		    }
1640		} else {
1641		    if (!$in_list) {
1642			push @problem_ids, $id;
1643		    }
1644		}
1645	    } else {
1646		cantParse('revision', $relFile, $_);
1647	    }
1648        }
1649    }
1650
1651    # If the bottom revision looks like a branch, then we need
1652    # to do extra processing.  Branch revisions are listed at the
1653    # end of the rlog output.
1654    if ($bottom_rev =~ /\d+\.\d+\.\d+\.\d+/ &&
1655	$bottom_rev ne '1.1.1.1') {
1656
1657	# This file contains branches; do special handling
1658
1659	# Parse all the revisions and form a branch tree.
1660	# Construct a hash (%tree) of revision numbers to jitterbugs.
1661	# In addition, "$rev-" maps to a ref to an array of branches,
1662	# if any.
1663	my %tree;
1664	seek(IN,0,0); # rewind to start
1665	while (<IN>) {
1666	    if (/^-{20,}$/) {
1667		$_ = <IN>; # Read revision line
1668		if (/revision (\S+)/) {
1669		    my $rev = $1;
1670		    my $date = <IN>; # Read date line
1671		    $_ = <IN>; # Read comment or branches: line
1672		    if (/^branches:\s*(.*)/) {
1673			my @branches = split(/;\s*/, $1);
1674			$tree{$rev . '-'} = \@branches;
1675			$_ = <IN>; # Read comment line
1676		    }
1677		    my $id;
1678		    if (/^\s*jitterbug\s+0*(\d+)/i) {
1679			$id = $1;
1680		    } else {
1681			push @NO_JITTERBUG_FILES, [$relFile, $rev, $_]
1682			    if (noJitterbugFilter($rev, $date));
1683			$id = $NO_JITTERBUG;
1684		    }
1685		    $tree{$rev} = $id;
1686		} else {
1687		    cantParse('revision', $relFile, $_);
1688		}
1689	    }
1690	}
1691
1692#	print "[$relFile: ";
1693#	print join("; ",
1694#		   map {$_ . " => " .
1695#		       (ref($tree{$_})
1696#			?("(".join(",",@{$tree{$_}}).")")
1697#			:$tree{$_})}
1698#		   sort keys %tree);
1699
1700	$rev_hi = dcuthelpScan(\%tree, $rev_tag, 1);
1701
1702#	print ": scan=>$rev_hi]";
1703
1704	@problem_ids = ();
1705	if ($rev_hi =~ /;/) {
1706	    # Tags on different branches
1707	    my @a = split(/;/, $rev_hi);
1708	    unshift @a, $relFile;
1709	    push @BRANCHED_FILES, \@a;
1710	    return;
1711	} elsif ($rev_hi) {
1712	    my @revs = traverseRevisions($rev_tag, $rev_hi);
1713
1714	    shift(@revs); # discard rev_lo
1715	    my %revs;
1716	    foreach (@revs) { $revs{$_} = 1; } # convert to hash
1717
1718	    seek(IN,0,0); # rewind to start
1719	    while (<IN>) {
1720		if (/^-{20,}$/) {
1721		    $_ = <IN>; # Read revision line
1722		    if (/revision (\S+)/) {
1723			my $rev = $1;
1724			if (exists $revs{$rev}) {
1725			    delete $revs{$rev};
1726			    my $date = <IN>; # Read date line
1727			    $_ = <IN>; # Read comment or branches: line
1728			    $_ = <IN> if (/^branches:/); # Read line after branches:
1729			    my $id;
1730			    if (/^\s*jitterbug\s+0*(\d+)/i) {
1731				$id = $1;
1732			    } else {
1733				push @NO_JITTERBUG_FILES, [$relFile, $rev, $_]
1734				    if (noJitterbugFilter($rev, $date));
1735				$id = $NO_JITTERBUG;
1736			    }
1737			    if (!exists $DCUTHELP_IDS{$id}) {
1738				push @problem_ids, $id;
1739			    }
1740			    last unless (%revs);
1741			}
1742		    } else {
1743			cantParse('revision', $relFile, $_);
1744			last; # This is very bad - bail out
1745		    }
1746		}
1747	    }
1748	}
1749    }
1750
1751    if (@problem_ids) {
1752	my @a = sortedUniqueInts(@problem_ids);
1753	push @DCUTHELP_BADFILES, [$relFile, \@a];
1754    } elsif ($rev_hi) {
1755	# This file is okay; record the data needed for moving the tag
1756	push @DCUTHELP_RETAGS, [$relFile, $rev_hi];
1757    }
1758
1759    close(IN);
1760}
1761
1762# Given a revision tree (see dcuthelpFile), look for %DCUTHELP_IDS
1763# bugs along various branches, starting at a given revision.  Proceed
1764# along the branch of the given revision by incrementing it using
1765# incRev().  If any revision along the way is a branch point, follow
1766# that branch by recursing.  If found on two split branches,
1767# return 'rev;rev'.  If not found at all, return ''.  If found on
1768# exactly one branch, return the furthest revision at which it was
1769# found.
1770#
1771# @param tree, as created by dcuthelpFile
1772# @param first revision to examine
1773# @param if true, exclude given revision from bug search
1774#        but not from branch analysis.
1775#
1776# @return either a revision, or 'rev;rev' if the bugs occur
1777#         on two split branches, or '' if the bugs aren't seen.
1778sub dcuthelpScan {
1779    my $tree = shift; # parsed revision tree; see dcuthelpFile
1780    my $rev = shift; # rev to start at
1781    my $exclusive = shift || ''; # is $rev exclusive?
1782
1783#   print "[scan $tree $rev $exclusive]";
1784
1785    # If there are no branches between $rev and the end of its branch,
1786    # then return the top revision at which one of %DCUTHELP_IDS is seen.
1787    my $branchrev = ''; # First rev at which branch was seen, if any
1788    my $lastbugrev = ''; # Last rev at which bug was seen
1789    my $r;
1790    for ($r=$rev ;exists $tree->{$r}; $r=incRev($r)) {
1791#	print "{$r}";
1792	if (exists $DCUTHELP_IDS{$tree->{$r}}) {
1793	    $lastbugrev = $r;
1794	}
1795	if (exists $tree->{"$r-"}) {
1796	    $branchrev = $r;
1797	    last;
1798	}
1799    }
1800
1801    # If $exclusive it true, can't return this rev.
1802    if ($exclusive && ($lastbugrev eq $rev)) {
1803	$lastbugrev = '';
1804    }
1805
1806    # If there are no branches we are done.
1807    if (!$branchrev) {
1808	return $lastbugrev;
1809    }
1810
1811    # Otherwise, examine the n branches and the continuation of
1812    # this branch separately.  Convert branch revisions to the first
1813    # rev on each branch, e.g., "1.14.2" => "1.14.2.1"
1814    my @branches = map {"$_.1"} @{$tree->{"$branchrev-"}};
1815    $r = incRev($branchrev);
1816    push @branches, $r if (exists $tree->{$r});
1817
1818    $r = '';
1819    foreach (@branches) {
1820	my $a = dcuthelpScan($tree, $_);
1821	return $a if ($a =~ /;/);
1822	if ($a) {
1823	    if ($r) {
1824		# Our bugs were seen on more than one branch
1825		return "$r;$a";
1826	    }
1827	    $r = $a;
1828	}
1829    }
1830
1831    # If we haven't seen it on any branches, use result up to the
1832    # branch point, found above.
1833    $r ||= $lastbugrev;
1834
1835    return $r;
1836}
1837
1838######################################################################
1839# CVS rlog cache
1840######################################################################
1841
1842#---------------------------------------------------------------------
1843# Given a relative path to $CVSROOT, update the
1844# corresponding item under $CACHE.  Path may point to a
1845# file or a directory.
1846# @param relative directory, not ending in "/", e.g. "icu/icu"
1847# @param item name in that directory
1848sub updateCacheEntry {
1849    my $relDir = shift;
1850    my $item = shift; # A file or dir in $CVSROOT/$relDir
1851
1852    if (-d "$CVSROOT/$relDir/$item") {
1853        updateCacheDir("$relDir/$item");
1854    } elsif ($item =~ /,v$/) {
1855        updateCacheFile("$relDir/$item");
1856    }
1857}
1858
1859#---------------------------------------------------------------------
1860# Given a relative directory path to $CVSROOT, update the
1861# corresponding directory under $CACHE.
1862# @param relative directory, not ending in "/", e.g. "icu/icu"
1863sub updateCacheDir {
1864    my $relDir = shift;
1865
1866    debugOut("+updateCacheDir($relDir)") if ($DEBUG);
1867
1868    my $cvsDir = "$CVSROOT/$relDir";
1869    my $cacheDir = "$CACHE/$relDir";
1870
1871    # First update files in this directory
1872    opendir(DIR, $cvsDir);
1873    my @cvsList = grep !/^\.\.?$/ && $_ ne 'CVS', readdir(DIR);
1874    closedir(DIR);
1875    my %cvsPruneHash;
1876    foreach (@cvsList) { $cvsPruneHash{$_} = 1; }
1877    if (!$QUERY->param('include_attic')) {
1878        @cvsList = grep !/^attic$/i, @cvsList;
1879    }
1880    my %cvsHash;
1881    foreach (@cvsList) { $cvsHash{$_} = 1; }
1882
1883    # Update/create the cache directory.  If it doesn't exist,
1884    # create it.  If it does, prune out any obsolete entries.
1885    if (-d $cacheDir) {
1886        if (!opendir(DIR, $cacheDir)) {
1887            print "Can't open dir $cacheDir: $!";
1888	    debugOut("-!updateCacheDir($relDir)") if ($DEBUG);
1889            return;
1890        }
1891        my @cacheList = grep !/^\.\.?$/, readdir(DIR);
1892        closedir(DIR);
1893
1894        # Delete things that don't exist in CVS
1895        foreach (@cacheList) {
1896            if (!exists $cvsPruneHash{$_}) {
1897		debugOut ( " Removing $cacheDir/$_ .." ) if ($DEBUG);
1898                rmtree("$cacheDir/$_", 0, 1);
1899            }
1900        }
1901    } else {
1902        mkpath($cacheDir, 0, 0777);
1903    }
1904
1905    # Update each individual entry
1906    foreach (@cvsList) {
1907        updateCacheEntry($relDir, $_);
1908    }
1909
1910    debugOut("-updateCacheDir($relDir)") if ($DEBUG);
1911}
1912
1913#---------------------------------------------------------------------
1914# Given a relative file path to $CVSROOT, update the
1915# corresponding file under $CACHE, if necessary.
1916# @param relative file path
1917sub updateCacheFile {
1918    my $relFile = shift;
1919
1920    if (! -e "$CACHE/$relFile" ||
1921        (-M "$CACHE/$relFile" > -M "$CVSROOT/$relFile")) {
1922        if (!$UPDATE_COUNT) {
1923            print "<HR>Updating cache...";
1924	    if(! -e "$CACHE/$relFile") {
1925		debugOut ( " because $CACHE/$relFile was not cached.." ) if ($DEBUG);
1926	    } else {
1927		debugOut ( " because $relFile was updated.." ) if ($DEBUG);
1928	    }
1929        } elsif ($UPDATE_COUNT % 25 == 0) {
1930	    print " $UPDATE_COUNT...";
1931	}
1932        ++$UPDATE_COUNT;
1933	if ($relFile =~ m|/attic/|i) {
1934	    ++$UPDATE_ATTIC_COUNT;
1935	} else {
1936	    ++$UPDATE_NONATTIC_COUNT;
1937	}
1938	my $f = "$CACHE/$relFile";
1939	command("rlog $CVSROOT/$relFile > $f", $f);
1940	my $size = -s $f;
1941	if ($size <= 0) {
1942	    print " <B>{Fatal Error: rlog of $relFile failed}</B> ";
1943	    unlink($f);
1944	}
1945	command("touch -r $CVSROOT/$relFile $f");
1946    }
1947}
1948
1949######################################################################
1950# instaCache
1951######################################################################
1952
1953#---------------------------------------------------------------------
1954# Lookup an ID in the instaCache, and return the diffs stored
1955# there.  If there is no entry for the ID, then return the
1956# empty string.  The ID will be suffixed with 'a' if the
1957# Attic is included.
1958sub instaGet {
1959    my $id = shift;
1960    my $diffs;
1961    my $dir = $QUERY->param('include_attic') ? $INSTA_ATTIC : $INSTA;
1962    my $file = "$dir/$id";
1963    if (-e $file) {
1964	if (open(IN, $file)) {
1965	    while (<IN>) { $diffs .= $_; }
1966	    close(IN);
1967	}
1968    }
1969    return $diffs;
1970}
1971
1972#---------------------------------------------------------------------
1973# Store diffs for the given ID in the instaCache.  The ID will be
1974# suffixed with 'a' if the Attic is included.
1975sub instaPut {
1976    my $id = shift;
1977    my $diffs = shift;
1978    my $dir = $QUERY->param('include_attic') ? $INSTA_ATTIC : $INSTA;
1979    my $file = "$dir/$id";
1980    open(IN, ">$file") or return;
1981    print IN $diffs;
1982    close(IN);
1983}
1984
1985#---------------------------------------------------------------------
1986# Reset the instaCache by deleting all entries.  We need
1987# to do this whenever the main cache is invalidated.
1988# Param: if true, then force reset of all instaCaches.
1989# Otherwise do a smart reset based on the update counts.
1990sub resetInstaCache {
1991    if (shift) {
1992	command("rm -rf $INSTA"); # Recursive
1993	return;
1994    }
1995
1996    # If there have been changes to non-Attic files, we
1997    # have to reset everything.
1998    if ($UPDATE_NONATTIC_COUNT) {
1999	# The following will fail with:
2000	# rm: cannot remove `/tmp/icu-grepj.cache/insta/Attic': Is a directory
2001	#command("rm -f $INSTA/*") if (-d $INSTA);
2002	command("find $INSTA -type f -maxdepth 1 -exec rm {} \\;")
2003	    if (-d $INSTA);
2004    } else {
2005	# Otherwise just clear the attic instaCache
2006	command("rm -f $INSTA_ATTIC/*") if (-d $INSTA_ATTIC);
2007    }
2008}
2009
2010######################################################################
2011# CVS Utilities
2012######################################################################
2013
2014#---------------------------------------------------------------------
2015# Get the date corresponding to the revision 1.1 in the
2016# given rlog output.  We use this as the "creation date" for the
2017# corresponding CVS file.
2018# @param absolute rlog output file path (in the cache)
2019# @return date string of the form "2002/08/23 23:21:38"
2020sub getRev11Date {
2021    my $file = shift;
2022
2023    # Parse the rlog file.  Return the date line for 1.1
2024    open(IN, $file);
2025    while (<IN>) {
2026	if (/^-{20,}$/) {
2027	    $_ = <IN>;
2028	    if (/revision 1.1$/) {
2029		$_ = <IN>;
2030		if (/^date: (.+?);/) {
2031		    return $1;
2032		}
2033	    }
2034	}
2035    }
2036    close(IN);
2037
2038    ''; # Parse failure - should never happen
2039}
2040
2041#---------------------------------------------------------------------
2042# Given a ,v file, find the revisions containing the
2043# jitterbug ID change.  Return an array of hash refs.
2044# Newest revision is first, that is, it is $result[0].
2045# Each hash has:
2046#   new (revision#)
2047#   old (revision#)
2048#   date
2049#   author
2050#   comment
2051# If the very first revision is labeled with the jitterbug
2052# $ID, then {old} will be $BASE_REV.
2053#
2054sub findRevisions {
2055    my $file = shift;
2056    my $pat = shift;
2057    my @result;
2058
2059    # rlog output:
2060    #|revision 1.3
2061    #|date: 1999/10/14 22:14:04;  author: schererm;  state: Exp;  lines: +4 -2
2062    #|jitterbug 14: echo off now and use the Release versions of the tools
2063    #|----------------------------
2064    #|revision 1.2
2065    #|date: 1999/10/13 01:10:24;  author: schererm;  state: Exp;  lines: +9 -6
2066    #|jitterbug 15: windows: genrb puts .res files into the current directory
2067    #|more text
2068    #|----------------------------
2069    #|revision 1.1
2070    #|date: 1999/10/12 21:50:30;  author: schererm;  state: Exp;
2071    #|jitterbug 14: Windows: create a batch file to make the /icu/data files
2072    #|=============================================================================
2073
2074    # We read our rlog info from the cache now
2075    my %log; # $log{<revision>} = <block of text>
2076    my $l=''; my $r='';
2077    open(IN, $file);
2078    while (<IN>) {
2079        if (/^-{20,}$/) {
2080	    $log{$r} = $l if ($r);
2081            $l = $r = '';
2082        } elsif ($r) {
2083            $l .= $_;
2084        } else {
2085	    if (/revision\s+(\S+)/) {
2086		$r = $1;
2087		die "Duplicate revision $r in $file" if (exists $log{$r});
2088	    }
2089	}
2090    }
2091    close(IN);
2092    $log{$r} = $l if ($r);
2093
2094    for $r (sort cmprevs keys %log) {
2095        local $_ = $log{$r};
2096
2097        # (2 of 3 REGEXPS) SEE ALSO other regexps; keep them in sync
2098        if (/^\s*(?:$CVS_MSG_KW)\s*$pat\b/im) {
2099            my %h;
2100            $h{new} = $r;
2101	    my $rold = decRev($r);
2102            if (exists $log{$rold}) {
2103                $h{old} = $rold;
2104            } else {
2105                $h{old} = $BASE_REV;
2106            }
2107            if (/date:\s*(.+?);/) {
2108                $h{date} = $1;
2109            }
2110            if (/author:\s*(.+?);/) {
2111                $h{author} = $1;
2112            }
2113
2114            # (3 of 3 REGEXPS) SEE ALSO other regexps; keep them in sync
2115            if (/^\s*(?:$CVS_MSG_KW)\s*$pat\b(.*)/ism) {
2116                local $_ = $1;
2117                s/^\s*:?\s*//;
2118                s/\s*----+\s*$//;
2119                s/\s*====+\s*$//;
2120                s/\s*\n+\s*/ /g;
2121                $h{comment} = $_;
2122            }
2123            push @result, \%h;
2124        }
2125    }
2126
2127    @result;
2128}
2129
2130######################################################################
2131# CVS tag parsing
2132######################################################################
2133
2134#---------------------------------------------------------------------
2135# Given a tag name like this: "2.1", expand it to "release-2-1".
2136# Convert 'head' (case insens.) to 'HEAD'.
2137# Otherwise leave it alone.
2138sub expandTag {
2139    local $_ = shift;
2140    s/^\s+//;
2141    s/\s+$//;
2142    if (/^\d+(\.\d+)/) {
2143	s|\.|-|g;
2144	$_ = "release-" . $_;
2145    } elsif (/^head$/i) {
2146	$_ = 'HEAD';
2147    }
2148    $_;
2149}
2150
2151#---------------------------------------------------------------------
2152# Given a tag name like this: "release-1-5-0-d03", return a normalized
2153# release number.  The release number in this case would be 1500003.
2154# The final release (no 'd') "release-1-5-0" is 1500099; that is, it
2155# behaves like "d99".  Up to 5 digits are allowed prior to the 'd'
2156# number (if any).  This should suffice; in practice we use only 4
2157# (e.g., "release-1-4-1-2").  Assume all numbers are single digits
2158# except for the 'd' number.  The tag must start with /release-?/.
2159# All digits must be separated by '-', except the '-' before the 'd03'
2160# may be omitted.  One or two digits are allowed after the 'd'.
2161# Trailing text after an otherwise valid tag, with no 'd', is treated
2162# as a 'd' of 00, e.g., "release-2-0-2s-branch".
2163#
2164# @param a tag string, like "release-1-5-0-d03"
2165# @param a release integer, that can be compared numerically,
2166#        like 1500003, or if the tag can't be parsed.
2167sub tagToRelease {
2168    local $_ = shift;
2169    if (s/^release-?//i) {
2170	my @a;
2171	my $d = -1;
2172	for (;;) {
2173	    if (s/^(\d)-// ||
2174		s/^(\d)$// ||
2175		s/(\d)(\D)/$2/) { # e.g., "release-1-4-2d01"
2176		push @a, $1;
2177	    } elsif ($d<0 && s/^d(\d{1,2})$//) {
2178		$d = $1;
2179	    } else {
2180		last;
2181	    }
2182	}
2183	# If we have some trailing non-standard text, and no 'd',
2184	# then treat it as a 'd' of 00.
2185	if ($_ && $d<0 && (scalar @a)>0) {
2186	    $_ = '';
2187	    $d = 0;
2188	}
2189	if (!$_) {
2190	    push @a, (0, 0, 0, 0); # Pad with 0's
2191	    @a = @a[0..4];
2192	    return join('',@a) . sprintf("%02d", $d<0?99:$d);
2193	}
2194    }
2195    0; # parse failure
2196}
2197
2198######################################################################
2199# Utilities
2200######################################################################
2201
2202# Output a string in debug mode
2203# Usage:  debugOut("string") if ($DEBUG);
2204sub debugOut {
2205    print "<P><FONT SIZE=-1><B>", join(" ", @_), "</B></FONT></P>";
2206}
2207
2208#|# Set or change a GET param of a URL.  If the param exists,
2209#|# change it.  If it doesn't, add it.
2210#|# @param a URL, with or without trailing parameters
2211#|# @param a parameter string of the form a=b, a=, or a
2212#|# @param modified URL
2213#|sub urlParam {
2214#|    my $url = shift;
2215#|    my $param = shift;
2216#|    my $key = $param;
2217#|    $key =~ s/=.*//;
2218#|    if ($url =~ s/([\?&;])$key=[^&;]*/$1$param/ ||
2219#|	$url =~ s/([\?&;])$key$/$1$param/) {
2220#|	return $url;
2221#|    }
2222#|    $url . ($url =~ /\?/ ? '&' : '?') . $param;
2223#|}
2224
2225# Append the given path-info to the given URL
2226# Param: URL, possibly including '?xxx=yyy' params, NOT ending in '/'
2227# Param: Path info, MUST start with '/'
2228sub urlPathInfo {
2229    my $url = shift;
2230    my $pi = shift;
2231    if ($url =~ s|\?|$pi?|) {
2232    } else {
2233	$url .= $pi;
2234    }
2235    $url;
2236}
2237
2238# Parse the module params given by the user
2239# @param ref to array to receive list of modules.  Prior contents will
2240#        be lost.
2241# @return 1 on success, or 0 if bad or no modules were seen.
2242sub parseMod {
2243    my $m = shift; # ref to array
2244    my @badMod;
2245
2246    my $mod = $QUERY->param('mod') || $DEFAULT_MOD;
2247    $mod =~ s|^\s+||;
2248    $mod =~ s|\s+$||;
2249    $mod =~ s|\s+| |g;
2250    @$m = split(' ', $mod);
2251    foreach (@$m) {
2252	# !Modify element of @m in place!
2253	$_ = $MOD_ABBREV{$_} if (exists $MOD_ABBREV{$_});
2254	push @badMod, $_ if (! -d "$CVSROOT/$_");
2255    }
2256    if (@badMod) {
2257	print "Invalid modules: <CODE>",
2258	      join(" ", @badMod), "</CODE>";
2259	print "<BR>Did you try the full module name (e.g. \"icu/charset\")?  Only some modules can be abbreviated: <CODE>", join(" ", sort keys %MOD_ABBREV), "</CODE>.";
2260	return 0;
2261    }
2262    1;
2263}
2264
2265# Return the HTML for a link to the given jitterbug.
2266# @param user
2267# @param bug ID
2268# @param OPTIONAL target
2269# @return HTML for A tag
2270sub jitterbugLink {
2271    my $user = shift;
2272    my $id = shift;
2273    my $targ = shift || '';
2274    if ($id eq $NO_JITTERBUG) {
2275	return "<EM>no jitterbug</EM>";
2276    }
2277    $targ = " target=\"$targ\"" if ($targ);
2278    "<A href=\"" . jitterbugURL($user, $id) . "\"$targ>$id</A>";
2279}
2280
2281# Return the HTML for a link to the WebCVS log of a file.
2282# @param relative path (from $CVSROOT) to file, optionally with
2283#        trailing ",v"
2284# @param OPTIONAL target
2285# @return HTML for A tag
2286sub logLink {
2287    my $relFile = shift;
2288    my $targ = shift;
2289    $targ = " target=\"$targ\"" if ($targ);
2290    $relFile =~ s/,v$//;
2291    "<A href=\"$LOG_URL/$relFile\"$targ>$relFile</A>";
2292}
2293
2294# Return the HTML for a link to the WebCVS "tag" page.  This will
2295# just be the page for the root of the given module, with the given
2296# tag selected.
2297# @param tag
2298# @param module, e.g., "icu/icu"
2299# @param OPTIONAL target
2300# @return HTML for A tag
2301sub tagLink {
2302    my $tag = shift;
2303    my $mod = shift;
2304    my $targ = shift;
2305    $targ = " target=\"$targ\"" if ($targ);
2306    "<A href=\"$LOG_URL/$mod/?only_with_tag=$tag\"$targ>$tag</A>";
2307}
2308
2309# Emit an error (in HTML) about failing to parse a line.
2310# @param what can't be parsed, e.g., 'revision'
2311# @param relative file path, e.g., 'icu/icu/readme.html'
2312# @param the line that can't be parsed
2313# @param revision
2314sub cantParse {
2315    my $what = shift;
2316    my $relFile = shift;
2317    my $line = shift;
2318    my $rev = shift;
2319    $rev = ', '.$rev if ($rev);
2320    print "<BR>Error: Can't parse $what in "
2321	, logLink($relFile, 'grepj_2'), "$rev:<BR>\n";
2322    print "<CODE>$line</CODE><BR>";
2323}
2324
2325# Print the given string(s) to STDOUT and also return the
2326# output as a single string.
2327sub out {
2328    local $_ = join('', @_);
2329    print;
2330    $_;
2331}
2332
2333# Given an array of numbers, return a sorted unique list.
2334sub sortedUniqueInts {
2335    my @a = @_;
2336    my %a;
2337    foreach (@a) {
2338	s/^0+(\d)/$1/;
2339	$a{$_} = 1;
2340    }
2341    sort {$a<=>$b} keys %a;
2342}
2343
2344# Convert a revision number to a branch number.
2345# Generally this means dropping the last dotted integer, but if
2346# the last two dotted integers are 0.n, then the 0. must be dropped:
2347# 1.14.0.2 => 1.14.2.  (This is a magic CVS revision representing
2348# the branch.)  Also 'HEAD' is branch '1'.
2349sub revToBranch {
2350    local $_ = shift;
2351    s/\.0(\.\d+)$/$1/ || s/\.\d+$// || s/HEAD/1/;
2352    $_;
2353}
2354
2355# Given two CVS revisions, return a sequence of revisions traversing
2356# the logical path between them.
2357#
2358# WARNING!: The revisions must actually have a path between them.  If
2359# you pass in 1.4 => 1.2 or 1.5 => 1.2.2.4 the sub will run
2360# infinitely.
2361#
2362# @param low revision, e.g. 1.2 or 1.2.0.4
2363# @param high revision, e.g., 1.5.2.3
2364# @return an array of revisions from low to high inclusive
2365sub traverseRevisions {
2366    my $rev_lo = shift;
2367    my $rev_hi = shift;
2368    my @a = split(/\./, $rev_lo);
2369    my @limit = split(/\./, $rev_hi);
2370    my @list;
2371    for (;;) {
2372	push @list, join('.', @a);
2373	if (@a == @limit) {
2374	    last if ($a[-1] == $limit[-1]);
2375	    # Fall through
2376	} else {
2377	    my $a = join('.', @a);
2378	    if ($rev_hi =~ /^\Q$a\E\./) {
2379		push @a, $limit[@a];
2380		push @a, 1;
2381		next;
2382	    }
2383	    # Else fall through
2384	}
2385
2386	if ($a[-2] == 0) {
2387	    # Handle magic CVS revisions like 1.14.0.2
2388	    $a[-2] = $a[-1];
2389	    $a[-1] = 1;
2390	} else {
2391	    $a[-1]++;
2392	}
2393    }
2394    @list;
2395}
2396
2397# Given a CVS numeric revision, increment it (increment last integer)
2398sub incRev {
2399    local $_ = shift;
2400    if (/(\d+)$/) {
2401	my $i = $1 + 1;
2402	s/\d+$/$i/;
2403	return $_;
2404    }
2405    die "Can't increment $_";
2406}
2407
2408# Given a CVS numeric revisions, decrement it.  This handles
2409# branches.  If the resulting revision number goes to zero,
2410# return BASE_REV.  Does not handle magic revisions like 1.14.0.2.
2411# 1.3 => 1.2
2412# 1.3.2.1 => 1.3
2413# 1.3.2.2 => 1.3.2.1
2414sub decRev {
2415    local $_ = shift;
2416    if (/(\d+)$/) {
2417	my $i = $1 - 1;
2418	if ($i >= 1) {
2419	    s/\d+$/$i/;
2420	} elsif (s/(^1\.\d+)\.2\.1$/$1/) {
2421	    # 1.3.2.1 => 1.3
2422	} else {
2423	    return $BASE_REV;
2424	}
2425	return $_;
2426    }
2427    die "Can't decrement $_";
2428}
2429
2430# Given a date string, in CVS format, like "2003/05/29 22:10:17",
2431# return the duration $NOW - x, in days.
2432sub ageInDays {
2433    local $_ = shift;
2434    if (m|(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+)|) {
2435	my ($y,$m,$d,$H,$M,$S) = ($1,$2-1,$3,$4,$5,$6);
2436	if ($y =~ /^\d\d$/) {
2437	    $y = 100*int($YEAR / 100) + $y;
2438	    $y -= 100 if ($y > $YEAR);
2439	}
2440	return ($NOW - timelocal_nocheck($S,$M,$H,$d,$m,$y)) / 86400.0;
2441    } else {
2442	die "Can't parse date $_\n";
2443    }
2444}
2445
2446# Filter for which files we care about that don't have jitterbugs.
2447# Our rule is that if the checkin is over a year old, we don't care
2448# about it.  We used to also require the revision to be 1.1 or 1.1.1.1
2449# to be ignored, but we dropped this.
2450sub noJitterbugFilter {
2451    my $rev = shift;
2452    my $date = shift;
2453    #if ($rev eq '1.1' || $rev eq '1.1.1.1') {
2454	return ageInDays($date) <= 365.25;
2455    #}
2456    #1;
2457}
2458
2459# Execute a command, trapping errors.
2460# Options second arg: Path to a file to delete upon failure
2461sub command {
2462    my $cmd = shift;
2463    my $fileToDeleteOnFailure = shift;
2464
2465    my $err = "$CACHE/grepj.stderr";
2466    my $status = system($cmd . " 2> $err");
2467    if ($status != 0) {
2468	unlink($fileToDeleteOnFailure) if defined($fileToDeleteOnFailure);
2469	print "<HR><B>Fatal Error: "
2470	    . "\"$cmd\" exited with value "
2471	    . ($status >> 8)
2472	    . " (signal " . ($status & 127) . ")"
2473	    . (($status & 128) ? " (core dumped)" : "")
2474	    . "<BR></B>";
2475	print "stderr:<BR>";
2476	if (open(IN, $err)) {
2477	    while (<IN>) {
2478		print $_, "<BR>";
2479	    }
2480	    close(IN);
2481	}
2482	croak "Couldn't execute \"$cmd\"";
2483    }
2484}
2485
2486#eof
2487