/*
 * Online help index routines for CUPS.
 *
 * Copyright © 2007-2019 by Apple Inc.
 * Copyright © 1997-2007 by Easy Software Products.
 *
 * Licensed under Apache License v2.0.  See the file "LICENSE" for more
 * information.
 */
10 
/*
 * Include necessary headers...
 */
14 
15 #include "cgi-private.h"
16 #include <cups/dir.h>
17 
18 
/*
 * List of common English words that should not be indexed...
 *
 * The table is read-only.  Entries MUST be kept in ascending alphabetical
 * order because help_load_file() looks words up with bsearch(), and every
 * entry must fit in 6 bytes including the terminating nul.
 */

static const char	help_common_words[][6] =
			{
			  "about",
			  "all",
			  "an",
			  "and",
			  "are",
			  "as",
			  "at",
			  "be",
			  "been",
			  "but",
			  "by",
			  "call",
			  "can",
			  "come",
			  "could",
			  "day",
			  "did",
			  "do",
			  "down",
			  "each",
			  "find",
			  "first",
			  "for",
			  "from",
			  "go",
			  "had",
			  "has",
			  "have",
			  "he",
			  "her",
			  "him",
			  "his",
			  "hot",
			  "how",
			  "if",
			  "in",
			  "is",
			  "it",
			  "know",
			  "like",
			  "long",
			  "look",
			  "make",
			  "many",
			  "may",
			  "more",
			  "most",
			  "my",
			  "no",
			  "now",
			  "of",
			  "on",
			  "one",
			  "or",
			  "other",
			  "out",
			  "over",
			  "said",
			  "see",
			  "she",
			  "side",
			  "so",
			  "some",
			  "sound",
			  "than",
			  "that",
			  "the",
			  "their",
			  "them",
			  "then",
			  "there",
			  "these",
			  "they",
			  "thing",
			  "this",
			  "time",
			  "to",
			  "two",
			  "up",
			  "use",
			  "was",
			  "water",
			  "way",
			  "we",
			  "were",
			  "what",
			  "when",
			  "which",
			  "who",
			  "will",
			  "with",
			  "word",
			  "would",
			  "write",
			  "you",
			  "your"
			};
122 
123 
124 /*
125  * Local functions...
126  */
127 
128 static help_word_t	*help_add_word(help_node_t *n, const char *text);
129 static void		help_delete_node(help_node_t *n);
130 static void		help_delete_word(help_word_t *w);
131 static int		help_load_directory(help_index_t *hi,
132 			                    const char *directory,
133 					    const char *relative);
134 static int		help_load_file(help_index_t *hi,
135 			               const char *filename,
136 				       const char *relative,
137 				       time_t     mtime);
138 static help_node_t	*help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
139 static int		help_sort_by_name(help_node_t *p1, help_node_t *p2);
140 static int		help_sort_by_score(help_node_t *p1, help_node_t *p2);
141 static int		help_sort_words(help_word_t *w1, help_word_t *w2);
142 
143 
144 /*
145  * 'helpDeleteIndex()' - Delete an index, freeing all memory used.
146  */
147 
148 void
helpDeleteIndex(help_index_t * hi)149 helpDeleteIndex(help_index_t *hi)	/* I - Help index */
150 {
151   help_node_t	*node;			/* Current node */
152 
153 
154   if (!hi)
155     return;
156 
157   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
158        node;
159        node = (help_node_t *)cupsArrayNext(hi->nodes))
160   {
161     if (!hi->search)
162       help_delete_node(node);
163   }
164 
165   cupsArrayDelete(hi->nodes);
166   cupsArrayDelete(hi->sorted);
167 
168   free(hi);
169 }
170 
171 
172 /*
173  * 'helpFindNode()' - Find a node in an index.
174  */
175 
176 help_node_t *				/* O - Node pointer or NULL */
helpFindNode(help_index_t * hi,const char * filename,const char * anchor)177 helpFindNode(help_index_t *hi,		/* I - Index */
178              const char   *filename,	/* I - Filename */
179              const char   *anchor)	/* I - Anchor */
180 {
181   help_node_t	key;			/* Search key */
182 
183 
184  /*
185   * Range check input...
186   */
187 
188   if (!hi || !filename)
189     return (NULL);
190 
191  /*
192   * Initialize the search key...
193   */
194 
195   key.filename = (char *)filename;
196   key.anchor   = (char *)anchor;
197 
198  /*
199   * Return any match...
200   */
201 
202   return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
203 }
204 
205 
206 /*
207  * 'helpLoadIndex()' - Load a help index from disk.
208  */
209 
210 help_index_t *				/* O - Index pointer or NULL */
helpLoadIndex(const char * hifile,const char * directory)211 helpLoadIndex(const char *hifile,	/* I - Index filename */
212               const char *directory)	/* I - Directory that is indexed */
213 {
214   help_index_t	*hi;			/* Help index */
215   cups_file_t	*fp;			/* Current file */
216   char		line[2048],		/* Line from file */
217 		*ptr,			/* Pointer into line */
218 		*filename,		/* Filename in line */
219 		*anchor,		/* Anchor in line */
220 		*sectptr,		/* Section pointer in line */
221 		section[1024],		/* Section name */
222 		*text;			/* Text in line */
223   time_t	mtime;			/* Modification time */
224   off_t		offset;			/* Offset into file */
225   size_t	length;			/* Length in bytes */
226   int		update;			/* Update? */
227   help_node_t	*node;			/* Current node */
228   help_word_t	*word;			/* Current word */
229 
230 
231  /*
232   * Create a new, empty index.
233   */
234 
235   if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
236     return (NULL);
237 
238   hi->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
239   hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
240 
241   if (!hi->nodes || !hi->sorted)
242   {
243     cupsArrayDelete(hi->nodes);
244     cupsArrayDelete(hi->sorted);
245     free(hi);
246     return (NULL);
247   }
248 
249  /*
250   * Try loading the existing index file...
251   */
252 
253   if ((fp = cupsFileOpen(hifile, "r")) != NULL)
254   {
255    /*
256     * Lock the file and then read the first line...
257     */
258 
259     cupsFileLock(fp, 1);
260 
261     if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
262     {
263      /*
264       * Got a valid header line, now read the data lines...
265       */
266 
267       node = NULL;
268 
269       while (cupsFileGets(fp, line, sizeof(line)))
270       {
271        /*
272 	* Each line looks like one of the following:
273 	*
274 	*     filename mtime offset length "section" "text"
275 	*     filename#anchor offset length "text"
276 	*     SP count word
277 	*/
278 
279         if (line[0] == ' ')
280 	{
281 	 /*
282 	  * Read a word in the current node...
283 	  */
284 
285           if (!node || (ptr = strrchr(line, ' ')) == NULL)
286 	    continue;
287 
288           if ((word = help_add_word(node, ptr + 1)) != NULL)
289 	    word->count = atoi(line + 1);
290         }
291 	else
292 	{
293 	 /*
294 	  * Add a node...
295 	  */
296 
297 	  filename = line;
298 
299 	  if ((ptr = strchr(line, ' ')) == NULL)
300             break;
301 
302 	  while (isspace(*ptr & 255))
303             *ptr++ = '\0';
304 
305 	  if ((anchor = strrchr(filename, '#')) != NULL)
306 	  {
307             *anchor++ = '\0';
308 	    mtime = 0;
309 	  }
310 	  else
311 	    mtime = strtol(ptr, &ptr, 10);
312 
313 	  offset = strtoll(ptr, &ptr, 10);
314 	  length = (size_t)strtoll(ptr, &ptr, 10);
315 
316 	  while (isspace(*ptr & 255))
317             ptr ++;
318 
319           if (!anchor)
320 	  {
321 	   /*
322 	    * Get section...
323 	    */
324 
325             if (*ptr != '\"')
326 	      break;
327 
328             ptr ++;
329 	    sectptr = ptr;
330 
331             while (*ptr && *ptr != '\"')
332 	      ptr ++;
333 
334             if (*ptr != '\"')
335 	      break;
336 
337             *ptr++ = '\0';
338 
339             strlcpy(section, sectptr, sizeof(section));
340 
341 	    while (isspace(*ptr & 255))
342               ptr ++;
343           }
344           else
345             section[0] = '\0';
346 
347           if (*ptr != '\"')
348 	    break;
349 
350           ptr ++;
351 	  text = ptr;
352 
353           while (*ptr && *ptr != '\"')
354 	    ptr ++;
355 
356           if (*ptr != '\"')
357 	    break;
358 
359           *ptr++ = '\0';
360 
361 	  if ((node = help_new_node(filename, anchor, section, text,
362 				    mtime, offset, length)) == NULL)
363             break;
364 
365 	  node->score = -1;
366 
367 	  cupsArrayAdd(hi->nodes, node);
368         }
369       }
370     }
371 
372     cupsFileClose(fp);
373   }
374 
375  /*
376   * Scan for new/updated files...
377   */
378 
379   update = help_load_directory(hi, directory, NULL);
380 
381  /*
382   * Remove any files that are no longer installed...
383   */
384 
385   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
386        node;
387        node = (help_node_t *)cupsArrayNext(hi->nodes))
388     if (node->score < 0)
389     {
390      /*
391       * Delete this node...
392       */
393 
394       cupsArrayRemove(hi->nodes, node);
395       help_delete_node(node);
396     }
397 
398  /*
399   * Add nodes to the sorted array...
400   */
401 
402   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
403        node;
404        node = (help_node_t *)cupsArrayNext(hi->nodes))
405     cupsArrayAdd(hi->sorted, node);
406 
407  /*
408   * Save the index if we updated it...
409   */
410 
411   if (update)
412     helpSaveIndex(hi, hifile);
413 
414  /*
415   * Return the index...
416   */
417 
418   return (hi);
419 }
420 
421 
422 /*
423  * 'helpSaveIndex()' - Save a help index to disk.
424  */
425 
426 int					/* O - 0 on success, -1 on error */
helpSaveIndex(help_index_t * hi,const char * hifile)427 helpSaveIndex(help_index_t *hi,		/* I - Index */
428               const char   *hifile)	/* I - Index filename */
429 {
430   cups_file_t	*fp;			/* Index file */
431   help_node_t	*node;			/* Current node */
432   help_word_t	*word;			/* Current word */
433 
434 
435  /*
436   * Try creating a new index file...
437   */
438 
439   if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
440     return (-1);
441 
442  /*
443   * Lock the file while we write it...
444   */
445 
446   cupsFileLock(fp, 1);
447 
448   cupsFilePuts(fp, "HELPV2\n");
449 
450   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
451        node;
452        node = (help_node_t *)cupsArrayNext(hi->nodes))
453   {
454    /*
455     * Write the current node with/without the anchor...
456     */
457 
458     if (node->anchor)
459     {
460       if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
461                          node->filename, node->anchor,
462                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
463 			 node->text) < 0)
464         break;
465     }
466     else
467     {
468       if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
469                          node->filename, (int)node->mtime,
470                          CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
471 			 node->section ? node->section : "", node->text) < 0)
472         break;
473     }
474 
475    /*
476     * Then write the words associated with the node...
477     */
478 
479     for (word = (help_word_t *)cupsArrayFirst(node->words);
480          word;
481 	 word = (help_word_t *)cupsArrayNext(node->words))
482       if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
483         break;
484   }
485 
486   cupsFileFlush(fp);
487 
488   if (cupsFileClose(fp) < 0)
489     return (-1);
490   else if (node)
491     return (-1);
492   else
493     return (0);
494 }
495 
496 
497 /*
498  * 'helpSearchIndex()' - Search an index.
499  */
500 
501 help_index_t *				/* O - Search index */
helpSearchIndex(help_index_t * hi,const char * query,const char * section,const char * filename)502 helpSearchIndex(help_index_t *hi,	/* I - Index */
503                 const char   *query,	/* I - Query string */
504 		const char   *section,	/* I - Limit search to this section */
505 		const char   *filename)	/* I - Limit search to this file */
506 {
507   help_index_t	*search;		/* Search index */
508   help_node_t	*node;			/* Current node */
509   help_word_t	*word;			/* Current word */
510   void		*sc;			/* Search context */
511   int		matches;		/* Number of matches */
512 
513 
514  /*
515   * Range check...
516   */
517 
518   if (!hi || !query)
519     return (NULL);
520 
521  /*
522   * Reset the scores of all nodes to 0...
523   */
524 
525   for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
526        node;
527        node = (help_node_t *)cupsArrayNext(hi->nodes))
528     node->score = 0;
529 
530  /*
531   * Find the first node to search in...
532   */
533 
534   if (filename)
535   {
536     node = helpFindNode(hi, filename, NULL);
537     if (!node)
538       return (NULL);
539   }
540   else
541     node = (help_node_t *)cupsArrayFirst(hi->nodes);
542 
543  /*
544   * Convert the query into a regular expression...
545   */
546 
547   sc = cgiCompileSearch(query);
548   if (!sc)
549     return (NULL);
550 
551  /*
552   * Allocate a search index...
553   */
554 
555   search = calloc(1, sizeof(help_index_t));
556   if (!search)
557   {
558     cgiFreeSearch(sc);
559     return (NULL);
560   }
561 
562   search->nodes  = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
563   search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
564 
565   if (!search->nodes || !search->sorted)
566   {
567     cupsArrayDelete(search->nodes);
568     cupsArrayDelete(search->sorted);
569     free(search);
570     cgiFreeSearch(sc);
571     return (NULL);
572   }
573 
574   search->search = 1;
575 
576  /*
577   * Check each node in the index, adding matching nodes to the
578   * search index...
579   */
580 
581   for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
582     if (section && strcmp(node->section, section))
583       continue;
584     else if (filename && strcmp(node->filename, filename))
585       continue;
586     else
587     {
588       matches = cgiDoSearch(sc, node->text);
589 
590       for (word = (help_word_t *)cupsArrayFirst(node->words);
591            word;
592 	   word = (help_word_t *)cupsArrayNext(node->words))
593         if (cgiDoSearch(sc, word->text) > 0)
594           matches += word->count;
595 
596       if (matches > 0)
597       {
598        /*
599 	* Found a match, add the node to the search index...
600 	*/
601 
602 	node->score = matches;
603 
604 	cupsArrayAdd(search->nodes, node);
605 	cupsArrayAdd(search->sorted, node);
606       }
607     }
608 
609  /*
610   * Free the search context...
611   */
612 
613   cgiFreeSearch(sc);
614 
615  /*
616   * Return the results...
617   */
618 
619   return (search);
620 }
621 
622 
623 /*
624  * 'help_add_word()' - Add a word to a node.
625  */
626 
627 static help_word_t *			/* O - New word */
help_add_word(help_node_t * n,const char * text)628 help_add_word(help_node_t *n,		/* I - Node */
629               const char  *text)	/* I - Word text */
630 {
631   help_word_t	*w,			/* New word */
632 		key;			/* Search key */
633 
634 
635  /*
636   * Create the words array as needed...
637   */
638 
639   if (!n->words)
640     n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
641 
642  /*
643   * See if the word is already added...
644   */
645 
646   key.text = (char *)text;
647 
648   if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
649   {
650    /*
651     * Create a new word...
652     */
653 
654     if ((w = calloc(1, sizeof(help_word_t))) == NULL)
655       return (NULL);
656 
657     if ((w->text = strdup(text)) == NULL)
658     {
659       free(w);
660       return (NULL);
661     }
662 
663     cupsArrayAdd(n->words, w);
664   }
665 
666  /*
667   * Bump the counter for this word and return it...
668   */
669 
670   w->count ++;
671 
672   return (w);
673 }
674 
675 
676 /*
677  * 'help_delete_node()' - Free all memory used by a node.
678  */
679 
680 static void
help_delete_node(help_node_t * n)681 help_delete_node(help_node_t *n)	/* I - Node */
682 {
683   help_word_t	*w;			/* Current word */
684 
685 
686   if (!n)
687     return;
688 
689   if (n->filename)
690     free(n->filename);
691 
692   if (n->anchor)
693     free(n->anchor);
694 
695   if (n->section)
696     free(n->section);
697 
698   if (n->text)
699     free(n->text);
700 
701   for (w = (help_word_t *)cupsArrayFirst(n->words);
702        w;
703        w = (help_word_t *)cupsArrayNext(n->words))
704     help_delete_word(w);
705 
706   cupsArrayDelete(n->words);
707 
708   free(n);
709 }
710 
711 
712 /*
713  * 'help_delete_word()' - Free all memory used by a word.
714  */
715 
716 static void
help_delete_word(help_word_t * w)717 help_delete_word(help_word_t *w)	/* I - Word */
718 {
719   if (!w)
720     return;
721 
722   if (w->text)
723     free(w->text);
724 
725   free(w);
726 }
727 
728 
729 /*
730  * 'help_load_directory()' - Load a directory of files into an index.
731  */
732 
733 static int				/* O - 0 = success, -1 = error, 1 = updated */
help_load_directory(help_index_t * hi,const char * directory,const char * relative)734 help_load_directory(
735     help_index_t *hi,			/* I - Index */
736     const char   *directory,		/* I - Directory */
737     const char   *relative)		/* I - Relative path */
738 {
739   cups_dir_t	*dir;			/* Directory file */
740   cups_dentry_t	*dent;			/* Directory entry */
741   char		*ext,			/* Pointer to extension */
742 		filename[1024],		/* Full filename */
743 		relname[1024];		/* Relative filename */
744   int		update;			/* Updated? */
745   help_node_t	*node;			/* Current node */
746 
747 
748  /*
749   * Open the directory and scan it...
750   */
751 
752   if ((dir = cupsDirOpen(directory)) == NULL)
753     return (0);
754 
755   update = 0;
756 
757   while ((dent = cupsDirRead(dir)) != NULL)
758   {
759    /*
760     * Skip "." files...
761     */
762 
763     if (dent->filename[0] == '.')
764       continue;
765 
766    /*
767     * Get absolute and relative filenames...
768     */
769 
770     snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
771     if (relative)
772       snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
773     else
774       strlcpy(relname, dent->filename, sizeof(relname));
775 
776    /*
777     * Check if we have a HTML file...
778     */
779 
780     if ((ext = strstr(dent->filename, ".html")) != NULL &&
781         (!ext[5] || !strcmp(ext + 5, ".gz")))
782     {
783      /*
784       * HTML file, see if we have already indexed the file...
785       */
786 
787       if ((node = helpFindNode(hi, relname, NULL)) != NULL)
788       {
789        /*
790         * File already indexed - check dates to confirm that the
791 	* index is up-to-date...
792 	*/
793 
794         if (node->mtime == dent->fileinfo.st_mtime)
795 	{
796 	 /*
797 	  * Same modification time, so mark all of the nodes
798 	  * for this file as up-to-date...
799 	  */
800 
801           for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
802 	    if (!strcmp(node->filename, relname))
803 	      node->score = 0;
804 	    else
805 	      break;
806 
807           continue;
808 	}
809       }
810 
811       update = 1;
812 
813       help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
814     }
815     else if (S_ISDIR(dent->fileinfo.st_mode))
816     {
817      /*
818       * Process sub-directory...
819       */
820 
821       if (help_load_directory(hi, filename, relname) == 1)
822         update = 1;
823     }
824   }
825 
826   cupsDirClose(dir);
827 
828   return (update);
829 }
830 
831 
832 /*
833  * 'help_load_file()' - Load a HTML files into an index.
834  */
835 
836 static int				/* O - 0 = success, -1 = error */
help_load_file(help_index_t * hi,const char * filename,const char * relative,time_t mtime)837 help_load_file(
838     help_index_t *hi,			/* I - Index */
839     const char   *filename,		/* I - Filename */
840     const char   *relative,		/* I - Relative path */
841     time_t       mtime)			/* I - Modification time */
842 {
843   cups_file_t	*fp;			/* HTML file */
844   help_node_t	*node;			/* Current node */
845   char		line[1024],		/* Line from file */
846 		temp[1024],		/* Temporary word */
847                 section[1024],		/* Section */
848 		*ptr,			/* Pointer into line */
849 		*anchor,		/* Anchor name */
850 		*text;			/* Text for anchor */
851   off_t		offset;			/* File offset */
852   char		quote;			/* Quote character */
853   help_word_t	*word;			/* Current word */
854   int		wordlen;		/* Length of word */
855 
856 
857   if ((fp = cupsFileOpen(filename, "r")) == NULL)
858     return (-1);
859 
860   node   = NULL;
861   offset = 0;
862 
863   strlcpy(section, "Other", sizeof(section));
864 
865   while (cupsFileGets(fp, line, sizeof(line)))
866   {
867    /*
868     * Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
869     */
870 
871     if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
872     {
873      /*
874       * Got section line, copy it!
875       */
876 
877       for (ptr += 13; isspace(*ptr & 255); ptr ++);
878 
879       strlcpy(section, ptr, sizeof(section));
880       if ((ptr = strstr(section, "-->")) != NULL)
881       {
882        /*
883         * Strip comment stuff from end of line...
884 	*/
885 
886         for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
887 
888 	if (isspace(*ptr & 255))
889 	  *ptr = '\0';
890       }
891       continue;
892     }
893 
894     for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
895     {
896       ptr ++;
897 
898       if (!_cups_strncasecmp(ptr, "TITLE>", 6))
899       {
900        /*
901         * Found the title...
902 	*/
903 
904 	anchor = NULL;
905 	ptr += 6;
906       }
907       else
908       {
909         char *idptr;			/* Pointer to ID */
910 
911 	if (!_cups_strncasecmp(ptr, "A NAME=", 7))
912 	  ptr += 7;
913 	else if ((idptr = strstr(ptr, " ID=")) != NULL)
914 	  ptr = idptr + 4;
915 	else if ((idptr = strstr(ptr, " id=")) != NULL)
916 	  ptr = idptr + 4;
917 	else
918 	  continue;
919 
920        /*
921         * Found an anchor...
922 	*/
923 
924 	if (*ptr == '\"' || *ptr == '\'')
925 	{
926 	 /*
927 	  * Get quoted anchor...
928 	  */
929 
930 	  quote  = *ptr;
931           anchor = ptr + 1;
932 	  if ((ptr = strchr(anchor, quote)) != NULL)
933 	    *ptr++ = '\0';
934 	  else
935 	    break;
936 	}
937 	else
938 	{
939 	 /*
940 	  * Get unquoted anchor...
941 	  */
942 
943           anchor = ptr + 1;
944 
945 	  for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
946 
947 	  if (*ptr != '>')
948 	    *ptr++ = '\0';
949 	  else
950 	    break;
951 	}
952 
953        /*
954         * Got the anchor, now lets find the end...
955 	*/
956 
957         while (*ptr && *ptr != '>')
958 	  ptr ++;
959 
960         if (*ptr != '>')
961 	  break;
962 
963         *ptr++ = '\0';
964       }
965 
966      /*
967       * Now collect text for the link...
968       */
969 
970       text = ptr;
971       while ((ptr = strchr(text, '<')) == NULL)
972       {
973 	ptr = text + strlen(text);
974 	if (ptr >= (line + sizeof(line) - 2))
975 	  break;
976 
977         *ptr++ = ' ';
978 
979         if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
980 	  break;
981       }
982 
983       *ptr = '\0';
984 
985       if (node)
986 	node->length = (size_t)(offset - node->offset);
987 
988       if (!*text)
989       {
990         node = NULL;
991         break;
992       }
993 
994       if ((node = helpFindNode(hi, relative, anchor)) != NULL)
995       {
996        /*
997 	* Node already in the index, so replace the text and other
998 	* data...
999 	*/
1000 
1001         cupsArrayRemove(hi->nodes, node);
1002 
1003         if (node->section)
1004 	  free(node->section);
1005 
1006 	if (node->text)
1007 	  free(node->text);
1008 
1009         if (node->words)
1010 	{
1011 	  for (word = (help_word_t *)cupsArrayFirst(node->words);
1012 	       word;
1013 	       word = (help_word_t *)cupsArrayNext(node->words))
1014 	    help_delete_word(word);
1015 
1016 	  cupsArrayDelete(node->words);
1017 	  node->words = NULL;
1018 	}
1019 
1020 	node->section = section[0] ? strdup(section) : NULL;
1021 	node->text    = strdup(text);
1022 	node->mtime   = mtime;
1023 	node->offset  = offset;
1024 	node->score   = 0;
1025       }
1026       else
1027       {
1028        /*
1029 	* New node...
1030 	*/
1031 
1032         node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
1033       }
1034 
1035      /*
1036       * Go through the text value and replace tabs and newlines with
1037       * whitespace and eliminate extra whitespace...
1038       */
1039 
1040       for (ptr = node->text, text = node->text; *ptr;)
1041 	if (isspace(*ptr & 255))
1042 	{
1043 	  while (isspace(*ptr & 255))
1044 	    ptr ++;
1045 
1046 	  *text++ = ' ';
1047         }
1048 	else if (text != ptr)
1049 	  *text++ = *ptr++;
1050 	else
1051 	{
1052 	  text ++;
1053 	  ptr ++;
1054 	}
1055 
1056       *text = '\0';
1057 
1058      /*
1059       * (Re)add the node to the array...
1060       */
1061 
1062       cupsArrayAdd(hi->nodes, node);
1063 
1064       if (!anchor)
1065         node = NULL;
1066       break;
1067     }
1068 
1069     if (node)
1070     {
1071      /*
1072       * Scan this line for words...
1073       */
1074 
1075       for (ptr = line; *ptr; ptr ++)
1076       {
1077        /*
1078 	* Skip HTML stuff...
1079 	*/
1080 
1081 	if (*ptr == '<')
1082 	{
1083           if (!strncmp(ptr, "<!--", 4))
1084 	  {
1085 	   /*
1086 	    * Skip HTML comment...
1087 	    */
1088 
1089             if ((text = strstr(ptr + 4, "-->")) == NULL)
1090 	      ptr += strlen(ptr) - 1;
1091 	    else
1092 	      ptr = text + 2;
1093 	  }
1094 	  else
1095 	  {
1096 	   /*
1097             * Skip HTML element...
1098 	    */
1099 
1100             for (ptr ++; *ptr && *ptr != '>'; ptr ++)
1101 	    {
1102 	      if (*ptr == '\"' || *ptr == '\'')
1103 	      {
1104 		for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
1105 
1106 		if (!*ptr)
1107 		  ptr --;
1108 	      }
1109 	    }
1110 
1111 	    if (!*ptr)
1112 	      ptr --;
1113           }
1114 
1115           continue;
1116 	}
1117 	else if (*ptr == '&')
1118 	{
1119 	 /*
1120 	  * Skip HTML entity...
1121 	  */
1122 
1123 	  for (ptr ++; *ptr && *ptr != ';'; ptr ++);
1124 
1125 	  if (!*ptr)
1126 	    ptr --;
1127 
1128 	  continue;
1129 	}
1130 	else if (!isalnum(*ptr & 255))
1131           continue;
1132 
1133        /*
1134 	* Found the start of a word, search until we find the end...
1135 	*/
1136 
1137 	for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
1138 
1139 	wordlen = (int)(ptr - text);
1140 
1141         memcpy(temp, text, (size_t)wordlen);
1142 	temp[wordlen] = '\0';
1143 
1144         ptr --;
1145 
1146 	if (wordlen > 1 && !bsearch(temp, help_common_words,
1147 	                            (sizeof(help_common_words) /
1148 				     sizeof(help_common_words[0])),
1149 				    sizeof(help_common_words[0]),
1150 				    (int (*)(const void *, const void *))
1151 				        _cups_strcasecmp))
1152           help_add_word(node, temp);
1153       }
1154     }
1155 
1156    /*
1157     * Get the offset of the next line...
1158     */
1159 
1160     offset = cupsFileTell(fp);
1161   }
1162 
1163   cupsFileClose(fp);
1164 
1165   if (node)
1166     node->length = (size_t)(offset - node->offset);
1167 
1168   return (0);
1169 }
1170 
1171 
1172 /*
1173  * 'help_new_node()' - Create a new node and add it to an index.
1174  */
1175 
1176 static help_node_t *			/* O - Node pointer or NULL on error */
help_new_node(const char * filename,const char * anchor,const char * section,const char * text,time_t mtime,off_t offset,size_t length)1177 help_new_node(const char   *filename,	/* I - Filename */
1178               const char   *anchor,	/* I - Anchor */
1179 	      const char   *section,	/* I - Section */
1180 	      const char   *text,	/* I - Text */
1181 	      time_t       mtime,	/* I - Modification time */
1182               off_t        offset,	/* I - Offset in file */
1183 	      size_t       length)	/* I - Length in bytes */
1184 {
1185   help_node_t	*n;			/* Node */
1186 
1187 
1188   n = (help_node_t *)calloc(1, sizeof(help_node_t));
1189   if (!n)
1190     return (NULL);
1191 
1192   n->filename = strdup(filename);
1193   n->anchor   = anchor ? strdup(anchor) : NULL;
1194   n->section  = (section && *section) ? strdup(section) : NULL;
1195   n->text     = strdup(text);
1196   n->mtime    = mtime;
1197   n->offset   = offset;
1198   n->length   = length;
1199 
1200   return (n);
1201 }
1202 
1203 
1204 /*
1205  * 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
1206  */
1207 
1208 static int				/* O - Difference */
help_sort_by_name(help_node_t * n1,help_node_t * n2)1209 help_sort_by_name(help_node_t *n1,	/* I - First node */
1210                   help_node_t *n2)	/* I - Second node */
1211 {
1212   int		diff;			/* Difference */
1213 
1214 
1215   if ((diff = strcmp(n1->filename, n2->filename)) != 0)
1216     return (diff);
1217 
1218   if (!n1->anchor && !n2->anchor)
1219     return (0);
1220   else if (!n1->anchor)
1221     return (-1);
1222   else if (!n2->anchor)
1223     return (1);
1224   else
1225     return (strcmp(n1->anchor, n2->anchor));
1226 }
1227 
1228 
1229 /*
1230  * 'help_sort_nodes_by_score()' - Sort nodes by score and text.
1231  */
1232 
1233 static int				/* O - Difference */
help_sort_by_score(help_node_t * n1,help_node_t * n2)1234 help_sort_by_score(help_node_t *n1,	/* I - First node */
1235                    help_node_t *n2)	/* I - Second node */
1236 {
1237   int		diff;			/* Difference */
1238 
1239 
1240   if (n1->score != n2->score)
1241     return (n2->score - n1->score);
1242 
1243   if (n1->section && !n2->section)
1244     return (1);
1245   else if (!n1->section && n2->section)
1246     return (-1);
1247   else if (n1->section && n2->section &&
1248            (diff = strcmp(n1->section, n2->section)) != 0)
1249     return (diff);
1250 
1251   return (_cups_strcasecmp(n1->text, n2->text));
1252 }
1253 
1254 
1255 /*
1256  * 'help_sort_words()' - Sort words alphabetically.
1257  */
1258 
1259 static int				/* O - Difference */
help_sort_words(help_word_t * w1,help_word_t * w2)1260 help_sort_words(help_word_t *w1,	/* I - Second word */
1261                 help_word_t *w2)	/* I - Second word */
1262 {
1263   return (_cups_strcasecmp(w1->text, w2->text));
1264 }
1265