1 /*
2  * Copyright (C) 2013 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.doclava;
18 
19 import java.io.*;
20 import java.text.BreakIterator;
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.Comparator;
24 import java.util.List;
25 import java.util.regex.Pattern;
26 import java.util.regex.Matcher;
27 import java.io.File;
28 
29 import com.google.clearsilver.jsilver.data.Data;
30 
31 import org.ccil.cowan.tagsoup.*;
32 import org.xml.sax.XMLReader;
33 import org.xml.sax.InputSource;
34 import org.xml.sax.Attributes;
35 import org.xml.sax.helpers.DefaultHandler;
36 
37 import org.w3c.dom.Node;
38 import org.w3c.dom.NodeList;
39 
40 import javax.xml.transform.dom.DOMResult;
41 import javax.xml.transform.sax.SAXSource;
42 import javax.xml.transform.Transformer;
43 import javax.xml.transform.TransformerFactory;
44 import javax.xml.xpath.XPath;
45 import javax.xml.xpath.XPathConstants;
46 import javax.xml.xpath.XPathExpression;
47 import javax.xml.xpath.XPathFactory;
48 
49 /**
50 * Metadata associated with a specific documentation page. Extracts
51 * metadata based on the page's declared hdf vars (meta.tags and others)
52 * as well as implicit data relating to the page, such as url, type, etc.
* Includes a Node class that represents the metadata for one page and can be
* attached as a parent or child in the tree of metadata nodes for all pages.
55 * Node also includes methods for rendering the node tree to a json file
56 * in docs output, which is then used by JavaScript to load metadata
57 * objects into html pages.
58 */
59 
60 public class PageMetadata {
/** The source file handed to the constructor. */
File mSource;
/**
 * The destination path handed to the constructor. The constructor appends a
 * trailing '/' when the path is longer than one character and lacks one.
 */
String mDest;
/** Not assigned anywhere in the portion of the class visible here. */
String mTagList;
/** When true, values read from meta.tags and sample.group are lowercased. */
static boolean sLowercaseTags = true;
/** When true, values read from page.tags are lowercased. */
static boolean sLowercaseKeywords = true;
//static String linkPrefix = (Doclava.META_DBG) ? "/" : "http://developer.android.com/";
/**
 * Regex pattern that matches an inline javadoc tag (@link and similar)
 * wrapped in curly braces. Capture group 1 holds the trailing symbol: the
 * run of letters, digits, underscores and parentheses that follows the last
 * whitespace, '.' or '#' before the closing brace.
 */
private static final Pattern JD_TAG_PATTERN =
    Pattern.compile("\\{@.*?[\\s\\.\\#]([A-Za-z\\(\\)\\d_]+)(?=\u007D)\u007D");
73 
PageMetadata(File source, String dest, List<Node> taglist)74   public PageMetadata(File source, String dest, List<Node> taglist) {
75     mSource = source;
76     mDest = dest;
77 
78     if (dest != null) {
79       int len = dest.length();
80       if (len > 1 && dest.charAt(len - 1) != '/') {
81         mDest = dest + '/';
82       } else {
83         mDest = dest;
84       }
85     }
86   }
87 
88   /**
89   * Given a list of metadata nodes organized by type, sort the
90   * root nodes by type name and render the types and their child
91   * metadata nodes to a json file in the out dir.
92   *
93   * @param rootTypeNodesList A list of root metadata nodes, each
94   *        representing a type and it's member child pages.
95   * @deprecated
96   */
WriteList(List<Node> rootTypeNodesList)97   public static void WriteList(List<Node> rootTypeNodesList) {
98     Collections.sort(rootTypeNodesList, BY_TYPE_NAME);
99     Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(rootTypeNodesList).build();
100 
101     StringBuilder buf = new StringBuilder();
102     // write the taglist to string format
103     pageMeta.renderTypeResources(buf);
104     pageMeta.renderTypesByTag(buf);
105     // write the taglist to js file
106     Data data = Doclava.makeHDF();
107     data.setValue("reference_tree", buf.toString());
108     ClearPage.write(data, "jd_lists_unified.cs",  "jd_lists_unified.js");
109   }
110 
111   /**
112   * Given a list of metadata nodes organized by lang, sort the
113   * root nodes by type name and render the types and their child
114   * metadata nodes to separate lang-specific json files in the out dir.
115   *
116   * @param rootNodesList A list of root metadata nodes, each
117   *        representing a type and it's member child pages.
118   */
WriteListByLang(List<Node> rootNodesList)119   public static void WriteListByLang(List<Node> rootNodesList) {
120     Collections.sort(rootNodesList, BY_LANG_NAME);
121     for (Node n : rootNodesList) {
122       String langFilename = "";
123       String langname = n.getLang();
124       langFilename = "_" + langname;
125       Collections.sort(n.getChildren(), BY_TYPE_NAME);
126       Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(n.getChildren()).build();
127 
128       StringBuilder buf = new StringBuilder();
129       // write the taglist to string format
130       pageMeta.renderLangResources(buf,langname);
131       //pageMeta.renderTypesByTag(buf);
132       // write the taglist to js file
133       Data data = Doclava.makeHDF();
134       data.setValue("reference_tree", buf.toString());
135       data.setValue("metadata.lang", langname);
136       String unifiedFilename = "jd_lists_unified" + langFilename + ".js";
137       String extrasFilename = "jd_extras" + langFilename + ".js";
138       // write out jd_lists_unified for each lang
139       ClearPage.write(data, "jd_lists_unified.cs", unifiedFilename);
140       // append jd_extras to jd_lists_unified for each lang, then delete.
141       appendExtrasMetadata(extrasFilename, unifiedFilename);
142     }
143   }
144 
145   /**
146   * Given a list of metadata nodes organized by lang, sort the
147   * root nodes by type name and render the types and their child
148   * samples metadata nodes only to separate lang-specific json files
149   * in the out dir. Only used by devsite (ds) builds.
150   *
151   * @param rootNodesList A list of root metadata nodes, each
152   *        representing a type and it's member child pages.
153   */
WriteSamplesListByLang(List<Node> rootNodesList)154   public static void WriteSamplesListByLang(List<Node> rootNodesList) {
155     Collections.sort(rootNodesList, BY_LANG_NAME);
156     for (Node n : rootNodesList) {
157       boolean langHasSamples = false;
158       String langFilename = "";
159       String langname = n.getLang();
160       langFilename = "_" + langname;
161       Collections.sort(n.getChildren(), BY_TYPE_NAME);
162       Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(n.getChildren()).build();
163 
164       StringBuilder buf = new StringBuilder();
165       // write the taglist to string format
166       langHasSamples = pageMeta.renderSamplesResources(buf,langname);
167       // write the taglist to js file
168       Data data = Doclava.makeHDF();
169       data.setValue("reference_tree", buf.toString());
170       data.setValue("metadata.lang", langname);
171 
172       if (langHasSamples) {
173         data.setValue("samples_only", "1");
174         // write out jd_lists_unified for each lang
175         String unifiedFilename = "android_samples_metadata" + langFilename + ".js";
176         ClearPage.write(data, "jd_lists_unified.cs", unifiedFilename);
177       }
178     }
179   }
180 
181   /**
182   * Extract supported metadata values from a page and add them as
183   * a child node of a root node based on type. Some metadata values
184   * are normalized. Unsupported metadata fields are ignored. See
185   * Node for supported metadata fields and methods for accessing values.
186   *
187   * @param docfile The file from which to extract metadata.
188   * @param dest The output path for the file, used to set link to page.
189   * @param filename The file from which to extract metadata.
190   * @param hdf Data object in which to store the metadata values.
191   * @param tagList The file from which to extract metadata.
192   */
setPageMetadata(String docfile, String dest, String filename, Data hdf, List<Node> tagList)193   public static void setPageMetadata(String docfile, String dest, String filename,
194       Data hdf, List<Node> tagList) {
195     //exclude this page if author does not want it included
196     boolean excludeNode = "true".equals(hdf.getValue("excludeFromSuggestions",""));
197 
198     //check whether summary and image exist and if not, get them from itemprop/markup
199     Boolean needsSummary = "".equals(hdf.getValue("page.metaDescription", ""));
200     Boolean needsImage = "".equals(hdf.getValue("page.image", ""));
201     if ((needsSummary) || (needsImage)) {
202       //try to extract the metadata from itemprop and markup
203       inferMetadata(docfile, hdf, needsSummary, needsImage);
204     }
205 
206     //extract available metadata and set it in a node
207     if (!excludeNode) {
208       Node pageMeta = new Node.Builder().build();
209       pageMeta.setLabel(getTitleNormalized(hdf, "page.title"));
210       pageMeta.setCategory(hdf.getValue("page.category",""));
211       pageMeta.setSummary(hdf.getValue("page.metaDescription",""));
212       pageMeta.setLink(getPageUrlNormalized(filename));
213       pageMeta.setGroup(getStringValueNormalized(hdf,"sample.group"));
214       pageMeta.setKeywords(getPageTagsNormalized(hdf, "page.tags"));
215       pageMeta.setTags(getPageTagsNormalized(hdf, "meta.tags"));
216       pageMeta.setImage(getImageUrlNormalized(hdf.getValue("page.image", "")));
217       pageMeta.setLang(getLangStringNormalized(hdf, filename));
218       pageMeta.setType(getStringValueNormalized(hdf, "page.type"));
219       pageMeta.setTimestamp(hdf.getValue("page.timestamp",""));
220       appendMetaNodeByLang(pageMeta, tagList);
221     }
222   }
223 
224   /**
225   * Attempt to infer page metadata based on the contents of the
226   * file. Load and parse the file as a dom tree. Select values
227   * in this order: 1. dom node specifically tagged with
228   * microdata (itemprop). 2. first qualitifed p or img node.
229   *
230   * @param docfile The file from which to extract metadata.
231   * @param hdf Data object in which to store the metadata values.
232   * @param needsSummary Whether to extract summary metadata.
233   * @param needsImage Whether to extract image metadata.
234   */
inferMetadata(String docfile, Data hdf, Boolean needsSummary, Boolean needsImage)235   public static void inferMetadata(String docfile, Data hdf,
236       Boolean needsSummary, Boolean needsImage) {
237     String sum = "";
238     String imageUrl = "";
239     String sumFrom = needsSummary ? "none" : "hdf";
240     String imgFrom = needsImage ? "none" : "hdf";
241     String filedata = hdf.getValue("commentText", "");
242     if (Doclava.META_DBG) System.out.println("----- " + docfile + "\n");
243 
244     try {
245       XPathFactory xpathFac = XPathFactory.newInstance();
246       XPath xpath = xpathFac.newXPath();
247       InputStream inputStream = new ByteArrayInputStream(filedata.getBytes());
248       XMLReader reader = new Parser();
249       reader.setFeature(Parser.namespacesFeature, false);
250       reader.setFeature(Parser.namespacePrefixesFeature, false);
251       reader.setFeature(Parser.ignoreBogonsFeature, true);
252 
253       Transformer transformer = TransformerFactory.newInstance().newTransformer();
254       DOMResult result = new DOMResult();
255       transformer.transform(new SAXSource(reader, new InputSource(inputStream)), result);
256       org.w3c.dom.Node htmlNode = result.getNode();
257 
258       if (needsSummary) {
259         StringBuilder sumStrings = new StringBuilder();
260         XPathExpression ItempropDescExpr = xpath.compile("/descendant-or-self::*"
261             + "[@itemprop='description'][1]//text()[string(.)]");
262         org.w3c.dom.NodeList nodes = (org.w3c.dom.NodeList) ItempropDescExpr.evaluate(htmlNode,
263             XPathConstants.NODESET);
264         if (nodes.getLength() > 0) {
265           for (int i = 0; i < nodes.getLength(); i++) {
266             String tx = nodes.item(i).getNodeValue();
267             sumStrings.append(tx);
268             sumFrom = "itemprop";
269           }
270         } else {
271           XPathExpression FirstParaExpr = xpath.compile("//p[not(../../../"
272               + "@class='notice-developers') and not(../@class='sidebox')"
273               + "and not(@class)]//text()");
274           nodes = (org.w3c.dom.NodeList) FirstParaExpr.evaluate(htmlNode, XPathConstants.NODESET);
275           if (nodes.getLength() > 0) {
276             for (int i = 0; i < nodes.getLength(); i++) {
277               String tx = nodes.item(i).getNodeValue();
278               sumStrings.append(tx + " ");
279               sumFrom = "markup";
280             }
281           }
282         }
283         //found a summary string, now normalize it
284         sum = sumStrings.toString().trim();
285         if ((sum != null) && (!"".equals(sum))) {
286           sum = getSummaryNormalized(sum);
287         }
288         //normalized summary ended up being too short to be meaningful
289         if ("".equals(sum)) {
290            if (Doclava.META_DBG) System.out.println("Warning: description too short! ("
291             + sum.length() + "chars) ...\n\n");
292         }
293         //summary looks good, store it to the file hdf data
294         hdf.setValue("page.metaDescription", sum);
295       }
296       if (needsImage) {
297         XPathExpression ItempropImageExpr = xpath.compile("//*[@itemprop='image']/@src");
298         org.w3c.dom.NodeList imgNodes = (org.w3c.dom.NodeList) ItempropImageExpr.evaluate(htmlNode,
299             XPathConstants.NODESET);
300         if (imgNodes.getLength() > 0) {
301           imageUrl = imgNodes.item(0).getNodeValue();
302           imageUrl = getImageUrlNormalized(imageUrl);
303           imgFrom = "itemprop";
304         } else {
305           XPathExpression FirstImgExpr = xpath.compile("//img/@src");
306           imgNodes = (org.w3c.dom.NodeList) FirstImgExpr.evaluate(htmlNode, XPathConstants.NODESET);
307           if (imgNodes.getLength() > 0) {
308             //iterate nodes looking for valid image url and normalize.
309             for (int i = 0; i < imgNodes.getLength(); i++) {
310               String tx = imgNodes.item(i).getNodeValue();
311               //qualify and normalize the image
312               imageUrl = getImageUrlNormalized(tx);
313               //this img src did not qualify, keep looking...
314               if ("".equals(imageUrl)) {
315                 if (Doclava.META_DBG) System.out.println("    >>>>> Discarded image: " + tx);
316                 continue;
317               } else {
318                 imgFrom = "markup";
319                 break;
320               }
321             }
322           }
323         }
324         //img src url looks good, store it to the file hdf data
325         hdf.setValue("page.image", imageUrl);
326       }
327       if (Doclava.META_DBG) System.out.println("Image (" + imgFrom + "): " + imageUrl);
328       if (Doclava.META_DBG) System.out.println("Summary (" + sumFrom + "): " + sum.length()
329           + " chars\n\n" + sum + "\n");
330       return;
331 
332     } catch (Exception e) {
333       if (Doclava.META_DBG) System.out.println("    >>>>> Exception: " + e + "\n");
334     }
335   }
336 
337   /**
338   * Normalize a comma-delimited, multi-string value. Split on commas, remove
339   * quotes, trim whitespace, optionally make keywords/tags lowercase for
340   * easier matching.
341   *
342   * @param hdf Data object in which the metadata values are stored.
343   * @param tag The hdf var from which the metadata was extracted.
344   * @return A normalized string value for the specified tag.
345   */
getPageTagsNormalized(Data hdf, String tag)346   public static String getPageTagsNormalized(Data hdf, String tag) {
347 
348     String normTags = "";
349     StringBuilder tags = new StringBuilder();
350     String tagList = hdf.getValue(tag, "");
351     if (tag.equals("meta.tags") && (tagList.equals(""))) {
352       //use keywords as tags if no meta tags are available
353       tagList = hdf.getValue("page.tags", "");
354     }
355     if (!tagList.equals("")) {
356       tagList = tagList.replaceAll("\"", "");
357 
358       String[] tagParts = tagList.split("[,\u3001]");
359       for (int iter = 0; iter < tagParts.length; iter++) {
360         tags.append("\"");
361         if (tag.equals("meta.tags") && sLowercaseTags) {
362           tagParts[iter] = tagParts[iter].toLowerCase();
363         } else if (tag.equals("page.tags") && sLowercaseKeywords) {
364           tagParts[iter] = tagParts[iter].toLowerCase();
365         }
366         if (tag.equals("meta.tags")) {
367           //tags.append("#"); //to match hashtag format used with yt/blogger resources
368           tagParts[iter] = tagParts[iter].replaceAll(" ","");
369         }
370         tags.append(tagParts[iter].trim());
371         tags.append("\"");
372         if (iter < tagParts.length - 1) {
373           tags.append(",");
374         }
375       }
376     }
377     //write this back to hdf to expose through js
378     if (tag.equals("meta.tags")) {
379       hdf.setValue(tag, tags.toString());
380     }
381     return tags.toString();
382   }
383 
384   /**
385   * Normalize a string for which only a single value is supported.
386   * Extract the string up to the first comma, remove quotes, remove
387   * any forward-slash prefix, trim any whitespace, optionally make
388   * lowercase for easier matching.
389   *
390   * @param hdf Data object in which the metadata values are stored.
391   * @param tag The hdf var from which the metadata should be extracted.
392   * @return A normalized string value for the specified tag.
393   */
getStringValueNormalized(Data hdf, String tag)394   public static String getStringValueNormalized(Data hdf, String tag) {
395     StringBuilder outString =  new StringBuilder();
396     String tagList = hdf.getValue(tag, "");
397     tagList.replaceAll("\"", "");
398     if ("".equals(tagList)) {
399       return tagList;
400     } else {
401       int end = tagList.indexOf(",");
402       if (end != -1) {
403         tagList = tagList.substring(0,end);
404       }
405       tagList = tagList.startsWith("/") ? tagList.substring(1) : tagList;
406       if ("sample.group".equals(tag) && sLowercaseTags) {
407         tagList = tagList.toLowerCase();
408       }
409       outString.append(tagList.trim());
410       return outString.toString();
411     }
412   }
413 
414   /**
415   * Normalize a page title. Extract the string, remove quotes, remove
416   * markup, and trim any whitespace.
417   *
418   * @param hdf Data object in which the metadata values are stored.
419   * @param tag The hdf var from which the metadata should be extracted.
420   * @return A normalized string value for the specified tag.
421   */
getTitleNormalized(Data hdf, String tag)422   public static String getTitleNormalized(Data hdf, String tag) {
423     StringBuilder outTitle =  new StringBuilder();
424     String title = hdf.getValue(tag, "");
425     if (!title.isEmpty()) {
426       title = escapeString(title);
427       if (title.indexOf("<span") != -1) {
428         String[] splitTitle = title.split("<span(.*?)</span>");
429         title = splitTitle[0];
430         for (int j = 1; j < splitTitle.length; j++) {
431           title.concat(splitTitle[j]);
432         }
433       }
434       outTitle.append(title.trim());
435     }
436     return outTitle.toString();
437   }
438 
439   /**
440   * Extract and normalize a page's language string based on the
441   * lowercased dir path. Non-supported langs are ignored and assigned
442   * the default lang string of "en".
443   *
444   * @param filename A path string to the file relative to root.
445   * @return A normalized lang value.
446   */
getLangStringNormalized(Data data, String filename)447   public static String getLangStringNormalized(Data data, String filename) {
448     String[] stripStr = filename.toLowerCase().split("\\/", 3);
449     String outFrag = "en";
450     String pathCanonical = filename;
451     if (stripStr.length > 0) {
452       for (String t : DocFile.DEVSITE_VALID_LANGS) {
453         if ("intl".equals(stripStr[0])) {
454           if (t.equals(stripStr[1])) {
455             outFrag = stripStr[1];
456             //extract the root url (exclusive of intl/nn)
457             pathCanonical = stripStr[2];
458             break;
459           }
460         }
461       }
462     }
463     //extract the root url (exclusive of intl/nn)
464     data.setValue("path.canonical", pathCanonical);
465     return outFrag;
466   }
467 
468   /**
469   * Normalize a page summary string and truncate as needed. Strings
470   * exceeding max_chars are truncated at the first word boundary
471   * following the max_size marker. Strings smaller than min_chars
472   * are discarded (as they are assumed to be too little context).
473   *
474   * @param s String extracted from the page as it's summary.
475   * @return A normalized string value.
476   */
getSummaryNormalized(String s)477   public static String getSummaryNormalized(String s) {
478     String str = "";
479     int max_chars = 250;
480     int min_chars = 50;
481     int marker = 0;
482     if (s.length() < min_chars) {
483       return str;
484     } else {
485       str = s.replaceAll("^\"|\"$", "");
486       str = str.replaceAll("\\s+", " ");
487       str = JD_TAG_PATTERN.matcher(str).replaceAll("$1");
488       str = escapeString(str);
489       BreakIterator bi = BreakIterator.getWordInstance();
490       bi.setText(str);
491       if (str.length() > max_chars) {
492         marker = bi.following(max_chars);
493       } else {
494         marker = bi.last();
495       }
496       str = str.substring(0, marker);
497       str = str.concat("\u2026" );
498     }
499     return str;
500   }
501 
escapeString(String s)502   public static String escapeString(String s) {
503     s = s.replaceAll("\"", "&quot;");
504     s = s.replaceAll("\'", "&#39;");
505     s = s.replaceAll("<", "&lt;");
506     s = s.replaceAll(">", "&gt;");
507     s = s.replaceAll("/", "&#47;");
508     return s;
509   }
510 
/**
 * Substrings that disqualify an img src url: getImageUrlNormalized
 * returns an empty string for any url that contains one of them.
 */
public static String[] IMAGE_EXCLUDE = {"/triangle-", "favicon","android-logo",
    "icon_play.png", "robot-tiny"};
514 
inList(String s, String[] list)515   public static boolean inList(String s, String[] list) {
516     for (String t : list) {
517       if (s.contains(t)) {
518         return true;
519       }
520     }
521     return false;
522   }
523 
524   /**
525   * Normalize an img src url by removing docRoot and leading
526   * slash for local image references. These are added later
527   * in js to support offline mode and keep path reference
528   * format consistent with hrefs.
529   *
530   * @param url Abs or rel url sourced from img src.
531   * @return Normalized url if qualified, else empty
532   */
getImageUrlNormalized(String url)533   public static String getImageUrlNormalized(String url) {
534     String absUrl = "";
535     // validate to avoid choosing using specific images
536     if ((url != null) && (!url.equals("")) && (!inList(url, IMAGE_EXCLUDE))) {
537       absUrl = url.replace("{@docRoot}", "");
538       absUrl = absUrl.replaceFirst("^/(?!/)", "");
539     }
540     return absUrl;
541   }
542 
543   /**
544   * Normalize an href url by removing docRoot and leading
545   * slash for local image references. These are added later
546   * in js to support offline mode and keep path reference
547   * format consistent with hrefs.
548   *
549   * @param url Abs or rel page url sourced from href
550   * @return Normalized url, either abs or rel to root
551   */
getPageUrlNormalized(String url)552   public static String getPageUrlNormalized(String url) {
553     String absUrl = "";
554 
555     if ((url !=null) && (!url.equals(""))) {
556       absUrl = url.replace("{@docRoot}", "");
557       if (Doclava.USE_DEVSITE_LOCALE_OUTPUT_PATHS) {
558         absUrl = absUrl.replaceFirst("^en/", "");
559       }
560       absUrl = absUrl.replaceFirst("^/(?!/)", "");
561     }
562     return absUrl;
563   }
564 
565   /**
566   * Given a metadata node, add it as a child of a root node based on its
567   * type. If there is no root node that matches the node's type, create one
568   * and add the metadata node as a child node.
569   *
570   * @param gNode The node to attach to a root node or add as a new root node.
571   * @param rootList The current list of root nodes.
572   * @return The updated list of root nodes.
573   */
appendMetaNodeByLang(Node gNode, List<Node> rootList)574   public static List<Node> appendMetaNodeByLang(Node gNode, List<Node> rootList) {
575 
576     String nodeLang = gNode.getLang();
577     boolean matched = false;
578     for (Node n : rootList) {
579       if (n.getLang().equals(nodeLang)) {  //find any matching lang node
580         appendMetaNodeByType(gNode,n.getChildren());
581         //n.getChildren().add(gNode);
582         matched = true;
583         break; // add to the first root node only
584       } // tag did not match
585     } // end rootnodes matching iterator
586     if (!matched) {
587       List<Node> mlangList = new ArrayList<Node>(); // list of file objects that have a given lang
588       //mlangList.add(gNode);
589       Node tnode = new Node.Builder().setChildren(mlangList).setLang(nodeLang).build();
590       rootList.add(tnode);
591       appendMetaNodeByType(gNode, mlangList);
592     }
593     return rootList;
594   }
595 
596   /**
597   * Given a metadata node, add it as a child of a root node based on its
598   * type. If there is no root node that matches the node's type, create one
599   * and add the metadata node as a child node.
600   *
601   * @param gNode The node to attach to a root node or add as a new root node.
602   * @param rootList The current list of root nodes.
603   * @return The updated list of root nodes.
604   */
appendMetaNodeByType(Node gNode, List<Node> rootList)605   public static List<Node> appendMetaNodeByType(Node gNode, List<Node> rootList) {
606 
607     String nodeTags = gNode.getType();
608     boolean matched = false;
609     for (Node n : rootList) {
610       if (n.getType().equals(nodeTags)) {  //find any matching type node
611         n.getChildren().add(gNode);
612         matched = true;
613         break; // add to the first root node only
614       } // tag did not match
615     } // end rootnodes matching iterator
616     if (!matched) {
617       List<Node> mtaglist = new ArrayList<Node>(); // list of file objects that have a given type
618       mtaglist.add(gNode);
619       Node tnode = new Node.Builder().setChildren(mtaglist).setType(nodeTags).build();
620       rootList.add(tnode);
621     }
622     return rootList;
623   }
624 
625   /**
626   * Given a metadata node, add it as a child of a root node based on its
627   * tag. If there is no root node matching the tag, create one for it
628   * and add the metadata node as a child node.
629   *
630   * @param gNode The node to attach to a root node or add as a new root node.
631   * @param rootTagNodesList The current list of root nodes.
632   * @return The updated list of root nodes.
633   */
appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList)634   public static List<Node> appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList) {
635 
636     for (int iter = 0; iter < gNode.getChildren().size(); iter++) {
637       if (gNode.getChildren().get(iter).getTags() != null) {
638         List<String> nodeTags = gNode.getChildren().get(iter).getTags();
639         boolean matched = false;
640         for (String t : nodeTags) { //process each of the meta.tags
641           for (Node n : rootTagNodesList) {
642             if (n.getLabel().equals(t.toString())) {
643               n.getTags().add(String.valueOf(iter));
644               matched = true;
645               break; // add to the first root node only
646             } // tag did not match
647           } // end rootnodes matching iterator
648           if (!matched) {
649             List<String> mtaglist = new ArrayList<String>(); // list of objects with a given tag
650             mtaglist.add(String.valueOf(iter));
651             Node tnode = new Node.Builder().setLabel(t.toString()).setTags(mtaglist).build();
652             rootTagNodesList.add(tnode);
653           }
654         }
655       }
656     }
657     return rootTagNodesList;
658   }
659 
660   /**
661   * Append the contents of jd_extras to jd_lists_unified for each language.
662   *
663   * @param extrasFilename The lang-specific extras file to append.
664   * @param unifiedFilename The lang-specific unified metadata file.
665   */
appendExtrasMetadata(String extrasFilename, String unifiedFilename)666   public static void appendExtrasMetadata (String extrasFilename, String unifiedFilename) {
667 
668     File f = new File(ClearPage.outputDir + "/" + extrasFilename);
669     if (f.exists() && !f.isDirectory()) {
670       ClearPage.copyFile(true, f, unifiedFilename, true);
671     }
672   }
673 
674   public static final Comparator<Node> BY_TAG_NAME = new Comparator<Node>() {
675     public int compare (Node one, Node other) {
676       return one.getLabel().compareTo(other.getLabel());
677     }
678   };
679 
680   public static final Comparator<Node> BY_TYPE_NAME = new Comparator<Node>() {
681     public int compare (Node one, Node other) {
682       return one.getType().compareTo(other.getType());
683     }
684   };
685 
686     public static final Comparator<Node> BY_LANG_NAME = new Comparator<Node>() {
687     public int compare (Node one, Node other) {
688       return one.getLang().compareTo(other.getLang());
689     }
690   };
691 
692   /**
693   * A node for storing page metadata. Use Builder.build() to instantiate.
694   */
695   public static class Node {
696 
private String mLabel; // holds page.title or similar identifier
private String mCategory; // subtabs, example 'training' 'guides'
private String mSummary; // Summary for card or similar use
private String mLink; //link href for item click
private String mGroup; // from sample.group in _index.jd
private List<String> mKeywords; // from page.tags
private List<String> mTags; // from meta.tags
private String mImage; // holds an href, fully qualified or relative to root
private List<Node> mChildren; // child nodes; root nodes use this to hold their members
private String mLang; // language string; page nodes get it from getLangStringNormalized
private String mType; // design, develop, distribute, youtube, blog, etc
private String mTimestamp; // optional timestamp eg 1447452827
709 
/**
 * Creates a node holding a copy of every value currently set on the
 * builder. List values are shared with the builder, not copied.
 *
 * @param builder The builder to read the field values from.
 */
private Node(Builder builder) {
  // Identity and presentation.
  this.mLabel = builder.mLabel;
  this.mSummary = builder.mSummary;
  this.mLink = builder.mLink;
  this.mImage = builder.mImage;
  this.mTimestamp = builder.mTimestamp;

  // Classification.
  this.mCategory = builder.mCategory;
  this.mGroup = builder.mGroup;
  this.mLang = builder.mLang;
  this.mType = builder.mType;
  this.mKeywords = builder.mKeywords;
  this.mTags = builder.mTags;

  // Tree structure.
  this.mChildren = builder.mChildren;
}
724 
725     private static class Builder {
726       private String mLabel, mCategory, mSummary, mLink, mGroup, mImage, mLang, mType, mTimestamp;
727       private List<String> mKeywords = null;
728       private List<String> mTags = null;
729       private List<Node> mChildren = null;
setLabel(String mLabel)730       public Builder setLabel(String mLabel) { this.mLabel = mLabel; return this;}
setCategory(String mCategory)731       public Builder setCategory(String mCategory) {
732         this.mCategory = mCategory; return this;
733       }
setSummary(String mSummary)734       public Builder setSummary(String mSummary) {this.mSummary = mSummary; return this;}
setLink(String mLink)735       public Builder setLink(String mLink) {this.mLink = mLink; return this;}
setGroup(String mGroup)736       public Builder setGroup(String mGroup) {this.mGroup = mGroup; return this;}
setKeywords(List<String> mKeywords)737       public Builder setKeywords(List<String> mKeywords) {
738         this.mKeywords = mKeywords; return this;
739       }
setTags(List<String> mTags)740       public Builder setTags(List<String> mTags) {this.mTags = mTags; return this;}
setImage(String mImage)741       public Builder setImage(String mImage) {this.mImage = mImage; return this;}
setChildren(List<Node> mChildren)742       public Builder setChildren(List<Node> mChildren) {this.mChildren = mChildren; return this;}
setLang(String mLang)743       public Builder setLang(String mLang) {this.mLang = mLang; return this;}
setType(String mType)744       public Builder setType(String mType) {this.mType = mType; return this;}
setTimestamp(String mTimestamp)745       public Builder setTimestamp(String mTimestamp) {this.mTimestamp = mTimestamp; return this;}
build()746       public Node build() {return new Node(this);}
747     }
748 
749     /**
750     * Render a tree of metadata nodes organized by type.
751     * @param buf Output buffer to render to.
752     */
renderTypeResources(StringBuilder buf)753     void renderTypeResources(StringBuilder buf) {
754       List<Node> list = mChildren; //list of type rootnodes
755       if (list == null || list.size() == 0) {
756         buf.append("null");
757       } else {
758         final int n = list.size();
759         for (int i = 0; i < n; i++) {
760           buf.append("var " + list.get(i).mType.toUpperCase() + "_RESOURCES = [");
761           list.get(i).renderTypes(buf); //render this type's children
762           buf.append("\n];\n\n");
763         }
764       }
765     }
766 
767     /**
768     * Render a tree of metadata nodes organized by lang.
769     * @param buf Output buffer to render to.
770     */
renderLangResources(StringBuilder buf, String langname)771     void renderLangResources(StringBuilder buf, String langname) {
772       List<Node> list = mChildren; //list of type rootnodes
773       if (list == null || list.size() == 0) {
774         buf.append("null");
775       } else {
776         final int n = list.size();
777         for (int i = 0; i < n; i++) {
778           buf.append("METADATA['" + langname + "']." + list.get(i).mType + " = [");
779           list.get(i).renderTypes(buf); //render this lang's children
780           buf.append("\n];\n\n");
781         }
782       }
783     }
784 
785     /**
786     * Render a tree of metadata nodes of type 'develop' to extract
787     * samples metadata. Only used by devsite (ds) builds.
788     * @param buf Output buffer to render to.
789     * @return true if samples were rendered to buf
790     */
renderSamplesResources(StringBuilder buf, String langname)791     boolean renderSamplesResources(StringBuilder buf, String langname) {
792       boolean langHasSamples = false;
793       List<Node> list = mChildren; //list of type rootnodes
794       if (list == null || list.size() == 0) {
795         buf.append("null");
796       } else {
797         final int n = list.size();
798         for (int i = 0; i < n; i++) {
799           //samples are always in type 'develop', so restrict
800           if ("develop".equals(list.get(i).mType)) {
801             //render this type's children
802             langHasSamples = list.get(i).renderTypeForSamples(buf);
803           }
804         }
805       }
806       return langHasSamples;
807     }
808 
809     /**
810     * Render all metadata nodes for a specific type.
811     * @param buf Output buffer to render to.
812     */
renderTypes(StringBuilder buf)813     void renderTypes(StringBuilder buf) {
814       List<Node> list = mChildren;
815       if (list == null || list.size() == 0) {
816         buf.append("nulltype");
817       } else {
818         final int n = list.size();
819         for (int i = 0; i < n; i++) {
820           buf.append("\n      {\n");
821           buf.append("        \"title\":\"");
822           renderStrWithUcs(buf, list.get(i).mLabel);
823           buf.append("\",\n" );
824           buf.append("        \"summary\":\"");
825           renderStrWithUcs(buf, list.get(i).mSummary);
826           buf.append("\",\n" );
827           buf.append("        \"url\":\"" + list.get(i).mLink + "\",\n" );
828           if (!"".equals(list.get(i).mImage)) {
829             buf.append("        \"image\":\"" + list.get(i).mImage + "\",\n" );
830           }
831           if (!"".equals(list.get(i).mGroup)) {
832             buf.append("        \"group\":\"");
833             renderStrWithUcs(buf, list.get(i).mGroup);
834             buf.append("\",\n" );
835           }
836           if (!"".equals(list.get(i).mCategory)) {
837             buf.append("        \"category\":\"" + list.get(i).mCategory + "\",\n" );
838           }
839           if ((list.get(i).mType != null) && (list.get(i).mType != "")) {
840             buf.append("        \"type\":\"" + list.get(i).mType + "\",\n");
841           }
842           list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords");
843           list.get(i).renderArrayType(buf, list.get(i).mTags, "tags");
844           if (!"".equals(list.get(i).mTimestamp)) {
845             buf.append("        \"timestamp\":\"" + list.get(i).mTimestamp + "\",\n");
846           }
847           buf.append("        \"lang\":\"" + list.get(i).mLang + "\"" );
848           buf.append("\n      }");
849           if (i != n - 1) {
850             buf.append(", ");
851           }
852         }
853       }
854     }
855 
856     /**
857     * Render all metadata nodes for samples only.
858     * Only used by devsite (ds) builds.
859     * @param buf Output buffer to render to.
860     * @return whether any samples were rendered to buf
861     */
renderTypeForSamples(StringBuilder buf)862     boolean renderTypeForSamples(StringBuilder buf) {
863       boolean typeHasSamples = false;
864       List<Node> list = mChildren;
865       if (list == null || list.size() == 0) {
866         buf.append("nulltype");
867       } else {
868         final int n = list.size();
869         for (int i = 0; i < n; i++) {
870           // valid samples must have category 'samples'
871           if ("samples".equals(list.get(i).mCategory)) {
872             typeHasSamples = true;
873             buf.append("\n      {\n");
874             buf.append("        \"title\":\"");
875             renderStrWithUcs(buf, list.get(i).mLabel);
876             buf.append("\",\n" );
877             buf.append("        \"summary\":\"");
878             renderStrWithUcs(buf, list.get(i).mSummary);
879             buf.append("\",\n" );
880             buf.append("        \"url\":\"" + list.get(i).mLink + "\",\n" );
881             if (!"".equals(list.get(i).mImage)) {
882               buf.append("        \"image\":\"" + list.get(i).mImage + "\",\n" );
883             }
884             if (!"".equals(list.get(i).mGroup)) {
885               buf.append("        \"group\":\"");
886               renderStrWithUcs(buf, list.get(i).mGroup);
887               buf.append("\",\n" );
888             }
889             if (!"".equals(list.get(i).mCategory)) {
890               buf.append("        \"category\":\"" + list.get(i).mCategory + "\",\n" );
891             }
892             if ((list.get(i).mType != null) && (list.get(i).mType != "")) {
893               buf.append("        \"type\":\"" + list.get(i).mType + "\",\n");
894             }
895             list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords");
896             list.get(i).renderArrayType(buf, list.get(i).mTags, "tags");
897             if (!"".equals(list.get(i).mTimestamp)) {
898               buf.append("        \"timestamp\":\"" + list.get(i).mTimestamp + "\",\n");
899             }
900             buf.append("        \"lang\":\"" + list.get(i).mLang + "\"" );
901             buf.append("\n      }");
902             if (i != n - 1) {
903               buf.append(", ");
904             }
905           }
906         }
907       }
908       return typeHasSamples;
909     }
910 
911     /**
912     * Build and render a list of tags associated with each type.
913     * @param buf Output buffer to render to.
914     */
renderTypesByTag(StringBuilder buf)915     void renderTypesByTag(StringBuilder buf) {
916       List<Node> list = mChildren; //list of rootnodes
917       if (list == null || list.size() == 0) {
918         buf.append("null");
919       } else {
920         final int n = list.size();
921         for (int i = 0; i < n; i++) {
922         buf.append("var " + list.get(i).mType.toUpperCase() + "_BY_TAG = {");
923         List<Node> mTagList = new ArrayList(); //list of rootnodes
924         mTagList = appendMetaNodeByTagIndex(list.get(i), mTagList);
925         list.get(i).renderTagIndices(buf, mTagList);
926           buf.append("\n};\n\n");
927         }
928       }
929     }
930 
931     /**
932     * Render a list of tags associated with a type, including the
933     * tag's indices in the type array.
934     * @param buf Output buffer to render to.
935     * @param tagList Node tree of types to render.
936     */
renderTagIndices(StringBuilder buf, List<Node> tagList)937     void renderTagIndices(StringBuilder buf, List<Node> tagList) {
938       List<Node> list = tagList;
939       if (list == null || list.size() == 0) {
940         buf.append("");
941       } else {
942         final int n = list.size();
943         for (int i = 0; i < n; i++) {
944           buf.append("\n    " + list.get(i).mLabel + ":[");
945           renderArrayValue(buf, list.get(i).mTags);
946           buf.append("]");
947           if (i != n - 1) {
948             buf.append(", ");
949           }
950         }
951       }
952     }
953 
954     /**
955     * Render key:arrayvalue pair.
956     * @param buf Output buffer to render to.
957     * @param type The list value to render as an arrayvalue.
958     * @param key The key for the pair.
959     */
renderArrayType(StringBuilder buf, List<String> type, String key)960     void renderArrayType(StringBuilder buf, List<String> type, String key) {
961       buf.append("        \"" + key + "\": [");
962       renderArrayValue(buf, type);
963       buf.append("],\n");
964     }
965 
966     /**
967     * Render an array value to buf, with special handling of unicode characters.
968     * @param buf Output buffer to render to.
969     * @param type The list value to render as an arrayvalue.
970     */
renderArrayValue(StringBuilder buf, List<String> type)971     void renderArrayValue(StringBuilder buf, List<String> type) {
972       List<String> list = type;
973       if (list != null) {
974         final int n = list.size();
975         for (int i = 0; i < n; i++) {
976           String tagval = list.get(i).toString();
977           renderStrWithUcs(buf,tagval);
978           if (i != n - 1) {
979             buf.append(",");
980           }
981         }
982       }
983     }
984 
985     /**
986     * Render a string that can include ucs2 encoded characters.
987     * @param buf Output buffer to render to.
988     * @param chars String to append to buf with any necessary encoding
989     */
renderStrWithUcs(StringBuilder buf, String chars)990     void renderStrWithUcs(StringBuilder buf, String chars) {
991       String strval = chars;
992       final int L = strval.length();
993       for (int t = 0; t < L; t++) {
994         char c = strval.charAt(t);
995         if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_HIGH_SURROGATE ) {
996           // we have a UTF-16 multi-byte character
997           int codePoint = strval.codePointAt(t);
998           int charSize = Character.charCount(codePoint);
999           t += charSize - 1;
1000           buf.append(String.format("\\u%04x",codePoint));
1001         } else if (c >= ' ' && c <= '~' && c != '\\') {
1002           buf.append(c);
1003         } else {
1004           // we are encoding a two byte character
1005           buf.append(String.format("\\u%04x", (int) c));
1006         }
1007       }
1008     }
1009 
getLabel()1010     public String getLabel() {
1011       return mLabel;
1012     }
1013 
setLabel(String label)1014     public void setLabel(String label) {
1015        mLabel = label;
1016     }
1017 
getCategory()1018     public String getCategory() {
1019       return mCategory;
1020     }
1021 
setCategory(String title)1022     public void setCategory(String title) {
1023        mCategory = title;
1024     }
1025 
getSummary()1026     public String getSummary() {
1027       return mSummary;
1028     }
1029 
setSummary(String summary)1030     public void setSummary(String summary) {
1031        mSummary = summary;
1032     }
1033 
getLink()1034     public String getLink() {
1035       return mLink;
1036     }
1037 
setLink(String ref)1038     public void setLink(String ref) {
1039        mLink = ref;
1040     }
1041 
getGroup()1042     public String getGroup() {
1043       return mGroup;
1044     }
1045 
setGroup(String group)1046     public void setGroup(String group) {
1047       mGroup = group;
1048     }
1049 
getTags()1050     public List<String> getTags() {
1051         return mTags;
1052     }
1053 
setTags(String tags)1054     public void setTags(String tags) {
1055       if ("".equals(tags)) {
1056         mTags = null;
1057       } else {
1058         List<String> tagList = new ArrayList();
1059         String[] tagParts = tags.split(",");
1060 
1061         for (String t : tagParts) {
1062           tagList.add(t);
1063         }
1064         mTags = tagList;
1065       }
1066     }
1067 
getKeywords()1068     public List<String> getKeywords() {
1069         return mKeywords;
1070     }
1071 
setKeywords(String keywords)1072     public void setKeywords(String keywords) {
1073       if ("".equals(keywords)) {
1074         mKeywords = null;
1075       } else {
1076         List<String> keywordList = new ArrayList();
1077         String[] keywordParts = keywords.split(",");
1078 
1079         for (String k : keywordParts) {
1080           keywordList.add(k);
1081         }
1082         mKeywords = keywordList;
1083       }
1084     }
1085 
getImage()1086     public String getImage() {
1087         return mImage;
1088     }
1089 
setImage(String ref)1090     public void setImage(String ref) {
1091        mImage = ref;
1092     }
1093 
getChildren()1094     public List<Node> getChildren() {
1095         return mChildren;
1096     }
1097 
setChildren(List<Node> node)1098     public void setChildren(List<Node> node) {
1099         mChildren = node;
1100     }
1101 
getLang()1102     public String getLang() {
1103       return mLang;
1104     }
1105 
setLang(String lang)1106     public void setLang(String lang) {
1107       mLang = lang;
1108     }
1109 
getType()1110     public String getType() {
1111       return mType;
1112     }
1113 
getTimestamp()1114     public String getTimestamp() {
1115       return mTimestamp;
1116     }
1117 
setType(String type)1118     public void setType(String type) {
1119       mType = type;
1120     }
1121 
setTimestamp(String timestamp)1122     public void setTimestamp(String timestamp) {
1123       mTimestamp = timestamp;
1124     }
1125   }
1126 }
1127