1 // Copyright (c) 2011, Mike Samuel
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions
6 // are met:
7 //
8 // Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // Neither the name of the OWASP nor the names of its contributors may
14 // be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
28 
29 package org.owasp.html;
30 
31 import java.util.List;
32 import java.util.Map;
33 import java.util.Set;
34 import java.util.regex.Pattern;
35 
36 import javax.annotation.Nullable;
37 import javax.annotation.concurrent.NotThreadSafe;
38 
39 import com.google.common.base.Predicate;
40 import com.google.common.collect.ImmutableList;
41 import com.google.common.collect.ImmutableMap;
42 import com.google.common.collect.ImmutableSet;
43 import com.google.common.collect.Maps;
44 import com.google.common.collect.Sets;
45 
46 
47 /**
48  * Conveniences for configuring policies for the {@link HtmlSanitizer}.
49  *
50  * <h3>Usage</h3>
51  * <p>
52  * To create a policy, first construct an instance of this class; then call
53  * <code>allow&hellip;</code> methods to turn on tags, attributes, and other
54  * processing modes; and finally call <code>build(renderer)</code> or
55  * <code>toFactory()</code>.
56  * </p>
57  * <pre class="prettyprint lang-java">
58  * // Define the policy.
59  * Function&lt;HtmlStreamEventReceiver, HtmlSanitizer.Policy&gt; policy
60  *     = new HtmlPolicyBuilder()
61  *         .allowElements("a", "p")
62  *         .allowAttributes("href").onElements("a")
63  *         .toFactory();
64  *
65  * // Sanitize your output.
66  * HtmlSanitizer.sanitize(myHtml, policy.apply(myHtmlStreamRenderer));
67  * </pre>
68  *
69  * <h3>Embedded Content</h3>
70  * <p>
71  * Embedded URLs are filtered by
72  * {@link HtmlPolicyBuilder#allowUrlProtocols protocol}.
73  * There is a {@link HtmlPolicyBuilder#allowStandardUrlProtocols canned policy}
 * so you can easily white-list widely used protocols that don't violate the
 * current page's origin.  See "Customization" below for ways to do further
76  * filtering.  If you allow links it might be worthwhile to
77  * {@link HtmlPolicyBuilder#requireRelNofollowOnLinks() require}
78  * {@code rel=nofollow}.
79  * </p>
80  * <p>
81  * This class simply throws out all embedded JS.
82  * Use a custom element or attribute policy to allow through
83  * signed or otherwise known-safe code.
84  * Check out the Caja project if you need a way to contain third-party JS.
85  * </p>
86  * <p>
87  * This class does not attempt to faithfully parse and sanitize CSS.
88  * It does provide {@link HtmlPolicyBuilder#allowStyling() one} styling option
89  * that allows through a few CSS properties that allow textual styling, but that
90  * disallow image loading, history stealing, layout breaking, code execution,
91  * etc.
92  * </p>
93  *
94  * <h3>Customization</h3>
95  * <p>
96  * You can easily do custom processing on tags and attributes by supplying your
97  * own {@link ElementPolicy element policy} or
98  * {@link AttributePolicy attribute policy} when calling
99  * <code>allow&hellip;</code>.
100  * E.g. to convert headers into {@code <div>}s, you could use an element policy
101  * </p>
102  * <pre class="prettyprint lang-java">
103  * new HtmlPolicyBuilder()
 *   .allowElements(
 *     new ElementPolicy() {
 *       public String apply(String elementName, List&lt;String&gt; attributes) {
107  *         attributes.add("class");
108  *         attributes.add("header-" + elementName);
109  *         return "div";
110  *       }
111  *     },
112  *     "h1", "h2", "h3", "h4", "h5", "h6")
113  *   .build(outputChannel)
114  * </pre>
115  *
116  * <h3>Rules of Thumb</h3>
117  * <p>
118  * Throughout this class, several rules hold:
119  * <ul>
120  *   <li>Everything is denied by default.  There are
121  *     <code>disallow&hellip;</code> methods, but those reverse
122  *     allows instead of rolling back overly permissive defaults.
123  *   <li>The order of allows and disallows does not matter.
124  *     Disallows trump allows whether they occur before or after them.
125  *     The only method that needs to be called in a particular place is
126  *     {@link HtmlPolicyBuilder#build}.
127  *     Allows or disallows after {@code build} is called have no
128  *     effect on the already built policy.
129  *   <li>Element and attribute policies are applied in the following order:
130  *     element specific attribute policy, global attribute policy, element
131  *     policy.
132  *     Element policies come last so they can observe all the post-processed
133  *     attributes, and so they can add attributes that are exempt from
134  *     attribute policies.
135  *     Element specific policies go first, so they can normalize content to
136  *     a form that might be acceptable to a more simplistic global policy.
137  * </ul>
138  *
139  * <h3>Thread safety and efficiency</h3>
140  * <p>
141  * This class is not thread-safe.  The resulting policy will not violate its
142  * security guarantees as a result of race conditions, but is not thread safe
143  * because it maintains state to track whether text inside disallowed elements
144  * should be suppressed.
145  * <p>
146  * The resulting policy can be reused, but if you use the
147  * {@link HtmlPolicyBuilder#toFactory()} method instead of {@link #build}, then
148  * binding policies to output channels is cheap so there's no need.
149  * </p>
150  *
151  * @author Mike Samuel <mikesamuel@gmail.com>
152  */
153 @TCB
154 @NotThreadSafe
155 public class HtmlPolicyBuilder {
  /**
   * The default set of elements that are removed if they have no attributes.
   * Since {@code <img>} is in this set, by default, a policy will remove
   * {@code <img src=javascript:alert(1337)>} because its URL is not allowed
   * and it has no other attributes that would warrant it appearing in the
   * output.
   */
  public static final ImmutableSet<String> DEFAULT_SKIP_IF_EMPTY
      = ImmutableSet.of("a", "font", "img", "input", "span");

  /** Maps canonical element names to the joined policy for that element. */
  private final Map<String, ElementPolicy> elPolicies = Maps.newLinkedHashMap();
  /**
   * Element-specific attribute policies: maps a canonical element name to a
   * map from canonical attribute name to the joined policy for that pair.
   */
  private final Map<String, Map<String, AttributePolicy>> attrPolicies
      = Maps.newLinkedHashMap();
  /** Maps canonical attribute names to policies that apply on any element. */
  private final Map<String, AttributePolicy> globalAttrPolicies
      = Maps.newLinkedHashMap();
  /** Lower-cased URL protocols that have been allowed and not disallowed. */
  private final Set<String> allowedProtocols = Sets.newLinkedHashSet();
  /**
   * Canonical names of elements that are dropped when they have no attributes.
   * Starts out as a mutable copy of {@link #DEFAULT_SKIP_IF_EMPTY}.
   */
  private final Set<String> skipIfEmpty = Sets.newLinkedHashSet(
      DEFAULT_SKIP_IF_EMPTY);
  /**
   * Maps canonical element names to whether text is allowed inside them.
   * Only entries whose value is {@code true} reach the built policy.
   */
  private final Map<String, Boolean> textContainers = Maps.newLinkedHashMap();
  /** True once {@link #requireRelNofollowOnLinks()} has been called. */
  private boolean requireRelNofollowOnLinks;
176 
177   /**
178    * Allows the named elements.
179    */
allowElements(String... elementNames)180   public HtmlPolicyBuilder allowElements(String... elementNames) {
181     return allowElements(ElementPolicy.IDENTITY_ELEMENT_POLICY, elementNames);
182   }
183 
184   /**
185    * Disallows the named elements.  Elements are disallowed by default, so
186    * there is no need to disallow elements, unless you are making an exception
187    * based on an earlier allow.
188    */
disallowElements(String... elementNames)189   public HtmlPolicyBuilder disallowElements(String... elementNames) {
190     return allowElements(ElementPolicy.REJECT_ALL_ELEMENT_POLICY, elementNames);
191   }
192 
193   /**
194    * Allow the given elements with the given policy.
195    *
196    * @param policy May remove or add attributes, change the element name, or
197    *    deny the element.
198    */
allowElements( ElementPolicy policy, String... elementNames)199   public HtmlPolicyBuilder allowElements(
200       ElementPolicy policy, String... elementNames) {
201     invalidateCompiledState();
202     for (String elementName : elementNames) {
203       elementName = HtmlLexer.canonicalName(elementName);
204       ElementPolicy newPolicy = ElementPolicy.Util.join(
205           elPolicies.get(elementName), policy);
206       // Don't remove if newPolicy is the always reject policy since we want
207       // that to infect later allowElement calls for this particular element
208       // name.  rejects should have higher priority than allows.
209       elPolicies.put(elementName, newPolicy);
210       if (!textContainers.containsKey(elementName)
211           && TagBalancingHtmlStreamEventReceiver
212               .allowsPlainTextualContent(elementName)) {
213         textContainers.put(elementName, true);
214       }
215     }
216     return this;
217   }
218 
219   /**
220    * A canned policy that allows a number of common formatting elements.
221    */
allowCommonInlineFormattingElements()222   public HtmlPolicyBuilder allowCommonInlineFormattingElements() {
223     return allowElements(
224         "b", "i", "font", "s", "u", "o", "sup", "sub", "ins", "del", "strong",
225         "strike", "tt", "code", "big", "small", "br", "span");
226   }
227 
228   /**
229    * A canned policy that allows a number of common block elements.
230    */
allowCommonBlockElements()231   public HtmlPolicyBuilder allowCommonBlockElements() {
232     return allowElements(
233         "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "li",
234         "blockquote");
235   }
236 
237   /**
238    * Allows text content in the named elements.
239    * By default, text content is allowed in any
240    * {@link #allowElements allowed elements} that can contain character data per
241    * the HTML5 spec, but text content is not allowed by default in elements that
242    * contain content of other kinds (like JavaScript in {@code <script>}
243    * elements.
244    * <p>
245    * To write a policy that whitelists {@code <script>} or {@code <style>}
246    * elements, first {@code allowTextIn("script")}.
247    */
allowTextIn(String... elementNames)248   public HtmlPolicyBuilder allowTextIn(String... elementNames) {
249     invalidateCompiledState();
250     for (String elementName : elementNames) {
251       elementName = HtmlLexer.canonicalName(elementName);
252       textContainers.put(elementName, true);
253     }
254     return this;
255   }
256 
disallowTextIn(String... elementNames)257   public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
258     invalidateCompiledState();
259     for (String elementName : elementNames) {
260       elementName = HtmlLexer.canonicalName(elementName);
261       textContainers.put(elementName, false);
262     }
263     return this;
264   }
265 
266   /**
267    * Assuming the given elements are allowed, allows them to appear without
268    * attributes.
269    *
270    * @see #DEFAULT_SKIP_IF_EMPTY
271    * @see #disallowWithoutAttributes
272    */
allowWithoutAttributes(String... elementNames)273   public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
274     invalidateCompiledState();
275     for (String elementName : elementNames) {
276       elementName = HtmlLexer.canonicalName(elementName);
277       skipIfEmpty.remove(elementName);
278     }
279     return this;
280   }
281 
282   /**
283    * Disallows the given elements from appearing without attributes.
284    *
285    * @see #DEFAULT_SKIP_IF_EMPTY
286    * @see #allowWithoutAttributes
287    */
disallowWithoutAttributes(String... elementNames)288   public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
289     invalidateCompiledState();
290     for (String elementName : elementNames) {
291       elementName = HtmlLexer.canonicalName(elementName);
292       skipIfEmpty.add(elementName);
293     }
294     return this;
295   }
296 
297   /**
298    * Returns an object that lets you associate policies with the given
299    * attributes, and allow them globally or on specific elements.
300    */
allowAttributes(String... attributeNames)301   public AttributeBuilder allowAttributes(String... attributeNames) {
302     ImmutableList.Builder<String> b = ImmutableList.builder();
303     for (String attributeName : attributeNames) {
304       b.add(HtmlLexer.canonicalName(attributeName));
305     }
306     return new AttributeBuilder(b.build());
307   }
308 
309   /**
310    * Reverse an earlier attribute {@link #allowAttributes allow}.
311    * <p>
312    * For this to have an effect you must call at least one of
313    * {@link AttributeBuilder#globally} and {@link AttributeBuilder#onElements}.
314    * <p>
315    * Attributes are disallowed by default, so there is no need to call this
316    * with a laundry list of attribute/element pairs.
317    */
disallowAttributes(String... attributeNames)318   public AttributeBuilder disallowAttributes(String... attributeNames) {
319     return this.allowAttributes(attributeNames)
320         .matching(AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY);
321   }
322 
323 
allowAttributesGlobally( AttributePolicy policy, List<String> attributeNames)324   private HtmlPolicyBuilder allowAttributesGlobally(
325       AttributePolicy policy, List<String> attributeNames) {
326     invalidateCompiledState();
327     for (String attributeName : attributeNames) {
328       // We reinterpret the identity policy later via policy joining since its
329       // the default passed from the policy-less method, but we don't do
330       // anything here since we don't know until build() is called whether the
331       // policy author wants to allow certain URL protocols or wants to deal
332       // with styles.
333       AttributePolicy oldPolicy = globalAttrPolicies.get(attributeName);
334       globalAttrPolicies.put(
335           attributeName, AttributePolicy.Util.join(oldPolicy, policy));
336     }
337     return this;
338   }
339 
allowAttributesOnElements( AttributePolicy policy, List<String> attributeNames, List<String> elementNames)340   private HtmlPolicyBuilder allowAttributesOnElements(
341       AttributePolicy policy, List<String> attributeNames,
342       List<String> elementNames) {
343     invalidateCompiledState();
344     for (String elementName : elementNames) {
345       Map<String, AttributePolicy> policies = attrPolicies.get(elementName);
346       if (policies == null) {
347         policies = Maps.newLinkedHashMap();
348         attrPolicies.put(elementName, policies);
349       }
350       for (String attributeName : attributeNames) {
351         AttributePolicy oldPolicy = policies.get(attributeName);
352         policies.put(
353             attributeName,
354             AttributePolicy.Util.join(oldPolicy, policy));
355       }
356     }
357     return this;
358   }
359 
360   /**
361    * Adds <a href="http://en.wikipedia.org/wiki/Nofollow"><code>rel=nofollow</code></a>
362    * to links.
363    */
requireRelNofollowOnLinks()364   public HtmlPolicyBuilder requireRelNofollowOnLinks() {
365     invalidateCompiledState();
366     this.requireRelNofollowOnLinks = true;
367     return this;
368   }
369 
370   /**
371    * Adds to the set of protocols that are allowed in URL attributes.
372    * For each URL attribute that is allowed, we further constrain it by
373    * only allowing the value through if it specifies no protocol, or if it
374    * specifies one in the allowedProtocols white-list.
375    * This is done regardless of whether any protocols have been allowed, so
376    * allowing the attribute "href" globally with the identity policy but
377    * not white-listing any protocols, effectively disallows the "href"
378    * attribute globally.
379    * <p>
380    * Do not allow any <code>*script</code> such as <code>javascript</code>
381    * protocols if you might use this policy with untrusted code.
382    */
allowUrlProtocols(String... protocols)383   public HtmlPolicyBuilder allowUrlProtocols(String... protocols) {
384     invalidateCompiledState();
385     // If there is at least one allowed protocol, then allow URLs and
386     // add a filter that checks href and src values.
387 
388     // Do not allow href and srcs through otherwise, and only allow on images
389     // and links.
390     for (String protocol : protocols) {
391       protocol = Strings.toLowerCase(protocol);
392       allowedProtocols.add(protocol);
393     }
394     return this;
395   }
396 
397   /**
398    * Reverses a decision made by {@link #allowUrlProtocols}.
399    */
disallowUrlProtocols(String... protocols)400   public HtmlPolicyBuilder disallowUrlProtocols(String... protocols) {
401     invalidateCompiledState();
402     for (String protocol : protocols) {
403       protocol = Strings.toLowerCase(protocol);
404       allowedProtocols.remove(protocol);
405     }
406     return this;
407   }
408 
409   /**
410    * A canned URL protocol policy that allows <code>http</code>,
411    * <code>https</code>, and <code>mailto</code>.
412    */
allowStandardUrlProtocols()413   public HtmlPolicyBuilder allowStandardUrlProtocols() {
414     return allowUrlProtocols("http", "https", "mailto");
415   }
416 
417   /**
418    * Convert <code>style="&lt;CSS&gt;"</code> to sanitized CSS which allows
419    * color, font-size, type-face, and other styling using the default schema;
420    * but which does not allow content to escape its clipping context.
421    */
allowStyling()422   public HtmlPolicyBuilder allowStyling() {
423     allowStyling(CssSchema.DEFAULT);
424     return this;
425   }
426 
427   /**
428    * Convert <code>style="&lt;CSS&gt;"</code> to sanitized CSS which allows
429    * color, font-size, type-face, and other styling using the given schema.
430    */
allowStyling(CssSchema whitelist)431   public HtmlPolicyBuilder allowStyling(CssSchema whitelist) {
432     invalidateCompiledState();
433     allowAttributesGlobally(
434         new StylingPolicy(whitelist), ImmutableList.of("style"));
435     return this;
436   }
437 
  /**
   * Names of HTML attributes whose values are URLs.  Allowed attributes with
   * these names get a URL protocol filter joined in when the policy is
   * compiled.
   * Other attributes, e.g. <code>style</code> may contain URLs even though
   * their values are not URLs.
   */
  private static final Set<String> URL_ATTRIBUTE_NAMES = ImmutableSet.of(
      "action", "archive", "background", "cite", "classid", "codebase", "data",
      "dsync", "formaction", "href", "icon", "longdesc", "manifest", "poster",
      "profile", "src", "srcset", "usemap");
447 
448   /**
449    * Produces a policy based on the allow and disallow calls previously made.
450    *
451    * @param out receives calls to open only tags allowed by
452    *      previous calls to this object.
453    *      Typically a {@link HtmlStreamRenderer}.
454    */
build(HtmlStreamEventReceiver out)455   public HtmlSanitizer.Policy build(HtmlStreamEventReceiver out) {
456     return toFactory().apply(out);
457   }
458 
459   /**
460    * Produces a policy based on the allow and disallow calls previously made.
461    *
462    * @param out receives calls to open only tags allowed by
463    *      previous calls to this object.
464    *      Typically a {@link HtmlStreamRenderer}.
465    * @param listener is notified of dropped tags and attributes so that
466    *      intrusion detection systems can be alerted to questionable HTML.
467    *      If {@code null} then no notifications are sent.
468    * @param context if {@code (listener != null)} then the context value passed
469    *      with alerts.  This can be used to let the listener know from which
470    *      connection or request the questionable HTML was received.
471    */
build( HtmlStreamEventReceiver out, @Nullable HtmlChangeListener<? super CTX> listener, @Nullable CTX context)472   public <CTX> HtmlSanitizer.Policy build(
473       HtmlStreamEventReceiver out,
474       @Nullable HtmlChangeListener<? super CTX> listener,
475       @Nullable CTX context) {
476     return toFactory().apply(out, listener, context);
477   }
478 
479   /**
480    * Like {@link #build} but can be reused to create many different policies
481    * each backed by a different output channel.
482    */
toFactory()483   public PolicyFactory toFactory() {
484     ImmutableSet.Builder<String> textContainers = ImmutableSet.builder();
485     for (Map.Entry<String, Boolean> textContainer
486          : this.textContainers.entrySet()) {
487       if (Boolean.TRUE.equals(textContainer.getValue())) {
488         textContainers.add(textContainer.getKey());
489       }
490     }
491     return new PolicyFactory(compilePolicies(), textContainers.build(),
492                              ImmutableMap.copyOf(globalAttrPolicies));
493   }
494 
  // Speed up subsequent builds by caching the compiled policies.
  // Null until the policies are first compiled, and reset to null by
  // invalidateCompiledState() whenever a mutator changes the builder.
  private transient ImmutableMap<String, ElementAndAttributePolicies>
      compiledPolicies;
498 
499   /** Called by mutators to signal that any compiled policy is out-of-date. */
invalidateCompiledState()500   private void invalidateCompiledState() {
501     compiledPolicies = null;
502   }
503 
compilePolicies()504   private ImmutableMap<String, ElementAndAttributePolicies> compilePolicies() {
505     if (compiledPolicies != null) { return compiledPolicies; }
506 
507     // Copy maps before normalizing in case builder is reused.
508     Map<String, ElementPolicy> elPolicies
509         = Maps.newLinkedHashMap(this.elPolicies);
510     Map<String, Map<String, AttributePolicy>> attrPolicies
511         = Maps.newLinkedHashMap(this.attrPolicies);
512     for (Map.Entry<String, Map<String, AttributePolicy>> e :
513          attrPolicies.entrySet()) {
514       e.setValue(Maps.newLinkedHashMap(e.getValue()));
515     }
516     Map<String, AttributePolicy> globalAttrPolicies
517         = Maps.newLinkedHashMap(this.globalAttrPolicies);
518     Set<String> allowedProtocols = ImmutableSet.copyOf(this.allowedProtocols);
519 
520     // Implement requireRelNofollowOnLinks
521     if (requireRelNofollowOnLinks) {
522       ElementPolicy linkPolicy = elPolicies.get("a");
523       if (linkPolicy == null) {
524         linkPolicy = ElementPolicy.REJECT_ALL_ELEMENT_POLICY;
525       }
526       elPolicies.put(
527           "a",
528           ElementPolicy.Util.join(
529               linkPolicy,
530               new ElementPolicy() {
531                 public String apply(String elementName, List<String> attrs) {
532                   for (int i = 0, n = attrs.size(); i < n; i += 2) {
533                     if ("href".equals(attrs.get(i))) {
534                       attrs.add("rel");
535                       attrs.add("nofollow");
536                       break;
537                     }
538                   }
539                   return elementName;
540                 }
541               }));
542     }
543 
544     // Implement protocol policies.
545     // For each URL attribute that is allowed, we further constrain it by
546     // only allowing the value through if it specifies no protocol, or if it
547     // specifies one in the allowedProtocols white-list.
548     // This is done regardless of whether any protocols have been allowed, so
549     // allowing the attribute "href" globally with the identity policy but
550     // not white-listing any protocols, effectively disallows the "href"
551     // attribute globally.
552     {
553       AttributePolicy urlAttributePolicy;
554       if (allowedProtocols.size() == 3
555           && allowedProtocols.contains("mailto")
556           && allowedProtocols.contains("http")
557           && allowedProtocols.contains("https")) {
558         urlAttributePolicy = StandardUrlAttributePolicy.INSTANCE;
559       } else {
560         urlAttributePolicy = new FilterUrlByProtocolAttributePolicy(
561             allowedProtocols);
562       }
563       Set<String> toGuard = Sets.newLinkedHashSet(URL_ATTRIBUTE_NAMES);
564       for (String urlAttributeName : URL_ATTRIBUTE_NAMES) {
565         if (globalAttrPolicies.containsKey(urlAttributeName)) {
566           toGuard.remove(urlAttributeName);
567           globalAttrPolicies.put(urlAttributeName, AttributePolicy.Util.join(
568               urlAttributePolicy, globalAttrPolicies.get(urlAttributeName)));
569         }
570       }
571       // Implement guards not implemented on global policies in the per-element
572       // policy maps.
573       for (Map.Entry<String, Map<String, AttributePolicy>> e
574            : attrPolicies.entrySet()) {
575         Map<String, AttributePolicy> policies = e.getValue();
576         for (String urlAttributeName : toGuard) {
577           if (policies.containsKey(urlAttributeName)) {
578             policies.put(urlAttributeName, AttributePolicy.Util.join(
579                 urlAttributePolicy, policies.get(urlAttributeName)));
580           }
581         }
582       }
583     }
584 
585     ImmutableMap.Builder<String, ElementAndAttributePolicies> policiesBuilder
586         = ImmutableMap.builder();
587     for (Map.Entry<String, ElementPolicy> e : elPolicies.entrySet()) {
588       String elementName = e.getKey();
589       ElementPolicy elPolicy = e.getValue();
590       if (ElementPolicy.REJECT_ALL_ELEMENT_POLICY.equals(elPolicy)) {
591         continue;
592       }
593 
594       Map<String, AttributePolicy> elAttrPolicies
595           = attrPolicies.get(elementName);
596       if (elAttrPolicies == null) { elAttrPolicies = ImmutableMap.of(); }
597       ImmutableMap.Builder<String, AttributePolicy> attrs
598           = ImmutableMap.builder();
599       for (Map.Entry<String, AttributePolicy> ape : elAttrPolicies.entrySet()) {
600         String attributeName = ape.getKey();
601         // Handle below so we don't end up putting the same key into the map
602         // twice.  ImmutableMap.Builder hates that.
603         if (globalAttrPolicies.containsKey(attributeName)) { continue; }
604         AttributePolicy policy = ape.getValue();
605         if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
606           attrs.put(attributeName, policy);
607         }
608       }
609       for (Map.Entry<String, AttributePolicy> ape
610            : globalAttrPolicies.entrySet()) {
611         String attributeName = ape.getKey();
612         AttributePolicy policy = AttributePolicy.Util.join(
613             elAttrPolicies.get(attributeName), ape.getValue());
614         if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
615           attrs.put(attributeName, policy);
616         }
617       }
618 
619       policiesBuilder.put(
620           elementName,
621           new ElementAndAttributePolicies(
622               elementName,
623               elPolicy, attrs.build(), skipIfEmpty.contains(elementName)));
624     }
625     return compiledPolicies = policiesBuilder.build();
626   }
627 
628   /**
629    * Builds the relationship between attributes, the values that they may have,
630    * and the elements on which they may appear.
631    *
632    * @author Mike Samuel
633    */
634   public final class AttributeBuilder {
635     private final List<String> attributeNames;
636     private AttributePolicy policy = AttributePolicy.IDENTITY_ATTRIBUTE_POLICY;
637 
AttributeBuilder(List<? extends String> attributeNames)638     AttributeBuilder(List<? extends String> attributeNames) {
639       this.attributeNames = ImmutableList.copyOf(attributeNames);
640     }
641 
642     /**
643      * Filters and/or transforms the attribute values
644      * allowed by later {@code allow*} calls.
645      * Multiple calls to {@code matching} are combined so that the policies
646      * receive the value in order, each seeing the value after any
647      * transformation by a previous policy.
648      */
matching(AttributePolicy policy)649     public AttributeBuilder matching(AttributePolicy policy) {
650       this.policy = AttributePolicy.Util.join(this.policy, policy);
651       return this;
652     }
653 
654     /**
655      * Restrict the values allowed by later {@code allow*} calls to those
656      * matching the pattern.
657      * Multiple calls to {@code matching} are combined to restrict to the
658      * intersection of possible matched values.
659      */
matching(final Pattern pattern)660     public AttributeBuilder matching(final Pattern pattern) {
661       return matching(new AttributePolicy() {
662         public @Nullable String apply(
663             String elementName, String attributeName, String value) {
664           return pattern.matcher(value).matches() ? value : null;
665         }
666       });
667     }
668 
669     /**
670      * Restrict the values allowed by later {@code allow*} calls to those
671      * matching the given predicate.
672      * Multiple calls to {@code matching} are combined to restrict to the
673      * intersection of possible matched values.
674      */
matching( final Predicate<? super String> filter)675     public AttributeBuilder matching(
676         final Predicate<? super String> filter) {
677       return matching(new AttributePolicy() {
678         public @Nullable String apply(
679             String elementName, String attributeName, String value) {
680           return filter.apply(value) ? value : null;
681         }
682       });
683     }
684 
685     /**
686      * Restrict the values allowed by later {@code allow*} calls to those
687      * supplied.
688      * Multiple calls to {@code matching} are combined to restrict to the
689      * intersection of possible matched values.
690      */
691     public AttributeBuilder matching(
692         boolean ignoreCase, String... allowedValues) {
693       return matching(ignoreCase, ImmutableSet.copyOf(allowedValues));
694     }
695 
696     /**
697      * Restrict the values allowed by later {@code allow*} calls to those
698      * supplied.
699      * Multiple calls to {@code matching} are combined to restrict to the
700      * intersection of possible matched values.
701      */
702     public AttributeBuilder matching(
703         final boolean ignoreCase, Set<? extends String> allowedValues) {
704       final ImmutableSet<String> allowed = ImmutableSet.copyOf(allowedValues);
705       return matching(new AttributePolicy() {
706         public @Nullable String apply(
707             String elementName, String attributeName, String value) {
708           if (ignoreCase) { value = Strings.toLowerCase(value); }
709           return allowed.contains(value) ? value : null;
710         }
711       });
712     }
713 
714     /**
715      * Allows the given attributes on any elements but filters the
716      * attributes' values based on previous calls to {@code matching(...)}.
717      * Global attribute policies are applied after element specific policies.
718      * Be careful of using this with attributes like <code>type</code> which
719      * have different meanings on different attributes.
720      * Also be careful of allowing globally attributes like <code>href</code>
721      * which can have more far-reaching effects on tags like
722      * <code>&lt;base&gt;</code> and <code>&lt;link&gt;</code> than on
723      * <code>&lt;a&gt;</code> because in the former, they have an effect without
724      * user interaction and can change the behavior of the current page.
725      */
726     public HtmlPolicyBuilder globally() {
727       return HtmlPolicyBuilder.this.allowAttributesGlobally(
728           policy, attributeNames);
729     }
730 
731     /**
732      * Allows the named attributes on the given elements but filters the
733      * attributes' values based on previous calls to {@code matching(...)}.
734      */
735     public HtmlPolicyBuilder onElements(String... elementNames) {
736       ImmutableList.Builder<String> b = ImmutableList.builder();
737       for (String elementName : elementNames) {
738         b.add(HtmlLexer.canonicalName(elementName));
739       }
740       return HtmlPolicyBuilder.this.allowAttributesOnElements(
741           policy, attributeNames, b.build());
742     }
743   }
744 }
745