1 package org.unicode.cldr.util;
2 
3 import java.util.ArrayList;
4 import java.util.Collections;
5 import java.util.Comparator;
6 import java.util.HashMap;
7 import java.util.Iterator;
8 import java.util.LinkedHashMap;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Map.Entry;
12 import java.util.Set;
13 import java.util.TreeSet;
14 import java.util.regex.Matcher;
15 import java.util.regex.Pattern;
16 
17 import org.unicode.cldr.util.CldrUtility.VariableReplacer;
18 import org.unicode.cldr.util.RegexFileParser.RegexLineParser;
19 import org.unicode.cldr.util.RegexFileParser.VariableProcessor;
20 import org.unicode.cldr.util.RegexLookup.Finder;
21 import org.unicode.cldr.util.RegexLookup.Finder.Info;
22 
23 import com.google.common.base.Splitter;
24 import com.ibm.icu.text.Transform;
25 import com.ibm.icu.util.Output;
26 
27 /**
28  * Lookup items according to a set of regex patterns. Returns the value according to the first pattern that matches. Not
29  * thread-safe.
30  *
31  * @param <T>
32  */
33 public class RegexLookup<T> implements Iterable<Map.Entry<Finder, T>> {
34     private VariableReplacer variables = new VariableReplacer();
35     private StorageInterfaceBase<T> storage;
36 //    private StarPatternMap<T> SPEntries;
37 //    private RegexTree<T> RTEntries;
38     private Map<Finder, T> MEntries;
39     private Transform<String, ? extends Finder> patternTransform = RegexFinderTransform;
40     private Transform<String, ? extends T> valueTransform;
41     private Merger<T> valueMerger;
42     private final boolean allowNull = false;
43     private static PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*");
44 
45     public enum LookupType {
46         STAR_PATTERN_LOOKUP, OPTIMIZED_DIRECTORY_PATTERN_LOOKUP, STANDARD
47     }
48 
49     private LookupType _lookupType;
50 
51     /*
52      * STAR_PATTERN_LOOKUP
53      *
54      * When true, RegexLookup can match regex's even faster than the OPTIMIZED_DIRECTORY_PATTERN_LOOKUP below.
55      * It requires some additional structure, such that the only regular expression constructs such as (a|b) occur
56      * in attributes, so that the star pattern for a given path can match the star pattern of a given regular expression,
57      * thus greatly reducing the number of actual regex matches that need to occur.
58      */
59 
60     /*
61      * OPTIMIZED_DIRECTORY_PATTERN_LOOKUP
62      *
63      * When true, RegexLookup can match regex's in O(log(n)) time, where n is the number of regex's stored.
64      * This is provided that all regex patterns follow a directory based format and all directories are separated by a forward slash '/'
65      * for example: ^//ldml/dates/calendars/calendar[@type="([^"']++)"]/alias[@source="locale"][@path="../calendar[@type='([^"']++)']"]
66      *
67      * When false, RegexLookup will match regex's in O(n) time, where n is the number of regex's stored.
68      * However regex's no longer need to follow any specific format (Slower but more versatile).
69      */
70 
RegexLookup(LookupType type)71     public RegexLookup(LookupType type) {
72         _lookupType = type;
73         switch (type) {
74         case STAR_PATTERN_LOOKUP:
75             //   SPEntries = new StarPatternMap<T>();
76             storage = new StarPatternMap<>();
77             break;
78         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
79             //   RTEntries = new RegexTree<T>();
80             storage = new RegexTree<>();
81             break;
82         default:
83             MEntries = new LinkedHashMap<>();
84             break;
85         }
86     }
87 
RegexLookup()88     public RegexLookup() {
89         this(LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP);
90 //        _lookupType = RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP;
91 //        RTEntries = new RegexTree<T>();
92     }
93 
94     public abstract static class Finder {
95         public static class Info {
96             public String[] value;
97         }
98 
99         //   abstract public String[] getInfo();
100 
101         // abstract public boolean find(String item, Object context);
102 
find(String item, Object context, Info info)103         abstract public boolean find(String item, Object context, Info info);
104 
matches(String item, Object context, Info info)105         abstract public boolean matches(String item, Object context, Info info);
106 
getFailPoint(String source)107         public int getFailPoint(String source) {
108             return -1;
109         }
110         // must also define toString
111     }
112 
113     public static class RegexFinder extends Finder {
114         /**
115          * The matcher used by this RegexFinder
116          */
117         private final Matcher matcher;
118 
119         /**
120          * The Pattern used by this RegexFinder
121          */
122         protected final Pattern pattern;
123 
RegexFinder(String pattern)124         public RegexFinder(String pattern) {
125             this.pattern = Pattern.compile(pattern, Pattern.COMMENTS);
126             matcher = this.pattern.matcher("");
127         }
128 
129         /**
130          * Call Matches on the pattern, returning additional information in the Info field,
131          * if it is non null
132          */
133         @Override
matches(String item, Object context, Info info)134         public boolean matches(String item, Object context, Info info) {
135             synchronized (matcher) {
136                 try {
137                     boolean result = matcher.reset(item).matches();
138                     extractInfo(info, result);
139                     return result;
140                 } catch (StringIndexOutOfBoundsException e) {
141                     // We don't know what causes this error (cldrbug 5051) so
142                     // make the exception message more detailed.
143                     throw new IllegalArgumentException("Matching error caused by pattern: ["
144                         + matcher.toString() + "] on text: [" + item + "]", e);
145                 }
146             }
147         }
148 
149         /**
150          * Extract match related information into  the info field, if result is true, and info
151          * is not null.
152          * @param info
153          * @param result
154          */
extractInfo(Info info, boolean result)155         private void extractInfo(Info info, boolean result) {
156             if (result && info != null) {
157                 int limit = matcher.groupCount() + 1;
158                 String[] value = new String[limit];
159                 for (int i = 0; i < limit; ++i) {
160                     value[i] = matcher.group(i);
161                 }
162                 info.value = value;
163             }
164         }
165 
166         /**
167          * Call find() on the pattern, returning additional information in the info field,
168          * if it is non-null
169          */
170         @Override
find(String item, Object context, Info info)171         public boolean find(String item, Object context, Info info) {
172             synchronized (matcher) {
173                 try {
174                     boolean result = matcher.reset(item).find();
175                     extractInfo(info, result);
176                     return result;
177                 } catch (StringIndexOutOfBoundsException e) {
178                     // We don't know what causes this error (cldrbug 5051) so
179                     // make the exception message more detailed.
180                     throw new IllegalArgumentException("Matching error caused by pattern: ["
181                         + matcher.toString() + "] on text: [" + item + "]", e);
182                 }
183             }
184         }
185 
186         @Override
toString()187         public String toString() {
188             // Use pattern here, to avoid having to synchronize on matcher
189             return pattern.pattern();
190         }
191 
192         @Override
equals(Object obj)193         public boolean equals(Object obj) {
194             if (obj == null) {
195                 return false;
196             } else {
197                 return toString().equals(obj.toString());
198             }
199         }
200 
201         @Override
hashCode()202         public int hashCode() {
203             return toString().hashCode();
204         }
205 
206         @Override
getFailPoint(String source)207         public int getFailPoint(String source) {
208             synchronized (matcher) {
209                 return RegexUtilities.findMismatch(matcher, source);
210             }
211         }
212     }
213 
214     private static interface StorageInterfaceBase<T> {
entrySet()215         Set<Entry<Finder, T>> entrySet();
216 
get(Finder finder)217         T get(Finder finder);
218 
get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)219         T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound);
220 
getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)221         List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo);
222 
put(Finder pattern, T value)223         void put(Finder pattern, T value);
224 
size()225         int size();
226     }
227 
228 //    private static class FinderWithInfo {
229 //        Finder _finder;
230 //        Info _info;
231 //        public FinderWithInfo(Finder finder,Info info) {
232 //            _info=info;
233 //            _finder=finder;
234 //        }
235 //    }
236     private static class RegexTree<T> implements StorageInterfaceBase<T> {
237         private RTNode root;
238         private int _size;
239         private RTNodeRankComparator rankComparator = new RTNodeRankComparator();
240 
RegexTree()241         public RegexTree() {
242             root = new RTNode("", null);
243             _size = 0;
244         }
245 
246         @Override
size()247         public int size() {
248             return _size;
249         }
250 
251         @Override
put(Finder pattern, T value)252         public void put(Finder pattern, T value) {
253             root.put(new RTNode(pattern, value, _size));
254             _size++;
255         }
256 
257         @Override
get(Finder finder)258         public T get(Finder finder) {
259             return root.get(finder);
260         }
261 
262         @Override
getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)263         public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) {
264             List<RTNode> list = new ArrayList<>();
265             List<T> retList = new ArrayList<>();
266 
267             root.addToList(pattern, context, list);
268             Collections.sort(list, rankComparator);
269 
270             boolean isFirst = true;
271             if (firstInfo != null && !list.isEmpty()) {
272                 RTNode firstNode = list.get(0);
273                 if (firstNode._info != null) {
274                     firstInfo.value = firstNode._info.value;
275                 }
276             }
277 
278             for (RTNode n : list) {
279                 if (isFirst) {
280                     firstInfo.value = n._info.value;
281                     isFirst = false;
282                 }
283             }
284 
285             for (RTNode n : list) {
286                 retList.add(n._val);
287                 if (matcherList != null) {
288                     matcherList.add(n._finder);
289                 }
290             }
291 
292             return retList;
293         }
294 
295         @Override
get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)296         public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) {
297             List<Finder> matcherList = new ArrayList<>();
298             Output<String[]> firstInfo = new Output<>();
299             List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first
300             if (arguments != null) {
301 //               arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null;
302                 arguments.value = firstInfo.value;
303 //               arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null;
304                 arguments.value = firstInfo.value;
305             }
306             if (matcherFound != null) {
307                 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null;
308             }
309             return (matches.size() > 0) ? matches.get(0) : null;
310         }
311 
312         @Override
entrySet()313         public Set<Entry<Finder, T>> entrySet() {
314             LinkedHashMap<Finder, T> ret = new LinkedHashMap<>();
315             TreeSet<RTNode> s = new TreeSet<>(rankComparator);
316             root.addToEntrySet(s);
317 
318             for (RTNode n : s) {
319                 ret.put(n._finder, n._val);
320             }
321 
322             return ret.entrySet();
323         }
324 
325         public class RTNode extends NodeBase<T> {
326 //            Finder _finder;
327 //            T _val;
328             List<RTNode> _children = new ArrayList<>();
329             int _rank = -1; //rank -1 means the node was not inserted, but only used for structural purposes
330 
331             //constructor for regular nodes with a Finder
RTNode(Finder finder, T val, int rank)332             public RTNode(Finder finder, T val, int rank) {
333                 super(finder, val);
334 //                _finder = finder;
335 //                _val = val;
336                 _rank = rank;
337             }
338 
339             //constructors for nodes without a Finder
RTNode(String key, T val)340             public RTNode(String key, T val) {
341                 super(new RegexFinder(key), val);
342 //                _finder = new RegexFinder(key);
343 //                _val = val;
344 //                _rank = -1;
345                 _info = new Info();
346             }
347 
put(RTNode node)348             public void put(RTNode node) {
349                 if (_children.size() == 0) {
350                     _children.add(node);
351                 } else {
352                     String maxSimilarChars = ""; //most similar characters up to the last similar directory
353                     int insertIndex = 0;
354                     for (int i = 0; i < _children.size(); i++) {
355                         RTNode child = _children.get(i);
356                         String childFinderPattern = child._finder.toString();
357                         if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') {
358                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$"
359                         } else if (child._rank == -1) {
360                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*"
361                         }
362 
363                         //check if child has the same Finder as node to insert, then replace it
364                         if (node._finder.equals(child._finder)) {
365                             child._finder = node._finder;
366                             child._val = node._val;
367                             if (child._rank == -1) {
368                                 child._rank = node._rank;
369                             } else {
370                                 _size--;
371                             }
372                             return;
373                         }
374 
375                         //check if child is the parent of node
376                         if (child._rank == -1 && node._finder.toString().startsWith(childFinderPattern)) {
377                             child.put(node);
378                             return;
379                         }
380 
381                         //if not parent then check if candidate for most similar RTNode
382                         String gcp = greatestCommonPrefix(childFinderPattern, node._finder.toString());
383                         gcp = removeExtraChars(gcp);
384                         if (gcp.length() > maxSimilarChars.length()) {
385                             maxSimilarChars = gcp;
386                             insertIndex = i;
387                         }
388                     }
389 
390                     String finderPattern = this._finder.toString();
391                     if (finderPattern.length() > 0 && finderPattern.charAt(finderPattern.length() - 1) == '$') {
392                         finderPattern = finderPattern.substring(0, finderPattern.length() - 1); //takes into account the added "$"
393                     } else if (!(finderPattern.equals("")) && this._rank == -1) {
394                         finderPattern = finderPattern.substring(0, finderPattern.length() - 2); //takes into account the added ".*"
395                     }
396 
397                     if ((maxSimilarChars).equals(finderPattern)) { //add under this if no similar children
398                         _children.add(node);
399                     } else {
400                         //create the common parent of the chosen candidate above and node, then add to the insert index
401                         RTNode newParent = new RTNode(maxSimilarChars + ".*", null);
402                         newParent._children.add(this._children.get(insertIndex));
403                         newParent._children.add(node);
404                         this._children.remove(insertIndex);
405                         this._children.add(insertIndex, newParent);
406                     }
407                 }
408             }
409 
410             //takes a string in a directory regex form and removes all characters up to the last full directory
removeExtraChars(String input)411             private String removeExtraChars(String input) {
412                 String ret = input.substring(0, Math.max(0, input.lastIndexOf('/')));
413                 while ((ret.lastIndexOf('(') != -1 && ret.lastIndexOf('(') > ret.lastIndexOf(')')) ||
414                     (ret.lastIndexOf('[') != -1 && ret.lastIndexOf('[') > ret.lastIndexOf(']')) ||
415                     (ret.lastIndexOf('{') != -1 && ret.lastIndexOf('{') > ret.lastIndexOf('}'))) {
416                     ret = ret.substring(0, Math.max(0, ret.lastIndexOf('/')));
417                 }
418                 return ret;
419             }
420 
421             //traverse tree to get value
get(Finder finder)422             public T get(Finder finder) {
423                 T ret = null; //return value
424 
425                 if (_children.size() == 0) {
426                     return null;
427                 } else {
428                     for (RTNode child : _children) {
429 
430                         //check if child is the node
431                         if (child._rank != -1 && finder.equals(child._finder)) {
432                             return child._val;
433                         }
434 
435                         String childFinderPattern = child._finder.toString();
436 
437                         if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') {
438                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$"
439                         } else if (child._rank == -1) {
440                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*"
441                         }
442 
443                         //check if child is the parent of node
444                         if (finder.toString().startsWith(childFinderPattern)) {
445                             ret = child.get(finder);
446                             if (ret != null) {
447                                 break;
448                             }
449                         }
450                     }
451 
452                     return ret;
453                 }
454             }
455 
456             //traverse tree to get an entry set
addToEntrySet(TreeSet<RTNode> s)457             public void addToEntrySet(TreeSet<RTNode> s) {
458                 if (_children.size() == 0) {
459                     return;
460                 } else {
461                     for (RTNode child : _children) {
462                         if (child._rank != -1) {
463                             s.add(child);
464                         }
465                         child.addToEntrySet(s);
466                     }
467                 }
468             }
469 
470             //traverse tree to get list of all values who's key matcher matches pattern
addToList(String pattern, Object context, List<RTNode> list)471             public void addToList(String pattern, Object context, List<RTNode> list) {
472                 if (_children.size() == 0) {
473                     return;
474                 } else {
475                     Info firstInfo = new Info();
476                     for (RTNode child : _children) {
477                         boolean found;
478                         synchronized (child._finder) {
479                             found = child._finder.find(pattern, context, firstInfo);
480                         }
481 
482                         //check if child matches pattern
483                         if (found) {
484                             if (child._rank != -1) {
485                                 list.add(child);
486                             }
487                             // if this node's info value is unset, set it to the result of the
488                             // lookup
489 //                            if (child._info!=null && child._info.value==null) {
490                             if (child._info != null) {
491                                 // set the value to the result of the last find
492                                 child._info.value = firstInfo.value;
493                             } else {
494                                 // for some reason, child._info is null, so simply initialize it.
495                                 child._info = new Info();
496                                 // set the value to the result of the last find
497                                 child._info.value = firstInfo.value;
498                             }
499                             //check if child is the parent of node then enter that node
500                             child.addToList(pattern, context, list);
501                         }
502                     }
503                 }
504             }
505 
506             @Override
toString()507             public String toString() {
508                 return toString("", new StringBuilder()).toString();
509             }
510 
toString(String prefix, StringBuilder result)511             private StringBuilder toString(String prefix, StringBuilder result) {
512                 result.append(prefix).append(this._finder.toString()).append("\n");
513                 for (RegexTree<T>.RTNode child : _children) {
514                     child.toString(prefix + "\t", result);
515                 }
516                 return result;
517             }
518 
519             //greatest common prefix between two strings
greatestCommonPrefix(String a, String b)520             public String greatestCommonPrefix(String a, String b) {
521                 int minLength = Math.min(a.length(), b.length());
522                 for (int i = 0; i < minLength; i++) {
523                     if (a.charAt(i) != b.charAt(i)) {
524                         return a.substring(0, i);
525                     }
526                 }
527                 return a.substring(0, minLength);
528             }
529         }
530 
531         class RTNodeRankComparator implements Comparator<RTNode> {
532             @Override
compare(RTNode a, RTNode b)533             public int compare(RTNode a, RTNode b) {
534                 if (a == b) {
535                     return 0;
536                 } else if (a == null) {
537                     return -1;
538                 } else if (b == null) {
539                     return 1;
540                 } else if (a._rank == b._rank) {
541                     return 0;
542                 } else if (a._rank > b._rank) {
543                     return 1;
544                 } else {
545                     return -1;
546                 }
547             }
548         }
549 
550         @Override
toString()551         public String toString() {
552             return root.toString();
553         }
554     }
555 
556     private static class StarPatternMap<T> implements StorageInterfaceBase<T> {
557         private Map<String, List<SPNode>> _spmap;
558         private int _size = 0;
559 
StarPatternMap()560         public StarPatternMap() {
561             _spmap = new HashMap<>();
562 //            _size = 0;
563         }
564 
565         @Override
size()566         public int size() {
567             return _size;
568         }
569 
570         @Override
put(Finder pattern, T value)571         public void put(Finder pattern, T value) {
572             //System.out.println("pattern.toString() is => "+pattern.toString());
573             String starPattern = pathStarrer.transform2(pattern.toString().replaceAll("\\(\\[\\^\"\\]\\*\\)", "*"));
574             //System.out.println("Putting => "+starPattern);
575             List<SPNode> candidates = _spmap.get(starPattern);
576             if (candidates == null) {
577                 candidates = new ArrayList<>();
578             }
579             SPNode newNode = new SPNode(pattern, value);
580             candidates.add(newNode);
581             _spmap.put(starPattern, candidates);
582             _size++;
583         }
584 
585         @Override
get(Finder finder)586         public T get(Finder finder) {
587             String starPattern = pathStarrer.transform2(finder.toString());
588             List<SPNode> candidates = _spmap.get(starPattern);
589             if (candidates == null) {
590                 return null;
591             }
592             for (SPNode cand : candidates) {
593                 if (cand._finder.equals(finder)) {
594                     return cand._val;
595                 }
596             }
597             return null;
598         }
599 
600         @Override
getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)601         public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) {
602             List<SPNode> list = new ArrayList<>();
603             List<T> retList = new ArrayList<>();
604 
605             String starPattern = pathStarrer.transform2(pattern);
606             List<SPNode> candidates = _spmap.get(starPattern);
607             if (candidates == null) {
608                 return retList;
609             }
610             for (SPNode cand : candidates) {
611                 Info info = new Info();
612                 if (cand._finder.find(pattern, context, info)) {
613                     list.add(cand);
614                     if (firstInfo != null) {
615                         firstInfo.value = info.value;
616                     }
617                 }
618             }
619 
620             for (SPNode n : list) {
621                 retList.add(n._val);
622                 if (matcherList != null) {
623                     matcherList.add(n._finder);
624                 }
625             }
626 
627             return retList;
628         }
629 
630         @Override
get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)631         public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) {
632             List<Finder> matcherList = new ArrayList<>();
633             Output<String[]> firstInfo = new Output<>();
634             List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first
635             if (arguments != null && firstInfo.value != null) {
636 //                arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null;
637                 arguments.value = matcherList.isEmpty() ? null : firstInfo.value;
638             }
639             if (matcherFound != null) {
640                 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null;
641             }
642             return (matches.size() > 0) ? matches.get(0) : null;
643         }
644 
645         @Override
entrySet()646         public Set<Entry<Finder, T>> entrySet() {
647             LinkedHashMap<Finder, T> ret = new LinkedHashMap<>();
648 
649             for (Entry<String, List<SPNode>> entry : _spmap.entrySet()) {
650                 List<SPNode> candidates = entry.getValue();
651                 for (SPNode node : candidates) {
652                     ret.put(node._finder, node._val);
653                 }
654             }
655             return ret.entrySet();
656         }
657 
658         /**
659          * A Node of a StarPatternMap
660          * @author ribnitz
661          *
662          */
663         public class SPNode extends NodeBase<T> {
664 //            Finder _finder;
665 //            T _val;
666 
SPNode(Finder finder, T val)667             public SPNode(Finder finder, T val) {
668 //                _finder = finder;
669 //                _val = val;
670                 super(finder, val);
671             }
672 
673             @Override
toString()674             public String toString() {
675                 return this._finder.toString();
676             }
677         }
678     }
679 
680     /**
681      * The basic class of an information node, featuring a Finder, a value and an Info
682      *
683      * @author ribnitz
684      *
685      * @param <T>
686      */
687     private static class NodeBase<T> {
688         Finder _finder;
689         T _val;
690         Info _info = new Info();
691 
NodeBase(Finder finder, T value)692         public NodeBase(Finder finder, T value) {
693             this._finder = finder;
694             this._val = value;
695         }
696     }
697 
698     public static Transform<String, RegexFinder> RegexFinderTransform = new Transform<String, RegexFinder>() {
699         @Override
700         public RegexFinder transform(String source) {
701             return new RegexFinder(source);
702         }
703     };
704 
705     /**
706      * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //. To work better
707      * with XPaths.
708      */
709     public static Transform<String, RegexFinder> RegexFinderTransformPath = new Transform<String, RegexFinder>() {
710         @Override
711         public RegexFinder transform(String source) {
712             final String newSource = source.replace("[@", "\\[@");
713             return new RegexFinder(newSource.startsWith("//")
714                 ? "^" + newSource
715                 : newSource);
716         }
717     };
718 
719     /**
720      * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //, and ' is changed to ".
721      * To work better with XPaths.
722      */
723     public static Transform<String, RegexFinder> RegexFinderTransformPath2 = new Transform<String, RegexFinder>() {
724         @Override
725         public RegexFinder transform(String source) {
726             final String newSource = source.replace("[@", "\\[@").replace('\'', '"');
727             return new RegexFinder(newSource.startsWith("//")
728                 ? "^" + newSource
729                 : newSource);
730         }
731     };
732 
733     /**
734      * Allows for merging items of the same type.
735      *
736      * @param <T>
737      */
738     public interface Merger<T> {
merge(T a, T into)739         T merge(T a, T into);
740     }
741 
742     /**
743      * Returns the result of a regex lookup.
744      *
745      * @param source
746      * @return
747      */
get(String source)748     public final T get(String source) {
749         return get(source, null, null, null, null);
750     }
751 
752     /**
753      * Returns the result of a regex lookup, with the group arguments that matched.
754      *
755      * @param source
756      * @param context
757      *            TODO
758      * @return
759      */
get(String source, Object context, Output<String[]> arguments)760     public T get(String source, Object context, Output<String[]> arguments) {
761         return get(source, context, arguments, null, null);
762     }
763 
764     /**
765      * Returns the result of a regex lookup, with the group arguments that matched. Supplies failure cases for
766      * debugging.
767      *
768      * @param source
769      * @param context
770      * @return
771      */
get(String source, Object context, Output<String[]> arguments, Output<Finder> matcherFound, List<String> failures)772     public T get(String source, Object context, Output<String[]> arguments,
773         Output<Finder> matcherFound, List<String> failures) {
774 
775         if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) {
776             //   T ret = SPEntries.get(source, context, arguments, matcherFound);
777             T ret = storage.get(source, context, arguments, matcherFound);
778             if (ret != null) {
779                 return ret;
780             }
781 
782             if (failures != null) {
783                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
784 //                for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) {
785                     Finder matcher = entry.getKey();
786                     synchronized (matcher) {
787                         int failPoint = matcher.getFailPoint(source);
788                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
789                             + matcher.toString();
790                         failures.add(show);
791                     }
792                 }
793             }
794         } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) {
795             //      T ret = RTEntries.get(source, context, arguments, matcherFound);
796             T ret = storage.get(source, context, arguments, matcherFound);
797             if (ret != null) {
798                 return ret;
799             }
800 
801             if (failures != null) {
802                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
803 //                for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) {
804                     Finder matcher = entry.getKey();
805                     synchronized (matcher) {
806                         int failPoint = matcher.getFailPoint(source);
807                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
808                             + matcher.toString();
809                         failures.add(show);
810                     }
811                 }
812             }
813         } else {
814             //slow but versatile implementation
815             for (Map.Entry<Finder, T> entry : MEntries.entrySet()) {
816                 Finder matcher = entry.getKey();
817                 synchronized (matcher) {
818                     Info firstInfo = new Info();
819                     if (matcher.find(source, context, firstInfo)) {
820                         if (arguments != null) {
821 //                            arguments.value = matcher.getInfo();
822                             arguments.value = firstInfo.value;
823                         }
824                         if (matcherFound != null) {
825                             matcherFound.value = matcher;
826                         }
827                         return entry.getValue();
828                     } else if (failures != null) {
829                         int failPoint = matcher.getFailPoint(source);
830                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
831                             + matcher.toString();
832                         failures.add(show);
833                     }
834                 }
835             }
836         }
837 
838         // not really necessary, but makes debugging easier.
839         if (arguments != null) {
840             arguments.value = null;
841         }
842         if (matcherFound != null) {
843             matcherFound.value = null;
844         }
845         return null;
846     }
847 
848     /**
849      * Returns all results of a regex lookup, with the group arguments that matched. Supplies failure cases for
850      * debugging.
851      *
852      * @param source
853      * @param context
854      *            TODO
855      * @return
856      */
getAll(String source, Object context, List<Finder> matcherList, List<String> failures)857     public List<T> getAll(String source, Object context, List<Finder> matcherList, List<String> failures) {
858         if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) {
859             Output<String[]> firstInfo = new Output<>();
860 //            List<T> matches = SPEntries.getAll(source, context, matcherList,firstInfo);
861             List<T> matches = storage.getAll(source, context, matcherList, firstInfo);
862             if (matches != null) {
863                 return matches;
864             }
865 
866             if (failures != null) {
867                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
868 //                for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) {
869                     Finder matcher = entry.getKey();
870                     synchronized (matcher) {
871                         int failPoint = matcher.getFailPoint(source);
872                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
873                             + matcher.toString();
874                         failures.add(show);
875                     }
876                 }
877             }
878             return null;
879         } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) {
880             Output<String[]> info = new Output<>();
881 //            List<T> matches = RTEntries.getAll(source, context, matcherList,info);
882             List<T> matches = storage.getAll(source, context, matcherList, info);
883             if (matches != null) {
884                 return matches;
885             }
886 
887             if (failures != null) {
888                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
889 //                for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) {
890                     Finder matcher = entry.getKey();
891                     synchronized (matcher) {
892                         int failPoint = matcher.getFailPoint(source);
893                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
894                             + matcher.toString();
895                         failures.add(show);
896                     }
897                 }
898             }
899             return null;
900         } else {
901             //slow but versatile implementation
902             List<T> matches = new ArrayList<>();
903             for (Map.Entry<Finder, T> entry : MEntries.entrySet()) {
904                 Finder matcher = entry.getKey();
905                 Info firstInfo = new Info();
906                 if (matcher.find(source, context, firstInfo)) {
907                     if (matcherList != null) {
908                         matcherList.add(matcher);
909                     }
910                     matches.add(entry.getValue());
911                 } else if (failures != null) {
912                     int failPoint = matcher.getFailPoint(source);
913                     String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
914                         + matcher.toString();
915                     failures.add(show);
916                 }
917             }
918             return matches;
919         }
920     }
921 
922     /**
923      * Find the patterns that haven't been matched. Requires the caller to collect the patterns that have, using
924      * matcherFound.
925      *
926      * @return outputUnmatched
927      */
getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched)928     public Map<String, T> getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched) {
929         outputUnmatched.clear();
930 
931         Set<Map.Entry<Finder, T>> entrySet;
932         switch (_lookupType) {
933         case STAR_PATTERN_LOOKUP:
934 //            entrySet = SPEntries.entrySet();
935             entrySet = storage.entrySet();
936             break;
937         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
938 //            entrySet = RTEntries.entrySet();
939             entrySet = storage.entrySet();
940             break;
941         default:
942             entrySet = MEntries.entrySet();
943             break;
944         }
945 
946         for (Map.Entry<Finder, T> entry : entrySet) {
947             String pattern = entry.getKey().toString();
948             if (!matched.contains(pattern)) {
949                 outputUnmatched.put(pattern, entry.getValue());
950             }
951         }
952         return outputUnmatched;
953     }
954 
955     /**
956      * Create a RegexLookup. It will take a list of key/value pairs, where the key is a regex pattern and the value is
957      * what gets returned.
958      *
959      * @param patternTransform
960      *            Used to transform string patterns into a Pattern. Can be used to process replacements (like
961      *            variables).
962      * @param valueTransform
963      *            Used to transform string values into another form.
964      * @param valueMerger
965      *            Used to merge values with the same key.
966      */
of(Transform<String, Finder> patternTransform, Transform<String, T> valueTransform, Merger<T> valueMerger)967     public static <T, U> RegexLookup<T> of(Transform<String, Finder> patternTransform,
968         Transform<String, T> valueTransform, Merger<T> valueMerger) {
969         return new RegexLookup<T>().setPatternTransform(patternTransform).setValueTransform(valueTransform)
970             .setValueMerger(valueMerger);
971     }
972 
of(Transform<String, T> valueTransform)973     public static <T> RegexLookup<T> of(Transform<String, T> valueTransform) {
974         return new RegexLookup<T>().setValueTransform(valueTransform).setPatternTransform(RegexFinderTransform);
975     }
976 
of()977     public static <T> RegexLookup<T> of() {
978         return new RegexLookup<T>().setPatternTransform(RegexFinderTransform);
979     }
980 
981     /**
982      * @deprecated Use {@link #of(LookupType,Transform)} instead
983      */
984     @Deprecated
of(LookupType lookupType)985     public static <T> RegexLookup<T> of(LookupType lookupType) {
986         return of(lookupType, RegexFinderTransform);
987     }
988 
of(LookupType lookupType, Transform<String, RegexFinder> transform)989     public static <T> RegexLookup<T> of(LookupType lookupType, Transform<String, RegexFinder> transform) {
990         return new RegexLookup<T>(lookupType).setPatternTransform(transform);
991     }
992 
setValueTransform(Transform<String, ? extends T> valueTransform)993     public RegexLookup<T> setValueTransform(Transform<String, ? extends T> valueTransform) {
994         this.valueTransform = valueTransform;
995         return this;
996     }
997 
setPatternTransform(Transform<String, ? extends Finder> patternTransform)998     public RegexLookup<T> setPatternTransform(Transform<String, ? extends Finder> patternTransform) {
999         this.patternTransform = patternTransform != null ? patternTransform : RegexFinderTransform;
1000         return this;
1001     }
1002 
setValueMerger(Merger<T> valueMerger)1003     public RegexLookup<T> setValueMerger(Merger<T> valueMerger) {
1004         this.valueMerger = valueMerger;
1005         return this;
1006     }
1007 
1008     /**
1009      * Load a RegexLookup from a file. Opens a file relative to the class, and adds lines separated by "; ". Lines
1010      * starting with # are comments.
1011      */
loadFromFile(Class<?> baseClass, String filename)1012     public RegexLookup<T> loadFromFile(Class<?> baseClass, String filename) {
1013         RegexFileParser parser = setupRegexFileParser();
1014         parser.parse(baseClass, filename);
1015         return this;
1016     }
1017 
1018     /**
1019      * Load a RegexLookup from a list of strings, as if they came from a file. See loadFromFile
1020      */
loadFromString(String source)1021     public RegexLookup<T> loadFromString(String source) {
1022         RegexFileParser parser = setupRegexFileParser();
1023         parser.parseStrings("string", Splitter.on('\n').split(source));
1024         return this;
1025     }
1026 
setupRegexFileParser()1027     private RegexFileParser setupRegexFileParser() {
1028         RegexFileParser parser = new RegexFileParser();
1029         parser.setLineParser(new RegexLineParser() {
1030             @Override
1031             public void parse(String line) {
1032                 int pos = line.indexOf("; ");
1033                 if (pos < 0) {
1034                     throw new IllegalArgumentException("Illegal line, doesn't contain semicolon: " + line);
1035                 }
1036                 String source = line.substring(0, pos).trim();
1037                 String target = line.substring(pos + 2).trim();
1038                 try {
1039                     @SuppressWarnings("unchecked")
1040                     T result = valueTransform == null ? (T) target : valueTransform.transform(target);
1041                     add(source, result);
1042                 } catch (Exception e) {
1043                     throw new IllegalArgumentException("Failed to add <" + source + "> => <" + target + ">", e);
1044                 }
1045             }
1046         });
1047         parser.setVariableProcessor(new VariableProcessor() {
1048             @Override
1049             public void add(String variable, String variableName) {
1050                 addVariable(variable, variableName);
1051             }
1052 
1053             @Override
1054             public String replace(String str) {
1055                 return variables.replace(str);
1056             }
1057         });
1058         return parser;
1059     }
1060 
addVariable(String variable, String variableValue)1061     public RegexLookup<T> addVariable(String variable, String variableValue) {
1062         if (!variable.startsWith("%")) {
1063             throw new IllegalArgumentException("Variables must start with %");
1064         }
1065         variables.add(variable.trim(), variableValue.trim());
1066         return this;
1067     }
1068 
1069     /**
1070      * Add a pattern/value pair.
1071      *
1072      * @param stringPattern
1073      * @param target
1074      * @return this, for chaining
1075      */
add(String stringPattern, T target)1076     public RegexLookup<T> add(String stringPattern, T target) {
1077         if (stringPattern.contains("%")) {
1078             stringPattern = variables.replace(stringPattern);
1079         }
1080         Finder pattern0 = patternTransform.transform(stringPattern);
1081         return add(pattern0, target);
1082     }
1083 
1084     /**
1085      * Add a pattern/value pair.
1086      *
1087      * @param pattern
1088      * @param target
1089      * @return this, for chaining
1090      */
add(Finder pattern, T target)1091     public RegexLookup<T> add(Finder pattern, T target) {
1092         if (!allowNull && target == null) {
1093             throw new NullPointerException("null disallowed, unless allowNull(true) is called.");
1094         }
1095 
1096         T old;
1097         switch (_lookupType) {
1098         case STAR_PATTERN_LOOKUP: // fallthrough
1099         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1100             old = storage.get(pattern);
1101 //            old = SPEntries.get(pattern);
1102             break;
1103 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1104 //            old = storage.get(pattern);
1105 //            old = RTEntries.get(pattern);
1106 //            break;
1107         default:
1108             old = MEntries.get(pattern);
1109             break;
1110         }
1111 
1112         if (old == null) {
1113             switch (_lookupType) {
1114             case STAR_PATTERN_LOOKUP: // fallthrough
1115             case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1116                 storage.put(pattern, target);
1117 //                SPEntries.put(pattern, target);
1118                 break;
1119 //            case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1120 //                storage.put(pattern, target);
1121 //                RTEntries.put(pattern, target);
1122 //                break;
1123             default:
1124                 MEntries.put(pattern, target);
1125                 break;
1126             }
1127         } else if (valueMerger != null) {
1128             valueMerger.merge(target, old);
1129         } else {
1130             throw new IllegalArgumentException("Duplicate matcher without Merger defined " + pattern + "; old: " + old
1131                 + "; new: " + target);
1132         }
1133         return this;
1134     }
1135 
1136     @Override
iterator()1137     public Iterator<Map.Entry<Finder, T>> iterator() {
1138         switch (_lookupType) {
1139         case STAR_PATTERN_LOOKUP: // fall through
1140         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1141 //            return Collections.unmodifiableCollection(SPEntries.entrySet()).iterator();
1142             return Collections.unmodifiableCollection(storage.entrySet()).iterator();
1143 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1144 //            return Collections.unmodifiableCollection(RTEntries.entrySet()).iterator();
1145 //            return Collections.unmodifiableCollection(storage.entrySet()).iterator();
1146         default:
1147             return Collections.unmodifiableCollection(MEntries.entrySet()).iterator();
1148         }
1149     }
1150 
replace(String lookup, String... arguments)1151     public static String replace(String lookup, String... arguments) {
1152         StringBuilder result = new StringBuilder();
1153         int last = 0;
1154         while (true) {
1155             int pos = lookup.indexOf("$", last);
1156             if (pos < 0) {
1157                 result.append(lookup.substring(last, lookup.length()));
1158                 break;
1159             }
1160             result.append(lookup.substring(last, pos));
1161             final int arg = lookup.charAt(pos + 1) - '0';
1162             try {
1163                 result.append(arguments[arg]);
1164             } catch (Exception e) {
1165                 throw new IllegalArgumentException("Replacing $" + arg + " in <" + lookup
1166                     + ">, but too few arguments supplied.");
1167             }
1168             last = pos + 2;
1169         }
1170         return result.toString();
1171     }
1172 
1173     /**
1174      * @return the number of entries
1175      */
size()1176     public int size() {
1177         switch (_lookupType) {
1178         case STAR_PATTERN_LOOKUP: // fall through
1179         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1180 //            return SPEntries.size();
1181             return storage.size();
1182 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1183 //            return storage.size();
1184 //            return RTEntries.size();
1185         default:
1186             return MEntries.size();
1187         }
1188     }
1189 
1190     @Override
toString()1191     public String toString() {
1192         return storage.toString();
1193     }
1194 }