1 package org.unicode.cldr.util;
2 
3 import java.util.ArrayList;
4 import java.util.Collections;
5 import java.util.Comparator;
6 import java.util.HashMap;
7 import java.util.Iterator;
8 import java.util.LinkedHashMap;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Map.Entry;
12 import java.util.Set;
13 import java.util.TreeSet;
14 import java.util.regex.Matcher;
15 import java.util.regex.Pattern;
16 
17 import org.unicode.cldr.util.CldrUtility.VariableReplacer;
18 import org.unicode.cldr.util.RegexFileParser.RegexLineParser;
19 import org.unicode.cldr.util.RegexFileParser.VariableProcessor;
20 import org.unicode.cldr.util.RegexLookup.Finder;
21 import org.unicode.cldr.util.RegexLookup.Finder.Info;
22 
23 import com.ibm.icu.text.Transform;
24 import com.ibm.icu.util.Output;
25 
26 /**
27  * Lookup items according to a set of regex patterns. Returns the value according to the first pattern that matches. Not
28  * thread-safe.
29  *
30  * @param <T>
31  */
32 public class RegexLookup<T> implements Iterable<Map.Entry<Finder, T>> {
33     private VariableReplacer variables = new VariableReplacer();
34     private StorageInterfaceBase<T> storage;
35 //    private StarPatternMap<T> SPEntries;
36 //    private RegexTree<T> RTEntries;
37     private Map<Finder, T> MEntries;
38     private Transform<String, ? extends Finder> patternTransform = RegexFinderTransform;
39     private Transform<String, ? extends T> valueTransform;
40     private Merger<T> valueMerger;
41     private final boolean allowNull = false;
42     private static PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*");
43 
44     public enum LookupType {
45         STAR_PATTERN_LOOKUP, OPTIMIZED_DIRECTORY_PATTERN_LOOKUP, STANDARD
46     };
47 
48     private LookupType _lookupType;
49 
50     /*
51      * STAR_PATTERN_LOOKUP
52      *
53      * When true, RegexLookup can match regex's even faster than the OPTIMIZED_DIRECTORY_PATTERN_LOOKUP below.
54      * It requires some additional structure, such that the only regular expression constructs such as (a|b) occur
55      * in attributes, so that the star pattern for a given path can match the star pattern of a given regular expression,
56      * thus greatly reducing the number of actual regex matches that need to occur.
57      */
58 
59     /*
60      * OPTIMIZED_DIRECTORY_PATTERN_LOOKUP
61      *
62      * When true, RegexLookup can match regex's in O(log(n)) time, where n is the number of regex's stored.
63      * This is provided that all regex patterns follow a directory based format and all directories are separated by a forward slash '/'
64      * for example: ^//ldml/dates/calendars/calendar[@type="([^"']++)"]/alias[@source="locale"][@path="../calendar[@type='([^"']++)']"]
65      *
66      * When false, RegexLookup will match regex's in O(n) time, where n is the number of regex's stored.
67      * However regex's no longer need to follow any specific format (Slower but more versatile).
68      */
69 
RegexLookup(LookupType type)70     public RegexLookup(LookupType type) {
71         _lookupType = type;
72         switch (type) {
73         case STAR_PATTERN_LOOKUP:
74             //   SPEntries = new StarPatternMap<T>();
75             storage = new StarPatternMap<T>();
76             break;
77         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
78             //   RTEntries = new RegexTree<T>();
79             storage = new RegexTree<T>();
80             break;
81         default:
82             MEntries = new LinkedHashMap<Finder, T>();
83             break;
84         }
85     }
86 
RegexLookup()87     public RegexLookup() {
88         this(LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP);
89 //        _lookupType = RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP;
90 //        RTEntries = new RegexTree<T>();
91     }
92 
93     public abstract static class Finder {
94         public static class Info {
95             public String[] value;
96         }
97 
98         //   abstract public String[] getInfo();
99 
100         // abstract public boolean find(String item, Object context);
101 
find(String item, Object context, Info info)102         abstract public boolean find(String item, Object context, Info info);
103 
matches(String item, Object context, Info info)104         abstract public boolean matches(String item, Object context, Info info);
105 
getFailPoint(String source)106         public int getFailPoint(String source) {
107             return -1;
108         }
109         // must also define toString
110     }
111 
112     public static class RegexFinder extends Finder {
113         /**
114          * The matcher used by this RegexFinder
115          */
116         private final Matcher matcher;
117 
118         /**
119          * The Pattern used by this RegexFinder
120          */
121         protected final Pattern pattern;
122 
RegexFinder(String pattern)123         public RegexFinder(String pattern) {
124             this.pattern = Pattern.compile(pattern, Pattern.COMMENTS);
125             matcher = this.pattern.matcher("");
126         }
127 
128         /**
129          * Call Matches on the pattern, returning additional information in the Info field,
130          * if it is non null
131          */
matches(String item, Object context, Info info)132         public boolean matches(String item, Object context, Info info) {
133             synchronized (matcher) {
134                 try {
135                     boolean result = matcher.reset(item).matches();
136                     extractInfo(info, result);
137                     return result;
138                 } catch (StringIndexOutOfBoundsException e) {
139                     // We don't know what causes this error (cldrbug 5051) so
140                     // make the exception message more detailed.
141                     throw new IllegalArgumentException("Matching error caused by pattern: ["
142                         + matcher.toString() + "] on text: [" + item + "]", e);
143                 }
144             }
145         }
146 
147         /**
148          * Extract match related information into  the info field, if result is true, and info
149          * is not null.
150          * @param info
151          * @param result
152          */
extractInfo(Info info, boolean result)153         private void extractInfo(Info info, boolean result) {
154             if (result && info != null) {
155                 int limit = matcher.groupCount() + 1;
156                 String[] value = new String[limit];
157                 for (int i = 0; i < limit; ++i) {
158                     value[i] = matcher.group(i);
159                 }
160                 info.value = value;
161             }
162         }
163 
164         /**
165          * Call find() on the pattern, returning additional information in the info field,
166          * if it is non-null
167          */
find(String item, Object context, Info info)168         public boolean find(String item, Object context, Info info) {
169             synchronized (matcher) {
170                 try {
171                     boolean result = matcher.reset(item).find();
172                     extractInfo(info, result);
173                     return result;
174                 } catch (StringIndexOutOfBoundsException e) {
175                     // We don't know what causes this error (cldrbug 5051) so
176                     // make the exception message more detailed.
177                     throw new IllegalArgumentException("Matching error caused by pattern: ["
178                         + matcher.toString() + "] on text: [" + item + "]", e);
179                 }
180             }
181         }
182 
toString()183         public String toString() {
184             // Use pattern here, to avoid having to synchronize on matcher
185             return pattern.pattern();
186         }
187 
188         @Override
equals(Object obj)189         public boolean equals(Object obj) {
190             if (obj == null) {
191                 return false;
192             } else {
193                 return toString().equals(obj.toString());
194             }
195         }
196 
197         @Override
hashCode()198         public int hashCode() {
199             return toString().hashCode();
200         }
201 
202         @Override
getFailPoint(String source)203         public int getFailPoint(String source) {
204             synchronized (matcher) {
205                 return RegexUtilities.findMismatch(matcher, source);
206             }
207         }
208     }
209 
210     private static interface StorageInterfaceBase<T> {
entrySet()211         Set<Entry<Finder, T>> entrySet();
212 
get(Finder finder)213         T get(Finder finder);
214 
get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)215         T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound);
216 
getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)217         List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo);
218 
put(Finder pattern, T value)219         void put(Finder pattern, T value);
220 
size()221         int size();
222     }
223 
224 //    private static class FinderWithInfo {
225 //        Finder _finder;
226 //        Info _info;
227 //        public FinderWithInfo(Finder finder,Info info) {
228 //            _info=info;
229 //            _finder=finder;
230 //        }
231 //    }
232     private static class RegexTree<T> implements StorageInterfaceBase<T> {
233         private RTNode root;
234         private int _size;
235         private RTNodeRankComparator rankComparator = new RTNodeRankComparator();
236 
RegexTree()237         public RegexTree() {
238             root = new RTNode("", null);
239             _size = 0;
240         }
241 
242         @Override
size()243         public int size() {
244             return _size;
245         }
246 
247         @Override
put(Finder pattern, T value)248         public void put(Finder pattern, T value) {
249             root.put(new RTNode(pattern, value, _size));
250             _size++;
251         }
252 
253         @Override
get(Finder finder)254         public T get(Finder finder) {
255             return root.get(finder);
256         }
257 
258         @Override
getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)259         public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) {
260             List<RTNode> list = new ArrayList<RTNode>();
261             List<T> retList = new ArrayList<T>();
262 
263             root.addToList(pattern, context, list);
264             Collections.sort(list, rankComparator);
265 
266             boolean isFirst = true;
267             if (firstInfo != null && !list.isEmpty()) {
268                 RTNode firstNode = list.get(0);
269                 if (firstNode._info != null) {
270                     firstInfo.value = firstNode._info.value;
271                 }
272             }
273 
274             for (RTNode n : list) {
275                 if (isFirst) {
276                     firstInfo.value = n._info.value;
277                     isFirst = false;
278                 }
279             }
280 
281             for (RTNode n : list) {
282                 retList.add(n._val);
283                 if (matcherList != null) {
284                     matcherList.add(n._finder);
285                 }
286             }
287 
288             return retList;
289         }
290 
get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)291         public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) {
292             List<Finder> matcherList = new ArrayList<Finder>();
293             Output<String[]> firstInfo = new Output<>();
294             List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first
295             if (arguments != null) {
296 //               arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null;
297                 arguments.value = firstInfo.value;
298 //               arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null;
299                 arguments.value = firstInfo.value;
300             }
301             if (matcherFound != null) {
302                 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null;
303             }
304             return (matches.size() > 0) ? matches.get(0) : null;
305         }
306 
entrySet()307         public Set<Entry<Finder, T>> entrySet() {
308             LinkedHashMap<Finder, T> ret = new LinkedHashMap<Finder, T>();
309             TreeSet<RTNode> s = new TreeSet<RTNode>(rankComparator);
310             root.addToEntrySet(s);
311 
312             for (RTNode n : s) {
313                 ret.put(n._finder, n._val);
314             }
315 
316             return ret.entrySet();
317         }
318 
319         public class RTNode extends NodeBase<T> {
320 //            Finder _finder;
321 //            T _val;
322             List<RTNode> _children = new ArrayList<RTNode>();
323             int _rank = -1; //rank -1 means the node was not inserted, but only used for structural purposes
324 
325             //constructor for regular nodes with a Finder
RTNode(Finder finder, T val, int rank)326             public RTNode(Finder finder, T val, int rank) {
327                 super(finder, val);
328 //                _finder = finder;
329 //                _val = val;
330                 _rank = rank;
331             }
332 
333             //constructors for nodes without a Finder
RTNode(String key, T val)334             public RTNode(String key, T val) {
335                 super(new RegexFinder(key), val);
336 //                _finder = new RegexFinder(key);
337 //                _val = val;
338 //                _rank = -1;
339                 _info = new Info();
340             }
341 
put(RTNode node)342             public void put(RTNode node) {
343                 if (_children.size() == 0) {
344                     _children.add(node);
345                 } else {
346                     String maxSimilarChars = ""; //most similar characters up to the last similar directory
347                     int insertIndex = 0;
348                     for (int i = 0; i < _children.size(); i++) {
349                         RTNode child = _children.get(i);
350                         String childFinderPattern = child._finder.toString();
351                         if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') {
352                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$"
353                         } else if (child._rank == -1) {
354                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*"
355                         }
356 
357                         //check if child has the same Finder as node to insert, then replace it
358                         if (node._finder.equals(child._finder)) {
359                             child._finder = node._finder;
360                             child._val = node._val;
361                             if (child._rank == -1) {
362                                 child._rank = node._rank;
363                             } else {
364                                 _size--;
365                             }
366                             return;
367                         }
368 
369                         //check if child is the parent of node
370                         if (child._rank == -1 && node._finder.toString().startsWith(childFinderPattern)) {
371                             child.put(node);
372                             return;
373                         }
374 
375                         //if not parent then check if candidate for most similar RTNode
376                         String gcp = greatestCommonPrefix(childFinderPattern, node._finder.toString());
377                         gcp = removeExtraChars(gcp);
378                         if (gcp.length() > maxSimilarChars.length()) {
379                             maxSimilarChars = gcp;
380                             insertIndex = i;
381                         }
382                     }
383 
384                     String finderPattern = this._finder.toString();
385                     if (finderPattern.length() > 0 && finderPattern.charAt(finderPattern.length() - 1) == '$') {
386                         finderPattern = finderPattern.substring(0, finderPattern.length() - 1); //takes into account the added "$"
387                     } else if (!(finderPattern.equals("")) && this._rank == -1) {
388                         finderPattern = finderPattern.substring(0, finderPattern.length() - 2); //takes into account the added ".*"
389                     }
390 
391                     if ((maxSimilarChars).equals(finderPattern)) { //add under this if no similar children
392                         _children.add(node);
393                     } else {
394                         //create the common parent of the chosen candidate above and node, then add to the insert index
395                         RTNode newParent = new RTNode(maxSimilarChars + ".*", null);
396                         newParent._children.add(this._children.get(insertIndex));
397                         newParent._children.add(node);
398                         this._children.remove(insertIndex);
399                         this._children.add(insertIndex, newParent);
400                     }
401                 }
402             }
403 
404             //takes a string in a directory regex form and removes all characters up to the last full directory
removeExtraChars(String input)405             private String removeExtraChars(String input) {
406                 String ret = input.substring(0, Math.max(0, input.lastIndexOf('/')));
407                 while ((ret.lastIndexOf('(') != -1 && ret.lastIndexOf('(') > ret.lastIndexOf(')')) ||
408                     (ret.lastIndexOf('[') != -1 && ret.lastIndexOf('[') > ret.lastIndexOf(']')) ||
409                     (ret.lastIndexOf('{') != -1 && ret.lastIndexOf('{') > ret.lastIndexOf('}'))) {
410                     ret = ret.substring(0, Math.max(0, ret.lastIndexOf('/')));
411                 }
412                 return ret;
413             }
414 
415             //traverse tree to get value
get(Finder finder)416             public T get(Finder finder) {
417                 T ret = null; //return value
418 
419                 if (_children.size() == 0) {
420                     return null;
421                 } else {
422                     for (RTNode child : _children) {
423 
424                         //check if child is the node
425                         if (child._rank != -1 && finder.equals(child._finder)) {
426                             return child._val;
427                         }
428 
429                         String childFinderPattern = child._finder.toString();
430 
431                         if (childFinderPattern.length() > 0 && childFinderPattern.charAt(childFinderPattern.length() - 1) == '$') {
432                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 1); //takes into account the added "$"
433                         } else if (child._rank == -1) {
434                             childFinderPattern = childFinderPattern.substring(0, childFinderPattern.length() - 2); //takes into account the added ".*"
435                         }
436 
437                         //check if child is the parent of node
438                         if (finder.toString().startsWith(childFinderPattern)) {
439                             ret = child.get(finder);
440                             if (ret != null) {
441                                 break;
442                             }
443                         }
444                     }
445 
446                     return ret;
447                 }
448             }
449 
450             //traverse tree to get an entry set
addToEntrySet(TreeSet<RTNode> s)451             public void addToEntrySet(TreeSet<RTNode> s) {
452                 if (_children.size() == 0) {
453                     return;
454                 } else {
455                     for (RTNode child : _children) {
456                         if (child._rank != -1) {
457                             s.add(child);
458                         }
459                         child.addToEntrySet(s);
460                     }
461                 }
462             }
463 
464             //traverse tree to get list of all values who's key matcher matches pattern
addToList(String pattern, Object context, List<RTNode> list)465             public void addToList(String pattern, Object context, List<RTNode> list) {
466                 if (_children.size() == 0) {
467                     return;
468                 } else {
469                     Info firstInfo = new Info();
470                     for (RTNode child : _children) {
471                         boolean found;
472                         synchronized (child._finder) {
473                             found = child._finder.find(pattern, context, firstInfo);
474                         }
475 
476                         //check if child matches pattern
477                         if (found) {
478                             if (child._rank != -1) {
479                                 list.add(child);
480                             }
481                             // if this node's info value is unset, set it to the result of the
482                             // lookup
483 //                            if (child._info!=null && child._info.value==null) {
484                             if (child._info != null) {
485                                 // set the value to the result of the last find
486                                 child._info.value = firstInfo.value;
487                             } else {
488                                 // for some reason, child._info is null, so simply initialize it.
489                                 child._info = new Info();
490                                 // set the value to the result of the last find
491                                 child._info.value = firstInfo.value;
492                             }
493                             //check if child is the parent of node then enter that node
494                             child.addToList(pattern, context, list);
495                         }
496                     }
497                 }
498             }
499 
toString()500             public String toString() {
501                 return toString("", new StringBuilder()).toString();
502             }
503 
toString(String prefix, StringBuilder result)504             private StringBuilder toString(String prefix, StringBuilder result) {
505                 result.append(prefix).append(this._finder.toString()).append("\n");
506                 for (RegexTree<T>.RTNode child : _children) {
507                     child.toString(prefix + "\t", result);
508                 }
509                 return result;
510             }
511 
512             //greatest common prefix between two strings
greatestCommonPrefix(String a, String b)513             public String greatestCommonPrefix(String a, String b) {
514                 int minLength = Math.min(a.length(), b.length());
515                 for (int i = 0; i < minLength; i++) {
516                     if (a.charAt(i) != b.charAt(i)) {
517                         return a.substring(0, i);
518                     }
519                 }
520                 return a.substring(0, minLength);
521             }
522         }
523 
524         class RTNodeRankComparator implements Comparator<RTNode> {
compare(RTNode a, RTNode b)525             public int compare(RTNode a, RTNode b) {
526                 if (a == b) {
527                     return 0;
528                 } else if (a == null) {
529                     return -1;
530                 } else if (b == null) {
531                     return 1;
532                 } else if (a._rank == b._rank) {
533                     return 0;
534                 } else if (a._rank > b._rank) {
535                     return 1;
536                 } else {
537                     return -1;
538                 }
539             }
540         }
541 
toString()542         public String toString() {
543             return root.toString();
544         }
545     }
546 
547     private static class StarPatternMap<T> implements StorageInterfaceBase<T> {
548         private Map<String, List<SPNode>> _spmap;
549         private int _size = 0;
550 
StarPatternMap()551         public StarPatternMap() {
552             _spmap = new HashMap<String, List<SPNode>>();
553 //            _size = 0;
554         }
555 
size()556         public int size() {
557             return _size;
558         }
559 
put(Finder pattern, T value)560         public void put(Finder pattern, T value) {
561             //System.out.println("pattern.toString() is => "+pattern.toString());
562             String starPattern = pathStarrer.transform2(pattern.toString().replaceAll("\\(\\[\\^\"\\]\\*\\)", "*"));
563             //System.out.println("Putting => "+starPattern);
564             List<SPNode> candidates = _spmap.get(starPattern);
565             if (candidates == null) {
566                 candidates = new ArrayList<SPNode>();
567             }
568             SPNode newNode = new SPNode(pattern, value);
569             candidates.add(newNode);
570             _spmap.put(starPattern, candidates);
571             _size++;
572         }
573 
get(Finder finder)574         public T get(Finder finder) {
575             String starPattern = pathStarrer.transform2(finder.toString());
576             List<SPNode> candidates = _spmap.get(starPattern);
577             if (candidates == null) {
578                 return null;
579             }
580             for (SPNode cand : candidates) {
581                 if (cand._finder.equals(finder)) {
582                     return cand._val;
583                 }
584             }
585             return null;
586         }
587 
getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)588         public List<T> getAll(String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo) {
589             List<SPNode> list = new ArrayList<SPNode>();
590             List<T> retList = new ArrayList<T>();
591 
592             String starPattern = pathStarrer.transform2(pattern);
593             List<SPNode> candidates = _spmap.get(starPattern);
594             if (candidates == null) {
595                 return retList;
596             }
597             for (SPNode cand : candidates) {
598                 Info info = new Info();
599                 if (cand._finder.find(pattern, context, info)) {
600                     list.add(cand);
601                     if (firstInfo != null) {
602                         firstInfo.value = info.value;
603                     }
604                 }
605             }
606 
607             for (SPNode n : list) {
608                 retList.add(n._val);
609                 if (matcherList != null) {
610                     matcherList.add(n._finder);
611                 }
612             }
613 
614             return retList;
615         }
616 
get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)617         public T get(String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound) {
618             List<Finder> matcherList = new ArrayList<Finder>();
619             Output<String[]> firstInfo = new Output<>();
620             List<T> matches = getAll(pattern, context, matcherList, firstInfo); //need to get whole list because we want value that was entered first
621             if (arguments != null && firstInfo.value != null) {
622 //                arguments.value = (matcherList.size() > 0) ? matcherList.get(0).getInfo() : null;
623                 arguments.value = matcherList.isEmpty() ? null : firstInfo.value;
624             }
625             if (matcherFound != null) {
626                 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null;
627             }
628             return (matches.size() > 0) ? matches.get(0) : null;
629         }
630 
entrySet()631         public Set<Entry<Finder, T>> entrySet() {
632             LinkedHashMap<Finder, T> ret = new LinkedHashMap<Finder, T>();
633 
634             for (Entry<String, List<SPNode>> entry : _spmap.entrySet()) {
635                 List<SPNode> candidates = entry.getValue();
636                 for (SPNode node : candidates) {
637                     ret.put(node._finder, node._val);
638                 }
639             }
640             return ret.entrySet();
641         }
642 
643         /**
644          * A Node of a StarPatternMap
645          * @author ribnitz
646          *
647          */
648         public class SPNode extends NodeBase<T> {
649 //            Finder _finder;
650 //            T _val;
651 
SPNode(Finder finder, T val)652             public SPNode(Finder finder, T val) {
653 //                _finder = finder;
654 //                _val = val;
655                 super(finder, val);
656             }
657 
toString()658             public String toString() {
659                 return this._finder.toString();
660             }
661         }
662     }
663 
664     /**
665      * The basic class of an information node, featuring a Finder, a value and an Info
666      *
667      * @author ribnitz
668      *
669      * @param <T>
670      */
671     private static class NodeBase<T> {
672         Finder _finder;
673         T _val;
674         Info _info = new Info();
675 
NodeBase(Finder finder, T value)676         public NodeBase(Finder finder, T value) {
677             this._finder = finder;
678             this._val = value;
679         }
680     }
681 
682     public static Transform<String, RegexFinder> RegexFinderTransform = new Transform<String, RegexFinder>() {
683         public RegexFinder transform(String source) {
684             return new RegexFinder(source);
685         }
686     };
687 
688     /**
689      * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //. To work better
690      * with XPaths.
691      */
692     public static Transform<String, RegexFinder> RegexFinderTransformPath = new Transform<String, RegexFinder>() {
693         public RegexFinder transform(String source) {
694             final String newSource = source.replace("[@", "\\[@");
695             return new RegexFinder(newSource.startsWith("//")
696                 ? "^" + newSource
697                 : newSource);
698         }
699     };
700 
701     /**
702      * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before //, and ' is changed to ".
703      * To work better with XPaths.
704      */
705     public static Transform<String, RegexFinder> RegexFinderTransformPath2 = new Transform<String, RegexFinder>() {
706         public RegexFinder transform(String source) {
707             final String newSource = source.replace("[@", "\\[@").replace('\'', '"');
708             return new RegexFinder(newSource.startsWith("//")
709                 ? "^" + newSource
710                 : newSource);
711         }
712     };
713 
714     /**
715      * Allows for merging items of the same type.
716      *
717      * @param <T>
718      */
719     public interface Merger<T> {
merge(T a, T into)720         T merge(T a, T into);
721     }
722 
723     /**
724      * Returns the result of a regex lookup.
725      *
726      * @param source
727      * @return
728      */
get(String source)729     public final T get(String source) {
730         return get(source, null, null, null, null);
731     }
732 
733     /**
734      * Returns the result of a regex lookup, with the group arguments that matched.
735      *
736      * @param source
737      * @param context
738      *            TODO
739      * @return
740      */
get(String source, Object context, Output<String[]> arguments)741     public T get(String source, Object context, Output<String[]> arguments) {
742         return get(source, context, arguments, null, null);
743     }
744 
745     /**
746      * Returns the result of a regex lookup, with the group arguments that matched. Supplies failure cases for
747      * debugging.
748      *
749      * @param source
750      * @param context
751      * @return
752      */
get(String source, Object context, Output<String[]> arguments, Output<Finder> matcherFound, List<String> failures)753     public T get(String source, Object context, Output<String[]> arguments,
754         Output<Finder> matcherFound, List<String> failures) {
755 
756         if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) {
757             //   T ret = SPEntries.get(source, context, arguments, matcherFound);
758             T ret = storage.get(source, context, arguments, matcherFound);
759             if (ret != null) {
760                 return ret;
761             }
762 
763             if (failures != null) {
764                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
765 //                for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) {
766                     Finder matcher = entry.getKey();
767                     synchronized (matcher) {
768                         int failPoint = matcher.getFailPoint(source);
769                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
770                             + matcher.toString();
771                         failures.add(show);
772                     }
773                 }
774             }
775         } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) {
776             //      T ret = RTEntries.get(source, context, arguments, matcherFound);
777             T ret = storage.get(source, context, arguments, matcherFound);
778             if (ret != null) {
779                 return ret;
780             }
781 
782             if (failures != null) {
783                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
784 //                for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) {
785                     Finder matcher = entry.getKey();
786                     synchronized (matcher) {
787                         int failPoint = matcher.getFailPoint(source);
788                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
789                             + matcher.toString();
790                         failures.add(show);
791                     }
792                 }
793             }
794         } else {
795             //slow but versatile implementation
796             for (Map.Entry<Finder, T> entry : MEntries.entrySet()) {
797                 Finder matcher = entry.getKey();
798                 synchronized (matcher) {
799                     Info firstInfo = new Info();
800                     if (matcher.find(source, context, firstInfo)) {
801                         if (arguments != null) {
802 //                            arguments.value = matcher.getInfo();
803                             arguments.value = firstInfo.value;
804                         }
805                         if (matcherFound != null) {
806                             matcherFound.value = matcher;
807                         }
808                         return entry.getValue();
809                     } else if (failures != null) {
810                         int failPoint = matcher.getFailPoint(source);
811                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
812                             + matcher.toString();
813                         failures.add(show);
814                     }
815                 }
816             }
817         }
818 
819         // not really necessary, but makes debugging easier.
820         if (arguments != null) {
821             arguments.value = null;
822         }
823         if (matcherFound != null) {
824             matcherFound.value = null;
825         }
826         return null;
827     }
828 
829     /**
830      * Returns all results of a regex lookup, with the group arguments that matched. Supplies failure cases for
831      * debugging.
832      *
833      * @param source
834      * @param context
835      *            TODO
836      * @return
837      */
getAll(String source, Object context, List<Finder> matcherList, List<String> failures)838     public List<T> getAll(String source, Object context, List<Finder> matcherList, List<String> failures) {
839         if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) {
840             Output<String[]> firstInfo = new Output<>();
841 //            List<T> matches = SPEntries.getAll(source, context, matcherList,firstInfo);
842             List<T> matches = storage.getAll(source, context, matcherList, firstInfo);
843             if (matches != null) {
844                 return matches;
845             }
846 
847             if (failures != null) {
848                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
849 //                for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) {
850                     Finder matcher = entry.getKey();
851                     synchronized (matcher) {
852                         int failPoint = matcher.getFailPoint(source);
853                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
854                             + matcher.toString();
855                         failures.add(show);
856                     }
857                 }
858             }
859             return null;
860         } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) {
861             Output<String[]> info = new Output<>();
862 //            List<T> matches = RTEntries.getAll(source, context, matcherList,info);
863             List<T> matches = storage.getAll(source, context, matcherList, info);
864             if (matches != null) {
865                 return matches;
866             }
867 
868             if (failures != null) {
869                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
870 //                for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) {
871                     Finder matcher = entry.getKey();
872                     synchronized (matcher) {
873                         int failPoint = matcher.getFailPoint(source);
874                         String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
875                             + matcher.toString();
876                         failures.add(show);
877                     }
878                 }
879             }
880             return null;
881         } else {
882             //slow but versatile implementation
883             List<T> matches = new ArrayList<T>();
884             for (Map.Entry<Finder, T> entry : MEntries.entrySet()) {
885                 Finder matcher = entry.getKey();
886                 Info firstInfo = new Info();
887                 if (matcher.find(source, context, firstInfo)) {
888                     if (matcherList != null) {
889                         matcherList.add(matcher);
890                     }
891                     matches.add(entry.getValue());
892                 } else if (failures != null) {
893                     int failPoint = matcher.getFailPoint(source);
894                     String show = source.substring(0, failPoint) + "☹" + source.substring(failPoint) + "\t"
895                         + matcher.toString();
896                     failures.add(show);
897                 }
898             }
899             return matches;
900         }
901     }
902 
903     /**
904      * Find the patterns that haven't been matched. Requires the caller to collect the patterns that have, using
905      * matcherFound.
906      *
907      * @return outputUnmatched
908      */
getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched)909     public Map<String, T> getUnmatchedPatterns(Set<String> matched, Map<String, T> outputUnmatched) {
910         outputUnmatched.clear();
911 
912         Set<Map.Entry<Finder, T>> entrySet;
913         switch (_lookupType) {
914         case STAR_PATTERN_LOOKUP:
915 //            entrySet = SPEntries.entrySet();
916             entrySet = storage.entrySet();
917             break;
918         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
919 //            entrySet = RTEntries.entrySet();
920             entrySet = storage.entrySet();
921             break;
922         default:
923             entrySet = MEntries.entrySet();
924             break;
925         }
926 
927         for (Map.Entry<Finder, T> entry : entrySet) {
928             String pattern = entry.getKey().toString();
929             if (!matched.contains(pattern)) {
930                 outputUnmatched.put(pattern, entry.getValue());
931             }
932         }
933         return outputUnmatched;
934     }
935 
936     /**
937      * Create a RegexLookup. It will take a list of key/value pairs, where the key is a regex pattern and the value is
938      * what gets returned.
939      *
940      * @param patternTransform
941      *            Used to transform string patterns into a Pattern. Can be used to process replacements (like
942      *            variables).
943      * @param valueTransform
944      *            Used to transform string values into another form.
945      * @param valueMerger
946      *            Used to merge values with the same key.
947      */
of(Transform<String, Finder> patternTransform, Transform<String, T> valueTransform, Merger<T> valueMerger)948     public static <T, U> RegexLookup<T> of(Transform<String, Finder> patternTransform,
949         Transform<String, T> valueTransform, Merger<T> valueMerger) {
950         return new RegexLookup<T>().setPatternTransform(patternTransform).setValueTransform(valueTransform)
951             .setValueMerger(valueMerger);
952     }
953 
of(Transform<String, T> valueTransform)954     public static <T> RegexLookup<T> of(Transform<String, T> valueTransform) {
955         return new RegexLookup<T>().setValueTransform(valueTransform).setPatternTransform(RegexFinderTransform);
956     }
957 
of()958     public static <T> RegexLookup<T> of() {
959         return new RegexLookup<T>().setPatternTransform(RegexFinderTransform);
960     }
961 
962     /**
963      * @deprecated Use {@link #of(LookupType,Transform)} instead
964      */
of(LookupType lookupType)965     public static <T> RegexLookup<T> of(LookupType lookupType) {
966         return of(lookupType, RegexFinderTransform);
967     }
968 
of(LookupType lookupType, Transform<String, RegexFinder> transform)969     public static <T> RegexLookup<T> of(LookupType lookupType, Transform<String, RegexFinder> transform) {
970         return new RegexLookup<T>(lookupType).setPatternTransform(transform);
971     }
972 
setValueTransform(Transform<String, ? extends T> valueTransform)973     public RegexLookup<T> setValueTransform(Transform<String, ? extends T> valueTransform) {
974         this.valueTransform = valueTransform;
975         return this;
976     }
977 
setPatternTransform(Transform<String, ? extends Finder> patternTransform)978     public RegexLookup<T> setPatternTransform(Transform<String, ? extends Finder> patternTransform) {
979         this.patternTransform = patternTransform != null ? patternTransform : RegexFinderTransform;
980         return this;
981     }
982 
setValueMerger(Merger<T> valueMerger)983     public RegexLookup<T> setValueMerger(Merger<T> valueMerger) {
984         this.valueMerger = valueMerger;
985         return this;
986     }
987 
988     /**
989      * Load a RegexLookup from a file. Opens a file relative to the class, and adds lines separated by "; ". Lines
990      * starting with # are comments.
991      */
loadFromFile(Class<?> baseClass, String filename)992     public RegexLookup<T> loadFromFile(Class<?> baseClass, String filename) {
993         RegexFileParser parser = new RegexFileParser();
994         parser.setLineParser(new RegexLineParser() {
995             @Override
996             public void parse(String line) {
997                 int pos = line.indexOf("; ");
998                 if (pos < 0) {
999                     throw new IllegalArgumentException("Illegal line, doesn't contain semicolon: " + line);
1000                 }
1001                 String source = line.substring(0, pos).trim();
1002                 String target = line.substring(pos + 2).trim();
1003                 try {
1004                     @SuppressWarnings("unchecked")
1005                     T result = valueTransform == null ? (T) target : valueTransform.transform(target);
1006                     add(source, result);
1007                 } catch (Exception e) {
1008                     throw new IllegalArgumentException("Failed to add <" + source + "> => <" + target + ">", e);
1009                 }
1010             }
1011         });
1012         parser.setVariableProcessor(new VariableProcessor() {
1013             @Override
1014             public void add(String variable, String variableName) {
1015                 addVariable(variable, variableName);
1016             }
1017 
1018             @Override
1019             public String replace(String str) {
1020                 return variables.replace(str);
1021             }
1022         });
1023         parser.parse(baseClass, filename);
1024         return this;
1025     }
1026 
addVariable(String variable, String variableValue)1027     public RegexLookup<T> addVariable(String variable, String variableValue) {
1028         if (!variable.startsWith("%")) {
1029             throw new IllegalArgumentException("Variables must start with %");
1030         }
1031         variables.add(variable.trim(), variableValue.trim());
1032         return this;
1033     }
1034 
1035     /**
1036      * Add a pattern/value pair.
1037      *
1038      * @param stringPattern
1039      * @param target
1040      * @return this, for chaining
1041      */
add(String stringPattern, T target)1042     public RegexLookup<T> add(String stringPattern, T target) {
1043         if (stringPattern.contains("%")) {
1044             stringPattern = variables.replace(stringPattern);
1045         }
1046         Finder pattern0 = patternTransform.transform(stringPattern);
1047         return add(pattern0, target);
1048     }
1049 
1050     /**
1051      * Add a pattern/value pair.
1052      *
1053      * @param pattern
1054      * @param target
1055      * @return this, for chaining
1056      */
add(Finder pattern, T target)1057     public RegexLookup<T> add(Finder pattern, T target) {
1058         if (!allowNull && target == null) {
1059             throw new NullPointerException("null disallowed, unless allowNull(true) is called.");
1060         }
1061 
1062         T old;
1063         switch (_lookupType) {
1064         case STAR_PATTERN_LOOKUP: // fallthrough
1065         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1066             old = storage.get(pattern);
1067 //            old = SPEntries.get(pattern);
1068             break;
1069 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1070 //            old = storage.get(pattern);
1071 //            old = RTEntries.get(pattern);
1072 //            break;
1073         default:
1074             old = MEntries.get(pattern);
1075             break;
1076         }
1077 
1078         if (old == null) {
1079             switch (_lookupType) {
1080             case STAR_PATTERN_LOOKUP: // fallthrough
1081             case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1082                 storage.put(pattern, target);
1083 //                SPEntries.put(pattern, target);
1084                 break;
1085 //            case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1086 //                storage.put(pattern, target);
1087 //                RTEntries.put(pattern, target);
1088 //                break;
1089             default:
1090                 MEntries.put(pattern, target);
1091                 break;
1092             }
1093         } else if (valueMerger != null) {
1094             valueMerger.merge(target, old);
1095         } else {
1096             throw new IllegalArgumentException("Duplicate matcher without Merger defined " + pattern + "; old: " + old
1097                 + "; new: " + target);
1098         }
1099         return this;
1100     }
1101 
1102     @Override
iterator()1103     public Iterator<Map.Entry<Finder, T>> iterator() {
1104         switch (_lookupType) {
1105         case STAR_PATTERN_LOOKUP: // fall through
1106         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1107 //            return Collections.unmodifiableCollection(SPEntries.entrySet()).iterator();
1108             return Collections.unmodifiableCollection(storage.entrySet()).iterator();
1109 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1110 //            return Collections.unmodifiableCollection(RTEntries.entrySet()).iterator();
1111 //            return Collections.unmodifiableCollection(storage.entrySet()).iterator();
1112         default:
1113             return Collections.unmodifiableCollection(MEntries.entrySet()).iterator();
1114         }
1115     }
1116 
replace(String lookup, String... arguments)1117     public static String replace(String lookup, String... arguments) {
1118         StringBuilder result = new StringBuilder();
1119         int last = 0;
1120         while (true) {
1121             int pos = lookup.indexOf("$", last);
1122             if (pos < 0) {
1123                 result.append(lookup.substring(last, lookup.length()));
1124                 break;
1125             }
1126             result.append(lookup.substring(last, pos));
1127             final int arg = lookup.charAt(pos + 1) - '0';
1128             try {
1129                 result.append(arguments[arg]);
1130             } catch (Exception e) {
1131                 throw new IllegalArgumentException("Replacing $" + arg + " in <" + lookup
1132                     + ">, but too few arguments supplied.");
1133             }
1134             last = pos + 2;
1135         }
1136         return result.toString();
1137     }
1138 
1139     /**
1140      * @return the number of entries
1141      */
size()1142     public int size() {
1143         switch (_lookupType) {
1144         case STAR_PATTERN_LOOKUP: // fall through
1145         case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1146 //            return SPEntries.size();
1147             return storage.size();
1148 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1149 //            return storage.size();
1150 //            return RTEntries.size();
1151         default:
1152             return MEntries.size();
1153         }
1154     }
1155 
1156     @Override
toString()1157     public String toString() {
1158         return storage.toString();
1159     }
1160 }