1 /*
2  * Copyright (c) 1995, 2001, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package sun.misc;
27 import java.io.*;
28 
29 /**
30  * A class to represent a pool of regular expressions.  A string
31  * can be matched against the whole pool all at once.  It is much
32  * faster than doing individual regular expression matches one-by-one.
33  *
34  * @see java.misc.RegexpTarget
35  * @author  James Gosling
36  */
37 
38 public class RegexpPool {
39     private RegexpNode prefixMachine = new RegexpNode();
40     private RegexpNode suffixMachine = new RegexpNode();
41     private static final int BIG = 0x7FFFFFFF;
42     private int lastDepth = BIG;
43 
RegexpPool()44     public RegexpPool () {
45     }
46 
47     /**
48      * Add a regular expression to the pool of regular expressions.
49      * @param   re  The regular expression to add to the pool.
50             For now, only handles strings that either begin or end with
51             a '*'.
52      * @param   ret The object to be returned when this regular expression is
53             matched.  If ret is an instance of the RegexpTarget class, ret.found
54             is called with the string fragment that matched the '*' as its
55             parameter.
56      * @exception REException error
57      */
add(String re, Object ret)58     public void add(String re, Object ret) throws REException {
59         add(re, ret, false);
60     }
61 
62     /**
63      * Replace the target for the regular expression with a different
64      * target.
65      *
66      * @param   re  The regular expression to be replaced in the pool.
67      *      For now, only handles strings that either begin or end with
68      *      a '*'.
69      * @param   ret The object to be returned when this regular expression is
70      *      matched.  If ret is an instance of the RegexpTarget class, ret.found
71      *      is called with the string fragment that matched the '*' as its
72      *      parameter.
73      */
replace(String re, Object ret)74     public void replace(String re, Object ret) {
75         try {
76             add(re, ret, true);
77         } catch(Exception e) {
78             // should never occur if replace is true
79         }
80     }
81 
82     /**
83      * Delete the regular expression and its target.
84      * @param re The regular expression to be deleted from the pool.
85      *           must begin or end with a '*'
86      * @return target - the old target.
87      */
delete(String re)88     public Object delete(String re) {
89         Object o = null;
90         RegexpNode p = prefixMachine;
91         RegexpNode best = p;
92         int len = re.length() - 1;
93         int i;
94         boolean prefix = true;
95 
96         if (!re.startsWith("*") ||
97             !re.endsWith("*"))
98             len++;
99 
100         if (len <= 0)
101             return null;
102 
103         /* March forward through the prefix machine */
104         for (i = 0; p != null; i++) {
105             if (p.result != null && p.depth < BIG
106                 && (!p.exact || i == len)) {
107                 best = p;
108             }
109             if (i >= len)
110                 break;
111             p = p.find(re.charAt(i));
112         }
113 
114         /* march backward through the suffix machine */
115         p = suffixMachine;
116         for (i = len; --i >= 0 && p != null;) {
117             if (p.result != null && p.depth < BIG) {
118                 prefix = false;
119                 best = p;
120             }
121             p = p.find(re.charAt(i));
122         }
123 
124         // delete only if there is an exact match
125         if (prefix) {
126             if (re.equals(best.re)) {
127                 o = best.result;
128                 best.result = null;
129 
130             }
131         } else {
132             if (re.equals(best.re)) {
133                 o = best.result;
134                 best.result = null;
135             }
136         }
137         return o;
138     }
139 
140     /** Search for a match to a string & return the object associated
141         with it with the match.  When multiple regular expressions
142         would match the string, the best match is returned first.
143         The next best match is returned the next time matchNext is
144         called.
145         @param s    The string to match against the regular expressions
146                     in the pool.
147         @return     null on failure, otherwise the object associated with
148                     the regular expression when it was added to the pool.
149                     If the object is an instance of RegexpTarget, then
150                     the return value is the result from calling
151                     return.found(string_that_matched_wildcard).
152     */
match(String s)153     public Object match(String s) {
154         return matchAfter(s, BIG);
155     }
156 
157     /** Identical to match except that it will only find matches to
158         regular expressions that were added to the pool <i>after</i>
159         the last regular expression that matched in the last call
160         to match() or matchNext() */
matchNext(String s)161     public Object matchNext(String s) {
162         return matchAfter(s, lastDepth);
163     }
164 
add(String re, Object ret, boolean replace)165     private void add(String re, Object ret, boolean replace) throws REException {
166         int len = re.length();
167         RegexpNode p;
168         if (re.charAt(0) == '*') {
169             p = suffixMachine;
170             while (len > 1)
171                 p = p.add(re.charAt(--len));
172         } else {
173             boolean exact = false;
174             if (re.charAt(len - 1) == '*')
175                 len--;
176             else
177                 exact = true;
178             p = prefixMachine;
179             for (int i = 0; i < len; i++)
180                 p = p.add(re.charAt(i));
181             p.exact = exact;
182         }
183 
184         if (p.result != null && !replace)
185             throw new REException(re + " is a duplicate");
186 
187         p.re = re;
188         p.result = ret;
189     }
190 
matchAfter(String s, int lastMatchDepth)191     private Object matchAfter(String s, int lastMatchDepth) {
192         RegexpNode p = prefixMachine;
193         RegexpNode best = p;
194         int bst = 0;
195         int bend = 0;
196         int len = s.length();
197         int i;
198         if (len <= 0)
199             return null;
200         /* March forward through the prefix machine */
201         for (i = 0; p != null; i++) {
202             if (p.result != null && p.depth < lastMatchDepth
203                 && (!p.exact || i == len)) {
204                 lastDepth = p.depth;
205                 best = p;
206                 bst = i;
207                 bend = len;
208             }
209             if (i >= len)
210                 break;
211             p = p.find(s.charAt(i));
212         }
213         /* march backward through the suffix machine */
214         p = suffixMachine;
215         for (i = len; --i >= 0 && p != null;) {
216             if (p.result != null && p.depth < lastMatchDepth) {
217                 lastDepth = p.depth;
218                 best = p;
219                 bst = 0;
220                 bend = i+1;
221             }
222             p = p.find(s.charAt(i));
223         }
224         Object o = best.result;
225         if (o != null && o instanceof RegexpTarget)
226             o = ((RegexpTarget) o).found(s.substring(bst, bend));
227         return o;
228     }
229 
230     /** Resets the pool so that the next call to matchNext looks
231         at all regular expressions in the pool.  match(s); is equivalent
232         to reset(); matchNext(s);
233         <p><b>Multithreading note:</b> reset/nextMatch leave state in the
234         regular expression pool.  If multiple threads could be using this
235         pool this way, they should be syncronized to avoid race hazards.
236         match() was done in such a way that there are no such race
237         hazards: multiple threads can be matching in the same pool
238         simultaneously. */
reset()239     public void reset() {
240         lastDepth = BIG;
241     }
242 
243     /** Print this pool to standard output */
print(PrintStream out)244     public void print(PrintStream out) {
245         out.print("Regexp pool:\n");
246         if (suffixMachine.firstchild != null) {
247             out.print(" Suffix machine: ");
248             suffixMachine.firstchild.print(out);
249             out.print("\n");
250         }
251         if (prefixMachine.firstchild != null) {
252             out.print(" Prefix machine: ");
253             prefixMachine.firstchild.print(out);
254             out.print("\n");
255         }
256     }
257 
258 }
259 
260 /* A node in a regular expression finite state machine. */
261 class RegexpNode {
262     char c;
263     RegexpNode firstchild;
264     RegexpNode nextsibling;
265     int depth;
266     boolean exact;
267     Object result;
268     String re = null;
269 
RegexpNode()270     RegexpNode () {
271         c = '#';
272         depth = 0;
273     }
RegexpNode(char C, int depth)274     RegexpNode (char C, int depth) {
275         c = C;
276         this.depth = depth;
277     }
add(char C)278     RegexpNode add(char C) {
279         RegexpNode p = firstchild;
280         if (p == null)
281             p = new RegexpNode (C, depth+1);
282         else {
283             while (p != null)
284                 if (p.c == C)
285                     return p;
286                 else
287                     p = p.nextsibling;
288             p = new RegexpNode (C, depth+1);
289             p.nextsibling = firstchild;
290         }
291         firstchild = p;
292         return p;
293     }
find(char C)294     RegexpNode find(char C) {
295         for (RegexpNode p = firstchild;
296                 p != null;
297                 p = p.nextsibling)
298             if (p.c == C)
299                 return p;
300         return null;
301     }
print(PrintStream out)302     void print(PrintStream out) {
303         if (nextsibling != null) {
304             RegexpNode p = this;
305             out.print("(");
306             while (p != null) {
307                 out.write(p.c);
308                 if (p.firstchild != null)
309                     p.firstchild.print(out);
310                 p = p.nextsibling;
311                 out.write(p != null ? '|' : ')');
312             }
313         } else {
314             out.write(c);
315             if (firstchild != null)
316                 firstchild.print(out);
317         }
318     }
319 }
320