/* * Copyright (c) 1995, 2001, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package sun.misc; import java.io.*; /** * A class to represent a pool of regular expressions. A string * can be matched against the whole pool all at once. It is much * faster than doing individual regular expression matches one-by-one. * * @see java.misc.RegexpTarget * @author James Gosling */ public class RegexpPool { private RegexpNode prefixMachine = new RegexpNode(); private RegexpNode suffixMachine = new RegexpNode(); private static final int BIG = 0x7FFFFFFF; private int lastDepth = BIG; public RegexpPool () { } /** * Add a regular expression to the pool of regular expressions. * @param re The regular expression to add to the pool. For now, only handles strings that either begin or end with a '*'. * @param ret The object to be returned when this regular expression is matched. If ret is an instance of the RegexpTarget class, ret.found is called with the string fragment that matched the '*' as its parameter. * @exception REException error */ public void add(String re, Object ret) throws REException { add(re, ret, false); } /** * Replace the target for the regular expression with a different * target. * * @param re The regular expression to be replaced in the pool. * For now, only handles strings that either begin or end with * a '*'. * @param ret The object to be returned when this regular expression is * matched. If ret is an instance of the RegexpTarget class, ret.found * is called with the string fragment that matched the '*' as its * parameter. */ public void replace(String re, Object ret) { try { add(re, ret, true); } catch(Exception e) { // should never occur if replace is true } } /** * Delete the regular expression and its target. * @param re The regular expression to be deleted from the pool. * must begin or end with a '*' * @return target - the old target. */ public Object delete(String re) { Object o = null; RegexpNode p = prefixMachine; RegexpNode best = p; int len = re.length() - 1; int i; boolean prefix = true; if (!re.startsWith("*") || !re.endsWith("*")) len++; if (len <= 0) return null; /* March forward through the prefix machine */ for (i = 0; p != null; i++) { if (p.result != null && p.depth < BIG && (!p.exact || i == len)) { best = p; } if (i >= len) break; p = p.find(re.charAt(i)); } /* march backward through the suffix machine */ p = suffixMachine; for (i = len; --i >= 0 && p != null;) { if (p.result != null && p.depth < BIG) { prefix = false; best = p; } p = p.find(re.charAt(i)); } // delete only if there is an exact match if (prefix) { if (re.equals(best.re)) { o = best.result; best.result = null; } } else { if (re.equals(best.re)) { o = best.result; best.result = null; } } return o; } /** Search for a match to a string & return the object associated with it with the match. When multiple regular expressions would match the string, the best match is returned first. The next best match is returned the next time matchNext is called. @param s The string to match against the regular expressions in the pool. @return null on failure, otherwise the object associated with the regular expression when it was added to the pool. If the object is an instance of RegexpTarget, then the return value is the result from calling return.found(string_that_matched_wildcard). */ public Object match(String s) { return matchAfter(s, BIG); } /** Identical to match except that it will only find matches to regular expressions that were added to the pool after the last regular expression that matched in the last call to match() or matchNext() */ public Object matchNext(String s) { return matchAfter(s, lastDepth); } private void add(String re, Object ret, boolean replace) throws REException { int len = re.length(); RegexpNode p; if (re.charAt(0) == '*') { p = suffixMachine; while (len > 1) p = p.add(re.charAt(--len)); } else { boolean exact = false; if (re.charAt(len - 1) == '*') len--; else exact = true; p = prefixMachine; for (int i = 0; i < len; i++) p = p.add(re.charAt(i)); p.exact = exact; } if (p.result != null && !replace) throw new REException(re + " is a duplicate"); p.re = re; p.result = ret; } private Object matchAfter(String s, int lastMatchDepth) { RegexpNode p = prefixMachine; RegexpNode best = p; int bst = 0; int bend = 0; int len = s.length(); int i; if (len <= 0) return null; /* March forward through the prefix machine */ for (i = 0; p != null; i++) { if (p.result != null && p.depth < lastMatchDepth && (!p.exact || i == len)) { lastDepth = p.depth; best = p; bst = i; bend = len; } if (i >= len) break; p = p.find(s.charAt(i)); } /* march backward through the suffix machine */ p = suffixMachine; for (i = len; --i >= 0 && p != null;) { if (p.result != null && p.depth < lastMatchDepth) { lastDepth = p.depth; best = p; bst = 0; bend = i+1; } p = p.find(s.charAt(i)); } Object o = best.result; if (o != null && o instanceof RegexpTarget) o = ((RegexpTarget) o).found(s.substring(bst, bend)); return o; } /** Resets the pool so that the next call to matchNext looks at all regular expressions in the pool. match(s); is equivalent to reset(); matchNext(s);
Multithreading note: reset/nextMatch leave state in the regular expression pool. If multiple threads could be using this pool this way, they should be syncronized to avoid race hazards. match() was done in such a way that there are no such race hazards: multiple threads can be matching in the same pool simultaneously. */ public void reset() { lastDepth = BIG; } /** Print this pool to standard output */ public void print(PrintStream out) { out.print("Regexp pool:\n"); if (suffixMachine.firstchild != null) { out.print(" Suffix machine: "); suffixMachine.firstchild.print(out); out.print("\n"); } if (prefixMachine.firstchild != null) { out.print(" Prefix machine: "); prefixMachine.firstchild.print(out); out.print("\n"); } } } /* A node in a regular expression finite state machine. */ class RegexpNode { char c; RegexpNode firstchild; RegexpNode nextsibling; int depth; boolean exact; Object result; String re = null; RegexpNode () { c = '#'; depth = 0; } RegexpNode (char C, int depth) { c = C; this.depth = depth; } RegexpNode add(char C) { RegexpNode p = firstchild; if (p == null) p = new RegexpNode (C, depth+1); else { while (p != null) if (p.c == C) return p; else p = p.nextsibling; p = new RegexpNode (C, depth+1); p.nextsibling = firstchild; } firstchild = p; return p; } RegexpNode find(char C) { for (RegexpNode p = firstchild; p != null; p = p.nextsibling) if (p.c == C) return p; return null; } void print(PrintStream out) { if (nextsibling != null) { RegexpNode p = this; out.print("("); while (p != null) { out.write(p.c); if (p.firstchild != null) p.firstchild.print(out); p = p.nextsibling; out.write(p != null ? '|' : ')'); } } else { out.write(c); if (firstchild != null) firstchild.print(out); } } }