1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the  "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 /*
19  * $Id: DTMStringPool.java 468653 2006-10-28 07:07:05Z minchau $
20  */
21 
22 package org.apache.xml.dtm.ref;
23 
24 import java.util.Vector;
25 
26 import org.apache.xml.utils.IntVector;
27 
28 /** <p>DTMStringPool is an "interning" mechanism for strings. It will
29  * create a stable 1:1 mapping between a set of string values and a set of
30  * integer index values, so the integers can be used to reliably and
31  * uniquely identify (and when necessary retrieve) the strings.</p>
32  *
33  * <p>Design Priorities:
34  * <ul>
35  * <li>String-to-index lookup speed is critical.</li>
36  * <li>Index-to-String lookup speed is slightly less so.</li>
37  * <li>Threadsafety is not guaranteed at this level.
38  * Enforce that in the application if needed.</li>
39  * <li>Storage efficiency is an issue but not a huge one.
40  * It is expected that string pools won't exceed about 2000 entries.</li>
41  * </ul>
42  * </p>
43  *
44  * <p>Implementation detail: A standard Hashtable is relatively
45  * inefficient when looking up primitive int values, especially when
46  * we're already maintaining an int-to-string vector.  So I'm
47  * maintaining a simple hash chain within this class.</p>
48  *
49  * <p>NOTE: There is nothing in the code that has a real dependency upon
50  * String. It would work with any object type that implements reliable
51  * .hashCode() and .equals() operations. The API enforces Strings because
52  * it's safer that way, but this could trivially be turned into a general
53  * ObjectPool if one was needed.</p>
54  *
55  * <p>Status: Passed basic test in main().</p>
56  * */
57 public class DTMStringPool
58 {
59   Vector m_intToString;
60   static final int HASHPRIME=101;
61   int[] m_hashStart=new int[HASHPRIME];
62   IntVector m_hashChain;
63   public static final int NULL=-1;
64 
65   /**
66    * Create a DTMStringPool using the given chain size
67    *
68    * @param chainSize The size of the hash chain vector
69    */
DTMStringPool(int chainSize)70   public DTMStringPool(int chainSize)
71     {
72       m_intToString=new Vector();
73       m_hashChain=new IntVector(chainSize);
74       removeAllElements();
75 
76       // -sb Add this to force empty strings to be index 0.
77       stringToIndex("");
78     }
79 
DTMStringPool()80   public DTMStringPool()
81     {
82       this(512);
83     }
84 
removeAllElements()85   public void removeAllElements()
86     {
87       m_intToString.removeAllElements();
88       for(int i=0;i<HASHPRIME;++i)
89         m_hashStart[i]=NULL;
90       m_hashChain.removeAllElements();
91     }
92 
93   /** @return string whose value is uniquely identified by this integer index.
94    * @throws java.lang.ArrayIndexOutOfBoundsException
95    *  if index doesn't map to a string.
96    * */
indexToString(int i)97   public String indexToString(int i)
98     throws java.lang.ArrayIndexOutOfBoundsException
99     {
100       if(i==NULL) return null;
101       return (String) m_intToString.elementAt(i);
102     }
103 
104   /** @return integer index uniquely identifying the value of this string. */
stringToIndex(String s)105   public int stringToIndex(String s)
106     {
107       if(s==null) return NULL;
108 
109       int hashslot=s.hashCode()%HASHPRIME;
110       if(hashslot<0) hashslot=-hashslot;
111 
112       // Is it one we already know?
113       int hashlast=m_hashStart[hashslot];
114       int hashcandidate=hashlast;
115       while(hashcandidate!=NULL)
116         {
117           if(m_intToString.elementAt(hashcandidate).equals(s))
118             return hashcandidate;
119 
120           hashlast=hashcandidate;
121           hashcandidate=m_hashChain.elementAt(hashcandidate);
122         }
123 
124       // New value. Add to tables.
125       int newIndex=m_intToString.size();
126       m_intToString.addElement(s);
127 
128       m_hashChain.addElement(NULL);	// Initialize to no-following-same-hash
129       if(hashlast==NULL)  // First for this hash
130         m_hashStart[hashslot]=newIndex;
131       else // Link from previous with same hash
132         m_hashChain.setElementAt(newIndex,hashlast);
133 
134       return newIndex;
135     }
136 
137   /** Command-line unit test driver. This test relies on the fact that
138    * this version of the pool assigns indices consecutively, starting
139    * from zero, as new unique strings are encountered.
140    */
main(String[] args)141   public static void main(String[] args)
142   {
143     String[] word={
144       "Zero","One","Two","Three","Four","Five",
145       "Six","Seven","Eight","Nine","Ten",
146       "Eleven","Twelve","Thirteen","Fourteen","Fifteen",
147       "Sixteen","Seventeen","Eighteen","Nineteen","Twenty",
148       "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four",
149       "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight",
150       "Twenty-Nine","Thirty","Thirty-One","Thirty-Two",
151       "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six",
152       "Thirty-Seven","Thirty-Eight","Thirty-Nine"};
153 
154     DTMStringPool pool=new DTMStringPool();
155 
156     System.out.println("If no complaints are printed below, we passed initial test.");
157 
158     for(int pass=0;pass<=1;++pass)
159       {
160         int i;
161 
162         for(i=0;i<word.length;++i)
163           {
164             int j=pool.stringToIndex(word[i]);
165             if(j!=i)
166               System.out.println("\tMismatch populating pool: assigned "+
167                                  j+" for create "+i);
168           }
169 
170         for(i=0;i<word.length;++i)
171           {
172             int j=pool.stringToIndex(word[i]);
173             if(j!=i)
174               System.out.println("\tMismatch in stringToIndex: returned "+
175                                  j+" for lookup "+i);
176           }
177 
178         for(i=0;i<word.length;++i)
179           {
180             String w=pool.indexToString(i);
181             if(!word[i].equals(w))
182               System.out.println("\tMismatch in indexToString: returned"+
183                                  w+" for lookup "+i);
184           }
185 
186         pool.removeAllElements();
187 
188         System.out.println("\nPass "+pass+" complete\n");
189       } // end pass loop
190   }
191 }
192