1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the  "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 /*
19  * $Id: Lexer.java 524810 2007-04-02 15:51:55Z zongaro $
20  */
21 package org.apache.xpath.compiler;
22 
23 import java.util.Vector;
24 
25 import org.apache.xml.utils.PrefixResolver;
26 import org.apache.xpath.res.XPATHErrorResources;
27 
28 /**
29  * This class is in charge of lexical processing of the XPath
30  * expression into tokens.
31  */
32 class Lexer
33 {
34 
35   /**
36    * The target XPath.
37    */
38   private Compiler m_compiler;
39 
40   /**
41    * The prefix resolver to map prefixes to namespaces in the XPath.
42    */
43   PrefixResolver m_namespaceContext;
44 
45   /**
46    * The XPath processor object.
47    */
48   XPathParser m_processor;
49 
50   /**
51    * This value is added to each element name in the TARGETEXTRA
52    * that is a 'target' (right-most top-level element name).
53    */
54   static final int TARGETEXTRA = 10000;
55 
56   /**
57    * Ignore this, it is going away.
58    * This holds a map to the m_tokenQueue that tells where the top-level elements are.
59    * It is used for pattern matching so the m_tokenQueue can be walked backwards.
60    * Each element that is a 'target', (right-most top level element name) has
61    * TARGETEXTRA added to it.
62    *
63    */
64   private int m_patternMap[] = new int[100];
65 
66   /**
67    * Ignore this, it is going away.
68    * The number of elements that m_patternMap maps;
69    */
70   private int m_patternMapSize;
71 
72   /**
73    * Create a Lexer object.
74    *
75    * @param compiler The owning compiler for this lexer.
76    * @param resolver The prefix resolver for mapping qualified name prefixes
77    *                 to namespace URIs.
78    * @param xpathProcessor The parser that is processing strings to opcodes.
79    */
Lexer(Compiler compiler, PrefixResolver resolver, XPathParser xpathProcessor)80   Lexer(Compiler compiler, PrefixResolver resolver,
81         XPathParser xpathProcessor)
82   {
83 
84     m_compiler = compiler;
85     m_namespaceContext = resolver;
86     m_processor = xpathProcessor;
87   }
88 
89   /**
90    * Walk through the expression and build a token queue, and a map of the top-level
91    * elements.
92    * @param pat XSLT Expression.
93    *
94    * @throws javax.xml.transform.TransformerException
95    */
tokenize(String pat)96   void tokenize(String pat) throws javax.xml.transform.TransformerException
97   {
98     tokenize(pat, null);
99   }
100 
101   /**
102    * Walk through the expression and build a token queue, and a map of the top-level
103    * elements.
104    * @param pat XSLT Expression.
105    * @param targetStrings Vector to hold Strings, may be null.
106    *
107    * @throws javax.xml.transform.TransformerException
108    */
tokenize(String pat, Vector targetStrings)109   void tokenize(String pat, Vector targetStrings)
110           throws javax.xml.transform.TransformerException
111   {
112 
113     m_compiler.m_currentPattern = pat;
114     m_patternMapSize = 0;
115 
116     // This needs to grow too.  Use a conservative estimate that the OpMapVector
117     // needs about five time the length of the input path expression - to a
118     // maximum of MAXTOKENQUEUESIZE*5.  If the OpMapVector needs to grow, grow
119     // it freely (second argument to constructor).
120     int initTokQueueSize = ((pat.length() < OpMap.MAXTOKENQUEUESIZE)
121                                  ? pat.length() :  OpMap.MAXTOKENQUEUESIZE) * 5;
122     m_compiler.m_opMap = new OpMapVector(initTokQueueSize,
123                                          OpMap.BLOCKTOKENQUEUESIZE * 5,
124                                          OpMap.MAPINDEX_LENGTH);
125 
126     int nChars = pat.length();
127     int startSubstring = -1;
128     int posOfNSSep = -1;
129     boolean isStartOfPat = true;
130     boolean isAttrName = false;
131     boolean isNum = false;
132 
133     // Nesting of '[' so we can know if the given element should be
134     // counted inside the m_patternMap.
135     int nesting = 0;
136 
137     // char[] chars = pat.toCharArray();
138     for (int i = 0; i < nChars; i++)
139     {
140       char c = pat.charAt(i);
141 
142       switch (c)
143       {
144       case '\"' :
145       {
146         if (startSubstring != -1)
147         {
148           isNum = false;
149           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
150           isAttrName = false;
151 
152           if (-1 != posOfNSSep)
153           {
154             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
155           }
156           else
157           {
158             addToTokenQueue(pat.substring(startSubstring, i));
159           }
160         }
161 
162         startSubstring = i;
163 
164         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);
165 
166         if (c == '\"' && i < nChars)
167         {
168           addToTokenQueue(pat.substring(startSubstring, i + 1));
169 
170           startSubstring = -1;
171         }
172         else
173         {
174           m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
175                             null);  //"misquoted literal... expected double quote!");
176         }
177       }
178       break;
179       case '\'' :
180         if (startSubstring != -1)
181         {
182           isNum = false;
183           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
184           isAttrName = false;
185 
186           if (-1 != posOfNSSep)
187           {
188             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
189           }
190           else
191           {
192             addToTokenQueue(pat.substring(startSubstring, i));
193           }
194         }
195 
196         startSubstring = i;
197 
198         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);
199 
200         if (c == '\'' && i < nChars)
201         {
202           addToTokenQueue(pat.substring(startSubstring, i + 1));
203 
204           startSubstring = -1;
205         }
206         else
207         {
208           m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
209                             null);  //"misquoted literal... expected single quote!");
210         }
211         break;
212       case 0x0A :
213       case 0x0D :
214       case ' ' :
215       case '\t' :
216         if (startSubstring != -1)
217         {
218           isNum = false;
219           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
220           isAttrName = false;
221 
222           if (-1 != posOfNSSep)
223           {
224             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
225           }
226           else
227           {
228             addToTokenQueue(pat.substring(startSubstring, i));
229           }
230 
231           startSubstring = -1;
232         }
233         break;
234       case '@' :
235         isAttrName = true;
236 
237       // fall-through on purpose
238       case '-' :
239         if ('-' == c)
240         {
241           if (!(isNum || (startSubstring == -1)))
242           {
243             break;
244           }
245 
246           isNum = false;
247         }
248 
249       // fall-through on purpose
250       case '(' :
251       case '[' :
252       case ')' :
253       case ']' :
254       case '|' :
255       case '/' :
256       case '*' :
257       case '+' :
258       case '=' :
259       case ',' :
260       case '\\' :  // Unused at the moment
261       case '^' :  // Unused at the moment
262       case '!' :  // Unused at the moment
263       case '$' :
264       case '<' :
265       case '>' :
266         if (startSubstring != -1)
267         {
268           isNum = false;
269           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
270           isAttrName = false;
271 
272           if (-1 != posOfNSSep)
273           {
274             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
275           }
276           else
277           {
278             addToTokenQueue(pat.substring(startSubstring, i));
279           }
280 
281           startSubstring = -1;
282         }
283         else if (('/' == c) && isStartOfPat)
284         {
285           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
286         }
287         else if ('*' == c)
288         {
289           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
290           isAttrName = false;
291         }
292 
293         if (0 == nesting)
294         {
295           if ('|' == c)
296           {
297             if (null != targetStrings)
298             {
299               recordTokenString(targetStrings);
300             }
301 
302             isStartOfPat = true;
303           }
304         }
305 
306         if ((')' == c) || (']' == c))
307         {
308           nesting--;
309         }
310         else if (('(' == c) || ('[' == c))
311         {
312           nesting++;
313         }
314 
315         addToTokenQueue(pat.substring(i, i + 1));
316         break;
317       case ':' :
318         if (i>0)
319         {
320           if (posOfNSSep == (i - 1))
321           {
322             if (startSubstring != -1)
323             {
324               if (startSubstring < (i - 1))
325                 addToTokenQueue(pat.substring(startSubstring, i - 1));
326             }
327 
328             isNum = false;
329             isAttrName = false;
330             startSubstring = -1;
331             posOfNSSep = -1;
332 
333             addToTokenQueue(pat.substring(i - 1, i + 1));
334 
335             break;
336           }
337           else
338           {
339             posOfNSSep = i;
340           }
341         }
342 
343       // fall through on purpose
344       default :
345         if (-1 == startSubstring)
346         {
347           startSubstring = i;
348           isNum = Character.isDigit(c);
349         }
350         else if (isNum)
351         {
352           isNum = Character.isDigit(c);
353         }
354       }
355     }
356 
357     if (startSubstring != -1)
358     {
359       isNum = false;
360       isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
361 
362       if ((-1 != posOfNSSep) ||
363          ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
364       {
365         posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
366       }
367       else
368       {
369         addToTokenQueue(pat.substring(startSubstring, nChars));
370       }
371     }
372 
373     if (0 == m_compiler.getTokenQueueSize())
374     {
375       m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null);  //"Empty expression!");
376     }
377     else if (null != targetStrings)
378     {
379       recordTokenString(targetStrings);
380     }
381 
382     m_processor.m_queueMark = 0;
383   }
384 
385   /**
386    * Record the current position on the token queue as long as
387    * this is a top-level element.  Must be called before the
388    * next token is added to the m_tokenQueue.
389    *
390    * @param nesting The nesting count for the pattern element.
391    * @param isStart true if this is the start of a pattern.
392    * @param isAttrName true if we have determined that this is an attribute name.
393    *
394    * @return true if this is the start of a pattern.
395    */
mapPatternElemPos(int nesting, boolean isStart, boolean isAttrName)396   private boolean mapPatternElemPos(int nesting, boolean isStart,
397                                     boolean isAttrName)
398   {
399 
400     if (0 == nesting)
401     {
402       if(m_patternMapSize >= m_patternMap.length)
403       {
404         int patternMap[] = m_patternMap;
405         int len = m_patternMap.length;
406         m_patternMap = new int[m_patternMapSize + 100];
407         System.arraycopy(patternMap, 0, m_patternMap, 0, len);
408       }
409       if (!isStart)
410       {
411         m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
412       }
413       m_patternMap[m_patternMapSize] =
414         (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
415 
416       m_patternMapSize++;
417 
418       isStart = false;
419     }
420 
421     return isStart;
422   }
423 
424   /**
425    * Given a map pos, return the corresponding token queue pos.
426    *
427    * @param i The index in the m_patternMap.
428    *
429    * @return the token queue position.
430    */
getTokenQueuePosFromMap(int i)431   private int getTokenQueuePosFromMap(int i)
432   {
433 
434     int pos = m_patternMap[i];
435 
436     return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
437   }
438 
439   /**
440    * Reset token queue mark and m_token to a
441    * given position.
442    * @param mark The new position.
443    */
resetTokenMark(int mark)444   private final void resetTokenMark(int mark)
445   {
446 
447     int qsz = m_compiler.getTokenQueueSize();
448 
449     m_processor.m_queueMark = (mark > 0)
450                               ? ((mark <= qsz) ? mark - 1 : mark) : 0;
451 
452     if (m_processor.m_queueMark < qsz)
453     {
454       m_processor.m_token =
455         (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
456       m_processor.m_tokenChar = m_processor.m_token.charAt(0);
457     }
458     else
459     {
460       m_processor.m_token = null;
461       m_processor.m_tokenChar = 0;
462     }
463   }
464 
465   /**
466    * Given a string, return the corresponding keyword token.
467    *
468    * @param key The keyword.
469    *
470    * @return An opcode value.
471    */
getKeywordToken(String key)472   final int getKeywordToken(String key)
473   {
474 
475     int tok;
476 
477     try
478     {
479       Integer itok = (Integer) Keywords.getKeyWord(key);
480 
481       tok = (null != itok) ? itok.intValue() : 0;
482     }
483     catch (NullPointerException npe)
484     {
485       tok = 0;
486     }
487     catch (ClassCastException cce)
488     {
489       tok = 0;
490     }
491 
492     return tok;
493   }
494 
495   /**
496    * Record the current token in the passed vector.
497    *
498    * @param targetStrings Vector of string.
499    */
recordTokenString(Vector targetStrings)500   private void recordTokenString(Vector targetStrings)
501   {
502 
503     int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
504 
505     resetTokenMark(tokPos + 1);
506 
507     if (m_processor.lookahead('(', 1))
508     {
509       int tok = getKeywordToken(m_processor.m_token);
510 
511       switch (tok)
512       {
513       case OpCodes.NODETYPE_COMMENT :
514         targetStrings.addElement(PsuedoNames.PSEUDONAME_COMMENT);
515         break;
516       case OpCodes.NODETYPE_TEXT :
517         targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT);
518         break;
519       case OpCodes.NODETYPE_NODE :
520         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
521         break;
522       case OpCodes.NODETYPE_ROOT :
523         targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT);
524         break;
525       case OpCodes.NODETYPE_ANYELEMENT :
526         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
527         break;
528       case OpCodes.NODETYPE_PI :
529         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
530         break;
531       default :
532         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
533       }
534     }
535     else
536     {
537       if (m_processor.tokenIs('@'))
538       {
539         tokPos++;
540 
541         resetTokenMark(tokPos + 1);
542       }
543 
544       if (m_processor.lookahead(':', 1))
545       {
546         tokPos += 2;
547       }
548 
549       targetStrings.addElement(m_compiler.getTokenQueue().elementAt(tokPos));
550     }
551   }
552 
553   /**
554    * Add a token to the token queue.
555    *
556    *
557    * @param s The token.
558    */
addToTokenQueue(String s)559   private final void addToTokenQueue(String s)
560   {
561     m_compiler.getTokenQueue().addElement(s);
562   }
563 
564   /**
565    * When a seperator token is found, see if there's a element name or
566    * the like to map.
567    *
568    * @param pat The XPath name string.
569    * @param startSubstring The start of the name string.
570    * @param posOfNSSep The position of the namespace seperator (':').
571    * @param posOfScan The end of the name index.
572    *
573    * @throws javax.xml.transform.TransformerException
574    *
575    * @return -1 always.
576    */
mapNSTokens(String pat, int startSubstring, int posOfNSSep, int posOfScan)577   private int mapNSTokens(String pat, int startSubstring, int posOfNSSep,
578                           int posOfScan)
579            throws javax.xml.transform.TransformerException
580  {
581 
582     String prefix = "";
583 
584     if ((startSubstring >= 0) && (posOfNSSep >= 0))
585     {
586        prefix = pat.substring(startSubstring, posOfNSSep);
587     }
588     String uName;
589 
590     if ((null != m_namespaceContext) &&!prefix.equals("*")
591             &&!prefix.equals("xmlns"))
592     {
593       try
594       {
595         if (prefix.length() > 0)
596           uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
597             prefix);
598         else
599         {
600 
601           // Assume last was wildcard. This is not legal according
602           // to the draft. Set the below to true to make namespace
603           // wildcards work.
604           if (false)
605           {
606             addToTokenQueue(":");
607 
608             String s = pat.substring(posOfNSSep + 1, posOfScan);
609 
610             if (s.length() > 0)
611               addToTokenQueue(s);
612 
613             return -1;
614           }
615           else
616           {
617             uName =
618               ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
619                 prefix);
620           }
621         }
622       }
623       catch (ClassCastException cce)
624       {
625         uName = m_namespaceContext.getNamespaceForPrefix(prefix);
626       }
627     }
628     else
629     {
630       uName = prefix;
631     }
632 
633     if ((null != uName) && (uName.length() > 0))
634     {
635       addToTokenQueue(uName);
636       addToTokenQueue(":");
637 
638       String s = pat.substring(posOfNSSep + 1, posOfScan);
639 
640       if (s.length() > 0)
641         addToTokenQueue(s);
642     }
643     else
644     {
645         // To older XPath code it doesn't matter if
646         // error() is called or errorForDOM3().
647 		m_processor.errorForDOM3(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
648 						 new String[] {prefix});  //"Prefix must resolve to a namespace: {0}";
649 
650 /** old code commented out 17-Sep-2004
651 // error("Could not locate namespace for prefix: "+prefix);
652 //		  m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
653 //					 new String[] {prefix});  //"Prefix must resolve to a namespace: {0}";
654 */
655 
656       /***  Old code commented out 10-Jan-2001
657       addToTokenQueue(prefix);
658       addToTokenQueue(":");
659 
660       String s = pat.substring(posOfNSSep + 1, posOfScan);
661 
662       if (s.length() > 0)
663         addToTokenQueue(s);
664       ***/
665     }
666 
667     return -1;
668   }
669 }
670