1 // =================================================================================================
2 // ADOBE SYSTEMS INCORPORATED
3 // Copyright 2006 Adobe Systems Incorporated
4 // All Rights Reserved
5 //
6 // NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
7 // of the Adobe license agreement accompanying it.
8 // =================================================================================================
9 
10 package com.adobe.xmp.impl.xpath;
11 
12 import com.adobe.xmp.XMPError;
13 import com.adobe.xmp.XMPException;
14 import com.adobe.xmp.XMPMetaFactory;
15 import com.adobe.xmp.impl.Utils;
16 import com.adobe.xmp.properties.XMPAliasInfo;
17 
18 
19 /**
20  * Parser for XMP XPaths.
21  *
22  * @since   01.03.2006
23  */
24 public final class XMPPathParser
25 {
26 	/**
27 	 * Private constructor
28 	 */
XMPPathParser()29 	private XMPPathParser()
30 	{
31 		// empty
32 	}
33 
34 
35 	/**
36 	 * Split an XMPPath expression apart at the conceptual steps, adding the
37 	 * root namespace prefix to the first property component. The schema URI is
38 	 * put in the first (0th) slot in the expanded XMPPath. Check if the top
39 	 * level component is an alias, but don't resolve it.
40 	 * <p>
41 	 * In the most verbose case steps are separated by '/', and each step can be
42 	 * of these forms:
43 	 * <dl>
44 	 * <dt>prefix:name
45 	 * <dd> A top level property or struct field.
46 	 * <dt>[index]
47 	 * <dd> An element of an array.
48 	 * <dt>[last()]
49 	 * <dd> The last element of an array.
50 	 * <dt>[fieldName=&quot;value&quot;]
51 	 * <dd> An element in an array of structs, chosen by a field value.
52 	 * <dt>[@xml:lang=&quot;value&quot;]
53 	 * <dd> An element in an alt-text array, chosen by the xml:lang qualifier.
54 	 * <dt>[?qualName=&quot;value&quot;]
55 	 * <dd> An element in an array, chosen by a qualifier value.
56 	 * <dt>@xml:lang
57 	 * <dd> An xml:lang qualifier.
58 	 * <dt>?qualName
59 	 * <dd> A general qualifier.
60 	 * </dl>
61 	 * <p>
62 	 * The logic is complicated though by shorthand for arrays, the separating
63 	 * '/' and leading '*' are optional. These are all equivalent: array/*[2]
64 	 * array/[2] array*[2] array[2] All of these are broken into the 2 steps
65 	 * "array" and "[2]".
66 	 * <p>
67 	 * The value portion in the array selector forms is a string quoted by '''
68 	 * or '"'. The value may contain any character including a doubled quoting
69 	 * character. The value may be empty.
70 	 * <p>
71 	 * The syntax isn't checked, but an XML name begins with a letter or '_',
72 	 * and contains letters, digits, '.', '-', '_', and a bunch of special
73 	 * non-ASCII Unicode characters. An XML qualified name is a pair of names
74 	 * separated by a colon.
75 	 * @param schemaNS
76 	 *            schema namespace
77 	 * @param path
78 	 *            property name
79 	 * @return Returns the expandet XMPPath.
80 	 * @throws XMPException
81 	 *             Thrown if the format is not correct somehow.
82 	 *
83 	 */
expandXPath(String schemaNS, String path)84 	public static XMPPath expandXPath(String schemaNS, String path) throws XMPException
85 	{
86 		if (schemaNS == null  ||  path == null)
87 		{
88 			throw new XMPException("Parameter must not be null", XMPError.BADPARAM);
89 		}
90 
91 		XMPPath expandedXPath = new XMPPath();
92 		PathPosition pos = new PathPosition();
93 		pos.path = path;
94 
95 		// Pull out the first component and do some special processing on it: add the schema
96 		// namespace prefix and and see if it is an alias. The start must be a "qualName".
97 		parseRootNode(schemaNS, pos, expandedXPath);
98 
99 		// Now continue to process the rest of the XMPPath string.
100 		while (pos.stepEnd < path.length())
101 		{
102 			pos.stepBegin = pos.stepEnd;
103 
104 			skipPathDelimiter(path, pos);
105 
106 			pos.stepEnd = pos.stepBegin;
107 
108 
109 			XMPPathSegment segment;
110 			if (path.charAt(pos.stepBegin) != '[')
111 			{
112 				// A struct field or qualifier.
113 				segment = parseStructSegment(pos);
114 			}
115 			else
116 			{
117 				// One of the array forms.
118 				segment = parseIndexSegment(pos);
119 			}
120 
121 
122 			if (segment.getKind() == XMPPath.STRUCT_FIELD_STEP)
123 			{
124 				if (segment.getName().charAt(0) == '@')
125 				{
126 					segment.setName("?" + segment.getName().substring(1));
127 					if (!"?xml:lang".equals(segment.getName()))
128 					{
129 						throw new XMPException("Only xml:lang allowed with '@'",
130 								XMPError.BADXPATH);
131 					}
132 				}
133 				if (segment.getName().charAt(0) == '?')
134 				{
135 					pos.nameStart++;
136 					segment.setKind(XMPPath.QUALIFIER_STEP);
137 				}
138 
139 				verifyQualName(pos.path.substring(pos.nameStart, pos.nameEnd));
140 			}
141 			else if (segment.getKind() == XMPPath.FIELD_SELECTOR_STEP)
142 			{
143 				if (segment.getName().charAt(1) == '@')
144 				{
145 					segment.setName("[?" + segment.getName().substring(2));
146 					if (!segment.getName().startsWith("[?xml:lang="))
147 					{
148 						throw new XMPException("Only xml:lang allowed with '@'",
149 								XMPError.BADXPATH);
150 					}
151 				}
152 
153 				if (segment.getName().charAt(1) == '?')
154 				{
155 					pos.nameStart++;
156 					segment.setKind(XMPPath.QUAL_SELECTOR_STEP);
157 					verifyQualName(pos.path.substring(pos.nameStart, pos.nameEnd));
158 				}
159 			}
160 
161 			expandedXPath.add(segment);
162 		}
163 		return expandedXPath;
164 	}
165 
166 
167 	/**
168 	 * @param path
169 	 * @param pos
170 	 * @throws XMPException
171 	 */
skipPathDelimiter(String path, PathPosition pos)172 	private static void skipPathDelimiter(String path, PathPosition pos) throws XMPException
173 	{
174 		if (path.charAt(pos.stepBegin) == '/')
175 		{
176 			// skip slash
177 
178 			pos.stepBegin++;
179 
180 			// added for Java
181 			if (pos.stepBegin >= path.length())
182 			{
183 				throw new XMPException("Empty XMPPath segment", XMPError.BADXPATH);
184 			}
185 		}
186 
187 		if (path.charAt(pos.stepBegin) == '*')
188 		{
189 			// skip asterisk
190 
191 			pos.stepBegin++;
192 			if (pos.stepBegin >= path.length() || path.charAt(pos.stepBegin) != '[')
193 			{
194 				throw new XMPException("Missing '[' after '*'", XMPError.BADXPATH);
195 			}
196 		}
197 	}
198 
199 
200 	/**
201 	 * Parses a struct segment
202 	 * @param pos the current position in the path
203 	 * @return Retusn the segment or an errror
204 	 * @throws XMPException If the sement is empty
205 	 */
parseStructSegment(PathPosition pos)206 	private static XMPPathSegment parseStructSegment(PathPosition pos) throws XMPException
207 	{
208 		pos.nameStart = pos.stepBegin;
209 		while (pos.stepEnd < pos.path.length() && "/[*".indexOf(pos.path.charAt(pos.stepEnd)) < 0)
210 		{
211 			pos.stepEnd++;
212 		}
213 		pos.nameEnd = pos.stepEnd;
214 
215 		if (pos.stepEnd == pos.stepBegin)
216 		{
217 			throw new XMPException("Empty XMPPath segment", XMPError.BADXPATH);
218 		}
219 
220 		// ! Touch up later, also changing '@' to '?'.
221 		XMPPathSegment segment = new XMPPathSegment(pos.path.substring(pos.stepBegin, pos.stepEnd),
222 				XMPPath.STRUCT_FIELD_STEP);
223 		return segment;
224 	}
225 
226 
227 	/**
228 	 * Parses an array index segment.
229 	 *
230 	 * @param pos the xmp path
231 	 * @return Returns the segment or an error
232 	 * @throws XMPException thrown on xmp path errors
233 	 *
234 	 */
parseIndexSegment(PathPosition pos)235 	private static XMPPathSegment parseIndexSegment(PathPosition pos) throws XMPException
236 	{
237 		XMPPathSegment segment;
238 		pos.stepEnd++; // Look at the character after the leading '['.
239 
240 		if ('0' <= pos.path.charAt(pos.stepEnd) && pos.path.charAt(pos.stepEnd) <= '9')
241 		{
242 			// A numeric (decimal integer) array index.
243 			while (pos.stepEnd < pos.path.length() && '0' <= pos.path.charAt(pos.stepEnd)
244 					&& pos.path.charAt(pos.stepEnd) <= '9')
245 			{
246 				pos.stepEnd++;
247 			}
248 
249 			segment = new XMPPathSegment(null, XMPPath.ARRAY_INDEX_STEP);
250 		}
251 		else
252 		{
253 			// Could be "[last()]" or one of the selector forms. Find the ']' or '='.
254 
255 			while (pos.stepEnd < pos.path.length() && pos.path.charAt(pos.stepEnd) != ']'
256 					&& pos.path.charAt(pos.stepEnd) != '=')
257 			{
258 				pos.stepEnd++;
259 			}
260 
261 			if (pos.stepEnd >= pos.path.length())
262 			{
263 				throw new XMPException("Missing ']' or '=' for array index", XMPError.BADXPATH);
264 			}
265 
266 			if (pos.path.charAt(pos.stepEnd) == ']')
267 			{
268 				if (!"[last()".equals(pos.path.substring(pos.stepBegin, pos.stepEnd)))
269 				{
270 					throw new XMPException(
271 						"Invalid non-numeric array index", XMPError.BADXPATH);
272 				}
273 				segment = new XMPPathSegment(null, XMPPath.ARRAY_LAST_STEP);
274 			}
275 			else
276 			{
277 				pos.nameStart = pos.stepBegin + 1;
278 				pos.nameEnd = pos.stepEnd;
279 				pos.stepEnd++; // Absorb the '=', remember the quote.
280 				char quote = pos.path.charAt(pos.stepEnd);
281 				if (quote != '\'' && quote != '"')
282 				{
283 					throw new XMPException(
284 						"Invalid quote in array selector", XMPError.BADXPATH);
285 				}
286 
287 				pos.stepEnd++; // Absorb the leading quote.
288 				while (pos.stepEnd < pos.path.length())
289 				{
290 					if (pos.path.charAt(pos.stepEnd) == quote)
291 					{
292 						// check for escaped quote
293 						if (pos.stepEnd + 1 >= pos.path.length()
294 								|| pos.path.charAt(pos.stepEnd + 1) != quote)
295 						{
296 							break;
297 						}
298 						pos.stepEnd++;
299 					}
300 					pos.stepEnd++;
301 				}
302 
303 				if (pos.stepEnd >= pos.path.length())
304 				{
305 					throw new XMPException("No terminating quote for array selector",
306 							XMPError.BADXPATH);
307 				}
308 				pos.stepEnd++; // Absorb the trailing quote.
309 
310 				// ! Touch up later, also changing '@' to '?'.
311 				segment = new XMPPathSegment(null, XMPPath.FIELD_SELECTOR_STEP);
312 			}
313 		}
314 
315 
316 		if (pos.stepEnd >= pos.path.length() || pos.path.charAt(pos.stepEnd) != ']')
317 		{
318 			throw new XMPException("Missing ']' for array index", XMPError.BADXPATH);
319 		}
320 		pos.stepEnd++;
321 		segment.setName(pos.path.substring(pos.stepBegin, pos.stepEnd));
322 
323 		return segment;
324 	}
325 
326 
327 	/**
328 	 * Parses the root node of an XMP Path, checks if namespace and prefix fit together
329 	 * and resolve the property to the base property if it is an alias.
330 	 * @param schemaNS the root namespace
331 	 * @param pos the parsing position helper
332 	 * @param expandedXPath  the path to contribute to
333 	 * @throws XMPException If the path is not valid.
334 	 */
parseRootNode(String schemaNS, PathPosition pos, XMPPath expandedXPath)335 	private static void parseRootNode(String schemaNS, PathPosition pos, XMPPath expandedXPath)
336 			throws XMPException
337 	{
338 		while (pos.stepEnd < pos.path.length() && "/[*".indexOf(pos.path.charAt(pos.stepEnd)) < 0)
339 		{
340 			pos.stepEnd++;
341 		}
342 
343 		if (pos.stepEnd == pos.stepBegin)
344 		{
345 			throw new XMPException("Empty initial XMPPath step", XMPError.BADXPATH);
346 		}
347 
348 		String rootProp = verifyXPathRoot(schemaNS, pos.path.substring(pos.stepBegin, pos.stepEnd));
349 		XMPAliasInfo aliasInfo = XMPMetaFactory.getSchemaRegistry().findAlias(rootProp);
350 		if (aliasInfo == null)
351 		{
352 			// add schema xpath step
353 			expandedXPath.add(new XMPPathSegment(schemaNS, XMPPath.SCHEMA_NODE));
354 			XMPPathSegment rootStep = new XMPPathSegment(rootProp, XMPPath.STRUCT_FIELD_STEP);
355 			expandedXPath.add(rootStep);
356 		}
357 		else
358 		{
359 			// add schema xpath step and base step of alias
360 			expandedXPath.add(new XMPPathSegment(aliasInfo.getNamespace(), XMPPath.SCHEMA_NODE));
361 			XMPPathSegment rootStep = new XMPPathSegment(verifyXPathRoot(aliasInfo.getNamespace(),
362 					aliasInfo.getPropName()),
363 					XMPPath.STRUCT_FIELD_STEP);
364 			rootStep.setAlias(true);
365 			rootStep.setAliasForm(aliasInfo.getAliasForm().getOptions());
366 			expandedXPath.add(rootStep);
367 
368 			if (aliasInfo.getAliasForm().isArrayAltText())
369 			{
370 				XMPPathSegment qualSelectorStep = new XMPPathSegment("[?xml:lang='x-default']",
371 						XMPPath.QUAL_SELECTOR_STEP);
372 				qualSelectorStep.setAlias(true);
373 				qualSelectorStep.setAliasForm(aliasInfo.getAliasForm().getOptions());
374 				expandedXPath.add(qualSelectorStep);
375 			}
376 			else if (aliasInfo.getAliasForm().isArray())
377 			{
378 				XMPPathSegment indexStep = new XMPPathSegment("[1]",
379 					XMPPath.ARRAY_INDEX_STEP);
380 				indexStep.setAlias(true);
381 				indexStep.setAliasForm(aliasInfo.getAliasForm().getOptions());
382 				expandedXPath.add(indexStep);
383 			}
384 		}
385 	}
386 
387 
388 	/**
389 	 * Verifies whether the qualifier name is not XML conformant or the
390 	 * namespace prefix has not been registered.
391 	 *
392 	 * @param qualName
393 	 *            a qualifier name
394 	 * @throws XMPException
395 	 *             If the name is not conformant
396 	 */
verifyQualName(String qualName)397 	private static void verifyQualName(String qualName) throws XMPException
398 	{
399 		int colonPos = qualName.indexOf(':');
400 		if (colonPos > 0)
401 		{
402 			String prefix = qualName.substring(0, colonPos);
403 			if (Utils.isXMLNameNS(prefix))
404 			{
405 				String regURI = XMPMetaFactory.getSchemaRegistry().getNamespaceURI(
406 						prefix);
407 				if (regURI != null)
408 				{
409 					return;
410 				}
411 
412 				throw new XMPException("Unknown namespace prefix for qualified name",
413 						XMPError.BADXPATH);
414 			}
415 		}
416 
417 		throw new XMPException("Ill-formed qualified name", XMPError.BADXPATH);
418 	}
419 
420 
421 	/**
422 	 * Verify if an XML name is conformant.
423 	 *
424 	 * @param name
425 	 *            an XML name
426 	 * @throws XMPException
427 	 *             When the name is not XML conformant
428 	 */
verifySimpleXMLName(String name)429 	private static void verifySimpleXMLName(String name) throws XMPException
430 	{
431 		if (!Utils.isXMLName(name))
432 		{
433 			throw new XMPException("Bad XML name", XMPError.BADXPATH);
434 		}
435 	}
436 
437 
438 	/**
439 	 * Set up the first 2 components of the expanded XMPPath. Normalizes the various cases of using
440 	 * the full schema URI and/or a qualified root property name. Returns true for normal
441 	 * processing. If allowUnknownSchemaNS is true and the schema namespace is not registered, false
442 	 * is returned. If allowUnknownSchemaNS is false and the schema namespace is not registered, an
443 	 * exception is thrown
444 	 * <P>
445 	 * (Should someday check the full syntax:)
446 	 *
447 	 * @param schemaNS schema namespace
448 	 * @param rootProp the root xpath segment
449 	 * @return Returns root QName.
450 	 * @throws XMPException Thrown if the format is not correct somehow.
451 	 */
verifyXPathRoot(String schemaNS, String rootProp)452 	private static String verifyXPathRoot(String schemaNS, String rootProp)
453 		throws XMPException
454 	{
455 		// Do some basic checks on the URI and name. Try to lookup the URI. See if the name is
456 		// qualified.
457 
458 		if (schemaNS == null || schemaNS.length() == 0)
459 		{
460 			throw new XMPException(
461 				"Schema namespace URI is required", XMPError.BADSCHEMA);
462 		}
463 
464 		if ((rootProp.charAt(0) == '?') || (rootProp.charAt(0) == '@'))
465 		{
466 			throw new XMPException("Top level name must not be a qualifier", XMPError.BADXPATH);
467 		}
468 
469 		if (rootProp.indexOf('/') >= 0 || rootProp.indexOf('[') >= 0)
470 		{
471 			throw new XMPException("Top level name must be simple", XMPError.BADXPATH);
472 		}
473 
474 		String prefix = XMPMetaFactory.getSchemaRegistry().getNamespacePrefix(schemaNS);
475 		if (prefix == null)
476 		{
477 			throw new XMPException("Unregistered schema namespace URI", XMPError.BADSCHEMA);
478 		}
479 
480 		// Verify the various URI and prefix combinations. Initialize the
481 		// expanded XMPPath.
482 		int colonPos = rootProp.indexOf(':');
483 		if (colonPos < 0)
484 		{
485 			// The propName is unqualified, use the schemaURI and associated
486 			// prefix.
487 			verifySimpleXMLName(rootProp); // Verify the part before any colon
488 			return prefix + rootProp;
489 		}
490 		else
491 		{
492 			// The propName is qualified. Make sure the prefix is legit. Use the associated URI and
493 			// qualified name.
494 
495 			// Verify the part before any colon
496 			verifySimpleXMLName(rootProp.substring(0, colonPos));
497 			verifySimpleXMLName(rootProp.substring(colonPos));
498 
499 			prefix = rootProp.substring(0, colonPos + 1);
500 
501 			String regPrefix = XMPMetaFactory.getSchemaRegistry().getNamespacePrefix(schemaNS);
502 			if (regPrefix == null)
503 			{
504 				throw new XMPException("Unknown schema namespace prefix", XMPError.BADSCHEMA);
505 			}
506 			if (!prefix.equals(regPrefix))
507 			{
508 				throw new XMPException("Schema namespace URI and prefix mismatch",
509 						XMPError.BADSCHEMA);
510 			}
511 
512 			return rootProp;
513 		}
514 	}
515 }
516 
517 
518 
519 
520 
/**
 * Mutable cursor that is shared by the parse steps of the path parser.
 * All offsets are character indices into {@link #path} and start at zero.
 */
class PathPosition
{
	/** The complete path that is parsed; <code>null</code> until assigned. */
	public String path;

	/** Index where the name of the current segment starts. */
	int nameStart;

	/** Index behind the name of the current segment. */
	int nameEnd;

	/** Index where the current step starts. */
	int stepBegin;

	/** Index behind the current step. */
	int stepEnd;
}
537 
538