1 // =================================================================================================
2 // ADOBE SYSTEMS INCORPORATED
3 // Copyright 2006 Adobe Systems Incorporated
4 // All Rights Reserved
5 //
6 // NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
7 // of the Adobe license agreement accompanying it.
8 // =================================================================================================
9 
10 package com.adobe.xmp.impl;
11 
12 
13 import com.adobe.xmp.XMPConst;
14 
15 
16 /**
17  * Utility functions for the XMPToolkit implementation.
18  *
19  * @since 06.06.2006
20  */
21 public class Utils implements XMPConst
22 {
23 	/** segments of a UUID */
24 	public static final int UUID_SEGMENT_COUNT = 4;
25 	/** length of a UUID */
26 	public static final int UUID_LENGTH = 32 + UUID_SEGMENT_COUNT;
27 	/** table of XML name start chars (<= 0xFF) */
28 	private  static boolean[] xmlNameStartChars;
29 	/** table of XML name chars (<= 0xFF) */
30 	private static boolean[] xmlNameChars;
31 	/** init char tables */
32 	static
33 	{
initCharTables()34 		initCharTables();
35 	}
36 
37 
38 	/**
39 	 * Private constructor
40 	 */
Utils()41 	private Utils()
42 	{
43 		// EMPTY
44 	}
45 
46 
47 	/**
	 * Normalize an xml:lang value so that comparisons are effectively case
	 * insensitive as required by RFC 3066 (which supersedes RFC 1766). The
	 * normalization rules:
51 	 * <ul>
52 	 * <li> The primary subtag is lower case, the suggested practice of ISO 639.
53 	 * <li> All 2 letter secondary subtags are upper case, the suggested
54 	 * practice of ISO 3166.
55 	 * <li> All other subtags are lower case.
56 	 * </ul>
57 	 *
58 	 * @param value
59 	 *            raw value
60 	 * @return Returns the normalized value.
61 	 */
normalizeLangValue(String value)62 	public static String normalizeLangValue(String value)
63 	{
64 		// don't normalize x-default
65 		if (XMPConst.X_DEFAULT.equals(value))
66 		{
67 			return value;
68 		}
69 
70 		int subTag = 1;
71 		StringBuffer buffer = new StringBuffer();
72 
73 		for (int i = 0; i < value.length(); i++)
74 		{
75 			switch (value.charAt(i))
76 			{
77 			case '-':
78 			case '_':
79 				// move to next subtag and convert underscore to hyphen
80 				buffer.append('-');
81 				subTag++;
82 				break;
83 			case ' ':
84 				// remove spaces
85 				break;
86 			default:
87 				// convert second subtag to uppercase, all other to lowercase
88 				if (subTag != 2)
89 				{
90 					buffer.append(Character.toLowerCase(value.charAt(i)));
91 				}
92 				else
93 				{
94 					buffer.append(Character.toUpperCase(value.charAt(i)));
95 				}
96 			}
97 
98 		}
99 		return buffer.toString();
100 	}
101 
102 
103 	/**
104 	 * Split the name and value parts for field and qualifier selectors:
105 	 * <ul>
106 	 * <li>[qualName="value"] - An element in an array of structs, chosen by a
107 	 * field value.
108 	 * <li>[?qualName="value"] - An element in an array, chosen by a qualifier
109 	 * value.
110 	 * </ul>
	 * The value portion is a string quoted by <code>'</code> or <code>"</code>. The value may
	 * contain any character including a doubled quoting character. The value may be
	 * empty. <em>Note:</em> It is assumed that the expression is formally
	 * correct; no validation is performed.
115 	 *
116 	 * @param selector
117 	 *            the selector
118 	 * @return Returns an array where the first entry contains the name and the
119 	 *         second the value.
120 	 */
splitNameAndValue(String selector)121 	static String[] splitNameAndValue(String selector)
122 	{
123 		// get the name
124 		int eq = selector.indexOf('=');
125 		int pos = 1;
126 		if (selector.charAt(pos) == '?')
127 		{
128 			pos++;
129 		}
130 		String name = selector.substring(pos, eq);
131 
132 		// get the value
133 		pos = eq + 1;
134 		char quote = selector.charAt(pos);
135 		pos++;
136 		int end = selector.length() - 2; // quote and ]
137 		StringBuffer value = new StringBuffer(end - eq);
138 		while (pos < end)
139 		{
140 			value.append(selector.charAt(pos));
141 			pos++;
142 			if (selector.charAt(pos) == quote)
143 			{
144 				// skip one quote in value
145 				pos++;
146 			}
147 		}
148 		return new String[] { name, value.toString() };
149 	}
150 
151 
152 	/**
153 	 *
154 	 * @param schema
155 	 *            a schema namespace
156 	 * @param prop
157 	 *            an XMP Property
158 	 * @return Returns true if the property is defined as &quot;Internal
159 	 *         Property&quot;, see XMP Specification.
160 	 */
isInternalProperty(String schema, String prop)161 	static boolean isInternalProperty(String schema, String prop)
162 	{
163 		boolean isInternal = false;
164 
165 		if (NS_DC.equals(schema))
166 		{
167 			if ("dc:format".equals(prop) || "dc:language".equals(prop))
168 			{
169 				isInternal = true;
170 			}
171 		}
172 		else if (NS_XMP.equals(schema))
173 		{
174 			if ("xmp:BaseURL".equals(prop) || "xmp:CreatorTool".equals(prop)
175 					|| "xmp:Format".equals(prop) || "xmp:Locale".equals(prop)
176 					|| "xmp:MetadataDate".equals(prop) || "xmp:ModifyDate".equals(prop))
177 			{
178 				isInternal = true;
179 			}
180 		}
181 		else if (NS_PDF.equals(schema))
182 		{
183 			if ("pdf:BaseURL".equals(prop) || "pdf:Creator".equals(prop)
184 					|| "pdf:ModDate".equals(prop) || "pdf:PDFVersion".equals(prop)
185 					|| "pdf:Producer".equals(prop))
186 			{
187 				isInternal = true;
188 			}
189 		}
190 		else if (NS_TIFF.equals(schema))
191 		{
192 			isInternal = true;
193 			if ("tiff:ImageDescription".equals(prop) || "tiff:Artist".equals(prop)
194 					|| "tiff:Copyright".equals(prop))
195 			{
196 				isInternal = false;
197 			}
198 		}
199 		else if (NS_EXIF.equals(schema))
200 		{
201 			isInternal = true;
202 			if ("exif:UserComment".equals(prop))
203 			{
204 				isInternal = false;
205 			}
206 		}
207 		else if (NS_EXIF_AUX.equals(schema))
208 		{
209 			isInternal = true;
210 		}
211 		else if (NS_PHOTOSHOP.equals(schema))
212 		{
213 			if ("photoshop:ICCProfile".equals(prop))
214 			{
215 				isInternal = true;
216 			}
217 		}
218 		else if (NS_CAMERARAW.equals(schema))
219 		{
220 			if ("crs:Version".equals(prop) || "crs:RawFileName".equals(prop)
221 					|| "crs:ToneCurveName".equals(prop))
222 			{
223 				isInternal = true;
224 			}
225 		}
226 		else if (NS_ADOBESTOCKPHOTO.equals(schema))
227 		{
228 			isInternal = true;
229 		}
230 		else if (NS_XMP_MM.equals(schema))
231 		{
232 			isInternal = true;
233 		}
234 		else if (TYPE_TEXT.equals(schema))
235 		{
236 			isInternal = true;
237 		}
238 		else if (TYPE_PAGEDFILE.equals(schema))
239 		{
240 			isInternal = true;
241 		}
242 		else if (TYPE_GRAPHICS.equals(schema))
243 		{
244 			isInternal = true;
245 		}
246 		else if (TYPE_IMAGE.equals(schema))
247 		{
248 			isInternal = true;
249 		}
250 		else if (TYPE_FONT.equals(schema))
251 		{
252 			isInternal = true;
253 		}
254 
255 		return isInternal;
256 	}
257 
258 
259 	/**
	 * Check some requirements for a UUID:
	 * <ul>
	 * <li>Length of the UUID is 36 (32 characters plus 4 delimiters)</li>
	 * <li>The delimiter count is 4 and all 4 delimiters are at their expected
	 * positions (8, 13, 18, 23)</li>
265 	 * </ul>
266 	 *
267 	 *
268 	 * @param uuid uuid to test
269 	 * @return true - this is a well formed UUID, false - UUID has not the expected format
270 	 */
271 
checkUUIDFormat(String uuid)272 	static boolean checkUUIDFormat(String uuid)
273 	{
274 		boolean result = true;
275 		int delimCnt = 0;
276 		int delimPos = 0;
277 
278 		if (uuid == null)
279 		{
280 			return false;
281 		}
282 
283 		for (delimPos = 0; delimPos < uuid.length(); delimPos++)
284 		{
285 			if (uuid.charAt(delimPos) == '-')
286 			{
287 				delimCnt++;
288 				result = result  &&
289 					(delimPos == 8 || delimPos == 13 || delimPos == 18 || delimPos == 23);
290 			}
291 		}
292 
293 		return result && UUID_SEGMENT_COUNT == delimCnt && UUID_LENGTH == delimPos;
294 	}
295 
296 
297 	/**
298 	 * Simple check for valid XMLNames. Within ASCII range<br>
299 	 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br>
300 	 * are accepted, above all characters (which is not entirely
301 	 * correct according to the XML Spec.
302 	 *
303 	 * @param name an XML Name
304 	 * @return Return <code>true</code> if the name is correct.
305 	 */
isXMLName(String name)306 	public static boolean isXMLName(String name)
307 	{
308 		if (name.length() > 0  &&  !isNameStartChar(name.charAt(0)))
309 		{
310 			return false;
311 		}
312 		for (int i = 1; i < name.length(); i++)
313 		{
314 			if (!isNameChar(name.charAt(i)))
315 			{
316 				return false;
317 			}
318 		}
319 		return true;
320 	}
321 
322 
323 	/**
324 	 * Checks if the value is a legal "unqualified" XML name, as
325 	 * defined in the XML Namespaces proposed recommendation.
326 	 * These are XML names, except that they must not contain a colon.
327 	 * @param name the value to check
328 	 * @return Returns true if the name is a valid "unqualified" XML name.
329 	 */
isXMLNameNS(String name)330 	public static boolean isXMLNameNS(String name)
331 	{
332 		if (name.length() > 0  &&  (!isNameStartChar(name.charAt(0))  ||  name.charAt(0) == ':'))
333 		{
334 			return false;
335 		}
336 		for (int i = 1; i < name.length(); i++)
337 		{
338 			if (!isNameChar(name.charAt(i))  ||  name.charAt(i) == ':')
339 			{
340 				return false;
341 			}
342 		}
343 		return true;
344 	}
345 
346 
347 	/**
348 	 * @param c  a char
349 	 * @return Returns true if the char is an ASCII control char.
350 	 */
isControlChar(char c)351 	static boolean isControlChar(char c)
352 	{
353 		return (c <= 0x1F  ||  c == 0x7F)  &&
354 				c != 0x09  &&  c != 0x0A  &&  c != 0x0D;
355 	}
356 
357 
358 	/**
	 * Serializes the node value in XML encoding. It is used for tag bodies and
	 * attributes.<br>
	 * <em>Note:</em> The attribute is always delimited by quotes,
	 * that is why <code>&amp;apos;</code> is never serialized.<br>
	 * <em>Note:</em> Control chars are written unescaped, but if the user uses others than tab, LF
	 * and CR the resulting XML will become invalid.
365 	 * @param value a string
366 	 * @param forAttribute flag if string is attribute value (need to additional escape quotes)
367 	 * @param escapeWhitespaces Decides if LF, CR and TAB are escaped.
368 	 * @return Returns the value ready for XML output.
369 	 */
escapeXML(String value, boolean forAttribute, boolean escapeWhitespaces)370 	public static String escapeXML(String value, boolean forAttribute, boolean escapeWhitespaces)
371 	{
372 		// quick check if character are contained that need special treatment
373 		boolean needsEscaping = false;
374 		for (int i = 0; i < value.length (); i++)
375         {
376             char c = value.charAt (i);
377 			if (
378 				 c == '<'  ||  c == '>'  ||  c == '&'  ||							    // XML chars
379 				(escapeWhitespaces  &&  (c == '\t'  ||  c == '\n'  ||  c == '\r'))  ||
380 				(forAttribute  &&  c == '"'))
381 			{
382 				needsEscaping = true;
383 				break;
384 			}
385         }
386 
387 		if (!needsEscaping)
388 		{
389 			// fast path
390 			return value;
391 		}
392 		else
393 		{
394 			// slow path with escaping
395 			StringBuffer buffer = new StringBuffer(value.length() * 4 / 3);
396 	        for (int i = 0; i < value.length (); i++)
397 	        {
398 	            char c = value.charAt (i);
399 	            if (!(escapeWhitespaces  &&  (c == '\t'  ||  c == '\n'  ||  c == '\r')))
400 	            {
401 	            	switch (c)
402 		            {
403 	            		// we do what "Canonical XML" expects
404 	            		// AUDIT: &apos; not serialized as only outer qoutes are used
405 		              	case '<':	buffer.append("&lt;"); continue;
406 		              	case '>':	buffer.append("&gt;"); continue;
407 		              	case '&':	buffer.append("&amp;"); continue;
408 		              	case '"': 	buffer.append(forAttribute ? "&quot;" : "\""); continue;
409 		              	default:	buffer.append(c); continue;
410 		            }
411 		        }
412 	            else
413 	            {
414 	            	// write control chars escaped,
415 	            	// if there are others than tab, LF and CR the xml will become invalid.
416 	            	buffer.append("&#x");
417 	            	buffer.append(Integer.toHexString(c).toUpperCase());
418 	            	buffer.append(';');
419 	            }
420 	        }
421 	        return buffer.toString();
422 		}
423 	}
424 
425 
426 	/**
427 	 * Replaces the ASCII control chars with a space.
428 	 *
429 	 * @param value
430 	 *            a node value
431 	 * @return Returns the cleaned up value
432 	 */
removeControlChars(String value)433 	static String removeControlChars(String value)
434 	{
435 		StringBuffer buffer = new StringBuffer(value);
436 		for (int i = 0; i < buffer.length(); i++)
437 		{
438 			if (isControlChar(buffer.charAt(i)))
439 			{
440 				buffer.setCharAt(i, ' ');
441 			}
442 		}
443 		return buffer.toString();
444 	}
445 
446 
447 	/**
448 	 * Simple check if a character is a valid XML start name char.
449 	 * Within ASCII range<br>
450 	 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br>
451 	 * are accepted, above all characters (which is not entirely
452 	 * correct according to the XML Spec)
453 	 *
454 	 * @param ch a character
455 	 * @return Returns true if the character is a valid first char of an XML name.
456 	 */
isNameStartChar(char ch)457 	private static boolean isNameStartChar(char ch)
458 	{
459 		return ch > 0xFF  ||  xmlNameStartChars[ch];
460 	}
461 
462 
463 	/**
464 	 * Simple check if a character is a valid XML name char
465 	 * (every char except the first one).
466 	 * Within ASCII range<br>
467 	 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br>
468 	 * are accepted, above all characters (which is not entirely
469 	 * correct according to the XML Spec)
470 	 *
471 	 * @param ch a character
472 	 * @return Returns true if the character is a valid char of an XML name.
473 	 */
isNameChar(char ch)474 	private static boolean isNameChar(char ch)
475 	{
476 		return ch > 0xFF  ||  xmlNameChars[ch];
477 	}
478 
479 
480 	/**
481 	 * Initializes the char tables for later use.
482 	 */
initCharTables()483 	private static void initCharTables()
484 	{
485 		xmlNameChars = new boolean[0x0100];
486 		xmlNameStartChars = new boolean[0x0100];
487 
488 		for (char ch = 0; ch < xmlNameChars.length; ch++)
489 		{
490 			xmlNameStartChars[ch] =
491 				('a' <= ch  &&  ch <= 'z')  ||
492 				('A' <= ch  &&  ch <= 'Z')  ||
493 				ch == ':'  ||
494 				ch == '_'  ||
495 				(0xC0 <= ch  &&  ch <= 0xD6)  ||
496 				(0xD8 <= ch  &&  ch <= 0xF6);
497 
498 			xmlNameChars[ch] =
499 				('a' <= ch  &&  ch <= 'z')  ||
500 				('A' <= ch  &&  ch <= 'Z')  ||
501 				('0' <= ch  &&  ch <= '9')  ||
502 				ch == ':'  ||
503 				ch == '_'  ||
504 				ch == '-'  ||
505 				ch == '.'  ||
506 				ch == 0xB7  ||
507 				(0xC0 <= ch  &&  ch <= 0xD6)  ||
508 				(0xD8 <= ch  &&  ch <= 0xF6);
509 		}
510 	}
511 }