1 /**
2  * Copyright(c) 2011 Trusted Logic.   All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  *  * Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *  * Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  *  * Neither the name Trusted Logic nor the names of its
15  *    contributors may be used to endorse or promote products derived
16  *    from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include "lib_manifest2.h"
31 #include <string.h>
32 
/* Control characters the parser treats specially, as byte values. */
#define CHAR_CR  ((uint8_t)0x0D)   /* carriage return */
#define CHAR_LF  ((uint8_t)0x0A)   /* line feed */
#define CHAR_TAB ((uint8_t)0x09)   /* horizontal tab */
36 
#ifdef LIB_TOOL_IMPLEMENTATION
/* Tool build: parse errors are reported through the exos trace facility. */
#include "exos_trace.h"
/*
 * LOG_ERROR(pContext, msg, ...) prefixes the message with the manifest name
 * and the current line number taken from pContext, then forwards to
 * log_error().
 * NOTE(review): when invoked without variadic arguments the expansion ends
 * with a trailing comma before ')'; this relies on the compiler dropping
 * it - confirm against the toolchain in use.
 */
#define LOG_ERROR(pContext, msg, ...) log_error("%s - line %d: " msg, pContext->pManifestName, pContext->nLine, __VA_ARGS__)
/*
 * printf-style helper: hands the format string and its variadic arguments
 * to exosTraceVPrintf() under the "LIB_MANIFEST2" tag as an error log.
 */
static void log_error(const char* msg, ...)
{
   va_list args;

   va_start(args, msg);
   exosTraceVPrintf("LIB_MANIFEST2", EXOS_TRACE_ORG_APPLI, K_PRINT_ERROR_LOG, msg, &args);
   va_end(args);
}
#else
/* No error messages on the target: LOG_ERROR expands to nothing */
#ifdef __SYMBIAN32__
/* Named variadic macro parameter syntax is used for the Symbian build */
#define LOG_ERROR(pContext...)
#else
#define LOG_ERROR(...)
#endif
#endif
55 
/**
 * Resets the parsing position of a manifest context.
 *
 * Only the three cursor fields are written; every other field of the
 * context (manifest buffer, length, type, ...) is left untouched.
 *
 * @param pContext  context to reset; must not be NULL.
 */
void libManifest2InitContext(LIB_MANIFEST2_CONTEXT* pContext)
{
   /* No section header has been seen yet */
   pContext->nSectionStartOffset = 0;
   /* Line numbering is 1-based */
   pContext->nLine = 1;
   /* Parsing starts at the first byte of the manifest */
   pContext->nOffset = 0;
}
63 
64 
/* Character classes understood by static_checkCharacter() */
#define CHARACTER_NAME_FIRST      1
#define CHARACTER_NAME_SUBSEQUENT 2
#define CHARACTER_SECTION_NAME    3

/**
 * Tells whether byte x is allowed at a given position of a name.
 *
 *  - [A-Za-z0-9] is accepted for every nType.
 *  - CHARACTER_NAME_FIRST accepts nothing else.
 *  - CHARACTER_NAME_SUBSEQUENT additionally accepts '_', '.' and '-'.
 *  - Any other nType (CHARACTER_SECTION_NAME in practice) additionally
 *    accepts '_', '.', '-' and the space character.
 *
 * @param x      byte to classify.
 * @param nType  one of the CHARACTER_xxx constants.
 * @return true if the byte is acceptable, false otherwise.
 */
static bool static_checkCharacter(uint8_t x, uint32_t nType)
{
   const bool bIsLower = (x >= (uint8_t)'a') && (x <= (uint8_t)'z');
   const bool bIsUpper = (x >= (uint8_t)'A') && (x <= (uint8_t)'Z');
   const bool bIsDigit = (x >= (uint8_t)'0') && (x <= (uint8_t)'9');
   const bool bIsPunctuation =
      (x == (uint8_t)'_') || (x == (uint8_t)'.') || (x == (uint8_t)'-');

   if (bIsLower || bIsUpper || bIsDigit)
   {
      /* Alphanumerics are valid everywhere */
      return true;
   }

   switch (nType)
   {
   case CHARACTER_NAME_FIRST:
      /* A property name must start with an alphanumeric character */
      return false;

   case CHARACTER_NAME_SUBSEQUENT:
      /* Rest of a property name: [_.-] only */
      return bIsPunctuation;

   default:
      /* Section names: [_.-] and the space character */
      return bIsPunctuation || (x == (uint8_t)' ');
   }
}
104 
/**
 * Compares two section names, ignoring the case of ASCII letters.
 *
 * Two bytes are considered equal when they differ at most by bit 0x20,
 * which is the bit that separates upper-case from lower-case ASCII letters.
 * This is only a valid case-insensitive comparison because both names are
 * assumed to have been validated as section names beforehand.
 *
 * @param pName1        first name (not NUL-terminated).
 * @param nName1Length  length of the first name, in bytes.
 * @param pName2        second name (not NUL-terminated).
 * @param nName2Length  length of the second name, in bytes.
 * @return true if both names have the same length and match, false otherwise.
 */
static bool static_sectionNameEqualCaseInsensitive(
   uint8_t* pName1,
   uint32_t nName1Length,
   uint8_t* pName2,
   uint32_t nName2Length)
{
   const uint8_t* pLeft = pName1;
   const uint8_t* pRight = pName2;
   uint32_t nRemaining = nName1Length;

   if (nName1Length != nName2Length)
   {
      return false;
   }

   while (nRemaining != 0)
   {
      /* XOR exposes the differing bits; mask out the case bit 0x20 */
      if (((*pLeft ^ *pRight) & 0xDF) != 0)
      {
         return false;
      }
      pLeft++;
      pRight++;
      nRemaining--;
   }
   return true;
}
130 
static_libManifest2GetNextItemInternal(LIB_MANIFEST2_CONTEXT * pContext,OUT uint8_t ** ppName,OUT uint32_t * pNameLength,OUT uint8_t ** ppValue,OUT uint32_t * pValueLength)131 static S_RESULT static_libManifest2GetNextItemInternal(
132    LIB_MANIFEST2_CONTEXT* pContext,
133    OUT uint8_t** ppName,
134    OUT uint32_t* pNameLength,
135    OUT uint8_t** ppValue,
136    OUT uint32_t* pValueLength)
137 {
138    S_RESULT nResult = S_ERROR_BAD_FORMAT;
139    uint8_t* pCurrent = pContext->pManifestContent + pContext->nOffset;
140    uint8_t* pEnd = pContext->pManifestContent + pContext->nManifestLength;
141    uint8_t* pLastNonWhitespaceChar;
142    uint32_t nCurrentSequenceCount;
143    uint32_t nCurrentChar;
144 
145    if (pContext->nType != LIB_MANIFEST2_TYPE_COMPILED)
146    {
147       /* Skip leading BOM if we're at the start */
148       if (pCurrent == pContext->pManifestContent)
149       {
150          /* We're at the start. Skip leading BOM if present */
151          /* Note that the UTF-8 encoding of the BOM marker is EF BB BF */
152          if (pContext->nManifestLength >= 3
153              && pCurrent[0] == 0xEF
154              && pCurrent[1] == 0xBB
155              && pCurrent[2] == 0xBF)
156          {
157             pCurrent += 3;
158          }
159       }
160       /* Skip comments and newlines */
161       while (pCurrent < pEnd)
162       {
163          if (*pCurrent == (uint8_t)'#')
164          {
165             /* This is the start of a comment. Skip until end of line or end of file */
166             pCurrent++;
167             while (pCurrent < pEnd && *pCurrent != CHAR_LF && *pCurrent != CHAR_CR)
168             {
169                if (*pCurrent == 0)
170                {
171                   LOG_ERROR(pContext, "NUL character forbidden");
172                   goto error;
173                }
174                pCurrent++;
175             }
176          }
177          else if (*pCurrent == CHAR_CR)
178          {
179             /* Check if a LF follows */
180             pCurrent++;
181             if (pCurrent < pEnd && *pCurrent == CHAR_LF)
182             {
183                pCurrent++;
184             }
185             pContext->nLine++;
186          }
187          else if (*pCurrent == CHAR_LF)
188          {
189             pCurrent++;
190             pContext->nLine++;
191          }
192          else if (*pCurrent == ' ' || *pCurrent == '\t')
193          {
194             /* this is the start of a all-whitespace line */
195             /* NOTE: this is not allowed by the current spec: spec update needed */
196             pCurrent++;
197             while (pCurrent < pEnd)
198             {
199                if (*pCurrent == CHAR_LF || *pCurrent == CHAR_CR)
200                {
201                   /* End-of-line reached */
202                   break;
203                }
204                if (! (*pCurrent == ' ' || *pCurrent == '\t'))
205                {
206                   LOG_ERROR(pContext, "A line starting with whitespaces must contain only whitespaces. Illegal character: 0x%02X", *pCurrent);
207                   goto error;
208                }
209                pCurrent++;
210             }
211          }
212          else
213          {
214             break;
215          }
216       }
217    }
218 
219    if (pCurrent >= pEnd)
220    {
221       /* No more properties */
222       nResult = S_ERROR_ITEM_NOT_FOUND;
223       goto error;
224    }
225 
226    if (pContext->nType == LIB_MANIFEST2_TYPE_SOURCE_WITH_SECTIONS)
227    {
228       if (*pCurrent == '[')
229       {
230          /* This is a section descriptor */
231          pCurrent++;
232          *ppName = pCurrent;
233          *ppValue = NULL;
234          *pValueLength = 0;
235          while (true)
236          {
237             if (pCurrent >= pEnd)
238             {
239                LOG_ERROR(pContext, "EOF reached within a section name");
240                goto error;
241             }
242             if (*pCurrent == ']')
243             {
244                /* End of section name */
245                *pNameLength = pCurrent - *ppName;
246                pCurrent++;
247 
248                /* Skip spaces and tabs. Note that this is a deviation from the current spec
249                  (see SWIS). Spec must be updated */
250                while (pCurrent < pEnd)
251                {
252                   if (*pCurrent == ' ' || *pCurrent == '\t')
253                   {
254                      pCurrent++;
255                   }
256                   else if (*pCurrent == CHAR_CR || *pCurrent == CHAR_LF)
257                   {
258                      /* End of line */
259                      break;
260                   }
261                   else
262                   {
263                      LOG_ERROR(pContext, "Non-space character follows a sectino header: 0x02X", *pCurrent);
264                   }
265                }
266                pContext->nOffset = pCurrent - pContext->pManifestContent;
267                pContext->nSectionStartOffset = pContext->nOffset;
268                return S_SUCCESS;
269             }
270             /* Check section name character */
271             if (!static_checkCharacter(*pCurrent, CHARACTER_SECTION_NAME))
272             {
273                LOG_ERROR(pContext, "Invalid character for a section name: 0x%02X", *pCurrent);
274                goto error;
275             }
276             pCurrent++;
277          }
278       }
279 
280       if (pContext->nSectionStartOffset == 0)
281       {
282          /* No section has been found yet. This is a bad format */
283          LOG_ERROR(pContext, "Property found outside any section");
284          goto error;
285       }
286    }
287 
288    *ppName = pCurrent;
289 
290    /* Check first character of name is in [A-Za-z0-9] */
291    if (!static_checkCharacter(*pCurrent, CHARACTER_NAME_FIRST))
292    {
293       LOG_ERROR(pContext, "Invalid first character for a property name: 0x%02X", *pCurrent);
294       goto error;
295    }
296    pCurrent++;
297    pLastNonWhitespaceChar = pCurrent;
298    while (true)
299    {
300       if (pCurrent == pEnd)
301       {
302          LOG_ERROR(pContext, "EOF reached within a property name");
303          goto error;
304       }
305       if (*pCurrent == ':')
306       {
307          /* Colon reached */
308          break;
309       }
310       if (pContext->nType != LIB_MANIFEST2_TYPE_COMPILED)
311       {
312          /* In source manifest, allow space characters before the colon.
313             This is a deviation from the spec. Spec must be updated */
314          if (*pCurrent == ' ' || *pCurrent == '\t')
315          {
316             pCurrent++;
317             continue;
318          }
319       }
320       if (!static_checkCharacter(*pCurrent, CHARACTER_NAME_SUBSEQUENT))
321       {
322          LOG_ERROR(pContext, "Invalid character for a property name: 0x%02X", *pCurrent);
323          goto error;
324       }
325       if (pContext->nType != LIB_MANIFEST2_TYPE_COMPILED)
326       {
327          /* Even in a source manifest, property name cannot contain spaces! */
328          if (pCurrent != pLastNonWhitespaceChar)
329          {
330             LOG_ERROR(pContext, "Property name cannot contain spaces");
331             goto error;
332          }
333       }
334       pCurrent++;
335       pLastNonWhitespaceChar = pCurrent;
336    }
337    *pNameLength = pLastNonWhitespaceChar - *ppName;
338    pCurrent++;
339    /* Skip spaces and tabs on the right of the colon */
340    while (pCurrent < pEnd && (*pCurrent == ' ' || *pCurrent == '\t'))
341    {
342       pCurrent++;
343    }
344    *ppValue = pCurrent;
345    pLastNonWhitespaceChar = pCurrent-1;
346 
347    nCurrentSequenceCount = 0;
348    nCurrentChar = 0;
349 
350    while (pCurrent < pEnd)
351    {
352       uint32_t x;
353       x = *pCurrent;
354       if ((x & 0x80) == 0)
355       {
356          if (nCurrentSequenceCount != 0)
357          {
358             /* We were expecting a 10xxxxxx byte: ill-formed UTF-8 */
359             LOG_ERROR(pContext, "Invalid UTF-8 sequence");
360             goto error;
361          }
362          else if (x == 0)
363          {
364             /* The null character is forbidden */
365             LOG_ERROR(pContext, "NUL character forbidden");
366             goto error;
367          }
368          /* We have a well-formed Unicode character */
369          nCurrentChar = x;
370       }
371       else if ((x & 0xC0) == 0xC0)
372       {
373          /* Start of a sequence */
374          if (nCurrentSequenceCount != 0)
375          {
376             /* We were expecting a 10xxxxxx byte: ill-formed UTF-8 */
377             LOG_ERROR(pContext, "Invalid UTF-8 sequence");
378             goto error;
379          }
380          else if ((x & 0xE0) == 0xC0)
381          {
382             /* 1 byte follows */
383             nCurrentChar = x & 0x1F;
384             nCurrentSequenceCount = 1;
385             if ((x & 0x1E) == 0)
386             {
387                /* Illegal UTF-8: overlong encoding of character in the [0x00-0x7F] range
388                   (must use 1-byte encoding, not a 2-byte encoding) */
389                LOG_ERROR(pContext, "Invalid UTF-8 sequence");
390                goto error;
391             }
392          }
393          else if ((x & 0xF0) == 0xE0)
394          {
395             /* 2 bytes follow */
396             nCurrentChar = x & 0x0F;
397             nCurrentSequenceCount = 2;
398          }
399          else if ((x & 0xF8) == 0xF0)
400          {
401             /* 3 bytes follow */
402             nCurrentChar = x & 0x07;
403             nCurrentSequenceCount = 3;
404          }
405          else
406          {
407             /* Illegal start of sequence */
408             LOG_ERROR(pContext, "Invalid UTF-8 sequence");
409             goto error;
410          }
411       }
412       else if ((x & 0xC0) == 0x80)
413       {
414          /* Continuation byte */
415          if (nCurrentSequenceCount == 0)
416          {
417             /* We were expecting a sequence start, not a continuation byte */
418             LOG_ERROR(pContext, "Invalid UTF-8 sequence");
419             goto error;
420          }
421          else
422          {
423             if (nCurrentSequenceCount == 2)
424             {
425                /* We're in a 3-byte sequence, check that we're not using an overlong sequence */
426                if (nCurrentChar == 0 && (x & 0x20) == 0)
427                {
428                   /* The character starts with at least 5 zero bits, so has fewer than 11 bits. It should
429                      have used a 2-byte sequence, not a 3-byte sequence */
430                   LOG_ERROR(pContext, "Invalid UTF-8 sequence");
431                   goto error;
432                }
433             }
434             else if (nCurrentSequenceCount == 3)
435             {
436                if (nCurrentChar == 0 && (x & 0x30) == 0)
437                {
438                   /* The character starts with at least 5 zero bits, so has fewer than 16 bits. It should
439                      have used a 3-byte sequence, not a 4-byte sequence */
440                   LOG_ERROR(pContext, "Invalid UTF-8 sequence");
441                   goto error;
442                }
443             }
444             nCurrentSequenceCount--;
445             nCurrentChar = (nCurrentChar << 6) | (x & 0x3F);
446          }
447       }
448       else
449       {
450          /* Illegal byte */
451          LOG_ERROR(pContext, "Invalid UTF-8 sequence");
452          goto error;
453       }
454       if (nCurrentSequenceCount == 0)
455       {
456          /* nCurrentChar contains the current Unicode character */
457          /* check character */
458          if ((nCurrentChar >= 0xD800 && nCurrentChar < 0xE000) || nCurrentChar >= 0x110000)
459          {
460             /* Illegal code point */
461             LOG_ERROR(pContext, "Invalid UTF-8 code point 0x%X", nCurrentChar);
462             goto error;
463          }
464 
465          if (*pCurrent == CHAR_CR)
466          {
467             if (pContext->nType == LIB_MANIFEST2_TYPE_COMPILED)
468             {
469                /* Check if a LF follows */
470                pCurrent++;
471                if (pCurrent < pEnd && *pCurrent == CHAR_LF)
472                {
473                   pCurrent++;
474                }
475                pContext->nLine++;
476             }
477             goto end;
478          }
479          else if (*pCurrent == CHAR_LF)
480          {
481             if (pContext->nType == LIB_MANIFEST2_TYPE_COMPILED)
482             {
483                pCurrent++;
484                pContext->nLine++;
485             }
486             goto end;
487          }
488       }
489       if (*pCurrent != ' ' && *pCurrent != CHAR_TAB)
490       {
491          /* It's a non-whitespace char */
492          pLastNonWhitespaceChar = pCurrent;
493       }
494       pCurrent++;
495    }
496 
497    /* Hit the end of the manifest; Check that we're not in the middle of a sequence */
498    if (nCurrentSequenceCount != 0)
499    {
500       LOG_ERROR(pContext, "File ends in the middle of an UTF-8 sequence");
501       goto error;
502    }
503 
504 end:
505 
506    *pValueLength = pLastNonWhitespaceChar - *ppValue + 1;
507    pContext->nOffset = pCurrent - pContext->pManifestContent;
508 
509    return S_SUCCESS;
510 
511 error:
512    *ppName = NULL;
513    *pNameLength = 0;
514    *ppValue = NULL;
515    *pValueLength = 0;
516    return nResult;
517 }
518 
libManifest2GetNextItem(LIB_MANIFEST2_CONTEXT * pContext,OUT uint8_t ** ppName,OUT uint32_t * pNameLength,OUT uint8_t ** ppValue,OUT uint32_t * pValueLength)519 S_RESULT libManifest2GetNextItem(
520    LIB_MANIFEST2_CONTEXT* pContext,
521    OUT uint8_t** ppName,
522    OUT uint32_t* pNameLength,
523    OUT uint8_t** ppValue,
524    OUT uint32_t* pValueLength)
525 {
526    if (pContext->nType == LIB_MANIFEST2_TYPE_COMPILED)
527    {
528       /* Don't check for duplicates in binary manifests */
529       return static_libManifest2GetNextItemInternal(
530          pContext,
531          ppName,
532          pNameLength,
533          ppValue,
534          pValueLength);
535    }
536    else
537    {
538       uint32_t nOriginalOffset = pContext->nOffset;
539       uint32_t nOffset;
540       uint32_t nLine;
541       uint32_t nSectionStartOffset;
542       S_RESULT nResult;
543       uint8_t* pDupName;
544       uint32_t nDupNameLength;
545       uint8_t* pDupValue;
546       uint32_t nDupValueLength;
547 
548       /* First get the item */
549       nResult = static_libManifest2GetNextItemInternal(
550          pContext,
551          ppName,
552          pNameLength,
553          ppValue,
554          pValueLength);
555       if (nResult != S_SUCCESS)
556       {
557          return nResult;
558       }
559       /* Save the state of the parser */
560       nOffset = pContext->nOffset;
561       nLine = pContext->nLine;
562       nSectionStartOffset = pContext->nSectionStartOffset;
563       if (pContext->nType == LIB_MANIFEST2_TYPE_SOURCE)
564       {
565          pContext->nOffset = 0;
566       }
567       else if (*ppValue == NULL)
568       {
569          /* The item was a section header. Iterate on all section headers and
570             check for duplicates */
571          pContext->nOffset = 0;
572       }
573       else
574       {
575          if (nSectionStartOffset == 0)
576          {
577             LOG_ERROR(pContext, "Property definition outside any section");
578             goto bad_format;
579          }
580          /* Iterate only on the properties in the section */
581          pContext->nOffset = nSectionStartOffset;
582       }
583       while (pContext->nOffset < nOriginalOffset)
584       {
585          static_libManifest2GetNextItemInternal(
586             pContext,
587             &pDupName,
588             &nDupNameLength,
589             &pDupValue,
590             &nDupValueLength);
591          if (pContext->nType == LIB_MANIFEST2_TYPE_SOURCE_WITH_SECTIONS && *ppValue == NULL)
592          {
593             /* Check for duplicate section names */
594             if (pDupValue == NULL
595                 &&
596                 static_sectionNameEqualCaseInsensitive(
597                    *ppName,
598                    *pNameLength,
599                    pDupName,
600                    nDupNameLength))
601             {
602                pContext->nOffset = nOffset;
603                pContext->nLine = nLine;
604                pContext->nSectionStartOffset = nSectionStartOffset;
605                LOG_ERROR(pContext, "Duplicate section %.*s", nDupNameLength, pDupName);
606                goto bad_format;
607             }
608          }
609          else
610          {
611             /* Check for duplicate property name */
612             if (nDupNameLength == *pNameLength &&
613                 memcmp(pDupName, *ppName, nDupNameLength) == 0)
614             {
615                /* Duplicated property */
616                pContext->nOffset = nOffset;
617                pContext->nLine = nLine;
618                pContext->nSectionStartOffset = nSectionStartOffset;
619                LOG_ERROR(pContext,"Duplicate property %.*s", nDupNameLength, pDupName);
620                goto bad_format;
621             }
622          }
623       }
624       /* Everything's fine. restore context and exit  */
625       /* Restore the context */
626       pContext->nOffset = nOffset;
627       pContext->nLine = nLine;
628       pContext->nSectionStartOffset = nSectionStartOffset;
629 
630       return S_SUCCESS;
631 bad_format:
632       *ppName = NULL;
633       *pNameLength = 0;
634       *ppValue = NULL;
635       *pValueLength = 0;
636       return S_ERROR_BAD_FORMAT;
637    }
638 }
639 
640 
libManifest2CheckFormat(LIB_MANIFEST2_CONTEXT * pContext,uint32_t * pnItemCount)641 S_RESULT libManifest2CheckFormat(
642    LIB_MANIFEST2_CONTEXT* pContext,
643    uint32_t* pnItemCount)
644 {
645    uint32_t nPropertyCount = 0;
646    uint8_t* pName;
647    uint32_t nNameLength;
648    uint8_t* pValue;
649    uint32_t nValueLength;
650    S_RESULT nResult;
651 
652    pContext->nOffset = 0;
653    pContext->nLine = 1;
654    pContext->nSectionStartOffset = 0;
655 
656    while (true)
657    {
658       nResult = libManifest2GetNextItem(
659          pContext,
660          &pName,
661          &nNameLength,
662          &pValue,
663          &nValueLength);
664       if (nResult == S_ERROR_ITEM_NOT_FOUND)
665       {
666          if (pnItemCount != NULL)
667          {
668             *pnItemCount = nPropertyCount;
669          }
670          return S_SUCCESS;
671       }
672       if (nResult != S_SUCCESS)
673       {
674          return nResult;
675       }
676       nPropertyCount++;
677    }
678 }
679