1## @file
2# Collect all defined strings in multiple uni files.
3#
4# Copyright (c) 2014 - 2015, Intel Corporation. All rights reserved.<BR>
5#
6# This program and the accompanying materials are licensed and made available
7# under the terms and conditions of the BSD License which accompanies this
8# distribution. The full text of the license may be found at
9# http://opensource.org/licenses/bsd-license.php
10#
11# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13#
14"""
15Collect all defined strings in multiple uni files
16"""
17
18##
19# Import Modules
20#
21import os, codecs, re
22import distutils.util
23from Logger import ToolError
24from Logger import Log as EdkLogger
25from Logger import StringTable as ST
26from Library.String import GetLineNo
27from Library.Misc import PathClass
28from Library.Misc import GetCharIndexOutStr
29from Library import DataType as DT
30from Library.ParserValidate import CheckUTF16FileHeader
31
32##
33# Static definitions
34#
# Escape tokens as they are written in .uni source text. PreProcess replaces
# each of them with the matching reserved code point defined further below.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
# Carriage return and line feed as plain (non-unicode-literal) strings.
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Code points substituted for the \narrow, \wide and \nbr tokens.
NARROW_CHAR = u'\uFFF0'
WIDE_CHAR = u'\uFFF1'
NON_BREAKING_CHAR = u'\uFFF2'
# Control characters as unicode literals; PreProcess uses CR and LF when it
# expands the textual \r and \n escapes found in string values.
CR = u'\u000D'
LF = u'\u000A'
NULL = u'\u0000'
TAB = u'\t'
# A single backslash character.
BACK_SPLASH = u'\\'
49
# Matches a whole "!include <name>" line and captures the (whitespace-free)
# file name. Written as a raw string so the \s and \S classes reach the regex
# engine verbatim instead of relying on Python leaving unknown escapes alone.
gINCLUDE_PATTERN = re.compile(r"^!include[\s]+([\S]+)[\s]*$", re.MULTILINE | re.UNICODE)
51
# Maps three-letter language codes to their two-letter equivalents.
# GetLanguageCode consults it in compatibility mode to turn a three-letter
# code into the two-letter form, and in native mode to reject three-letter
# codes for which a two-letter form exists.
gLANG_CONV_TABLE = {'eng':'en', 'fra':'fr', \
                 'aar':'aa', 'abk':'ab', 'ave':'ae', 'afr':'af', 'aka':'ak', 'amh':'am', \
                 'arg':'an', 'ara':'ar', 'asm':'as', 'ava':'av', 'aym':'ay', 'aze':'az', \
                 'bak':'ba', 'bel':'be', 'bul':'bg', 'bih':'bh', 'bis':'bi', 'bam':'bm', \
                 'ben':'bn', 'bod':'bo', 'bre':'br', 'bos':'bs', 'cat':'ca', 'che':'ce', \
                 'cha':'ch', 'cos':'co', 'cre':'cr', 'ces':'cs', 'chu':'cu', 'chv':'cv', \
                 'cym':'cy', 'dan':'da', 'deu':'de', 'div':'dv', 'dzo':'dz', 'ewe':'ee', \
                 'ell':'el', 'epo':'eo', 'spa':'es', 'est':'et', 'eus':'eu', 'fas':'fa', \
                 'ful':'ff', 'fin':'fi', 'fij':'fj', 'fao':'fo', 'fry':'fy', 'gle':'ga', \
                 'gla':'gd', 'glg':'gl', 'grn':'gn', 'guj':'gu', 'glv':'gv', 'hau':'ha', \
                 'heb':'he', 'hin':'hi', 'hmo':'ho', 'hrv':'hr', 'hat':'ht', 'hun':'hu', \
                 'hye':'hy', 'her':'hz', 'ina':'ia', 'ind':'id', 'ile':'ie', 'ibo':'ig', \
                 'iii':'ii', 'ipk':'ik', 'ido':'io', 'isl':'is', 'ita':'it', 'iku':'iu', \
                 'jpn':'ja', 'jav':'jv', 'kat':'ka', 'kon':'kg', 'kik':'ki', 'kua':'kj', \
                 'kaz':'kk', 'kal':'kl', 'khm':'km', 'kan':'kn', 'kor':'ko', 'kau':'kr', \
                 'kas':'ks', 'kur':'ku', 'kom':'kv', 'cor':'kw', 'kir':'ky', 'lat':'la', \
                 'ltz':'lb', 'lug':'lg', 'lim':'li', 'lin':'ln', 'lao':'lo', 'lit':'lt', \
                 'lub':'lu', 'lav':'lv', 'mlg':'mg', 'mah':'mh', 'mri':'mi', 'mkd':'mk', \
                 'mal':'ml', 'mon':'mn', 'mar':'mr', 'msa':'ms', 'mlt':'mt', 'mya':'my', \
                 'nau':'na', 'nob':'nb', 'nde':'nd', 'nep':'ne', 'ndo':'ng', 'nld':'nl', \
                 'nno':'nn', 'nor':'no', 'nbl':'nr', 'nav':'nv', 'nya':'ny', 'oci':'oc', \
                 'oji':'oj', 'orm':'om', 'ori':'or', 'oss':'os', 'pan':'pa', 'pli':'pi', \
                 'pol':'pl', 'pus':'ps', 'por':'pt', 'que':'qu', 'roh':'rm', 'run':'rn', \
                 'ron':'ro', 'rus':'ru', 'kin':'rw', 'san':'sa', 'srd':'sc', 'snd':'sd', \
                 'sme':'se', 'sag':'sg', 'sin':'si', 'slk':'sk', 'slv':'sl', 'smo':'sm', \
                 'sna':'sn', 'som':'so', 'sqi':'sq', 'srp':'sr', 'ssw':'ss', 'sot':'st', \
                 'sun':'su', 'swe':'sv', 'swa':'sw', 'tam':'ta', 'tel':'te', 'tgk':'tg', \
                 'tha':'th', 'tir':'ti', 'tuk':'tk', 'tgl':'tl', 'tsn':'tn', 'ton':'to', \
                 'tur':'tr', 'tso':'ts', 'tat':'tt', 'twi':'tw', 'tah':'ty', 'uig':'ug', \
                 'ukr':'uk', 'urd':'ur', 'uzb':'uz', 'ven':'ve', 'vie':'vi', 'vol':'vo', \
                 'wln':'wa', 'wol':'wo', 'xho':'xh', 'yid':'yi', 'yor':'yo', 'zha':'za', \
                 'zho':'zh', 'zul':'zu'}
84
## Convert a python unicode string to a normal string
#
# Return the body of repr(Uni): the escaped text without the surrounding
# quotes and without the optional unicode prefix.
# UniToStr(u'I am a string') is 'I am a string'
#
# @param Uni:  The python unicode string
#
# @retval:     The formatted normal string
#
def UniToStr(Uni):
    Text = repr(Uni)
    # repr() only starts with a 'u' prefix for Python 2 unicode objects; a
    # plain str (and every Python 3 string) starts directly with the quote,
    # so the prefix must be stripped conditionally, not by a fixed offset.
    if Text[:1] in ('u', 'U'):
        Text = Text[1:]
    return Text[1:-1]
96
## Convert a unicode string to a Hex list
#
# Convert a unicode string to a list of hex byte strings, two per UTF-16
# code unit, low byte first.
# UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
#
# Characters above U+FFFF are emitted as a surrogate pair (four bytes), so
# the result is the same whether the interpreter iterates such characters
# as one code point or as two surrogates.
#
# @param Uni:    The python unicode string
#
# @retval List:  The formatted hex list
#
def UniToHexList(Uni):
    List = []
    for Item in Uni:
        Code = ord(Item)
        if Code > 0xFFFF:
            # Split a supplementary-plane code point into its UTF-16 pair.
            Code -= 0x10000
            Units = (0xD800 | (Code >> 10), 0xDC00 | (Code & 0x3FF))
        else:
            Units = (Code,)
        for Unit in Units:
            List.append('0x%02X' % (Unit & 0xFF))
            List.append('0x%02X' % (Unit >> 8))
    return List
113
## Convert special unicode characters
#
# Replace the copyright, registered and trademark signs with their ASCII
# spellings (c), (r) and (tm).
#
# @param Uni:    The python unicode string
#
# @retval NewUni:  The converted unicode string
#
def ConvertSpecialUnicodes(Uni):
    Replacements = (
        (u'\u00A9', '(c)'),
        (u'\u00AE', '(r)'),
        (u'\u2122', '(tm)'),
    )
    Converted = Uni
    for Symbol, AsciiForm in Replacements:
        Converted = Converted.replace(Symbol, AsciiForm)
    return Converted
128
## GetLanguageCode1766
#
# Return the language code read from a .UNI file unchanged.
#
# The RFC 4646 to RFC 1766 conversion that used to follow was already
# short-circuited by an unconditional return at the top of the function, so
# that unreachable code has been removed. Behaviour is unchanged: callers
# receive exactly the value they passed in and no validation is performed.
#
# @param LangName:   Language code read from .UNI file
# @param File:       Unused; kept so existing call sites remain valid
#
# @retval LangName:  The language code, unmodified
#
def GetLanguageCode1766(LangName, File=None):
    return LangName
175
## GetLanguageCode
#
# Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
# RFC 1766 language codes supported in compatibility mode
# RFC 4646 language codes supported in native mode
#
# In compatibility mode only three-letter alphabetic codes are accepted; a
# code found in gLANG_CONV_TABLE is returned as its two-letter equivalent,
# any other three-letter code is returned as is.
# In native mode the code is returned unchanged when it is a private-use tag
# ("x-..."), a two-letter code, a three-letter code without a two-letter
# equivalent, or one of those followed by "-" and a suffix.
# Anything else, including an empty or one-character string, is reported
# through EdkLogger.Error.
#
# @param LangName:          Language codes read from .UNI file
# @param IsCompatibleMode:  True to validate as RFC 1766, False for RFC 4646
# @param File:              File object passed on to the error report
#
# @retval LangName:  Valid language code in RFC 4646 format or None
#
def GetLanguageCode(LangName, IsCompatibleMode, File):
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            TempLangName = gLANG_CONV_TABLE.get(LangName.lower())
            if TempLangName is not None:
                return TempLangName
            return LangName
        else:
            EdkLogger.Error("Unicode File Parser",
                             ToolError.FORMAT_INVALID,
                             "Invalid RFC 1766 language code : %s" % LangName,
                             File)
    # Private-use tag. Slicing (rather than indexing) keeps empty and
    # one-character names from raising IndexError, so they reach the
    # regular error report at the bottom instead.
    if LangName[0:2] in ('x-', 'X-'):
        return LangName
    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        # A three-letter code is only valid when no two-letter form exists.
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        # NOTE(review): the table is looked up with the whole tag, which can
        # never match a three-letter key, so this check always passes; it
        # presumably was meant to test LangName[0:3]. Left as is to keep the
        # set of accepted tags unchanged.
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.Error("Unicode File Parser",
                             ToolError.FORMAT_INVALID,
                             "Invalid RFC 4646 language code : %s" % LangName,
                             File)
220
## FormatUniEntry
#
# Format the entry in Uni file.
#
# The result starts with the token name padded to a fixed column (40, or the
# name length plus one for longer names), followed by one '#language' section
# per usable (language, value) pair. Each non-blank line of a value becomes
# its own quoted string; every quoted string except the last of a section
# ends with a literal \n, and lines are terminated with CR LF.
# Pairs with an empty value, a value made only of blank lines, or the
# DT.TAB_LANGUAGE_EN_X language are skipped.
#
# @param StrTokenName    StrTokenName.
# @param TokenValueList  A list of (language, value) pairs to be processed.
# @param ContainerFile   ContainerFile, passed to GetLanguageCode for error reports.
#
# @return formatted entry, or '' when no pair produced any output
def FormatUniEntry(StrTokenName, TokenValueList, ContainerFile):
    PreFormatLength = 40
    if len(StrTokenName) > PreFormatLength:
        PreFormatLength = len(StrTokenName) + 1
    # Continuation strings line up under the first one, assuming a language
    # prefix as wide as '#language en-US '.
    ValueIndent = ' ' * (PreFormatLength + len('#language en-US '))
    SubContent = ''
    for (Lang, Value) in TokenValueList:
        if not Value or Lang == DT.TAB_LANGUAGE_EN_X:
            continue
        if Lang == '':
            Lang = DT.TAB_LANGUAGE_EN_US
        if Lang == 'eng':
            Lang = DT.TAB_LANGUAGE_EN_US
        elif len(Lang.split('-')[0]) == 3:
            Lang = GetLanguageCode(Lang.split('-')[0], True, ContainerFile)
        else:
            Lang = GetLanguageCode(Lang, False, ContainerFile)
        TextLines = [SubValue.strip() for SubValue in Value.split('\n') if SubValue.strip()]
        if not TextLines:
            # A value consisting only of whitespace has nothing to emit;
            # writing it would leave a lone, unbalanced quote in the output.
            continue
        QuotedLines = ['"%s\\n"' % Text for Text in TextLines[:-1]]
        QuotedLines.append('"%s"' % TextLines[-1])
        SubValueContent = ('\r\n' + ValueIndent).join(QuotedLines) + '\r\n'
        SubContent += ' ' * PreFormatLength + '#language %-5s ' % Lang + SubValueContent
    if SubContent:
        # The first section shares its line with the token name, so its
        # leading padding is replaced by the padded name.
        SubContent = StrTokenName + ' ' * (PreFormatLength - len(StrTokenName)) + SubContent[PreFormatLength:]
    return SubContent
258
259
## StringDefClassObject
#
# A structure for one string definition in one language: the token name, its
# value, the numeric token, and the UTF-16 byte lists derived from name and
# value through UniToHexList.
#
class StringDefClassObject(object):
    ## Constructor
    #
    # @param Name             String token name, or None to leave it empty
    # @param Value            String value, or None to leave it empty
    # @param Referenced       Stored as is in self.Referenced
    # @param Token            Numeric token, or None to keep the default 0
    # @param UseOtherLangDef  Stored as is in self.UseOtherLangDef
    #
    def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''):
        self.StringName = ''
        self.StringNameByteList = []
        self.StringValue = ''
        # NOTE(review): defaults to an empty string while every later
        # assignment stores a list; kept as is in case callers compare
        # against '' - confirm before normalising to [].
        self.StringValueByteList = ''
        self.Token = 0
        self.Referenced = Referenced
        self.UseOtherLangDef = UseOtherLangDef
        # Number of entries in StringValueByteList (two per UTF-16 unit).
        self.Length = 0

        if Name is not None:
            self.StringName = Name
            self.StringNameByteList = UniToHexList(Name)
        if Value is not None:
            self.StringValue = Value
            self.StringValueByteList = UniToHexList(self.StringValue)
            self.Length = len(self.StringValueByteList)
        if Token is not None:
            self.Token = Token

    def __str__(self):
        return repr(self.StringName) + ' ' + \
               repr(self.Token) + ' ' + \
               repr(self.Referenced) + ' ' + \
               repr(self.StringValue) + ' ' + \
               repr(self.UseOtherLangDef)

    ## Append text to the string value
    #
    # A non-empty existing value is extended with CR LF plus Value; an empty
    # one is replaced. The byte list and Length are recomputed afterwards.
    # Passing None changes nothing.
    #
    # @param Value  Text to append, or None
    #
    def UpdateValue(self, Value = None):
        if Value is not None:
            if self.StringValue:
                self.StringValue = self.StringValue + '\r\n' + Value
            else:
                self.StringValue = Value
            self.StringValueByteList = UniToHexList(self.StringValue)
            self.Length = len(self.StringValueByteList)
300
301## UniFileClassObject
302#
303# A structure for .uni file definition
304#
305class UniFileClassObject(object):
306    def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None):
307        self.FileList = FileList
308        self.File = None
309        self.IncFileList = FileList
310        self.UniFileHeader = ''
311        self.Token = 2
312        self.LanguageDef = []                   #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
313        self.OrderedStringList = {}             #{ u'LanguageIdentifier' : [StringDefClassObject]  }
314        self.OrderedStringDict = {}             #{ u'LanguageIdentifier' : {StringName:(IndexInList)}  }
315        self.OrderedStringListByToken = {}      #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
316        self.IsCompatibleMode = IsCompatibleMode
317        if not IncludePathList:
318            self.IncludePathList = []
319        else:
320            self.IncludePathList = IncludePathList
321        if len(self.FileList) > 0:
322            self.LoadUniFiles(FileList)
323
324    #
325    # Get Language definition
326    #
327    def GetLangDef(self, File, Line):
328        Lang = distutils.util.split_quoted((Line.split(u"//")[0]))
329        if len(Lang) != 3:
330            try:
331                FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16').read()
332            except UnicodeError, Xstr:
333                FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16_le').read()
334            except:
335                EdkLogger.Error("Unicode File Parser",
336                                ToolError.FILE_OPEN_FAILURE,
337                                "File read failure: %s" % str(Xstr),
338                                ExtraData=File)
339            LineNo = GetLineNo(FileIn, Line, False)
340            EdkLogger.Error("Unicode File Parser",
341                             ToolError.PARSER_ERROR,
342                             "Wrong language definition",
343                             ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line,
344                             File = File, Line = LineNo)
345        else:
346            LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File)
347            LangPrintName = Lang[2]
348
349        IsLangInDef = False
350        for Item in self.LanguageDef:
351            if Item[0] == LangName:
352                IsLangInDef = True
353                break
354
355        if not IsLangInDef:
356            self.LanguageDef.append([LangName, LangPrintName])
357
358        #
359        # Add language string
360        #
361        self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0)
362        self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1)
363
364        if not IsLangInDef:
365            #
366            # The found STRING tokens will be added into new language string list
367            # so that the unique STRING identifier is reserved for all languages in the package list.
368            #
369            FirstLangName = self.LanguageDef[0][0]
370            if LangName != FirstLangName:
371                for Index in range (2, len (self.OrderedStringList[FirstLangName])):
372                    Item = self.OrderedStringList[FirstLangName][Index]
373                    if Item.UseOtherLangDef != '':
374                        OtherLang = Item.UseOtherLangDef
375                    else:
376                        OtherLang = FirstLangName
377                    self.OrderedStringList[LangName].append (StringDefClassObject(Item.StringName,
378                                                                                  '',
379                                                                                  Item.Referenced,
380                                                                                  Item.Token,
381                                                                                  OtherLang))
382                    self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1
383        return True
384
385    #
386    # Get String name and value
387    #
388    def GetStringObject(self, Item):
389        Language = ''
390        Value = ''
391
392        Name = Item.split()[1]
393        # Check the string name is the upper character
394        if Name != '':
395            MatchString = re.match('[A-Z0-9_]+', Name, re.UNICODE)
396            if MatchString == None or MatchString.end(0) != len(Name):
397                EdkLogger.Error("Unicode File Parser",
398                             ToolError.FORMAT_INVALID,
399                             'The string token name %s in UNI file %s must be upper case character.' %(Name, self.File))
400        LanguageList = Item.split(u'#language ')
401        for IndexI in range(len(LanguageList)):
402            if IndexI == 0:
403                continue
404            else:
405                Language = LanguageList[IndexI].split()[0]
406                #.replace(u'\r\n', u'')
407                Value = \
408                LanguageList[IndexI][LanguageList[IndexI].find(u'\"') + len(u'\"') : LanguageList[IndexI].rfind(u'\"')]
409                Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
410                self.AddStringToList(Name, Language, Value)
411
412    #
413    # Get include file list and load them
414    #
415    def GetIncludeFile(self, Item, Dir = None):
416        if Dir:
417            pass
418        FileName = Item[Item.find(u'!include ') + len(u'!include ') :Item.find(u' ', len(u'!include '))][1:-1]
419        self.LoadUniFile(FileName)
420
421    #
422    # Pre-process before parse .uni file
423    #
424    def PreProcess(self, File, IsIncludeFile=False):
425        if not os.path.exists(File.Path) or not os.path.isfile(File.Path):
426            EdkLogger.Error("Unicode File Parser",
427                             ToolError.FILE_NOT_FOUND,
428                             ExtraData=File.Path)
429
430        #
431        # Check file header of the Uni file
432        #
433        if not CheckUTF16FileHeader(File.Path):
434            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
435                            ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)
436
437        try:
438            FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16').readlines()
439        except UnicodeError:
440            FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16_le').readlines()
441        except:
442            EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path)
443
444
445        #
446        # get the file header
447        #
448        Lines = []
449        HeaderStart = False
450        HeaderEnd = False
451        if not self.UniFileHeader:
452            FirstGenHeader = True
453        else:
454            FirstGenHeader = False
455        for Line in FileIn:
456            Line = Line.strip()
457            if Line == u'':
458                continue
459            if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and (Line.find(DT.TAB_HEADER_COMMENT) > -1) \
460                and not HeaderEnd and not HeaderStart:
461                HeaderStart = True
462            if not Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd:
463                HeaderEnd = True
464            if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd and FirstGenHeader:
465                self.UniFileHeader += Line + '\r\n'
466                continue
467
468        #
469        # Use unique identifier
470        #
471        FindFlag = -1
472        LineCount = 0
473        MultiLineFeedExits = False
474        #
475        # 0: initial value
476        # 1: signle String entry exist
477        # 2: line feed exist under the some signle String entry
478        #
479        StringEntryExistsFlag = 0
480        for Line in FileIn:
481            Line = FileIn[LineCount]
482            LineCount += 1
483            Line = Line.strip()
484            #
485            # Ignore comment line and empty line
486            #
487            if Line == u'' or Line.startswith(u'//'):
488                #
489                # Change the single line String entry flag status
490                #
491                if StringEntryExistsFlag == 1:
492                    StringEntryExistsFlag = 2
493                #
494                # If the '#string' line and the '#language' line are not in the same line,
495                # there should be only one line feed character betwwen them
496                #
497                if MultiLineFeedExits:
498                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
499                continue
500
501            MultiLineFeedExits = False
502            #
503            # Process comment embeded in string define lines
504            #
505            FindFlag = Line.find(u'//')
506            if FindFlag != -1 and Line.find(u'//') < Line.find(u'"'):
507                Line = Line.replace(Line[FindFlag:], u' ')
508                if FileIn[LineCount].strip().startswith('#language'):
509                    Line = Line + FileIn[LineCount]
510                    FileIn[LineCount-1] = Line
511                    FileIn[LineCount] = '\r\n'
512                    LineCount -= 1
513                    for Index in xrange (LineCount + 1, len (FileIn) - 1):
514                        if (Index == len(FileIn) -1):
515                            FileIn[Index] = '\r\n'
516                        else:
517                            FileIn[Index] = FileIn[Index + 1]
518                    continue
519            CommIndex = GetCharIndexOutStr(u'/', Line)
520            if CommIndex > -1:
521                if (len(Line) - 1) > CommIndex:
522                    if Line[CommIndex+1] == u'/':
523                        Line = Line[:CommIndex].strip()
524                    else:
525                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
526                else:
527                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
528
529            Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR)
530            Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR)
531            Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR)
532
533            Line = Line.replace(u'\\\\', u'\u0006')
534            Line = Line.replace(u'\\r\\n', CR + LF)
535            Line = Line.replace(u'\\n', CR + LF)
536            Line = Line.replace(u'\\r', CR)
537            Line = Line.replace(u'\\t', u'\t')
538            Line = Line.replace(u'''\"''', u'''"''')
539            Line = Line.replace(u'\t', u' ')
540            Line = Line.replace(u'\u0006', u'\\')
541
542            # IncList = gINCLUDE_PATTERN.findall(Line)
543            IncList = []
544            if len(IncList) == 1:
545                for Dir in [File.Dir] + self.IncludePathList:
546                    IncFile = PathClass(str(IncList[0]), Dir)
547                    self.IncFileList.append(IncFile)
548                    if os.path.isfile(IncFile.Path):
549                        Lines.extend(self.PreProcess(IncFile, True))
550                        break
551                else:
552                    EdkLogger.Error("Unicode File Parser",
553                                    ToolError.FILE_NOT_FOUND,
554                                    Message="Cannot find include file",
555                                    ExtraData=str(IncList[0]))
556                continue
557
558            #
559            # Between Name entry and Language entry can not contain line feed
560            #
561            if Line.startswith(u'#string') and Line.find(u'#language') == -1:
562                MultiLineFeedExits = True
563
564            if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.find(u'"') < 0:
565                MultiLineFeedExits = True
566
567            #
568            # Between Language entry and String entry can not contain line feed
569            #
570            if Line.startswith(u'#language') and len(Line.split()) == 2:
571                MultiLineFeedExits = True
572
573            #
574            # Between two String entry, can not contain line feed
575            #
576            if Line.startswith(u'"'):
577                if StringEntryExistsFlag == 2:
578                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
579                                    Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)
580
581                StringEntryExistsFlag = 1
582                if not Line.endswith('"'):
583                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
584                                    ExtraData='''The line %s misses '"' at the end of it in file %s'''
585                                              % (LineCount, File.Path))
586            elif Line.startswith(u'#language'):
587                if StringEntryExistsFlag == 2:
588                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
589                                    Message=ST.ERR_UNI_MISS_STRING_ENTRY % Line, ExtraData=File.Path)
590                StringEntryExistsFlag = 0
591            else:
592                StringEntryExistsFlag = 0
593
594            Lines.append(Line)
595
596        #
597        # Convert string def format as below
598        #
599        #     #string MY_STRING_1
600        #     #language eng
601        #     "My first English string line 1"
602        #     "My first English string line 2"
603        #     #string MY_STRING_1
604        #     #language spa
605        #     "Mi segunda secuencia 1"
606        #     "Mi segunda secuencia 2"
607        #
608
609        if not IsIncludeFile and not Lines:
610            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
611                Message=ST.ERR_UNIPARSE_NO_SECTION_EXIST, \
612                ExtraData=File.Path)
613
614        NewLines = []
615        StrName = u''
616        ExistStrNameList = []
617        for Line in Lines:
618            if StrName and not StrName.split()[1].startswith(DT.TAB_STR_TOKENCNAME + DT.TAB_UNDERLINE_SPLIT):
619                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
620                                Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
621                                ExtraData=File.Path)
622
623            if StrName and len(StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)) == 4:
624                StringTokenList = StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)
625                if (StringTokenList[3].upper() in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP] and \
626                    StringTokenList[3] not in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP]) or \
627                    (StringTokenList[2].upper() == DT.TAB_STR_TOKENERR and StringTokenList[2] != DT.TAB_STR_TOKENERR):
628                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
629                                Message=ST.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR % StrName.split()[1], \
630                                ExtraData=File.Path)
631
632            if Line.count(u'#language') > 1:
633                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
634                                Message=ST.ERR_UNIPARSE_SEP_LANGENTRY_LINE % Line, \
635                                ExtraData=File.Path)
636
637            if Line.startswith(u'//'):
638                continue
639            elif Line.startswith(u'#langdef'):
640                if len(Line.split()) == 2:
641                    NewLines.append(Line)
642                    continue
643                elif len(Line.split()) > 2 and Line.find(u'"') > 0:
644                    NewLines.append(Line[:Line.find(u'"')].strip())
645                    NewLines.append(Line[Line.find(u'"'):])
646                else:
647                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
648            elif Line.startswith(u'#string'):
649                if len(Line.split()) == 2:
650                    StrName = Line
651                    if StrName:
652                        if StrName.split()[1] not in ExistStrNameList:
653                            ExistStrNameList.append(StrName.split()[1].strip())
654                        elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
655                                                    DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
656                                                    DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
657                                                    DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
658                            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
659                                            Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
660                                            ExtraData=File.Path)
661                    continue
662                elif len(Line.split()) == 4 and Line.find(u'#language') > 0:
663                    if Line[Line.find(u'#language')-1] != ' ' or \
664                       Line[Line.find(u'#language')+len(u'#language')] != u' ':
665                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
666
667                    if Line.find(u'"') > 0:
668                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
669
670                    StrName = Line.split()[0] + u' ' + Line.split()[1]
671                    if StrName:
672                        if StrName.split()[1] not in ExistStrNameList:
673                            ExistStrNameList.append(StrName.split()[1].strip())
674                        elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
675                                                    DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
676                                                    DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
677                                                    DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
678                            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
679                                            Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
680                                            ExtraData=File.Path)
681                    if IsIncludeFile:
682                        if StrName not in NewLines:
683                            NewLines.append((Line[:Line.find(u'#language')]).strip())
684                    else:
685                        NewLines.append((Line[:Line.find(u'#language')]).strip())
686                    NewLines.append((Line[Line.find(u'#language'):]).strip())
687                elif len(Line.split()) > 4 and Line.find(u'#language') > 0 and Line.find(u'"') > 0:
688                    if Line[Line.find(u'#language')-1] != u' ' or \
689                       Line[Line.find(u'#language')+len(u'#language')] != u' ':
690                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
691
692                    if Line[Line.find(u'"')-1] != u' ':
693                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
694
695                    StrName = Line.split()[0] + u' ' + Line.split()[1]
696                    if StrName:
697                        if StrName.split()[1] not in ExistStrNameList:
698                            ExistStrNameList.append(StrName.split()[1].strip())
699                        elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
700                                                    DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
701                                                    DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
702                                                    DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
703                            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
704                                            Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
705                                            ExtraData=File.Path)
706                    if IsIncludeFile:
707                        if StrName not in NewLines:
708                            NewLines.append((Line[:Line.find(u'#language')]).strip())
709                    else:
710                        NewLines.append((Line[:Line.find(u'#language')]).strip())
711                    NewLines.append((Line[Line.find(u'#language'):Line.find(u'"')]).strip())
712                    NewLines.append((Line[Line.find(u'"'):]).strip())
713                else:
714                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
715            elif Line.startswith(u'#language'):
716                if len(Line.split()) == 2:
717                    if IsIncludeFile:
718                        if StrName not in NewLines:
719                            NewLines.append(StrName)
720                    else:
721                        NewLines.append(StrName)
722                    NewLines.append(Line)
723                elif len(Line.split()) > 2 and Line.find(u'"') > 0:
724                    if IsIncludeFile:
725                        if StrName not in NewLines:
726                            NewLines.append(StrName)
727                    else:
728                        NewLines.append(StrName)
729                    NewLines.append((Line[:Line.find(u'"')]).strip())
730                    NewLines.append((Line[Line.find(u'"'):]).strip())
731                else:
732                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
733            elif Line.startswith(u'"'):
734                if u'#string' in Line  or u'#language' in Line:
735                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
736                NewLines.append(Line)
737            else:
738                print Line
739                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
740
741        if StrName and not StrName.split()[1].startswith(u'STR_'):
742            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
743                                Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
744                                ExtraData=File.Path)
745
746        if StrName and not NewLines:
747            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
748                            Message=ST.ERR_UNI_MISS_LANGENTRY % StrName, \
749                            ExtraData=File.Path)
750
751        #
        # Check the order of Abstract, Description, BinaryAbstract and BinaryDescription;
        # it should be Abstract, Description, BinaryAbstract, BinaryDescription.
754        AbstractPosition = -1
755        DescriptionPosition = -1
756        BinaryAbstractPosition = -1
757        BinaryDescriptionPosition = -1
758        for StrName in ExistStrNameList:
759            if DT.TAB_HEADER_ABSTRACT.upper() in StrName:
760                if 'BINARY' in StrName:
761                    BinaryAbstractPosition = ExistStrNameList.index(StrName)
762                else:
763                    AbstractPosition = ExistStrNameList.index(StrName)
764            if DT.TAB_HEADER_DESCRIPTION.upper() in StrName:
765                if 'BINARY' in StrName:
766                    BinaryDescriptionPosition = ExistStrNameList.index(StrName)
767                else:
768                    DescriptionPosition = ExistStrNameList.index(StrName)
769
770        OrderList = sorted([AbstractPosition, DescriptionPosition])
771        BinaryOrderList = sorted([BinaryAbstractPosition, BinaryDescriptionPosition])
772        Min = OrderList[0]
773        Max = OrderList[1]
774        BinaryMin = BinaryOrderList[0]
775        BinaryMax = BinaryOrderList[1]
776        if BinaryDescriptionPosition > -1:
777            if not(BinaryDescriptionPosition == BinaryMax and BinaryAbstractPosition == BinaryMin and \
778                   BinaryMax > Max):
779                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
780                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
781                                ExtraData=File.Path)
782        elif BinaryAbstractPosition > -1:
783            if not(BinaryAbstractPosition > Max):
784                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
785                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
786                                ExtraData=File.Path)
787
788        if  DescriptionPosition > -1:
789            if not(DescriptionPosition == Max and AbstractPosition == Min and \
790                   DescriptionPosition > AbstractPosition):
791                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
792                                Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
793                                ExtraData=File.Path)
794
795        if not self.UniFileHeader:
796            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
797                            Message = ST.ERR_NO_SOURCE_HEADER,
798                            ExtraData=File.Path)
799
800        return NewLines
801
802    #
803    # Load a .uni file
804    #
805    def LoadUniFile(self, File = None):
806        if File == None:
807            EdkLogger.Error("Unicode File Parser",
808                            ToolError.PARSER_ERROR,
809                            Message='No unicode file is given',
810                            ExtraData=File.Path)
811
812        self.File = File
813
814        #
815        # Process special char in file
816        #
817        Lines = self.PreProcess(File)
818
819        #
820        # Get Unicode Information
821        #
822        for IndexI in range(len(Lines)):
823            Line = Lines[IndexI]
824            if (IndexI + 1) < len(Lines):
825                SecondLine = Lines[IndexI + 1]
826            if (IndexI + 2) < len(Lines):
827                ThirdLine = Lines[IndexI + 2]
828
829            #
830            # Get Language def information
831            #
832            if Line.find(u'#langdef ') >= 0:
833                self.GetLangDef(File, Line + u' ' + SecondLine)
834                continue
835
836            Name = ''
837            Language = ''
838            Value = ''
839            CombineToken = False
840            #
841            # Get string def information format as below
842            #
843            #     #string MY_STRING_1
844            #     #language eng
845            #     "My first English string line 1"
846            #     "My first English string line 2"
847            #     #string MY_STRING_1
848            #     #language spa
849            #     "Mi segunda secuencia 1"
850            #     "Mi segunda secuencia 2"
851            #
852            if Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
853                SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
854                ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0:
855                if Line.find('"') > 0 or SecondLine.find('"') > 0:
856                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
857                                Message=ST.ERR_UNIPARSE_DBLQUOTE_UNMATCHED,
858                                ExtraData=File.Path)
859
860                Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
861                Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')
862                for IndexJ in range(IndexI + 2, len(Lines)):
863                    if Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') < 0 and \
864                    Lines[IndexJ].strip().startswith(u'"') and Lines[IndexJ].strip().endswith(u'"'):
865                        if Lines[IndexJ][-2] == ' ':
866                            CombineToken = True
867                        if CombineToken:
868                            if Lines[IndexJ].strip()[1:-1].strip():
869                                Value = Value + Lines[IndexJ].strip()[1:-1].rstrip() + ' '
870                            else:
871                                Value = Value + Lines[IndexJ].strip()[1:-1]
872                            CombineToken = False
873                        else:
874                            Value = Value + Lines[IndexJ].strip()[1:-1] + '\r\n'
875                    else:
876                        IndexI = IndexJ
877                        break
878                if Value.endswith('\r\n'):
879                    Value = Value[: Value.rfind('\r\n')]
880                Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
881                self.AddStringToList(Name, Language, Value)
882                continue
883
884    #
885    # Load multiple .uni files
886    #
887    def LoadUniFiles(self, FileList):
888        if len(FileList) > 0:
889            for File in FileList:
890                FilePath = File.Path.strip()
891                if FilePath.endswith('.uni') or FilePath.endswith('.UNI') or FilePath.endswith('.Uni'):
892                    self.LoadUniFile(File)
893
894    #
895    # Add a string to list
896    #
897    def AddStringToList(self, Name, Language, Value, Token = 0, Referenced = False, UseOtherLangDef = '', Index = -1):
898        for LangNameItem in self.LanguageDef:
899            if Language == LangNameItem[0]:
900                break
901
902        if Language not in self.OrderedStringList:
903            self.OrderedStringList[Language] = []
904            self.OrderedStringDict[Language] = {}
905
906        IsAdded = True
907        if Name in self.OrderedStringDict[Language]:
908            IsAdded = False
909            if Value != None:
910                ItemIndexInList = self.OrderedStringDict[Language][Name]
911                Item = self.OrderedStringList[Language][ItemIndexInList]
912                Item.UpdateValue(Value)
913                Item.UseOtherLangDef = ''
914
915        if IsAdded:
916            Token = len(self.OrderedStringList[Language])
917            if Index == -1:
918                self.OrderedStringList[Language].append(StringDefClassObject(Name,
919                                                                             Value,
920                                                                             Referenced,
921                                                                             Token,
922                                                                             UseOtherLangDef))
923                self.OrderedStringDict[Language][Name] = Token
924                for LangName in self.LanguageDef:
925                    #
926                    # New STRING token will be added into all language string lists.
927                    # so that the unique STRING identifier is reserved for all languages in the package list.
928                    #
929                    if LangName[0] != Language:
930                        if UseOtherLangDef != '':
931                            OtherLangDef = UseOtherLangDef
932                        else:
933                            OtherLangDef = Language
934                        self.OrderedStringList[LangName[0]].append(StringDefClassObject(Name,
935                                                                                        '',
936                                                                                        Referenced,
937                                                                                        Token,
938                                                                                        OtherLangDef))
939                        self.OrderedStringDict[LangName[0]][Name] = len(self.OrderedStringList[LangName[0]]) - 1
940            else:
941                self.OrderedStringList[Language].insert(Index, StringDefClassObject(Name,
942                                                                                    Value,
943                                                                                    Referenced,
944                                                                                    Token,
945                                                                                    UseOtherLangDef))
946                self.OrderedStringDict[Language][Name] = Index
947
948    #
949    # Set the string as referenced
950    #
951    def SetStringReferenced(self, Name):
952        #
953        # String stoken are added in the same order in all language string lists.
954        # So, only update the status of string stoken in first language string list.
955        #
956        Lang = self.LanguageDef[0][0]
957        if Name in self.OrderedStringDict[Lang]:
958            ItemIndexInList = self.OrderedStringDict[Lang][Name]
959            Item = self.OrderedStringList[Lang][ItemIndexInList]
960            Item.Referenced = True
961
962    #
963    # Search the string in language definition by Name
964    #
965    def FindStringValue(self, Name, Lang):
966        if Name in self.OrderedStringDict[Lang]:
967            ItemIndexInList = self.OrderedStringDict[Lang][Name]
968            return self.OrderedStringList[Lang][ItemIndexInList]
969
970        return None
971
972    #
973    # Search the string in language definition by Token
974    #
975    def FindByToken(self, Token, Lang):
976        for Item in self.OrderedStringList[Lang]:
977            if Item.Token == Token:
978                return Item
979
980        return None
981
982    #
983    # Re-order strings and re-generate tokens
984    #
985    def ReToken(self):
986        if len(self.LanguageDef) == 0:
987            return None
988        #
989        # Retoken all language strings according to the status of string stoken in the first language string.
990        #
991        FirstLangName = self.LanguageDef[0][0]
992
993        # Convert the OrderedStringList to be OrderedStringListByToken in order to faciliate future search by token
994        for LangNameItem in self.LanguageDef:
995            self.OrderedStringListByToken[LangNameItem[0]] = {}
996
997        #
998        # Use small token for all referred string stoken.
999        #
1000        RefToken = 0
1001        for Index in range (0, len (self.OrderedStringList[FirstLangName])):
1002            FirstLangItem = self.OrderedStringList[FirstLangName][Index]
1003            if FirstLangItem.Referenced == True:
1004                for LangNameItem in self.LanguageDef:
1005                    LangName = LangNameItem[0]
1006                    OtherLangItem = self.OrderedStringList[LangName][Index]
1007                    OtherLangItem.Referenced = True
1008                    OtherLangItem.Token = RefToken
1009                    self.OrderedStringListByToken[LangName][OtherLangItem.Token] = OtherLangItem
1010                RefToken = RefToken + 1
1011
1012        #
1013        # Use big token for all unreferred string stoken.
1014        #
1015        UnRefToken = 0
1016        for Index in range (0, len (self.OrderedStringList[FirstLangName])):
1017            FirstLangItem = self.OrderedStringList[FirstLangName][Index]
1018            if FirstLangItem.Referenced == False:
1019                for LangNameItem in self.LanguageDef:
1020                    LangName = LangNameItem[0]
1021                    OtherLangItem = self.OrderedStringList[LangName][Index]
1022                    OtherLangItem.Token = RefToken + UnRefToken
1023                    self.OrderedStringListByToken[LangName][OtherLangItem.Token] = OtherLangItem
1024                UnRefToken = UnRefToken + 1
1025
1026    #
1027    # Show the instance itself
1028    #
1029    def ShowMe(self):
1030        print self.LanguageDef
1031        #print self.OrderedStringList
1032        for Item in self.OrderedStringList:
1033            print Item
1034            for Member in self.OrderedStringList[Item]:
1035                print str(Member)
1036
1037    #
1038    # Read content from '!include' UNI file
1039    #
1040    def ReadIncludeUNIfile(self, FilaPath):
1041        if self.File:
1042            pass
1043
1044        if not os.path.exists(FilaPath) or not os.path.isfile(FilaPath):
1045            EdkLogger.Error("Unicode File Parser",
1046                             ToolError.FILE_NOT_FOUND,
1047                             ExtraData=FilaPath)
1048        try:
1049            FileIn = codecs.open(FilaPath, mode='rb', encoding='utf_16').readlines()
1050        except UnicodeError:
1051            FileIn = codecs.open(FilaPath, mode='rb', encoding='utf_16_le').readlines()
1052        except:
1053            EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=FilaPath)
1054        return FileIn
1055
1056