1 /*
2  *******************************************************************************
3  *   Copyright (C) 2003-2014, International Business Machines
4  *   Corporation and others.  All Rights Reserved.
5  *******************************************************************************
6  *
7  * File prscmnts.cpp
8  *
9  * Modification History:
10  *
11  *   Date          Name        Description
12  *   08/22/2003    ram         Creation.
13  *******************************************************************************
14  */
15 
16 // Safer use of UnicodeString.
17 #ifndef UNISTR_FROM_CHAR_EXPLICIT
18 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
19 #endif
20 
21 // Less important, but still a good idea.
22 #ifndef UNISTR_FROM_STRING_EXPLICIT
23 #   define UNISTR_FROM_STRING_EXPLICIT explicit
24 #endif
25 
26 #include "unicode/regex.h"
27 #include "unicode/unistr.h"
28 #include "unicode/parseerr.h"
29 #include "prscmnts.h"
30 #include <stdio.h>
31 #include <stdlib.h>
32 
33 U_NAMESPACE_USE
34 
35 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
36 
37 #define MAX_SPLIT_STRINGS 20
38 
39 const char *patternStrings[UPC_LIMIT]={
40     "^translate\\s*(.*)",
41     "^note\\s*(.*)"
42 };
43 
44 U_CFUNC int32_t
removeText(UChar * source,int32_t srcLen,UnicodeString patString,uint32_t options,UnicodeString replaceText,UErrorCode * status)45 removeText(UChar *source, int32_t srcLen,
46            UnicodeString patString,uint32_t options,
47            UnicodeString replaceText, UErrorCode *status){
48 
49     if(status == NULL || U_FAILURE(*status)){
50         return 0;
51     }
52 
53     UnicodeString src(source, srcLen);
54 
55     RegexMatcher    myMatcher(patString, src, options, *status);
56     if(U_FAILURE(*status)){
57         return 0;
58     }
59     UnicodeString dest;
60 
61 
62     dest = myMatcher.replaceAll(replaceText,*status);
63 
64 
65     return dest.extract(source, srcLen, *status);
66 
67 }
68 U_CFUNC int32_t
trim(UChar * src,int32_t srcLen,UErrorCode * status)69 trim(UChar *src, int32_t srcLen, UErrorCode *status){
70      srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines
71      srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces
72      srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes
73      return srcLen;
74 }
75 
76 U_CFUNC int32_t
removeCmtText(UChar * source,int32_t srcLen,UErrorCode * status)77 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
78     srcLen = trim(source, srcLen, status);
79     UnicodeString patString("^\\s*?\\*\\s*?");  // remove pattern like " * " at the begining of the line
80     srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
81     return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines;
82 }
83 
84 U_CFUNC int32_t
getText(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UnicodeString patternString,UErrorCode * status)85 getText(const UChar* source, int32_t srcLen,
86         UChar** dest, int32_t destCapacity,
87         UnicodeString patternString,
88         UErrorCode* status){
89 
90     if(status == NULL || U_FAILURE(*status)){
91         return 0;
92     }
93 
94     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
95     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
96     UnicodeString src (source,srcLen);
97 
98     if (U_FAILURE(*status)) {
99         return 0;
100     }
101     pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
102 
103     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
104     if (U_FAILURE(*status)) {
105         return 0;
106     }
107     for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
108         matcher.reset(stringArray[i]);
109         if(matcher.lookingAt(*status)){
110             UnicodeString out = matcher.group(1, *status);
111 
112             return out.extract(*dest, destCapacity,*status);
113         }
114     }
115     return 0;
116 }
117 
118 
119 #define AT_SIGN  0x0040
120 
121 U_CFUNC int32_t
getDescription(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)122 getDescription( const UChar* source, int32_t srcLen,
123                 UChar** dest, int32_t destCapacity,
124                 UErrorCode* status){
125     if(status == NULL || U_FAILURE(*status)){
126         return 0;
127     }
128 
129     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
130     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
131     UnicodeString src(source, srcLen);
132 
133     if (U_FAILURE(*status)) {
134         return 0;
135     }
136     pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
137 
138     if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
139         int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
140         return trim(*dest, destLen, status);
141     }
142     return 0;
143 }
144 
145 U_CFUNC int32_t
getCount(const UChar * source,int32_t srcLen,UParseCommentsOption option,UErrorCode * status)146 getCount(const UChar* source, int32_t srcLen,
147          UParseCommentsOption option, UErrorCode *status){
148 
149     if(status == NULL || U_FAILURE(*status)){
150         return 0;
151     }
152 
153     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
154     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
155     UnicodeString src (source, srcLen);
156 
157 
158     if (U_FAILURE(*status)) {
159         return 0;
160     }
161     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
162 
163     UnicodeString patternString(patternStrings[option]);
164     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
165     if (U_FAILURE(*status)) {
166         return 0;
167     }
168     int32_t count = 0;
169     for(int32_t i=0; i<retLen; i++){
170         matcher.reset(stringArray[i]);
171         if(matcher.lookingAt(*status)){
172             count++;
173         }
174     }
175     if(option == UPC_TRANSLATE && count > 1){
176         fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
177         exit(U_UNSUPPORTED_ERROR);
178     }
179     return count;
180 }
181 
182 U_CFUNC int32_t
getAt(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,int32_t index,UParseCommentsOption option,UErrorCode * status)183 getAt(const UChar* source, int32_t srcLen,
184         UChar** dest, int32_t destCapacity,
185         int32_t index,
186         UParseCommentsOption option,
187         UErrorCode* status){
188 
189     if(status == NULL || U_FAILURE(*status)){
190         return 0;
191     }
192 
193     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
194     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
195     UnicodeString src (source, srcLen);
196 
197 
198     if (U_FAILURE(*status)) {
199         return 0;
200     }
201     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
202 
203     UnicodeString patternString(patternStrings[option]);
204     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
205     if (U_FAILURE(*status)) {
206         return 0;
207     }
208     int32_t count = 0;
209     for(int32_t i=0; i<retLen; i++){
210         matcher.reset(stringArray[i]);
211         if(matcher.lookingAt(*status)){
212             if(count == index){
213                 UnicodeString out = matcher.group(1, *status);
214                 return out.extract(*dest, destCapacity,*status);
215             }
216             count++;
217 
218         }
219     }
220     return 0;
221 
222 }
223 
224 U_CFUNC int32_t
getTranslate(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)225 getTranslate( const UChar* source, int32_t srcLen,
226               UChar** dest, int32_t destCapacity,
227               UErrorCode* status){
228     UnicodeString     notePatternString("^translate\\s*?(.*)");
229 
230     int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
231     return trim(*dest, destLen, status);
232 }
233 
234 U_CFUNC int32_t
getNote(const UChar * source,int32_t srcLen,UChar ** dest,int32_t destCapacity,UErrorCode * status)235 getNote(const UChar* source, int32_t srcLen,
236         UChar** dest, int32_t destCapacity,
237         UErrorCode* status){
238 
239     UnicodeString     notePatternString("^note\\s*?(.*)");
240     int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status);
241     return trim(*dest, destLen, status);
242 
243 }
244 
245 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
246 
247