1 #include "xmpmeta/xmp_parser.h"
2 
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <sstream>
#include <stack>
7 
8 #include "android-base/logging.h"
9 #include "strings/case.h"
10 #include "strings/numbers.h"
11 #include "xmpmeta/base64.h"
12 #include "xmpmeta/jpeg_io.h"
13 #include "xmpmeta/xml/const.h"
14 #include "xmpmeta/xml/deserializer_impl.h"
15 #include "xmpmeta/xml/search.h"
16 #include "xmpmeta/xml/utils.h"
17 #include "xmpmeta/xmp_const.h"
18 
19 using ::dynamic_depth::xmpmeta::xml::DepthFirstSearch;
20 using ::dynamic_depth::xmpmeta::xml::DeserializerImpl;
21 using ::dynamic_depth::xmpmeta::xml::FromXmlChar;
22 using ::dynamic_depth::xmpmeta::xml::GetFirstDescriptionElement;
23 
24 namespace dynamic_depth {
25 namespace xmpmeta {
26 namespace {
27 
// File-name suffixes accepted by ReadXmpHeader(). They are matched against the
// lower-cased file name and do not include a leading dot.
constexpr char kJpgExtension[] = "jpg";
constexpr char kJpegExtension[] = "jpeg";
30 
BoolStringToBool(const string & bool_str,bool * value)31 bool BoolStringToBool(const string& bool_str, bool* value) {
32   if (dynamic_depth::StringCaseEqual(bool_str, "true")) {
33     *value = true;
34     return true;
35   }
36   if (dynamic_depth::StringCaseEqual(bool_str, "false")) {
37     *value = false;
38     return true;
39   }
40   return false;
41 }
42 
43 // Converts string_property to the type T.
44 template <typename T>
45 bool ConvertStringPropertyToType(const string& string_property, T* value);
46 
47 // Gets the end of the XMP meta content. If there is no packet wrapper, returns
48 // data.length, otherwise returns 1 + the position of last '>' without '?'
49 // before it. Usually the packet wrapper end is "<?xpacket end="w"?>.
GetXmpContentEnd(const string & data)50 size_t GetXmpContentEnd(const string& data) {
51   if (data.empty()) {
52     return 0;
53   }
54   for (size_t i = data.size() - 1; i >= 1; --i) {
55     if (data[i] == '>') {
56       if (data[i - 1] != '?') {
57         return i + 1;
58       }
59     }
60   }
61   // It should not reach here for a valid XMP meta.
62   LOG(WARNING) << "Failed to find the end of the XMP meta content.";
63   return data.size();
64 }
65 
// Returns true when 'x' is a prefix of 's'. An empty 'x' is a prefix of
// every string.
bool StartsWith(const string& s, const string& x) {
  if (x.size() > s.size()) {
    return false;
  }
  return s.compare(0, x.size(), x) == 0;
}
// Returns true when 'x' is a suffix of 's'. An empty 'x' is a suffix of
// every string.
bool EndsWith(const string& s, const string& x) {
  if (x.size() > s.size()) {
    return false;
  }
  const size_t tail_start = s.size() - x.size();
  return s.compare(tail_start, x.size(), x) == 0;
}
74 
75 // Parses the first valid XMP section. Any other valid XMP section will be
76 // ignored.
ParseFirstValidXMPSection(const std::vector<Section> & sections,XmpData * xmp)77 bool ParseFirstValidXMPSection(const std::vector<Section>& sections,
78                                XmpData* xmp) {
79   for (const Section& section : sections) {
80     if (StartsWith(section.data, XmpConst::Header())) {
81       const size_t end = GetXmpContentEnd(section.data);
82       // Increment header length by 1 for the null termination.
83       const size_t header_length = strlen(XmpConst::Header()) + 1;
84       // Check for integer underflow before subtracting.
85       if (header_length >= end) {
86         LOG(ERROR) << "Invalid content length: "
87                    << static_cast<int>(end - header_length);
88         return false;
89       }
90       const size_t content_length = end - header_length;
91       // header_length is guaranteed to be <= data.size due to the if condition
92       // above. If this contract changes we must add an additonal check.
93       const char* content_start = &section.data[header_length];
94       // xmlReadMemory requires an int. Before casting size_t to int we must
95       // check for integer overflow.
96       if (content_length > INT_MAX) {
97         LOG(ERROR) << "First XMP section too large, size: " << content_length;
98         return false;
99       }
100       *xmp->MutableStandardSection() = xmlReadMemory(
101           content_start, static_cast<int>(content_length), nullptr, nullptr, 0);
102       if (xmp->StandardSection() == nullptr) {
103         LOG(WARNING) << "Failed to parse standard section.";
104         return false;
105       }
106       return true;
107     }
108   }
109   return false;
110 }
111 
112 // Collects the extended XMP sections with the given name into a string. Other
113 // sections will be ignored.
GetExtendedXmpSections(const std::vector<Section> & sections,const string & section_name)114 string GetExtendedXmpSections(const std::vector<Section>& sections,
115                               const string& section_name) {
116   string extended_header = XmpConst::ExtensionHeader();
117   extended_header += '\0' + section_name;
118   // section_name is dynamically extracted from the xml file and can have an
119   // arbitrary size. Check for integer overflow before addition.
120   if (extended_header.size() > SIZE_MAX - XmpConst::ExtensionHeaderOffset()) {
121     return "";
122   }
123   const size_t section_start_offset =
124       extended_header.size() + XmpConst::ExtensionHeaderOffset();
125 
126   // Compute the size of the buffer to parse the extended sections.
127   std::vector<const Section*> xmp_sections;
128   std::vector<size_t> xmp_end_offsets;
129   size_t buffer_size = 0;
130   for (const Section& section : sections) {
131     if (extended_header.empty() || StartsWith(section.data, extended_header)) {
132       const size_t end_offset = section.data.size();
133       const size_t section_size = end_offset - section_start_offset;
134       if (end_offset < section_start_offset ||
135           section_size > SIZE_MAX - buffer_size) {
136         return "";
137       }
138       buffer_size += section_size;
139       xmp_sections.push_back(&section);
140       xmp_end_offsets.push_back(end_offset);
141     }
142   }
143 
144   // Copy all the relevant sections' data into a buffer.
145   string buffer(buffer_size, '\0');
146   if (buffer.size() != buffer_size) {
147     return "";
148   }
149   size_t offset = 0;
150   for (int i = 0; i < xmp_sections.size(); ++i) {
151     const Section* section = xmp_sections[i];
152     const size_t length = xmp_end_offsets[i] - section_start_offset;
153     std::copy_n(&section->data[section_start_offset], length, &buffer[offset]);
154     offset += length;
155   }
156   return buffer;
157 }
158 
159 // Parses the extended XMP sections with the given name. All other sections
160 // will be ignored.
ParseExtendedXmpSections(const std::vector<Section> & sections,const string & section_name,XmpData * xmp_data)161 bool ParseExtendedXmpSections(const std::vector<Section>& sections,
162                               const string& section_name, XmpData* xmp_data) {
163   const string extended_sections =
164       GetExtendedXmpSections(sections, section_name);
165   // xmlReadMemory requires an int. Before casting size_t to int we must check
166   // for integer overflow.
167   if (extended_sections.size() > INT_MAX) {
168     LOG(WARNING) << "Extended sections too large, size: "
169                  << extended_sections.size();
170     return false;
171   }
172   *xmp_data->MutableExtendedSection() = xmlReadMemory(
173       extended_sections.data(), static_cast<int>(extended_sections.size()),
174       nullptr, nullptr, XML_PARSE_HUGE);
175   if (xmp_data->ExtendedSection() == nullptr) {
176     LOG(WARNING) << "Failed to parse extended sections.";
177     return false;
178   }
179   return true;
180 }
181 
182 // Extracts a XmpData from a JPEG image stream.
ExtractXmpMeta(const bool skip_extended,std::istream * file,XmpData * xmp_data)183 bool ExtractXmpMeta(const bool skip_extended, std::istream* file,
184                     XmpData* xmp_data) {
185   // We cannot use CHECK because this is ported to AOSP.
186   assert(xmp_data != nullptr);  // NOLINT
187   xmp_data->Reset();
188 
189   ParseOptions parse_options;
190   parse_options.read_meta_only = true;
191   if (skip_extended) {
192     parse_options.section_header = XmpConst::Header();
193     parse_options.section_header_return_first = true;
194   }
195   const std::vector<Section> sections = Parse(parse_options, file);
196   if (sections.empty()) {
197     LOG(WARNING) << "No sections found.";
198     return false;
199   }
200 
201   if (!ParseFirstValidXMPSection(sections, xmp_data)) {
202     LOG(WARNING) << "Could not parse first section.";
203     return false;
204   }
205   if (skip_extended) {
206     return true;
207   }
208   string extension_name;
209   DeserializerImpl deserializer(
210       GetFirstDescriptionElement(xmp_data->StandardSection()));
211   if (!deserializer.ParseString(XmpConst::HasExtensionPrefix(),
212                                 XmpConst::HasExtension(), &extension_name)) {
213     // No extended sections present, so nothing to parse.
214     return true;
215   }
216   if (!ParseExtendedXmpSections(sections, extension_name, xmp_data)) {
217     LOG(WARNING) << "Extended sections present, but could not be parsed.";
218     return false;
219   }
220   return true;
221 }
222 
223 // Extracts the specified string attribute.
GetStringProperty(const xmlNodePtr node,const char * prefix,const char * property,string * value)224 bool GetStringProperty(const xmlNodePtr node, const char* prefix,
225                        const char* property, string* value) {
226   const xmlDocPtr doc = node->doc;
227   for (const _xmlAttr* attribute = node->properties; attribute != nullptr;
228        attribute = attribute->next) {
229     if (attribute->ns &&
230         strcmp(FromXmlChar(attribute->ns->prefix), prefix) == 0 &&
231         strcmp(FromXmlChar(attribute->name), property) == 0) {
232       xmlChar* attribute_string =
233           xmlNodeListGetString(doc, attribute->children, 1);
234       *value = FromXmlChar(attribute_string);
235       xmlFree(attribute_string);
236       return true;
237     }
238   }
239   return false;
240 }
241 
242 // Reads the contents of a node.
243 // E.g. <prefix:node_name>Contents Here</prefix:node_name>
ReadNodeContent(const xmlNodePtr node,const char * prefix,const char * node_name,string * value)244 bool ReadNodeContent(const xmlNodePtr node, const char* prefix,
245                      const char* node_name, string* value) {
246   auto* element = DepthFirstSearch(node, node_name);
247   if (element == nullptr) {
248     return false;
249   }
250   if (prefix != nullptr &&
251       (element->ns == nullptr || element->ns->prefix == nullptr ||
252        strcmp(FromXmlChar(element->ns->prefix), prefix) != 0)) {
253     return false;
254   }
255   xmlChar* node_content = xmlNodeGetContent(element);
256   *value = FromXmlChar(node_content);
257   free(node_content);
258   return true;
259 }
260 
// Fallback used for any T that has no explicit specialization: it converts
// nothing and always returns false. The QCHECK only fires when value is null
// (QCHECK is a project macro; presumably fatal).
template <typename T>
bool ConvertStringPropertyToType(const string& string_property, T* value) {
  static_cast<void>(string_property);  // Deliberately unused by the fallback.
  QCHECK(value) << "Cannot call this method on a generic type";
  return false;
}
266 
267 template <>
ConvertStringPropertyToType(const string & string_property,bool * value)268 bool ConvertStringPropertyToType<bool>(const string& string_property,
269                                        bool* value) {
270   return BoolStringToBool(string_property, value);
271 }
272 
273 template <>
ConvertStringPropertyToType(const string & string_property,double * value)274 bool ConvertStringPropertyToType<double>(const string& string_property,
275                                          double* value) {
276   *value = std::stod(string_property);
277   return true;
278 }
279 
280 template <>
ConvertStringPropertyToType(const string & string_property,int * value)281 bool ConvertStringPropertyToType<int>(const string& string_property,
282                                       int* value) {
283   *value = 0;
284   for (int i = 0; i < string_property.size(); ++i) {
285     if (!isdigit(string_property[i])) {
286       return false;
287     }
288   }
289 
290   *value = std::atoi(string_property.c_str());  // NOLINT
291   return true;
292 }
293 
294 template <>
ConvertStringPropertyToType(const string & string_property,int64 * value)295 bool ConvertStringPropertyToType<int64>(const string& string_property,
296                                         int64* value) {
297   *value = std::stol(string_property);
298   return true;
299 }
300 
301 }  // namespace
302 
ReadXmpHeader(const string & filename,const bool skip_extended,XmpData * xmp_data)303 bool ReadXmpHeader(const string& filename, const bool skip_extended,
304                    XmpData* xmp_data) {
305   string filename_lower = filename;
306   std::transform(filename_lower.begin(), filename_lower.end(),
307                  filename_lower.begin(), ::tolower);
308   if (!EndsWith(filename_lower, kJpgExtension) &&
309       !EndsWith(filename_lower, kJpegExtension)) {
310     LOG(WARNING) << "XMP parse: only JPEG file is supported";
311     return false;
312   }
313 
314   std::ifstream file(filename.c_str(), std::ios::binary);
315   if (!file.is_open()) {
316     LOG(WARNING) << " Could not read file: " << filename;
317     return false;
318   }
319   return ExtractXmpMeta(skip_extended, &file, xmp_data);
320 }
321 
ReadXmpFromMemory(const string & jpeg_contents,const bool skip_extended,XmpData * xmp_data)322 bool ReadXmpFromMemory(const string& jpeg_contents, const bool skip_extended,
323                        XmpData* xmp_data) {
324   std::istringstream stream(jpeg_contents);
325   return ExtractXmpMeta(skip_extended, &stream, xmp_data);
326 }
327 
ReadXmpHeader(std::istream * input_stream,bool skip_extended,XmpData * xmp_data)328 bool ReadXmpHeader(std::istream* input_stream, bool skip_extended,
329                    XmpData* xmp_data) {
330   return ExtractXmpMeta(skip_extended, input_stream, xmp_data);
331 }
332 
333 }  // namespace xmpmeta
334 }  // namespace dynamic_depth
335