#include "xmpmeta/xmp_parser.h"

#include <algorithm>
#include <cassert>
#include <cctype>
#include <cerrno>
#include <climits>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <limits>
#include <sstream>
#include <stack>
#include <string>
#include <vector>

#include "android-base/logging.h"
#include "strings/case.h"
#include "strings/numbers.h"
#include "xmpmeta/base64.h"
#include "xmpmeta/jpeg_io.h"
#include "xmpmeta/xml/const.h"
#include "xmpmeta/xml/deserializer_impl.h"
#include "xmpmeta/xml/search.h"
#include "xmpmeta/xml/utils.h"
#include "xmpmeta/xmp_const.h"
19 
20 using ::dynamic_depth::xmpmeta::xml::DepthFirstSearch;
21 using ::dynamic_depth::xmpmeta::xml::DeserializerImpl;
22 using ::dynamic_depth::xmpmeta::xml::FromXmlChar;
23 using ::dynamic_depth::xmpmeta::xml::GetFirstDescriptionElement;
24 
25 namespace dynamic_depth {
26 namespace xmpmeta {
27 namespace {
28 
// Lower-case filename suffixes accepted by the filename-based ReadXmpHeader.
// They contain no leading '.', so the check there is a plain suffix match.
constexpr char kJpgExtension[] = "jpg";
constexpr char kJpegExtension[] = "jpeg";
31 
BoolStringToBool(const string & bool_str,bool * value)32 bool BoolStringToBool(const string& bool_str, bool* value) {
33   if (dynamic_depth::StringCaseEqual(bool_str, "true")) {
34     *value = true;
35     return true;
36   }
37   if (dynamic_depth::StringCaseEqual(bool_str, "false")) {
38     *value = false;
39     return true;
40   }
41   return false;
42 }
43 
44 // Converts string_property to the type T.
45 template <typename T>
46 bool ConvertStringPropertyToType(const string& string_property, T* value);
47 
48 // Gets the end of the XMP meta content. If there is no packet wrapper, returns
49 // data.length, otherwise returns 1 + the position of last '>' without '?'
50 // before it. Usually the packet wrapper end is "<?xpacket end="w"?>.
GetXmpContentEnd(const string & data)51 size_t GetXmpContentEnd(const string& data) {
52   if (data.empty()) {
53     return 0;
54   }
55   for (size_t i = data.size() - 1; i >= 1; --i) {
56     if (data[i] == '>') {
57       if (data[i - 1] != '?') {
58         return i + 1;
59       }
60     }
61   }
62   // It should not reach here for a valid XMP meta.
63   LOG(WARNING) << "Failed to find the end of the XMP meta content.";
64   return data.size();
65 }
66 
// Returns true when `x` is a prefix of `s`. An empty `x` is a prefix of any
// string.
bool StartsWith(const string& s, const string& x) {
  if (x.size() > s.size()) {
    return false;
  }
  return std::equal(x.begin(), x.end(), s.begin());
}
// Returns true when `x` is a suffix of `s`. An empty `x` is a suffix of any
// string.
bool EndsWith(const string& s, const string& x) {
  if (x.size() > s.size()) {
    return false;
  }
  return std::equal(x.rbegin(), x.rend(), s.rbegin());
}
75 
76 // Parses the first valid XMP section. Any other valid XMP section will be
77 // ignored.
ParseFirstValidXMPSection(const std::vector<Section> & sections,XmpData * xmp)78 bool ParseFirstValidXMPSection(const std::vector<Section>& sections,
79                                XmpData* xmp) {
80   for (const Section& section : sections) {
81     if (StartsWith(section.data, XmpConst::Header())) {
82       const size_t end = GetXmpContentEnd(section.data);
83       // Increment header length by 1 for the null termination.
84       const size_t header_length = strlen(XmpConst::Header()) + 1;
85       // Check for integer underflow before subtracting.
86       if (header_length >= end) {
87         LOG(ERROR) << "Invalid content length: "
88                    << static_cast<int>(end - header_length);
89         return false;
90       }
91       const size_t content_length = end - header_length;
92       // header_length is guaranteed to be <= data.size due to the if condition
93       // above. If this contract changes we must add an additonal check.
94       const char* content_start = &section.data[header_length];
95       // xmlReadMemory requires an int. Before casting size_t to int we must
96       // check for integer overflow.
97       if (content_length > INT_MAX) {
98         LOG(ERROR) << "First XMP section too large, size: " << content_length;
99         return false;
100       }
101       *xmp->MutableStandardSection() = xmlReadMemory(
102           content_start, static_cast<int>(content_length), nullptr, nullptr, 0);
103       if (xmp->StandardSection() == nullptr) {
104         LOG(WARNING) << "Failed to parse standard section.";
105         return false;
106       }
107       return true;
108     }
109   }
110   return false;
111 }
112 
113 // Collects the extended XMP sections with the given name into a string. Other
114 // sections will be ignored.
GetExtendedXmpSections(const std::vector<Section> & sections,const string & section_name)115 string GetExtendedXmpSections(const std::vector<Section>& sections,
116                               const string& section_name) {
117   string extended_header = XmpConst::ExtensionHeader();
118   extended_header += '\0' + section_name;
119   // section_name is dynamically extracted from the xml file and can have an
120   // arbitrary size. Check for integer overflow before addition.
121   if (extended_header.size() > SIZE_MAX - XmpConst::ExtensionHeaderOffset()) {
122     return "";
123   }
124   const size_t section_start_offset =
125       extended_header.size() + XmpConst::ExtensionHeaderOffset();
126 
127   // Compute the size of the buffer to parse the extended sections.
128   std::vector<const Section*> xmp_sections;
129   std::vector<size_t> xmp_end_offsets;
130   size_t buffer_size = 0;
131   for (const Section& section : sections) {
132     if (extended_header.empty() || StartsWith(section.data, extended_header)) {
133       const size_t end_offset = section.data.size();
134       const size_t section_size = end_offset - section_start_offset;
135       if (end_offset < section_start_offset ||
136           section_size > SIZE_MAX - buffer_size) {
137         return "";
138       }
139       buffer_size += section_size;
140       xmp_sections.push_back(&section);
141       xmp_end_offsets.push_back(end_offset);
142     }
143   }
144 
145   // Copy all the relevant sections' data into a buffer.
146   string buffer(buffer_size, '\0');
147   if (buffer.size() != buffer_size) {
148     return "";
149   }
150   size_t offset = 0;
151   for (int i = 0; i < xmp_sections.size(); ++i) {
152     const Section* section = xmp_sections[i];
153     const size_t length = xmp_end_offsets[i] - section_start_offset;
154     std::copy_n(&section->data[section_start_offset], length, &buffer[offset]);
155     offset += length;
156   }
157   return buffer;
158 }
159 
160 // Parses the extended XMP sections with the given name. All other sections
161 // will be ignored.
ParseExtendedXmpSections(const std::vector<Section> & sections,const string & section_name,XmpData * xmp_data)162 bool ParseExtendedXmpSections(const std::vector<Section>& sections,
163                               const string& section_name, XmpData* xmp_data) {
164   const string extended_sections =
165       GetExtendedXmpSections(sections, section_name);
166   // xmlReadMemory requires an int. Before casting size_t to int we must check
167   // for integer overflow.
168   if (extended_sections.size() > INT_MAX) {
169     LOG(WARNING) << "Extended sections too large, size: "
170                  << extended_sections.size();
171     return false;
172   }
173   *xmp_data->MutableExtendedSection() = xmlReadMemory(
174       extended_sections.data(), static_cast<int>(extended_sections.size()),
175       nullptr, nullptr, XML_PARSE_HUGE);
176   if (xmp_data->ExtendedSection() == nullptr) {
177     LOG(WARNING) << "Failed to parse extended sections.";
178     return false;
179   }
180   return true;
181 }
182 
183 // Extracts a XmpData from a JPEG image stream.
ExtractXmpMeta(const bool skip_extended,std::istream * file,XmpData * xmp_data)184 bool ExtractXmpMeta(const bool skip_extended, std::istream* file,
185                     XmpData* xmp_data) {
186   // We cannot use CHECK because this is ported to AOSP.
187   assert(xmp_data != nullptr);  // NOLINT
188   xmp_data->Reset();
189 
190   ParseOptions parse_options;
191   parse_options.read_meta_only = true;
192   if (skip_extended) {
193     parse_options.section_header = XmpConst::Header();
194     parse_options.section_header_return_first = true;
195   }
196   const std::vector<Section> sections = Parse(parse_options, file);
197   if (sections.empty()) {
198     LOG(WARNING) << "No sections found.";
199     return false;
200   }
201 
202   if (!ParseFirstValidXMPSection(sections, xmp_data)) {
203     LOG(WARNING) << "Could not parse first section.";
204     return false;
205   }
206   if (skip_extended) {
207     return true;
208   }
209   string extension_name;
210   DeserializerImpl deserializer(
211       GetFirstDescriptionElement(xmp_data->StandardSection()));
212   if (!deserializer.ParseString(XmpConst::HasExtensionPrefix(),
213                                 XmpConst::HasExtension(), &extension_name)) {
214     // No extended sections present, so nothing to parse.
215     return true;
216   }
217   if (!ParseExtendedXmpSections(sections, extension_name, xmp_data)) {
218     LOG(WARNING) << "Extended sections present, but could not be parsed.";
219     return false;
220   }
221   return true;
222 }
223 
224 // Extracts the specified string attribute.
GetStringProperty(const xmlNodePtr node,const char * prefix,const char * property,string * value)225 bool GetStringProperty(const xmlNodePtr node, const char* prefix,
226                        const char* property, string* value) {
227   const xmlDocPtr doc = node->doc;
228   for (const _xmlAttr* attribute = node->properties; attribute != nullptr;
229        attribute = attribute->next) {
230     if (attribute->ns &&
231         strcmp(FromXmlChar(attribute->ns->prefix), prefix) == 0 &&
232         strcmp(FromXmlChar(attribute->name), property) == 0) {
233       xmlChar* attribute_string =
234           xmlNodeListGetString(doc, attribute->children, 1);
235       *value = FromXmlChar(attribute_string);
236       xmlFree(attribute_string);
237       return true;
238     }
239   }
240   return false;
241 }
242 
243 // Reads the contents of a node.
244 // E.g. <prefix:node_name>Contents Here</prefix:node_name>
ReadNodeContent(const xmlNodePtr node,const char * prefix,const char * node_name,string * value)245 bool ReadNodeContent(const xmlNodePtr node, const char* prefix,
246                      const char* node_name, string* value) {
247   auto* element = DepthFirstSearch(node, node_name);
248   if (element == nullptr) {
249     return false;
250   }
251   if (prefix != nullptr &&
252       (element->ns == nullptr || element->ns->prefix == nullptr ||
253        strcmp(FromXmlChar(element->ns->prefix), prefix) != 0)) {
254     return false;
255   }
256   xmlChar* node_content = xmlNodeGetContent(element);
257   *value = FromXmlChar(node_content);
258   free(node_content);
259   return true;
260 }
261 
262 template <typename T>
ConvertStringPropertyToType(const string & string_property,T * value)263 bool ConvertStringPropertyToType(const string& string_property, T* value) {
264   QCHECK(value) << "Cannot call this method on a generic type";
265   return false;
266 }
267 
268 template <>
ConvertStringPropertyToType(const string & string_property,bool * value)269 bool ConvertStringPropertyToType<bool>(const string& string_property,
270                                        bool* value) {
271   return BoolStringToBool(string_property, value);
272 }
273 
274 template <>
ConvertStringPropertyToType(const string & string_property,double * value)275 bool ConvertStringPropertyToType<double>(const string& string_property,
276                                          double* value) {
277   *value = std::stod(string_property);
278   return true;
279 }
280 
281 template <>
ConvertStringPropertyToType(const string & string_property,int * value)282 bool ConvertStringPropertyToType<int>(const string& string_property,
283                                       int* value) {
284   *value = 0;
285   for (int i = 0; i < string_property.size(); ++i) {
286     if (!isdigit(string_property[i])) {
287       return false;
288     }
289   }
290 
291   *value = std::atoi(string_property.c_str());  // NOLINT
292   return true;
293 }
294 
295 template <>
ConvertStringPropertyToType(const string & string_property,int64 * value)296 bool ConvertStringPropertyToType<int64>(const string& string_property,
297                                         int64* value) {
298   *value = std::stol(string_property);
299   return true;
300 }
301 
302 }  // namespace
303 
ReadXmpHeader(const string & filename,const bool skip_extended,XmpData * xmp_data)304 bool ReadXmpHeader(const string& filename, const bool skip_extended,
305                    XmpData* xmp_data) {
306   string filename_lower = filename;
307   std::transform(filename_lower.begin(), filename_lower.end(),
308                  filename_lower.begin(), ::tolower);
309   if (!EndsWith(filename_lower, kJpgExtension) &&
310       !EndsWith(filename_lower, kJpegExtension)) {
311     LOG(WARNING) << "XMP parse: only JPEG file is supported";
312     return false;
313   }
314 
315   std::ifstream file(filename.c_str(), std::ios::binary);
316   if (!file.is_open()) {
317     LOG(WARNING) << " Could not read file: " << filename;
318     return false;
319   }
320   return ExtractXmpMeta(skip_extended, &file, xmp_data);
321 }
322 
ReadXmpFromMemory(const string & jpeg_contents,const bool skip_extended,XmpData * xmp_data)323 bool ReadXmpFromMemory(const string& jpeg_contents, const bool skip_extended,
324                        XmpData* xmp_data) {
325   std::istringstream stream(jpeg_contents);
326   return ExtractXmpMeta(skip_extended, &stream, xmp_data);
327 }
328 
ReadXmpHeader(std::istream * input_stream,bool skip_extended,XmpData * xmp_data)329 bool ReadXmpHeader(std::istream* input_stream, bool skip_extended,
330                    XmpData* xmp_data) {
331   return ExtractXmpMeta(skip_extended, input_stream, xmp_data);
332 }
333 
334 }  // namespace xmpmeta
335 }  // namespace dynamic_depth
336