1#!/usr/bin/env python
2#
# This parser parses the output of Phil Harvey's exiftool (version 9.02)
# and converts it to XML. It reads exiftool's output from stdin and
# writes the XML to stdout.
#
# To get the raw information from exiftool, its verbose flag (-v2) must be
# enabled.
9#
10# Usage:
11#      exiftool -v2 img.jpg | ./parser.py >> output.xml
12#
13#
14
import os
import re
import sys
from xml.sax.saxutils import escape
18
# Placeholder written for tags without a usable raw value: sub-directory
# pointers and values that exiftool truncated with "[snip]".
NO_VALUE = "NO_VALUE"

# Placeholder IFD name for a tag found outside every recognized directory.
UNKNOWN_IFD = "UNKNOWN"

# Finds the following two groups of strings:
#
# 1. tag (groups 1 and 2: whole two-line entry, leading "| " bars):
#
# | | | x) name = value
# | | |     - Tag 0x1234
#
# 2. IFD indicator (groups 3 and 4: whole line, leading "| " bars):
#
# | | | + [xxx directory with xx entries]
#
_ENTRY_RE = re.compile(
    r"(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))\n.*?- Tag 0x[0-9a-f]{4})"
    r"|"
    r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

# A matched tag entry always ends with its "- Tag 0x...." ID; anchoring at the
# end keeps a hex-looking value on the first line from being taken as the ID.
_TAG_ID_RE = re.compile(r"- Tag (0x[0-9a-f]{4})\Z")
# "x) name = " or "x) name (SubDirectory) -->"
_NAME_RE = re.compile(r"[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))")
# Raw value printed in parentheses at the end of the first line.
_PAREN_VALUE_RE = re.compile(r"\(.*\)\n")
# Everything after the "=" on the first line.
_ASSIGNED_VALUE_RE = re.compile(r"=.*\n")


def _xml_escape(text):
    """Escape *text* for XML character data or a double-quoted attribute."""
    return escape(text, {'"': "&quot;"})


def _parse_tag(block):
    """Return ``(tag_id, name, value)`` for one matched two-line tag entry.

    *value* is ``NO_VALUE`` for sub-directory pointers and for values that
    contain "[snip]".
    """
    tag_id = _TAG_ID_RE.search(block).group(1)
    name = _NAME_RE.search(block).group(0).split()[1]

    if "(SubDirectory) -->" in block:
        return tag_id, name, NO_VALUE

    # Prefer the raw value in the parentheses.  'Model' is excluded because
    # for it the text after "=" is used even when parentheses are present.
    raw = _PAREN_VALUE_RE.search(block)
    if raw and name != "Model":
        return tag_id, name, raw.group(0)[1:-2]

    value = _ASSIGNED_VALUE_RE.search(block).group(0)[2:-1]
    if "[snip]" in value:
        value = NO_VALUE
    return tag_id, name, value


def exiftool_to_xml(text):
    """Convert the verbose (-v2) output of exiftool to an XML document.

    text -- the complete output of ``exiftool -v2``.

    Returns the XML as a string ending with a newline: an ``<exif>`` root
    with one ``<tag ifd=... id=... name=...>value</tag>`` element per tag
    entry found in *text*.  Attribute and element text is XML-escaped.  A tag
    that is not inside any recognized directory gets ``ifd="UNKNOWN"``.
    """
    lines = ['<?xml version="1.0" encoding="utf-8"?>', "<exif>"]

    # Stack of (layer, name) for the directories enclosing the current
    # position.  The layer is stored with each entry so that jumps of more
    # than one nesting level cannot desynchronize or underflow the stack.
    open_ifds = []

    for tag_block, tag_bars, ifd_line, ifd_bars in _ENTRY_RE.findall(text):
        if ifd_line:
            # A directory's entries are drawn one "| " deeper than its header.
            layer = len(ifd_bars) // 2 + 1
            # Close sibling and deeper directories before opening this one.
            while open_ifds and open_ifds[-1][0] >= layer:
                open_ifds.pop()
            # Skip the bars and the "+ [" to reach the directory name.
            ifd_name = ifd_line[len(ifd_bars) + 3:].split()[0]
            open_ifds.append((layer, ifd_name))
            continue

        layer = len(tag_bars) // 2
        # Leave every directory that is nested deeper than this tag.
        while open_ifds and open_ifds[-1][0] > layer:
            open_ifds.pop()
        ifd_name = open_ifds[-1][1] if open_ifds else UNKNOWN_IFD

        tag_id, name, value = _parse_tag(tag_block)
        lines.append(
            '    <tag ifd="' + _xml_escape(ifd_name) + '" id="'
            + tag_id + '" name="' + _xml_escape(name) + '">'
            + _xml_escape(value) + "</tag>")

    lines.append("</exif>")
    return "\n".join(lines) + "\n"


def main():
    """Read exiftool's output from stdin and write the XML to stdout."""
    sys.stdout.write(exiftool_to_xml(sys.stdin.read()))


if __name__ == "__main__":
    main()
91