1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Tests for tensorflow.kernels.edit_distance_op."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import numpy as np
22
23from tensorflow.python.framework import constant_op
24from tensorflow.python.framework import ops
25from tensorflow.python.framework import sparse_tensor
26from tensorflow.python.ops import array_ops
27from tensorflow.python.platform import test
28
29
def ConstantOf(x):
  """Turns `x` into a constant tensor, recasting non-string data to int64.

  Args:
    x: Array-like value; it is first converted with `np.asarray`.

  Returns:
    The result of `constant_op.constant` applied to the converted array.
  """
  array = np.asarray(x)
  holds_text = array.dtype.char in ("S", "U")
  # Bytes/unicode arrays are passed through as-is; anything else becomes int64.
  if not holds_text:
    array = np.asarray(array, dtype=np.int64)
  return constant_op.constant(array)
36
37
class EditDistanceTest(test.TestCase):
  """Exercises `array_ops.edit_distance` on sparse hypothesis/truth pairs."""

  def _testEditDistanceST(self,
                          hypothesis_st,
                          truth_st,
                          normalize,
                          expected_output,
                          expected_shape,
                          expected_err_re=None):
    """Builds the edit-distance op and checks its result or its failure.

    Args:
      hypothesis_st: Sparse hypothesis, forwarded to `edit_distance`.
      truth_st: Sparse truth, forwarded to `edit_distance`.
      normalize: Forwarded to `edit_distance` as `normalize`.
      expected_output: Values the evaluated op must be close to.
      expected_shape: Static shape the op must report.
      expected_err_re: If not None, a regex for the op error that evaluating
        the op must raise; the shape and value checks are skipped then.
    """
    distance_op = array_ops.edit_distance(
        hypothesis=hypothesis_st, truth=truth_st, normalize=normalize)

    # Failure path: only evaluation is expected to raise.
    if expected_err_re is not None:
      with self.assertRaisesOpError(expected_err_re):
        self.evaluate(distance_op)
      return

    self.assertEqual(distance_op.get_shape(), expected_shape)
    self.assertAllClose(self.evaluate(distance_op), expected_output)

  def _testEditDistance(self,
                        hypothesis,
                        truth,
                        normalize,
                        expected_output,
                        expected_err_re=None):
    """Runs the edit-distance check for both sparse input wrappers.

    The same inputs are checked first as `SparseTensorValue` and then as
    `SparseTensor`, each inside its own fresh graph and session.

    Args:
      hypothesis: An (indices, values, shape) tuple.
      truth: An (indices, values, shape) tuple.
      normalize: Forwarded to `_testEditDistanceST`.
      expected_output: Forwarded to `_testEditDistanceST`.
      expected_err_re: Forwarded to `_testEditDistanceST`.
    """
    # The expected static shape is the element-wise max of the two dense
    # shapes with the final paired dimension dropped. zip() is materialized
    # because it returns a non-sliceable iterator in Python 3.
    paired_dims = list(zip(hypothesis[2], truth[2]))
    expected_shape = [max(pair) for pair in paired_dims[:-1]]

    sparse_wrappers = (
        sparse_tensor.SparseTensorValue,
        sparse_tensor.SparseTensor,
    )
    for make_sparse in sparse_wrappers:
      with ops.Graph().as_default() as graph, self.session(graph):
        # Each component of the tuples becomes a constant in this graph.
        hypothesis_parts = [ConstantOf(part) for part in hypothesis]
        truth_parts = [ConstantOf(part) for part in truth]
        self._testEditDistanceST(
            hypothesis_st=make_sparse(*hypothesis_parts),
            truth_st=make_sparse(*truth_parts),
            normalize=normalize,
            expected_output=expected_output,
            expected_shape=expected_shape,
            expected_err_re=expected_err_re)

  def testEditDistanceNormalized(self):
    """Normalized distance for two 2x2 integer inputs."""
    hypothesis = (
        [[0, 0], [0, 1], [1, 0], [1, 1]],  # indices
        [0, 1, 1, -1],  # values
        [2, 2],  # dense shape
    )
    truth = (
        [[0, 0], [1, 0], [1, 1]],
        [0, 1, 1],
        [2, 2],
    )

    self._testEditDistance(
        hypothesis=hypothesis,
        truth=truth,
        normalize=True,
        expected_output=[1.0, 0.5])

  def testEditDistanceUnnormalized(self):
    """Unnormalized distance where the two dense shapes differ."""
    hypothesis = (
        [[0, 0], [1, 0], [1, 1]],  # indices
        [10, 10, 11],  # values
        [2, 2],  # dense shape
    )
    truth = (
        [[0, 0], [0, 1], [1, 0], [1, 1]],
        [1, 2, 1, -1],
        [2, 3],
    )

    self._testEditDistance(
        hypothesis=hypothesis,
        truth=truth,
        normalize=False,
        expected_output=[2.0, 2.0])

  def testEditDistanceProperDistance(self):
    """Distance between two words, both unnormalized and normalized."""
    # The values here are the individual characters of each word, stored in
    # the SparseTensor as strings (type DT_STRING).
    word_a = "algorithm"
    word_b = "altruistic"

    # Row 0 of the hypothesis is word_a and row 1 is word_b; the truth holds
    # the same two words with the rows swapped.
    hypothesis = (
        ([[0, col] for col in range(len(word_a))] +
         [[1, col] for col in range(len(word_b))]),
        list(word_a) + list(word_b),
        [2, 11],
    )
    truth = (
        ([[0, col] for col in range(len(word_b))] +
         [[1, col] for col in range(len(word_a))]),
        list(word_b) + list(word_a),
        [2, 11],
    )

    cases = (
        (False, [6.0, 6.0]),
        (True, [6.0 / len(word_b), 6.0 / len(word_a)]),
    )
    for normalize, expected in cases:
      self._testEditDistance(
          hypothesis=hypothesis,
          truth=truth,
          normalize=normalize,
          expected_output=expected)

  def testEditDistance3D(self):
    """Normalized distance for rank-3 inputs, giving a rank-2 result."""
    hypothesis = (
        [[0, 0, 0], [1, 0, 0]],  # indices
        [0, 1],  # values
        [2, 1, 1],  # dense shape
    )
    truth = (
        [[0, 1, 0], [1, 0, 0], [1, 1, 0]],
        [0, 1, 1],
        [2, 2, 1],
    )
    expected = [
        # (0,0): no truth, (0,1): no hypothesis
        [np.inf, 1.0],
        # (1,0): match,    (1,1): no hypothesis
        [0.0, 1.0],
    ]

    self._testEditDistance(
        hypothesis=hypothesis,
        truth=truth,
        normalize=True,
        expected_output=expected)

  def testEditDistanceZeroLengthHypothesis(self):
    """Normalized distance when the hypothesis has no entries."""
    hypothesis = (np.empty((0, 2), dtype=np.int64), [], [1, 0])
    truth = ([[0, 0]], [0], [1, 1])

    self._testEditDistance(
        hypothesis=hypothesis,
        truth=truth,
        normalize=True,
        expected_output=[1.0])

  def testEditDistanceZeroLengthTruth(self):
    """Normalized distance when the truth has no entries."""
    hypothesis = ([[0, 0]], [0], [1, 1])
    truth = (np.empty((0, 2), dtype=np.int64), [], [1, 0])

    # Normalized, loss is 1/0 = inf
    self._testEditDistance(
        hypothesis=hypothesis,
        truth=truth,
        normalize=True,
        expected_output=[np.inf])

  def testEditDistanceZeroLengthHypothesisAndTruth(self):
    """Normalized distance when both inputs have no entries."""
    hypothesis = (np.empty((0, 2), dtype=np.int64), [], [1, 0])
    truth = (np.empty((0, 2), dtype=np.int64), [], [1, 0])

    # Normalized is 0 because of exact match
    self._testEditDistance(
        hypothesis=hypothesis,
        truth=truth,
        normalize=True,
        expected_output=[0])
213
214
# When this file is executed directly (rather than imported), hand control to
# the `main` entry point of the imported TensorFlow `test` module.
if __name__ == "__main__":
  test.main()
217