# Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for tf.test.compute_gradient and tf.compute_gradient_error.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging @ops.RegisterGradient("BadGrad") def _bad_grad(unused_op, grad): """A gradient that returns the wrong shape.""" return array_ops.transpose(grad) @ops.RegisterGradient("NaNGrad") def _nan_grad(unused_op, grad): """A gradient that returns NaN.""" return np.nan * grad class GradientCheckerTest(test.TestCase): @test_util.run_deprecated_v1 def testAddSimple(self): np.random.seed(1) # Fix seed to avoid flakiness with self.session(use_gpu=False): # a test case for Add operation size = (2, 3) x1 = constant_op.constant(2.0, shape=size, name="x1") x2 = constant_op.constant(3.0, shape=size, name="x2") y = math_ops.add(x1, x2, name="y") # checking gradients for x1 error = gradient_checker.compute_gradient_error(x1, size, y, size) tf_logging.info("x1 error = %f", error) assert error < 1e-4 @test_util.run_deprecated_v1 def testAddSimpleGPU(self): np.random.seed(2) # Fix seed to avoid flakiness with self.session(use_gpu=True): # a test case for Add operation size = (2, 3) x1 = constant_op.constant(2.0, shape=size, name="x1") x2 = constant_op.constant(3.0, shape=size, name="x2") y = math_ops.add(x1, x2, name="y") # checking gradients for x1 error = gradient_checker.compute_gradient_error(x1, size, y, size) tf_logging.info("x1 error = %f", error) assert error < 1e-4 @test_util.run_deprecated_v1 def testAddCustomized(self): np.random.seed(3) # Fix seed to avoid flakiness with self.cached_session(): # a test case for Add operation size = (2, 3) x1 = constant_op.constant( 2.0, shape=size, dtype=dtypes.float64, name="x1") x2 = constant_op.constant( 3.0, shape=size, dtype=dtypes.float64, name="x2") y = math_ops.add(x1, x2, name="y") # checkint gradients for x2 using a special init_value and delta x_init_value = np.asarray(np.arange(6, dtype=np.float64).reshape(2, 3)) error = gradient_checker.compute_gradient_error( x2, size, y, size, x_init_value=x_init_value, delta=1e-2) tf_logging.info("x2 error = %f", error) assert error < 1e-10 @test_util.run_deprecated_v1 def testGather(self): np.random.seed(4) # Fix seed to avoid flakiness with self.cached_session(): p_shape = (4, 2) p_size = 8 index_values = [1, 3] y_shape = [2, 2] params = constant_op.constant( np.arange(p_size).astype(np.float), shape=p_shape, name="p") indices = constant_op.constant(index_values, name="i") y = array_ops.gather(params, indices, name="y") error = gradient_checker.compute_gradient_error(params, p_shape, y, y_shape) tf_logging.info("gather error = %f", error) assert error < 1e-4 @test_util.run_deprecated_v1 def testNestedGather(self): np.random.seed(5) # Fix seed to avoid flakiness with self.cached_session(): p_shape = (8, 2) p_size = 16 index_values = [1, 3, 5, 6] index_values2 = [0, 2] y2_shape = [2, 2] params = constant_op.constant( np.arange(p_size).astype(np.float), shape=p_shape, name="p") indices = constant_op.constant(index_values, name="i") y = array_ops.gather(params, indices, name="y") indices2 = constant_op.constant(index_values2, name="i2") y2 = array_ops.gather(y, indices2, name="y2") error = gradient_checker.compute_gradient_error(params, p_shape, y2, y2_shape) tf_logging.info("nested gather error = %f", error) assert error < 1e-4 @test_util.run_deprecated_v1 def testComplexMul(self): with self.cached_session(): size = () c = constant_op.constant(5 + 7j, dtype=dtypes.complex64) x = constant_op.constant(11 - 13j, dtype=dtypes.complex64) y = c * x analytical, numerical = gradient_checker.compute_gradient(x, size, y, size) correct = np.array([[5, 7], [-7, 5]]) self.assertAllEqual(correct, analytical) self.assertAllClose(correct, numerical, rtol=1e-4) self.assertLess( gradient_checker.compute_gradient_error(x, size, y, size), 2e-4) @test_util.run_deprecated_v1 def testComplexConj(self): with self.cached_session(): size = () x = constant_op.constant(11 - 13j, dtype=dtypes.complex64) y = math_ops.conj(x) analytical, numerical = gradient_checker.compute_gradient(x, size, y, size) correct = np.array([[1, 0], [0, -1]]) self.assertAllEqual(correct, analytical) self.assertAllClose(correct, numerical, rtol=2e-5) self.assertLess( gradient_checker.compute_gradient_error(x, size, y, size), 2e-5) @test_util.run_deprecated_v1 def testEmptySucceeds(self): with self.cached_session(): x = array_ops.placeholder(dtypes.float32) y = array_ops.identity(x) for grad in gradient_checker.compute_gradient(x, (0, 3), y, (0, 3)): self.assertEqual(grad.shape, (0, 0)) error = gradient_checker.compute_gradient_error(x, (0, 3), y, (0, 3)) self.assertEqual(error, 0) def testEmptyFails(self): with ops.Graph().as_default() as g: with self.session(graph=g): x = array_ops.placeholder(dtypes.float32) with g.gradient_override_map({"Identity": "BadGrad"}): y = array_ops.identity(x) bad = r"Empty gradient has wrong shape: expected \(0, 3\), got \(3, 0\)" with self.assertRaisesRegexp(ValueError, bad): gradient_checker.compute_gradient(x, (0, 3), y, (0, 3)) with self.assertRaisesRegexp(ValueError, bad): gradient_checker.compute_gradient_error(x, (0, 3), y, (0, 3)) def testNaNGradFails(self): with ops.Graph().as_default() as g: with self.session(graph=g): x = array_ops.placeholder(dtypes.float32) with g.gradient_override_map({"Identity": "NaNGrad"}): y = array_ops.identity(x) error = gradient_checker.compute_gradient_error(x, (), y, ()) # Typical test would assert error < max_err, so assert this test would # raise AssertionError, since NaN is not < 1.0. with self.assertRaisesRegexp(AssertionError, "False is not true"): self.assertTrue(error < 1.0) class MiniMNISTTest(test.TestCase): # Gradient checker for MNIST. def _BuildAndTestMiniMNIST(self, param_index, tag): # Fix seed to avoid occasional flakiness np.random.seed(6) # Hyperparameters batch = 3 inputs = 16 features = 32 classes = 10 # Define the parameters inp_data = np.random.random_sample(inputs * batch) hidden_weight_data = np.random.randn(inputs * features) / np.sqrt(inputs) hidden_bias_data = np.random.random_sample(features) sm_weight_data = np.random.randn(features * classes) / np.sqrt(features) sm_bias_data = np.random.random_sample(classes) # special care for labels since they need to be normalized per batch label_data = np.random.random(batch * classes).reshape((batch, classes)) s = label_data.sum(axis=1) label_data /= s[:, None] with self.session(use_gpu=True): # We treat the inputs as "parameters" here inp = constant_op.constant( inp_data.tolist(), shape=[batch, inputs], dtype=dtypes.float64, name="inp") hidden_weight = constant_op.constant( hidden_weight_data.tolist(), shape=[inputs, features], dtype=dtypes.float64, name="hidden_weight") hidden_bias = constant_op.constant( hidden_bias_data.tolist(), shape=[features], dtype=dtypes.float64, name="hidden_bias") softmax_weight = constant_op.constant( sm_weight_data.tolist(), shape=[features, classes], dtype=dtypes.float64, name="softmax_weight") softmax_bias = constant_op.constant( sm_bias_data.tolist(), shape=[classes], dtype=dtypes.float64, name="softmax_bias") # List all the parameter so that we can test them one at a time all_params = [ inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias ] param_sizes = [ [batch, inputs], # inp [inputs, features], # hidden_weight, [features], # hidden_bias [features, classes], # softmax_weight, [classes] ] # softmax_bias # Now, Building MNIST features = nn_ops.relu( nn_ops.xw_plus_b(inp, hidden_weight, hidden_bias), name="features") logits = nn_ops.xw_plus_b( features, softmax_weight, softmax_bias, name="logits") labels = constant_op.constant( label_data.tolist(), shape=[batch, classes], dtype=dtypes.float64, name="labels") cost = nn_ops.softmax_cross_entropy_with_logits( labels=labels, logits=logits, name="cost") # Test the gradients. err = gradient_checker.compute_gradient_error( all_params[param_index], param_sizes[param_index], cost, [batch], delta=1e-5) tf_logging.info("Mini MNIST: %s gradient error = %g", tag, err) return err @test_util.run_deprecated_v1 def testInputGradient(self): self.assertLess(self._BuildAndTestMiniMNIST(0, "input"), 1e-8) @test_util.run_deprecated_v1 def testHiddenWeightGradient(self): self.assertLess(self._BuildAndTestMiniMNIST(1, "hidden_weight"), 1e-8) @test_util.run_deprecated_v1 def testHiddenBiasGradient(self): self.assertLess(self._BuildAndTestMiniMNIST(2, "hidden_bias"), 1e-8) @test_util.run_deprecated_v1 def testSoftmaxWeightGradient(self): self.assertLess(self._BuildAndTestMiniMNIST(3, "softmax_weight"), 1e-8) @test_util.run_deprecated_v1 def testSoftmaxBiasGradient(self): self.assertLess(self._BuildAndTestMiniMNIST(4, "softmax_bias"), 1e-8) if __name__ == "__main__": test.main()