# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
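"""Tests for eager backpropagation: gradients_function and GradientTape."""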
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

from absl.testing import parameterized
import numpy as np

from tensorflow.python import pywrap_tfe
from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.eager import def_function
from tensorflow.python.eager import function
from tensorflow.python.eager import tape as tape_lib
from tensorflow.python.eager import test
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.framework import test_util
from tensorflow.python.framework.memory_checker import MemoryChecker
from tensorflow.python.layers.pooling import max_pooling3d
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import custom_gradient
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import functional_ops
from tensorflow.python.ops import gradient_checker_v2
from tensorflow.python.ops import gradients
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import variables
from tensorflow.python.training import training


class BackpropTest(test.TestCase, parameterized.TestCase):

  @test_util.run_in_graph_and_eager_modes
  def testAggregateGradients(self):

    def fn(x):
      ind1 = constant_op.constant(np.array([0, 1]))
      ind2 = constant_op.constant(np.array([2, 3]))
      ind3 = constant_op.constant(np.array([1, 3]))
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      return g1 * g2 * g3

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    if not context.executing_eagerly():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_y = tf_g1 * tf_g2 * tf_g3
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(tf_grad.values,
                                                    tf_grad.indices,
                                                    tf_grad.dense_shape[0])

      self.assertAllClose(grad, self.evaluate(tf_dense_grad))

  @test_util.run_in_graph_and_eager_modes
  def testAggregateGradientsWithTensor(self):

    def fn(x):
      ind1 = constant_op.constant(np.array([0, 1]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = math_ops.reduce_sum(x * constant_op.constant(2.0))
      return g1 * g2

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    if not context.executing_eagerly():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1))
      tf_y = tf_g1 * tf_g2
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      self.assertAllClose(grad, tf_grad)

  def testImplicitGradWithResourceVariable(self):
    x = resource_variable_ops.ResourceVariable(
        initial_value=constant_op.constant(1.0), name='x')

    def fn():
      b = constant_op.constant(2.0)
      c = math_ops.add(x.value(), b)
      return math_ops.add(c, constant_op.constant(3.0))

    grads_and_vars = backprop.implicit_grad(fn)()
    self.assertAllEqual(grads_and_vars[0][0], 1.0)
    self.assertAllEqual(id(grads_and_vars[0][1]), id(x))

  @parameterized.named_parameters([('Function', def_function.function),
                                   ('NoFunction', lambda f: f)])
  def testNoOpBehaviorConsistent(self, decorator):

    @decorator
    def f(x):
      # Test all different types of no-ops
      x1 = array_ops.identity(x)
      x2 = math_ops.add_v2(x, 0)
      x3 = math_ops.subtract(x, 0)
      x4 = math_ops.multiply(x, 1)
      with backprop.GradientTape() as t:
        t.watch(x)
        t.watch(x1)
        t.watch(x2)
        t.watch(x3)
        t.watch(x4)
        y1 = x * 2.
        y2 = x1 * 3.
        y3 = x2 * 3.
        y4 = x3 * 3.
        y5 = x4 * 3.
        loss = y1 + y2 + y3 + y4 + y5
      return t.gradient(loss, [x, x1, x2, x3, x4])

    self.assertAllClose([2., 3., 3., 3., 3.], f(constant_op.constant(10.)))

  def testGradientInsideLoop(self):
    with ops.Graph().as_default():
      v = resource_variable_ops.ResourceVariable(1.0)

      def body(_):
        _ = v + 1.0  # This reads the variable inside the loop context
        with backprop.GradientTape() as t:
          result = v * 2
        self.assertIsNotNone(t.gradient(result, v))
        return 1.0

      control_flow_ops.while_loop(lambda i: False, body, [1.0])

  def testWhereGradient(self):
    # Note: where is special because only some of its arguments are of
    # differentiable dtypes.

    def f(x):
      return array_ops.where(x < 10, x, x * x)

    g = backprop.gradients_function(f)

    self.assertAllEqual(g(5.)[0], 1.0)
    self.assertAllEqual(g(50.)[0], 100.0)

  def testTwoTargets(self):
    with backprop.GradientTape() as t:
      x = constant_op.constant(3.0)
      y = constant_op.constant(2.0)
      t.watch([x, y])
      xx = 2 * x
      yy = 3 * y
    dx, dy = t.gradient([xx, yy], [x, y])
    self.assertAllEqual(dx, 2.0)
    self.assertAllEqual(dy, 3.0)

  def testCustomGradientEmptyError(self):

    @custom_gradient.custom_gradient
    def identity(x):

      def grad(_):
        return []  # This return value is wrong!

      return x, grad

    x = variables.Variable(1.0)
    with backprop.GradientTape() as t:
      y = identity(x)
    with self.assertRaises(ValueError):
      t.gradient(y, [x])

  def testOutputGradUsedInComputation(self):
    with backprop.GradientTape() as t:
      x = constant_op.constant(3.0)
      y = constant_op.constant(2.0)
      t.watch([x, y])
      loss = x * y
    dx, = t.gradient([loss, x], [x], output_gradients=[1.0, 2.0])
    self.assertAllEqual(dx, 4.0)

  def testDy(self):

    def f(x):
      return x

    grad_fn = backprop.gradients_function(f)
    self.assertAllEqual(2., grad_fn(1., dy=2.)[0])

  def testGradientInteger(self):

    def f(x):
      return x + x

    int_tensor = constant_op.constant(1)
    self.assertEqual(backprop.gradients_function(f)(int_tensor)[0], None)

  def testErrors(self):

    @custom_gradient.custom_gradient
    def f(x):

      def grad(_):
        raise RuntimeError('x')

      return x, grad

    # TODO(apassos) raise the right error here
    with self.assertRaises(RuntimeError):
      backprop.gradients_function(f)(constant_op.constant(1.0))

  def testGradientsFunctionInCustomGradient(self):

    @custom_gradient.custom_gradient
    def f(x):
      (y,) = backprop.gradients_function(lambda x: x * x)(x)

      def grad(dy):
        return [2 * dy]

      return y, grad

    self.assertAllEqual(f(1.0), 2.0)

  def testImplicitGradOverEmbeddingLookup(self):
    batch_size = 8
    embedding_size = 512
    vocab_size = 1000
    lrn_rate = 0.1
    random_init = random_ops.random_uniform([vocab_size, embedding_size])

    x = array_ops.ones((batch_size), dtypes.int64)
    embedding = resource_variable_ops.ResourceVariable(
        initial_value=random_init, dtype=dtypes.float32, name='embedding')

    def f():
      embedded_x = embedding_ops.embedding_lookup(embedding, x)
      return constant_op.constant(1.0, dtypes.float32) - embedded_x

    grad = backprop.implicit_grad(f)()[0][0]
    opt = training.GradientDescentOptimizer(lrn_rate)

    with ops.Graph().as_default(), self.cached_session():
      tf_x = array_ops.ones((batch_size), dtypes.int64)
      # TODO(ashankar,apassos): Change to ResourceVariable.
      tf_embedding = variables.Variable(
          random_init.numpy(), name='tf_embedding')
      tf_embedded_x = embedding_ops.embedding_lookup(tf_embedding, tf_x)
      tf_y = 1.0 - tf_embedded_x
      tf_grad = gradients.gradients(tf_y, [tf_embedding])[0]
      tf_opt = training.GradientDescentOptimizer(0.1)
      tf_embedding.initializer.run()

      self.assertAllClose(tf_grad.indices, grad.indices)
      self.assertAllClose(tf_grad.values, grad.values)

      tf_opt.apply_gradients([(tf_grad, tf_embedding)]).run()
      expected = self.evaluate(tf_embedding)
      opt.apply_gradients([(grad, embedding)])
      self.assertAllClose(expected, embedding.read_value())

  def testImplicitGradOrdering(self):
    v0 = resource_variable_ops.ResourceVariable(1.0)
    v1 = resource_variable_ops.ResourceVariable(2.0)

    def f():
      x = v1 * v1
      y = v0 * v0
      return x + y

    grads = backprop.implicit_grad(f)()
    ordered_variables = [x[1] for x in grads]
    self.assertIs(ordered_variables[0], v0)
    self.assertIs(ordered_variables[1], v1)

  def testTapeNoOpGradient(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = x
    self.assertEqual(t.gradient(y, x).numpy(), 1.0)

  def testTapeIdentityGradientIsIdentity(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = array_ops.identity(x)
    self.assertEqual(t.gradient(y, x).numpy(), 1.0)

  def testFunctionIndexedSlicesGradient(self):

    @def_function.function
    def f(x):
      return x + 1

    with backprop.GradientTape() as t:
      x = constant_op.constant([1.0])
      t.watch(x)
      y = f(x)
      y = array_ops.gather(y, [0])
    self.assertAllEqual(t.gradient(y, x), [1.0])

  def testTapeGradientMultiTargetOneIsSource(self):
    x = constant_op.constant(2.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = x * x
    self.assertEqual(t.gradient([x, y], x).numpy(), 5.0)

  def testTapeNoOpGradientWithMultiTargetAllSource(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      y = x
    self.assertEqual(t.gradient([y, y], x).numpy(), 2.0)

  def testTapeNoOpGradientWithMultiTargetMultiSource(self):
    x = constant_op.constant(3.0)
    y = constant_op.constant(5.0)
    with backprop.GradientTape() as t:
      t.watch(x)
      t.watch(y)
      z = y * y
    self.assertAllEqual(t.gradient([x, y, z], [x, y]), [1.0, 11.0])

  def testTapeGradientStringTarget(self):
    s = constant_op.constant('unknown', dtype=dtypes.string)
    x = constant_op.constant(3.0)

    with backprop.GradientTape() as t:
      t.watch(x)
      t.watch(s)
    grads = t.gradient(s, x)
    self.assertEqual(grads, None)

  def testTapeNoOpGradientStringSourceAndTarget(self):
    s = constant_op.constant('unknown', dtype=dtypes.string)

    with backprop.GradientTape() as t:
      t.watch(s)
    grads = t.gradient(s, s)
    self.assertEqual(grads, None)

  def testTapeNoOpGradientWithMultiTargetMultiSourceIncludeString(self):
    x = constant_op.constant(3.0)
    y = constant_op.constant(5.0)
    s = constant_op.constant('unknown', dtype=dtypes.string)

    with backprop.GradientTape() as t:
      t.watch(x)
      t.watch(y)
      t.watch(s)
      z = y * y
    grads = t.gradient([x, y, z, s], [x, y, s])
    self.assertAllEqual(grads[:2], [1.0, 11.0])
    self.assertEqual(grads[2], None)

  def testTapeNoOpOnVariableIsIdentity(self):
    v0 = resource_variable_ops.ResourceVariable(1.0)
    with backprop.GradientTape() as t:
      y = v0.read_value()
    self.assertEqual(t.gradient(y, v0).numpy(), 1.0)

  @test_util.assert_no_new_tensors
  @test_util.assert_no_garbage_created
  def testTapeNoOpGradient2By2(self):
    a_2_by_2 = constant_op.constant(2.0, shape=[2, 2])
    with backprop.GradientTape(persistent=True) as tape:
      tape.watch(a_2_by_2)
    dy_dy = tape.gradient(a_2_by_2, [a_2_by_2])[0]
    self.assertAllEqual(dy_dy.numpy(),
                        constant_op.constant(1.0, shape=[2, 2]).numpy())

  @test_util.assert_no_new_pyobjects_executing_eagerly
  def testTapeNoOpGradientMultiTarget2By2(self):
    a_2_by_2 = constant_op.constant(2.0, shape=[2, 2])
    with backprop.GradientTape(persistent=True) as tape:
      tape.watch(a_2_by_2)
    dy_dy = tape.gradient([a_2_by_2, a_2_by_2], [a_2_by_2])[0]
    self.assertAllEqual(dy_dy.numpy(),
                        constant_op.constant(2.0, shape=[2, 2]).numpy())

  def testTapeStopRecording(self):
    with backprop.GradientTape() as t:
      x = resource_variable_ops.ResourceVariable(1.0)
      with t.stop_recording():
        y = x * x
    self.assertEqual(t.gradient(y, x), None)

  def testTapeStopStartRecording(self):
    with backprop.GradientTape(persistent=True) as t:
      x = resource_variable_ops.ResourceVariable(1.0)
      x2 = x * 2  # This should be differentiated through.
      with t.stop_recording():
        y = x2 * x2
      z = x2 * x2
    self.assertEqual(t.gradient(y, x2), None)

    # If the x*2 was not differentiated through, this would be 2.0, not 4.0
    self.assertEqual(t.gradient(z, x2).numpy(), 4.0)

  def testTapeReset(self):
    with backprop.GradientTape() as t:
      v = resource_variable_ops.ResourceVariable(1.0)
      loss = v * v
      t.reset()
      loss += v * v
    self.assertAllEqual(t.gradient(loss, v), 2.0)

  def testPythonMax(self):
    x = [
        resource_variable_ops.ResourceVariable(2.),
        resource_variable_ops.ResourceVariable(3.),
        resource_variable_ops.ResourceVariable(5.)
    ]
    with backprop.GradientTape() as t:
      f = max(x)
    grad = t.gradient(f, x)
    self.assertAllEqual(self.evaluate(f), 5.)
    self.assertAllEqual(self.evaluate(grad), [None, None, 1.0])

  def testAutomaticWatchedVariables(self):
    with backprop.GradientTape() as t:
      self.assertEqual(0, len(t.watched_variables()))
      v = resource_variable_ops.ResourceVariable(1.0)
      loss = v * v
      self.assertAllEqual([v], t.watched_variables())

      t.reset()
      self.assertEqual(0, len(t.watched_variables()))
      loss += v * v
      self.assertAllEqual([v], t.watched_variables())

  def testExplicitWatchedVariables(self):
    with backprop.GradientTape() as t:
      self.assertEqual(0, len(t.watched_variables()))
      v = resource_variable_ops.ResourceVariable(1.0)
      t.watch(v)
      self.assertAllEqual([v], t.watched_variables())

      t.reset()
      self.assertEqual(0, len(t.watched_variables()))
      t.watch(v)
      self.assertAllEqual([v], t.watched_variables())

  @test_util.assert_no_new_tensors
  def testGradientNone(self):

    def loss(x, l):
      return math_ops.reduce_mean(
          nn_ops.softmax_cross_entropy_with_logits(logits=x, labels=l),
          constant_op.constant([0]))

    logits = constant_op.constant([[0.0, 0.0]])
    labels = constant_op.constant([[1.0, 0.0]])
    # softmax_cross_entropy_with_logits returns two outputs and in this case
    # the gradient wrt the second is None.
    g, = backprop.gradients_function(loss, [0])(logits, labels)
    self.assertAllEqual(g.numpy(), [[-0.5, 0.5]])

  @test_util.run_in_graph_and_eager_modes
  def testGradientWithinTapeBlock(self):
    v1 = resource_variable_ops.ResourceVariable(1.)
    self.evaluate(v1.initializer)
    with backprop.GradientTape() as t:
      loss = 2 * v1
      grad = t.gradient(loss, v1)
    self.assertAllEqual(self.evaluate(grad), 2.0)

    with backprop.GradientTape(persistent=True) as t:
      loss = 2 * v1
      grad = t.gradient(loss, v1)
    self.assertAllEqual(self.evaluate(grad), 2.0)

  @test_util.run_in_graph_and_eager_modes
  def testNestedSelfContexts(self):
    v1 = resource_variable_ops.ResourceVariable(1.)
    self.evaluate(v1.initializer)
    with backprop.GradientTape() as t:
      with self.assertRaises(ValueError):
        with t:
          pass

  @test_util.assert_no_new_tensors
  def testSecondGrad(self):

    def first(x):
      l = constant_op.constant([[0.0]])
      x = nn_ops.softmax_cross_entropy_with_logits(labels=l, logits=x)
      x = math_ops.reduce_sum(x, constant_op.constant([0]))
      return x

    def second(x):
      grad = backprop.gradients_function(first, [0])(x)[0]
      return math_ops.reduce_sum(grad, constant_op.constant([0]))

    f = constant_op.constant([[0.1]])
    grad = backprop.gradients_function(second, [0])(f)[0]
    self.assertAllEqual([[0.0]], grad)

  @test_util.run_in_graph_and_eager_modes
  def testWatchingIsTapeLocal(self):
    x1 = resource_variable_ops.ResourceVariable(2.0, trainable=False)
    x2 = resource_variable_ops.ResourceVariable(2.0, trainable=False)

    with backprop.GradientTape() as tape1:
      with backprop.GradientTape() as tape2:
        tape1.watch(x1)
        tape2.watch([x1, x2])
        y = x1**3
        z = x2**2
        dy, dz = tape2.gradient([y, z], [x1, x2])
      d2y, d2z = tape1.gradient([dy, dz], [x1, x2])

    self.evaluate([x1.initializer, x2.initializer])
    self.assertEqual(self.evaluate(d2y), 12.0)
    self.assertIsNone(d2z)

  @test_util.assert_no_new_tensors
  def testMakeVJP(self):

    def f(x):
      return x * x

    wrapped_fn = backprop.make_vjp(f, persistent=False)
    result, vjp = wrapped_fn(constant_op.constant(3.0))
    self.assertAllEqual(result, 9.0)
    self.assertAllEqual(vjp(2.0)[0], 12.0)

  def testPersistentMakeVJP(self):

    def f(x):
      return x * x

    wrapped_fn = backprop.make_vjp(f, persistent=True)
    _, vjp = wrapped_fn(constant_op.constant(3.0))
    vjp_result1 = vjp(2.0)[0]
    vjp_result2 = vjp(2.0)[0]
    self.assertAllEqual(vjp_result1, vjp_result2, 12.0)

  @test_util.assert_no_new_tensors
  def testGradGrad(self):

    def sq(x):
      return x * x

    def grad(x):
      value = backprop.gradients_function(sq, [0])(x)[0]
      return value

    gradgrad = backprop.gradients_function(grad, [0])

    self.assertAllEqual(gradgrad(constant_op.constant(3.0))[0], 2.0)

  @test_util.assert_no_new_tensors
  def testGradGradExp(self):

    def grad(x):
      value = backprop.gradients_function(math_ops.exp, [0])(x)[0]
      return value

    gradgrad = backprop.gradients_function(grad, [0])

    self.assertAllEqual(gradgrad(constant_op.constant(0.0))[0], 1.0)

  @test_util.assert_no_new_tensors
  def testStopGradient(self):
    grad = backprop.gradients_function(
        lambda x: array_ops.stop_gradient(math_ops.argmax(x)))
    self.assertAllEqual(grad([0.0])[0], None)

  @test_util.assert_no_new_tensors
  def testArgmax(self):

    def argmax(x):
      i = math_ops.argmax(x)
      return array_ops.stop_gradient(i)

    grad = backprop.gradients_function(argmax)
    self.assertAllEqual(grad([0.0])[0], None)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testGPU(self):

    def fn(x):
      with context.device('/gpu:0'):
        b = constant_op.constant(2.0)
        c = math_ops.add(x.gpu(), b)
        # TODO(apassos): remove cpu below by making TensorVSPace aware
        # of devices.
        return math_ops.add(c, constant_op.constant(3.0)).cpu()

    grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
    self.assertAllEqual(grad, 1.0)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testGPUImplicitGrad(self):
    with context.device('gpu:0'):
      v = resource_variable_ops.ResourceVariable(
          constant_op.constant(1.0), name='v')

    def f():
      with context.device('gpu:0'):
        return v.read_value()

    self.assertEqual(backprop.implicit_grad(f)()[0][0].cpu().numpy(), 1.0)

  @test_util.assert_no_new_tensors
  def testCPU(self):

    def fn(x):
      b = constant_op.constant(2.0)
      c = math_ops.add(x, b)
      return math_ops.add(c, constant_op.constant(3.0))

    grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
    self.assertAllEqual(grad, 1.0)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testTensorCopyGPU2CPU2GPU(self):

    def f(a, b):
      return a.cpu() + b.cpu()

    with context.device('/gpu:0'):
      a = constant_op.constant(1.0)
      b = constant_op.constant(2.0)

    grad = backprop.gradients_function(f, [0])(a, b)[0]
    self.assertAllEqual(grad, 1.0)

  @test_util.assert_no_new_tensors
  def testEmptyParams(self):

    def fn(a, b):
      return a * b

    x = constant_op.constant(1.0)
    y = constant_op.constant(2.0)
    dx, dy = backprop.gradients_function(fn)(x, y)
    self.assertAllEqual(dx, y.numpy())
    self.assertAllEqual(dy, x.numpy())

  @test_util.assert_no_new_tensors
  def testUnconnectedNone(self):
    v = resource_variable_ops.ResourceVariable(1.0, name='testUnconnectedNone')

    def f():
      v.read_value()
      return constant_op.constant(1.0)

    self.assertEqual(backprop.implicit_grad(f)()[0][0], None)

  @test_util.assert_no_new_tensors
  def testGradientTapeReEnterContext(self):
    g = backprop.GradientTape()
    with g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = 2 * x
    with g:
      z = 2 * y
    grad = g.gradient(target=z, sources=[x])
    self.assertEqual(self.evaluate(grad), [4.0])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTapeRepeatedSource(self):
    with backprop.GradientTape(persistent=False) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = 2 * x
    grad = g.gradient(target=y, sources=[x, x])
    self.assertEqual(self.evaluate(grad), [2.0, 2.0])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testPersistentGradientTapeRepeatedSource(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      y = constant_op.constant(5.0)
      g.watch(x)
      g.watch(y)
      z = x * x + x * y
    grad = g.gradient(target=z, sources=[x, x])
    self.assertEqual(self.evaluate(grad), [11.0, 11.0])
    grad = g.gradient(target=z, sources=[y, x])
    self.assertEqual(self.evaluate(grad), [3.0, 11.0])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTapeStructure(self):
    with backprop.GradientTape(persistent=True) as g:
      # Using different constant values because constant tensors are
      # cached, leading to a different gradient than what one might expect.
      x1 = constant_op.constant(3.0)
      x2 = constant_op.constant(3.1)
      x3 = constant_op.constant(3.2)
      g.watch(x1)
      g.watch(x2)
      g.watch(x3)
      y = x1 + 2 * x2 + 3 * x3
    self.assertEqual(self.evaluate(g.gradient(y, x1)), [1.0])
    self.assertEqual(self.evaluate(g.gradient(y, (x1,))), (1.0,))
    self.assertEqual(self.evaluate(g.gradient(y, (x1, x2))), (1.0, 2.0))
    self.assertEqual(
        self.evaluate(g.gradient(y, [(x1, x2), (x2, x3)])), [(1.0, 2.0),
                                                             (2.0, 3.0)])
    self.assertEqual(
        self.evaluate(g.gradient(y, (x1, x2, [x1, x3]))),
        (1.0, 2.0, [1.0, 3.0]))
    self.assertEqual(
        self.evaluate(g.gradient(y, [x1, {
            'x2': x2,
            'x3': x3
        }])), [1.0, {
            'x2': 2.0,
            'x3': 3.0
        }])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTape(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      with backprop.GradientTape() as gg:
        gg.watch(y)
        z = 2 * y
      inner_grad = gg.gradient(z, [y])[0]
      self.assertEqual(self.evaluate(inner_grad), 2.0)
      y += inner_grad
    grad = g.gradient(y, [x])[0]
    self.assertEqual(self.evaluate(grad), 6.0)

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGadientTapeCalledOnConstantTarget(self):
    with backprop.GradientTape() as g:
      x = variables.Variable([3.0])
      y = variables.Variable([2.0])
    grad = g.gradient(x, y)
    self.assertAllEqual(grad, None)

  @test_util.run_in_graph_and_eager_modes
  @test_util.run_v1_only('b/120545219')
  def testGradientTapeWithCond(self):
    x = constant_op.constant(3.0)

    def true_fn():
      return x

    def false_fn():
      return x * x

    with backprop.GradientTape() as g:
      g.watch(x)
      y = control_flow_ops.cond(x < x, true_fn, false_fn)

    if not context.executing_eagerly():
      with self.assertRaisesRegex(NotImplementedError, 'tf.gradients'):
        dy = g.gradient(y, [x])[0]
    else:
      dy = g.gradient(y, [x])[0]
      self.assertEqual(self.evaluate(dy), 6.0)

  @test_util.run_in_graph_and_eager_modes
  @test_util.run_v1_only('b/120545219')
  def testGradientTapeWithWhileLoop(self):
    i = constant_op.constant(1)
    x = constant_op.constant(2.)

    def cond(i, _):
      return i < 3

    def body(i, x):
      return i + 1, x * 2

    with backprop.GradientTape() as g:
      g.watch([x])
      _, y = control_flow_ops.while_loop(cond, body, [i, x])

    if not context.executing_eagerly():
      with self.assertRaisesRegex(NotImplementedError, 'tf.gradients'):
        dy = g.gradient(y, [x])[0]
    else:
      dy = g.gradient(y, [x])[0]
      self.assertEqual(self.evaluate(dy), 4.0)

  @test_util.assert_no_new_tensors
  def testGradientTapeGradientCalledMultipleTimes(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
    g.gradient(z, [x])
    with self.assertRaisesRegex(
        RuntimeError, 'A non-persistent GradientTape can only'):
      g.gradient(y, [x])

  @test_util.assert_no_new_tensors
  def testGradientTapeJacobianCalledMultipleTimes(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
    g.jacobian(z, [x])
    with self.assertRaisesRegex(
        RuntimeError, 'A non-persistent GradientTape can only'):
      g.jacobian(y, [x])

  @test_util.assert_no_new_tensors
  def testJacobianInsideGradientTapeScope(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
      self.assertAllClose(4. * 3. ** 3., g.jacobian(z, x))

  @test_util.assert_no_new_tensors
  def testBatchJacobianInsideGradientTapeScope(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant([[3.0]])
      g.watch(x)
      y = x * x
      z = y * y
      self.assertAllClose([[[4. * 3. ** 3.]]], g.batch_jacobian(z, x))

  @test_util.assert_no_new_tensors
  def testGradientTapeBatchJacobianCalledMultipleTimes(self):
    with backprop.GradientTape() as g:
      x = constant_op.constant([[3.0]])
      g.watch(x)
      y = x * x
      z = y * y
    g.batch_jacobian(z, x)
    with self.assertRaisesRegex(
        RuntimeError, 'A non-persistent GradientTape can only'):
      g.batch_jacobian(y, [x])

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  @test_util.run_v1_only('b/120545219')
  def testPersistentTape(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      z = y * y
    dz_dx = g.gradient(z, [x])[0]
    self.assertEqual(self.evaluate(dz_dx), 4 * 3 * 3 * 3)
    dy_dx = g.gradient(y, [x])[0]
    self.assertEqual(self.evaluate(dy_dx), 2 * 3)
    del g

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testHigherOrderGradient(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x**3  # y := x^3
      dy_dx = g.gradient(y, x)  # dy/dx := 3x^2
      d2y_dx2 = g.gradient(dy_dx, x)  # d2y/dx2 := 6x
      d3y_dx3 = g.gradient(d2y_dx2, x)  # d3y/dx3 := 6
    x = 3
    self.assertEqual(self.evaluate(y), x**3)
    self.assertEqual(self.evaluate(dy_dx), 3 * x**2)
    self.assertEqual(self.evaluate(d2y_dx2), 6 * x)
    self.assertEqual(self.evaluate(d3y_dx3), 6)
    del g

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testPersistentNestedTape(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant(3.0)
      g.watch(x)
      y = x * x
      with backprop.GradientTape(persistent=True) as gg:
        gg.watch(y)
        z = 2 * y
      for _ in range(2):
        inner_grad = gg.gradient(z, [y])[0]
        self.assertEqual(self.evaluate(inner_grad), 2.0)
        y += inner_grad
      del gg
    grad = g.gradient(y, [x])[0]
    self.assertEqual(self.evaluate(grad), 6.0)
    grad = g.gradient(z, [x])[0]
    self.assertEqual(self.evaluate(grad), 12.0)
    del g

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testGradientTapeVariable(self):
    v = resource_variable_ops.ResourceVariable(1.0, name='v')
    self.evaluate(v.initializer)
    with backprop.GradientTape() as g:
      y = v * v
    grad = g.gradient(y, [v])[0]
    self.assertAllEqual(self.evaluate(grad), 2.0)

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testNestedGradients(self):
    x = constant_op.constant(3.0)
    with backprop.GradientTape() as g:
      g.watch(x)
      y = x * x
      z = y * y
    dz_dx, dz_dy = g.gradient(z, [x, y])
    self.assertEqual(self.evaluate(dz_dx), 108.0)
    self.assertEqual(self.evaluate(dz_dy), 18.0)

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsDefault(self):
    x = constant_op.constant(1.0)
    y = constant_op.constant(3.0)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    dz_dx = g.gradient(z, x)
    self.assertEqual(dz_dx, None)

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsZeros(self):
    x = constant_op.constant(1.0, shape=[2, 2])
    y = constant_op.constant(3.0)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    dz_dx = g.gradient(z, x, unconnected_gradients='zero')
    self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(dz_dx))

  @test_util.assert_no_new_tensors
  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsVariablesZeros(self):
    x = resource_variable_ops.ResourceVariable(
        constant_op.constant(1., shape=[2, 2]))
    self.evaluate(x.initializer)
    y = resource_variable_ops.ResourceVariable(constant_op.constant(3.))
    self.evaluate(y.initializer)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    dz_dx = g.gradient(z, x, unconnected_gradients='zero')
    self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(dz_dx))

  @test_util.run_in_graph_and_eager_modes
  def testUnknownUnconnectedGradientsValueGiven(self):
    x = constant_op.constant(1.0)
    y = constant_op.constant(1.0)
    with backprop.GradientTape() as g:
      g.watch([x, y])
      z = y * 2
    with self.assertRaisesRegex(
        ValueError, "Unknown value for unconnected_gradients: 'nonsense'"):
      g.gradient(z, x, unconnected_gradients='nonsense')

  @test_util.run_in_graph_and_eager_modes
  def testUnconnectedGradientsNestedDefunZeros(self):

    @function.defun
    def f(x):
      return x * x

    @function.defun
    def h(y):
      z = f(y)
      return array_ops.stop_gradient(z)

    x = constant_op.constant(1.0)
    with backprop.GradientTape() as g:
      g.watch(x)
      k = x + 2.
      y = h(k)

    dy_dx = g.gradient(y, x, unconnected_gradients='zero')
    self.assertEqual(0.0, self.evaluate(dy_dx))

  def testInvalidRecordOperationMessage(self):
    y = constant_op.constant(2.)
    x = constant_op.constant(1.)
    with backprop.GradientTape() as g:
      g.watch(x)
      tape_lib.record_operation('InvalidBackprop', [y], [x], lambda dy: [])
    with self.assertRaisesRegex(errors_impl.InternalError,
                                'InvalidBackprop.*too few gradients'):
      g.gradient(y, x)

  @test_util.assert_no_new_tensors
  def testEmptyParamsForValueAndGradFunction(self):

    def fn(a, b):
      return a * b

    val_and_grads_fn = backprop.val_and_grad_function(fn)

    x = 2.0
    y = 3.0
    val, (dx, dy) = val_and_grads_fn(x, y)
    self.assertAllClose(val, x * y)
    self.assertAllEqual(dx, y)
    self.assertAllEqual(dy, x)

  @test_util.assert_no_new_tensors
  def testNonEmptyParamsForValueAndGradFunction(self):

    def fn(a, b):
      return a * b

    val_and_grad_fn = backprop.val_and_grad_function(fn, params=[1])

    x = 2.0
    y = 3.0
    val, grads = val_and_grad_fn(x, y)
    self.assertAllClose(val, x * y)
    self.assertEqual(1, len(grads))
    self.assertAllEqual(grads[0], x)

  @test_util.run_gpu_only
  @test_util.assert_no_new_tensors
  def testTensorCopyCPU2GPU2CPU(self):
    # forward: a (cpu->gpu) -> add (gpu) -> c (gpu->cpu) -> add (cpu) -> e (cpu)
    # back: e (cpu) -> add (cpu) -> c (cpu->gpu) -> add (gpu) -> grad (gpu->cpu)
    def f(a, b):
      with context.device('/gpu:0'):
        c = math_ops.add(a.gpu(0), b.gpu(0))
      return math_ops.add(c.cpu(), constant_op.constant(3.0))

    with context.device('/cpu:0'):
      a = constant_op.constant(1.0)
      b = constant_op.constant(2.0)

    grad = backprop.gradients_function(f, [0])(a, b)[0]
    self.assertAllEqual(grad, 1.0)

  def testGetAttrType(self):
    typ = backprop.op_attr_type('Add', 'T')
    self.assertEqual(typ, int(pywrap_tfe.TF_ATTR_TYPE))

  def testGetAttrList(self):
    typ = backprop.op_attr_type('MaxPool', 'ksize')
    self.assertEqual(typ, [int(pywrap_tfe.TF_ATTR_INT)])

  def testMakeAttrType(self):
    self.assertEqual(dtypes.float32,
                     backprop.make_attr(int(pywrap_tfe.TF_ATTR_TYPE), 1))

  def testMakeAttrTypeList(self):
    self.assertEqual([dtypes.float32],
                     backprop.make_attr([int(pywrap_tfe.TF_ATTR_TYPE)], [1]))

  def testMulType(self):

    def mul(x):
      return math_ops._mul_dispatch(x, x)  # pylint: disable=protected-access

    self.assertAllEqual(backprop.gradients_function(mul)(3.0)[0].numpy(), 6.0)

  def testMakeAttrShape(self):
    for s in ([], None, [1, 2, 3], [None, None], [1, None, 3]):
      expected = tensor_shape.TensorShape(s).as_proto()
      actual = backprop.make_attr(int(pywrap_tfe.TF_ATTR_SHAPE), s)
      self.assertEqual(
          expected,
          actual,
          msg=('For shape %r, expected %r != %r actual' %
               (s, expected, actual)))

  def testMakeAttrShapeList(self):
    shape_list = [[], None, [1, 2, 3], [None, None], [1, None, 3]]
    self.assertEqual(
        [tensor_shape.TensorShape(s).as_proto() for s in shape_list],
        backprop.make_attr([int(pywrap_tfe.TF_ATTR_SHAPE)], shape_list))

  def testArgsGradientFunction(self):

    def f(*args):
      return args[0] * args[0]

    grad = backprop.gradients_function(f)
    self.assertAllEqual(grad(1.0)[0], 2.0)

  def testPartial(self):

    def f(x, y):
      return x * y

    part = functools.partial(f, constant_op.constant(2.0))
    self.assertAllEqual(
        backprop.gradients_function(part)(constant_op.constant(1.0))[0], 2.0)

  def testReturnSameThing(self):

    def f(x):
      return x, 2 * x

    self.assertAllEqual(backprop.gradients_function(f)(1.0)[0], 3.0)

  @test_util.assert_no_new_tensors
  def testExceptionSafety(self):

    def f(unused_x):
      raise ValueError()

    try:
      backprop.gradients_function(f)(1.0)
    except ValueError:
      pass

    def real_f(x):
      return x * x

    self.assertAllEqual(backprop.gradients_function(real_f)(1.0)[0], 2.0)

  @test_util.assert_no_new_tensors
  def testMultiValueConvertToTensor(self):
    x = resource_variable_ops.ResourceVariable(
        initial_value=array_ops.constant([1.0]), name='x')

    def fn():
      a = math_ops.add(x.value(), 1.0)
      # Make sure convert_to_tensor works correctly with list of TensorNodes.
      b = array_ops.stack([a, a], axis=0)
      return math_ops.reduce_mean(b)

    grad = backprop.implicit_grad(fn)()[0][0]
    self.assertAllEqual([1.0], grad)

  def testOutput(self):

    def multiout(x):
      return x + 2, x * x

    x = constant_op.constant([0.0, 1.0, 2.0])

    grad = backprop.gradients_function(multiout)(x)[0]
    self.assertAllEqual([1.0, 3.0, 5.0], grad)

  def testMultiValuePreservesIfNotDiffedAgainst(self):

    def tfe_conv2d(timage, tkernel, conv2dstrides):
      return nn_ops.conv2d(timage, tkernel, conv2dstrides, 'SAME')

    i = constant_op.constant([[[[1.0]]]])
    k = constant_op.constant([[[[2.0]]]])
    s = [1, 1, 1, 1]

    grad = backprop.gradients_function(tfe_conv2d, params=(0,))(i, k, s)[0]
    self.assertAllEqual([[[[2.0]]]], grad)

  def testSameObjectForMultipleArguments(self):

    def f(x, y):
      return math_ops.multiply(x, y)

    g = backprop.gradients_function(f)

    def np_g(x, y):
      dx, dy = g(x, y)
      return [dx.numpy(), dy.numpy()]

    x = constant_op.constant(1.)
    self.assertAllEqual([1., 1.], np_g(x, x))
    x = 1.
    self.assertAllEqual([1., 1.], np_g(x, x))
    x = constant_op.constant([[1.]])
    self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x))
    x = [[1.]]
    self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x))

    v = resource_variable_ops.ResourceVariable(
        initial_value=1., name='testSameObjectForMultipleArguments.Variable')
    self.assertAllEqual([1., 1.], np_g(v, v))

  @test_util.assert_no_new_tensors
  def testImplicitGradientsCustomGradientAndCachedVariableValue(self):

    @custom_gradient.custom_gradient
    def my_square(x):
      result = math_ops.square(x)

      def grad(dr):
        return 2 * dr * x + 1

      return result, grad

    x = resource_variable_ops.ResourceVariable(
        initial_value=3., name='X.' + self.id())

    def f():
      return my_square(x)

    g = backprop.implicit_grad(f)

    grads_and_vars = g()
    self.assertEqual(1, len(grads_and_vars))
    grad, var = grads_and_vars[0]
    self.assertAllEqual(7, grad)
    self.assertAllEqual(x, var)

  def testJacobianCustomGradient(self):

    class MyCallable(object):

      def __init__(self):
        self.a = variables.Variable(1.)
        self.b = variables.Variable(2.)
        self.c = variables.Variable(3.)

      def __call__(self, x):
        return self.a * x * x + self.b * x + self.c

    @def_function.function
    def call(c, x):

      @custom_gradient.custom_gradient
      def _call():
        y = c(x)

        def grad(dy, variables=None):  # pylint: disable=redefined-outer-name
          with backprop.GradientTape(persistent=True) as g:
            g.watch(variables)
            y = c(x)
          grad_vars = [
              2 * math_ops.reduce_sum(dy * g.jacobian(y, v)) for v in variables
          ]
          del g
          return (), grad_vars

        return y, grad

      return _call()

    c = MyCallable()
    x = constant_op.constant([1., 2., 3.])
    with backprop.GradientTape(persistent=True) as g:
      g.watch([c.a, c.b, c.c])
      y = call(c, x)
    self.assertAllEqual(g.gradient(y, x), None)

  @test_util.assert_no_new_tensors
  def testCustomGradient(self):

    @custom_gradient.custom_gradient
    def my_mul(x, y):
      result = x * y

      def grad(dr):
        return [dr * y, dr * x]

      return result, grad

    lr = 0.25
    x = resource_variable_ops.ResourceVariable(2., name='x')

    def loss(x):
      return my_mul(2., x.read_value())

    loss_grads_fn = backprop.implicit_val_and_grad(loss)

    losses = []
    for _ in range(5):
      loss, grads_and_vars = loss_grads_fn(x)
      losses.append(loss.numpy())
      for (grad, var) in grads_and_vars:
        var.assign_sub(lr * grad)
    self.assertAllEqual(losses, [4.0, 3., 2., 1., 0.])

  @test_util.assert_no_new_tensors
  def testCustomGradientIdentity(self):

    @custom_gradient.custom_gradient
    def my_identity(x):

      def grad(dresult):
        return [2 * dresult]

      return x, grad

    self.assertAllEqual(backprop.gradients_function(my_identity)(1.0)[0], 2.0)

  def testDifferentiatingFunctionThatReturnsNone(self):

    def fn(x, y):
      result = x * y  # pylint: disable=unused-variable

    x = constant_op.constant(1)
    y = constant_op.constant(2)

    loss_grads_fn = backprop.implicit_val_and_grad(fn)
    with self.assertRaisesRegex(
        ValueError, 'Cannot differentiate a function that returns None; '
        'did you forget to return a value from fn?'):
      loss_grads_fn(x, y)

    val_and_grads_fn = backprop.val_and_grad_function(fn)
    with self.assertRaisesRegex(
        ValueError, 'Cannot differentiate a function that returns None; '
        'did you forget to return a value from fn?'):
      val_and_grads_fn(x, y)

  def testZerosCacheDoesntLeakAcrossGraphs(self):
    with ops.Graph().as_default():

      def get_grad():
        with ops.Graph().as_default(), self.cached_session():
          t = constant_op.constant(1, dtype=dtypes.float32, shape=(10, 4))
          x = constant_op.constant(2, dtype=dtypes.float32, shape=(10, 4))
          with backprop.GradientTape() as tape:
            tape.watch(x)
            x1, _ = array_ops.split(x, num_or_size_splits=2, axis=1)
            y1 = x1**2
            y = array_ops.concat([y1, t], axis=1)
          return self.evaluate(tape.gradient(y, x))

      grad1 = get_grad()
      grad2 = get_grad()

      self.assertAllEqual(grad1, grad2)

  @test_util.run_in_graph_and_eager_modes
  def testSelectivelyWatchVariables(self):
    x1 = resource_variable_ops.ResourceVariable(1.0)
    x2 = resource_variable_ops.ResourceVariable(1.0)
    with backprop.GradientTape(watch_accessed_variables=False) as tape:
      tape.watch(x2)
      y = x1**2
      z = x2**3
    self.assertTupleEqual(tape.watched_variables(), (x2,))
    dy, dz = tape.gradient([y, z], [x1, x2])
    self.evaluate([x1.initializer, x2.initializer])
    self.assertIsNone(dy)
    self.assertEqual(self.evaluate(dz), 3.0)

  @test_util.run_in_graph_and_eager_modes
  def testDifferentiatingScalarCache(self):
    # In the following test, if x2 = x1 (i.e the objects are the exact same),
    # then y is essentially, 2*x1, and dy/dx1 = 2.
    # When we had a pure scalar cache in eager, this would be the case. This
    # test prevents us from going back to that case.
    with backprop.GradientTape(persistent=False) as g:
      x1 = constant_op.constant(3.0)
      x2 = constant_op.constant(3.0)
      g.watch(x1)
      g.watch(x2)
      y = x1 + x2
    grad = g.gradient(target=y, sources=[x1])
    self.assertEqual(self.evaluate(grad), [1.0])

  def testVariablesAndConstantsProduceTheSameGradients(self):

    # In the following test, differentiating [y, z] against [a, b] gives:
    # (dy/da + dz/da, dy/db + dz/db).
    # If a and b are the same constant, dz/da will not be 0 (which it should
    # be).
    # This is solved by using variable since doing a read_value on a tensor
    # will produce a new tensor and corresponding TensorHandle, and not reuse
    # the same tensor (which would happen if we are using a cache and reusing
    # EagerTensor objects).
    def get_grads(a, b):
      with backprop.GradientTape() as tape:
        tape.watch([a, b])
        y = a**3
        z = b**2
      return tape.gradient([y, z], [a, b])

    gradients_constants = get_grads(
        constant_op.constant(2.0), constant_op.constant(2.0))
    gradients_variables = get_grads(
        resource_variable_ops.ResourceVariable(2.0),
        resource_variable_ops.ResourceVariable(2.0))
    self.assertAllEqual(gradients_constants, gradients_variables)

  def testUnknownShapes(self):
    with ops.Graph().as_default():
      with backprop.GradientTape() as tape:
        a = array_ops.placeholder(dtype=dtypes.float32, shape=None)
        tape.watch(a)
        b = a**3

      db_da = tape.gradient(b, a)

      with self.cached_session() as sess:
        self.assertEqual((8.0, 12.0), sess.run((b, db_da), feed_dict={a: 2.0}))

  @test_util.run_in_graph_and_eager_modes
  def testCustomGradientInEagerAndGraph(self):

    @custom_gradient.custom_gradient
    def f(x):
      y = x * x

      def grad(dy):
        return [4 * dy]

      return y, grad

    with backprop.GradientTape() as t:
      c = constant_op.constant(1.0)
      t.watch(c)
      g = f(c)
    self.assertAllEqual(self.evaluate(t.gradient(g, c)), 4.0)

  def testOverrideSecondOrderWithCustomGradient(self):

    @custom_gradient.custom_gradient
    def f(x):

      def first_order_grad(dz):

        @custom_gradient.custom_gradient
        def first_order_custom(unused_x):

          def h(ddz):
            return -2.1 * ddz

          return -1.1, h

        return dz * first_order_custom(x)

      return x + 10., first_order_grad

    c = constant_op.constant(1.)
    with backprop.GradientTape() as outer:
      outer.watch(c)
      with backprop.GradientTape() as inner:
        inner.watch(c)
        d = f(c)**4.
      dd = inner.gradient(d, c)
    self.assertAllClose(4. * f(c)**3. * -1.1, dd)
    self.assertAllClose(3. * 4. * f(c)**2. * -1.1 * -1.1 + 4. * f(c)**3. * -2.1,
                        outer.gradient(dd, c))

  @test_util.run_in_graph_and_eager_modes
  def testCustomGradientForwardprop(self):

    @custom_gradient.custom_gradient
    def f(x):
      z = 2. * tensor_util.constant_value(x)

      def g(dz):

        @custom_gradient.custom_gradient
        def first_order(unused_x, unused_dz):

          def second_order_and_transpose(unused_ddz):
            return 2.2, 3.1

          return 2.1, second_order_and_transpose

        return first_order(x, dz)

      return z, g

    with backprop.GradientTape(persistent=True) as t:
      with backprop.GradientTape() as tt:
        c = constant_op.constant(1.)
        t.watch(c)
        tt.watch(c)
        output_grad = array_ops.ones([])
        t.watch(output_grad)
        output = f(c)
        self.assertAllClose(2., output)
      gc = tt.gradient(output, c, output_gradients=output_grad)
      self.assertAllClose(2.1, gc)
    ggc = t.gradient(gc, c)
    self.assertAllClose(2.2, ggc)
    # Note that executed eagerly this kind of transpose is not efficient. But
    # from a tf.function we could prune out the first-order gradient
    # computation.
    transpose = t.gradient(gc, output_grad)
    self.assertAllClose(3.1, transpose)

  @test_util.run_in_graph_and_eager_modes
  def testMaxPooling3DGradient(self):

    def forward(a):
      r = max_pooling3d(a, pool_size=pool_size, strides=strides, padding='SAME')
      return r

    input_sizes = [1, 3, 2, 4, 1]
    pool_size = (2, 2, 1)
    strides = (1, 1, 1)

    total_size = np.prod(input_sizes)
    x = np.arange(1, total_size + 1, dtype=np.float32)
    aa = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32)
    da = backprop.gradients_function(forward)(aa)

    if not context.executing_eagerly():
      tf_aa = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32)
      tf_max = max_pooling3d(
          tf_aa, pool_size=pool_size, strides=strides, padding='SAME')
      tf_da = gradients.gradients(tf_max, [tf_aa])
      self.assertAllEqual(da[0], tf_da[0])

  @test_util.run_in_graph_and_eager_modes
  def testWatchBadThing(self):
    g = backprop.GradientTape()
    with self.assertRaisesRegex(ValueError, 'ndarray'):
      g.watch(np.array(1.))

  def testWatchComposite(self):
    """Test that tape.watch expands composites and watches component Tensors."""
    with backprop.GradientTape() as t:
      values = constant_op.constant([1.0, 2.0], dtypes.float32)
      s = sparse_tensor.SparseTensor(
          indices=[[0, 0], [1, 2]], values=values, dense_shape=[3, 4])
      t.watch(s)
      z = sparse_ops.sparse_reduce_sum_v2(s)
    result = t.gradient(z, values)
    self.assertAllEqual(result, [1.0, 1.0])

  def testWatchedVariablesAfterNonPersistentGradientCall(self):
    with backprop.GradientTape(persistent=False) as tape:
      x = resource_variable_ops.ResourceVariable(1.0)
      tape.watch(x)
    tape.gradient(x, x)
    self.assertEqual((x,), tape.watched_variables())

  def testWatchedVariablesOnlyHasVariablesFromLastTape(self):
    with backprop.GradientTape(persistent=False) as tape:
      x = resource_variable_ops.ResourceVariable(1.0)
      tape.watch(x)
    with backprop.GradientTape(persistent=False) as tape:
      z = resource_variable_ops.ResourceVariable(2.0)
      tape.watch(z)
    tape.gradient(z, z)
    self.assertEqual((z,), tape.watched_variables())

  def testWatchedVariablesRespectReset(self):
    with backprop.GradientTape(persistent=False) as tape:
      x = resource_variable_ops.ResourceVariable(1.0)
      tape.watch(x)
      self.assertEqual((x,), tape.watched_variables())
      tape.reset()
      z = resource_variable_ops.ResourceVariable(2.0)
      tape.watch(z)
      self.assertEqual((z,), tape.watched_variables())
    tape.gradient(z, z)
    self.assertEqual((z,), tape.watched_variables())

  def testNameScope(self):

    def fn(x):
      with ops.name_scope('my_scope'):
        a = math_ops.cos(x)
        b = math_ops.cos(x)
        return math_ops.add(a, b)

    @function.defun
    def grad_fn(x):
      return backprop.gradients_function(fn)(x)

    grad_ops = grad_fn.get_concrete_function(
        constant_op.constant(1.0)).graph.get_operations()
    num_sin_ops_found = 0
    for op in grad_ops:
      if op.type == 'Sin':
        num_sin_ops_found += 1
        self.assertIn('gradient_tape/my_scope/', op.name)
    self.assertEqual(num_sin_ops_found, 2)

  @test_util.assert_no_new_pyobjects_executing_eagerly
  def testRecomputeGradWithDifferentShape(self):

    @custom_gradient.recompute_grad
    def outer(x):
      return [x[0] + 1, x[1] + 1]

    x = [
        variables.Variable([1.0, 2.0], name='a'),
        variables.Variable(1.0, name='b')
    ]
    with backprop.GradientTape():
      y = outer(x)
      self.assertAllEqual(y[0], [2.0, 3.0])
      self.assertAllEqual(y[1], 2.0)

    @custom_gradient.recompute_grad
    def outer_dict(x):
      for key in x.keys():
        x[key] = x[key] + 1
      return x

    x = {x[0].ref(): x[0], x[1].ref(): x[1]}
    with backprop.GradientTape():
      y = outer_dict(x)
      y = list(y.values())
      self.assertAllEqual(y[0], [2.0, 3.0])
      self.assertAllEqual(y[1], 2.0)

  @test_util.assert_no_new_pyobjects_executing_eagerly
  def testRecomputeGradWithNestedFunctionAndWhileLoop(self):

    @custom_gradient.recompute_grad
    @def_function.function
    def outer(x):

      @def_function.function
      def middle(y):

        @def_function.function
        def inner(z):
          return z + 1

        i = constant_op.constant(0.0)
        c = lambda y, i: i < 10.
        b = lambda y, i: (inner(y), i + 1.0)
        y, i = control_flow_ops.while_loop(c, b, [y, i])

        return y

      return middle(x)

    with MemoryChecker() as memory_checker:
      for _ in range(5):
        x = variables.Variable(1.0, name='x')
        with backprop.GradientTape():
          y = outer(x)
          self.assertAllEqual(y, 11.0)

    memory_checker.report()
    memory_checker.assert_no_leak_if_all_possibly_except_one()


class JacobianTest(test.TestCase):

  def _jacobian(self, experimental_use_pfor):
    persistent = context.executing_eagerly and not experimental_use_pfor
    with backprop.GradientTape(persistent=persistent) as g:
      x = constant_op.constant([1., 2.])
      y = constant_op.constant([3., 4.])
      g.watch(x)
      g.watch(y)
      z = x * x * y
    jacobian = g.jacobian(
        z, [x, y], experimental_use_pfor=experimental_use_pfor)
    answer = [array_ops.diag(2 * x * y), array_ops.diag(x * x)]
    return jacobian, answer

  @test_util.run_v1_only('b/120545219')
  def testPfor(self):
    jacobian, answer = self._jacobian(experimental_use_pfor=True)
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testWhileLoop(self):
    jacobian, answer = self._jacobian(experimental_use_pfor=False)
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testPforDefun(self):

    @function.defun
    def _f():
      return self._jacobian(experimental_use_pfor=True)

    jacobian, answer = _f()
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testWhileLoopDefun(self):

    @function.defun
    def _f():
      return self._jacobian(experimental_use_pfor=False)

    jacobian, answer = _f()
    for j, a in zip(jacobian, answer):
      self.assertAllEqual(a, j)

  @test_util.run_v1_only('b/120545219')
  def testPersistentTape(self):
    if not context.executing_eagerly():
      return
    with backprop.GradientTape() as g:
      x = constant_op.constant([1.0, 2.0])
      g.watch(x)
      y = x * x
    with self.assertRaisesRegex(RuntimeError, 'persistent'):
      g.jacobian(y, x, experimental_use_pfor=False)

  @test_util.run_v1_only('b/120545219')
  def test_parallel_iterations(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant([[1., 2], [3, 4]])
      g.watch(x)
      y = math_ops.matmul(x, x)
    self.assertAllClose(
        g.jacobian(y, x, parallel_iterations=2),
        g.jacobian(y, x, parallel_iterations=3))

  @test_util.run_in_graph_and_eager_modes
  def test_nested_jacobian(self):
    if context.executing_eagerly():
      # TODO(agarwal): b/128842926
      self.skipTest('Conversion of function calls not implemented yet.')
    x = array_ops.ones((10, 2))
    with backprop.GradientTape(persistent=False) as g:
      g.watch(x)
      with backprop.GradientTape(persistent=False) as gg:
        gg.watch(x)
        y = math_ops.reduce_sum(math_ops.square(x))
      dy_x = gg.jacobian(y, x)
    dy_xx = g.batch_jacobian(dy_x, x)
    dy_xx_answer = [[[2., 0], [0, 2.]]] * 10
    self.assertAllClose(dy_xx_answer, self.evaluate(dy_xx))

  def test_nested_batch_jacobian_foldl(self):
    def _grad(f):
      def _grad_function(primal):
        with backprop.GradientTape() as tape:
          tape.watch(primal)
          primal_out = f(primal)
        return tape.batch_jacobian(primal_out, primal)
      return _grad_function

    def _func(x):
      return array_ops.reshape(
          functional_ops.foldl_v2(lambda a, b: math_ops.cos(a + b),
                                  array_ops.transpose(x)),
          [1, 1])

    f = _func
    x = constant_op.constant([[1., 2.]])
    for _ in range(2):
      theoretical, numerical = gradient_checker_v2.compute_gradient(f, [x])
      self.assertAllClose(theoretical, numerical, rtol=1e-3)
      f = _grad(f)
      expected_flat = array_ops.reshape(numerical, [-1])
      self.assertAllClose(expected_flat,
                          array_ops.reshape(f(x), [-1]),
                          rtol=1e-3)
      self.assertAllClose(expected_flat,
                          array_ops.reshape(def_function.function(f)(x), [-1]),
                          rtol=1e-3)

  def test_grad_jacobian_conv(self):
    def _inner(x):
      kernel = array_ops.ones([3, 3, 1, 9])
      with backprop.GradientTape() as tape:
        tape.watch(x)
        y = nn_ops.conv2d(x, kernel, strides=(1, 1), padding='SAME',
                          data_format='NHWC')
        reduced = math_ops.reduce_sum(y ** 2., axis=[2, 3])
      return math_ops.reduce_sum(tape.batch_jacobian(reduced, x))

    theoretical, numerical = gradient_checker_v2.compute_gradient(
        def_function.function(_inner), [array_ops.ones([10, 4, 4, 1])])
    self.assertAllClose(numerical, theoretical, rtol=1e-1)

    @def_function.function
    def _outer():
      with backprop.GradientTape() as tape:
        x = array_ops.ones([10, 4, 4, 1])
        tape.watch(x)
        y = _inner(x)
      return tape.gradient(y, x)

    self.assertAllClose(array_ops.reshape(numerical, [-1]),
                        array_ops.reshape(_outer(), [-1]), rtol=1e-1)

  @test_util.run_in_graph_and_eager_modes
  def test_indexed_slices(self):
    with backprop.GradientTape(persistent=True) as g:
      inp = random_ops.random_uniform([3, 2])
      g.watch(inp)
      output = nn.embedding_lookup(inp, [0, 2])
    self.assertAllClose(
        g.jacobian(output, inp, experimental_use_pfor=True),
        g.jacobian(output, inp, experimental_use_pfor=False))

  def test_foldl_partial_function(self):
    x = array_ops.zeros([3])
    with backprop.GradientTape(persistent=True) as tape:
      tape.watch(x)
      result = def_function.function(
          functools.partial(functional_ops.foldl_v2, lambda a, b: a + b))(
              x)
    self.assertAllClose([1., 1., 1.],
                        tape.jacobian(result, x, experimental_use_pfor=True))
    self.assertAllClose([1., 1., 1.],
                        tape.jacobian(result, x, experimental_use_pfor=False))

    # Non-persistent tapes take a different function gradient path, but also
    # work with pfor=True.
    x = array_ops.zeros([3])
    with backprop.GradientTape() as tape:
      tape.watch(x)
      result = def_function.function(
          functools.partial(functional_ops.foldl_v2, lambda a, b: a + b))(
              x)
    self.assertAllClose([1., 1., 1.],
                        tape.jacobian(result, x, experimental_use_pfor=True))

  def test_foldl_pure_function(self):

    @def_function.function
    def compute_jacobian(use_pfor):
      x = array_ops.zeros([3])
      with backprop.GradientTape(persistent=True) as tape:
        tape.watch(x)
        result = functools.partial(functional_ops.foldl_v2, lambda a, b: a + b)(
            x)
      return tape.jacobian(result, x, experimental_use_pfor=use_pfor)

    self.assertAllClose(compute_jacobian(use_pfor=True),
                        compute_jacobian(use_pfor=False))

  def test_cond_func_grad_jacobian(self):

    @def_function.function
    def f(x):
      y = control_flow_ops.cond(x > 0., lambda: x**3., lambda: x**2.)
      return y

    with backprop.GradientTape(persistent=True) as tape:
      x = constant_op.constant(1.)
      tape.watch(x)
      y = f(x)
      grad = tape.gradient(y, x)
    self.assertAllClose(3., grad)
    jacobian = tape.jacobian(grad, x, experimental_use_pfor=False)
    self.assertAllClose(6., jacobian)
    jacobian_pfor = tape.jacobian(grad, x, experimental_use_pfor=True)
    self.assertAllClose(6., jacobian_pfor)


@test_util.run_all_in_graph_and_eager_modes
class BatchJacobianTest(test.TestCase, parameterized.TestCase):

  def _batch_jacobian(self, experimental_use_pfor):
    persistent = context.executing_eagerly and not experimental_use_pfor
    with backprop.GradientTape(persistent=persistent) as g:
      x = constant_op.constant([[1., 2.], [3., 4.]])
      y = constant_op.constant([[3., 4.], [5., 6.]])
      g.watch(x)
      z = x * x * y
    batch_jacobian = g.batch_jacobian(
        z, x, experimental_use_pfor=experimental_use_pfor)
    answer = array_ops.stack(
        [array_ops.diag(2 * x[0] * y[0]),
         array_ops.diag(2 * x[1] * y[1])])
    return batch_jacobian, answer

  def testPfor(self):
    batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=True)
    self.assertAllEqual(answer, batch_jacobian)

  def testWhileLoop(self):
    batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=False)
    self.assertAllEqual(answer, batch_jacobian)

  def testPforDefun(self):

    @function.defun
    def _f():
      return self._batch_jacobian(experimental_use_pfor=True)

    batch_jacobian, answer = _f()
    self.assertAllEqual(answer, batch_jacobian)

  def testWhileLoopDefun(self):

    @function.defun
    def _f():
      return self._batch_jacobian(experimental_use_pfor=False)

    batch_jacobian, answer = _f()
    self.assertAllEqual(answer, batch_jacobian)

  def testPersistentTape(self):
    if not context.executing_eagerly():
      return
    with backprop.GradientTape() as g:
      x = constant_op.constant([[1.0, 2.0]])
      g.watch(x)
      y = x * x
    with self.assertRaisesRegex(RuntimeError, 'persistent'):
      g.batch_jacobian(y, x, experimental_use_pfor=False)

  def testBadShape(self):
    x = random_ops.random_uniform([2, 3])
    with backprop.GradientTape() as g:
      y = array_ops.concat([x, x], axis=0)
    with self.assertRaisesRegex(ValueError, 'Need first dimension'):
      g.batch_jacobian(y, x)

  def testBadInputRank(self):
    x = random_ops.random_uniform([2])
    with backprop.GradientTape() as g:
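      # batch_jacobian expects a leading batch dimension on both the input and
      # the output (rank at least 2), so the rank-1 input here is rejected.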
      y = random_ops.random_uniform([2, 2])
    with self.assertRaisesRegex(ValueError, 'must have rank at least 2'):
      g.batch_jacobian(y, x)

  def testBadOutputRank(self):
    x = random_ops.random_uniform([2, 2])
    with backprop.GradientTape() as g:
      y = random_ops.random_uniform([2])
    with self.assertRaisesRegex(ValueError, 'must have rank at least 2'):
      g.batch_jacobian(y, x)

  def test_parallel_iterations(self):
    with backprop.GradientTape(persistent=True) as g:
      x = constant_op.constant([[1., 2], [3, 4]])
      g.watch(x)
      w = constant_op.constant([[1., 2, 3, 4], [5, 6, 7, 8]])
      y = math_ops.matmul(x, w)
    self.assertAllClose(
        g.batch_jacobian(y, x, parallel_iterations=2),
        g.batch_jacobian(y, x, parallel_iterations=3))

  @parameterized.parameters((True, True), (True, False), (False, True),
                            (False, False))
  def test_degenerate_shape(self, use_function, use_pfor):

    def f(x):
      with backprop.GradientTape(persistent=True) as tape:
        tape.watch(x)
        y = x**2
      return tape.batch_jacobian(y, x, experimental_use_pfor=use_pfor)

    if use_function:
      f = def_function.function(f)
    self.assertAllEqual([1, 0, 0], array_ops.shape(f(array_ops.zeros([1, 0]))))

  @parameterized.parameters((True,), (False))
  def test_zeros_type_correct(self, use_pfor):
    for dtype in [dtypes.float32, dtypes.float64]:
      @def_function.function
      def f(x):
        del x
        return constant_op.constant([[1.]], dtype=dtype)  # pylint: disable=cell-var-from-loop

      with backprop.GradientTape(persistent=True) as tape:
        x = constant_op.constant([[2.]], dtype=dtype)
        tape.watch(x)
        y = f(x)
      jac = tape.batch_jacobian(y, x, experimental_use_pfor=use_pfor)
      self.assertEqual(dtype, jac.dtype)
      self.assertAllClose([[[0.]]], jac)

      with backprop.GradientTape(persistent=True) as tape:
        x = constant_op.constant([[2.]], dtype=dtype)
        tape.watch(x)
        y = f(x)
      jac = tape.batch_jacobian(y, x, unconnected_gradients='zero',
                                experimental_use_pfor=use_pfor)
      self.assertEqual(dtype, jac.dtype)
      self.assertAllClose([[[0.]]], jac)


class AggregateIndexedSlicesGradientsTest(test_util.TensorFlowTestCase):

  def _assert_indexed_slices_equal(self, left, right):
    self.assertAllEqual(
        self.evaluate(ops.convert_to_tensor(left)),
        self.evaluate(ops.convert_to_tensor(right)))

  def testNoGradients(self):
    self.assertIsNone(backprop.aggregate_indexed_slices_gradients([]))

  def testOneGradient(self):
    t = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    result = backprop.aggregate_indexed_slices_gradients([t])
    self._assert_indexed_slices_equal(t, result)

  def testMultipleGradients(self):
    t0 = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    t1 = math_ops._as_indexed_slices(
        constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
    result = backprop.aggregate_indexed_slices_gradients([t0, t1])
    self._assert_indexed_slices_equal(total, result)

  def testMultipleGradientsWithNones(self):
    t0 = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    t1 = math_ops._as_indexed_slices(
        constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
    t3 = None
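    # The None entry stands in for a missing (unconnected) gradient and is
    # expected to be ignored, so the total matches the sum of t0 and t1.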
    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
    result = backprop.aggregate_indexed_slices_gradients([t0, t1, t3])
    self._assert_indexed_slices_equal(total, result)

  def testMixedTensorAndIndexedSlices(self):
    t0 = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    t1 = constant_op.constant([[0., 0.], [5, 6], [7., 8.]])
    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
    result = backprop.aggregate_indexed_slices_gradients([t0, t1])
    self._assert_indexed_slices_equal(total, result)


if __name__ == '__main__':
  test.main()