1# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15from __future__ import absolute_import
16from __future__ import division
17from __future__ import print_function
18
19import functools
20
21from absl.testing import parameterized
22import numpy as np
23
24from tensorflow.python import pywrap_tfe
25from tensorflow.python.eager import backprop
26from tensorflow.python.eager import context
27from tensorflow.python.eager import def_function
28from tensorflow.python.eager import function
29from tensorflow.python.eager import tape as tape_lib
30from tensorflow.python.eager import test
31from tensorflow.python.framework import constant_op
32from tensorflow.python.framework import dtypes
33from tensorflow.python.framework import errors_impl
34from tensorflow.python.framework import ops
35from tensorflow.python.framework import sparse_tensor
36from tensorflow.python.framework import tensor_shape
37from tensorflow.python.framework import tensor_util
38from tensorflow.python.framework import test_util
39from tensorflow.python.framework.memory_checker import MemoryChecker
40from tensorflow.python.layers.pooling import max_pooling3d
41from tensorflow.python.ops import array_ops
42from tensorflow.python.ops import control_flow_ops
43from tensorflow.python.ops import custom_gradient
44from tensorflow.python.ops import embedding_ops
45from tensorflow.python.ops import functional_ops
46from tensorflow.python.ops import gradient_checker_v2
47from tensorflow.python.ops import gradients
48from tensorflow.python.ops import math_ops
49from tensorflow.python.ops import nn
50from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
51from tensorflow.python.ops import nn_ops
52from tensorflow.python.ops import random_ops
53from tensorflow.python.ops import resource_variable_ops
54from tensorflow.python.ops import sparse_ops
55from tensorflow.python.ops import variables
56from tensorflow.python.training import training
57
58
59class BackpropTest(test.TestCase, parameterized.TestCase):
60
61  @test_util.run_in_graph_and_eager_modes
62  def testAggregateGradients(self):
63
64    def fn(x):
65      ind1 = constant_op.constant(np.array([0, 1]))
66      ind2 = constant_op.constant(np.array([2, 3]))
67      ind3 = constant_op.constant(np.array([1, 3]))
68      g1 = embedding_ops.embedding_lookup(x, ind1)
69      g2 = embedding_ops.embedding_lookup(x, ind2)
70      g3 = embedding_ops.embedding_lookup(x, ind3)
71      return g1 * g2 * g3
72
73    var_np = np.random.rand(4, 2).astype(np.float32)
74    var = constant_op.constant(var_np)
75    grad = backprop.gradients_function(fn, [0])(var)[0]
76    grad = self.evaluate(ops.convert_to_tensor(grad))
77
78    if not context.executing_eagerly():
79      tf_var = array_ops.constant(var_np, dtypes.float32)
80      tf_ind1 = array_ops.constant([0, 1])
81      tf_ind2 = array_ops.constant([2, 3])
82      tf_ind3 = array_ops.constant([1, 3])
83      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
84      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
85      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
86      tf_y = tf_g1 * tf_g2 * tf_g3
87      tf_grad = gradients.gradients(tf_y, [tf_var])[0]
88
89      tf_dense_grad = math_ops.unsorted_segment_sum(tf_grad.values,
90                                                    tf_grad.indices,
91                                                    tf_grad.dense_shape[0])
92
93      self.assertAllClose(grad, self.evaluate(tf_dense_grad))
94
95  @test_util.run_in_graph_and_eager_modes
96  def testAggregateGradientsWithTensor(self):
97
98    def fn(x):
99      ind1 = constant_op.constant(np.array([0, 1]))
100      # A mixture of IndexedSlices and dense tensor to aggregate.
101      g1 = embedding_ops.embedding_lookup(x, ind1)
102      g2 = math_ops.reduce_sum(x * constant_op.constant(2.0))
103      return g1 * g2
104
105    var_np = np.random.rand(4, 2).astype(np.float32)
106    var = constant_op.constant(var_np)
107    grad = backprop.gradients_function(fn, [0])(var)[0]
108    grad = self.evaluate(ops.convert_to_tensor(grad))
109
110    if not context.executing_eagerly():
111      tf_var = array_ops.constant(var_np, dtypes.float32)
112      tf_ind1 = array_ops.constant([0, 1])
113      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
114      tf_g2 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1))
115      tf_y = tf_g1 * tf_g2
116      tf_grad = gradients.gradients(tf_y, [tf_var])[0]
117
118      self.assertAllClose(grad, tf_grad)
119
120  def testImplicitGradWithResourceVariable(self):
121    x = resource_variable_ops.ResourceVariable(
122        initial_value=constant_op.constant(1.0), name='x')
123
124    def fn():
125      b = constant_op.constant(2.0)
126      c = math_ops.add(x.value(), b)
127      return math_ops.add(c, constant_op.constant(3.0))
128
129    grads_and_vars = backprop.implicit_grad(fn)()
130    self.assertAllEqual(grads_and_vars[0][0], 1.0)
131    self.assertAllEqual(id(grads_and_vars[0][1]), id(x))
132
133  @parameterized.named_parameters([('Function', def_function.function),
134                                   ('NoFunction', lambda f: f)])
135  def testNoOpBehaviorConsistent(self, decorator):
136
137    @decorator
138    def f(x):
139      # Test all different types of no-ops
140      x1 = array_ops.identity(x)
141      x2 = math_ops.add_v2(x, 0)
142      x3 = math_ops.subtract(x, 0)
143      x4 = math_ops.multiply(x, 1)
144      with backprop.GradientTape() as t:
145        t.watch(x)
146        t.watch(x1)
147        t.watch(x2)
148        t.watch(x3)
149        t.watch(x4)
150        y1 = x * 2.
151        y2 = x1 * 3.
152        y3 = x2 * 3.
153        y4 = x3 * 3.
154        y5 = x4 * 3.
155        loss = y1 + y2 + y3 + y4 + y5
156      return t.gradient(loss, [x, x1, x2, x3, x4])
157
158    self.assertAllClose([2., 3., 3., 3., 3.], f(constant_op.constant(10.)))
159
160  def testGradientInsideLoop(self):
161    with ops.Graph().as_default():
162      v = resource_variable_ops.ResourceVariable(1.0)
163
164      def body(_):
165        _ = v + 1.0  # This reads the variable inside the loop context
166        with backprop.GradientTape() as t:
167          result = v * 2
168        self.assertIsNotNone(t.gradient(result, v))
169        return 1.0
170
171      control_flow_ops.while_loop(lambda i: False, body, [1.0])
172
173  def testWhereGradient(self):
174    # Note: where is special because only some of its arguments are of
175    # differentiable dtypes.
176
177    def f(x):
178      return array_ops.where(x < 10, x, x * x)
179
180    g = backprop.gradients_function(f)
181
182    self.assertAllEqual(g(5.)[0], 1.0)
183    self.assertAllEqual(g(50.)[0], 100.0)
184
185  def testTwoTargets(self):
186    with backprop.GradientTape() as t:
187      x = constant_op.constant(3.0)
188      y = constant_op.constant(2.0)
189      t.watch([x, y])
190      xx = 2 * x
191      yy = 3 * y
192    dx, dy = t.gradient([xx, yy], [x, y])
193    self.assertAllEqual(dx, 2.0)
194    self.assertAllEqual(dy, 3.0)
195
196  def testCustomGradientEmptyError(self):
197
198    @custom_gradient.custom_gradient
199    def identity(x):
200
201      def grad(_):
202        return []  # This return value is wrong!
203
204      return x, grad
205
206    x = variables.Variable(1.0)
207    with backprop.GradientTape() as t:
208      y = identity(x)
209    with self.assertRaises(ValueError):
210      t.gradient(y, [x])
211
212  def testOutputGradUsedInComputation(self):
213    with backprop.GradientTape() as t:
214      x = constant_op.constant(3.0)
215      y = constant_op.constant(2.0)
216      t.watch([x, y])
217      loss = x * y
218    dx, = t.gradient([loss, x], [x], output_gradients=[1.0, 2.0])
219    self.assertAllEqual(dx, 4.0)
220
221  def testDy(self):
222
223    def f(x):
224      return x
225
226    grad_fn = backprop.gradients_function(f)
227    self.assertAllEqual(2., grad_fn(1., dy=2.)[0])
228
229  def testGradientInteger(self):
230
231    def f(x):
232      return x + x
233
234    int_tensor = constant_op.constant(1)
235    self.assertEqual(backprop.gradients_function(f)(int_tensor)[0], None)
236
237  def testErrors(self):
238
239    @custom_gradient.custom_gradient
240    def f(x):
241
242      def grad(_):
243        raise RuntimeError('x')
244
245      return x, grad
246
247    # TODO(apassos) raise the right error here
248    with self.assertRaises(RuntimeError):
249      backprop.gradients_function(f)(constant_op.constant(1.0))
250
251  def testGradientsFunctionInCustomGradient(self):
252
253    @custom_gradient.custom_gradient
254    def f(x):
255      (y,) = backprop.gradients_function(lambda x: x * x)(x)
256
257      def grad(dy):
258        return [2 * dy]
259
260      return y, grad
261
262    self.assertAllEqual(f(1.0), 2.0)
263
264  def testImplicitGradOverEmbeddingLookup(self):
265    batch_size = 8
266    embedding_size = 512
267    vocab_size = 1000
268    lrn_rate = 0.1
269    random_init = random_ops.random_uniform([vocab_size, embedding_size])
270
271    x = array_ops.ones((batch_size), dtypes.int64)
272    embedding = resource_variable_ops.ResourceVariable(
273        initial_value=random_init, dtype=dtypes.float32, name='embedding')
274
275    def f():
276      embedded_x = embedding_ops.embedding_lookup(embedding, x)
277      return constant_op.constant(1.0, dtypes.float32) - embedded_x
278
279    grad = backprop.implicit_grad(f)()[0][0]
280    opt = training.GradientDescentOptimizer(lrn_rate)
281
282    with ops.Graph().as_default(), self.cached_session():
283      tf_x = array_ops.ones((batch_size), dtypes.int64)
284      # TODO(ashankar,apassos): Change to ResourceVariable.
285      tf_embedding = variables.Variable(
286          random_init.numpy(), name='tf_embedding')
287      tf_embedded_x = embedding_ops.embedding_lookup(tf_embedding, tf_x)
288      tf_y = 1.0 - tf_embedded_x
289      tf_grad = gradients.gradients(tf_y, [tf_embedding])[0]
290      tf_opt = training.GradientDescentOptimizer(0.1)
291      tf_embedding.initializer.run()
292
293      self.assertAllClose(tf_grad.indices, grad.indices)
294      self.assertAllClose(tf_grad.values, grad.values)
295
296      tf_opt.apply_gradients([(tf_grad, tf_embedding)]).run()
297      expected = self.evaluate(tf_embedding)
298    opt.apply_gradients([(grad, embedding)])
299    self.assertAllClose(expected, embedding.read_value())
300
301  def testImplicitGradOrdering(self):
302    v0 = resource_variable_ops.ResourceVariable(1.0)
303    v1 = resource_variable_ops.ResourceVariable(2.0)
304
305    def f():
306      x = v1 * v1
307      y = v0 * v0
308      return x + y
309
310    grads = backprop.implicit_grad(f)()
311    ordered_variables = [x[1] for x in grads]
312    self.assertIs(ordered_variables[0], v0)
313    self.assertIs(ordered_variables[1], v1)
314
315  def testTapeNoOpGradient(self):
316    x = constant_op.constant(3.0)
317    with backprop.GradientTape() as t:
318      t.watch(x)
319      y = x
320    self.assertEqual(t.gradient(y, x).numpy(), 1.0)
321
322  def testTapeIdentityGradientIsIdentity(self):
323    x = constant_op.constant(3.0)
324    with backprop.GradientTape() as t:
325      t.watch(x)
326      y = array_ops.identity(x)
327    self.assertEqual(t.gradient(y, x).numpy(), 1.0)
328
329  def testFunctionIndexedSlicesGradient(self):
330
331    @def_function.function
332    def f(x):
333      return x + 1
334
335    with backprop.GradientTape() as t:
336      x = constant_op.constant([1.0])
337      t.watch(x)
338      y = f(x)
339      y = array_ops.gather(y, [0])
340    self.assertAllEqual(t.gradient(y, x), [1.0])
341
342  def testTapeGradientMultiTargetOneIsSource(self):
343    x = constant_op.constant(2.0)
344    with backprop.GradientTape() as t:
345      t.watch(x)
346      y = x * x
347    self.assertEqual(t.gradient([x, y], x).numpy(), 5.0)
348
349  def testTapeNoOpGradientWithMultiTargetAllSource(self):
350    x = constant_op.constant(3.0)
351    with backprop.GradientTape() as t:
352      t.watch(x)
353      y = x
354    self.assertEqual(t.gradient([y, y], x).numpy(), 2.0)
355
356  def testTapeNoOpGradientWithMultiTargetMultiSource(self):
357    x = constant_op.constant(3.0)
358    y = constant_op.constant(5.0)
359    with backprop.GradientTape() as t:
360      t.watch(x)
361      t.watch(y)
362      z = y * y
363    self.assertAllEqual(t.gradient([x, y, z], [x, y]), [1.0, 11.0])
364
365  def testTapeGradientStringTarget(self):
366    s = constant_op.constant('unknown', dtype=dtypes.string)
367    x = constant_op.constant(3.0)
368
369    with backprop.GradientTape() as t:
370      t.watch(x)
371      t.watch(s)
372    grads = t.gradient(s, x)
373    self.assertEqual(grads, None)
374
375  def testTapeNoOpGradientStringSourceAndTarget(self):
376    s = constant_op.constant('unknown', dtype=dtypes.string)
377
378    with backprop.GradientTape() as t:
379      t.watch(s)
380    grads = t.gradient(s, s)
381    self.assertEqual(grads, None)
382
383  def testTapeNoOpGradientWithMultiTargetMultiSourceIncludeString(self):
384    x = constant_op.constant(3.0)
385    y = constant_op.constant(5.0)
386    s = constant_op.constant('unknown', dtype=dtypes.string)
387
388    with backprop.GradientTape() as t:
389      t.watch(x)
390      t.watch(y)
391      t.watch(s)
392      z = y * y
393    grads = t.gradient([x, y, z, s], [x, y, s])
394    self.assertAllEqual(grads[:2], [1.0, 11.0])
395    self.assertEqual(grads[2], None)
396
397  def testTapeNoOpOnVariableIsIdentity(self):
398    v0 = resource_variable_ops.ResourceVariable(1.0)
399    with backprop.GradientTape() as t:
400      y = v0.read_value()
401    self.assertEqual(t.gradient(y, v0).numpy(), 1.0)
402
403  @test_util.assert_no_new_tensors
404  @test_util.assert_no_garbage_created
405  def testTapeNoOpGradient2By2(self):
406    a_2_by_2 = constant_op.constant(2.0, shape=[2, 2])
407    with backprop.GradientTape(persistent=True) as tape:
408      tape.watch(a_2_by_2)
409    dy_dy = tape.gradient(a_2_by_2, [a_2_by_2])[0]
410    self.assertAllEqual(dy_dy.numpy(),
411                        constant_op.constant(1.0, shape=[2, 2]).numpy())
412
413  @test_util.assert_no_new_pyobjects_executing_eagerly
414  def testTapeNoOpGradientMultiTarget2By2(self):
415    a_2_by_2 = constant_op.constant(2.0, shape=[2, 2])
416    with backprop.GradientTape(persistent=True) as tape:
417      tape.watch(a_2_by_2)
418    dy_dy = tape.gradient([a_2_by_2, a_2_by_2], [a_2_by_2])[0]
419    self.assertAllEqual(dy_dy.numpy(),
420                        constant_op.constant(2.0, shape=[2, 2]).numpy())
421
422  def testTapeStopRecording(self):
423    with backprop.GradientTape() as t:
424      x = resource_variable_ops.ResourceVariable(1.0)
425      with t.stop_recording():
426        y = x * x
427    self.assertEqual(t.gradient(y, x), None)
428
429  def testTapeStopStartRecording(self):
430    with backprop.GradientTape(persistent=True) as t:
431      x = resource_variable_ops.ResourceVariable(1.0)
432      x2 = x * 2  # This should be differentiated through.
433      with t.stop_recording():
434        y = x2 * x2
435      z = x2 * x2
436    self.assertEqual(t.gradient(y, x2), None)
437
438    # If the x*2 was not differentiated through, this would be 2.0, not 4.0
439    self.assertEqual(t.gradient(z, x2).numpy(), 4.0)
440
441  def testTapeReset(self):
442    with backprop.GradientTape() as t:
443      v = resource_variable_ops.ResourceVariable(1.0)
444      loss = v * v
445      t.reset()
446      loss += v * v
447    self.assertAllEqual(t.gradient(loss, v), 2.0)
448
449  def testPythonMax(self):
450    x = [
451        resource_variable_ops.ResourceVariable(2.),
452        resource_variable_ops.ResourceVariable(3.),
453        resource_variable_ops.ResourceVariable(5.)
454    ]
455    with backprop.GradientTape() as t:
456      f = max(x)
457    grad = t.gradient(f, x)
458    self.assertAllEqual(self.evaluate(f), 5.)
459    self.assertAllEqual(self.evaluate(grad), [None, None, 1.0])
460
461  def testAutomaticWatchedVariables(self):
462    with backprop.GradientTape() as t:
463      self.assertEqual(0, len(t.watched_variables()))
464      v = resource_variable_ops.ResourceVariable(1.0)
465      loss = v * v
466      self.assertAllEqual([v], t.watched_variables())
467
468      t.reset()
469      self.assertEqual(0, len(t.watched_variables()))
470      loss += v * v
471      self.assertAllEqual([v], t.watched_variables())
472
473  def testExplicitWatchedVariables(self):
474    with backprop.GradientTape() as t:
475      self.assertEqual(0, len(t.watched_variables()))
476      v = resource_variable_ops.ResourceVariable(1.0)
477      t.watch(v)
478      self.assertAllEqual([v], t.watched_variables())
479
480      t.reset()
481      self.assertEqual(0, len(t.watched_variables()))
482      t.watch(v)
483      self.assertAllEqual([v], t.watched_variables())
484
485  @test_util.assert_no_new_tensors
486  def testGradientNone(self):
487
488    def loss(x, l):
489      return math_ops.reduce_mean(
490          nn_ops.softmax_cross_entropy_with_logits(logits=x, labels=l),
491          constant_op.constant([0]))
492
493    logits = constant_op.constant([[0.0, 0.0]])
494    labels = constant_op.constant([[1.0, 0.0]])
495    # softmax_cross_entropy_with_logits returns two outputs and in this case the
496    # gradient wrt the second is None.
497    g, = backprop.gradients_function(loss, [0])(logits, labels)
498    self.assertAllEqual(g.numpy(), [[-0.5, 0.5]])
499
500  @test_util.run_in_graph_and_eager_modes
501  def testGradientWithinTapeBlock(self):
502    v1 = resource_variable_ops.ResourceVariable(1.)
503    self.evaluate(v1.initializer)
504    with backprop.GradientTape() as t:
505      loss = 2 * v1
506      grad = t.gradient(loss, v1)
507    self.assertAllEqual(self.evaluate(grad), 2.0)
508
509    with backprop.GradientTape(persistent=True) as t:
510      loss = 2 * v1
511      grad = t.gradient(loss, v1)
512    self.assertAllEqual(self.evaluate(grad), 2.0)
513
514  @test_util.run_in_graph_and_eager_modes
515  def testNestedSelfContexts(self):
516    v1 = resource_variable_ops.ResourceVariable(1.)
517    self.evaluate(v1.initializer)
518    with backprop.GradientTape() as t:
519      with self.assertRaises(ValueError):
520        with t:
521          pass
522
523  @test_util.assert_no_new_tensors
524  def testSecondGrad(self):
525
526    def first(x):
527      l = constant_op.constant([[0.0]])
528      x = nn_ops.softmax_cross_entropy_with_logits(labels=l, logits=x)
529      x = math_ops.reduce_sum(x, constant_op.constant([0]))
530      return x
531
532    def second(x):
533      grad = backprop.gradients_function(first, [0])(x)[0]
534      return math_ops.reduce_sum(grad, constant_op.constant([0]))
535
536    f = constant_op.constant([[0.1]])
537    grad = backprop.gradients_function(second, [0])(f)[0]
538    self.assertAllEqual([[0.0]], grad)
539
540  @test_util.run_in_graph_and_eager_modes
541  def testWatchingIsTapeLocal(self):
542    x1 = resource_variable_ops.ResourceVariable(2.0, trainable=False)
543    x2 = resource_variable_ops.ResourceVariable(2.0, trainable=False)
544
545    with backprop.GradientTape() as tape1:
546      with backprop.GradientTape() as tape2:
547        tape1.watch(x1)
548        tape2.watch([x1, x2])
549        y = x1**3
550        z = x2**2
551        dy, dz = tape2.gradient([y, z], [x1, x2])
552      d2y, d2z = tape1.gradient([dy, dz], [x1, x2])
553
554    self.evaluate([x1.initializer, x2.initializer])
555    self.assertEqual(self.evaluate(d2y), 12.0)
556    self.assertIsNone(d2z)
557
558  @test_util.assert_no_new_tensors
559  def testMakeVJP(self):
560
561    def f(x):
562      return x * x
563
564    wrapped_fn = backprop.make_vjp(f, persistent=False)
565    result, vjp = wrapped_fn(constant_op.constant(3.0))
566    self.assertAllEqual(result, 9.0)
567    self.assertAllEqual(vjp(2.0)[0], 12.0)
568
569  def testPersistentMakeVJP(self):
570
571    def f(x):
572      return x * x
573
574    wrapped_fn = backprop.make_vjp(f, persistent=True)
575    _, vjp = wrapped_fn(constant_op.constant(3.0))
576    vjp_result1 = vjp(2.0)[0]
577    vjp_result2 = vjp(2.0)[0]
578    self.assertAllEqual(vjp_result1, vjp_result2, 12.0)
579
580  @test_util.assert_no_new_tensors
581  def testGradGrad(self):
582
583    def sq(x):
584      return x * x
585
586    def grad(x):
587      value = backprop.gradients_function(sq, [0])(x)[0]
588      return value
589
590    gradgrad = backprop.gradients_function(grad, [0])
591
592    self.assertAllEqual(gradgrad(constant_op.constant(3.0))[0], 2.0)
593
594  @test_util.assert_no_new_tensors
595  def testGradGradExp(self):
596
597    def grad(x):
598      value = backprop.gradients_function(math_ops.exp, [0])(x)[0]
599      return value
600
601    gradgrad = backprop.gradients_function(grad, [0])
602
603    self.assertAllEqual(gradgrad(constant_op.constant(0.0))[0], 1.0)
604
605  @test_util.assert_no_new_tensors
606  def testStopGradient(self):
607    grad = backprop.gradients_function(
608        lambda x: array_ops.stop_gradient(math_ops.argmax(x)))
609    self.assertAllEqual(grad([0.0])[0], None)
610
611  @test_util.assert_no_new_tensors
612  def testArgmax(self):
613
614    def argmax(x):
615      i = math_ops.argmax(x)
616      return array_ops.stop_gradient(i)
617
618    grad = backprop.gradients_function(argmax)
619    self.assertAllEqual(grad([0.0])[0], None)
620
621  @test_util.run_gpu_only
622  @test_util.assert_no_new_tensors
623  def testGPU(self):
624
625    def fn(x):
626      with context.device('/gpu:0'):
627        b = constant_op.constant(2.0)
628        c = math_ops.add(x.gpu(), b)
629        # TODO(apassos): remove cpu below by making TensorVSPace aware
630        # of devices.
631        return math_ops.add(c, constant_op.constant(3.0)).cpu()
632
633    grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
634    self.assertAllEqual(grad, 1.0)
635
636  @test_util.run_gpu_only
637  @test_util.assert_no_new_tensors
638  def testGPUImplicitGrad(self):
639    with context.device('gpu:0'):
640      v = resource_variable_ops.ResourceVariable(
641          constant_op.constant(1.0), name='v')
642
643    def f():
644      with context.device('gpu:0'):
645        return v.read_value()
646
647    self.assertEqual(backprop.implicit_grad(f)()[0][0].cpu().numpy(), 1.0)
648
649  @test_util.assert_no_new_tensors
650  def testCPU(self):
651
652    def fn(x):
653      b = constant_op.constant(2.0)
654      c = math_ops.add(x, b)
655      return math_ops.add(c, constant_op.constant(3.0))
656
657    grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0]
658    self.assertAllEqual(grad, 1.0)
659
660  @test_util.run_gpu_only
661  @test_util.assert_no_new_tensors
662  def testTensorCopyGPU2CPU2GPU(self):
663
664    def f(a, b):
665      return a.cpu() + b.cpu()
666
667    with context.device('/gpu:0'):
668      a = constant_op.constant(1.0)
669      b = constant_op.constant(2.0)
670
671    grad = backprop.gradients_function(f, [0])(a, b)[0]
672    self.assertAllEqual(grad, 1.0)
673
674  @test_util.assert_no_new_tensors
675  def testEmptyParams(self):
676
677    def fn(a, b):
678      return a * b
679
680    x = constant_op.constant(1.0)
681    y = constant_op.constant(2.0)
682    dx, dy = backprop.gradients_function(fn)(x, y)
683    self.assertAllEqual(dx, y.numpy())
684    self.assertAllEqual(dy, x.numpy())
685
686  @test_util.assert_no_new_tensors
687  def testUnconnectedNone(self):
688    v = resource_variable_ops.ResourceVariable(1.0, name='testUnconnectedNone')
689
690    def f():
691      v.read_value()
692      return constant_op.constant(1.0)
693
694    self.assertEqual(backprop.implicit_grad(f)()[0][0], None)
695
696  @test_util.assert_no_new_tensors
697  def testGradientTapeReEnterContext(self):
698    g = backprop.GradientTape()
699    with g:
700      x = constant_op.constant(3.0)
701      g.watch(x)
702      y = 2 * x
703    with g:
704      z = 2 * y
705    grad = g.gradient(target=z, sources=[x])
706    self.assertEqual(self.evaluate(grad), [4.0])
707
708  @test_util.assert_no_new_tensors
709  @test_util.run_in_graph_and_eager_modes
710  def testGradientTapeRepeatedSource(self):
711    with backprop.GradientTape(persistent=False) as g:
712      x = constant_op.constant(3.0)
713      g.watch(x)
714      y = 2 * x
715    grad = g.gradient(target=y, sources=[x, x])
716    self.assertEqual(self.evaluate(grad), [2.0, 2.0])
717
718  @test_util.assert_no_new_tensors
719  @test_util.run_in_graph_and_eager_modes
720  def testPersistentGradientTapeRepeatedSource(self):
721    with backprop.GradientTape(persistent=True) as g:
722      x = constant_op.constant(3.0)
723      y = constant_op.constant(5.0)
724      g.watch(x)
725      g.watch(y)
726      z = x * x + x * y
727    grad = g.gradient(target=z, sources=[x, x])
728    self.assertEqual(self.evaluate(grad), [11.0, 11.0])
729    grad = g.gradient(target=z, sources=[y, x])
730    self.assertEqual(self.evaluate(grad), [3.0, 11.0])
731
732  @test_util.assert_no_new_tensors
733  @test_util.run_in_graph_and_eager_modes
734  def testGradientTapeStructure(self):
735    with backprop.GradientTape(persistent=True) as g:
736      # Using different constant values because constant tensors are
737      # cached, leading to a different gradient then what one might expect.
738      x1 = constant_op.constant(3.0)
739      x2 = constant_op.constant(3.1)
740      x3 = constant_op.constant(3.2)
741      g.watch(x1)
742      g.watch(x2)
743      g.watch(x3)
744      y = x1 + 2 * x2 + 3 * x3
745    self.assertEqual(self.evaluate(g.gradient(y, x1)), [1.0])
746    self.assertEqual(self.evaluate(g.gradient(y, (x1,))), (1.0,))
747    self.assertEqual(self.evaluate(g.gradient(y, (x1, x2))), (1.0, 2.0))
748    self.assertEqual(
749        self.evaluate(g.gradient(y, [(x1, x2), (x2, x3)])), [(1.0, 2.0),
750                                                             (2.0, 3.0)])
751    self.assertEqual(
752        self.evaluate(g.gradient(y, (x1, x2, [x1, x3]))),
753        (1.0, 2.0, [1.0, 3.0]))
754    self.assertEqual(
755        self.evaluate(g.gradient(y, [x1, {
756            'x2': x2,
757            'x3': x3
758        }])), [1.0, {
759            'x2': 2.0,
760            'x3': 3.0
761        }])
762
763  @test_util.assert_no_new_tensors
764  @test_util.run_in_graph_and_eager_modes
765  def testGradientTape(self):
766    with backprop.GradientTape() as g:
767      x = constant_op.constant(3.0)
768      g.watch(x)
769      y = x * x
770      with backprop.GradientTape() as gg:
771        gg.watch(y)
772        z = 2 * y
773      inner_grad = gg.gradient(z, [y])[0]
774      self.assertEqual(self.evaluate(inner_grad), 2.0)
775      y += inner_grad
776    grad = g.gradient(y, [x])[0]
777    self.assertEqual(self.evaluate(grad), 6.0)
778
779  @test_util.assert_no_new_tensors
780  @test_util.run_in_graph_and_eager_modes
781  def testGadientTapeCalledOnConstantTarget(self):
782    with backprop.GradientTape() as g:
783      x = variables.Variable([3.0])
784      y = variables.Variable([2.0])
785    grad = g.gradient(x, y)
786    self.assertAllEqual(grad, None)
787
788  @test_util.run_in_graph_and_eager_modes
789  @test_util.run_v1_only('b/120545219')
790  def testGradientTapeWithCond(self):
791    x = constant_op.constant(3.0)
792
793    def true_fn():
794      return x
795
796    def false_fn():
797      return x * x
798
799    with backprop.GradientTape() as g:
800      g.watch(x)
801      y = control_flow_ops.cond(x < x, true_fn, false_fn)
802
803    if not context.executing_eagerly():
804      with self.assertRaisesRegex(NotImplementedError, 'tf.gradients'):
805        dy = g.gradient(y, [x])[0]
806    else:
807      dy = g.gradient(y, [x])[0]
808      self.assertEqual(self.evaluate(dy), 6.0)
809
810  @test_util.run_in_graph_and_eager_modes
811  @test_util.run_v1_only('b/120545219')
812  def testGradientTapeWithWhileLoop(self):
813    i = constant_op.constant(1)
814    x = constant_op.constant(2.)
815
816    def cond(i, _):
817      return i < 3
818
819    def body(i, x):
820      return i + 1, x * 2
821
822    with backprop.GradientTape() as g:
823      g.watch([x])
824      _, y = control_flow_ops.while_loop(cond, body, [i, x])
825
826    if not context.executing_eagerly():
827      with self.assertRaisesRegex(NotImplementedError, 'tf.gradients'):
828        dy = g.gradient(y, [x])[0]
829    else:
830      dy = g.gradient(y, [x])[0]
831      self.assertEqual(self.evaluate(dy), 4.0)
832
833  @test_util.assert_no_new_tensors
834  def testGradientTapeGradientCalledMultipleTimes(self):
835    with backprop.GradientTape() as g:
836      x = constant_op.constant(3.0)
837      g.watch(x)
838      y = x * x
839      z = y * y
840    g.gradient(z, [x])
841    with self.assertRaisesRegex(
842        RuntimeError, 'A non-persistent GradientTape can only'):
843      g.gradient(y, [x])
844
845  @test_util.assert_no_new_tensors
846  def testGradientTapeJacobianCalledMultipleTimes(self):
847    with backprop.GradientTape() as g:
848      x = constant_op.constant(3.0)
849      g.watch(x)
850      y = x * x
851      z = y * y
852    g.jacobian(z, [x])
853    with self.assertRaisesRegex(
854        RuntimeError, 'A non-persistent GradientTape can only'):
855      g.jacobian(y, [x])
856
857  @test_util.assert_no_new_tensors
858  def testJacobianInsideGradientTapeScope(self):
859    with backprop.GradientTape() as g:
860      x = constant_op.constant(3.0)
861      g.watch(x)
862      y = x * x
863      z = y * y
864      self.assertAllClose(4. * 3. ** 3., g.jacobian(z, x))
865
866  @test_util.assert_no_new_tensors
867  def testBatchJacobianInsideGradientTapeScope(self):
868    with backprop.GradientTape(persistent=True) as g:
869      x = constant_op.constant([[3.0]])
870      g.watch(x)
871      y = x * x
872      z = y * y
873      self.assertAllClose([[[4. * 3. ** 3.]]], g.batch_jacobian(z, x))
874
875  @test_util.assert_no_new_tensors
876  def testGradientTapeBatchJacobianCalledMultipleTimes(self):
877    with backprop.GradientTape() as g:
878      x = constant_op.constant([[3.0]])
879      g.watch(x)
880      y = x * x
881      z = y * y
882    g.batch_jacobian(z, x)
883    with self.assertRaisesRegex(
884        RuntimeError, 'A non-persistent GradientTape can only'):
885      g.batch_jacobian(y, [x])
886
887  @test_util.assert_no_new_tensors
888  @test_util.run_in_graph_and_eager_modes
889  @test_util.run_v1_only('b/120545219')
890  def testPersistentTape(self):
891    with backprop.GradientTape(persistent=True) as g:
892      x = constant_op.constant(3.0)
893      g.watch(x)
894      y = x * x
895      z = y * y
896    dz_dx = g.gradient(z, [x])[0]
897    self.assertEqual(self.evaluate(dz_dx), 4 * 3 * 3 * 3)
898    dy_dx = g.gradient(y, [x])[0]
899    self.assertEqual(self.evaluate(dy_dx), 2 * 3)
900    del g
901
902  @test_util.assert_no_new_tensors
903  @test_util.run_in_graph_and_eager_modes
904  def testHigherOrderGradient(self):
905    with backprop.GradientTape(persistent=True) as g:
906      x = constant_op.constant(3.0)
907      g.watch(x)
908      y = x**3  # y       := x^3
909      dy_dx = g.gradient(y, x)  # dy/dx   := 3x^2
910      d2y_dx2 = g.gradient(dy_dx, x)  # d2y/dx2 := 6x
911    d3y_dx3 = g.gradient(d2y_dx2, x)  # d3y/dx3 := 6
912    x = 3
913    self.assertEqual(self.evaluate(y), x**3)
914    self.assertEqual(self.evaluate(dy_dx), 3 * x**2)
915    self.assertEqual(self.evaluate(d2y_dx2), 6 * x)
916    self.assertEqual(self.evaluate(d3y_dx3), 6)
917    del g
918
919  @test_util.assert_no_new_tensors
920  @test_util.run_in_graph_and_eager_modes
921  def testPersistentNestedTape(self):
922    with backprop.GradientTape(persistent=True) as g:
923      x = constant_op.constant(3.0)
924      g.watch(x)
925      y = x * x
926      with backprop.GradientTape(persistent=True) as gg:
927        gg.watch(y)
928        z = 2 * y
929      for _ in range(2):
930        inner_grad = gg.gradient(z, [y])[0]
931        self.assertEqual(self.evaluate(inner_grad), 2.0)
932      y += inner_grad
933      del gg
934    grad = g.gradient(y, [x])[0]
935    self.assertEqual(self.evaluate(grad), 6.0)
936    grad = g.gradient(z, [x])[0]
937    self.assertEqual(self.evaluate(grad), 12.0)
938    del g
939
940  @test_util.assert_no_new_tensors
941  @test_util.run_in_graph_and_eager_modes
942  def testGradientTapeVariable(self):
943    v = resource_variable_ops.ResourceVariable(1.0, name='v')
944    self.evaluate(v.initializer)
945    with backprop.GradientTape() as g:
946      y = v * v
947    grad = g.gradient(y, [v])[0]
948    self.assertAllEqual(self.evaluate(grad), 2.0)
949
950  @test_util.assert_no_new_tensors
951  @test_util.run_in_graph_and_eager_modes
952  def testNestedGradients(self):
953    x = constant_op.constant(3.0)
954    with backprop.GradientTape() as g:
955      g.watch(x)
956      y = x * x
957      z = y * y
958    dz_dx, dz_dy = g.gradient(z, [x, y])
959    self.assertEqual(self.evaluate(dz_dx), 108.0)
960    self.assertEqual(self.evaluate(dz_dy), 18.0)
961
962  @test_util.assert_no_new_tensors
963  @test_util.run_in_graph_and_eager_modes
964  def testUnconnectedGradientsDefault(self):
965    x = constant_op.constant(1.0)
966    y = constant_op.constant(3.0)
967    with backprop.GradientTape() as g:
968      g.watch([x, y])
969      z = y * 2
970    dz_dx = g.gradient(z, x)
971    self.assertEqual(dz_dx, None)
972
973  @test_util.assert_no_new_tensors
974  @test_util.run_in_graph_and_eager_modes
975  def testUnconnectedGradientsZeros(self):
976    x = constant_op.constant(1.0, shape=[2, 2])
977    y = constant_op.constant(3.0)
978    with backprop.GradientTape() as g:
979      g.watch([x, y])
980      z = y * 2
981    dz_dx = g.gradient(z, x, unconnected_gradients='zero')
982    self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(dz_dx))
983
984  @test_util.assert_no_new_tensors
985  @test_util.run_in_graph_and_eager_modes
986  def testUnconnectedGradientsVariablesZeros(self):
987    x = resource_variable_ops.ResourceVariable(
988        constant_op.constant(1., shape=[2, 2]))
989    self.evaluate(x.initializer)
990    y = resource_variable_ops.ResourceVariable(constant_op.constant(3.))
991    self.evaluate(y.initializer)
992    with backprop.GradientTape() as g:
993      g.watch([x, y])
994      z = y * 2
995    dz_dx = g.gradient(z, x, unconnected_gradients='zero')
996    self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(dz_dx))
997
998  @test_util.run_in_graph_and_eager_modes
999  def testUnknownUnconnectedGradientsValueGiven(self):
1000    x = constant_op.constant(1.0)
1001    y = constant_op.constant(1.0)
1002    with backprop.GradientTape() as g:
1003      g.watch([x, y])
1004      z = y * 2
1005    with self.assertRaisesRegex(
1006        ValueError, "Unknown value for unconnected_gradients: 'nonsense'"):
1007      g.gradient(z, x, unconnected_gradients='nonsense')
1008
1009  @test_util.run_in_graph_and_eager_modes
1010  def testUnconnectedGradientsNestedDefunZeros(self):
1011
1012    @function.defun
1013    def f(x):
1014      return x * x
1015
1016    @function.defun
1017    def h(y):
1018      z = f(y)
1019      return array_ops.stop_gradient(z)
1020
1021    x = constant_op.constant(1.0)
1022    with backprop.GradientTape() as g:
1023      g.watch(x)
1024      k = x + 2.
1025      y = h(k)
1026
1027    dy_dx = g.gradient(y, x, unconnected_gradients='zero')
1028    self.assertEqual(0.0, self.evaluate(dy_dx))
1029
1030  def testInvalidRecordOperationMessage(self):
1031    y = constant_op.constant(2.)
1032    x = constant_op.constant(1.)
1033    with backprop.GradientTape() as g:
1034      g.watch(x)
1035      tape_lib.record_operation('InvalidBackprop', [y], [x], lambda dy: [])
1036    with self.assertRaisesRegex(errors_impl.InternalError,
1037                                'InvalidBackprop.*too few gradients'):
1038      g.gradient(y, x)
1039
1040  @test_util.assert_no_new_tensors
1041  def testEmptyParamsForValueAndGradFunction(self):
1042
1043    def fn(a, b):
1044      return a * b
1045
1046    val_and_grads_fn = backprop.val_and_grad_function(fn)
1047
1048    x = 2.0
1049    y = 3.0
1050    val, (dx, dy) = val_and_grads_fn(x, y)
1051    self.assertAllClose(val, x * y)
1052    self.assertAllEqual(dx, y)
1053    self.assertAllEqual(dy, x)
1054
1055  @test_util.assert_no_new_tensors
1056  def testNonEmptyParamsForValueAndGradFunction(self):
1057
1058    def fn(a, b):
1059      return a * b
1060
1061    val_and_grad_fn = backprop.val_and_grad_function(fn, params=[1])
1062
1063    x = 2.0
1064    y = 3.0
1065    val, grads = val_and_grad_fn(x, y)
1066    self.assertAllClose(val, x * y)
1067    self.assertEqual(1, len(grads))
1068    self.assertAllEqual(grads[0], x)
1069
1070  @test_util.run_gpu_only
1071  @test_util.assert_no_new_tensors
1072  def testTensorCopyCPU2GPU2CPU(self):
1073    # forward: a (cpu->gpu) -> add (gpu) -> c (gpu->cpu) -> add (cpu) -> e (cpu)
1074    # back: e (cpu) -> add (cpu) -> c (cpu->gpu) -> add (gpu) -> grad (gpu->cpu)
1075    def f(a, b):
1076      with context.device('/gpu:0'):
1077        c = math_ops.add(a.gpu(0), b.gpu(0))
1078      return math_ops.add(c.cpu(), constant_op.constant(3.0))
1079
1080    with context.device('/cpu:0'):
1081      a = constant_op.constant(1.0)
1082      b = constant_op.constant(2.0)
1083
1084    grad = backprop.gradients_function(f, [0])(a, b)[0]
1085    self.assertAllEqual(grad, 1.0)
1086
1087  def testGetAttrType(self):
1088    typ = backprop.op_attr_type('Add', 'T')
1089    self.assertEqual(typ, int(pywrap_tfe.TF_ATTR_TYPE))
1090
1091  def testGetAttrList(self):
1092    typ = backprop.op_attr_type('MaxPool', 'ksize')
1093    self.assertEqual(typ, [int(pywrap_tfe.TF_ATTR_INT)])
1094
1095  def testMakeAttrType(self):
1096    self.assertEqual(dtypes.float32,
1097                     backprop.make_attr(int(pywrap_tfe.TF_ATTR_TYPE), 1))
1098
1099  def testMakeAttrTypeList(self):
1100    self.assertEqual([dtypes.float32],
1101                     backprop.make_attr([int(pywrap_tfe.TF_ATTR_TYPE)], [1]))
1102
1103  def testMulType(self):
1104
1105    def mul(x):
1106      return math_ops._mul_dispatch(x, x)  # pylint: disable=protected-access
1107
1108    self.assertAllEqual(backprop.gradients_function(mul)(3.0)[0].numpy(), 6.0)
1109
1110  def testMakeAttrShape(self):
1111    for s in ([], None, [1, 2, 3], [None, None], [1, None, 3]):
1112      expected = tensor_shape.TensorShape(s).as_proto()
1113      actual = backprop.make_attr(int(pywrap_tfe.TF_ATTR_SHAPE), s)
1114      self.assertEqual(
1115          expected,
1116          actual,
1117          msg=('For shape %r, expected %r != %r actual' %
1118               (s, expected, actual)))
1119
1120  def testMakeAttrShapeList(self):
1121    shape_list = [[], None, [1, 2, 3], [None, None], [1, None, 3]]
1122    self.assertEqual(
1123        [tensor_shape.TensorShape(s).as_proto() for s in shape_list],
1124        backprop.make_attr([int(pywrap_tfe.TF_ATTR_SHAPE)], shape_list))
1125
1126  def testArgsGradientFunction(self):
1127
1128    def f(*args):
1129      return args[0] * args[0]
1130
1131    grad = backprop.gradients_function(f)
1132    self.assertAllEqual(grad(1.0)[0], 2.0)
1133
1134  def testPartial(self):
1135
1136    def f(x, y):
1137      return x * y
1138
1139    part = functools.partial(f, constant_op.constant(2.0))
1140    self.assertAllEqual(
1141        backprop.gradients_function(part)(constant_op.constant(1.0))[0], 2.0)
1142
1143  def testReturnSameThing(self):
1144
1145    def f(x):
1146      return x, 2 * x
1147
1148    self.assertAllEqual(backprop.gradients_function(f)(1.0)[0], 3.0)
1149
1150  @test_util.assert_no_new_tensors
1151  def testExceptionSafety(self):
1152
1153    def f(unused_x):
1154      raise ValueError()
1155
1156    try:
1157      backprop.gradients_function(f)(1.0)
1158    except ValueError:
1159      pass
1160
1161    def real_f(x):
1162      return x * x
1163
1164    self.assertAllEqual(backprop.gradients_function(real_f)(1.0)[0], 2.0)
1165
1166  @test_util.assert_no_new_tensors
1167  def testMultiValueConvertToTensor(self):
1168    x = resource_variable_ops.ResourceVariable(
1169        initial_value=array_ops.constant([1.0]), name='x')
1170
1171    def fn():
1172      a = math_ops.add(x.value(), 1.0)
1173      # Make sure convert_to_tensor works correctly with list of TensorNodes.
1174      b = array_ops.stack([a, a], axis=0)
1175      return math_ops.reduce_mean(b)
1176
1177    grad = backprop.implicit_grad(fn)()[0][0]
1178    self.assertAllEqual([1.0], grad)
1179
1180  def testOutput(self):
1181
1182    def multiout(x):
1183      return x + 2, x * x
1184
1185    x = constant_op.constant([0.0, 1.0, 2.0])
1186
1187    grad = backprop.gradients_function(multiout)(x)[0]
1188    self.assertAllEqual([1.0, 3.0, 5.0], grad)
1189
1190  def testMultiValuePreservesIfNotDiffedAgainst(self):
1191
1192    def tfe_conv2d(timage, tkernel, conv2dstrides):
1193      return nn_ops.conv2d(timage, tkernel, conv2dstrides, 'SAME')
1194
1195    i = constant_op.constant([[[[1.0]]]])
1196    k = constant_op.constant([[[[2.0]]]])
1197    s = [1, 1, 1, 1]
1198
1199    grad = backprop.gradients_function(tfe_conv2d, params=(0,))(i, k, s)[0]
1200    self.assertAllEqual([[[[2.0]]]], grad)
1201
1202  def testSameObjectForMultipleArguments(self):
1203
1204    def f(x, y):
1205      return math_ops.multiply(x, y)
1206
1207    g = backprop.gradients_function(f)
1208
1209    def np_g(x, y):
1210      dx, dy = g(x, y)
1211      return [dx.numpy(), dy.numpy()]
1212
1213    x = constant_op.constant(1.)
1214    self.assertAllEqual([1., 1.], np_g(x, x))
1215    x = 1.
1216    self.assertAllEqual([1., 1.], np_g(x, x))
1217    x = constant_op.constant([[1.]])
1218    self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x))
1219    x = [[1.]]
1220    self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x))
1221
1222    v = resource_variable_ops.ResourceVariable(
1223        initial_value=1., name='testSameObjectForMultipleArguments.Variable')
1224    self.assertAllEqual([1., 1.], np_g(v, v))
1225
1226  @test_util.assert_no_new_tensors
1227  def testImplicitGradientsCustomGradientAndCachedVariableValue(self):
1228
1229    @custom_gradient.custom_gradient
1230    def my_square(x):
1231      result = math_ops.square(x)
1232
1233      def grad(dr):
1234        return 2 * dr * x + 1
1235
1236      return result, grad
1237
1238    x = resource_variable_ops.ResourceVariable(
1239        initial_value=3., name='X.' + self.id())
1240
1241    def f():
1242      return my_square(x)
1243
1244    g = backprop.implicit_grad(f)
1245
1246    grads_and_vars = g()
1247    self.assertEqual(1, len(grads_and_vars))
1248    grad, var = grads_and_vars[0]
1249    self.assertAllEqual(7, grad)
1250    self.assertAllEqual(x, var)
1251
1252  def testJacobianCustomGradient(self):
1253
1254    class MyCallable(object):
1255
1256      def __init__(self):
1257        self.a = variables.Variable(1.)
1258        self.b = variables.Variable(2.)
1259        self.c = variables.Variable(3.)
1260
1261      def __call__(self, x):
1262        return self.a * x * x + self.b * x + self.c
1263
1264    @def_function.function
1265    def call(c, x):
1266
1267      @custom_gradient.custom_gradient
1268      def _call():
1269        y = c(x)
1270
1271        def grad(dy, variables=None):  # pylint: disable=redefined-outer-name
1272          with backprop.GradientTape(persistent=True) as g:
1273            g.watch(variables)
1274            y = c(x)
1275          grad_vars = [
1276              2 * math_ops.reduce_sum(dy * g.jacobian(y, v)) for v in variables
1277          ]
1278          del g
1279          return (), grad_vars
1280
1281        return y, grad
1282
1283      return _call()
1284
1285    c = MyCallable()
1286    x = constant_op.constant([1., 2., 3.])
1287    with backprop.GradientTape(persistent=True) as g:
1288      g.watch([c.a, c.b, c.c])
1289      y = call(c, x)
1290    self.assertAllEqual(g.gradient(y, x), None)
1291
1292  @test_util.assert_no_new_tensors
1293  def testCustomGradient(self):
1294
1295    @custom_gradient.custom_gradient
1296    def my_mul(x, y):
1297      result = x * y
1298
1299      def grad(dr):
1300        return [dr * y, dr * x]
1301
1302      return result, grad
1303
1304    lr = 0.25
1305    x = resource_variable_ops.ResourceVariable(2., name='x')
1306
1307    def loss(x):
1308      return my_mul(2., x.read_value())
1309
1310    loss_grads_fn = backprop.implicit_val_and_grad(loss)
1311
1312    losses = []
1313    for _ in range(5):
1314      loss, grads_and_vars = loss_grads_fn(x)
1315      losses.append(loss.numpy())
1316      for (grad, var) in grads_and_vars:
1317        var.assign_sub(lr * grad)
1318    self.assertAllEqual(losses, [4.0, 3., 2., 1., 0.])
1319
1320  @test_util.assert_no_new_tensors
1321  def testCustomGradientIdentity(self):
1322
1323    @custom_gradient.custom_gradient
1324    def my_identity(x):
1325
1326      def grad(dresult):
1327        return [2 * dresult]
1328
1329      return x, grad
1330
1331    self.assertAllEqual(backprop.gradients_function(my_identity)(1.0)[0], 2.0)
1332
1333  def testDifferentiatingFunctionThatReturnsNone(self):
1334
1335    def fn(x, y):
1336      result = x * y  # pylint: disable=unused-variable
1337
1338    x = constant_op.constant(1)
1339    y = constant_op.constant(2)
1340
1341    loss_grads_fn = backprop.implicit_val_and_grad(fn)
1342    with self.assertRaisesRegex(
1343        ValueError, 'Cannot differentiate a function that returns None; '
1344        'did you forget to return a value from fn?'):
1345      loss_grads_fn(x, y)
1346
1347    val_and_grads_fn = backprop.val_and_grad_function(fn)
1348    with self.assertRaisesRegex(
1349        ValueError, 'Cannot differentiate a function that returns None; '
1350        'did you forget to return a value from fn?'):
1351      val_and_grads_fn(x, y)
1352
1353  def testZerosCacheDoesntLeakAcrossGraphs(self):
1354    with ops.Graph().as_default():
1355
1356      def get_grad():
1357        with ops.Graph().as_default(), self.cached_session():
1358          t = constant_op.constant(1, dtype=dtypes.float32, shape=(10, 4))
1359          x = constant_op.constant(2, dtype=dtypes.float32, shape=(10, 4))
1360          with backprop.GradientTape() as tape:
1361            tape.watch(x)
1362            x1, _ = array_ops.split(x, num_or_size_splits=2, axis=1)
1363            y1 = x1**2
1364            y = array_ops.concat([y1, t], axis=1)
1365          return self.evaluate(tape.gradient(y, x))
1366
1367      grad1 = get_grad()
1368      grad2 = get_grad()
1369
1370      self.assertAllEqual(grad1, grad2)
1371
1372  @test_util.run_in_graph_and_eager_modes
1373  def testSelectivelyWatchVariables(self):
1374    x1 = resource_variable_ops.ResourceVariable(1.0)
1375    x2 = resource_variable_ops.ResourceVariable(1.0)
1376    with backprop.GradientTape(watch_accessed_variables=False) as tape:
1377      tape.watch(x2)
1378      y = x1**2
1379      z = x2**3
1380    self.assertTupleEqual(tape.watched_variables(), (x2,))
1381    dy, dz = tape.gradient([y, z], [x1, x2])
1382    self.evaluate([x1.initializer, x2.initializer])
1383    self.assertIsNone(dy)
1384    self.assertEqual(self.evaluate(dz), 3.0)
1385
1386  @test_util.run_in_graph_and_eager_modes
1387  def testDifferentiatingScalarCache(self):
1388    # In the following test, if x2 = x1 (i.e the objects are the exact same),
1389    # then y is essentially, 2*x1, and dy/dx1 = 2.
1390    # When we had a pure scalar cache in eager, this would be the case. This
1391    # test prevents us from going back to that case.
1392    with backprop.GradientTape(persistent=False) as g:
1393      x1 = constant_op.constant(3.0)
1394      x2 = constant_op.constant(3.0)
1395      g.watch(x1)
1396      g.watch(x2)
1397      y = x1 + x2
1398    grad = g.gradient(target=y, sources=[x1])
1399    self.assertEqual(self.evaluate(grad), [1.0])
1400
1401  def testVariablesAndConstantsProduceTheSameGradients(self):
1402
1403    # In the following test, differentiating [y, z] against [a, b] gives:
1404    # (dy/da + dz/da, dy/db + dz/db).
1405    # If a and b are the same constant, dz/da will not be 0 (which it should
1406    # be).
1407    # This is solved by using variable since doing a read_value on a tensor will
1408    # produce a new tensor and corresponding TensorHandle, and not reuse the
1409    # same tensor (which would happen if we are using a cache and reusing
1410    # EagerTensor objects).
1411    def get_grads(a, b):
1412      with backprop.GradientTape() as tape:
1413        tape.watch([a, b])
1414        y = a**3
1415        z = b**2
1416      return tape.gradient([y, z], [a, b])
1417
1418    gradients_constants = get_grads(
1419        constant_op.constant(2.0), constant_op.constant(2.0))
1420    gradients_variables = get_grads(
1421        resource_variable_ops.ResourceVariable(2.0),
1422        resource_variable_ops.ResourceVariable(2.0))
1423    self.assertAllEqual(gradients_constants, gradients_variables)
1424
1425  def testUnknownShapes(self):
1426    with ops.Graph().as_default():
1427      with backprop.GradientTape() as tape:
1428        a = array_ops.placeholder(dtype=dtypes.float32, shape=None)
1429        tape.watch(a)
1430        b = a**3
1431
1432      db_da = tape.gradient(b, a)
1433
1434      with self.cached_session() as sess:
1435        self.assertEqual((8.0, 12.0), sess.run((b, db_da), feed_dict={a: 2.0}))
1436
1437  @test_util.run_in_graph_and_eager_modes
1438  def testCustomGradientInEagerAndGraph(self):
1439
1440    @custom_gradient.custom_gradient
1441    def f(x):
1442      y = x * x
1443
1444      def grad(dy):
1445        return [4 * dy]
1446
1447      return y, grad
1448
1449    with backprop.GradientTape() as t:
1450      c = constant_op.constant(1.0)
1451      t.watch(c)
1452      g = f(c)
1453    self.assertAllEqual(self.evaluate(t.gradient(g, c)), 4.0)
1454
1455  def testOverrideSecondOrderWithCustomGradient(self):
1456
1457    @custom_gradient.custom_gradient
1458    def f(x):
1459
1460      def first_order_grad(dz):
1461
1462        @custom_gradient.custom_gradient
1463        def first_order_custom(unused_x):
1464
1465          def h(ddz):
1466            return -2.1 * ddz
1467
1468          return -1.1, h
1469
1470        return dz * first_order_custom(x)
1471
1472      return x + 10., first_order_grad
1473
1474    c = constant_op.constant(1.)
1475    with backprop.GradientTape() as outer:
1476      outer.watch(c)
1477      with backprop.GradientTape() as inner:
1478        inner.watch(c)
1479        d = f(c)**4.
1480      dd = inner.gradient(d, c)
1481      self.assertAllClose(4. * f(c)**3. * -1.1, dd)
1482    self.assertAllClose(3. * 4. * f(c)**2. * -1.1 * -1.1 + 4. * f(c)**3. * -2.1,
1483                        outer.gradient(dd, c))
1484
1485  @test_util.run_in_graph_and_eager_modes
1486  def testCustomGradientForwardprop(self):
1487
1488    @custom_gradient.custom_gradient
1489    def f(x):
1490      z = 2. * tensor_util.constant_value(x)
1491
1492      def g(dz):
1493
1494        @custom_gradient.custom_gradient
1495        def first_order(unused_x, unused_dz):
1496
1497          def second_order_and_transpose(unused_ddz):
1498            return 2.2, 3.1
1499
1500          return 2.1, second_order_and_transpose
1501
1502        return first_order(x, dz)
1503
1504      return z, g
1505
1506    with backprop.GradientTape(persistent=True) as t:
1507      with backprop.GradientTape() as tt:
1508        c = constant_op.constant(1.)
1509        t.watch(c)
1510        tt.watch(c)
1511        output_grad = array_ops.ones([])
1512        t.watch(output_grad)
1513        output = f(c)
1514        self.assertAllClose(2., output)
1515      gc = tt.gradient(output, c, output_gradients=output_grad)
1516      self.assertAllClose(2.1, gc)
1517    ggc = t.gradient(gc, c)
1518    self.assertAllClose(2.2, ggc)
1519    # Note that executed eagerly this kind of transpose is not efficient. But
1520    # from a tf.function we could prune out the first-order gradient
1521    # computation.
1522    transpose = t.gradient(gc, output_grad)
1523    self.assertAllClose(3.1, transpose)
1524
1525  @test_util.run_in_graph_and_eager_modes
1526  def testMaxPooling3DGradient(self):
1527
1528    def forward(a):
1529      r = max_pooling3d(a, pool_size=pool_size, strides=strides, padding='SAME')
1530      return r
1531
1532    input_sizes = [1, 3, 2, 4, 1]
1533    pool_size = (2, 2, 1)
1534    strides = (1, 1, 1)
1535
1536    total_size = np.prod(input_sizes)
1537    x = np.arange(1, total_size + 1, dtype=np.float32)
1538    aa = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32)
1539    da = backprop.gradients_function(forward)(aa)
1540
1541    if not context.executing_eagerly():
1542      tf_aa = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32)
1543      tf_max = max_pooling3d(
1544          tf_aa, pool_size=pool_size, strides=strides, padding='SAME')
1545      tf_da = gradients.gradients(tf_max, [tf_aa])
1546      self.assertAllEqual(da[0], tf_da[0])
1547
1548  @test_util.run_in_graph_and_eager_modes
1549  def testWatchBadThing(self):
1550    g = backprop.GradientTape()
1551    with self.assertRaisesRegex(ValueError, 'ndarray'):
1552      g.watch(np.array(1.))
1553
1554  def testWatchComposite(self):
1555    """Test that tape.watch expands composites and watches component Tensors."""
1556    with backprop.GradientTape() as t:
1557      values = constant_op.constant([1.0, 2.0], dtypes.float32)
1558      s = sparse_tensor.SparseTensor(
1559          indices=[[0, 0], [1, 2]], values=values, dense_shape=[3, 4])
1560      t.watch(s)
1561      z = sparse_ops.sparse_reduce_sum_v2(s)
1562    result = t.gradient(z, values)
1563    self.assertAllEqual(result, [1.0, 1.0])
1564
1565  def testWatchedVariablesAfterNonPersistentGradientCall(self):
1566    with backprop.GradientTape(persistent=False) as tape:
1567      x = resource_variable_ops.ResourceVariable(1.0)
1568      tape.watch(x)
1569    tape.gradient(x, x)
1570    self.assertEqual((x,), tape.watched_variables())
1571
1572  def testWatchedVariablesOnlyHasVariablesFromLastTape(self):
1573    with backprop.GradientTape(persistent=False) as tape:
1574      x = resource_variable_ops.ResourceVariable(1.0)
1575      tape.watch(x)
1576    with backprop.GradientTape(persistent=False) as tape:
1577      z = resource_variable_ops.ResourceVariable(2.0)
1578      tape.watch(z)
1579    tape.gradient(z, z)
1580    self.assertEqual((z,), tape.watched_variables())
1581
1582  def testWatchedVariablesRespectReset(self):
1583    with backprop.GradientTape(persistent=False) as tape:
1584      x = resource_variable_ops.ResourceVariable(1.0)
1585      tape.watch(x)
1586      self.assertEqual((x,), tape.watched_variables())
1587      tape.reset()
1588      z = resource_variable_ops.ResourceVariable(2.0)
1589      tape.watch(z)
1590      self.assertEqual((z,), tape.watched_variables())
1591    tape.gradient(z, z)
1592    self.assertEqual((z,), tape.watched_variables())
1593
1594  def testNameScope(self):
1595
1596    def fn(x):
1597      with ops.name_scope('my_scope'):
1598        a = math_ops.cos(x)
1599        b = math_ops.cos(x)
1600        return math_ops.add(a, b)
1601
1602    @function.defun
1603    def grad_fn(x):
1604      return backprop.gradients_function(fn)(x)
1605
1606    grad_ops = grad_fn.get_concrete_function(
1607        constant_op.constant(1.0)).graph.get_operations()
1608    num_sin_ops_found = 0
1609    for op in grad_ops:
1610      if op.type == 'Sin':
1611        num_sin_ops_found += 1
1612        self.assertIn('gradient_tape/my_scope/', op.name)
1613    self.assertEqual(num_sin_ops_found, 2)
1614
1615  @test_util.assert_no_new_pyobjects_executing_eagerly
1616  def testRecomputeGradWithDifferentShape(self):
1617
1618    @custom_gradient.recompute_grad
1619    def outer(x):
1620      return [x[0] + 1, x[1] + 1]
1621
1622    x = [
1623        variables.Variable([1.0, 2.0], name='a'),
1624        variables.Variable(1.0, name='b')
1625    ]
1626    with backprop.GradientTape():
1627      y = outer(x)
1628      self.assertAllEqual(y[0], [2.0, 3.0])
1629      self.assertAllEqual(y[1], 2.0)
1630
1631    @custom_gradient.recompute_grad
1632    def outer_dict(x):
1633      for key in x.keys():
1634        x[key] = x[key] + 1
1635      return x
1636
1637    x = {x[0].ref(): x[0], x[1].ref(): x[1]}
1638    with backprop.GradientTape():
1639      y = outer_dict(x)
1640      y = list(y.values())
1641      self.assertAllEqual(y[0], [2.0, 3.0])
1642      self.assertAllEqual(y[1], 2.0)
1643
1644  @test_util.assert_no_new_pyobjects_executing_eagerly
1645  def testRecomputeGradWithNestedFunctionAndWhileLoop(self):
1646
1647    @custom_gradient.recompute_grad
1648    @def_function.function
1649    def outer(x):
1650
1651      @def_function.function
1652      def middle(y):
1653
1654        @def_function.function
1655        def inner(z):
1656          return z + 1
1657
1658        i = constant_op.constant(0.0)
1659        c = lambda y, i: i < 10.
1660        b = lambda y, i: (inner(y), i + 1.0)
1661        y, i = control_flow_ops.while_loop(c, b, [y, i])
1662
1663        return y
1664
1665      return middle(x)
1666
1667    with MemoryChecker() as memory_checker:
1668      for _ in range(5):
1669        x = variables.Variable(1.0, name='x')
1670        with backprop.GradientTape():
1671          y = outer(x)
1672          self.assertAllEqual(y, 11.0)
1673
1674    memory_checker.report()
1675    memory_checker.assert_no_leak_if_all_possibly_except_one()
1676
1677
1678class JacobianTest(test.TestCase):
1679
1680  def _jacobian(self, experimental_use_pfor):
1681    persistent = context.executing_eagerly and not experimental_use_pfor
1682    with backprop.GradientTape(persistent=persistent) as g:
1683      x = constant_op.constant([1., 2.])
1684      y = constant_op.constant([3., 4.])
1685      g.watch(x)
1686      g.watch(y)
1687      z = x * x * y
1688    jacobian = g.jacobian(
1689        z, [x, y], experimental_use_pfor=experimental_use_pfor)
1690    answer = [array_ops.diag(2 * x * y), array_ops.diag(x * x)]
1691    return jacobian, answer
1692
1693  @test_util.run_v1_only('b/120545219')
1694  def testPfor(self):
1695    jacobian, answer = self._jacobian(experimental_use_pfor=True)
1696    for j, a in zip(jacobian, answer):
1697      self.assertAllEqual(a, j)
1698
1699  @test_util.run_v1_only('b/120545219')
1700  def testWhileLoop(self):
1701    jacobian, answer = self._jacobian(experimental_use_pfor=False)
1702    for j, a in zip(jacobian, answer):
1703      self.assertAllEqual(a, j)
1704
1705  @test_util.run_v1_only('b/120545219')
1706  def testPforDefun(self):
1707
1708    @function.defun
1709    def _f():
1710      return self._jacobian(experimental_use_pfor=True)
1711
1712    jacobian, answer = _f()
1713    for j, a in zip(jacobian, answer):
1714      self.assertAllEqual(a, j)
1715
1716  @test_util.run_v1_only('b/120545219')
1717  def testWhileLoopDefun(self):
1718
1719    @function.defun
1720    def _f():
1721      return self._jacobian(experimental_use_pfor=False)
1722
1723    jacobian, answer = _f()
1724    for j, a in zip(jacobian, answer):
1725      self.assertAllEqual(a, j)
1726
1727  @test_util.run_v1_only('b/120545219')
1728  def testPersistentTape(self):
1729    if not context.executing_eagerly():
1730      return
1731    with backprop.GradientTape() as g:
1732      x = constant_op.constant([1.0, 2.0])
1733      g.watch(x)
1734      y = x * x
1735    with self.assertRaisesRegex(RuntimeError, 'persistent'):
1736      g.jacobian(y, x, experimental_use_pfor=False)
1737
1738  @test_util.run_v1_only('b/120545219')
1739  def test_parallel_iterations(self):
1740    with backprop.GradientTape(persistent=True) as g:
1741      x = constant_op.constant([[1., 2], [3, 4]])
1742      g.watch(x)
1743      y = math_ops.matmul(x, x)
1744    self.assertAllClose(
1745        g.jacobian(y, x, parallel_iterations=2),
1746        g.jacobian(y, x, parallel_iterations=3))
1747
1748  @test_util.run_in_graph_and_eager_modes
1749  def test_nested_jacobian(self):
1750    if context.executing_eagerly():
1751      # TODO(agarwal): b/128842926
1752      self.skipTest('Conversion of function calls not implemented yet.')
1753    x = array_ops.ones((10, 2))
1754    with backprop.GradientTape(persistent=False) as g:
1755      g.watch(x)
1756      with backprop.GradientTape(persistent=False) as gg:
1757        gg.watch(x)
1758        y = math_ops.reduce_sum(math_ops.square(x))
1759      dy_x = gg.jacobian(y, x)
1760    dy_xx = g.batch_jacobian(dy_x, x)
1761    dy_xx_answer = [[[2., 0], [0, 2.]]] * 10
1762    self.assertAllClose(dy_xx_answer, self.evaluate(dy_xx))
1763
1764  def test_nested_batch_jacobian_foldl(self):
1765    def _grad(f):
1766      def _grad_function(primal):
1767        with backprop.GradientTape() as tape:
1768          tape.watch(primal)
1769          primal_out = f(primal)
1770        return tape.batch_jacobian(primal_out, primal)
1771      return _grad_function
1772
1773    def _func(x):
1774      return array_ops.reshape(
1775          functional_ops.foldl_v2(lambda a, b: math_ops.cos(a + b),
1776                                  array_ops.transpose(x)),
1777          [1, 1])
1778
1779    f = _func
1780    x = constant_op.constant([[1., 2.]])
1781    for _ in range(2):
1782      theoretical, numerical = gradient_checker_v2.compute_gradient(f, [x])
1783      self.assertAllClose(theoretical, numerical, rtol=1e-3)
1784      f = _grad(f)
1785      expected_flat = array_ops.reshape(numerical, [-1])
1786      self.assertAllClose(expected_flat,
1787                          array_ops.reshape(f(x), [-1]),
1788                          rtol=1e-3)
1789      self.assertAllClose(expected_flat,
1790                          array_ops.reshape(def_function.function(f)(x), [-1]),
1791                          rtol=1e-3)
1792
1793  def test_grad_jacobian_conv(self):
1794    def _inner(x):
1795      kernel = array_ops.ones([3, 3, 1, 9])
1796      with backprop.GradientTape() as tape:
1797        tape.watch(x)
1798        y = nn_ops.conv2d(x, kernel, strides=(1, 1), padding='SAME',
1799                          data_format='NHWC')
1800        reduced = math_ops.reduce_sum(y ** 2., axis=[2, 3])
1801      return math_ops.reduce_sum(tape.batch_jacobian(reduced, x))
1802
1803    theoretical, numerical = gradient_checker_v2.compute_gradient(
1804        def_function.function(_inner), [array_ops.ones([10, 4, 4, 1])])
1805    self.assertAllClose(numerical, theoretical, rtol=1e-1)
1806
1807    @def_function.function
1808    def _outer():
1809      with backprop.GradientTape() as tape:
1810        x = array_ops.ones([10, 4, 4, 1])
1811        tape.watch(x)
1812        y = _inner(x)
1813      return tape.gradient(y, x)
1814
1815    self.assertAllClose(array_ops.reshape(numerical, [-1]),
1816                        array_ops.reshape(_outer(), [-1]), rtol=1e-1)
1817
1818  @test_util.run_in_graph_and_eager_modes
1819  def test_indexed_slices(self):
1820    with backprop.GradientTape(persistent=True) as g:
1821      inp = random_ops.random_uniform([3, 2])
1822      g.watch(inp)
1823      output = nn.embedding_lookup(inp, [0, 2])
1824    self.assertAllClose(
1825        g.jacobian(output, inp, experimental_use_pfor=True),
1826        g.jacobian(output, inp, experimental_use_pfor=False))
1827
1828  def test_foldl_partial_function(self):
1829    x = array_ops.zeros([3])
1830    with backprop.GradientTape(persistent=True) as tape:
1831      tape.watch(x)
1832      result = def_function.function(
1833          functools.partial(functional_ops.foldl_v2, lambda a, b: a + b))(
1834              x)
1835    self.assertAllClose([1., 1., 1.],
1836                        tape.jacobian(result, x, experimental_use_pfor=True))
1837    self.assertAllClose([1., 1., 1.],
1838                        tape.jacobian(result, x, experimental_use_pfor=False))
1839
1840    # Non-persistent tapes take a different function gradient path, but also
1841    # work with pfor=True.
1842    x = array_ops.zeros([3])
1843    with backprop.GradientTape() as tape:
1844      tape.watch(x)
1845      result = def_function.function(
1846          functools.partial(functional_ops.foldl_v2, lambda a, b: a + b))(
1847              x)
1848    self.assertAllClose([1., 1., 1.],
1849                        tape.jacobian(result, x, experimental_use_pfor=True))
1850
1851  def test_foldl_pure_function(self):
1852
1853    @def_function.function
1854    def compute_jacobian(use_pfor):
1855      x = array_ops.zeros([3])
1856      with backprop.GradientTape(persistent=True) as tape:
1857        tape.watch(x)
1858        result = functools.partial(functional_ops.foldl_v2, lambda a, b: a + b)(
1859            x)
1860      return tape.jacobian(result, x, experimental_use_pfor=use_pfor)
1861
1862    self.assertAllClose(compute_jacobian(use_pfor=True),
1863                        compute_jacobian(use_pfor=False))
1864
1865  def test_cond_func_grad_jacobian(self):
1866
1867    @def_function.function
1868    def f(x):
1869      y = control_flow_ops.cond(x > 0., lambda: x**3., lambda: x**2.)
1870      return y
1871
1872    with backprop.GradientTape(persistent=True) as tape:
1873      x = constant_op.constant(1.)
1874      tape.watch(x)
1875      y = f(x)
1876      grad = tape.gradient(y, x)
1877    self.assertAllClose(3., grad)
1878    jacobian = tape.jacobian(grad, x, experimental_use_pfor=False)
1879    self.assertAllClose(6., jacobian)
1880    jacobian_pfor = tape.jacobian(grad, x, experimental_use_pfor=True)
1881    self.assertAllClose(6., jacobian_pfor)
1882
1883
1884@test_util.run_all_in_graph_and_eager_modes
1885class BatchJacobianTest(test.TestCase, parameterized.TestCase):
1886
1887  def _batch_jacobian(self, experimental_use_pfor):
1888    persistent = context.executing_eagerly and not experimental_use_pfor
1889    with backprop.GradientTape(persistent=persistent) as g:
1890      x = constant_op.constant([[1., 2.], [3., 4.]])
1891      y = constant_op.constant([[3., 4.], [5., 6.]])
1892      g.watch(x)
1893      z = x * x * y
1894    batch_jacobian = g.batch_jacobian(
1895        z, x, experimental_use_pfor=experimental_use_pfor)
1896    answer = array_ops.stack(
1897        [array_ops.diag(2 * x[0] * y[0]),
1898         array_ops.diag(2 * x[1] * y[1])])
1899    return batch_jacobian, answer
1900
1901  def testPfor(self):
1902    batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=True)
1903    self.assertAllEqual(answer, batch_jacobian)
1904
1905  def testWhileLoop(self):
1906    batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=False)
1907    self.assertAllEqual(answer, batch_jacobian)
1908
1909  def testPforDefun(self):
1910
1911    @function.defun
1912    def _f():
1913      return self._batch_jacobian(experimental_use_pfor=True)
1914
1915    batch_jacobian, answer = _f()
1916    self.assertAllEqual(answer, batch_jacobian)
1917
1918  def testWhileLoopDefun(self):
1919
1920    @function.defun
1921    def _f():
1922      return self._batch_jacobian(experimental_use_pfor=False)
1923
1924    batch_jacobian, answer = _f()
1925    self.assertAllEqual(answer, batch_jacobian)
1926
1927  def testPersistentTape(self):
1928    if not context.executing_eagerly():
1929      return
1930    with backprop.GradientTape() as g:
1931      x = constant_op.constant([[1.0, 2.0]])
1932      g.watch(x)
1933      y = x * x
1934    with self.assertRaisesRegex(RuntimeError, 'persistent'):
1935      g.batch_jacobian(y, x, experimental_use_pfor=False)
1936
1937  def testBadShape(self):
1938    x = random_ops.random_uniform([2, 3])
1939    with backprop.GradientTape() as g:
1940      y = array_ops.concat([x, x], axis=0)
1941    with self.assertRaisesRegex(ValueError, 'Need first dimension'):
1942      g.batch_jacobian(y, x)
1943
1944  def testBadInputRank(self):
1945    x = random_ops.random_uniform([2])
1946    with backprop.GradientTape() as g:
1947      y = random_ops.random_uniform([2, 2])
1948    with self.assertRaisesRegex(ValueError, 'must have rank at least 2'):
1949      g.batch_jacobian(y, x)
1950
1951  def testBadOutputRank(self):
1952    x = random_ops.random_uniform([2, 2])
1953    with backprop.GradientTape() as g:
1954      y = random_ops.random_uniform([2])
1955    with self.assertRaisesRegex(ValueError, 'must have rank at least 2'):
1956      g.batch_jacobian(y, x)
1957
1958  def test_parallel_iterations(self):
1959    with backprop.GradientTape(persistent=True) as g:
1960      x = constant_op.constant([[1., 2], [3, 4]])
1961      g.watch(x)
1962      w = constant_op.constant([[1., 2, 3, 4], [5, 6, 7, 8]])
1963      y = math_ops.matmul(x, w)
1964    self.assertAllClose(
1965        g.batch_jacobian(y, x, parallel_iterations=2),
1966        g.batch_jacobian(y, x, parallel_iterations=3))
1967
1968  @parameterized.parameters((True, True), (True, False), (False, True),
1969                            (False, False))
1970  def test_degenerate_shape(self, use_function, use_pfor):
1971
1972    def f(x):
1973      with backprop.GradientTape(persistent=True) as tape:
1974        tape.watch(x)
1975        y = x**2
1976      return tape.batch_jacobian(y, x, experimental_use_pfor=use_pfor)
1977
1978    if use_function:
1979      f = def_function.function(f)
1980    self.assertAllEqual([1, 0, 0], array_ops.shape(f(array_ops.zeros([1, 0]))))
1981
1982  @parameterized.parameters((True,), (False))
1983  def test_zeros_type_correct(self, use_pfor):
1984    for dtype in [dtypes.float32, dtypes.float64]:
1985      @def_function.function
1986      def f(x):
1987        del x
1988        return constant_op.constant([[1.]], dtype=dtype)  # pylint: disable=cell-var-from-loop
1989
1990      with backprop.GradientTape(persistent=True) as tape:
1991        x = constant_op.constant([[2.]], dtype=dtype)
1992        tape.watch(x)
1993        y = f(x)
1994      jac = tape.batch_jacobian(y, x, experimental_use_pfor=use_pfor)
1995      self.assertEqual(dtype, jac.dtype)
1996      self.assertAllClose([[[0.]]], jac)
1997
1998      with backprop.GradientTape(persistent=True) as tape:
1999        x = constant_op.constant([[2.]], dtype=dtype)
2000        tape.watch(x)
2001        y = f(x)
2002      jac = tape.batch_jacobian(y, x, unconnected_gradients='zero',
2003                                experimental_use_pfor=use_pfor)
2004      self.assertEqual(dtype, jac.dtype)
2005      self.assertAllClose([[[0.]]], jac)
2006
2007
2008class AggregateIndexedSlicesGradientsTest(test_util.TensorFlowTestCase):
2009
2010  def _assert_indexed_slices_equal(self, left, right):
2011    self.assertAllEqual(
2012        self.evaluate(ops.convert_to_tensor(left)),
2013        self.evaluate(ops.convert_to_tensor(right)))
2014
2015  def testNoGradients(self):
2016    self.assertIsNone(backprop.aggregate_indexed_slices_gradients([]))
2017
2018  def testOneGradient(self):
2019    t = math_ops._as_indexed_slices(
2020        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
2021    result = backprop.aggregate_indexed_slices_gradients([t])
2022    self._assert_indexed_slices_equal(t, result)
2023
2024  def testMultipleGradients(self):
2025    t0 = math_ops._as_indexed_slices(
2026        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
2027    t1 = math_ops._as_indexed_slices(
2028        constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
2029    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
2030    result = backprop.aggregate_indexed_slices_gradients([t0, t1])
2031    self._assert_indexed_slices_equal(total, result)
2032
2033  def testMultipleGradientsWithNones(self):
2034    t0 = math_ops._as_indexed_slices(
2035        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
2036    t1 = math_ops._as_indexed_slices(
2037        constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
2038    t3 = None
2039    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
2040    result = backprop.aggregate_indexed_slices_gradients([t0, t1, t3])
2041    self._assert_indexed_slices_equal(total, result)
2042
2043  def testMixedTensorAndIndexedSlices(self):
2044    t0 = math_ops._as_indexed_slices(
2045        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
2046    t1 = constant_op.constant([[0., 0.], [5, 6], [7., 8.]])
2047    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
2048    result = backprop.aggregate_indexed_slices_gradients([t0, t1])
2049    self._assert_indexed_slices_equal(total, result)
2050
2051
2052if __name__ == '__main__':
2053  test.main()
2054