1# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Tests for Grappler LayoutOptimizer."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import numpy as np
22
23from tensorflow.core.protobuf import config_pb2
24from tensorflow.core.protobuf import device_properties_pb2
25from tensorflow.core.protobuf import rewriter_config_pb2
26from tensorflow.core.protobuf import saver_pb2
27from tensorflow.python.client import session
28from tensorflow.python.framework import constant_op
29from tensorflow.python.framework import dtypes
30from tensorflow.python.framework import ops
31from tensorflow.python.framework import random_seed
32from tensorflow.python.framework import test_util
33from tensorflow.python.grappler import cluster as gcluster
34from tensorflow.python.grappler import tf_optimizer
35from tensorflow.python.layers import convolutional as conv_layers
36from tensorflow.python.ops import array_ops
37from tensorflow.python.ops import gen_array_ops
38from tensorflow.python.ops import gen_math_ops
39from tensorflow.python.ops import gen_nn_ops
40from tensorflow.python.ops import map_fn
41from tensorflow.python.ops import math_ops
42from tensorflow.python.ops import nn
43from tensorflow.python.ops import random_ops
44from tensorflow.python.ops import state_ops
45from tensorflow.python.ops import variables
46from tensorflow.python.platform import test
47from tensorflow.python.training import gradient_descent
48from tensorflow.python.training import saver as saver_lib
49
50
def _weight(shape):
  """Returns a truncated-normal tensor of `shape` (stddev 0.1, op seed 0)."""
  initial = random_ops.truncated_normal(shape, stddev=0.1, seed=0)
  return initial
54
55
def _bias(shape):
  """Returns a constant tensor of `shape` with every element set to 0.1."""
  initial = constant_op.constant(0.1, shape=shape)
  return initial
59
60
def _conv2d(x, w):
  """Convolves `x` with filter `w` using unit strides and 'SAME' padding."""
  unit_strides = [1, 1, 1, 1]
  return nn.conv2d(x, w, strides=unit_strides, padding='SAME')
64
65
def _max_pool_2x2(x):
  """Max-pools `x` with a 2x2 window and stride 2 ('SAME' padding)."""
  window = [1, 2, 2, 1]
  return nn.max_pool(x, ksize=window, strides=[1, 2, 2, 1], padding='SAME')
70
71
72# Taken from tensorflow/examples/tutorials/mnist/mnist_deep.py
def _two_layer_model(x):
  """Stacks two conv + bias + relu + 2x2 max-pool layers on top of `x`.

  `x` is reshaped to [-1, 28, 28, 1] first. Ops are created in a fixed order
  and the bias is added with `+`, since tests in this file look up the
  resulting auto-generated node names.

  Args:
    x: Tensor that can be reshaped to [-1, 28, 28, 1].

  Returns:
    The output of the second max-pool.
  """
  image = array_ops.reshape(x, [-1, 28, 28, 1])

  # First layer: 5x5 filters, 1 -> 32 channels.
  filters1 = _weight([5, 5, 1, 32])
  offsets1 = _bias([32])
  pooled1 = _max_pool_2x2(nn.relu(_conv2d(image, filters1) + offsets1))

  # Second layer: 5x5 filters, 32 -> 64 channels.
  filters2 = _weight([5, 5, 32, 64])
  offsets2 = _bias([64])
  return _max_pool_2x2(nn.relu(_conv2d(pooled1, filters2) + offsets2))
84
85
def _model_with_second_port():
  """Builds a graph that consumes two outputs of a fused batch norm.

  Returns:
    The identity of `y + mean`, where `y` and `mean` are the first and second
    outputs of `nn.fused_batch_norm` applied to a random [2, 5, 5, 4] input.
  """
  random_seed.set_random_seed(0)
  inputs = random_ops.truncated_normal([2, 5, 5, 4], seed=0)
  scale = constant_op.constant(0.1, shape=[4])
  offset = constant_op.constant(0.3, shape=[4])
  normalized, batch_mean, _ = nn.fused_batch_norm(inputs, scale, offset)
  summed = math_ops.add(normalized, batch_mean)
  return array_ops.identity(summed)
95
96
def _model_with_branch(x):
  """Feeds one reshaped input to two convolutions and adds their outputs.

  Args:
    x: Tensor that can be reshaped to [-1, 28, 28, 1].

  Returns:
    The element-wise sum of the two convolution outputs.
  """
  image = array_ops.reshape(x, [-1, 28, 28, 1])
  filters_a = _weight([5, 5, 1, 32])
  filters_b = _weight([5, 5, 1, 32])
  branch_a = _conv2d(image, filters_a)
  branch_b = _conv2d(image, filters_b)
  return math_ops.add(branch_a, branch_b)
105
106
def _model_with_vec_and_4d(x):
  """Adds a length-32 constant vector to the output of a convolution.

  Args:
    x: Tensor that can be reshaped to [-1, 28, 28, 1].

  Returns:
    The sum of the convolution output and the constant vector.
  """
  image = array_ops.reshape(x, [-1, 28, 28, 1])
  filters = _weight([5, 5, 1, 32])
  conv_out = _conv2d(image, filters)
  vec = constant_op.constant(6.4, shape=[32])
  return math_ops.add(conv_out, vec)
114
115
def _loop():
  """Maps `_two_layer_model` over four random [1, 784] inputs with map_fn."""
  random_seed.set_random_seed(0)
  inputs = []
  for _ in range(4):
    inputs.append(random_ops.truncated_normal([1, 784], seed=0))
  return map_fn.map_fn(_two_layer_model, tuple(inputs), dtype=dtypes.float32)
125
126
def _loop_with_branch():
  """Maps `_model_with_branch` over four random [1, 784] inputs with map_fn."""
  random_seed.set_random_seed(0)
  inputs = []
  for _ in range(4):
    inputs.append(random_ops.truncated_normal([1, 784], seed=0))
  return map_fn.map_fn(_model_with_branch, tuple(inputs), dtype=dtypes.float32)
136
137
def _loop_with_vec_and_4d():
  """Maps `_model_with_vec_and_4d` over four random [1, 784] inputs."""
  random_seed.set_random_seed(0)
  inputs = []
  for _ in range(4):
    inputs.append(random_ops.truncated_normal([1, 784], seed=0))
  return map_fn.map_fn(
      _model_with_vec_and_4d, tuple(inputs), dtype=dtypes.float32)
147
148
def _get_config(layout_optimizer=True):
  """Builds a session ConfigProto with the layout optimizer ON or OFF.

  Args:
    layout_optimizer: Truthy to set the rewriter's `layout_optimizer` to ON,
      falsy to set it to OFF.

  Returns:
    A ConfigProto whose rewriter has arithmetic optimization OFF and
    `min_graph_nodes` set to -1, with `build_cost_model=1` in the graph
    options and the optimizer `opt_level` set to -1.
  """
  rewriter = rewriter_config_pb2.RewriterConfig
  layout_setting = rewriter.ON if layout_optimizer else rewriter.OFF
  rewrite_options = rewriter_config_pb2.RewriterConfig(
      layout_optimizer=layout_setting,
      # do not remove duplicated nodes
      arithmetic_optimization=rewriter.OFF)
  rewrite_options.min_graph_nodes = -1
  config = config_pb2.ConfigProto(
      graph_options=config_pb2.GraphOptions(
          rewrite_options=rewrite_options, build_cost_model=1))
  config.graph_options.optimizer_options.opt_level = -1
  return config
166
167
def _simple_metagraph(depthwise=False):
  """Exports a meta graph for a small two-convolution training graph.

  The graph is built in the current default graph: a [1, 200, 200, 3] variable
  goes through two 3x3, 32-filter convolution layers, the mean of the result
  is minimized with gradient descent, and the train op is stored in the
  'train_op' collection.

  Args:
    depthwise: If truthy, use `separable_conv2d` layers instead of `conv2d`.

  Returns:
    The result of `saver_lib.export_meta_graph` for the default graph.
  """
  random_seed.set_random_seed(0)
  inputs = variables.Variable(
      random_ops.truncated_normal([1, 200, 200, 3], seed=0))
  if depthwise:
    conv_layer = conv_layers.separable_conv2d
  else:
    conv_layer = conv_layers.conv2d
  hidden = conv_layer(inputs, 32, [3, 3])
  logits = conv_layer(hidden, 32, [3, 3])
  optimizer = gradient_descent.GradientDescentOptimizer(1e-4)
  train_op = optimizer.minimize(math_ops.reduce_mean(logits))
  default_graph = ops.get_default_graph()
  default_graph.add_to_collection('train_op', train_op)
  return saver_lib.export_meta_graph(graph_def=default_graph.as_graph_def())
181
182
def _get_cluster():
  """Returns a grappler Cluster containing one hand-described GPU device."""
  gpu = device_properties_pb2.NamedDevice()
  gpu.name = '/GPU:0'
  props = gpu.properties
  props.type = 'GPU'
  props.num_cores = 24
  props.frequency = 1000
  props.environment['architecture'] = '4'
  return gcluster.Cluster(devices=[gpu])
192
193
194def _is_transpose(node):
195  return node.endswith('TransposeNHWCToNCHW-LayoutOptimizer') or node.endswith(
196      'TransposeNCHWToNHWC-LayoutOptimizer')
197
198
199def _is_permute(node):
200  return node.endswith('VecPermuteNHWCToNCHW-LayoutOptimizer') or node.endswith(
201      'VecPermuteNCHWToNHWC-LayoutOptimizer')
202
203
204class LayoutOptimizerTest(test.TestCase):
205  """Tests the Grappler layout optimizer."""
206
207  def _assert_trans_nchw_to_nhwc(self, name, nodes):
208    self.assertIn(name + '-TransposeNCHWToNHWC-LayoutOptimizer', nodes)
209
210  def _assert_trans_nhwc_to_nchw(self, name, nodes):
211    self.assertIn(name + '-TransposeNHWCToNCHW-LayoutOptimizer', nodes)
212
213  def _assert_map_nhwc_to_nchw(self, name, nodes):
214    self.assertIn(name + '-DimMapNHWCToNCHW-LayoutOptimizer', nodes)
215
216  def _assert_vec_nchw_to_nhwc(self, name, nodes):
217    self.assertIn(name + '-VecPermuteNCHWToNHWC-LayoutOptimizer', nodes)
218
219  def _assert_vec_nhwc_to_nchw(self, name, nodes):
220    self.assertIn(name + '-VecPermuteNHWCToNCHW-LayoutOptimizer', nodes)
221
222  def _train(self, checkpoint_path, layout_optimizer=False, restore=False):
223    ops.reset_default_graph()
224    graph = ops.get_default_graph()
225    with session.Session(
226        config=_get_config(layout_optimizer), graph=graph) as sess:
227      batch = 2
228      height = 6
229      width = 7
230      input_channels = 3
231      shape = [batch, height, width, input_channels]
232      image = array_ops.placeholder(dtype='float32', shape=shape)
233      conv1 = conv_layers.conv2d(image, 32, [3, 3])
234      conv2 = conv_layers.conv2d(conv1, 32, [3, 3])
235      optimizer = gradient_descent.GradientDescentOptimizer(0.01)
236      loss = math_ops.reduce_mean(conv2)
237      train_op = optimizer.minimize(loss)
238      saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)
239
240      if restore:
241        saver.restore(sess, checkpoint_path)
242      else:
243        self.evaluate(variables.global_variables_initializer())
244
245      np.random.seed(0)
246      for _ in range(2):
247        image_val = np.random.rand(*shape).astype(np.float32)
248        sess.run([loss, train_op], feed_dict={image: image_val})
249
250      if restore:
251        all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
252        all_vars_values = [var.eval(session=sess) for var in all_vars]
253        return all_vars_values
254      else:
255        saver.save(sess, checkpoint_path)
256
257  @test_util.deprecated_graph_mode_only
258  def testTwoConvLayers(self):
259    if test.is_gpu_available(cuda_only=True):
260      random_seed.set_random_seed(0)
261      x = random_ops.truncated_normal([1, 784], seed=0)
262      output = _two_layer_model(x)
263
264      with session.Session(config=_get_config(False)) as sess:
265        output_val_ref = self.evaluate(output)
266
267      with session.Session(config=_get_config()) as sess:
268        metadata = config_pb2.RunMetadata()
269        output_val = sess.run(output, run_metadata=metadata)
270
271      nodes = []
272      num_transposes = 0
273      for node in metadata.cost_graph.node:
274        if _is_transpose(node.name):
275          num_transposes += 1
276        nodes.append(node.name)
277
278      # Four transposes were initially added in the Expand phase of
279      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
280      expected_num_transposes = 2
281      self.assertEqual(expected_num_transposes, num_transposes)
282      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
283      self._assert_trans_nchw_to_nhwc('Relu_1-0-0', nodes)
284
285      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
286
287  @test_util.deprecated_graph_mode_only
288  def testSplitWithNonConstAxis(self):
289    if test.is_gpu_available(cuda_only=True):
290      random_seed.set_random_seed(0)
291      x = random_ops.truncated_normal([1, 784], seed=0)
292      conv = _two_layer_model(x)
293      dim = array_ops.placeholder(dtype='int32')
294      split = array_ops.split(conv, 2, axis=dim)
295      scale = constant_op.constant(0.1, shape=[32])
296      offset = constant_op.constant(0.3, shape=[32])
297      bn0 = nn.fused_batch_norm(split[0], scale, offset)
298      bn1 = nn.fused_batch_norm(split[1], scale, offset)
299      add = bn0[0] + bn1[0]
300      output = array_ops.identity(add)
301
302      with session.Session(config=_get_config(False)) as sess:
303        output_val_ref = sess.run(output, feed_dict={dim: 3})
304
305      with session.Session(config=_get_config()) as sess:
306        metadata = config_pb2.RunMetadata()
307        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})
308
309      nodes = []
310      num_transposes = 0
311      for node in metadata.cost_graph.node:
312        if _is_transpose(node.name):
313          num_transposes += 1
314        nodes.append(node.name)
315
316      expected_num_transposes = 2
317      self.assertEqual(expected_num_transposes, num_transposes)
318      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
319      self._assert_trans_nchw_to_nhwc('add_2-0-0', nodes)
320      self._assert_map_nhwc_to_nchw('split-0', nodes)
321      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
322
323  @test_util.deprecated_graph_mode_only
324  def testSplitVWithNonConstAxis(self):
325    if test.is_gpu_available(cuda_only=True):
326      random_seed.set_random_seed(0)
327      x = random_ops.truncated_normal([1, 784], seed=0)
328      conv = _two_layer_model(x)
329      dim = array_ops.placeholder(dtype='int32')
330      sizes = constant_op.constant([50, 10, 4], shape=[3])
331      split = gen_array_ops.split_v(
332          value=conv, size_splits=sizes, axis=dim, num_split=3)
333      output = math_ops.reduce_sum(split[0])
334
335      with session.Session(config=_get_config(False)) as sess:
336        output_val_ref = sess.run(output, feed_dict={dim: 3})
337
338      with session.Session(config=_get_config()) as sess:
339        metadata = config_pb2.RunMetadata()
340        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})
341
342      nodes = []
343      num_transposes = 0
344      for node in metadata.cost_graph.node:
345        if _is_transpose(node.name):
346          num_transposes += 1
347        nodes.append(node.name)
348
349      # Four transposes were initially added in the Expand phase of
350      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
351      expected_num_transposes = 2
352      self.assertEqual(expected_num_transposes, num_transposes)
353      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
354      self._assert_trans_nchw_to_nhwc('SplitV-0-0', nodes)
355      self._assert_map_nhwc_to_nchw('SplitV-2', nodes)
356      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
357
358  @test_util.deprecated_graph_mode_only
359  def testPadWithConstPaddings(self):
360    if test.is_gpu_available(cuda_only=True):
361      random_seed.set_random_seed(0)
362      x = random_ops.truncated_normal([1, 784], seed=0)
363      conv = _two_layer_model(x)
364      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
365      paddings = constant_op.constant(
366          paddings_val, dtype='int32', name='PaddingsConst')
367      pad = array_ops.pad(conv, paddings)
368      output = array_ops.identity(pad)
369
370      with session.Session(config=_get_config(False)) as sess:
371        output_val_ref = self.evaluate(output)
372
373      with session.Session(config=_get_config()) as sess:
374        metadata = config_pb2.RunMetadata()
375        output_val = sess.run(output, run_metadata=metadata)
376
377      nodes = []
378      num_transposes = 0
379      for node in metadata.cost_graph.node:
380        if _is_transpose(node.name):
381          num_transposes += 1
382        nodes.append(node.name)
383
384      # Four transposes were initially added in the Expand phase of
385      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
386      expected_num_transposes = 2
387      self.assertEqual(expected_num_transposes, num_transposes)
388      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
389      self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
390      self.assertIn('Pad-1-LayoutOptimizer', nodes)
391      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
392
393  @test_util.deprecated_graph_mode_only
394  def testReduceSum(self):
395    if test.is_gpu_available(cuda_only=True):
396      random_seed.set_random_seed(0)
397      x = random_ops.truncated_normal([1, 784], seed=0)
398      conv = _two_layer_model(x)
399      reduce_sum = math_ops.reduce_sum(conv)
400      output = array_ops.identity(reduce_sum)
401
402      with session.Session(config=_get_config(False)) as sess:
403        output_val_ref = self.evaluate(output)
404
405      with session.Session(config=_get_config()) as sess:
406        metadata = config_pb2.RunMetadata()
407        output_val = sess.run(output, run_metadata=metadata)
408
409      nodes = []
410      num_transposes = 0
411      for node in metadata.cost_graph.node:
412        if _is_transpose(node.name):
413          num_transposes += 1
414        nodes.append(node.name)
415
416      # Three transposes were initially added in the Expand phase of
417      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
418      expected_num_transposes = 1
419      self.assertEqual(expected_num_transposes, num_transposes)
420      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
421      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
422
423  @test_util.deprecated_graph_mode_only
424  def testCast(self):
425    if test.is_gpu_available(cuda_only=True):
426      random_seed.set_random_seed(0)
427      x = random_ops.truncated_normal([1, 784], seed=0)
428      conv = _two_layer_model(x)
429      cast = math_ops.cast(conv, dtype='bool')
430      output = array_ops.identity(cast)
431
432      with session.Session(config=_get_config(False)) as sess:
433        output_val_ref = self.evaluate(output)
434
435      with session.Session(config=_get_config()) as sess:
436        metadata = config_pb2.RunMetadata()
437        output_val = sess.run(output, run_metadata=metadata)
438
439      nodes = []
440      num_transposes = 0
441      for node in metadata.cost_graph.node:
442        if _is_transpose(node.name):
443          num_transposes += 1
444        nodes.append(node.name)
445
446      # Four transposes were initially added in the Expand phase of
447      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
448      expected_num_transposes = 2
449      self.assertEqual(expected_num_transposes, num_transposes)
450      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
451      self._assert_trans_nchw_to_nhwc('Cast-0-0', nodes)
452      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
453
454  @test_util.deprecated_graph_mode_only
455  def testSqueeze(self):
456    if test.is_gpu_available(cuda_only=True):
457      random_seed.set_random_seed(0)
458      x = random_ops.truncated_normal([1, 784], seed=0)
459      conv = _two_layer_model(x)
460      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2])
461      squeeze = array_ops.squeeze(reduce_sum)
462      output = array_ops.identity(squeeze)
463
464      with session.Session(config=_get_config(False)) as sess:
465        output_val_ref = self.evaluate(output)
466
467      with session.Session(config=_get_config()) as sess:
468        metadata = config_pb2.RunMetadata()
469        output_val = sess.run(output, run_metadata=metadata)
470
471      nodes = []
472      num_transposes = 0
473      for node in metadata.cost_graph.node:
474        if _is_transpose(node.name):
475          num_transposes += 1
476        nodes.append(node.name)
477
478      # Three transposes were initially added in the Expand phase of
479      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
480      expected_num_transposes = 1
481      self.assertEqual(expected_num_transposes, num_transposes)
482      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
483      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
484
485  @test_util.deprecated_graph_mode_only
486  def testSqueezeAlongHW(self):
487    if test.is_gpu_available(cuda_only=True):
488      random_seed.set_random_seed(0)
489      x = random_ops.truncated_normal([1, 784], seed=0)
490      conv = _two_layer_model(x)
491      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2], keepdims=True)
492      squeeze = array_ops.squeeze(reduce_sum, axis=[1, 2])
493      output = array_ops.identity(squeeze)
494
495      with session.Session(config=_get_config(False)) as sess:
496        output_val_ref = self.evaluate(output)
497
498      with session.Session(config=_get_config()) as sess:
499        metadata = config_pb2.RunMetadata()
500        output_val = sess.run(output, run_metadata=metadata)
501
502      nodes = []
503      num_transposes = 0
504      for node in metadata.cost_graph.node:
505        if _is_transpose(node.name):
506          num_transposes += 1
507        nodes.append(node.name)
508
509      # Three transposes were initially added in the Expand phase of
510      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
511      expected_num_transposes = 1
512      self.assertEqual(expected_num_transposes, num_transposes)
513      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
514      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
515
516  @test_util.deprecated_graph_mode_only
517  def testSqueezeAlongNHW(self):
518    if test.is_gpu_available(cuda_only=True):
519      random_seed.set_random_seed(0)
520      x = random_ops.truncated_normal([1, 784], seed=0)
521      conv = _two_layer_model(x)
522      reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2], keepdims=True)
523      squeeze = array_ops.squeeze(reduce_sum, axis=[0, 1, 2])
524      output = array_ops.identity(squeeze)
525
526      with session.Session(config=_get_config(False)) as sess:
527        output_val_ref = self.evaluate(output)
528
529      with session.Session(config=_get_config()) as sess:
530        metadata = config_pb2.RunMetadata()
531        output_val = sess.run(output, run_metadata=metadata)
532
533      nodes = []
534      num_transposes = 0
535      for node in metadata.cost_graph.node:
536        if _is_transpose(node.name):
537          num_transposes += 1
538        nodes.append(node.name)
539
540      # Three transposes were initially added in the Expand phase of
541      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
542      expected_num_transposes = 1
543      self.assertEqual(expected_num_transposes, num_transposes)
544      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
545      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
546
547  @test_util.deprecated_graph_mode_only
548  def testReduceSumAlongHWC(self):
549    if test.is_gpu_available(cuda_only=True):
550      random_seed.set_random_seed(0)
551      x = random_ops.truncated_normal([1, 784], seed=0)
552      conv = _two_layer_model(x)
553      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2, 3])
554      output = array_ops.identity(reduce_sum)
555
556      with session.Session(config=_get_config(False)) as sess:
557        output_val_ref = self.evaluate(output)
558
559      with session.Session(config=_get_config()) as sess:
560        metadata = config_pb2.RunMetadata()
561        output_val = sess.run(output, run_metadata=metadata)
562
563      nodes = []
564      num_transposes = 0
565      for node in metadata.cost_graph.node:
566        if _is_transpose(node.name):
567          num_transposes += 1
568        nodes.append(node.name)
569
570      # Three transposes were initially added in the Expand phase of
571      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
572      expected_num_transposes = 1
573      self.assertEqual(expected_num_transposes, num_transposes)
574      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
575      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
576
577  @test_util.deprecated_graph_mode_only
578  def testReduceSumAlongNHW(self):
579    if test.is_gpu_available(cuda_only=True):
580      random_seed.set_random_seed(0)
581      x = random_ops.truncated_normal([1, 784], seed=0)
582      conv = _two_layer_model(x)
583      reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2])
584      output = array_ops.identity(reduce_sum)
585
586      with session.Session(config=_get_config(False)) as sess:
587        output_val_ref = self.evaluate(output)
588
589      with session.Session(config=_get_config()) as sess:
590        metadata = config_pb2.RunMetadata()
591        output_val = sess.run(output, run_metadata=metadata)
592
593      nodes = []
594      num_transposes = 0
595      for node in metadata.cost_graph.node:
596        if _is_transpose(node.name):
597          num_transposes += 1
598        nodes.append(node.name)
599
600      # Three transposes were initially added in the Expand phase of
601      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
602      expected_num_transposes = 1
603      self.assertEqual(expected_num_transposes, num_transposes)
604      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
605      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
606
607  @test_util.deprecated_graph_mode_only
608  def testReduceSumAlongC(self):
609    if test.is_gpu_available(cuda_only=True):
610      random_seed.set_random_seed(0)
611      x = random_ops.truncated_normal([1, 784], seed=0)
612      conv = _two_layer_model(x)
613      reduce_sum = math_ops.reduce_sum(conv, axis=[3])
614      output = array_ops.identity(reduce_sum)
615
616      with session.Session(config=_get_config(False)) as sess:
617        output_val_ref = self.evaluate(output)
618
619      with session.Session(config=_get_config()) as sess:
620        metadata = config_pb2.RunMetadata()
621        output_val = sess.run(output, run_metadata=metadata)
622
623      nodes = []
624      num_transposes = 0
625      for node in metadata.cost_graph.node:
626        if _is_transpose(node.name):
627          num_transposes += 1
628        nodes.append(node.name)
629
630      # Three transposes were initially added in the Expand phase of
631      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
632      expected_num_transposes = 1
633      self.assertEqual(expected_num_transposes, num_transposes)
634      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
635      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
636
637  @test_util.deprecated_graph_mode_only
638  def testReduceSumAlongCKeepDims(self):
639    if test.is_gpu_available(cuda_only=True):
640      random_seed.set_random_seed(0)
641      x = random_ops.truncated_normal([1, 784], seed=0)
642      conv = _two_layer_model(x)
643      reduce_sum = math_ops.reduce_sum(conv, axis=[3], keepdims=True)
644      output = array_ops.identity(reduce_sum)
645
646      with session.Session(config=_get_config(False)) as sess:
647        output_val_ref = self.evaluate(output)
648
649      with session.Session(config=_get_config()) as sess:
650        metadata = config_pb2.RunMetadata()
651        output_val = sess.run(output, run_metadata=metadata)
652
653      nodes = []
654      num_transposes = 0
655      for node in metadata.cost_graph.node:
656        if _is_transpose(node.name):
657          num_transposes += 1
658        nodes.append(node.name)
659
660      # Four transposes were initially added in the Expand phase of
661      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
662      expected_num_transposes = 2
663      self.assertEqual(expected_num_transposes, num_transposes)
664      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
665      self._assert_trans_nchw_to_nhwc('Sum-0-0', nodes)
666      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
667
668  @test_util.deprecated_graph_mode_only
669  def testReduceSumAlongHKeepDims(self):
670    if test.is_gpu_available(cuda_only=True):
671      random_seed.set_random_seed(0)
672      x = random_ops.truncated_normal([1, 784], seed=0)
673      conv = _two_layer_model(x)
674      reduce_sum = math_ops.reduce_sum(conv, axis=[2], keepdims=True)
675      output = array_ops.identity(reduce_sum)
676
677      with session.Session(config=_get_config(False)) as sess:
678        output_val_ref = self.evaluate(output)
679
680      with session.Session(config=_get_config()) as sess:
681        metadata = config_pb2.RunMetadata()
682        output_val = sess.run(output, run_metadata=metadata)
683
684      nodes = []
685      num_transposes = 0
686      for node in metadata.cost_graph.node:
687        if _is_transpose(node.name):
688          num_transposes += 1
689        nodes.append(node.name)
690
691      # Four transposes were initially added in the Expand phase of
692      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
693      expected_num_transposes = 2
694      self.assertEqual(expected_num_transposes, num_transposes)
695      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
696      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
697
698  @test_util.deprecated_graph_mode_only
699  def testReduceSumAlongWCKeepDims(self):
700    if test.is_gpu_available(cuda_only=True):
701      random_seed.set_random_seed(0)
702      x = random_ops.truncated_normal([1, 784], seed=0)
703      conv = _two_layer_model(x)
704      reduce_sum = math_ops.reduce_sum(conv, axis=[2, 3], keepdims=True)
705      output = array_ops.identity(reduce_sum)
706
707      with session.Session(config=_get_config(False)) as sess:
708        output_val_ref = self.evaluate(output)
709
710      with session.Session(config=_get_config()) as sess:
711        metadata = config_pb2.RunMetadata()
712        output_val = sess.run(output, run_metadata=metadata)
713
714      nodes = []
715      num_transposes = 0
716      for node in metadata.cost_graph.node:
717        if _is_transpose(node.name):
718          num_transposes += 1
719        nodes.append(node.name)
720
721      # Four transposes were initially added in the Expand phase of
722      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
723      expected_num_transposes = 2
724      self.assertEqual(expected_num_transposes, num_transposes)
725      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
726      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
727
728  @test_util.deprecated_graph_mode_only
729  def testConcatWithControlDependency(self):
730    if test.is_gpu_available(cuda_only=True):
731      random_seed.set_random_seed(0)
732      x = random_ops.truncated_normal([1, 784], seed=0)
733      conv = _two_layer_model(x)
734      axis = constant_op.constant(3)
735      var = variables.Variable(3)
736      assign = state_ops.assign(var, 6)
737      with ops.control_dependencies([assign]):
738        concat = array_ops.concat([conv, conv], axis)
739      output = array_ops.identity(concat)
740
741      with session.Session(config=_get_config(False)) as sess:
742        output_val_ref = self.evaluate(output)
743
744      with session.Session(config=_get_config()) as sess:
745        metadata = config_pb2.RunMetadata()
746        output_val = sess.run(output, run_metadata=metadata)
747
748      nodes = []
749      num_transposes = 0
750      for node in metadata.cost_graph.node:
751        if _is_transpose(node.name):
752          num_transposes += 1
753        nodes.append(node.name)
754
755      # Four transposes were initially added in the Expand phase of
756      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
757      expected_num_transposes = 2
758      self.assertEqual(expected_num_transposes, num_transposes)
759      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
760      self._assert_trans_nchw_to_nhwc('concat-0-0', nodes)
761      self.assertIn('concat-2-LayoutOptimizer', nodes)
762      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
763
764  @test_util.deprecated_graph_mode_only
765  def testFill(self):
766    if test.is_gpu_available(cuda_only=True):
767      random_seed.set_random_seed(0)
768      x = array_ops.placeholder(dtype='float32')
769      conv = _two_layer_model(x)
770      shape = array_ops.shape(conv)
771      scalar = array_ops.constant(5.7)
772      fill = array_ops.fill(shape, scalar)
773      output = array_ops.identity(fill)
774
775      x_val = [3.4] * 784
776      with session.Session(config=_get_config(False)) as sess:
777        output_val_ref = sess.run(output, feed_dict={x: x_val})
778
779      with session.Session(config=_get_config()) as sess:
780        metadata = config_pb2.RunMetadata()
781        output_val = sess.run(
782            output, run_metadata=metadata, feed_dict={
783                x: x_val
784            })
785
786      nodes = []
787      num_transposes = 0
788      num_vec_permute = 0
789      for node in metadata.cost_graph.node:
790        if _is_transpose(node.name):
791          num_transposes += 1
792        if _is_permute(node.name):
793          num_vec_permute += 1
794        nodes.append(node.name)
795
796      # Four transposes were initially added in the Expand phase of
797      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
798      expected_num_transposes = 2
799      self.assertEqual(expected_num_transposes, num_transposes)
800      # Two vector permute nodes were initially added in the Expand phase of
801      # LayoutOptimizer; they cancelled out each other in the Collapse phase.
802      expected_vec_permute = 0
803      self.assertEqual(expected_vec_permute, num_vec_permute)
804      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
805      self._assert_trans_nchw_to_nhwc('Fill-0-0', nodes)
806      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
807
808  @test_util.deprecated_graph_mode_only
809  def testTile(self):
810    if test.is_gpu_available(cuda_only=True):
811      random_seed.set_random_seed(0)
812      x = random_ops.truncated_normal([1, 784], seed=0)
813      conv = _two_layer_model(x)
814      multiple = array_ops.placeholder(dtype='int32')
815      tile = array_ops.tile(conv, multiple)
816      output = array_ops.identity(tile)
817
818      multiple_val = [2, 3, 4, 1]
819      with session.Session(config=_get_config(False)) as sess:
820        output_val_ref = sess.run(output, feed_dict={multiple: multiple_val})
821
822      with session.Session(config=_get_config()) as sess:
823        metadata = config_pb2.RunMetadata()
824        output_val = sess.run(
825            output, run_metadata=metadata, feed_dict={
826                multiple: multiple_val
827            })
828
829      nodes = []
830      num_transposes = 0
831      for node in metadata.cost_graph.node:
832        if _is_transpose(node.name):
833          num_transposes += 1
834        nodes.append(node.name)
835
836      # Four transposes were initially added in the Expand phase of
837      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
838      expected_num_transposes = 2
839      self.assertEqual(expected_num_transposes, num_transposes)
840      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
841      self._assert_trans_nchw_to_nhwc('Tile-0-0', nodes)
842      self._assert_vec_nhwc_to_nchw('Tile-1', nodes)
843      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
844
845  @test_util.deprecated_graph_mode_only
846  def testReverseWithConstDims(self):
847    if test.is_gpu_available(cuda_only=True):
848      random_seed.set_random_seed(0)
849      x = random_ops.truncated_normal([1, 784], seed=0)
850      conv = _two_layer_model(x)
851      dims = constant_op.constant([3, 1], name='DimsConst')
852      reverse = array_ops.reverse(conv, dims)
853      output = array_ops.identity(reverse)
854
855      with session.Session(config=_get_config(False)) as sess:
856        output_val_ref = self.evaluate(output)
857
858      with session.Session(config=_get_config()) as sess:
859        metadata = config_pb2.RunMetadata()
860        output_val = sess.run(output, run_metadata=metadata)
861
862      nodes = []
863      num_transposes = 0
864      for node in metadata.cost_graph.node:
865        if _is_transpose(node.name):
866          num_transposes += 1
867        nodes.append(node.name)
868
869      # Four transposes were initially added in the Expand phase of
870      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
871      expected_num_transposes = 2
872      self.assertEqual(expected_num_transposes, num_transposes)
873      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
874      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
875      self.assertIn('ReverseV2-1-LayoutOptimizer', nodes)
876      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
877
878  @test_util.deprecated_graph_mode_only
879  def testReverseWithNonConstDims(self):
880    if test.is_gpu_available(cuda_only=True):
881      random_seed.set_random_seed(0)
882      x = random_ops.truncated_normal([1, 784], seed=0)
883      conv = _two_layer_model(x)
884      dims = array_ops.placeholder(dtype='int32')
885      reverse = array_ops.reverse(conv, dims)
886      output = array_ops.identity(reverse)
887
888      dims_val = [2, 3]
889      with session.Session(config=_get_config(False)) as sess:
890        output_val_ref = sess.run(output, feed_dict={dims: dims_val})
891
892      with session.Session(config=_get_config()) as sess:
893        metadata = config_pb2.RunMetadata()
894        output_val = sess.run(
895            output, run_metadata=metadata, feed_dict={
896                dims: dims_val
897            })
898
899      nodes = []
900      num_transposes = 0
901      for node in metadata.cost_graph.node:
902        if _is_transpose(node.name):
903          num_transposes += 1
904        nodes.append(node.name)
905
906      # Four transposes were initially added in the Expand phase of
907      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
908      expected_num_transposes = 2
909      self.assertEqual(expected_num_transposes, num_transposes)
910      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
911      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
912      self._assert_map_nhwc_to_nchw('ReverseV2-1', nodes)
913      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
914
915  @test_util.deprecated_graph_mode_only
916  def testSelectOp(self):
917    if test.is_gpu_available(cuda_only=True):
918      random_seed.set_random_seed(0)
919      x = random_ops.truncated_normal([1, 784], seed=0)
920      conv = _two_layer_model(x)
921      add = math_ops.add(conv, conv)
922      mean = math_ops.reduce_mean(conv)
923      condition = math_ops.less(conv, mean)
924      select = gen_math_ops.select(condition, conv, add)
925      output = array_ops.identity(select)
926
927      with session.Session(config=_get_config(False)) as sess:
928        output_val_ref = self.evaluate(output)
929
930      with session.Session(config=_get_config()) as sess:
931        metadata = config_pb2.RunMetadata()
932        output_val = sess.run(output, run_metadata=metadata)
933
934      nodes = []
935      num_transposes = 0
936      for node in metadata.cost_graph.node:
937        if _is_transpose(node.name):
938          num_transposes += 1
939        nodes.append(node.name)
940
941      expected_num_transposes = 2
942      self.assertEqual(expected_num_transposes, num_transposes)
943      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
944      self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
945      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
946
947  @test_util.deprecated_graph_mode_only
948  def testSelectOpConditionUnknownShape(self):
949    if test.is_gpu_available(cuda_only=True):
950      random_seed.set_random_seed(0)
951      x = random_ops.truncated_normal([1, 784], seed=0)
952      conv = _two_layer_model(x)
953      add = math_ops.add(conv, conv)
954      condition = array_ops.placeholder(dtype='bool')
955      select = gen_math_ops.select(condition, conv, add)
956      output = array_ops.identity(select)
957
958      condition_val = np.zeros((1, 7, 7, 64))
959      with session.Session(config=_get_config(False)) as sess:
960        output_val_ref = sess.run(output, feed_dict={condition: condition_val})
961
962      with session.Session(config=_get_config()) as sess:
963        metadata = config_pb2.RunMetadata()
964        output_val = sess.run(
965            output, run_metadata=metadata, feed_dict={condition: condition_val})
966
967      nodes = []
968      num_transposes = 0
969      for node in metadata.cost_graph.node:
970        if _is_transpose(node.name):
971          num_transposes += 1
972        nodes.append(node.name)
973
974      expected_num_transposes = 3
975      self.assertEqual(expected_num_transposes, num_transposes)
976      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
977      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
978
979  @test_util.deprecated_graph_mode_only
980  def testSelectOpScalarCondition(self):
981    if test.is_gpu_available(cuda_only=True):
982      random_seed.set_random_seed(0)
983      x = random_ops.truncated_normal([1, 784], seed=0)
984      conv = _two_layer_model(x)
985      add = math_ops.add(conv, conv)
986      condition = constant_op.constant(True)
987      select = gen_math_ops.select(condition, conv, add)
988      output = array_ops.identity(select)
989
990      with session.Session(config=_get_config(False)) as sess:
991        output_val_ref = self.evaluate(output)
992
993      with session.Session(config=_get_config()) as sess:
994        metadata = config_pb2.RunMetadata()
995        output_val = sess.run(output, run_metadata=metadata)
996
997      nodes = []
998      num_transposes = 0
999      for node in metadata.cost_graph.node:
1000        if _is_transpose(node.name):
1001          num_transposes += 1
1002        nodes.append(node.name)
1003
1004      expected_num_transposes = 2
1005      self.assertEqual(expected_num_transposes, num_transposes)
1006      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1007      self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
1008      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1009
1010  @test_util.deprecated_graph_mode_only
1011  def testPadWithNonConstPaddings(self):
1012    if test.is_gpu_available(cuda_only=True):
1013      random_seed.set_random_seed(0)
1014      x = random_ops.truncated_normal([1, 784], seed=0)
1015      conv = _two_layer_model(x)
1016      paddings = array_ops.placeholder(dtype='int32')
1017      pad = array_ops.pad(conv, paddings)
1018      output = array_ops.identity(pad)
1019
1020      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
1021      with session.Session(config=_get_config(False)) as sess:
1022        output_val_ref = sess.run(output, feed_dict={paddings: paddings_val})
1023
1024      with session.Session(config=_get_config()) as sess:
1025        metadata = config_pb2.RunMetadata()
1026        output_val = sess.run(
1027            output, run_metadata=metadata, feed_dict={
1028                paddings: paddings_val
1029            })
1030
1031      nodes = []
1032      num_transposes = 0
1033      for node in metadata.cost_graph.node:
1034        if _is_transpose(node.name):
1035          num_transposes += 1
1036        nodes.append(node.name)
1037
1038      # Four transposes were initially added in the Expand phase of
1039      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1040      expected_num_transposes = 2
1041      self.assertEqual(expected_num_transposes, num_transposes)
1042      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1043      self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
1044      self._assert_vec_nhwc_to_nchw('Pad-1', nodes)
1045      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1046
1047  @test_util.deprecated_graph_mode_only
1048  def testMaxPoolV2(self):
1049    if test.is_gpu_available(cuda_only=True):
1050      random_seed.set_random_seed(0)
1051      x = random_ops.truncated_normal([1, 784], seed=0)
1052      conv = _two_layer_model(x)
1053      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
1054      strides = array_ops.placeholder(dtype='int32', shape=[4])
1055      max_pool = gen_nn_ops.max_pool_v2(conv, ksize, strides, 'VALID')
1056      output = array_ops.identity(max_pool)
1057
1058      strides_val = [1, 3, 2, 1]
1059      with session.Session(config=_get_config(False)) as sess:
1060        output_val_ref = sess.run(output, feed_dict={strides: strides_val})
1061
1062      with session.Session(config=_get_config()) as sess:
1063        metadata = config_pb2.RunMetadata()
1064        output_val = sess.run(
1065            output, run_metadata=metadata, feed_dict={
1066                strides: strides_val
1067            })
1068
1069      nodes = []
1070      num_transposes = 0
1071      for node in metadata.cost_graph.node:
1072        if _is_transpose(node.name):
1073          num_transposes += 1
1074        nodes.append(node.name)
1075
1076      expected_num_transposes = 2
1077      self.assertEqual(expected_num_transposes, num_transposes)
1078      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1079      self._assert_trans_nchw_to_nhwc('MaxPoolV2-0-0', nodes)
1080      self._assert_vec_nhwc_to_nchw('MaxPoolV2-2', nodes)
1081      self.assertIn('MaxPoolV2-1-LayoutOptimizer', nodes)
1082      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1083
1084  @test_util.deprecated_graph_mode_only
1085  def testMaxPoolGradV2(self):
1086    if test.is_gpu_available(cuda_only=True):
1087      random_seed.set_random_seed(0)
1088      x = random_ops.truncated_normal([1, 784], seed=0)
1089      conv = _two_layer_model(x)
1090      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
1091      strides = array_ops.placeholder(dtype='int32', shape=[4])
1092      max_pool_grad = gen_nn_ops.max_pool_grad_v2(conv, conv, conv, ksize,
1093                                                  strides, 'VALID')
1094      output = array_ops.identity(max_pool_grad)
1095
1096      strides_val = [1, 3, 2, 1]
1097      with session.Session(config=_get_config(False)) as sess:
1098        output_val_ref = sess.run(output, feed_dict={strides: strides_val})
1099
1100      with session.Session(config=_get_config()) as sess:
1101        metadata = config_pb2.RunMetadata()
1102        output_val = sess.run(
1103            output, run_metadata=metadata, feed_dict={
1104                strides: strides_val
1105            })
1106
1107      nodes = []
1108      num_transposes = 0
1109      for node in metadata.cost_graph.node:
1110        if _is_transpose(node.name):
1111          num_transposes += 1
1112        nodes.append(node.name)
1113
1114      expected_num_transposes = 2
1115      self.assertEqual(expected_num_transposes, num_transposes)
1116      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1117      self._assert_trans_nchw_to_nhwc('MaxPoolGradV2-0-0', nodes)
1118      self._assert_vec_nhwc_to_nchw('MaxPoolGradV2-4', nodes)
1119      self.assertIn('MaxPoolGradV2-3-LayoutOptimizer', nodes)
1120      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1121
1122  @test_util.deprecated_graph_mode_only
1123  def testSliceWithNonConstAxis(self):
1124    if test.is_gpu_available(cuda_only=True):
1125      random_seed.set_random_seed(0)
1126      x = random_ops.truncated_normal([1, 784], seed=0)
1127      conv = _two_layer_model(x)
1128      size = array_ops.placeholder(dtype='int32')
1129      s = array_ops.slice(conv, [0, 0, 0, 0], size)
1130      output = array_ops.identity(s)
1131
1132      size_val = [1, 2, 3, 4]
1133      with session.Session(config=_get_config(False)) as sess:
1134        output_val_ref = sess.run(output, feed_dict={size: size_val})
1135
1136      with session.Session(config=_get_config()) as sess:
1137        metadata = config_pb2.RunMetadata()
1138        output_val = sess.run(
1139            output, run_metadata=metadata, feed_dict={
1140                size: size_val
1141            })
1142
1143      nodes = []
1144      num_transposes = 0
1145      for node in metadata.cost_graph.node:
1146        if _is_transpose(node.name):
1147          num_transposes += 1
1148        nodes.append(node.name)
1149
1150      # Four transposes were initially added in the Expand phase of
1151      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1152      expected_num_transposes = 2
1153      self.assertEqual(expected_num_transposes, num_transposes)
1154      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1155      self._assert_trans_nchw_to_nhwc('Slice-0-0', nodes)
1156      self._assert_vec_nhwc_to_nchw('Slice-2', nodes)
1157      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1158
1159  @test_util.deprecated_graph_mode_only
1160  def testStridedSliceWithNonConstAxis(self):
1161    if test.is_gpu_available(cuda_only=True):
1162      random_seed.set_random_seed(0)
1163      x = random_ops.truncated_normal([1, 784], seed=0)
1164      conv = _two_layer_model(x)
1165      end = array_ops.placeholder(dtype='int32')
1166      s = array_ops.strided_slice(conv, [0, 0, 0, 0], end, strides=[1, 2, 3, 1])
1167      output = array_ops.identity(s)
1168
1169      end_val = [1, 2, 3, 4]
1170      with session.Session(config=_get_config(False)) as sess:
1171        output_val_ref = sess.run(output, feed_dict={end: end_val})
1172
1173      with session.Session(config=_get_config()) as sess:
1174        metadata = config_pb2.RunMetadata()
1175        output_val = sess.run(
1176            output, run_metadata=metadata, feed_dict={
1177                end: end_val
1178            })
1179
1180      nodes = []
1181      num_transposes = 0
1182      for node in metadata.cost_graph.node:
1183        if _is_transpose(node.name):
1184          num_transposes += 1
1185        nodes.append(node.name)
1186
1187      # Four transposes were initially added in the Expand phase of
1188      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1189      expected_num_transposes = 2
1190      self.assertEqual(expected_num_transposes, num_transposes)
1191      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1192      self._assert_trans_nchw_to_nhwc('StridedSlice-0-0', nodes)
1193      self._assert_vec_nhwc_to_nchw('StridedSlice-2', nodes)
1194      self.assertIn('StridedSlice-1-LayoutOptimizer', nodes)
1195      self.assertIn('StridedSlice-3-LayoutOptimizer', nodes)
1196      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1197
1198  @test_util.deprecated_graph_mode_only
1199  def testStridedSliceWithMask1011(self):
1200    if test.is_gpu_available(cuda_only=True):
1201      random_seed.set_random_seed(0)
1202      x = random_ops.truncated_normal([1, 784], seed=0)
1203      conv = _two_layer_model(x)
1204      # This will generate a StridedSlice op with begin mask and
1205      # end mask 11(1011).
1206      s = conv[:, :, 1:-1, :]
1207      output = array_ops.identity(s)
1208
1209      with session.Session(config=_get_config(False)) as sess:
1210        output_val_ref = self.evaluate(output)
1211
1212      with session.Session(config=_get_config()) as sess:
1213        metadata = config_pb2.RunMetadata()
1214        output_val = sess.run(output, run_metadata=metadata)
1215
1216      nodes = []
1217      num_transposes = 0
1218      for node in metadata.cost_graph.node:
1219        if _is_transpose(node.name):
1220          num_transposes += 1
1221        nodes.append(node.name)
1222
1223      # Four transposes were initially added in the Expand phase of
1224      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1225      expected_num_transposes = 2
1226      self.assertEqual(expected_num_transposes, num_transposes)
1227      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1228      self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes)
1229      self.assertIn('strided_slice-1-LayoutOptimizer', nodes)
1230      self.assertIn('strided_slice-2-LayoutOptimizer', nodes)
1231      self.assertIn('strided_slice-3-LayoutOptimizer', nodes)
1232      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1233
1234  @test_util.deprecated_graph_mode_only
1235  def testStridedSliceWithMask0111(self):
1236    if test.is_gpu_available(cuda_only=True):
1237      random_seed.set_random_seed(0)
1238      x = random_ops.truncated_normal([1, 784], seed=0)
1239      conv = _two_layer_model(x)
1240      # This will generate a StridedSlice op with begin mask and
1241      # end mask 7(0111).
1242      s = conv[:, :, :, 1:-1]
1243      output = array_ops.identity(s)
1244
1245      with session.Session(config=_get_config(False)) as sess:
1246        output_val_ref = self.evaluate(output)
1247
1248      with session.Session(config=_get_config()) as sess:
1249        metadata = config_pb2.RunMetadata()
1250        output_val = sess.run(output, run_metadata=metadata)
1251
1252      nodes = []
1253      num_transposes = 0
1254      for node in metadata.cost_graph.node:
1255        if _is_transpose(node.name):
1256          num_transposes += 1
1257        nodes.append(node.name)
1258
1259      # Four transposes were initially added in the Expand phase of
1260      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1261      expected_num_transposes = 2
1262      self.assertEqual(expected_num_transposes, num_transposes)
1263      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1264      self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes)
1265      self.assertIn('strided_slice-1-LayoutOptimizer', nodes)
1266      self.assertIn('strided_slice-2-LayoutOptimizer', nodes)
1267      self.assertIn('strided_slice-3-LayoutOptimizer', nodes)
1268      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1269
1270  @test_util.deprecated_graph_mode_only
1271  def testStridedSliceGradWithNonConstAxis(self):
1272    if test.is_gpu_available(cuda_only=True):
1273      random_seed.set_random_seed(0)
1274      x = random_ops.truncated_normal([1, 784], seed=0)
1275      conv = _two_layer_model(x)
1276      end = array_ops.placeholder(dtype='int32')
1277      shape = array_ops.shape(conv)
1278      end_val = [1, 2, 3, 4]
1279      s = array_ops.strided_slice(
1280          conv, [0, 0, 0, 0], end_val, strides=[1, 2, 3, 1])
1281      s_grad = array_ops.strided_slice_grad(shape, [0, 0, 0, 0], end,
1282                                            [1, 2, 3, 1], s)
1283      output = array_ops.identity(s_grad)
1284
1285      with session.Session(config=_get_config(False)) as sess:
1286        output_val_ref = sess.run(output, feed_dict={end: end_val})
1287
1288      with session.Session(config=_get_config()) as sess:
1289        metadata = config_pb2.RunMetadata()
1290        output_val = sess.run(
1291            output, run_metadata=metadata, feed_dict={
1292                end: end_val
1293            })
1294
1295      nodes = []
1296      num_transposes = 0
1297      for node in metadata.cost_graph.node:
1298        if _is_transpose(node.name):
1299          num_transposes += 1
1300        nodes.append(node.name)
1301
1302      # Four transposes were initially added in the Expand phase of
1303      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1304      expected_num_transposes = 2
1305      self.assertEqual(expected_num_transposes, num_transposes)
1306      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1307      self._assert_trans_nchw_to_nhwc('StridedSliceGrad-0-0', nodes)
1308      self._assert_vec_nhwc_to_nchw('StridedSliceGrad-2', nodes)
1309      self.assertIn('StridedSlice-1-LayoutOptimizer', nodes)
1310      self.assertIn('StridedSlice-2-LayoutOptimizer', nodes)
1311      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1312
1313  @test_util.deprecated_graph_mode_only
1314  def testShapeN(self):
1315    if test.is_gpu_available(cuda_only=True):
1316      x = array_ops.placeholder(dtype='float32')
1317      conv = _two_layer_model(x)
1318      shapen = array_ops.shape_n([conv, conv])
1319      output = math_ops.add(shapen[0], shapen[1])
1320
1321      x_val = [1.7] * 784
1322      with session.Session(config=_get_config(False)) as sess:
1323        output_val_ref = sess.run(output, feed_dict={x: x_val})
1324
1325      with session.Session(config=_get_config()) as sess:
1326        metadata = config_pb2.RunMetadata()
1327        output_val = sess.run(
1328            output, run_metadata=metadata, feed_dict={
1329                x: x_val
1330            })
1331
1332      nodes = []
1333      num_transposes = 0
1334      for node in metadata.cost_graph.node:
1335        if _is_transpose(node.name):
1336          num_transposes += 1
1337        nodes.append(node.name)
1338
1339      expected_num_transposes = 1
1340      self.assertEqual(expected_num_transposes, num_transposes)
1341      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1342      self._assert_vec_nchw_to_nhwc('ShapeN-0-0', nodes)
1343      self.assertAllEqual(output_val_ref, output_val)
1344
1345  @test_util.deprecated_graph_mode_only
1346  def testShapeNFollowedByNotConvertibleNodeReshape(self):
1347    if test.is_gpu_available(cuda_only=True):
1348      x = array_ops.placeholder(dtype='float32')
1349      conv = _two_layer_model(x)
1350      conv_reshape = array_ops.reshape(conv, [1, 1, 1, -1])
1351      shapen = array_ops.shape_n([conv, conv_reshape])
1352      shape = array_ops.identity(shapen[1])
1353      ones = array_ops.ones(shape)
1354      output = math_ops.add_n([conv_reshape, ones])
1355
1356      x_val = [1.7] * 784
1357      with session.Session(config=_get_config(False)) as sess:
1358        output_val_ref = sess.run(output, feed_dict={x: x_val})
1359
1360      with session.Session(config=_get_config()) as sess:
1361        metadata = config_pb2.RunMetadata()
1362        output_val = sess.run(
1363            output, run_metadata=metadata, feed_dict={x: x_val})
1364
1365      nodes = []
1366      num_transposes = 0
1367      for node in metadata.cost_graph.node:
1368        if _is_transpose(node.name):
1369          num_transposes += 1
1370        nodes.append(node.name)
1371
1372      expected_num_transposes = 2
1373      self.assertEqual(expected_num_transposes, num_transposes)
1374      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1375      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1376
1377  @test_util.deprecated_graph_mode_only
1378  def testLoop(self):
1379    if test.is_gpu_available(cuda_only=True):
1380      output = _loop()
1381
1382      with session.Session(config=_get_config(False)) as sess:
1383        output_val_ref = self.evaluate(output)
1384
1385      with session.Session(config=_get_config()) as sess:
1386        metadata = config_pb2.RunMetadata()
1387        output_val = sess.run(output, run_metadata=metadata)
1388
1389      nodes = []
1390      num_transposes = 0
1391      for node in metadata.cost_graph.node:
1392        if _is_transpose(node.name):
1393          num_transposes += 1
1394        nodes.append(node.name)
1395
1396      # Four transposes were initially added in the Expand phase of
1397      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1398      expected_num_transposes = 2
1399      self.assertEqual(expected_num_transposes, num_transposes)
1400      self.assertEqual(expected_num_transposes, num_transposes)
1401      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
1402      self._assert_trans_nchw_to_nhwc('map/while/MaxPool_1-0-2', nodes)
1403      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1404
1405  @test_util.deprecated_graph_mode_only
1406  def testLoopWithBranch(self):
1407    if test.is_gpu_available(cuda_only=True):
1408      output = _loop_with_branch()
1409
1410      with session.Session(config=_get_config(False)) as sess:
1411        output_val_ref = self.evaluate(output)
1412
1413      with session.Session(config=_get_config()) as sess:
1414        metadata = config_pb2.RunMetadata()
1415        output_val = sess.run(output, run_metadata=metadata)
1416
1417      nodes = []
1418      num_transposes = 0
1419      for node in metadata.cost_graph.node:
1420        if _is_transpose(node.name):
1421          num_transposes += 1
1422        nodes.append(node.name)
1423
1424      expected_num_transposes = 3
1425      self.assertEqual(expected_num_transposes, num_transposes)
1426      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
1427      self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes)
1428      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1429
1430  @test_util.deprecated_graph_mode_only
1431  def testLoopWithVecAnd4D(self):
1432    if test.is_gpu_available(cuda_only=True):
1433      output = _loop_with_vec_and_4d()
1434
1435      with session.Session(config=_get_config(False)) as sess:
1436        output_val_ref = self.evaluate(output)
1437
1438      with session.Session(config=_get_config()) as sess:
1439        metadata = config_pb2.RunMetadata()
1440        output_val = sess.run(output, run_metadata=metadata)
1441
1442      nodes = []
1443      num_transposes = 0
1444      for node in metadata.cost_graph.node:
1445        if _is_transpose(node.name):
1446          num_transposes += 1
1447        nodes.append(node.name)
1448
1449      expected_num_transposes = 2
1450      self.assertEqual(expected_num_transposes, num_transposes)
1451      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
1452      self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes)
1453      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1454
1455  @test_util.deprecated_graph_mode_only
1456  def testBinaryOpSecondPort(self):
1457    if test.is_gpu_available(cuda_only=True):
1458      output = _model_with_second_port()
1459
1460      with session.Session(config=_get_config(False)) as sess:
1461        output_val_ref = self.evaluate(output)
1462
1463      with session.Session(config=_get_config()) as sess:
1464        metadata = config_pb2.RunMetadata()
1465        output_val = sess.run(output, run_metadata=metadata)
1466
1467      nodes = []
1468      num_transposes = 0
1469      for node in metadata.cost_graph.node:
1470        if _is_transpose(node.name):
1471          num_transposes += 1
1472        nodes.append(node.name)
1473
1474      expected_num_transposes = 2
1475      self.assertEqual(expected_num_transposes, num_transposes)
1476      self._assert_trans_nhwc_to_nchw('FusedBatchNorm-0', nodes)
1477      self._assert_trans_nchw_to_nhwc('Add-0-0', nodes)
1478      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1479
1480  @test_util.deprecated_graph_mode_only
1481  def testGradient(self):
1482    meta_graph = _simple_metagraph()
1483    config = config_pb2.ConfigProto()
1484    config.graph_options.rewrite_options.CopyFrom(
1485        rewriter_config_pb2.RewriterConfig(
1486            layout_optimizer=rewriter_config_pb2.RewriterConfig.ON,
1487            min_graph_nodes=-1))
1488    optimized_graph = tf_optimizer.OptimizeGraph(
1489        config, meta_graph, cluster=_get_cluster())
1490
1491    found = 0
1492    for node in optimized_graph.node:
1493      if node.op in ['Conv2D', 'Conv2DBackpropFilter', 'Conv2DBackpropInput']:
1494        found += 1
1495        self.assertEqual(node.attr['data_format'].s, b'NCHW')
1496    self.assertEqual(found, 5)
1497
1498  @test_util.deprecated_graph_mode_only
1499  def testDepthwise(self):
1500    meta_graph = _simple_metagraph(depthwise=True)
1501    config = config_pb2.ConfigProto()
1502    config.graph_options.rewrite_options.CopyFrom(
1503        rewriter_config_pb2.RewriterConfig(
1504            layout_optimizer=rewriter_config_pb2.RewriterConfig.ON,
1505            min_graph_nodes=-1))
1506    optimized_graph = tf_optimizer.OptimizeGraph(
1507        config, meta_graph, cluster=_get_cluster())
1508
1509    found = 0
1510    for node in optimized_graph.node:
1511      if node.op in [
1512          'DepthwiseConv2dNative', 'DepthwiseConv2dNativeBackpropFilter',
1513          'DepthwiseConv2dNativeBackpropInput'
1514      ]:
1515        found += 1
1516        self.assertEqual(node.attr['data_format'].s, b'NCHW')
1517    self.assertEqual(found, 6)
1518
1519  def testCheckpointCompatibility(self):
1520    if not test.is_gpu_available(cuda_only=True):
1521      self.skipTest('GPU required')
1522
1523    checkpoint_path = self.get_temp_dir()
1524    self._train(checkpoint_path)
1525    vars_expected = self._train(checkpoint_path, restore=True)
1526    vars_layout_optimized = self._train(
1527        checkpoint_path, restore=True, layout_optimizer=True)
1528
1529    for var_expected, var_layout_optimized in zip(vars_expected,
1530                                                  vars_layout_optimized):
1531      self.assertAllClose(var_expected, var_layout_optimized, atol=1e-6)
1532
1533
# Hand control to `test.main()` when this file is executed as a script.
if __name__ == '__main__':
  test.main()
1536