1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
"""DNN regression using a custom Estimator built from a `model_fn`."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import tensorflow as tf
22
23import imports85  # pylint: disable=g-bad-import-order
24
# Number of training steps passed to `model.train` in `main`.
STEPS = 1000
# Divisor applied to the price labels before training (labels are expressed in
# thousands); the reported RMSE is multiplied by it again before printing.
PRICE_NORM_FACTOR = 1000
27
28
def my_dnn_regression_fn(features, labels, mode, params):
  """A model function implementing DNN regression for a custom Estimator.

  Args:
    features: Input features, fed to `tf.feature_column.input_layer`.
    labels: Target values. Not used in `PREDICT` mode.
    mode: One of the `tf.estimator.ModeKeys` values.
    params: Dict of hyperparameters:
      * "feature_columns" (required): feature columns describing `features`.
      * "hidden_units" (optional): list of hidden layer sizes, default [20].
      * "optimizer" (optional): optimizer class or factory, default
        `tf.train.AdamOptimizer`.
      * "learning_rate" (optional): passed as the single positional argument
        to the optimizer. When missing or `None`, the optimizer is built
        with no arguments so that its own default learning rate applies.

  Returns:
    A `tf.estimator.EstimatorSpec` for the requested `mode`: predictions
    under the key "price" for `PREDICT`, loss and train op for `TRAIN`, and
    loss plus an "rmse" metric for `EVAL`.
  """

  # Extract the input into a dense layer, according to the feature_columns.
  top = tf.feature_column.input_layer(features, params["feature_columns"])

  # Iterate over the "hidden_units" list of layer sizes, default is [20].
  for units in params.get("hidden_units", [20]):
    # Add a hidden layer, densely connected on top of the previous layer.
    top = tf.layers.dense(inputs=top, units=units, activation=tf.nn.relu)

  # Connect a linear output layer on top.
  output_layer = tf.layers.dense(inputs=top, units=1)

  # The output layer has a single unit; drop that axis so the predictions
  # form a 1-dim Tensor.
  predictions = tf.squeeze(output_layer, 1)

  if mode == tf.estimator.ModeKeys.PREDICT:
    # In `PREDICT` mode we only need to return predictions.
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions={"price": predictions})

  # Calculate loss using mean squared error.
  # NOTE(review): assumes `labels` has the same shape as `predictions` --
  # confirm against the input pipeline.
  average_loss = tf.losses.mean_squared_error(labels, predictions)

  # Pre-made estimators use the total_loss instead of the average,
  # so report total_loss for compatibility.
  batch_size = tf.shape(labels)[0]
  # `tf.cast` replaces the deprecated `tf.to_float`; the result is identical.
  total_loss = tf.cast(batch_size, tf.float32) * average_loss

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer_cls = params.get("optimizer", tf.train.AdamOptimizer)
    learning_rate = params.get("learning_rate")
    if learning_rate is None:
      # Do not forward `None` as a learning rate: let the optimizer fall back
      # to its own default (or complain clearly if it has none).
      optimizer = optimizer_cls()
    else:
      optimizer = optimizer_cls(learning_rate)

    # The average loss is minimized; the total loss is only what is reported.
    train_op = optimizer.minimize(
        loss=average_loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(
        mode=mode, loss=total_loss, train_op=train_op)

  # In evaluation mode we will calculate evaluation metrics.
  assert mode == tf.estimator.ModeKeys.EVAL

  # Calculate root mean squared error
  rmse = tf.metrics.root_mean_squared_error(labels, predictions)

  # Add the rmse to the collection of evaluation metrics.
  eval_metrics = {"rmse": rmse}

  return tf.estimator.EstimatorSpec(
      mode=mode,
      # Report sum of error for compatibility with pre-made estimators
      loss=total_loss,
      eval_metric_ops=eval_metrics)
82
83
def main(argv):
  """Trains the custom DNN regressor on imports85 and prints the test RMSE.

  Args:
    argv: Command-line arguments; exactly one element is expected.
  """
  assert len(argv) == 1
  train_set, test_set = imports85.dataset()

  # Express the labels in units of PRICE_NORM_FACTOR (thousands) for better
  # convergence.
  def normalize_price(features, labels):
    scaled_labels = labels / PRICE_NORM_FACTOR
    return features, scaled_labels

  train_set = train_set.map(normalize_price)
  test_set = test_set.map(normalize_price)

  def input_train():
    """Training input_fn: shuffled batches of 128, repeated indefinitely."""
    # The shuffle buffer is meant to be larger than the data set so that the
    # examples are well mixed.
    shuffled = train_set.shuffle(1000)
    batched = shuffled.batch(128)
    return batched.repeat()

  def input_test():
    """Validation input_fn: a single shuffled pass in batches of 128."""
    shuffled = test_set.shuffle(1000)
    return shuffled.batch(128)

  # Categorical column backed by an explicit vocabulary list. A vocabulary
  # file (`categorical_column_with_vocabulary_file`) or, for features covering
  # a range of positive integers, `categorical_column_with_identity` are the
  # alternatives.
  body_style = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style",
      vocabulary_list=["hardtop", "wagon", "sedan", "hatchback", "convertible"])
  # Categorical column that needs no vocabulary: each value is hashed into one
  # of 50 buckets.
  make = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  curb_weight = tf.feature_column.numeric_column(key="curb-weight")
  highway_mpg = tf.feature_column.numeric_column(key="highway-mpg")
  # A DNN needs dense inputs, so the sparse categorical columns are wrapped:
  # `indicator_column` yields a one-hot vector, `embedding_column` a trainable
  # vector per index.
  body_style_one_hot = tf.feature_column.indicator_column(body_style)
  make_embedding = tf.feature_column.embedding_column(make, dimension=3)
  feature_columns = [
      curb_weight,
      highway_mpg,
      body_style_one_hot,
      make_embedding,
  ]

  # Everything in `params` is handed through to the `model_fn`.
  hyperparams = {
      "feature_columns": feature_columns,
      "learning_rate": 0.001,
      "optimizer": tf.train.AdamOptimizer,
      "hidden_units": [20, 20],
  }
  model = tf.estimator.Estimator(
      model_fn=my_dnn_regression_fn, params=hyperparams)

  # Fit on the training split, then score on the held-out split.
  model.train(input_fn=input_train, steps=STEPS)
  metrics = model.evaluate(input_fn=input_test)

  # Report the Root Mean Square Error (RMSE), scaled back to dollars.
  print("\n" + 80 * "*")
  rmse_dollars = PRICE_NORM_FACTOR * metrics["rmse"]
  print("\nRMS error for the test set: ${:.0f}".format(rmse_dollars))

  print()
157
158
if __name__ == "__main__":
  # Raise the log level to INFO so the Estimator's periodic progress logs are
  # shown, then hand control to `main` through the TensorFlow app runner.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main)
163