# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Regression using a custom Estimator with a DNN model function."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import imports85  # pylint: disable=g-bad-import-order

STEPS = 1000
PRICE_NORM_FACTOR = 1000


def my_dnn_regression_fn(features, labels, mode, params):
  """A model function implementing DNN regression for a custom Estimator."""

  # Extract the input into a dense layer, according to the feature_columns.
  top = tf.feature_column.input_layer(features, params["feature_columns"])

  # Iterate over the "hidden_units" list of layer sizes; the default is [20].
  for units in params.get("hidden_units", [20]):
    # Add a hidden layer, densely connected on top of the previous layer.
    top = tf.layers.dense(inputs=top, units=units, activation=tf.nn.relu)

  # Connect a linear output layer on top.
  output_layer = tf.layers.dense(inputs=top, units=1)

  # Reshape the output layer to a 1-dim Tensor to return predictions.
  predictions = tf.squeeze(output_layer, 1)

  if mode == tf.estimator.ModeKeys.PREDICT:
    # In `PREDICT` mode we only need to return predictions.
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions={"price": predictions})

  # Calculate the loss using mean squared error.
  average_loss = tf.losses.mean_squared_error(labels, predictions)

  # Pre-made estimators use the total_loss instead of the average,
  # so report total_loss for compatibility.
  batch_size = tf.shape(labels)[0]
  total_loss = tf.to_float(batch_size) * average_loss

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = params.get("optimizer", tf.train.AdamOptimizer)
    optimizer = optimizer(params.get("learning_rate", None))
    train_op = optimizer.minimize(
        loss=average_loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(
        mode=mode, loss=total_loss, train_op=train_op)

  # In evaluation mode we will calculate evaluation metrics.
  assert mode == tf.estimator.ModeKeys.EVAL

  # Calculate the root mean squared error.
  rmse = tf.metrics.root_mean_squared_error(labels, predictions)

  # Add the rmse to the collection of evaluation metrics.
  eval_metrics = {"rmse": rmse}

  return tf.estimator.EstimatorSpec(
      mode=mode,
      # Report the total loss for compatibility with pre-made estimators.
      loss=total_loss,
      eval_metric_ops=eval_metrics)
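

# The helper below is a minimal sketch, not part of the original sample: it
# shows how the `PREDICT` branch of `my_dnn_regression_fn` can be exercised
# once the Estimator has been trained. `Estimator.predict` yields one dict per
# example, keyed by "price" as defined in the `EstimatorSpec` above. The
# function name and the `count` parameter are illustrative assumptions.
def predict_examples(model, input_fn, count=5):
  """Prints predicted prices for the first `count` examples from `input_fn`."""
  for i, pred in enumerate(model.predict(input_fn=input_fn)):
    if i >= count:
      break
    # Undo the normalization applied in `main` to report prices in dollars.
    print("Predicted price: ${:.0f}".format(PRICE_NORM_FACTOR * pred["price"]))

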
def main(argv):
  """Builds, trains, and evaluates the model."""
  assert len(argv) == 1
  (train, test) = imports85.dataset()

  # Switch the labels to units of thousands for better convergence.
  def normalize_price(features, labels):
    return features, labels / PRICE_NORM_FACTOR

  train = train.map(normalize_price)
  test = test.map(normalize_price)

  # Build the training input_fn.
  def input_train():
    return (
        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        train.shuffle(1000).batch(128)
        # Repeat forever.
        .repeat())

  # Build the validation input_fn.
  def input_test():
    return test.shuffle(1000).batch(128)

  # One way to handle a categorical feature is to assign a unique id to each
  # category by specifying the category's vocabulary (values outside this
  # vocabulary receive a weight of zero). Here we specify the vocabulary using
  # a list of options. The vocabulary can also be specified with a vocabulary
  # file (using `categorical_column_with_vocabulary_file`). For features
  # covering a range of positive integers use
  # `categorical_column_with_identity`.
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)
  make = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # Since this is a DNN model, convert categorical columns from sparse
      # to dense. Wrap them in an `indicator_column` to create a
      # one-hot vector from the input.
      tf.feature_column.indicator_column(body_style),
      # Or use an `embedding_column` to create a trainable vector for each
      # index.
      tf.feature_column.embedding_column(make, dimension=3),
  ]

  # Build a custom Estimator, using the model_fn.
  # `params` is passed through to the `model_fn`.
  model = tf.estimator.Estimator(
      model_fn=my_dnn_regression_fn,
      params={
          "feature_columns": feature_columns,
          "learning_rate": 0.001,
          "optimizer": tf.train.AdamOptimizer,
          "hidden_units": [20, 20]
      })

  # Train the model.
  model.train(input_fn=input_train, steps=STEPS)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=input_test)

  # Print the Root Mean Squared Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(PRICE_NORM_FACTOR * eval_result["rmse"]))

  print()


if __name__ == "__main__":
  # The Estimator periodically generates "INFO" logs; make these logs visible.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)
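
# A usage note (an assumption, not part of the original sample): running this
# script directly trains for STEPS batches and prints the test-set RMSE in
# dollars. The `predict_examples` sketch above could be called at the end of
# `main`, e.g. `predict_examples(model, input_test)`; `Estimator.predict`
# ignores the labels that the evaluation input_fn yields, so `input_test` can
# be reused for prediction.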