1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15
16"""Linear Estimators (deprecated).
17
18This module and all its submodules are deprecated. See
19[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
20for migration instructions.
21"""
22
23from __future__ import absolute_import
24from __future__ import division
25from __future__ import print_function
26
27import math
28
29import six
30
31from tensorflow.contrib import layers
32from tensorflow.contrib.framework import deprecated
33from tensorflow.contrib.framework import deprecated_arg_values
34from tensorflow.contrib.layers.python.layers import feature_column
35from tensorflow.contrib.learn.python.learn.estimators import estimator
36from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
37from tensorflow.contrib.learn.python.learn.estimators import prediction_key
38from tensorflow.contrib.learn.python.learn.utils import export
39from tensorflow.contrib.linear_optimizer.python import sdca_optimizer
40from tensorflow.python.feature_column import feature_column_lib as fc_core
41from tensorflow.python.framework import dtypes
42from tensorflow.python.framework import ops
43from tensorflow.python.framework import sparse_tensor
44from tensorflow.python.framework import tensor_util
45from tensorflow.python.ops import array_ops
46from tensorflow.python.ops import clip_ops
47from tensorflow.python.ops import gradients
48from tensorflow.python.ops import partitioned_variables
49from tensorflow.python.ops import variable_scope
50from tensorflow.python.platform import tf_logging as logging
51from tensorflow.python.training import session_run_hook
52from tensorflow.python.training import training as train
53from tensorflow.python.training import training_util
54
55
# Learning rate handed to optimizers that `_get_optimizer` builds from a
# string name, and the upper bound on the rate picked by
# `_get_default_optimizer`. The value 0.2 is a historical artifact of the
# initial implementation, but seems a reasonable choice.
_LEARNING_RATE = 0.2
59
60
def _get_optimizer(spec):
  """Resolves an optimizer specification into the object used for training.

  Args:
    spec: One of
      * a string, looked up in `layers.OPTIMIZER_CLS_NAMES`;
      * a callable, treated as a zero-argument factory;
      * any other object, which is used as-is.

  Returns:
    For a string, the matching `layers.OPTIMIZER_CLS_NAMES` entry called with
    `learning_rate=_LEARNING_RATE`. For a callable, the result of calling it
    without arguments. Otherwise `spec` itself.
  """
  if not isinstance(spec, six.string_types):
    # Callables act as factories; everything else is handed back untouched.
    return spec() if callable(spec) else spec
  optimizer_cls = layers.OPTIMIZER_CLS_NAMES[spec]
  return optimizer_cls(learning_rate=_LEARNING_RATE)
68
69
70# TODO(ispir): Remove this function by fixing '_infer_model' with single outputs
71# and as_iteable case.
72def _as_iterable(preds, output):
73  for pred in preds:
74    yield pred[output]
75
76
77def _add_bias_column(feature_columns, columns_to_tensors, bias_variable,
78                     columns_to_variables):
79  """Adds a fake bias feature column filled with all 1s."""
80  # TODO(b/31008490): Move definition to a common constants place.
81  bias_column_name = "tf_virtual_bias_column"
82  if any(col.name is bias_column_name for col in feature_columns):
83    raise ValueError("%s is a reserved column name." % bias_column_name)
84  if not feature_columns:
85    raise ValueError("feature_columns can't be empty.")
86
87  # Loop through input tensors until we can figure out batch_size.
88  batch_size = None
89  for column in columns_to_tensors.values():
90    if isinstance(column, tuple):
91      column = column[0]
92    if isinstance(column, sparse_tensor.SparseTensor):
93      shape = tensor_util.constant_value(column.dense_shape)
94      if shape is not None:
95        batch_size = shape[0]
96        break
97    else:
98      batch_size = array_ops.shape(column)[0]
99      break
100  if batch_size is None:
101    raise ValueError("Could not infer batch size from input features.")
102
103  bias_column = layers.real_valued_column(bias_column_name)
104  columns_to_tensors[bias_column] = array_ops.ones([batch_size, 1],
105                                                   dtype=dtypes.float32)
106  columns_to_variables[bias_column] = [bias_variable]
107
108
def _linear_model_fn(features, labels, mode, params, config=None):
  """Builds the `ModelFnOps` of a linear model trained by gradient descent.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
      A non-dict value is wrapped into a dict under the empty-string key.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance. Required.
      * feature_columns: An iterable containing all the feature columns used
          by the model. Required.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If missing or falsy, an FTRL
          optimizer from `_get_default_optimizer` is used.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * joint_weights: If True, the weights for all columns will be stored in
        a single (possibly partitioned) variable. It's more efficient, but
        it's incompatible with SDCAOptimizer, and requires all feature columns
        are sparse and use the 'sum' combiner. Defaults to False.
    config: `RunConfig` object to configure the runtime settings. Only its
      `num_ps_replicas` is read here; 0 is assumed when `config` is falsy.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer")
  if not optimizer:
    optimizer = _get_default_optimizer(feature_columns)
  clip_norm = params.get("gradient_clip_norm")
  ps_replicas = config.num_ps_replicas if config else 0
  use_joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  # The scope name doubles as the collection that gathers the model weights.
  scope_name = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_scope(
      scope_name,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner) as scope:
    only_contrib_columns = all(
        isinstance(column, feature_column._FeatureColumn)  # pylint: disable=protected-access
        for column in feature_columns)
    if not only_contrib_columns:
      # At least one column is not a contrib `_FeatureColumn`: use the core
      # feature-column implementation.
      logits = fc_core.linear_model(
          features=features,
          feature_columns=feature_columns,
          units=head.logits_dimension,
          weight_collections=[scope_name])
    else:
      layer_fn = (layers.joint_weighted_sum_from_feature_columns
                  if use_joint_weights
                  else layers.weighted_sum_from_feature_columns)
      logits, _, _ = layer_fn(
          columns_to_tensors=features,
          feature_columns=feature_columns,
          num_outputs=head.logits_dimension,
          weight_collections=[scope_name],
          scope=scope)

    def _train_op_fn(loss):
      """Returns the op applying (optionally clipped) gradients of `loss`."""
      global_step = training_util.get_global_step()
      model_vars = ops.get_collection(scope_name)
      grads = gradients.gradients(loss, model_vars)
      if clip_norm:
        grads, _ = clip_ops.clip_by_global_norm(grads, clip_norm)
      grads_and_vars = zip(grads, model_vars)
      return _get_optimizer(optimizer).apply_gradients(
          grads_and_vars, global_step=global_step)

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
192
193
def sdca_model_fn(features, labels, mode, params):
  """Builds the `ModelFnOps` of a linear model trained with SDCA.

  Args:
    features: A dict of `Tensor` keyed by column name. The dict itself is not
      modified; a copy is extended with the transformed feature columns and a
      bias column.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance. Type must be one of `_BinarySvmHead`,
          `_RegressionHead` or `_BinaryLogisticHead`.
      * feature_columns: An iterable containing all the feature columns used
          by the model.
      * optimizer: An `SDCAOptimizer` instance.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights. The key itself is required.
      * update_weights_hook: A `SessionRunHook` object or None. Used to update
          model weights. Optional.

  Returns:
    A `ModelFnOps` instance. When an `update_weights_hook` is given it is
    appended to the instance's `training_chief_hooks`.

  Raises:
    ValueError: If `optimizer` is not an `SDCAOptimizer` instance.
    ValueError: If the type of head is neither `_BinarySvmHead`, nor
      `_BinaryLogisticHead` nor `_RegressionHead`.
    AssertionError: If head is a `_RegressionHead` whose `logits_dimension`
      is not 1.
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  weight_column_name = params["weight_column_name"]
  weights_hook = params.get("update_weights_hook")

  if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
    raise ValueError("Optimizer must be of type SDCAOptimizer")

  # The SDCA loss is dictated by the kind of head.
  if isinstance(head, head_lib._BinarySvmHead):  # pylint: disable=protected-access
    loss_type = "hinge_loss"
  elif isinstance(head, head_lib._BinaryLogisticHead):  # pylint: disable=protected-access
    loss_type = "logistic_loss"
  elif isinstance(head, head_lib._RegressionHead):  # pylint: disable=protected-access
    assert head.logits_dimension == 1, ("SDCA only applies for "
                                        "logits_dimension=1.")
    loss_type = "squared_loss"
  else:
    raise ValueError("Unsupported head type: {}".format(head))

  scope_name = "linear"

  with variable_scope.variable_scope(
      values=features.values(),
      name_or_scope=scope_name,
      partitioner=optimizer.partitioner) as scope:
    # Work on a copy so the caller's dict is left untouched.
    model_features = features.copy()
    model_features.update(
        layers.transform_features(model_features, feature_columns))
    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=model_features,
            feature_columns=feature_columns,
            num_outputs=1,
            scope=scope))

    # Registers the bias as an extra all-ones column in both dicts.
    _add_bias_column(feature_columns, model_features, bias,
                     columns_to_variables)

  def _train_op_fn(unused_loss):
    """Returns the SDCA train op and hands it to the weights hook, if any."""
    global_step = training_util.get_global_step()
    sdca_model, train_op = optimizer.get_train_step(
        columns_to_variables, weight_column_name, loss_type, model_features,
        labels, global_step)
    if weights_hook is not None:
      weights_hook.set_parameters(sdca_model, train_op)
    return train_op

  model_fn_ops = head.create_model_fn_ops(
      features=model_features,
      labels=labels,
      mode=mode,
      train_op_fn=_train_op_fn,
      logits=logits)
  if weights_hook is None:
    return model_fn_ops
  chief_hooks = model_fn_ops.training_chief_hooks + [weights_hook]
  return model_fn_ops._replace(training_chief_hooks=chief_hooks)
282
283
# Ensures consistency with LinearComposableModel.
def _get_default_optimizer(feature_columns):
  """Creates the FTRL optimizer used when no optimizer is specified.

  Args:
    feature_columns: Sized, non-empty collection of feature columns. Only its
      length is used.

  Returns:
    A `train.FtrlOptimizer` whose learning rate is
    `1 / sqrt(len(feature_columns))`, capped at `_LEARNING_RATE`.
  """
  num_columns = len(feature_columns)
  scaled_rate = 1.0 / math.sqrt(num_columns)
  return train.FtrlOptimizer(
      learning_rate=min(_LEARNING_RATE, scaled_rate))
288
289
class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook):
  """SessionRunHook to update and shrink SDCA model weights.

  `set_parameters` has to be called before `begin`, because `begin` builds
  the update op from the stored model and train op.
  """

  def __init__(self):
    """Creates the hook; model and train op are supplied via `set_parameters`."""

  def set_parameters(self, sdca_model, train_op):
    """Stores the SDCA model and its train op for use in `begin`.

    Args:
      sdca_model: Object exposing `update_weights(train_op)`.
      train_op: The train op passed to `sdca_model.update_weights`.
    """
    self._train_op = train_op
    self._sdca_model = sdca_model

  def begin(self):
    """Construct the update_weights op.

    The op is implicitly added to the default graph.
    """
    model, train_op = self._sdca_model, self._train_op
    self._update_op = model.update_weights(train_op)

  def before_run(self, run_context):
    """Return the update_weights op so that it is executed during this run."""
    update_op = self._update_op
    return session_run_hook.SessionRunArgs(update_op)
310
311
class LinearClassifier(estimator.Estimator):
  """Linear classifier model.

  THIS CLASS IS DEPRECATED. See
  [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
  for general migration instructions.

  Train a linear model to classify instances into one of multiple possible
  classes. When number of possible classes is 2, this is binary classification.

  Example:

  ```python
  sparse_column_a = sparse_column_with_hash_bucket(...)
  sparse_column_b = sparse_column_with_hash_bucket(...)

  sparse_feature_a_x_sparse_feature_b = crossed_column(...)

  # Estimator using the default optimizer.
  estimator = LinearClassifier(
      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b])

  # Or estimator using the FTRL optimizer with regularization.
  estimator = LinearClassifier(
      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b],
      optimizer=tf.train.FtrlOptimizer(
        learning_rate=0.1,
        l1_regularization_strength=0.001
      ))

  # Or estimator using the SDCAOptimizer.
  estimator = LinearClassifier(
     feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b],
     optimizer=tf.contrib.linear_optimizer.SDCAOptimizer(
       example_id_column='example_id',
       num_loss_partitions=...,
       symmetric_l2_regularization=2.0
     ))

  # Input builders
  def input_fn_train():  # returns x, y (where y represents label's class index).
    ...
  def input_fn_eval():  # returns x, y (where y represents label's class index).
    ...
  def input_fn_predict():  # returns x, None.
    ...
  estimator.fit(input_fn=input_fn_train)
  estimator.evaluate(input_fn=input_fn_eval)
  # predict_classes returns class indices.
  estimator.predict_classes(input_fn=input_fn_predict)
  ```

  If the user specifies `label_keys` in constructor, labels must be strings from
  the `label_keys` vocabulary. Example:

  ```python
  label_keys = ['label0', 'label1', 'label2']
  estimator = LinearClassifier(
      n_classes=len(label_keys),
      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b],
      label_keys=label_keys)

  def input_fn_train():  # returns x, y (where y is one of label_keys).
    ...
  estimator.fit(input_fn=input_fn_train)

  def input_fn_eval():  # returns x, y (where y is one of label_keys).
    ...
  estimator.evaluate(input_fn=input_fn_eval)

  def input_fn_predict():  # returns x, None
    ...
  # predict_classes returns one of label_keys.
  estimator.predict_classes(input_fn=input_fn_predict)
  ```

  Input of `fit` and `evaluate` should have following features,
    otherwise there will be a `KeyError`:

  * if `weight_column_name` is not `None`, a feature with
    `key=weight_column_name` whose value is a `Tensor`.
  * for each `column` in `feature_columns`:
    - if `column` is a `SparseColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedSparseColumn`, two features: the first with
      `key` the id column name, the second with `key` the weight column name.
      Both features' `value` must be a `SparseTensor`.
    - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
      whose `value` is a `Tensor`.
  """

  def __init__(self,  # _joint_weight pylint: disable=invalid-name
               feature_columns,
               model_dir=None,
               n_classes=2,
               weight_column_name=None,
               optimizer=None,
               gradient_clip_norm=None,
               enable_centered_bias=False,
               _joint_weight=False,
               config=None,
               feature_engineering_fn=None,
               label_keys=None):
    """Construct a `LinearClassifier` estimator object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`. It is materialized into a tuple once, so a
        one-shot iterator is accepted.
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator
        to continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        Note that class labels are integers representing the class index (i.e.
        values from 0 to n_classes-1). For arbitrary label values (e.g. string
        labels), convert to class indices first.
      weight_column_name: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      optimizer: The optimizer used to train the model. If specified, it should
        be either an instance of `tf.Optimizer` or the SDCAOptimizer. If `None`,
        the Ftrl optimizer will be used.
      gradient_clip_norm: A `float` > 0. If provided, gradients are clipped
        to their global norm with this clipping ratio. See
        `tf.clip_by_global_norm` for more details. Not used with
        SDCAOptimizer.
      enable_centered_bias: A bool. If True, estimator will learn a centered
        bias variable for each class. Rest of the model structure learns the
        residual after centered bias.
      _joint_weight: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.
      config: `RunConfig` object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
                        labels which are the output of `input_fn` and
                        returns features and labels which will be fed
                        into the model.
      label_keys: Optional list of strings with size `[n_classes]` defining the
        label vocabulary. Only supported for `n_classes` > 2.

    Returns:
      A `LinearClassifier` estimator.

    Raises:
      ValueError: if n_classes < 2.
      ValueError: if enable_centered_bias=True and optimizer is SDCAOptimizer.
      AssertionError: if `feature_columns` is empty, or if optimizer is
        SDCAOptimizer and either `_joint_weight` is set or `n_classes` is
        not 2.
    """
    if (isinstance(optimizer, sdca_optimizer.SDCAOptimizer) and
        enable_centered_bias):
      raise ValueError("enable_centered_bias is not supported with SDCA")

    # Materialize exactly once. `feature_columns` may be a one-shot iterator,
    # and the same columns are needed by the model_fn and later by `export`.
    self._feature_columns = tuple(feature_columns or [])
    assert self._feature_columns

    head = head_lib.multi_class_head(
        n_classes,
        weight_column_name=weight_column_name,
        enable_centered_bias=enable_centered_bias,
        label_keys=label_keys)
    params = {
        "head": head,
        # Pass the materialized tuple, not the caller's object: an iterator
        # would already have been exhausted by the `tuple(...)` call above.
        "feature_columns": self._feature_columns,
        "optimizer": optimizer,
    }

    if isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
      assert not _joint_weight, ("_joint_weight is incompatible with the"
                                 " SDCAOptimizer")
      assert n_classes == 2, "SDCA only applies to binary classification."

      model_fn = sdca_model_fn
      # The model_fn passes the model parameters to the chief hook. We then use
      # the hook to update weights and shrink step only on the chief.
      params.update({
          "weight_column_name": weight_column_name,
          "update_weights_hook": _SdcaUpdateWeightsHook(),
      })
    else:
      model_fn = _linear_model_fn
      params.update({
          "gradient_clip_norm": gradient_clip_norm,
          "joint_weights": _joint_weight,
      })

    super(LinearClassifier, self).__init__(
        model_fn=model_fn,
        model_dir=model_dir,
        config=config,
        params=params,
        feature_engineering_fn=feature_engineering_fn)

  @deprecated_arg_values(
      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
      as_iterable=False)
  @deprecated_arg_values(
      "2017-03-01",
      "Please switch to predict_classes, or set `outputs` argument.",
      outputs=None)
  def predict(self, x=None, input_fn=None, batch_size=None, outputs=None,
              as_iterable=True):
    """Returns predictions for given features.

    By default, returns predicted classes. But this default will be dropped
    soon. Users should either pass `outputs`, or call `predict_classes` method.

    Args:
      x: features.
      input_fn: Input function. If set, x must be None.
      batch_size: Override default batch size.
      outputs: list of `str`, name of the output to predict.
        If `None` (or empty), returns classes.
      as_iterable: If True, return an iterable which keeps yielding predictions
        for each example until inputs are exhausted. Note: The inputs must
        terminate if you want the iterable to terminate (e.g. be sure to pass
        num_epochs=1 if you are using something like read_batch_features).

    Returns:
      Numpy array of predicted classes with shape [batch_size] (or an iterable
      of predicted classes if as_iterable is True). Each predicted class is
      represented by its class index (i.e. integer from 0 to n_classes-1).
      If `outputs` is set, returns a dict of predictions.
    """
    if not outputs:
      # Legacy default: behave exactly like `predict_classes`.
      return self.predict_classes(
          x=x,
          input_fn=input_fn,
          batch_size=batch_size,
          as_iterable=as_iterable)
    return super(LinearClassifier, self).predict(
        x=x,
        input_fn=input_fn,
        batch_size=batch_size,
        outputs=outputs,
        as_iterable=as_iterable)

  @deprecated_arg_values(
      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
      as_iterable=False)
  def predict_classes(self, x=None, input_fn=None, batch_size=None,
                      as_iterable=True):
    """Returns predicted classes for given features.

    Args:
      x: features.
      input_fn: Input function. If set, x must be None.
      batch_size: Override default batch size.
      as_iterable: If True, return an iterable which keeps yielding predictions
        for each example until inputs are exhausted. Note: The inputs must
        terminate if you want the iterable to terminate (e.g. be sure to pass
        num_epochs=1 if you are using something like read_batch_features).

    Returns:
      Numpy array of predicted classes with shape [batch_size] (or an iterable
      of predicted classes if as_iterable is True). Each predicted class is
      represented by its class index (i.e. integer from 0 to n_classes-1).
    """
    key = prediction_key.PredictionKey.CLASSES
    preds = super(LinearClassifier, self).predict(
        x=x,
        input_fn=input_fn,
        batch_size=batch_size,
        outputs=[key],
        as_iterable=as_iterable)
    if as_iterable:
      # Unwrap the single requested output from each per-example dict.
      return _as_iterable(preds, output=key)
    return preds[key]

  @deprecated_arg_values(
      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
      as_iterable=False)
  def predict_proba(self, x=None, input_fn=None, batch_size=None,
                    as_iterable=True):
    """Returns predicted probabilities for given features.

    Args:
      x: features.
      input_fn: Input function. If set, x must be None.
      batch_size: Override default batch size.
      as_iterable: If True, return an iterable which keeps yielding predictions
        for each example until inputs are exhausted. Note: The inputs must
        terminate if you want the iterable to terminate (e.g. be sure to pass
        num_epochs=1 if you are using something like read_batch_features).

    Returns:
      Numpy array of predicted probabilities with shape [batch_size, n_classes]
      (or an iterable of predicted probabilities if as_iterable is True).
    """
    key = prediction_key.PredictionKey.PROBABILITIES
    preds = super(LinearClassifier, self).predict(
        x=x,
        input_fn=input_fn,
        batch_size=batch_size,
        outputs=[key],
        as_iterable=as_iterable)
    if as_iterable:
      # Unwrap the single requested output from each per-example dict.
      return _as_iterable(preds, output=key)
    return preds[key]

  @deprecated("2017-03-25", "Please use Estimator.export_savedmodel() instead.")
  def export(self,
             export_dir,
             input_fn=None,
             input_feature_key=None,
             use_deprecated_input_fn=True,
             signature_fn=None,
             default_batch_size=1,
             exports_to_keep=None):
    """See BaseEstimator.export.

    When `input_fn` is not given, examples are parsed with the feature columns
    this estimator was constructed with. When `signature_fn` is not given,
    `export.classification_signature_fn_with_prob` is used. The exported
    prediction is always the `PROBABILITIES` output.
    """
    def default_input_fn(unused_estimator, examples):
      return layers.parse_feature_columns_from_examples(
          examples, self._feature_columns)

    return super(LinearClassifier, self).export(
        export_dir=export_dir,
        input_fn=input_fn or default_input_fn,
        input_feature_key=input_feature_key,
        use_deprecated_input_fn=use_deprecated_input_fn,
        signature_fn=(signature_fn or
                      export.classification_signature_fn_with_prob),
        prediction_key=prediction_key.PredictionKey.PROBABILITIES,
        default_batch_size=default_batch_size,
        exports_to_keep=exports_to_keep)
635
636class LinearRegressor(estimator.Estimator):
637  """Linear regressor model.
638
639  THIS CLASS IS DEPRECATED. See
640  [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
641  for general migration instructions.
642
643  Train a linear regression model to predict label value given observation of
644  feature values.
645
646  Example:
647
648  ```python
649  sparse_column_a = sparse_column_with_hash_bucket(...)
650  sparse_column_b = sparse_column_with_hash_bucket(...)
651
652  sparse_feature_a_x_sparse_feature_b = crossed_column(...)
653
654  estimator = LinearRegressor(
655      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b])
656
657  # Input builders
658  def input_fn_train: # returns x, y
659    ...
660  def input_fn_eval: # returns x, y
661    ...
662  estimator.fit(input_fn=input_fn_train)
663  estimator.evaluate(input_fn=input_fn_eval)
664  estimator.predict(x=x)
665  ```
666
667  Input of `fit` and `evaluate` should have following features,
668    otherwise there will be a KeyError:
669
670  * if `weight_column_name` is not `None`:
671    key=weight_column_name, value=a `Tensor`
672  * for column in `feature_columns`:
673    - if isinstance(column, `SparseColumn`):
674        key=column.name, value=a `SparseTensor`
675    - if isinstance(column, `WeightedSparseColumn`):
676        {key=id column name, value=a `SparseTensor`,
677         key=weight column name, value=a `SparseTensor`}
678    - if isinstance(column, `RealValuedColumn`):
679        key=column.name, value=a `Tensor`
680  """
681
682  def __init__(self,  # _joint_weights: pylint: disable=invalid-name
683               feature_columns,
684               model_dir=None,
685               weight_column_name=None,
686               optimizer=None,
687               gradient_clip_norm=None,
688               enable_centered_bias=False,
689               label_dimension=1,
690               _joint_weights=False,
691               config=None,
692               feature_engineering_fn=None):
693    """Construct a `LinearRegressor` estimator object.
694
695    Args:
696      feature_columns: An iterable containing all the feature columns used by
697        the model. All items in the set should be instances of classes derived
698        from `FeatureColumn`.
699      model_dir: Directory to save model parameters, graph, etc. This can
700        also be used to load checkpoints from the directory into a estimator
701        to continue training a previously saved model.
702      weight_column_name: A string defining feature column name representing
703        weights. It is used to down weight or boost examples during training. It
704        will be multiplied by the loss of the example.
705      optimizer: An instance of `tf.Optimizer` used to train the model. If
706        `None`, will use an Ftrl optimizer.
707      gradient_clip_norm: A `float` > 0. If provided, gradients are clipped
708        to their global norm with this clipping ratio. See
709        `tf.clip_by_global_norm` for more details.
710      enable_centered_bias: A bool. If True, estimator will learn a centered
711        bias variable for each class. Rest of the model structure learns the
712        residual after centered bias.
713      label_dimension: Number of regression targets per example. This is the
714        size of the last dimension of the labels and logits `Tensor` objects
715        (typically, these have shape `[batch_size, label_dimension]`).
716      _joint_weights: If True use a single (possibly partitioned) variable to
717        store the weights. It's faster, but requires all feature columns are
718        sparse and have the 'sum' combiner. Incompatible with SDCAOptimizer.
719      config: `RunConfig` object to configure the runtime settings.
720      feature_engineering_fn: Feature engineering function. Takes features and
721                        labels which are the output of `input_fn` and
722                        returns features and labels which will be fed
723                        into the model.
724
725    Returns:
726      A `LinearRegressor` estimator.
727    """
728    self._feature_columns = tuple(feature_columns or [])
729    assert self._feature_columns
730
731    chief_hook = None
732    if (isinstance(optimizer, sdca_optimizer.SDCAOptimizer) and
733        enable_centered_bias):
734      enable_centered_bias = False
735      logging.warning("centered_bias is not supported with SDCA, "
736                      "please disable it explicitly.")
737    head = head_lib.regression_head(
738        weight_column_name=weight_column_name,
739        label_dimension=label_dimension,
740        enable_centered_bias=enable_centered_bias)
741    params = {
742        "head": head,
743        "feature_columns": feature_columns,
744        "optimizer": optimizer,
745    }
746
747    if isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
748      assert label_dimension == 1, "SDCA only applies for label_dimension=1."
749      assert not _joint_weights, ("_joint_weights is incompatible with"
750                                  " SDCAOptimizer.")
751
752      model_fn = sdca_model_fn
753      # The model_fn passes the model parameters to the chief_hook. We then use
754      # the hook to update weights and shrink step only on the chief.
755      chief_hook = _SdcaUpdateWeightsHook()
756      params.update({
757          "weight_column_name": weight_column_name,
758          "update_weights_hook": chief_hook,
759      })
760    else:
761      model_fn = _linear_model_fn
762      params.update({
763          "gradient_clip_norm": gradient_clip_norm,
764          "joint_weights": _joint_weights,
765      })
766
767    super(LinearRegressor, self).__init__(
768        model_fn=model_fn,
769        model_dir=model_dir,
770        config=config,
771        params=params,
772        feature_engineering_fn=feature_engineering_fn)
773
774  @deprecated_arg_values(
775      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
776      as_iterable=False)
777  @deprecated_arg_values(
778      "2017-03-01",
779      "Please switch to predict_scores, or set `outputs` argument.",
780      outputs=None)
781  def predict(self, x=None, input_fn=None, batch_size=None, outputs=None,
782              as_iterable=True):
783    """Returns predictions for given features.
784
785    By default, returns predicted scores. But this default will be dropped
786    soon. Users should either pass `outputs`, or call `predict_scores` method.
787
788    Args:
789      x: features.
790      input_fn: Input function. If set, x must be None.
791      batch_size: Override default batch size.
792      outputs: list of `str`, name of the output to predict.
793        If `None`, returns scores.
794      as_iterable: If True, return an iterable which keeps yielding predictions
795        for each example until inputs are exhausted. Note: The inputs must
796        terminate if you want the iterable to terminate (e.g. be sure to pass
797        num_epochs=1 if you are using something like read_batch_features).
798
799    Returns:
800      Numpy array of predicted scores (or an iterable of predicted scores if
801      as_iterable is True). If `label_dimension == 1`, the shape of the output
802      is `[batch_size]`, otherwise the shape is `[batch_size, label_dimension]`.
803      If `outputs` is set, returns a dict of predictions.
804    """
805    if not outputs:
806      return self.predict_scores(
807          x=x,
808          input_fn=input_fn,
809          batch_size=batch_size,
810          as_iterable=as_iterable)
811    return super(LinearRegressor, self).predict(
812        x=x,
813        input_fn=input_fn,
814        batch_size=batch_size,
815        outputs=outputs,
816        as_iterable=as_iterable)
817
818  @deprecated_arg_values(
819      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
820      as_iterable=False)
821  def predict_scores(self, x=None, input_fn=None, batch_size=None,
822                     as_iterable=True):
823    """Returns predicted scores for given features.
824
825    Args:
826      x: features.
827      input_fn: Input function. If set, x must be None.
828      batch_size: Override default batch size.
829      as_iterable: If True, return an iterable which keeps yielding predictions
830        for each example until inputs are exhausted. Note: The inputs must
831        terminate if you want the iterable to terminate (e.g. be sure to pass
832        num_epochs=1 if you are using something like read_batch_features).
833
834    Returns:
835      Numpy array of predicted scores (or an iterable of predicted scores if
836      as_iterable is True). If `label_dimension == 1`, the shape of the output
837      is `[batch_size]`, otherwise the shape is `[batch_size, label_dimension]`.
838    """
839    key = prediction_key.PredictionKey.SCORES
840    preds = super(LinearRegressor, self).predict(
841        x=x,
842        input_fn=input_fn,
843        batch_size=batch_size,
844        outputs=[key],
845        as_iterable=as_iterable)
846    if as_iterable:
847      return _as_iterable(preds, output=key)
848    return preds[key]
849
850  @deprecated("2017-03-25", "Please use Estimator.export_savedmodel() instead.")
851  def export(self,
852             export_dir,
853             input_fn=None,
854             input_feature_key=None,
855             use_deprecated_input_fn=True,
856             signature_fn=None,
857             default_batch_size=1,
858             exports_to_keep=None):
859    """See BaseEstimator.export."""
860    def default_input_fn(unused_estimator, examples):
861      return layers.parse_feature_columns_from_examples(
862          examples, self._feature_columns)
863
864    return super(LinearRegressor, self).export(
865        export_dir=export_dir,
866        input_fn=input_fn or default_input_fn,
867        input_feature_key=input_feature_key,
868        use_deprecated_input_fn=use_deprecated_input_fn,
869        signature_fn=(signature_fn or export.regression_signature_fn),
870        prediction_key=prediction_key.PredictionKey.SCORES,
871        default_batch_size=default_batch_size,
872        exports_to_keep=exports_to_keep)
873
874
class LinearEstimator(estimator.Estimator):
  """Linear model that uses a head supplied by the caller.

  THIS CLASS IS DEPRECATED. See
  [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
  for general migration instructions.

  Trains a generalized linear model that predicts the label value from the
  observed feature values.

  Example:
  Poisson regression can be set up as follows:

  ```python
  sparse_column_a = sparse_column_with_hash_bucket(...)
  sparse_column_b = sparse_column_with_hash_bucket(...)

  sparse_feature_a_x_sparse_feature_b = crossed_column(...)

  estimator = LinearEstimator(
      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b],
      head=head_lib.poisson_regression_head())

  # Input builders
  def input_fn_train():  # returns x, y
    ...
  def input_fn_eval():  # returns x, y
    ...
  estimator.fit(input_fn=input_fn_train)
  estimator.evaluate(input_fn=input_fn_eval)
  estimator.predict(x=x)
  ```

  The input to `fit` and `evaluate` must contain the following features;
  otherwise a KeyError is raised:

  * if `weight_column_name` is not `None`:
    key=weight_column_name, value=a `Tensor`
  * for column in `feature_columns`:
    - if isinstance(column, `SparseColumn`):
        key=column.name, value=a `SparseTensor`
    - if isinstance(column, `WeightedSparseColumn`):
        {key=id column name, value=a `SparseTensor`,
         key=weight column name, value=a `SparseTensor`}
    - if isinstance(column, `RealValuedColumn`):
        key=column.name, value=a `Tensor`
  """

  def __init__(self,  # _joint_weights: pylint: disable=invalid-name
               feature_columns,
               head,
               model_dir=None,
               weight_column_name=None,
               optimizer=None,
               gradient_clip_norm=None,
               _joint_weights=False,
               config=None,
               feature_engineering_fn=None):
    """Builds a `LinearEstimator`.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`. Must be non-empty.
      head: An instance of _Head class.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      weight_column_name: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      optimizer: An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Ftrl optimizer.
      gradient_clip_norm: A `float` > 0. If provided, gradients are clipped
        to their global norm with this clipping ratio. See
        `tf.clip_by_global_norm` for more details.
      _joint_weights: If True use a single (possibly partitioned) variable to
        store the weights. It's faster, but requires all feature columns are
        sparse and have the 'sum' combiner. Incompatible with SDCAOptimizer.
      config: `RunConfig` object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
        labels which are the output of `input_fn` and returns features and
        labels which will be fed into the model.

    Returns:
      A `LinearEstimator` estimator.

    Raises:
      AssertionError: if `feature_columns` is empty or otherwise falsy.
      ValueError: if optimizer is not supported, e.g., SDCAOptimizer
    """
    # Emptiness is checked first, then the unsupported-optimizer case.
    assert feature_columns
    if isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
      raise ValueError("LinearEstimator does not support SDCA optimizer.")

    # Everything `_linear_model_fn` receives travels through `params`.
    model_params = dict(
        head=head,
        feature_columns=feature_columns,
        optimizer=optimizer,
        gradient_clip_norm=gradient_clip_norm,
        joint_weights=_joint_weights)
    super(LinearEstimator, self).__init__(
        model_fn=_linear_model_fn,
        model_dir=model_dir,
        config=config,
        params=model_params,
        feature_engineering_fn=feature_engineering_fn)
983