1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Boston housing price regression dataset.
16"""
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import numpy as np
22
23from tensorflow.python.keras.utils.data_utils import get_file
24from tensorflow.python.util.tf_export import keras_export
25
26
@keras_export('keras.datasets.boston_housing.load_data')
def load_data(path='boston_housing.npz', test_split=0.2, seed=113):
  """Loads the Boston Housing dataset.

  The archive is obtained through `get_file` (checked against a pinned file
  hash), its `x` and `y` arrays are shuffled with a permutation derived from
  `seed`, and the shuffled samples are split into a leading training portion
  and a trailing test portion.

  Arguments:
      path: path where to cache the dataset locally
          (relative to ~/.keras/datasets).
      test_split: fraction of the data to reserve as test set. Must satisfy
          `0 <= test_split < 1`.
      seed: Random seed for shuffling the data
          before computing the test split.

  Returns:
      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

  Raises:
      AssertionError: if `test_split` is outside `[0, 1)`.
  """
  assert 0 <= test_split < 1
  origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/'
  path = get_file(
      path,
      origin=origin_folder + 'boston_housing.npz',
      file_hash=
      'f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5')
  with np.load(path) as f:
    x = f['x']
    y = f['y']

  # Shuffle with a private generator instead of reseeding the global NumPy
  # RNG: `RandomState(seed)` yields the same permutation as
  # `np.random.seed(seed)` followed by `np.random.shuffle`, but leaves the
  # caller's global random state untouched.
  rng = np.random.RandomState(seed)
  indices = np.arange(len(x))
  rng.shuffle(indices)
  x = x[indices]
  y = y[indices]

  # Index of the first test sample; everything before it is training data.
  split_at = int(len(x) * (1 - test_split))
  x_train = np.array(x[:split_at])
  y_train = np.array(y[:split_at])
  x_test = np.array(x[split_at:])
  y_test = np.array(y[split_at:])
  return (x_train, y_train), (x_test, y_test)
63