1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Boston housing price regression dataset. 16""" 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import numpy as np 22 23from tensorflow.python.keras.utils.data_utils import get_file 24from tensorflow.python.util.tf_export import keras_export 25 26 27@keras_export('keras.datasets.boston_housing.load_data') 28def load_data(path='boston_housing.npz', test_split=0.2, seed=113): 29 """Loads the Boston Housing dataset. 30 31 Arguments: 32 path: path where to cache the dataset locally 33 (relative to ~/.keras/datasets). 34 test_split: fraction of the data to reserve as test set. 35 seed: Random seed for shuffling the data 36 before computing the test split. 37 38 Returns: 39 Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 40 """ 41 assert 0 <= test_split < 1 42 origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' 43 path = get_file( 44 path, 45 origin=origin_folder + 'boston_housing.npz', 46 file_hash= 47 'f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5') 48 with np.load(path) as f: 49 x = f['x'] 50 y = f['y'] 51 52 np.random.seed(seed) 53 indices = np.arange(len(x)) 54 np.random.shuffle(indices) 55 x = x[indices] 56 y = y[indices] 57 58 x_train = np.array(x[:int(len(x) * (1 - test_split))]) 59 y_train = np.array(y[:int(len(x) * (1 - test_split))]) 60 x_test = np.array(x[int(len(x) * (1 - test_split)):]) 61 y_test = np.array(y[int(len(x) * (1 - test_split)):]) 62 return (x_train, y_train), (x_test, y_test) 63