1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""CIFAR10 small images classification dataset. 16""" 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import os 22 23import numpy as np 24 25from tensorflow.python.keras import backend as K 26from tensorflow.python.keras.datasets.cifar import load_batch 27from tensorflow.python.keras.utils.data_utils import get_file 28from tensorflow.python.util.tf_export import keras_export 29 30 31@keras_export('keras.datasets.cifar10.load_data') 32def load_data(): 33 """Loads [CIFAR10 dataset](https://www.cs.toronto.edu/~kriz/cifar.html). 34 35 This is a dataset of 50,000 32x32 color training images and 10,000 test 36 images, labeled over 10 categories. See more info at the 37 [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html). 38 39 Returns: 40 Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 41 42 **x_train, x_test**: uint8 arrays of RGB image data with shape 43 `(num_samples, 3, 32, 32)` if `tf.keras.backend.image_data_format()` is 44 `'channels_first'`, or `(num_samples, 32, 32, 3)` if the data format 45 is `'channels_last'`. 46 47 **y_train, y_test**: uint8 arrays of category labels 48 (integers in range 0-9) each with shape (num_samples, 1). 49 """ 50 dirname = 'cifar-10-batches-py' 51 origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' 52 path = get_file( 53 dirname, 54 origin=origin, 55 untar=True, 56 file_hash= 57 '6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce') 58 59 num_train_samples = 50000 60 61 x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8') 62 y_train = np.empty((num_train_samples,), dtype='uint8') 63 64 for i in range(1, 6): 65 fpath = os.path.join(path, 'data_batch_' + str(i)) 66 (x_train[(i - 1) * 10000:i * 10000, :, :, :], 67 y_train[(i - 1) * 10000:i * 10000]) = load_batch(fpath) 68 69 fpath = os.path.join(path, 'test_batch') 70 x_test, y_test = load_batch(fpath) 71 72 y_train = np.reshape(y_train, (len(y_train), 1)) 73 y_test = np.reshape(y_test, (len(y_test), 1)) 74 75 if K.image_data_format() == 'channels_last': 76 x_train = x_train.transpose(0, 2, 3, 1) 77 x_test = x_test.transpose(0, 2, 3, 1) 78 79 x_test = x_test.astype(x_train.dtype) 80 y_test = y_test.astype(y_train.dtype) 81 82 return (x_train, y_train), (x_test, y_test) 83