// Protocol messages for describing input data Examples for machine learning
// model training or inference.
syntax = "proto3";

package tensorflow;

import "tensorflow/core/example/feature.proto";

option cc_enable_arenas = true;
option java_outer_classname = "ExampleProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.example";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example/example_protos_go_proto";

// An Example is a mostly-normalized data format for storing data for
// training and inference. It contains a key-value store (features), where
// each key (string) maps to a Feature message (which is a oneof of packed
// BytesList, FloatList, or Int64List). This flexible and compact format allows
// the storage of large amounts of typed data, but requires that the data shape
// and use be determined by the configuration files and parsers that are used to
// read and write this format. That is, the Example is mostly *not* a
// self-describing format. In TensorFlow, Examples are read in row-major
// format, so any configuration that describes data with rank-2 or above
// should keep this in mind. For example, to store an M x N matrix of Bytes,
// the BytesList must contain M*N bytes, with M rows of N contiguous values
// each. That is, the BytesList value must store the matrix as:
//     .... row 0 .... .... row 1 .... // ...........  // ... row M-1 ....
//
// An Example for a movie recommendation application:
//   features {
//     feature {
//       key: "age"
//       value { float_list {
//         value: 29.0
//       }}
//     }
//     feature {
//       key: "movie"
//       value { bytes_list {
//         value: "The Shawshank Redemption"
//         value: "Fight Club"
//       }}
//     }
//     feature {
//       key: "movie_ratings"
//       value { float_list {
//         value: 9.0
//         value: 9.7
//       }}
//     }
//     feature {
//       key: "suggestion"
//       value { bytes_list {
//         value: "Inception"
//       }}
//     }
//     # Note that this feature exists to be used as a label in training.
//     # E.g., if training a logistic regression model to predict purchase
//     # probability in our learning tool we would set the label feature to
//     # "suggestion_purchased".
//     feature {
//       key: "suggestion_purchased"
//       value { float_list {
//         value: 1.0
//       }}
//     }
//     # Similar to "suggestion_purchased" above, this feature exists to be
//     # used as a label in training.
//     # E.g., if training a linear regression model to predict purchase
//     # price in our learning tool we would set the label feature to
//     # "purchase_price".
//     feature {
//       key: "purchase_price"
//       value { float_list {
//         value: 9.99
//       }}
//     }
//   }
//
// A conformant Example data set obeys the following conventions:
//   - If a Feature K exists in one example with data type T, it must be of
//     type T in all other examples when present. It may be omitted.
//   - The number of instances of Feature K list data may vary across examples,
//     depending on the requirements of the model.
//   - If a Feature K doesn't exist in an example, a K-specific default will be
//     used, if configured.
//   - If a Feature K exists in an example but contains no items, the intent
//     is considered to be an empty tensor and no default will be used.
//
// In short: an Example is a single record wrapping one Features store.
message Example {
  // Key-value store holding this example's data; each key (string) maps to a
  // Feature message.
  Features features = 1;
}

// A SequenceExample is an Example representing one or more sequences, and
// some context. The context contains features which apply to the entire
// example. The feature_lists contain a key, value map where each key is
// associated with a repeated set of Features (a FeatureList).
// A FeatureList thus represents the values of a feature identified by its key
// over time / frames.
//
// Below is a SequenceExample for a movie recommendation application recording a
// sequence of ratings by a user. The time-independent features ("locale",
// "age", "favorites") describing the user are part of the context. The sequence
// of movies the user rated is part of the feature_lists. For each movie in the
// sequence we have information on its name and actors and the user's rating.
// This information is recorded in three separate feature_list(s).
// In the example below there are only two movies. All three feature_list(s),
// namely "movie_ratings", "movie_names", and "actors" have a feature value for
// both movies. Note that "actors" is itself a bytes_list with multiple
// strings per movie.
//
// context: {
//   feature: {
//     key: "locale"
//     value: {
//       bytes_list: {
//         value: [ "pt_BR" ]
//       }
//     }
//   }
//   feature: {
//     key: "age"
//     value: {
//       float_list: {
//         value: [ 19.0 ]
//       }
//     }
//   }
//   feature: {
//     key: "favorites"
//     value: {
//       bytes_list: {
//         value: [ "Majesty Rose", "Savannah Outen", "One Direction" ]
//       }
//     }
//   }
// }
// feature_lists: {
//   feature_list: {
//     key: "movie_ratings"
//     value: {
//       feature: {
//         float_list: {
//           value: [ 4.5 ]
//         }
//       }
//       feature: {
//         float_list: {
//           value: [ 5.0 ]
//         }
//       }
//     }
//   }
//   feature_list: {
//     key: "movie_names"
//     value: {
//       feature: {
//         bytes_list: {
//           value: [ "The Shawshank Redemption" ]
//         }
//       }
//       feature: {
//         bytes_list: {
//           value: [ "Fight Club" ]
//         }
//       }
//     }
//   }
//   feature_list: {
//     key: "actors"
//     value: {
//       feature: {
//         bytes_list: {
//           value: [ "Tim Robbins", "Morgan Freeman" ]
//         }
//       }
//       feature: {
//         bytes_list: {
//           value: [ "Brad Pitt", "Edward Norton", "Helena Bonham Carter" ]
//         }
//       }
//     }
//   }
// }
//
// A conformant SequenceExample data set obeys the following conventions:
//
// Context:
//   - All conformant context features K must obey the same conventions as
//     a conformant Example's features (see above).
// Feature lists:
//   - A FeatureList L may be missing in an example; it is up to the
//     parser configuration to determine if this is allowed or considered
//     an empty list (zero length).
//   - If a FeatureList L exists, it may be empty (zero length).
//   - If a FeatureList L is non-empty, all features within the FeatureList
//     must have the same data type T. Even across SequenceExamples, the type T
//     of the FeatureList identified by the same key must be the same.
//     An entry without any values may serve as an empty feature.
//   - If a FeatureList L is non-empty, it is up to the parser configuration
//     to determine if all features within the FeatureList must
//     have the same size. The same holds for this FeatureList across multiple
//     examples.
//   - For sequence modeling, e.g.:
//        http://colah.github.io/posts/2015-08-Understanding-LSTMs/
//        https://github.com/tensorflow/nmt
//     the feature lists represent a sequence of frames.
//     In this scenario, all FeatureLists in a SequenceExample have the same
//     number of Feature messages, so that the ith element in each FeatureList
//     is part of the ith frame (or time step).
// Examples of conformant and non-conformant examples' FeatureLists:
//
// Conformant FeatureLists:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
//
// Non-conformant FeatureLists (mismatched types):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { int64_list: { value: [ 5 ] } } }
//    } }
//
// Conditionally conformant FeatureLists, the parser configuration determines
// if the feature sizes must match:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0, 6.0 ] } } }
//    } }
//
// Conformant pair of SequenceExample
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } }
//               feature: {
//                 float_list: { value: [ 2.0 ] } } }
//    } }
//
// Conformant pair of SequenceExample
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { }
//    } }
//
// Conditionally conformant pair of SequenceExample, the parser configuration
// determines if the second feature_lists is consistent (zero-length) or
// invalid (missing "movie_ratings"):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { }
//
// Non-conformant pair of SequenceExample (mismatched types)
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { int64_list: { value: [ 4 ] } }
//               feature: { int64_list: { value: [ 5 ] } }
//               feature: { int64_list: { value: [ 2 ] } } }
//    } }
//
// Conditionally conformant pair of SequenceExample; the parser configuration
// determines if the feature sizes must match:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.0 ] } }
//               feature: { float_list: { value: [ 5.0, 3.0 ] } } }
//    } }

// An Example made of example-wide context features plus keyed feature lists.
message SequenceExample {
  // Features which apply to the entire example.
  Features context = 1;

  // Key, value map where each key is associated with a repeated set of
  // Features (a FeatureList).
  FeatureLists feature_lists = 2;
}