1// Protocol messages for describing input data Examples for machine learning
2// model training or inference.
3syntax = "proto3";
4
5package tensorflow;
6
7import "tensorflow/core/example/feature.proto";
8
9option cc_enable_arenas = true;
10option java_outer_classname = "ExampleProtos";
11option java_multiple_files = true;
12option java_package = "org.tensorflow.example";
13option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example/example_protos_go_proto";
14
15// An Example is a mostly-normalized data format for storing data for
16// training and inference.  It contains a key-value store (features), where
17// each key (string) maps to a Feature message (a oneof of packed BytesList,
18// FloatList, or Int64List).  This flexible and compact format allows the
19// storage of large amounts of typed data, but requires that the data shape
20// and use be determined by the configuration files and parsers that are used to
21// read and write this format.  That is, the Example is mostly *not* a
22// self-describing format.  In TensorFlow, Examples are read in row-major
23// format, so any configuration that describes data with rank-2 or above
24// should keep this in mind.  For example, to store an M x N matrix of Bytes,
25// the BytesList must contain M*N bytes, with M rows of N contiguous values
26// each.  That is, the BytesList value must store the matrix as:
27//     .... row 0 .... .... row 1 .... // ...........  // ... row M-1 ....
28//
29// An Example for a movie recommendation application:
30//   features {
31//     feature {
32//       key: "age"
33//       value { float_list {
34//         value: 29.0
35//       }}
36//     }
37//     feature {
38//       key: "movie"
39//       value { bytes_list {
40//         value: "The Shawshank Redemption"
41//         value: "Fight Club"
42//       }}
43//     }
44//     feature {
45//       key: "movie_ratings"
46//       value { float_list {
47//         value: 9.0
48//         value: 9.7
49//       }}
50//     }
51//     feature {
52//       key: "suggestion"
53//       value { bytes_list {
54//         value: "Inception"
55//       }}
56//     }
57//     # Note that this feature exists to be used as a label in training.
58//     # E.g., if training a logistic regression model to predict purchase
59//     # probability in our learning tool we would set the label feature to
60//     # "suggestion_purchased".
61//     feature {
62//       key: "suggestion_purchased"
63//       value { float_list {
64//         value: 1.0
65//       }}
66//     }
67//     # Similar to "suggestion_purchased" above this feature exists to be used
68//     # as a label in training.
69//     # E.g., if training a linear regression model to predict purchase
70//     # price in our learning tool we would set the label feature to
71//     # "purchase_price".
72//     feature {
73//       key: "purchase_price"
74//       value { float_list {
75//         value: 9.99
76//       }}
77//     }
78//   }
79//
80// A conformant Example data set obeys the following conventions:
81//   - If a Feature K exists in one example with data type T, it must be of
82//       type T in all other examples when present. It may be omitted.
83//   - The number of instances of Feature K list data may vary across examples,
84//       depending on the requirements of the model.
85//   - If a Feature K doesn't exist in an example, a K-specific default will be
86//       used, if configured.
87//   - If a Feature K exists in an example but contains no items, the intent
88//       is considered to be an empty tensor and no default will be used.
89
90message Example {
  // The example's entire payload: a key-value store in which each string key
  // maps to a Feature message. Data types, shapes and intended use are not
  // recorded here; readers and writers must agree on them via configuration.
  // NOTE(review): Features is not defined in this file; presumably it comes
  // from the imported tensorflow/core/example/feature.proto -- confirm.
91  Features features = 1;
92}
93
94// A SequenceExample is an Example representing one or more sequences, and
95// some context.  The context contains features which apply to the entire
96// example. The feature_lists contain a key-value map where each key is
97// associated with a repeated set of Feature messages (a FeatureList).
98// A FeatureList thus represents the values of a feature identified by its key
99// over time / frames.
100//
101// Below is a SequenceExample for a movie recommendation application recording a
102// sequence of ratings by a user. The time-independent features ("locale",
103// "age", "favorites") describing the user are part of the context. The sequence
104// of movies the user rated is part of the feature_lists. For each movie in the
105// sequence we have information on its name and actors and the user's rating.
106// This information is recorded in three separate feature_list(s).
107// In the example below there are only two movies. All three feature_list(s),
108// namely "movie_ratings", "movie_names", and "actors" have a feature value for
109// both movies. Note that "actors" is itself a bytes_list with multiple
110// strings per movie.
111//
112// context: {
113//   feature: {
114//     key  : "locale"
115//     value: {
116//       bytes_list: {
117//         value: [ "pt_BR" ]
118//       }
119//     }
120//   }
121//   feature: {
122//     key  : "age"
123//     value: {
124//       float_list: {
125//         value: [ 19.0 ]
126//       }
127//     }
128//   }
129//   feature: {
130//     key  : "favorites"
131//     value: {
132//       bytes_list: {
133//         value: [ "Majesty Rose", "Savannah Outen", "One Direction" ]
134//       }
135//     }
136//   }
137// }
138// feature_lists: {
139//   feature_list: {
140//     key  : "movie_ratings"
141//     value: {
142//       feature: {
143//         float_list: {
144//           value: [ 4.5 ]
145//         }
146//       }
147//       feature: {
148//         float_list: {
149//           value: [ 5.0 ]
150//         }
151//       }
152//     }
153//   }
154//   feature_list: {
155//     key  : "movie_names"
156//     value: {
157//       feature: {
158//         bytes_list: {
159//           value: [ "The Shawshank Redemption" ]
160//         }
161//       }
162//       feature: {
163//         bytes_list: {
164//           value: [ "Fight Club" ]
165//         }
166//       }
167//     }
168//   }
169//   feature_list: {
170//     key  : "actors"
171//     value: {
172//       feature: {
173//         bytes_list: {
174//           value: [ "Tim Robbins", "Morgan Freeman" ]
175//         }
176//       }
177//       feature: {
178//         bytes_list: {
179//           value: [ "Brad Pitt", "Edward Norton", "Helena Bonham Carter" ]
180//         }
181//       }
182//     }
183//   }
184// }
185//
186// A conformant SequenceExample data set obeys the following conventions:
187//
188// Context:
189//   - All conformant context features K must obey the same conventions as
190//     a conformant Example's features (see above).
191// Feature lists:
192//   - A FeatureList L may be missing in an example; it is up to the
193//     parser configuration to determine if this is allowed or considered
194//     an empty list (zero length).
195//   - If a FeatureList L exists, it may be empty (zero length).
196//   - If a FeatureList L is non-empty, all features within the FeatureList
197//     must have the same data type T. Even across SequenceExamples, the type T
198//     of the FeatureList identified by the same key must be the same. An entry
199//     without any values may serve as an empty feature.
200//   - If a FeatureList L is non-empty, it is up to the parser configuration
201//     to determine if all features within the FeatureList must
202//     have the same size.  The same holds for this FeatureList across multiple
203//     examples.
204//   - For sequence modeling, e.g.:
205//        http://colah.github.io/posts/2015-08-Understanding-LSTMs/
206//        https://github.com/tensorflow/nmt
207//     the feature lists represent a sequence of frames.
208//     In this scenario, all FeatureLists in a SequenceExample have the same
209//     number of Feature messages, so that the ith element in each FeatureList
210//     is part of the ith frame (or time step).
211// Examples of conformant and non-conformant examples' FeatureLists:
212//
213// Conformant FeatureLists:
214//    feature_lists: { feature_list: {
215//      key: "movie_ratings"
216//      value: { feature: { float_list: { value: [ 4.5 ] } }
217//               feature: { float_list: { value: [ 5.0 ] } } }
218//    } }
219//
220// Non-conformant FeatureLists (mismatched types):
221//    feature_lists: { feature_list: {
222//      key: "movie_ratings"
223//      value: { feature: { float_list: { value: [ 4.5 ] } }
224//               feature: { int64_list: { value: [ 5 ] } } }
225//    } }
226//
227// Conditionally conformant FeatureLists, the parser configuration determines
228// if the feature sizes must match:
229//    feature_lists: { feature_list: {
230//      key: "movie_ratings"
231//      value: { feature: { float_list: { value: [ 4.5 ] } }
232//               feature: { float_list: { value: [ 5.0, 6.0 ] } } }
233//    } }
234//
235// Conformant pair of SequenceExample (differing FeatureList lengths):
236//    feature_lists: { feature_list: {
237//      key: "movie_ratings"
238//      value: { feature: { float_list: { value: [ 4.5 ] } }
239//               feature: { float_list: { value: [ 5.0 ] } } }
240//    } }
241// and:
242//    feature_lists: { feature_list: {
243//      key: "movie_ratings"
244//      value: { feature: { float_list: { value: [ 4.5 ] } }
245//               feature: { float_list: { value: [ 5.0 ] } }
246//               feature: { float_list: { value: [ 2.0 ] } } }
247//    } }
248//
249// Conformant pair of SequenceExample (second FeatureList is empty):
250//    feature_lists: { feature_list: {
251//      key: "movie_ratings"
252//      value: { feature: { float_list: { value: [ 4.5 ] } }
253//               feature: { float_list: { value: [ 5.0 ] } } }
254//    } }
255// and:
256//    feature_lists: { feature_list: {
257//      key: "movie_ratings"
258//      value: { }
259//    } }
260//
261// Conditionally conformant pair of SequenceExample, the parser configuration
262// determines if the second feature_lists is consistent (zero-length) or
263// invalid (missing "movie_ratings"):
264//    feature_lists: { feature_list: {
265//      key: "movie_ratings"
266//      value: { feature: { float_list: { value: [ 4.5 ] } }
267//               feature: { float_list: { value: [ 5.0 ] } } }
268//    } }
269// and:
270//    feature_lists: { }
271//
272// Non-conformant pair of SequenceExample (mismatched types):
273//    feature_lists: { feature_list: {
274//      key: "movie_ratings"
275//      value: { feature: { float_list: { value: [ 4.5 ] } }
276//               feature: { float_list: { value: [ 5.0 ] } } }
277//    } }
278// and:
279//    feature_lists: { feature_list: {
280//      key: "movie_ratings"
281//      value: { feature: { int64_list: { value: [ 4 ] } }
282//               feature: { int64_list: { value: [ 5 ] } }
283//               feature: { int64_list: { value: [ 2 ] } } }
284//    } }
285//
286// Conditionally conformant pair of SequenceExample; the parser configuration
287// determines if the feature sizes must match:
288//    feature_lists: { feature_list: {
289//      key: "movie_ratings"
290//      value: { feature: { float_list: { value: [ 4.5 ] } }
291//               feature: { float_list: { value: [ 5.0 ] } } }
292//    } }
293// and:
294//    feature_lists: { feature_list: {
295//      key: "movie_ratings"
296//      value: { feature: { float_list: { value: [ 4.0 ] } }
297//               feature: { float_list: { value: [ 5.0, 3.0 ] } } }
298//    } }
299
300message SequenceExample {
  // Features that apply to the entire example rather than to a single step
  // of a sequence (for instance, time-independent attributes of a user).
  // These follow the same conventions as the features of an Example.
301  Features context = 1;
  // Key-value map in which each key names a FeatureList: a repeated set of
  // Feature messages holding that feature's values over time / frames.
  // A key may be absent; whether that is rejected or treated as an empty
  // (zero-length) list is decided by the parser configuration.
  // NOTE(review): FeatureLists is not defined in this file; presumably it
  // comes from the imported tensorflow/core/example/feature.proto -- confirm.
302  FeatureLists feature_lists = 2;
303}
304