1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 /// \brief SQLite extension for Snappy compression
17 ///
/// Snappy is a compression library that trades ratio for speed, almost going a
/// tenth as fast as memcpy().
20 ///
21 /// FUNCTIONS
22 ///
23 /// - snap(value: BLOB|TEXT) -> BLOB
24 /// - snap(value: NULL|INT|REAL) -> value
25 ///
26 ///   Applies Snappy compression. If value is TEXT or BLOB, then it is
27 ///   compressed and a BLOB is returned with a byte prepended to indicate the
28 ///   original type. Other types are returned as-is.
29 ///
30 /// - unsnap(value: BLOB) -> TEXT|BLOB
31 /// - unsnap(value: TEXT) -> SQLITE_MISMATCH
32 /// - unsnap(value: NULL|INT|REAL) -> value
33 ///
34 ///   Decompresses value created by snap(). If value is empty, then an empty
35 ///   blob is returned. Otherwise the original type is restored from the first
36 ///   byte and the remaining ones are decompressed. TEXT is not allowed as an
37 ///   input type. Remaining types are returned as-is.
38 ///
39 /// PERFORMANCE CONSIDERATIONS
40 ///
41 /// These functions are deterministic. This means SQLite ≥3.8.3 will factor
42 /// them out of inner loops when constant arguments are provided. In SQLite
43 /// ≥3.15.0 they can be used in the WHERE clause of partial indexes. Currently
44 /// there is no support for common sub-expression elimination.
45 ///
46 /// SQLite environments that aren't universally UTF8 will work, but should
47 /// encounter superfluous charset transcodings; as this implementation encodes
48 /// only UTF8 TEXT for the sake of simplicity. Contributions are welcome that
49 /// register multiple sister functions for the various charsets, which use the
50 /// higher order bits of the type byte to indicate encoding.
51 ///
52 /// SUPPORT MATRIX
53 ///
/// - 3.20.0 (2017-08-01) What FOSS TensorFlow uses
55 /// - 3.13.0 (2016-05-18) What Google uses c. 2017-12
56 /// - 3.8.2  (2013-12-06) Used by Ubuntu 14.04
57 ///
58 /// MANUAL COMPILATION
59 ///
60 /// $ sudo apt-get install libsqlite3-dev libsnappy-dev
61 /// $ c++ -shared --std=c++11 -o libsnapfn.so -fPIC snapfn.cc -lsnappy
62 ///
63 /// $ sqlite3
64 /// sqlite> .load libsnapfn.so
65 /// sqlite> select hex(snap('aaaaaaaaaaaaaaaaa'));
66 /// 031100613E0100
67 /// sqlite> select unsnap(x'031100613E0100');
68 /// aaaaaaaaaaaaaaaaa
69 ///
70 /// $ python
71 /// >>> import sqlite3
72 /// >>> db = sqlite3.connect(':memory:')
73 /// >>> db.enable_load_extension(True)
74 /// >>> db.execute('select load_extension("libsnapfn.so")')
75 /// >>> db.enable_load_extension(False)
76 /// >>> db.execute('select hex(snap("aaaaaaaaaaaaaaaaa"))').fetchone()[0]
77 /// u'031100613E0100'
78 
79 #include "sqlite3ext.h"
80 #include "snappy.h"
81 
82 SQLITE_EXTENSION_INIT1
83 
snap(sqlite3_context * ctx,int,sqlite3_value ** argv)84 static void snap(sqlite3_context* ctx, int /*argc*/, sqlite3_value** argv) {
85   const char* data;
86   int type = sqlite3_value_type(argv[0]);
87   switch (type) {
88     case SQLITE_NULL:
89       return;
90     case SQLITE_INTEGER:
91       sqlite3_result_int64(ctx, sqlite3_value_int64(argv[0]));
92       return;
93     case SQLITE_FLOAT:
94       sqlite3_result_double(ctx, sqlite3_value_double(argv[0]));
95       return;
96     case SQLITE_BLOB:
97       data = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0]));
98       break;
99     case SQLITE_TEXT:
100       data = reinterpret_cast<const char*>(sqlite3_value_text(argv[0]));
101       break;
102     default:
103       sqlite3_result_error(ctx, "snap() invalid type", -1);
104       sqlite3_result_error_code(ctx, SQLITE_MISMATCH);
105       return;
106   }
107   int size = sqlite3_value_bytes(argv[0]);
108   if (size <= 0) {
109     char result[] = {static_cast<char>(type)};
110     sqlite3_result_blob(ctx, result, sizeof(result), SQLITE_TRANSIENT);
111     return;
112   }
113   size_t output_size =
114       snappy::MaxCompressedLength(static_cast<size_t>(size)) + 1;
115   if (output_size >
116       static_cast<size_t>(sqlite3_limit(sqlite3_context_db_handle(ctx),
117                                         SQLITE_LIMIT_LENGTH, -1))) {
118     sqlite3_result_error_toobig(ctx);
119     return;
120   }
121   auto output =
122       static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size)));
123   if (output == nullptr) {
124     sqlite3_result_error_nomem(ctx);
125     return;
126   }
127   *output++ = static_cast<char>(type), --output_size;
128   snappy::RawCompress(data, static_cast<size_t>(size), output, &output_size);
129   sqlite3_result_blob(ctx, output - 1, static_cast<int>(output_size + 1),
130                       sqlite3_free);
131 }
132 
unsnap(sqlite3_context * ctx,int,sqlite3_value ** argv)133 static void unsnap(sqlite3_context* ctx, int /*argc*/, sqlite3_value** argv) {
134   int type = sqlite3_value_type(argv[0]);
135   switch (type) {
136     case SQLITE_NULL:
137       return;
138     case SQLITE_INTEGER:
139       sqlite3_result_int64(ctx, sqlite3_value_int64(argv[0]));
140       return;
141     case SQLITE_FLOAT:
142       sqlite3_result_double(ctx, sqlite3_value_double(argv[0]));
143       return;
144     case SQLITE_BLOB:
145       break;
146     default:
147       sqlite3_result_error(ctx, "unsnap() invalid type", -1);
148       sqlite3_result_error_code(ctx, SQLITE_MISMATCH);
149       return;
150   }
151   int size = sqlite3_value_bytes(argv[0]);
152   auto blob = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0]));
153   if (size <= 0) {
154     sqlite3_result_zeroblob(ctx, 0);
155     return;
156   }
157   type = static_cast<int>(*blob++), --size;
158   if (type != SQLITE_BLOB && type != SQLITE_TEXT) {
159     sqlite3_result_error(ctx, "unsnap() first byte is invalid type", -1);
160     sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
161     return;
162   }
163   if (size == 0) {
164     if (type == SQLITE_TEXT) {
165       sqlite3_result_text(ctx, "", 0, SQLITE_STATIC);
166     } else {
167       sqlite3_result_zeroblob(ctx, 0);
168     }
169     return;
170   }
171   size_t output_size;
172   if (!snappy::GetUncompressedLength(blob, static_cast<size_t>(size),
173                                      &output_size)) {
174     sqlite3_result_error(ctx, "snappy parse error", -1);
175     sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
176     return;
177   }
178   if (output_size >
179       static_cast<size_t>(sqlite3_limit(sqlite3_context_db_handle(ctx),
180                                         SQLITE_LIMIT_LENGTH, -1))) {
181     sqlite3_result_error_toobig(ctx);
182     return;
183   }
184   auto output =
185       static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size)));
186   if (output == nullptr) {
187     sqlite3_result_error_nomem(ctx);
188     return;
189   }
190   if (!snappy::RawUncompress(blob, static_cast<size_t>(size), output)) {
191     sqlite3_result_error(ctx, "snappy message corruption", -1);
192     sqlite3_result_error_code(ctx, SQLITE_CORRUPT);
193     sqlite3_free(output);
194     return;
195   }
196   if (type == SQLITE_TEXT) {
197     sqlite3_result_text(ctx, output, static_cast<int>(output_size),
198                         sqlite3_free);
199   } else {
200     sqlite3_result_blob(ctx, output, static_cast<int>(output_size),
201                         sqlite3_free);
202   }
203 }
204 
205 extern "C" {
206 
207 #ifndef SQLITE_DETERMINISTIC
208 #define SQLITE_DETERMINISTIC 0
209 #endif
210 
211 #ifndef SQLITE_CALLBACK
212 #define SQLITE_CALLBACK
213 #endif
214 
sqlite3_snapfn_init(sqlite3 * db,const char **,const sqlite3_api_routines * pApi)215 SQLITE_CALLBACK int sqlite3_snapfn_init(sqlite3* db, const char** /*pzErrMsg*/,
216                                         const sqlite3_api_routines* pApi) {
217   SQLITE_EXTENSION_INIT2(pApi);
218   int rc;
219 
220   rc = sqlite3_create_function_v2(
221       db,
222       "snap",                              // zFunctionName
223       1,                                   // nArg
224       SQLITE_UTF8 | SQLITE_DETERMINISTIC,  // eTextRep
225       nullptr,                             // pApp
226       snap,                                // xFunc
227       nullptr,                             // xStep
228       nullptr,                             // xFinal
229       nullptr                              // xDestroy
230   );
231   if (rc != SQLITE_OK) {
232     return rc;
233   }
234 
235   rc = sqlite3_create_function_v2(
236       db,
237       "unsnap",                            // zFunctionName
238       1,                                   // nArg
239       SQLITE_UTF8 | SQLITE_DETERMINISTIC,  // eTextRep
240       nullptr,                             // pApp
241       unsnap,                              // xFunc
242       nullptr,                             // xStep
243       nullptr,                             // xFinal
244       nullptr                              // xDestroy
245   );
246   if (rc != SQLITE_OK) {
247     return rc;
248   }
249 
250   return SQLITE_OK;
251 }
252 
253 }  // extern "C"
254