1#!/usr/bin/env python 2# Copyright 2010 Google Inc. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15 16"""Represents a lexographic range of namespaces.""" 17 18 19 20# pylint: disable=g-bad-name 21 22__all__ = [ 23 'NAMESPACE_CHARACTERS', 24 'MAX_NAMESPACE_LENGTH', 25 'MAX_NAMESPACE', 26 'MIN_NAMESPACE', 27 'NAMESPACE_BATCH_SIZE', 28 'NamespaceRange', 29 'get_namespace_keys', 30] 31 32import itertools 33import string 34 35from google.appengine.api import datastore 36from google.appengine.ext import db 37from google.appengine.ext.db import metadata 38 39NAMESPACE_CHARACTERS = ''.join(sorted(string.digits + 40 string.lowercase + 41 string.uppercase + 42 '._-')) 43MAX_NAMESPACE_LENGTH = 100 44MIN_NAMESPACE = '' 45NAMESPACE_BATCH_SIZE = 50 46 47 48def _setup_constants(alphabet=NAMESPACE_CHARACTERS, 49 max_length=MAX_NAMESPACE_LENGTH, 50 batch_size=NAMESPACE_BATCH_SIZE): 51 """Calculate derived constant values. Only useful for testing.""" 52 53 global NAMESPACE_CHARACTERS 54 global MAX_NAMESPACE_LENGTH 55 # pylint: disable=global-variable-undefined 56 global MAX_NAMESPACE 57 global _LEX_DISTANCE 58 global NAMESPACE_BATCH_SIZE 59 60 NAMESPACE_CHARACTERS = alphabet 61 MAX_NAMESPACE_LENGTH = max_length 62 MAX_NAMESPACE = NAMESPACE_CHARACTERS[-1] * MAX_NAMESPACE_LENGTH 63 NAMESPACE_BATCH_SIZE = batch_size 64 65 # _LEX_DISTANCE will contain the lexical distance between two adjacent 66 # characters in NAMESPACE_CHARACTERS at each character index. This is used 67 # to calculate the ordinal for each string. Example: 68 # NAMESPACE_CHARACTERS = 'ab' 69 # MAX_NAMESPACE_LENGTH = 3 70 # _LEX_DISTANCE = [1, 3, 7] 71 # '' => 0 72 # 'a' => 1 73 # 'aa' => 2 74 # 'aaa' => 3 75 # 'aab' => 4 - Distance between 'aaa' and 'aab' is 1. 76 # 'ab' => 5 - Distance between 'aa' and 'ab' is 3. 77 # 'aba' => 6 78 # 'abb' => 7 79 # 'b' => 8 - Distance between 'a' and 'b' is 7. 80 # 'ba' => 9 81 # 'baa' => 10 82 # 'bab' => 11 83 # ... 84 # _namespace_to_ord('bab') = (1 * 7 + 1) + (0 * 3 + 1) + (1 * 1 + 1) = 11 85 _LEX_DISTANCE = [1] 86 for i in range(1, MAX_NAMESPACE_LENGTH): 87 _LEX_DISTANCE.append( 88 _LEX_DISTANCE[i-1] * len(NAMESPACE_CHARACTERS) + 1) 89 # pylint: disable=undefined-loop-variable 90 del i 91_setup_constants() 92 93 94def _ord_to_namespace(n, _max_length=None): 95 """Convert a namespace ordinal to a namespace string. 96 97 Converts an int, representing the sequence number of a namespace ordered 98 lexographically, into a namespace string. 99 100 >>> _ord_to_namespace(0) 101 '' 102 >>> _ord_to_namespace(1) 103 '-' 104 >>> _ord_to_namespace(2) 105 '--' 106 >>> _ord_to_namespace(3) 107 '---' 108 109 Args: 110 n: A number representing the lexographical ordering of a namespace. 111 _max_length: The maximum namespace length. 112 Returns: 113 A string representing the nth namespace in lexographical order. 114 """ 115 if _max_length is None: 116 _max_length = MAX_NAMESPACE_LENGTH 117 118 length = _LEX_DISTANCE[_max_length - 1] 119 if n == 0: 120 return '' 121 n -= 1 122 return (NAMESPACE_CHARACTERS[n / length] + 123 _ord_to_namespace(n % length, _max_length - 1)) 124 125 126def _namespace_to_ord(namespace): 127 """Converts a namespace string into an int representing its lexographic order. 128 129 >>> _namespace_to_ord('') 130 '' 131 >>> _namespace_to_ord('_') 132 1 133 >>> _namespace_to_ord('__') 134 2 135 136 Args: 137 namespace: A namespace string. 138 139 Returns: 140 An int representing the lexographical order of the given namespace string. 141 """ 142 n = 0 143 for i, c in enumerate(namespace): 144 n += (_LEX_DISTANCE[MAX_NAMESPACE_LENGTH - i- 1] * 145 NAMESPACE_CHARACTERS.index(c) 146 + 1) 147 return n 148 149 150def _key_for_namespace(namespace, app): 151 """Return the __namespace__ key for a namespace. 152 153 Args: 154 namespace: The namespace whose key is requested. 155 app: The id of the application that the key belongs to. 156 157 Returns: 158 A db.Key representing the namespace. 159 """ 160 if namespace: 161 return db.Key.from_path(metadata.Namespace.KIND_NAME, 162 namespace, 163 _app=app) 164 else: 165 return db.Key.from_path(metadata.Namespace.KIND_NAME, 166 metadata.Namespace.EMPTY_NAMESPACE_ID, 167 _app=app) 168 169 170class NamespaceRange(object): 171 """An inclusive lexographical range of namespaces. 172 173 This class is immutable. 174 """ 175 176 def __init__(self, 177 namespace_start=None, 178 namespace_end=None, 179 _app=None): 180 # pylint: disable=g-doc-args 181 """Initializes a NamespaceRange instance. 182 183 Args: 184 namespace_start: A string representing the start of the namespace range. 185 namespace_start is included in the range. If namespace_start is None 186 then the lexographically first namespace is used. 187 namespace_end: A string representing the end of the namespace range. 188 namespace_end is included in the range and must be >= namespace_start. 189 If namespace_end is None then the lexographically last namespace is 190 used. 191 192 Raises: 193 ValueError: if namespace_start > namespace_end. 194 """ 195 if namespace_start is None: 196 namespace_start = MIN_NAMESPACE 197 198 if namespace_end is None: 199 namespace_end = MAX_NAMESPACE 200 201 if namespace_start > namespace_end: 202 raise ValueError('namespace_start (%r) > namespace_end (%r)' % ( 203 namespace_start, namespace_end)) 204 self.__namespace_start = namespace_start 205 self.__namespace_end = namespace_end 206 self.__app = _app 207 208 @property 209 def app(self): 210 return self.__app 211 212 @property 213 def namespace_start(self): 214 return self.__namespace_start 215 216 @property 217 def namespace_end(self): 218 return self.__namespace_end 219 220 @property 221 def is_single_namespace(self): 222 """True if the namespace range only includes a single namespace.""" 223 return self.namespace_start == self.namespace_end 224 225 def split_range(self): 226 """Splits the NamespaceRange into two nearly equal-sized ranges. 227 228 Returns: 229 If this NamespaceRange contains a single namespace then a list containing 230 this NamespaceRange is returned. Otherwise a two-element list containing 231 two NamespaceRanges whose total range is identical to this 232 NamespaceRange's is returned. 233 """ 234 if self.is_single_namespace: 235 return [self] 236 237 mid_point = (_namespace_to_ord(self.namespace_start) + 238 _namespace_to_ord(self.namespace_end)) // 2 239 240 return [NamespaceRange(self.namespace_start, 241 _ord_to_namespace(mid_point), 242 _app=self.app), 243 NamespaceRange(_ord_to_namespace(mid_point+1), 244 self.namespace_end, 245 _app=self.app)] 246 247 def __copy__(self): 248 return self.__class__(self.__namespace_start, 249 self.__namespace_end, 250 self.__app) 251 252 def __eq__(self, o): 253 return (self.namespace_start == o.namespace_start and 254 self.namespace_end == o.namespace_end) 255 256 def __hash__(self): 257 return hash((self.namespace_start, self.namespace_end, self.app)) 258 259 def __repr__(self): 260 if self.app is None: 261 return 'NamespaceRange(namespace_start=%r, namespace_end=%r)' % ( 262 self.namespace_start, self.namespace_end) 263 else: 264 return 'NamespaceRange(namespace_start=%r, namespace_end=%r, _app=%r)' % ( 265 self.namespace_start, self.namespace_end, self.app) 266 267 def with_start_after(self, after_namespace): 268 """Returns a copy of this NamespaceName with a new namespace_start. 269 270 Args: 271 after_namespace: A namespace string. 272 273 Returns: 274 A NamespaceRange object whose namespace_start is the lexographically next 275 namespace after the given namespace string. 276 277 Raises: 278 ValueError: if the NamespaceRange includes only a single namespace. 279 """ 280 namespace_start = _ord_to_namespace(_namespace_to_ord(after_namespace) + 1) 281 return NamespaceRange(namespace_start, self.namespace_end, _app=self.app) 282 283 def make_datastore_query(self, cursor=None): 284 """Returns a datastore.Query that generates all namespaces in the range. 285 286 Args: 287 cursor: start cursor for the query. 288 289 Returns: 290 A datastore.Query instance that generates db.Keys for each namespace in 291 the NamespaceRange. 292 """ 293 filters = {} 294 filters['__key__ >= '] = _key_for_namespace( 295 self.namespace_start, self.app) 296 filters['__key__ <= '] = _key_for_namespace( 297 self.namespace_end, self.app) 298 299 return datastore.Query('__namespace__', 300 filters=filters, 301 keys_only=True, 302 cursor=cursor, 303 _app=self.app) 304 305 def normalized_start(self): 306 """Returns a NamespaceRange with leading non-existant namespaces removed. 307 308 Returns: 309 A copy of this NamespaceRange whose namespace_start is adjusted to exclude 310 the portion of the range that contains no actual namespaces in the 311 datastore. None is returned if the NamespaceRange contains no actual 312 namespaces in the datastore. 313 """ 314 namespaces_after_key = list(self.make_datastore_query().Run(limit=1)) 315 316 if not namespaces_after_key: 317 return None 318 319 namespace_after_key = namespaces_after_key[0].name() or '' 320 return NamespaceRange(namespace_after_key, 321 self.namespace_end, 322 _app=self.app) 323 324 def to_json_object(self): 325 """Returns a dict representation that can be serialized to JSON.""" 326 obj_dict = dict(namespace_start=self.namespace_start, 327 namespace_end=self.namespace_end) 328 if self.app is not None: 329 obj_dict['app'] = self.app 330 return obj_dict 331 332 @classmethod 333 def from_json_object(cls, json): 334 """Returns a NamespaceRange from an object deserialized from JSON.""" 335 return cls(json['namespace_start'], 336 json['namespace_end'], 337 _app=json.get('app')) 338 339 # TODO(user): Implement an option where the returned namespace range is 340 # not normalized using with_start_after to support consistent namespace 341 # queries. 342 @classmethod 343 def split(cls, 344 n, 345 contiguous, 346 can_query=itertools.chain(itertools.repeat(True, 50), 347 itertools.repeat(False)).next, 348 _app=None): 349 # pylint: disable=g-doc-args 350 """Splits the complete NamespaceRange into n equally-sized NamespaceRanges. 351 352 Args: 353 n: The maximum number of NamespaceRanges to return. Fewer than n 354 namespaces may be returned. 355 contiguous: If True then the returned NamespaceRanges will cover the 356 entire space of possible namespaces (i.e. from MIN_NAMESPACE to 357 MAX_NAMESPACE) without gaps. If False then the returned 358 NamespaceRanges may exclude namespaces that don't appear in the 359 datastore. 360 can_query: A function that returns True if split() can query the datastore 361 to generate more fair namespace range splits, and False otherwise. 362 If not set then split() is allowed to make 50 datastore queries. 363 364 Returns: 365 A list of at most n NamespaceRanges representing a near-equal distribution 366 of actual existant datastore namespaces. The returned list will be sorted 367 lexographically. 368 369 Raises: 370 ValueError: if n is < 1. 371 """ 372 if n < 1: 373 raise ValueError('n must be >= 1') 374 375 ranges = None 376 if can_query(): 377 if not contiguous: 378 ns_keys = get_namespace_keys(_app, n + 1) 379 if not ns_keys: 380 return [] 381 else: 382 if len(ns_keys) <= n: 383 # If you have less actual namespaces than number of NamespaceRanges 384 # to return, then just return the list of those namespaces. 385 ns_range = [] 386 for ns_key in ns_keys: 387 ns_range.append(NamespaceRange(ns_key.name() or '', 388 ns_key.name() or '', 389 _app=_app)) 390 return sorted(ns_range, 391 key=lambda ns_range: ns_range.namespace_start) 392 # Use the first key and save the initial normalized_start() call. 393 ranges = [NamespaceRange(ns_keys[0].name() or '', _app=_app)] 394 else: 395 ns_range = NamespaceRange(_app=_app).normalized_start() 396 if ns_range is None: 397 return [NamespaceRange(_app=_app)] 398 ranges = [ns_range] 399 else: 400 ranges = [NamespaceRange(_app=_app)] 401 402 singles = [] 403 while ranges and (len(ranges) + len(singles)) < n: 404 namespace_range = ranges.pop(0) 405 if namespace_range.is_single_namespace: 406 singles.append(namespace_range) 407 else: 408 left, right = namespace_range.split_range() 409 if can_query(): 410 right = right.normalized_start() 411 if right is not None: 412 ranges.append(right) 413 ranges.append(left) 414 ns_ranges = sorted(singles + ranges, 415 key=lambda ns_range: ns_range.namespace_start) 416 417 if contiguous: 418 if not ns_ranges: 419 # This condition is possible if every namespace was deleted after the 420 # first call to ns_range.normalized_start(). 421 return [NamespaceRange(_app=_app)] 422 423 continuous_ns_ranges = [] 424 for i in range(len(ns_ranges)): 425 if i == 0: 426 namespace_start = MIN_NAMESPACE 427 else: 428 namespace_start = ns_ranges[i].namespace_start 429 430 if i == len(ns_ranges) - 1: 431 namespace_end = MAX_NAMESPACE 432 else: 433 namespace_end = _ord_to_namespace( 434 _namespace_to_ord(ns_ranges[i+1].namespace_start) - 1) 435 436 continuous_ns_ranges.append(NamespaceRange(namespace_start, 437 namespace_end, 438 _app=_app)) 439 return continuous_ns_ranges 440 else: 441 return ns_ranges 442 443 def __iter__(self): 444 """Iterate over all the namespaces within this range.""" 445 cursor = None 446 while True: 447 query = self.make_datastore_query(cursor=cursor) 448 count = 0 449 for ns_key in query.Run(limit=NAMESPACE_BATCH_SIZE): 450 count += 1 451 yield ns_key.name() or '' 452 if count < NAMESPACE_BATCH_SIZE: 453 break 454 cursor = query.GetCursor() 455 456 457def get_namespace_keys(app, limit): 458 """Get namespace keys.""" 459 ns_query = datastore.Query('__namespace__', keys_only=True, _app=app) 460 return list(ns_query.Run(limit=limit, batch_size=limit)) 461