1
2 /* set object implementation
3 Written and maintained by Raymond D. Hettinger <python@rcn.com>
4 Derived from Lib/sets.py and Objects/dictobject.c.
5
6 Copyright (c) 2003-2007 Python Software Foundation.
7 All rights reserved.
8 */
9
10 #include "Python.h"
11 #include "structmember.h"
12
13 /* Set a key error with the specified argument, wrapping it in a
14 * tuple automatically so that tuple keys are not unpacked as the
15 * exception arguments. */
16 static void
set_key_error(PyObject * arg)17 set_key_error(PyObject *arg)
18 {
19 PyObject *tup;
20 tup = PyTuple_Pack(1, arg);
21 if (!tup)
22 return; /* caller will expect error to be set anyway */
23 PyErr_SetObject(PyExc_KeyError, tup);
24 Py_DECREF(tup);
25 }
26
27 /* This must be >= 1. */
28 #define PERTURB_SHIFT 5
29
30 /* Object used as dummy key to fill deleted entries */
31 static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */
32
33 #ifdef Py_REF_DEBUG
34 PyObject *
_PySet_Dummy(void)35 _PySet_Dummy(void)
36 {
37 return dummy;
38 }
39 #endif
40
41 #define INIT_NONZERO_SET_SLOTS(so) do { \
42 (so)->table = (so)->smalltable; \
43 (so)->mask = PySet_MINSIZE - 1; \
44 (so)->hash = -1; \
45 } while(0)
46
47 #define EMPTY_TO_MINSIZE(so) do { \
48 memset((so)->smalltable, 0, sizeof((so)->smalltable)); \
49 (so)->used = (so)->fill = 0; \
50 INIT_NONZERO_SET_SLOTS(so); \
51 } while(0)
52
53 /* Reuse scheme to save calls to malloc, free, and memset */
54 #ifndef PySet_MAXFREELIST
55 #define PySet_MAXFREELIST 80
56 #endif
57 static PySetObject *free_list[PySet_MAXFREELIST];
58 static int numfree = 0;
59
60 /*
61 The basic lookup function used by all operations.
62 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
63 Open addressing is preferred over chaining since the link overhead for
64 chaining would be substantial (100% with typical malloc overhead).
65
66 The initial probe index is computed as hash mod the table size. Subsequent
67 probe indices are computed as explained in Objects/dictobject.c.
68
69 All arithmetic on hash should ignore overflow.
70
71 Unlike the dictionary implementation, the lookkey functions can return
72 NULL if the rich comparison returns an error.
73 */
74
75 static setentry *
set_lookkey(PySetObject * so,PyObject * key,register long hash)76 set_lookkey(PySetObject *so, PyObject *key, register long hash)
77 {
78 register Py_ssize_t i;
79 register size_t perturb;
80 register setentry *freeslot;
81 register size_t mask = so->mask;
82 setentry *table = so->table;
83 register setentry *entry;
84 register int cmp;
85 PyObject *startkey;
86
87 i = hash & mask;
88 entry = &table[i];
89 if (entry->key == NULL || entry->key == key)
90 return entry;
91
92 if (entry->key == dummy)
93 freeslot = entry;
94 else {
95 if (entry->hash == hash) {
96 startkey = entry->key;
97 Py_INCREF(startkey);
98 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
99 Py_DECREF(startkey);
100 if (cmp < 0)
101 return NULL;
102 if (table == so->table && entry->key == startkey) {
103 if (cmp > 0)
104 return entry;
105 }
106 else {
107 /* The compare did major nasty stuff to the
108 * set: start over.
109 */
110 return set_lookkey(so, key, hash);
111 }
112 }
113 freeslot = NULL;
114 }
115
116 /* In the loop, key == dummy is by far (factor of 100s) the
117 least likely outcome, so test for that last. */
118 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
119 i = (i << 2) + i + perturb + 1;
120 entry = &table[i & mask];
121 if (entry->key == NULL) {
122 if (freeslot != NULL)
123 entry = freeslot;
124 break;
125 }
126 if (entry->key == key)
127 break;
128 if (entry->hash == hash && entry->key != dummy) {
129 startkey = entry->key;
130 Py_INCREF(startkey);
131 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
132 Py_DECREF(startkey);
133 if (cmp < 0)
134 return NULL;
135 if (table == so->table && entry->key == startkey) {
136 if (cmp > 0)
137 break;
138 }
139 else {
140 /* The compare did major nasty stuff to the
141 * set: start over.
142 */
143 return set_lookkey(so, key, hash);
144 }
145 }
146 else if (entry->key == dummy && freeslot == NULL)
147 freeslot = entry;
148 }
149 return entry;
150 }
151
152 /*
153 * Hacked up version of set_lookkey which can assume keys are always strings;
154 * This means we can always use _PyString_Eq directly and not have to check to
155 * see if the comparison altered the table.
156 */
157 static setentry *
set_lookkey_string(PySetObject * so,PyObject * key,register long hash)158 set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
159 {
160 register Py_ssize_t i;
161 register size_t perturb;
162 register setentry *freeslot;
163 register size_t mask = so->mask;
164 setentry *table = so->table;
165 register setentry *entry;
166
167 /* Make sure this function doesn't have to handle non-string keys,
168 including subclasses of str; e.g., one reason to subclass
169 strings is to override __eq__, and for speed we don't cater to
170 that here. */
171 if (!PyString_CheckExact(key)) {
172 so->lookup = set_lookkey;
173 return set_lookkey(so, key, hash);
174 }
175 i = hash & mask;
176 entry = &table[i];
177 if (entry->key == NULL || entry->key == key)
178 return entry;
179 if (entry->key == dummy)
180 freeslot = entry;
181 else {
182 if (entry->hash == hash && _PyString_Eq(entry->key, key))
183 return entry;
184 freeslot = NULL;
185 }
186
187 /* In the loop, key == dummy is by far (factor of 100s) the
188 least likely outcome, so test for that last. */
189 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
190 i = (i << 2) + i + perturb + 1;
191 entry = &table[i & mask];
192 if (entry->key == NULL)
193 return freeslot == NULL ? entry : freeslot;
194 if (entry->key == key
195 || (entry->hash == hash
196 && entry->key != dummy
197 && _PyString_Eq(entry->key, key)))
198 return entry;
199 if (entry->key == dummy && freeslot == NULL)
200 freeslot = entry;
201 }
202 assert(0); /* NOT REACHED */
203 return 0;
204 }
205
206 /*
207 Internal routine to insert a new key into the table.
208 Used by the public insert routine.
209 Eats a reference to key.
210 */
211 static int
set_insert_key(register PySetObject * so,PyObject * key,long hash)212 set_insert_key(register PySetObject *so, PyObject *key, long hash)
213 {
214 register setentry *entry;
215 typedef setentry *(*lookupfunc)(PySetObject *, PyObject *, long);
216
217 assert(so->lookup != NULL);
218 entry = so->lookup(so, key, hash);
219 if (entry == NULL)
220 return -1;
221 if (entry->key == NULL) {
222 /* UNUSED */
223 so->fill++;
224 entry->key = key;
225 entry->hash = hash;
226 so->used++;
227 } else if (entry->key == dummy) {
228 /* DUMMY */
229 entry->key = key;
230 entry->hash = hash;
231 so->used++;
232 Py_DECREF(dummy);
233 } else {
234 /* ACTIVE */
235 Py_DECREF(key);
236 }
237 return 0;
238 }
239
240 /*
241 Internal routine used by set_table_resize() to insert an item which is
242 known to be absent from the set. This routine also assumes that
243 the set contains no deleted entries. Besides the performance benefit,
244 using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209).
245 Note that no refcounts are changed by this routine; if needed, the caller
246 is responsible for incref'ing `key`.
247 */
248 static void
set_insert_clean(register PySetObject * so,PyObject * key,long hash)249 set_insert_clean(register PySetObject *so, PyObject *key, long hash)
250 {
251 register size_t i;
252 register size_t perturb;
253 register size_t mask = (size_t)so->mask;
254 setentry *table = so->table;
255 register setentry *entry;
256
257 i = hash & mask;
258 entry = &table[i];
259 for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) {
260 i = (i << 2) + i + perturb + 1;
261 entry = &table[i & mask];
262 }
263 so->fill++;
264 entry->key = key;
265 entry->hash = hash;
266 so->used++;
267 }
268
269 /*
270 Restructure the table by allocating a new table and reinserting all
271 keys again. When entries have been deleted, the new table may
272 actually be smaller than the old one.
273 */
274 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)275 set_table_resize(PySetObject *so, Py_ssize_t minused)
276 {
277 Py_ssize_t newsize;
278 setentry *oldtable, *newtable, *entry;
279 Py_ssize_t i;
280 int is_oldtable_malloced;
281 setentry small_copy[PySet_MINSIZE];
282
283 assert(minused >= 0);
284
285 /* Find the smallest table size > minused. */
286 for (newsize = PySet_MINSIZE;
287 newsize <= minused && newsize > 0;
288 newsize <<= 1)
289 ;
290 if (newsize <= 0) {
291 PyErr_NoMemory();
292 return -1;
293 }
294
295 /* Get space for a new table. */
296 oldtable = so->table;
297 assert(oldtable != NULL);
298 is_oldtable_malloced = oldtable != so->smalltable;
299
300 if (newsize == PySet_MINSIZE) {
301 /* A large table is shrinking, or we can't get any smaller. */
302 newtable = so->smalltable;
303 if (newtable == oldtable) {
304 if (so->fill == so->used) {
305 /* No dummies, so no point doing anything. */
306 return 0;
307 }
308 /* We're not going to resize it, but rebuild the
309 table anyway to purge old dummy entries.
310 Subtle: This is *necessary* if fill==size,
311 as set_lookkey needs at least one virgin slot to
312 terminate failing searches. If fill < size, it's
313 merely desirable, as dummies slow searches. */
314 assert(so->fill > so->used);
315 memcpy(small_copy, oldtable, sizeof(small_copy));
316 oldtable = small_copy;
317 }
318 }
319 else {
320 newtable = PyMem_NEW(setentry, newsize);
321 if (newtable == NULL) {
322 PyErr_NoMemory();
323 return -1;
324 }
325 }
326
327 /* Make the set empty, using the new table. */
328 assert(newtable != oldtable);
329 so->table = newtable;
330 so->mask = newsize - 1;
331 memset(newtable, 0, sizeof(setentry) * newsize);
332 so->used = 0;
333 i = so->fill;
334 so->fill = 0;
335
336 /* Copy the data over; this is refcount-neutral for active entries;
337 dummy entries aren't copied over, of course */
338 for (entry = oldtable; i > 0; entry++) {
339 if (entry->key == NULL) {
340 /* UNUSED */
341 ;
342 } else if (entry->key == dummy) {
343 /* DUMMY */
344 --i;
345 assert(entry->key == dummy);
346 Py_DECREF(entry->key);
347 } else {
348 /* ACTIVE */
349 --i;
350 set_insert_clean(so, entry->key, entry->hash);
351 }
352 }
353
354 if (is_oldtable_malloced)
355 PyMem_DEL(oldtable);
356 return 0;
357 }
358
359 /* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
360
361 static int
set_add_entry(register PySetObject * so,setentry * entry)362 set_add_entry(register PySetObject *so, setentry *entry)
363 {
364 register Py_ssize_t n_used;
365 PyObject *key = entry->key;
366 long hash = entry->hash;
367
368 assert(so->fill <= so->mask); /* at least one empty slot */
369 n_used = so->used;
370 Py_INCREF(key);
371 if (set_insert_key(so, key, hash) == -1) {
372 Py_DECREF(key);
373 return -1;
374 }
375 if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
376 return 0;
377 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
378 }
379
380 static int
set_add_key(register PySetObject * so,PyObject * key)381 set_add_key(register PySetObject *so, PyObject *key)
382 {
383 register long hash;
384 register Py_ssize_t n_used;
385
386 if (!PyString_CheckExact(key) ||
387 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
388 hash = PyObject_Hash(key);
389 if (hash == -1)
390 return -1;
391 }
392 assert(so->fill <= so->mask); /* at least one empty slot */
393 n_used = so->used;
394 Py_INCREF(key);
395 if (set_insert_key(so, key, hash) == -1) {
396 Py_DECREF(key);
397 return -1;
398 }
399 if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
400 return 0;
401 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
402 }
403
404 #define DISCARD_NOTFOUND 0
405 #define DISCARD_FOUND 1
406
407 static int
set_discard_entry(PySetObject * so,setentry * oldentry)408 set_discard_entry(PySetObject *so, setentry *oldentry)
409 { register setentry *entry;
410 PyObject *old_key;
411
412 entry = (so->lookup)(so, oldentry->key, oldentry->hash);
413 if (entry == NULL)
414 return -1;
415 if (entry->key == NULL || entry->key == dummy)
416 return DISCARD_NOTFOUND;
417 old_key = entry->key;
418 Py_INCREF(dummy);
419 entry->key = dummy;
420 so->used--;
421 Py_DECREF(old_key);
422 return DISCARD_FOUND;
423 }
424
425 static int
set_discard_key(PySetObject * so,PyObject * key)426 set_discard_key(PySetObject *so, PyObject *key)
427 {
428 register long hash;
429 register setentry *entry;
430 PyObject *old_key;
431
432 assert (PyAnySet_Check(so));
433 if (!PyString_CheckExact(key) ||
434 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
435 hash = PyObject_Hash(key);
436 if (hash == -1)
437 return -1;
438 }
439 entry = (so->lookup)(so, key, hash);
440 if (entry == NULL)
441 return -1;
442 if (entry->key == NULL || entry->key == dummy)
443 return DISCARD_NOTFOUND;
444 old_key = entry->key;
445 Py_INCREF(dummy);
446 entry->key = dummy;
447 so->used--;
448 Py_DECREF(old_key);
449 return DISCARD_FOUND;
450 }
451
452 static int
set_clear_internal(PySetObject * so)453 set_clear_internal(PySetObject *so)
454 {
455 setentry *entry, *table;
456 int table_is_malloced;
457 Py_ssize_t fill;
458 setentry small_copy[PySet_MINSIZE];
459 #ifdef Py_DEBUG
460 Py_ssize_t i, n;
461 assert (PyAnySet_Check(so));
462
463 n = so->mask + 1;
464 i = 0;
465 #endif
466
467 table = so->table;
468 assert(table != NULL);
469 table_is_malloced = table != so->smalltable;
470
471 /* This is delicate. During the process of clearing the set,
472 * decrefs can cause the set to mutate. To avoid fatal confusion
473 * (voice of experience), we have to make the set empty before
474 * clearing the slots, and never refer to anything via so->ref while
475 * clearing.
476 */
477 fill = so->fill;
478 if (table_is_malloced)
479 EMPTY_TO_MINSIZE(so);
480
481 else if (fill > 0) {
482 /* It's a small table with something that needs to be cleared.
483 * Afraid the only safe way is to copy the set entries into
484 * another small table first.
485 */
486 memcpy(small_copy, table, sizeof(small_copy));
487 table = small_copy;
488 EMPTY_TO_MINSIZE(so);
489 }
490 /* else it's a small table that's already empty */
491
492 /* Now we can finally clear things. If C had refcounts, we could
493 * assert that the refcount on table is 1 now, i.e. that this function
494 * has unique access to it, so decref side-effects can't alter it.
495 */
496 for (entry = table; fill > 0; ++entry) {
497 #ifdef Py_DEBUG
498 assert(i < n);
499 ++i;
500 #endif
501 if (entry->key) {
502 --fill;
503 Py_DECREF(entry->key);
504 }
505 #ifdef Py_DEBUG
506 else
507 assert(entry->key == NULL);
508 #endif
509 }
510
511 if (table_is_malloced)
512 PyMem_DEL(table);
513 return 0;
514 }
515
516 /*
517 * Iterate over a set table. Use like so:
518 *
519 * Py_ssize_t pos;
520 * setentry *entry;
521 * pos = 0; # important! pos should not otherwise be changed by you
522 * while (set_next(yourset, &pos, &entry)) {
523 * Refer to borrowed reference in entry->key.
524 * }
525 *
526 * CAUTION: In general, it isn't safe to use set_next in a loop that
527 * mutates the table.
528 */
529 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)530 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
531 {
532 Py_ssize_t i;
533 Py_ssize_t mask;
534 register setentry *table;
535
536 assert (PyAnySet_Check(so));
537 i = *pos_ptr;
538 assert(i >= 0);
539 table = so->table;
540 mask = so->mask;
541 while (i <= mask && (table[i].key == NULL || table[i].key == dummy))
542 i++;
543 *pos_ptr = i+1;
544 if (i > mask)
545 return 0;
546 assert(table[i].key != NULL);
547 *entry_ptr = &table[i];
548 return 1;
549 }
550
551 static void
set_dealloc(PySetObject * so)552 set_dealloc(PySetObject *so)
553 {
554 register setentry *entry;
555 Py_ssize_t fill = so->fill;
556 PyObject_GC_UnTrack(so);
557 Py_TRASHCAN_SAFE_BEGIN(so)
558 if (so->weakreflist != NULL)
559 PyObject_ClearWeakRefs((PyObject *) so);
560
561 for (entry = so->table; fill > 0; entry++) {
562 if (entry->key) {
563 --fill;
564 Py_DECREF(entry->key);
565 }
566 }
567 if (so->table != so->smalltable)
568 PyMem_DEL(so->table);
569 if (numfree < PySet_MAXFREELIST && PyAnySet_CheckExact(so))
570 free_list[numfree++] = so;
571 else
572 Py_TYPE(so)->tp_free(so);
573 Py_TRASHCAN_SAFE_END(so)
574 }
575
576 static int
set_tp_print(PySetObject * so,FILE * fp,int flags)577 set_tp_print(PySetObject *so, FILE *fp, int flags)
578 {
579 setentry *entry;
580 Py_ssize_t pos=0;
581 char *emit = ""; /* No separator emitted on first pass */
582 char *separator = ", ";
583 int status = Py_ReprEnter((PyObject*)so);
584
585 if (status != 0) {
586 if (status < 0)
587 return status;
588 Py_BEGIN_ALLOW_THREADS
589 fprintf(fp, "%s(...)", so->ob_type->tp_name);
590 Py_END_ALLOW_THREADS
591 return 0;
592 }
593
594 Py_BEGIN_ALLOW_THREADS
595 fprintf(fp, "%s([", so->ob_type->tp_name);
596 Py_END_ALLOW_THREADS
597 while (set_next(so, &pos, &entry)) {
598 Py_BEGIN_ALLOW_THREADS
599 fputs(emit, fp);
600 Py_END_ALLOW_THREADS
601 emit = separator;
602 if (PyObject_Print(entry->key, fp, 0) != 0) {
603 Py_ReprLeave((PyObject*)so);
604 return -1;
605 }
606 }
607 Py_BEGIN_ALLOW_THREADS
608 fputs("])", fp);
609 Py_END_ALLOW_THREADS
610 Py_ReprLeave((PyObject*)so);
611 return 0;
612 }
613
614 static PyObject *
set_repr(PySetObject * so)615 set_repr(PySetObject *so)
616 {
617 PyObject *keys, *result=NULL, *listrepr;
618 int status = Py_ReprEnter((PyObject*)so);
619
620 if (status != 0) {
621 if (status < 0)
622 return NULL;
623 return PyString_FromFormat("%s(...)", so->ob_type->tp_name);
624 }
625
626 keys = PySequence_List((PyObject *)so);
627 if (keys == NULL)
628 goto done;
629 listrepr = PyObject_Repr(keys);
630 Py_DECREF(keys);
631 if (listrepr == NULL)
632 goto done;
633
634 result = PyString_FromFormat("%s(%s)", so->ob_type->tp_name,
635 PyString_AS_STRING(listrepr));
636 Py_DECREF(listrepr);
637 done:
638 Py_ReprLeave((PyObject*)so);
639 return result;
640 }
641
642 static Py_ssize_t
set_len(PyObject * so)643 set_len(PyObject *so)
644 {
645 return ((PySetObject *)so)->used;
646 }
647
648 static int
set_merge(PySetObject * so,PyObject * otherset)649 set_merge(PySetObject *so, PyObject *otherset)
650 {
651 PySetObject *other;
652 PyObject *key;
653 long hash;
654 register Py_ssize_t i;
655 register setentry *entry;
656
657 assert (PyAnySet_Check(so));
658 assert (PyAnySet_Check(otherset));
659
660 other = (PySetObject*)otherset;
661 if (other == so || other->used == 0)
662 /* a.update(a) or a.update({}); nothing to do */
663 return 0;
664 /* Do one big resize at the start, rather than
665 * incrementally resizing as we insert new keys. Expect
666 * that there will be no (or few) overlapping keys.
667 */
668 if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
669 if (set_table_resize(so, (so->used + other->used)*2) != 0)
670 return -1;
671 }
672 for (i = 0; i <= other->mask; i++) {
673 entry = &other->table[i];
674 key = entry->key;
675 hash = entry->hash;
676 if (key != NULL &&
677 key != dummy) {
678 Py_INCREF(key);
679 if (set_insert_key(so, key, hash) == -1) {
680 Py_DECREF(key);
681 return -1;
682 }
683 }
684 }
685 return 0;
686 }
687
688 static int
set_contains_key(PySetObject * so,PyObject * key)689 set_contains_key(PySetObject *so, PyObject *key)
690 {
691 long hash;
692 setentry *entry;
693
694 if (!PyString_CheckExact(key) ||
695 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
696 hash = PyObject_Hash(key);
697 if (hash == -1)
698 return -1;
699 }
700 entry = (so->lookup)(so, key, hash);
701 if (entry == NULL)
702 return -1;
703 key = entry->key;
704 return key != NULL && key != dummy;
705 }
706
707 static int
set_contains_entry(PySetObject * so,setentry * entry)708 set_contains_entry(PySetObject *so, setentry *entry)
709 {
710 PyObject *key;
711 setentry *lu_entry;
712
713 lu_entry = (so->lookup)(so, entry->key, entry->hash);
714 if (lu_entry == NULL)
715 return -1;
716 key = lu_entry->key;
717 return key != NULL && key != dummy;
718 }
719
720 static PyObject *
set_pop(PySetObject * so)721 set_pop(PySetObject *so)
722 {
723 register Py_ssize_t i = 0;
724 register setentry *entry;
725 PyObject *key;
726
727 assert (PyAnySet_Check(so));
728 if (so->used == 0) {
729 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
730 return NULL;
731 }
732
733 /* Set entry to "the first" unused or dummy set entry. We abuse
734 * the hash field of slot 0 to hold a search finger:
735 * If slot 0 has a value, use slot 0.
736 * Else slot 0 is being used to hold a search finger,
737 * and we use its hash value as the first index to look.
738 */
739 entry = &so->table[0];
740 if (entry->key == NULL || entry->key == dummy) {
741 i = entry->hash;
742 /* The hash field may be a real hash value, or it may be a
743 * legit search finger, or it may be a once-legit search
744 * finger that's out of bounds now because it wrapped around
745 * or the table shrunk -- simply make sure it's in bounds now.
746 */
747 if (i > so->mask || i < 1)
748 i = 1; /* skip slot 0 */
749 while ((entry = &so->table[i])->key == NULL || entry->key==dummy) {
750 i++;
751 if (i > so->mask)
752 i = 1;
753 }
754 }
755 key = entry->key;
756 Py_INCREF(dummy);
757 entry->key = dummy;
758 so->used--;
759 so->table[0].hash = i + 1; /* next place to start */
760 return key;
761 }
762
763 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
764 Raises KeyError if the set is empty.");
765
766 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)767 set_traverse(PySetObject *so, visitproc visit, void *arg)
768 {
769 Py_ssize_t pos = 0;
770 setentry *entry;
771
772 while (set_next(so, &pos, &entry))
773 Py_VISIT(entry->key);
774 return 0;
775 }
776
777 static long
frozenset_hash(PyObject * self)778 frozenset_hash(PyObject *self)
779 {
780 PySetObject *so = (PySetObject *)self;
781 long h, hash = 1927868237L;
782 setentry *entry;
783 Py_ssize_t pos = 0;
784
785 if (so->hash != -1)
786 return so->hash;
787
788 hash *= PySet_GET_SIZE(self) + 1;
789 while (set_next(so, &pos, &entry)) {
790 /* Work to increase the bit dispersion for closely spaced hash
791 values. The is important because some use cases have many
792 combinations of a small number of elements with nearby
793 hashes so that many distinct combinations collapse to only
794 a handful of distinct hash values. */
795 h = entry->hash;
796 hash ^= (h ^ (h << 16) ^ 89869747L) * 3644798167u;
797 }
798 hash = hash * 69069L + 907133923L;
799 if (hash == -1)
800 hash = 590923713L;
801 so->hash = hash;
802 return hash;
803 }
804
805 /***** Set iterator type ***********************************************/
806
807 typedef struct {
808 PyObject_HEAD
809 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
810 Py_ssize_t si_used;
811 Py_ssize_t si_pos;
812 Py_ssize_t len;
813 } setiterobject;
814
815 static void
setiter_dealloc(setiterobject * si)816 setiter_dealloc(setiterobject *si)
817 {
818 Py_XDECREF(si->si_set);
819 PyObject_GC_Del(si);
820 }
821
822 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)823 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
824 {
825 Py_VISIT(si->si_set);
826 return 0;
827 }
828
829 static PyObject *
setiter_len(setiterobject * si)830 setiter_len(setiterobject *si)
831 {
832 Py_ssize_t len = 0;
833 if (si->si_set != NULL && si->si_used == si->si_set->used)
834 len = si->len;
835 return PyInt_FromLong(len);
836 }
837
838 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
839
840 static PyMethodDef setiter_methods[] = {
841 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
842 {NULL, NULL} /* sentinel */
843 };
844
setiter_iternext(setiterobject * si)845 static PyObject *setiter_iternext(setiterobject *si)
846 {
847 PyObject *key;
848 register Py_ssize_t i, mask;
849 register setentry *entry;
850 PySetObject *so = si->si_set;
851
852 if (so == NULL)
853 return NULL;
854 assert (PyAnySet_Check(so));
855
856 if (si->si_used != so->used) {
857 PyErr_SetString(PyExc_RuntimeError,
858 "Set changed size during iteration");
859 si->si_used = -1; /* Make this state sticky */
860 return NULL;
861 }
862
863 i = si->si_pos;
864 assert(i>=0);
865 entry = so->table;
866 mask = so->mask;
867 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
868 i++;
869 si->si_pos = i+1;
870 if (i > mask)
871 goto fail;
872 si->len--;
873 key = entry[i].key;
874 Py_INCREF(key);
875 return key;
876
877 fail:
878 Py_DECREF(so);
879 si->si_set = NULL;
880 return NULL;
881 }
882
883 static PyTypeObject PySetIter_Type = {
884 PyVarObject_HEAD_INIT(&PyType_Type, 0)
885 "setiterator", /* tp_name */
886 sizeof(setiterobject), /* tp_basicsize */
887 0, /* tp_itemsize */
888 /* methods */
889 (destructor)setiter_dealloc, /* tp_dealloc */
890 0, /* tp_print */
891 0, /* tp_getattr */
892 0, /* tp_setattr */
893 0, /* tp_compare */
894 0, /* tp_repr */
895 0, /* tp_as_number */
896 0, /* tp_as_sequence */
897 0, /* tp_as_mapping */
898 0, /* tp_hash */
899 0, /* tp_call */
900 0, /* tp_str */
901 PyObject_GenericGetAttr, /* tp_getattro */
902 0, /* tp_setattro */
903 0, /* tp_as_buffer */
904 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
905 0, /* tp_doc */
906 (traverseproc)setiter_traverse, /* tp_traverse */
907 0, /* tp_clear */
908 0, /* tp_richcompare */
909 0, /* tp_weaklistoffset */
910 PyObject_SelfIter, /* tp_iter */
911 (iternextfunc)setiter_iternext, /* tp_iternext */
912 setiter_methods, /* tp_methods */
913 0,
914 };
915
916 static PyObject *
set_iter(PySetObject * so)917 set_iter(PySetObject *so)
918 {
919 setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
920 if (si == NULL)
921 return NULL;
922 Py_INCREF(so);
923 si->si_set = so;
924 si->si_used = so->used;
925 si->si_pos = 0;
926 si->len = so->used;
927 _PyObject_GC_TRACK(si);
928 return (PyObject *)si;
929 }
930
931 static int
set_update_internal(PySetObject * so,PyObject * other)932 set_update_internal(PySetObject *so, PyObject *other)
933 {
934 PyObject *key, *it;
935
936 if (PyAnySet_Check(other))
937 return set_merge(so, other);
938
939 if (PyDict_CheckExact(other)) {
940 PyObject *value;
941 Py_ssize_t pos = 0;
942 long hash;
943 Py_ssize_t dictsize = PyDict_Size(other);
944
945 /* Do one big resize at the start, rather than
946 * incrementally resizing as we insert new keys. Expect
947 * that there will be no (or few) overlapping keys.
948 */
949 if (dictsize == -1)
950 return -1;
951 if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
952 if (set_table_resize(so, (so->used + dictsize)*2) != 0)
953 return -1;
954 }
955 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
956 setentry an_entry;
957
958 an_entry.hash = hash;
959 an_entry.key = key;
960 if (set_add_entry(so, &an_entry) == -1)
961 return -1;
962 }
963 return 0;
964 }
965
966 it = PyObject_GetIter(other);
967 if (it == NULL)
968 return -1;
969
970 while ((key = PyIter_Next(it)) != NULL) {
971 if (set_add_key(so, key) == -1) {
972 Py_DECREF(it);
973 Py_DECREF(key);
974 return -1;
975 }
976 Py_DECREF(key);
977 }
978 Py_DECREF(it);
979 if (PyErr_Occurred())
980 return -1;
981 return 0;
982 }
983
984 static PyObject *
set_update(PySetObject * so,PyObject * args)985 set_update(PySetObject *so, PyObject *args)
986 {
987 Py_ssize_t i;
988
989 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
990 PyObject *other = PyTuple_GET_ITEM(args, i);
991 if (set_update_internal(so, other) == -1)
992 return NULL;
993 }
994 Py_RETURN_NONE;
995 }
996
997 PyDoc_STRVAR(update_doc,
998 "Update a set with the union of itself and others.");
999
1000 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)1001 make_new_set(PyTypeObject *type, PyObject *iterable)
1002 {
1003 register PySetObject *so = NULL;
1004
1005 if (dummy == NULL) { /* Auto-initialize dummy */
1006 dummy = PyString_FromString("<dummy key>");
1007 if (dummy == NULL)
1008 return NULL;
1009 }
1010
1011 /* create PySetObject structure */
1012 if (numfree &&
1013 (type == &PySet_Type || type == &PyFrozenSet_Type)) {
1014 so = free_list[--numfree];
1015 assert (so != NULL && PyAnySet_CheckExact(so));
1016 Py_TYPE(so) = type;
1017 _Py_NewReference((PyObject *)so);
1018 EMPTY_TO_MINSIZE(so);
1019 PyObject_GC_Track(so);
1020 } else {
1021 so = (PySetObject *)type->tp_alloc(type, 0);
1022 if (so == NULL)
1023 return NULL;
1024 /* tp_alloc has already zeroed the structure */
1025 assert(so->table == NULL && so->fill == 0 && so->used == 0);
1026 INIT_NONZERO_SET_SLOTS(so);
1027 }
1028
1029 so->lookup = set_lookkey_string;
1030 so->weakreflist = NULL;
1031
1032 if (iterable != NULL) {
1033 if (set_update_internal(so, iterable) == -1) {
1034 Py_DECREF(so);
1035 return NULL;
1036 }
1037 }
1038
1039 return (PyObject *)so;
1040 }
1041
1042 /* The empty frozenset is a singleton */
1043 static PyObject *emptyfrozenset = NULL;
1044
1045 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1046 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1047 {
1048 PyObject *iterable = NULL, *result;
1049
1050 if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset()", kwds))
1051 return NULL;
1052
1053 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1054 return NULL;
1055
1056 if (type != &PyFrozenSet_Type)
1057 return make_new_set(type, iterable);
1058
1059 if (iterable != NULL) {
1060 /* frozenset(f) is idempotent */
1061 if (PyFrozenSet_CheckExact(iterable)) {
1062 Py_INCREF(iterable);
1063 return iterable;
1064 }
1065 result = make_new_set(type, iterable);
1066 if (result == NULL || PySet_GET_SIZE(result))
1067 return result;
1068 Py_DECREF(result);
1069 }
1070 /* The empty frozenset is a singleton */
1071 if (emptyfrozenset == NULL)
1072 emptyfrozenset = make_new_set(type, NULL);
1073 Py_XINCREF(emptyfrozenset);
1074 return emptyfrozenset;
1075 }
1076
1077 void
PySet_Fini(void)1078 PySet_Fini(void)
1079 {
1080 PySetObject *so;
1081
1082 while (numfree) {
1083 numfree--;
1084 so = free_list[numfree];
1085 PyObject_GC_Del(so);
1086 }
1087 Py_CLEAR(dummy);
1088 Py_CLEAR(emptyfrozenset);
1089 }
1090
1091 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1092 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1093 {
1094 if (type == &PySet_Type && !_PyArg_NoKeywords("set()", kwds))
1095 return NULL;
1096
1097 return make_new_set(type, NULL);
1098 }
1099
1100 /* set_swap_bodies() switches the contents of any two sets by moving their
1101 internal data pointers and, if needed, copying the internal smalltables.
1102 Semantically equivalent to:
1103
1104 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1105
1106 The function always succeeds and it leaves both objects in a stable state.
1107 Useful for creating temporary frozensets from sets for membership testing
1108 in __contains__(), discard(), and remove(). Also useful for operations
1109 that update in-place (by allowing an intermediate result to be swapped
1110 into one of the original inputs).
1111 */
1112
1113 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1114 set_swap_bodies(PySetObject *a, PySetObject *b)
1115 {
1116 Py_ssize_t t;
1117 setentry *u;
1118 setentry *(*f)(PySetObject *so, PyObject *key, long hash);
1119 setentry tab[PySet_MINSIZE];
1120 long h;
1121
1122 t = a->fill; a->fill = b->fill; b->fill = t;
1123 t = a->used; a->used = b->used; b->used = t;
1124 t = a->mask; a->mask = b->mask; b->mask = t;
1125
1126 u = a->table;
1127 if (a->table == a->smalltable)
1128 u = b->smalltable;
1129 a->table = b->table;
1130 if (b->table == b->smalltable)
1131 a->table = a->smalltable;
1132 b->table = u;
1133
1134 f = a->lookup; a->lookup = b->lookup; b->lookup = f;
1135
1136 if (a->table == a->smalltable || b->table == b->smalltable) {
1137 memcpy(tab, a->smalltable, sizeof(tab));
1138 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1139 memcpy(b->smalltable, tab, sizeof(tab));
1140 }
1141
1142 if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) &&
1143 PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1144 h = a->hash; a->hash = b->hash; b->hash = h;
1145 } else {
1146 a->hash = -1;
1147 b->hash = -1;
1148 }
1149 }
1150
1151 static PyObject *
set_copy(PySetObject * so)1152 set_copy(PySetObject *so)
1153 {
1154 return make_new_set(Py_TYPE(so), (PyObject *)so);
1155 }
1156
1157 static PyObject *
frozenset_copy(PySetObject * so)1158 frozenset_copy(PySetObject *so)
1159 {
1160 if (PyFrozenSet_CheckExact(so)) {
1161 Py_INCREF(so);
1162 return (PyObject *)so;
1163 }
1164 return set_copy(so);
1165 }
1166
1167 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1168
1169 static PyObject *
set_clear(PySetObject * so)1170 set_clear(PySetObject *so)
1171 {
1172 set_clear_internal(so);
1173 Py_RETURN_NONE;
1174 }
1175
1176 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1177
1178 static PyObject *
set_union(PySetObject * so,PyObject * args)1179 set_union(PySetObject *so, PyObject *args)
1180 {
1181 PySetObject *result;
1182 PyObject *other;
1183 Py_ssize_t i;
1184
1185 result = (PySetObject *)set_copy(so);
1186 if (result == NULL)
1187 return NULL;
1188
1189 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1190 other = PyTuple_GET_ITEM(args, i);
1191 if ((PyObject *)so == other)
1192 continue;
1193 if (set_update_internal(result, other) == -1) {
1194 Py_DECREF(result);
1195 return NULL;
1196 }
1197 }
1198 return (PyObject *)result;
1199 }
1200
1201 PyDoc_STRVAR(union_doc,
1202 "Return the union of sets as a new set.\n\
1203 \n\
1204 (i.e. all elements that are in either set.)");
1205
1206 static PyObject *
set_or(PySetObject * so,PyObject * other)1207 set_or(PySetObject *so, PyObject *other)
1208 {
1209 PySetObject *result;
1210
1211 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1212 Py_INCREF(Py_NotImplemented);
1213 return Py_NotImplemented;
1214 }
1215
1216 result = (PySetObject *)set_copy(so);
1217 if (result == NULL)
1218 return NULL;
1219 if ((PyObject *)so == other)
1220 return (PyObject *)result;
1221 if (set_update_internal(result, other) == -1) {
1222 Py_DECREF(result);
1223 return NULL;
1224 }
1225 return (PyObject *)result;
1226 }
1227
1228 static PyObject *
set_ior(PySetObject * so,PyObject * other)1229 set_ior(PySetObject *so, PyObject *other)
1230 {
1231 if (!PyAnySet_Check(other)) {
1232 Py_INCREF(Py_NotImplemented);
1233 return Py_NotImplemented;
1234 }
1235 if (set_update_internal(so, other) == -1)
1236 return NULL;
1237 Py_INCREF(so);
1238 return (PyObject *)so;
1239 }
1240
1241 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1242 set_intersection(PySetObject *so, PyObject *other)
1243 {
1244 PySetObject *result;
1245 PyObject *key, *it, *tmp;
1246
1247 if ((PyObject *)so == other)
1248 return set_copy(so);
1249
1250 result = (PySetObject *)make_new_set(Py_TYPE(so), NULL);
1251 if (result == NULL)
1252 return NULL;
1253
1254 if (PyAnySet_Check(other)) {
1255 Py_ssize_t pos = 0;
1256 setentry *entry;
1257
1258 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1259 tmp = (PyObject *)so;
1260 so = (PySetObject *)other;
1261 other = tmp;
1262 }
1263
1264 while (set_next((PySetObject *)other, &pos, &entry)) {
1265 int rv = set_contains_entry(so, entry);
1266 if (rv == -1) {
1267 Py_DECREF(result);
1268 return NULL;
1269 }
1270 if (rv) {
1271 if (set_add_entry(result, entry) == -1) {
1272 Py_DECREF(result);
1273 return NULL;
1274 }
1275 }
1276 }
1277 return (PyObject *)result;
1278 }
1279
1280 it = PyObject_GetIter(other);
1281 if (it == NULL) {
1282 Py_DECREF(result);
1283 return NULL;
1284 }
1285
1286 while ((key = PyIter_Next(it)) != NULL) {
1287 int rv;
1288 setentry entry;
1289 long hash = PyObject_Hash(key);
1290
1291 if (hash == -1) {
1292 Py_DECREF(it);
1293 Py_DECREF(result);
1294 Py_DECREF(key);
1295 return NULL;
1296 }
1297 entry.hash = hash;
1298 entry.key = key;
1299 rv = set_contains_entry(so, &entry);
1300 if (rv == -1) {
1301 Py_DECREF(it);
1302 Py_DECREF(result);
1303 Py_DECREF(key);
1304 return NULL;
1305 }
1306 if (rv) {
1307 if (set_add_entry(result, &entry) == -1) {
1308 Py_DECREF(it);
1309 Py_DECREF(result);
1310 Py_DECREF(key);
1311 return NULL;
1312 }
1313 }
1314 Py_DECREF(key);
1315 }
1316 Py_DECREF(it);
1317 if (PyErr_Occurred()) {
1318 Py_DECREF(result);
1319 return NULL;
1320 }
1321 return (PyObject *)result;
1322 }
1323
1324 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1325 set_intersection_multi(PySetObject *so, PyObject *args)
1326 {
1327 Py_ssize_t i;
1328 PyObject *result = (PyObject *)so;
1329
1330 if (PyTuple_GET_SIZE(args) == 0)
1331 return set_copy(so);
1332
1333 Py_INCREF(so);
1334 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1335 PyObject *other = PyTuple_GET_ITEM(args, i);
1336 PyObject *newresult = set_intersection((PySetObject *)result, other);
1337 if (newresult == NULL) {
1338 Py_DECREF(result);
1339 return NULL;
1340 }
1341 Py_DECREF(result);
1342 result = newresult;
1343 }
1344 return result;
1345 }
1346
1347 PyDoc_STRVAR(intersection_doc,
1348 "Return the intersection of two or more sets as a new set.\n\
1349 \n\
1350 (i.e. elements that are common to all of the sets.)");
1351
1352 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1353 set_intersection_update(PySetObject *so, PyObject *other)
1354 {
1355 PyObject *tmp;
1356
1357 tmp = set_intersection(so, other);
1358 if (tmp == NULL)
1359 return NULL;
1360 set_swap_bodies(so, (PySetObject *)tmp);
1361 Py_DECREF(tmp);
1362 Py_RETURN_NONE;
1363 }
1364
1365 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1366 set_intersection_update_multi(PySetObject *so, PyObject *args)
1367 {
1368 PyObject *tmp;
1369
1370 tmp = set_intersection_multi(so, args);
1371 if (tmp == NULL)
1372 return NULL;
1373 set_swap_bodies(so, (PySetObject *)tmp);
1374 Py_DECREF(tmp);
1375 Py_RETURN_NONE;
1376 }
1377
1378 PyDoc_STRVAR(intersection_update_doc,
1379 "Update a set with the intersection of itself and another.");
1380
1381 static PyObject *
set_and(PySetObject * so,PyObject * other)1382 set_and(PySetObject *so, PyObject *other)
1383 {
1384 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1385 Py_INCREF(Py_NotImplemented);
1386 return Py_NotImplemented;
1387 }
1388 return set_intersection(so, other);
1389 }
1390
1391 static PyObject *
set_iand(PySetObject * so,PyObject * other)1392 set_iand(PySetObject *so, PyObject *other)
1393 {
1394 PyObject *result;
1395
1396 if (!PyAnySet_Check(other)) {
1397 Py_INCREF(Py_NotImplemented);
1398 return Py_NotImplemented;
1399 }
1400 result = set_intersection_update(so, other);
1401 if (result == NULL)
1402 return NULL;
1403 Py_DECREF(result);
1404 Py_INCREF(so);
1405 return (PyObject *)so;
1406 }
1407
1408 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1409 set_isdisjoint(PySetObject *so, PyObject *other)
1410 {
1411 PyObject *key, *it, *tmp;
1412
1413 if ((PyObject *)so == other) {
1414 if (PySet_GET_SIZE(so) == 0)
1415 Py_RETURN_TRUE;
1416 else
1417 Py_RETURN_FALSE;
1418 }
1419
1420 if (PyAnySet_CheckExact(other)) {
1421 Py_ssize_t pos = 0;
1422 setentry *entry;
1423
1424 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1425 tmp = (PyObject *)so;
1426 so = (PySetObject *)other;
1427 other = tmp;
1428 }
1429 while (set_next((PySetObject *)other, &pos, &entry)) {
1430 int rv = set_contains_entry(so, entry);
1431 if (rv == -1)
1432 return NULL;
1433 if (rv)
1434 Py_RETURN_FALSE;
1435 }
1436 Py_RETURN_TRUE;
1437 }
1438
1439 it = PyObject_GetIter(other);
1440 if (it == NULL)
1441 return NULL;
1442
1443 while ((key = PyIter_Next(it)) != NULL) {
1444 int rv;
1445 setentry entry;
1446 long hash = PyObject_Hash(key);
1447
1448 if (hash == -1) {
1449 Py_DECREF(key);
1450 Py_DECREF(it);
1451 return NULL;
1452 }
1453 entry.hash = hash;
1454 entry.key = key;
1455 rv = set_contains_entry(so, &entry);
1456 Py_DECREF(key);
1457 if (rv == -1) {
1458 Py_DECREF(it);
1459 return NULL;
1460 }
1461 if (rv) {
1462 Py_DECREF(it);
1463 Py_RETURN_FALSE;
1464 }
1465 }
1466 Py_DECREF(it);
1467 if (PyErr_Occurred())
1468 return NULL;
1469 Py_RETURN_TRUE;
1470 }
1471
1472 PyDoc_STRVAR(isdisjoint_doc,
1473 "Return True if two sets have a null intersection.");
1474
1475 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1476 set_difference_update_internal(PySetObject *so, PyObject *other)
1477 {
1478 if ((PyObject *)so == other)
1479 return set_clear_internal(so);
1480
1481 if (PyAnySet_Check(other)) {
1482 setentry *entry;
1483 Py_ssize_t pos = 0;
1484
1485 while (set_next((PySetObject *)other, &pos, &entry))
1486 if (set_discard_entry(so, entry) == -1)
1487 return -1;
1488 } else {
1489 PyObject *key, *it;
1490 it = PyObject_GetIter(other);
1491 if (it == NULL)
1492 return -1;
1493
1494 while ((key = PyIter_Next(it)) != NULL) {
1495 if (set_discard_key(so, key) == -1) {
1496 Py_DECREF(it);
1497 Py_DECREF(key);
1498 return -1;
1499 }
1500 Py_DECREF(key);
1501 }
1502 Py_DECREF(it);
1503 if (PyErr_Occurred())
1504 return -1;
1505 }
1506 /* If more than 1/5 are dummies, then resize them away. */
1507 if ((so->fill - so->used) * 5 < so->mask)
1508 return 0;
1509 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1510 }
1511
1512 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1513 set_difference_update(PySetObject *so, PyObject *args)
1514 {
1515 Py_ssize_t i;
1516
1517 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1518 PyObject *other = PyTuple_GET_ITEM(args, i);
1519 if (set_difference_update_internal(so, other) == -1)
1520 return NULL;
1521 }
1522 Py_RETURN_NONE;
1523 }
1524
1525 PyDoc_STRVAR(difference_update_doc,
1526 "Remove all elements of another set from this set.");
1527
1528 static PyObject *
set_difference(PySetObject * so,PyObject * other)1529 set_difference(PySetObject *so, PyObject *other)
1530 {
1531 PyObject *result;
1532 setentry *entry;
1533 Py_ssize_t pos = 0;
1534
1535 if (!PyAnySet_Check(other) && !PyDict_CheckExact(other)) {
1536 result = set_copy(so);
1537 if (result == NULL)
1538 return NULL;
1539 if (set_difference_update_internal((PySetObject *)result, other) != -1)
1540 return result;
1541 Py_DECREF(result);
1542 return NULL;
1543 }
1544
1545 result = make_new_set(Py_TYPE(so), NULL);
1546 if (result == NULL)
1547 return NULL;
1548
1549 if (PyDict_CheckExact(other)) {
1550 while (set_next(so, &pos, &entry)) {
1551 setentry entrycopy;
1552 entrycopy.hash = entry->hash;
1553 entrycopy.key = entry->key;
1554 if (!_PyDict_Contains(other, entry->key, entry->hash)) {
1555 if (set_add_entry((PySetObject *)result, &entrycopy) == -1) {
1556 Py_DECREF(result);
1557 return NULL;
1558 }
1559 }
1560 }
1561 return result;
1562 }
1563
1564 while (set_next(so, &pos, &entry)) {
1565 int rv = set_contains_entry((PySetObject *)other, entry);
1566 if (rv == -1) {
1567 Py_DECREF(result);
1568 return NULL;
1569 }
1570 if (!rv) {
1571 if (set_add_entry((PySetObject *)result, entry) == -1) {
1572 Py_DECREF(result);
1573 return NULL;
1574 }
1575 }
1576 }
1577 return result;
1578 }
1579
1580 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1581 set_difference_multi(PySetObject *so, PyObject *args)
1582 {
1583 Py_ssize_t i;
1584 PyObject *result, *other;
1585
1586 if (PyTuple_GET_SIZE(args) == 0)
1587 return set_copy(so);
1588
1589 other = PyTuple_GET_ITEM(args, 0);
1590 result = set_difference(so, other);
1591 if (result == NULL)
1592 return NULL;
1593
1594 for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1595 other = PyTuple_GET_ITEM(args, i);
1596 if (set_difference_update_internal((PySetObject *)result, other) == -1) {
1597 Py_DECREF(result);
1598 return NULL;
1599 }
1600 }
1601 return result;
1602 }
1603
1604 PyDoc_STRVAR(difference_doc,
1605 "Return the difference of two or more sets as a new set.\n\
1606 \n\
1607 (i.e. all elements that are in this set but not the others.)");
1608 static PyObject *
set_sub(PySetObject * so,PyObject * other)1609 set_sub(PySetObject *so, PyObject *other)
1610 {
1611 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1612 Py_INCREF(Py_NotImplemented);
1613 return Py_NotImplemented;
1614 }
1615 return set_difference(so, other);
1616 }
1617
1618 static PyObject *
set_isub(PySetObject * so,PyObject * other)1619 set_isub(PySetObject *so, PyObject *other)
1620 {
1621 if (!PyAnySet_Check(other)) {
1622 Py_INCREF(Py_NotImplemented);
1623 return Py_NotImplemented;
1624 }
1625 if (set_difference_update_internal(so, other) == -1)
1626 return NULL;
1627 Py_INCREF(so);
1628 return (PyObject *)so;
1629 }
1630
1631 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1632 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1633 {
1634 PySetObject *otherset;
1635 PyObject *key;
1636 Py_ssize_t pos = 0;
1637 setentry *entry;
1638
1639 if ((PyObject *)so == other)
1640 return set_clear(so);
1641
1642 if (PyDict_CheckExact(other)) {
1643 PyObject *value;
1644 int rv;
1645 long hash;
1646 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1647 setentry an_entry;
1648
1649 Py_INCREF(key);
1650 an_entry.hash = hash;
1651 an_entry.key = key;
1652
1653 rv = set_discard_entry(so, &an_entry);
1654 if (rv == -1) {
1655 Py_DECREF(key);
1656 return NULL;
1657 }
1658 if (rv == DISCARD_NOTFOUND) {
1659 if (set_add_entry(so, &an_entry) == -1) {
1660 Py_DECREF(key);
1661 return NULL;
1662 }
1663 }
1664 Py_DECREF(key);
1665 }
1666 Py_RETURN_NONE;
1667 }
1668
1669 if (PyAnySet_Check(other)) {
1670 Py_INCREF(other);
1671 otherset = (PySetObject *)other;
1672 } else {
1673 otherset = (PySetObject *)make_new_set(Py_TYPE(so), other);
1674 if (otherset == NULL)
1675 return NULL;
1676 }
1677
1678 while (set_next(otherset, &pos, &entry)) {
1679 int rv = set_discard_entry(so, entry);
1680 if (rv == -1) {
1681 Py_DECREF(otherset);
1682 return NULL;
1683 }
1684 if (rv == DISCARD_NOTFOUND) {
1685 if (set_add_entry(so, entry) == -1) {
1686 Py_DECREF(otherset);
1687 return NULL;
1688 }
1689 }
1690 }
1691 Py_DECREF(otherset);
1692 Py_RETURN_NONE;
1693 }
1694
1695 PyDoc_STRVAR(symmetric_difference_update_doc,
1696 "Update a set with the symmetric difference of itself and another.");
1697
1698 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1699 set_symmetric_difference(PySetObject *so, PyObject *other)
1700 {
1701 PyObject *rv;
1702 PySetObject *otherset;
1703
1704 otherset = (PySetObject *)make_new_set(Py_TYPE(so), other);
1705 if (otherset == NULL)
1706 return NULL;
1707 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1708 if (rv == NULL)
1709 return NULL;
1710 Py_DECREF(rv);
1711 return (PyObject *)otherset;
1712 }
1713
1714 PyDoc_STRVAR(symmetric_difference_doc,
1715 "Return the symmetric difference of two sets as a new set.\n\
1716 \n\
1717 (i.e. all elements that are in exactly one of the sets.)");
1718
1719 static PyObject *
set_xor(PySetObject * so,PyObject * other)1720 set_xor(PySetObject *so, PyObject *other)
1721 {
1722 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1723 Py_INCREF(Py_NotImplemented);
1724 return Py_NotImplemented;
1725 }
1726 return set_symmetric_difference(so, other);
1727 }
1728
1729 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1730 set_ixor(PySetObject *so, PyObject *other)
1731 {
1732 PyObject *result;
1733
1734 if (!PyAnySet_Check(other)) {
1735 Py_INCREF(Py_NotImplemented);
1736 return Py_NotImplemented;
1737 }
1738 result = set_symmetric_difference_update(so, other);
1739 if (result == NULL)
1740 return NULL;
1741 Py_DECREF(result);
1742 Py_INCREF(so);
1743 return (PyObject *)so;
1744 }
1745
1746 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1747 set_issubset(PySetObject *so, PyObject *other)
1748 {
1749 setentry *entry;
1750 Py_ssize_t pos = 0;
1751
1752 if (!PyAnySet_Check(other)) {
1753 PyObject *tmp, *result;
1754 tmp = make_new_set(&PySet_Type, other);
1755 if (tmp == NULL)
1756 return NULL;
1757 result = set_issubset(so, tmp);
1758 Py_DECREF(tmp);
1759 return result;
1760 }
1761 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1762 Py_RETURN_FALSE;
1763
1764 while (set_next(so, &pos, &entry)) {
1765 int rv = set_contains_entry((PySetObject *)other, entry);
1766 if (rv == -1)
1767 return NULL;
1768 if (!rv)
1769 Py_RETURN_FALSE;
1770 }
1771 Py_RETURN_TRUE;
1772 }
1773
1774 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1775
1776 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1777 set_issuperset(PySetObject *so, PyObject *other)
1778 {
1779 PyObject *tmp, *result;
1780
1781 if (!PyAnySet_Check(other)) {
1782 tmp = make_new_set(&PySet_Type, other);
1783 if (tmp == NULL)
1784 return NULL;
1785 result = set_issuperset(so, tmp);
1786 Py_DECREF(tmp);
1787 return result;
1788 }
1789 return set_issubset((PySetObject *)other, (PyObject *)so);
1790 }
1791
1792 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1793
1794 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1795 set_richcompare(PySetObject *v, PyObject *w, int op)
1796 {
1797 PyObject *r1, *r2;
1798
1799 if(!PyAnySet_Check(w)) {
1800 if (op == Py_EQ)
1801 Py_RETURN_FALSE;
1802 if (op == Py_NE)
1803 Py_RETURN_TRUE;
1804 PyErr_SetString(PyExc_TypeError, "can only compare to a set");
1805 return NULL;
1806 }
1807 switch (op) {
1808 case Py_EQ:
1809 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1810 Py_RETURN_FALSE;
1811 if (v->hash != -1 &&
1812 ((PySetObject *)w)->hash != -1 &&
1813 v->hash != ((PySetObject *)w)->hash)
1814 Py_RETURN_FALSE;
1815 return set_issubset(v, w);
1816 case Py_NE:
1817 r1 = set_richcompare(v, w, Py_EQ);
1818 if (r1 == NULL)
1819 return NULL;
1820 r2 = PyBool_FromLong(PyObject_Not(r1));
1821 Py_DECREF(r1);
1822 return r2;
1823 case Py_LE:
1824 return set_issubset(v, w);
1825 case Py_GE:
1826 return set_issuperset(v, w);
1827 case Py_LT:
1828 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1829 Py_RETURN_FALSE;
1830 return set_issubset(v, w);
1831 case Py_GT:
1832 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1833 Py_RETURN_FALSE;
1834 return set_issuperset(v, w);
1835 }
1836 Py_INCREF(Py_NotImplemented);
1837 return Py_NotImplemented;
1838 }
1839
1840 static int
set_nocmp(PyObject * self,PyObject * other)1841 set_nocmp(PyObject *self, PyObject *other)
1842 {
1843 PyErr_SetString(PyExc_TypeError, "cannot compare sets using cmp()");
1844 return -1;
1845 }
1846
1847 static PyObject *
set_add(PySetObject * so,PyObject * key)1848 set_add(PySetObject *so, PyObject *key)
1849 {
1850 if (set_add_key(so, key) == -1)
1851 return NULL;
1852 Py_RETURN_NONE;
1853 }
1854
1855 PyDoc_STRVAR(add_doc,
1856 "Add an element to a set.\n\
1857 \n\
1858 This has no effect if the element is already present.");
1859
1860 static int
set_contains(PySetObject * so,PyObject * key)1861 set_contains(PySetObject *so, PyObject *key)
1862 {
1863 PyObject *tmpkey;
1864 int rv;
1865
1866 rv = set_contains_key(so, key);
1867 if (rv == -1) {
1868 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1869 return -1;
1870 PyErr_Clear();
1871 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1872 if (tmpkey == NULL)
1873 return -1;
1874 rv = set_contains(so, tmpkey);
1875 Py_DECREF(tmpkey);
1876 }
1877 return rv;
1878 }
1879
1880 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1881 set_direct_contains(PySetObject *so, PyObject *key)
1882 {
1883 long result;
1884
1885 result = set_contains(so, key);
1886 if (result == -1)
1887 return NULL;
1888 return PyBool_FromLong(result);
1889 }
1890
1891 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1892
1893 static PyObject *
set_remove(PySetObject * so,PyObject * key)1894 set_remove(PySetObject *so, PyObject *key)
1895 {
1896 PyObject *tmpkey;
1897 int rv;
1898
1899 rv = set_discard_key(so, key);
1900 if (rv == -1) {
1901 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1902 return NULL;
1903 PyErr_Clear();
1904 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1905 if (tmpkey == NULL)
1906 return NULL;
1907 rv = set_discard_key(so, tmpkey);
1908 Py_DECREF(tmpkey);
1909 if (rv == -1)
1910 return NULL;
1911 }
1912
1913 if (rv == DISCARD_NOTFOUND) {
1914 set_key_error(key);
1915 return NULL;
1916 }
1917 Py_RETURN_NONE;
1918 }
1919
1920 PyDoc_STRVAR(remove_doc,
1921 "Remove an element from a set; it must be a member.\n\
1922 \n\
1923 If the element is not a member, raise a KeyError.");
1924
1925 static PyObject *
set_discard(PySetObject * so,PyObject * key)1926 set_discard(PySetObject *so, PyObject *key)
1927 {
1928 PyObject *tmpkey, *result;
1929 int rv;
1930
1931 rv = set_discard_key(so, key);
1932 if (rv == -1) {
1933 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1934 return NULL;
1935 PyErr_Clear();
1936 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1937 if (tmpkey == NULL)
1938 return NULL;
1939 result = set_discard(so, tmpkey);
1940 Py_DECREF(tmpkey);
1941 return result;
1942 }
1943 Py_RETURN_NONE;
1944 }
1945
1946 PyDoc_STRVAR(discard_doc,
1947 "Remove an element from a set if it is a member.\n\
1948 \n\
1949 If the element is not a member, do nothing.");
1950
1951 static PyObject *
set_reduce(PySetObject * so)1952 set_reduce(PySetObject *so)
1953 {
1954 PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1955
1956 keys = PySequence_List((PyObject *)so);
1957 if (keys == NULL)
1958 goto done;
1959 args = PyTuple_Pack(1, keys);
1960 if (args == NULL)
1961 goto done;
1962 dict = PyObject_GetAttrString((PyObject *)so, "__dict__");
1963 if (dict == NULL) {
1964 PyErr_Clear();
1965 dict = Py_None;
1966 Py_INCREF(dict);
1967 }
1968 result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1969 done:
1970 Py_XDECREF(args);
1971 Py_XDECREF(keys);
1972 Py_XDECREF(dict);
1973 return result;
1974 }
1975
1976 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
1977
1978 static PyObject *
set_sizeof(PySetObject * so)1979 set_sizeof(PySetObject *so)
1980 {
1981 Py_ssize_t res;
1982
1983 res = sizeof(PySetObject);
1984 if (so->table != so->smalltable)
1985 res = res + (so->mask + 1) * sizeof(setentry);
1986 return PyInt_FromSsize_t(res);
1987 }
1988
1989 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1990 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1991 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1992 {
1993 PyObject *iterable = NULL;
1994
1995 if (!PyAnySet_Check(self))
1996 return -1;
1997 if (PySet_Check(self) && !_PyArg_NoKeywords("set()", kwds))
1998 return -1;
1999 if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
2000 return -1;
2001 set_clear_internal(self);
2002 self->hash = -1;
2003 if (iterable == NULL)
2004 return 0;
2005 return set_update_internal(self, iterable);
2006 }
2007
2008 static PySequenceMethods set_as_sequence = {
2009 set_len, /* sq_length */
2010 0, /* sq_concat */
2011 0, /* sq_repeat */
2012 0, /* sq_item */
2013 0, /* sq_slice */
2014 0, /* sq_ass_item */
2015 0, /* sq_ass_slice */
2016 (objobjproc)set_contains, /* sq_contains */
2017 };
2018
2019 /* set object ********************************************************/
2020
2021 #ifdef Py_DEBUG
2022 static PyObject *test_c_api(PySetObject *so);
2023
2024 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
2025 All is well if assertions don't fail.");
2026 #endif
2027
2028 static PyMethodDef set_methods[] = {
2029 {"add", (PyCFunction)set_add, METH_O,
2030 add_doc},
2031 {"clear", (PyCFunction)set_clear, METH_NOARGS,
2032 clear_doc},
2033 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2034 contains_doc},
2035 {"copy", (PyCFunction)set_copy, METH_NOARGS,
2036 copy_doc},
2037 {"discard", (PyCFunction)set_discard, METH_O,
2038 discard_doc},
2039 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2040 difference_doc},
2041 {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS,
2042 difference_update_doc},
2043 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2044 intersection_doc},
2045 {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS,
2046 intersection_update_doc},
2047 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2048 isdisjoint_doc},
2049 {"issubset", (PyCFunction)set_issubset, METH_O,
2050 issubset_doc},
2051 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2052 issuperset_doc},
2053 {"pop", (PyCFunction)set_pop, METH_NOARGS,
2054 pop_doc},
2055 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2056 reduce_doc},
2057 {"remove", (PyCFunction)set_remove, METH_O,
2058 remove_doc},
2059 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2060 sizeof_doc},
2061 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2062 symmetric_difference_doc},
2063 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
2064 symmetric_difference_update_doc},
2065 #ifdef Py_DEBUG
2066 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
2067 test_c_api_doc},
2068 #endif
2069 {"union", (PyCFunction)set_union, METH_VARARGS,
2070 union_doc},
2071 {"update", (PyCFunction)set_update, METH_VARARGS,
2072 update_doc},
2073 {NULL, NULL} /* sentinel */
2074 };
2075
2076 static PyNumberMethods set_as_number = {
2077 0, /*nb_add*/
2078 (binaryfunc)set_sub, /*nb_subtract*/
2079 0, /*nb_multiply*/
2080 0, /*nb_divide*/
2081 0, /*nb_remainder*/
2082 0, /*nb_divmod*/
2083 0, /*nb_power*/
2084 0, /*nb_negative*/
2085 0, /*nb_positive*/
2086 0, /*nb_absolute*/
2087 0, /*nb_nonzero*/
2088 0, /*nb_invert*/
2089 0, /*nb_lshift*/
2090 0, /*nb_rshift*/
2091 (binaryfunc)set_and, /*nb_and*/
2092 (binaryfunc)set_xor, /*nb_xor*/
2093 (binaryfunc)set_or, /*nb_or*/
2094 0, /*nb_coerce*/
2095 0, /*nb_int*/
2096 0, /*nb_long*/
2097 0, /*nb_float*/
2098 0, /*nb_oct*/
2099 0, /*nb_hex*/
2100 0, /*nb_inplace_add*/
2101 (binaryfunc)set_isub, /*nb_inplace_subtract*/
2102 0, /*nb_inplace_multiply*/
2103 0, /*nb_inplace_divide*/
2104 0, /*nb_inplace_remainder*/
2105 0, /*nb_inplace_power*/
2106 0, /*nb_inplace_lshift*/
2107 0, /*nb_inplace_rshift*/
2108 (binaryfunc)set_iand, /*nb_inplace_and*/
2109 (binaryfunc)set_ixor, /*nb_inplace_xor*/
2110 (binaryfunc)set_ior, /*nb_inplace_or*/
2111 };
2112
2113 PyDoc_STRVAR(set_doc,
2114 "set() -> new empty set object\n\
2115 set(iterable) -> new set object\n\
2116 \n\
2117 Build an unordered collection of unique elements.");
2118
2119 PyTypeObject PySet_Type = {
2120 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2121 "set", /* tp_name */
2122 sizeof(PySetObject), /* tp_basicsize */
2123 0, /* tp_itemsize */
2124 /* methods */
2125 (destructor)set_dealloc, /* tp_dealloc */
2126 (printfunc)set_tp_print, /* tp_print */
2127 0, /* tp_getattr */
2128 0, /* tp_setattr */
2129 set_nocmp, /* tp_compare */
2130 (reprfunc)set_repr, /* tp_repr */
2131 &set_as_number, /* tp_as_number */
2132 &set_as_sequence, /* tp_as_sequence */
2133 0, /* tp_as_mapping */
2134 (hashfunc)PyObject_HashNotImplemented, /* tp_hash */
2135 0, /* tp_call */
2136 0, /* tp_str */
2137 PyObject_GenericGetAttr, /* tp_getattro */
2138 0, /* tp_setattro */
2139 0, /* tp_as_buffer */
2140 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
2141 Py_TPFLAGS_BASETYPE, /* tp_flags */
2142 set_doc, /* tp_doc */
2143 (traverseproc)set_traverse, /* tp_traverse */
2144 (inquiry)set_clear_internal, /* tp_clear */
2145 (richcmpfunc)set_richcompare, /* tp_richcompare */
2146 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2147 (getiterfunc)set_iter, /* tp_iter */
2148 0, /* tp_iternext */
2149 set_methods, /* tp_methods */
2150 0, /* tp_members */
2151 0, /* tp_getset */
2152 0, /* tp_base */
2153 0, /* tp_dict */
2154 0, /* tp_descr_get */
2155 0, /* tp_descr_set */
2156 0, /* tp_dictoffset */
2157 (initproc)set_init, /* tp_init */
2158 PyType_GenericAlloc, /* tp_alloc */
2159 set_new, /* tp_new */
2160 PyObject_GC_Del, /* tp_free */
2161 };
2162
2163 /* frozenset object ********************************************************/
2164
2165
2166 static PyMethodDef frozenset_methods[] = {
2167 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2168 contains_doc},
2169 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
2170 copy_doc},
2171 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2172 difference_doc},
2173 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2174 intersection_doc},
2175 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2176 isdisjoint_doc},
2177 {"issubset", (PyCFunction)set_issubset, METH_O,
2178 issubset_doc},
2179 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2180 issuperset_doc},
2181 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2182 reduce_doc},
2183 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2184 sizeof_doc},
2185 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2186 symmetric_difference_doc},
2187 {"union", (PyCFunction)set_union, METH_VARARGS,
2188 union_doc},
2189 {NULL, NULL} /* sentinel */
2190 };
2191
2192 static PyNumberMethods frozenset_as_number = {
2193 0, /*nb_add*/
2194 (binaryfunc)set_sub, /*nb_subtract*/
2195 0, /*nb_multiply*/
2196 0, /*nb_divide*/
2197 0, /*nb_remainder*/
2198 0, /*nb_divmod*/
2199 0, /*nb_power*/
2200 0, /*nb_negative*/
2201 0, /*nb_positive*/
2202 0, /*nb_absolute*/
2203 0, /*nb_nonzero*/
2204 0, /*nb_invert*/
2205 0, /*nb_lshift*/
2206 0, /*nb_rshift*/
2207 (binaryfunc)set_and, /*nb_and*/
2208 (binaryfunc)set_xor, /*nb_xor*/
2209 (binaryfunc)set_or, /*nb_or*/
2210 };
2211
2212 PyDoc_STRVAR(frozenset_doc,
2213 "frozenset() -> empty frozenset object\n\
2214 frozenset(iterable) -> frozenset object\n\
2215 \n\
2216 Build an immutable unordered collection of unique elements.");
2217
2218 PyTypeObject PyFrozenSet_Type = {
2219 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2220 "frozenset", /* tp_name */
2221 sizeof(PySetObject), /* tp_basicsize */
2222 0, /* tp_itemsize */
2223 /* methods */
2224 (destructor)set_dealloc, /* tp_dealloc */
2225 (printfunc)set_tp_print, /* tp_print */
2226 0, /* tp_getattr */
2227 0, /* tp_setattr */
2228 set_nocmp, /* tp_compare */
2229 (reprfunc)set_repr, /* tp_repr */
2230 &frozenset_as_number, /* tp_as_number */
2231 &set_as_sequence, /* tp_as_sequence */
2232 0, /* tp_as_mapping */
2233 frozenset_hash, /* tp_hash */
2234 0, /* tp_call */
2235 0, /* tp_str */
2236 PyObject_GenericGetAttr, /* tp_getattro */
2237 0, /* tp_setattro */
2238 0, /* tp_as_buffer */
2239 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
2240 Py_TPFLAGS_BASETYPE, /* tp_flags */
2241 frozenset_doc, /* tp_doc */
2242 (traverseproc)set_traverse, /* tp_traverse */
2243 (inquiry)set_clear_internal, /* tp_clear */
2244 (richcmpfunc)set_richcompare, /* tp_richcompare */
2245 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2246 (getiterfunc)set_iter, /* tp_iter */
2247 0, /* tp_iternext */
2248 frozenset_methods, /* tp_methods */
2249 0, /* tp_members */
2250 0, /* tp_getset */
2251 0, /* tp_base */
2252 0, /* tp_dict */
2253 0, /* tp_descr_get */
2254 0, /* tp_descr_set */
2255 0, /* tp_dictoffset */
2256 0, /* tp_init */
2257 PyType_GenericAlloc, /* tp_alloc */
2258 frozenset_new, /* tp_new */
2259 PyObject_GC_Del, /* tp_free */
2260 };
2261
2262
2263 /***** C API functions *************************************************/
2264
2265 PyObject *
PySet_New(PyObject * iterable)2266 PySet_New(PyObject *iterable)
2267 {
2268 return make_new_set(&PySet_Type, iterable);
2269 }
2270
2271 PyObject *
PyFrozenSet_New(PyObject * iterable)2272 PyFrozenSet_New(PyObject *iterable)
2273 {
2274 return make_new_set(&PyFrozenSet_Type, iterable);
2275 }
2276
2277 Py_ssize_t
PySet_Size(PyObject * anyset)2278 PySet_Size(PyObject *anyset)
2279 {
2280 if (!PyAnySet_Check(anyset)) {
2281 PyErr_BadInternalCall();
2282 return -1;
2283 }
2284 return PySet_GET_SIZE(anyset);
2285 }
2286
2287 int
PySet_Clear(PyObject * set)2288 PySet_Clear(PyObject *set)
2289 {
2290 if (!PySet_Check(set)) {
2291 PyErr_BadInternalCall();
2292 return -1;
2293 }
2294 return set_clear_internal((PySetObject *)set);
2295 }
2296
2297 int
PySet_Contains(PyObject * anyset,PyObject * key)2298 PySet_Contains(PyObject *anyset, PyObject *key)
2299 {
2300 if (!PyAnySet_Check(anyset)) {
2301 PyErr_BadInternalCall();
2302 return -1;
2303 }
2304 return set_contains_key((PySetObject *)anyset, key);
2305 }
2306
2307 int
PySet_Discard(PyObject * set,PyObject * key)2308 PySet_Discard(PyObject *set, PyObject *key)
2309 {
2310 if (!PySet_Check(set)) {
2311 PyErr_BadInternalCall();
2312 return -1;
2313 }
2314 return set_discard_key((PySetObject *)set, key);
2315 }
2316
2317 int
PySet_Add(PyObject * anyset,PyObject * key)2318 PySet_Add(PyObject *anyset, PyObject *key)
2319 {
2320 if (!PySet_Check(anyset) &&
2321 (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2322 PyErr_BadInternalCall();
2323 return -1;
2324 }
2325 return set_add_key((PySetObject *)anyset, key);
2326 }
2327
2328 int
_PySet_Next(PyObject * set,Py_ssize_t * pos,PyObject ** key)2329 _PySet_Next(PyObject *set, Py_ssize_t *pos, PyObject **key)
2330 {
2331 setentry *entry_ptr;
2332
2333 if (!PyAnySet_Check(set)) {
2334 PyErr_BadInternalCall();
2335 return -1;
2336 }
2337 if (set_next((PySetObject *)set, pos, &entry_ptr) == 0)
2338 return 0;
2339 *key = entry_ptr->key;
2340 return 1;
2341 }
2342
2343 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,long * hash)2344 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, long *hash)
2345 {
2346 setentry *entry;
2347
2348 if (!PyAnySet_Check(set)) {
2349 PyErr_BadInternalCall();
2350 return -1;
2351 }
2352 if (set_next((PySetObject *)set, pos, &entry) == 0)
2353 return 0;
2354 *key = entry->key;
2355 *hash = entry->hash;
2356 return 1;
2357 }
2358
2359 PyObject *
PySet_Pop(PyObject * set)2360 PySet_Pop(PyObject *set)
2361 {
2362 if (!PySet_Check(set)) {
2363 PyErr_BadInternalCall();
2364 return NULL;
2365 }
2366 return set_pop((PySetObject *)set);
2367 }
2368
2369 int
_PySet_Update(PyObject * set,PyObject * iterable)2370 _PySet_Update(PyObject *set, PyObject *iterable)
2371 {
2372 if (!PySet_Check(set)) {
2373 PyErr_BadInternalCall();
2374 return -1;
2375 }
2376 return set_update_internal((PySetObject *)set, iterable);
2377 }
2378
2379 #ifdef Py_DEBUG
2380
2381 /* Test code to be called with any three element set.
2382 Returns True and original set is restored. */
2383
2384 #define assertRaises(call_return_value, exception) \
2385 do { \
2386 assert(call_return_value); \
2387 assert(PyErr_ExceptionMatches(exception)); \
2388 PyErr_Clear(); \
2389 } while(0)
2390
2391 static PyObject *
test_c_api(PySetObject * so)2392 test_c_api(PySetObject *so)
2393 {
2394 Py_ssize_t count;
2395 char *s;
2396 Py_ssize_t i;
2397 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x;
2398 PyObject *ob = (PyObject *)so;
2399 PyObject *str;
2400
2401 /* Verify preconditions */
2402 assert(PyAnySet_Check(ob));
2403 assert(PyAnySet_CheckExact(ob));
2404 assert(!PyFrozenSet_CheckExact(ob));
2405
2406 /* so.clear(); so |= set("abc"); */
2407 str = PyString_FromString("abc");
2408 if (str == NULL)
2409 return NULL;
2410 set_clear_internal(so);
2411 if (set_update_internal(so, str) == -1) {
2412 Py_DECREF(str);
2413 return NULL;
2414 }
2415 Py_DECREF(str);
2416
2417 /* Exercise type/size checks */
2418 assert(PySet_Size(ob) == 3);
2419 assert(PySet_GET_SIZE(ob) == 3);
2420
2421 /* Raise TypeError for non-iterable constructor arguments */
2422 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2423 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2424
2425 /* Raise TypeError for unhashable key */
2426 dup = PySet_New(ob);
2427 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2428 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2429 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2430
2431 /* Exercise successful pop, contains, add, and discard */
2432 elem = PySet_Pop(ob);
2433 assert(PySet_Contains(ob, elem) == 0);
2434 assert(PySet_GET_SIZE(ob) == 2);
2435 assert(PySet_Add(ob, elem) == 0);
2436 assert(PySet_Contains(ob, elem) == 1);
2437 assert(PySet_GET_SIZE(ob) == 3);
2438 assert(PySet_Discard(ob, elem) == 1);
2439 assert(PySet_GET_SIZE(ob) == 2);
2440 assert(PySet_Discard(ob, elem) == 0);
2441 assert(PySet_GET_SIZE(ob) == 2);
2442
2443 /* Exercise clear */
2444 dup2 = PySet_New(dup);
2445 assert(PySet_Clear(dup2) == 0);
2446 assert(PySet_Size(dup2) == 0);
2447 Py_DECREF(dup2);
2448
2449 /* Raise SystemError on clear or update of frozen set */
2450 f = PyFrozenSet_New(dup);
2451 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2452 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2453 assert(PySet_Add(f, elem) == 0);
2454 Py_INCREF(f);
2455 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2456 Py_DECREF(f);
2457 Py_DECREF(f);
2458
2459 /* Exercise direct iteration */
2460 i = 0, count = 0;
2461 while (_PySet_Next((PyObject *)dup, &i, &x)) {
2462 s = PyString_AsString(x);
2463 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2464 count++;
2465 }
2466 assert(count == 3);
2467
2468 /* Exercise updates */
2469 dup2 = PySet_New(NULL);
2470 assert(_PySet_Update(dup2, dup) == 0);
2471 assert(PySet_Size(dup2) == 3);
2472 assert(_PySet_Update(dup2, dup) == 0);
2473 assert(PySet_Size(dup2) == 3);
2474 Py_DECREF(dup2);
2475
2476 /* Raise SystemError when self argument is not a set or frozenset. */
2477 t = PyTuple_New(0);
2478 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2479 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2480 Py_DECREF(t);
2481
2482 /* Raise SystemError when self argument is not a set. */
2483 f = PyFrozenSet_New(dup);
2484 assert(PySet_Size(f) == 3);
2485 assert(PyFrozenSet_CheckExact(f));
2486 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2487 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2488 Py_DECREF(f);
2489
2490 /* Raise KeyError when popping from an empty set */
2491 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2492 Py_DECREF(ob);
2493 assert(PySet_GET_SIZE(ob) == 0);
2494 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2495
2496 /* Restore the set from the copy using the PyNumber API */
2497 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2498 Py_DECREF(ob);
2499
2500 /* Verify constructors accept NULL arguments */
2501 f = PySet_New(NULL);
2502 assert(f != NULL);
2503 assert(PySet_GET_SIZE(f) == 0);
2504 Py_DECREF(f);
2505 f = PyFrozenSet_New(NULL);
2506 assert(f != NULL);
2507 assert(PyFrozenSet_CheckExact(f));
2508 assert(PySet_GET_SIZE(f) == 0);
2509 Py_DECREF(f);
2510
2511 Py_DECREF(elem);
2512 Py_DECREF(dup);
2513 Py_RETURN_TRUE;
2514 }
2515
2516 #undef assertRaises
2517
2518 #endif
2519