1 
2 /* set object implementation
3    Written and maintained by Raymond D. Hettinger <python@rcn.com>
4    Derived from Lib/sets.py and Objects/dictobject.c.
5 */
6 
7 #include "Python.h"
8 #include "structmember.h"
9 
10 /* Set a key error with the specified argument, wrapping it in a
11  * tuple automatically so that tuple keys are not unpacked as the
12  * exception arguments. */
13 static void
set_key_error(PyObject * arg)14 set_key_error(PyObject *arg)
15 {
16     PyObject *tup;
17     tup = PyTuple_Pack(1, arg);
18     if (!tup)
19         return; /* caller will expect error to be set anyway */
20     PyErr_SetObject(PyExc_KeyError, tup);
21     Py_DECREF(tup);
22 }
23 
24 /* This must be >= 1. */
25 #define PERTURB_SHIFT 5
26 
27 /* Object used as dummy key to fill deleted entries */
28 static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */
29 
30 #ifdef Py_REF_DEBUG
31 PyObject *
_PySet_Dummy(void)32 _PySet_Dummy(void)
33 {
34     return dummy;
35 }
36 #endif
37 
38 #define INIT_NONZERO_SET_SLOTS(so) do {                         \
39     (so)->table = (so)->smalltable;                             \
40     (so)->mask = PySet_MINSIZE - 1;                             \
41     (so)->hash = -1;                                            \
42     } while(0)
43 
44 #define EMPTY_TO_MINSIZE(so) do {                               \
45     memset((so)->smalltable, 0, sizeof((so)->smalltable));      \
46     (so)->used = (so)->fill = 0;                                \
47     INIT_NONZERO_SET_SLOTS(so);                                 \
48     } while(0)
49 
50 /* Reuse scheme to save calls to malloc, free, and memset */
51 #ifndef PySet_MAXFREELIST
52 #define PySet_MAXFREELIST 80
53 #endif
54 static PySetObject *free_list[PySet_MAXFREELIST];
55 static int numfree = 0;
56 
57 /*
58 The basic lookup function used by all operations.
59 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
60 Open addressing is preferred over chaining since the link overhead for
61 chaining would be substantial (100% with typical malloc overhead).
62 
63 The initial probe index is computed as hash mod the table size. Subsequent
64 probe indices are computed as explained in Objects/dictobject.c.
65 
66 All arithmetic on hash should ignore overflow.
67 
68 Unlike the dictionary implementation, the lookkey functions can return
69 NULL if the rich comparison returns an error.
70 */
71 
72 static setentry *
set_lookkey(PySetObject * so,PyObject * key,register long hash)73 set_lookkey(PySetObject *so, PyObject *key, register long hash)
74 {
75     register Py_ssize_t i;
76     register size_t perturb;
77     register setentry *freeslot;
78     register size_t mask = so->mask;
79     setentry *table = so->table;
80     register setentry *entry;
81     register int cmp;
82     PyObject *startkey;
83 
84     i = hash & mask;
85     entry = &table[i];
86     if (entry->key == NULL || entry->key == key)
87         return entry;
88 
89     if (entry->key == dummy)
90         freeslot = entry;
91     else {
92         if (entry->hash == hash) {
93             startkey = entry->key;
94             Py_INCREF(startkey);
95             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
96             Py_DECREF(startkey);
97             if (cmp < 0)
98                 return NULL;
99             if (table == so->table && entry->key == startkey) {
100                 if (cmp > 0)
101                     return entry;
102             }
103             else {
104                 /* The compare did major nasty stuff to the
105                  * set:  start over.
106                  */
107                 return set_lookkey(so, key, hash);
108             }
109         }
110         freeslot = NULL;
111     }
112 
113     /* In the loop, key == dummy is by far (factor of 100s) the
114        least likely outcome, so test for that last. */
115     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
116         i = (i << 2) + i + perturb + 1;
117         entry = &table[i & mask];
118         if (entry->key == NULL) {
119             if (freeslot != NULL)
120                 entry = freeslot;
121             break;
122         }
123         if (entry->key == key)
124             break;
125         if (entry->hash == hash && entry->key != dummy) {
126             startkey = entry->key;
127             Py_INCREF(startkey);
128             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
129             Py_DECREF(startkey);
130             if (cmp < 0)
131                 return NULL;
132             if (table == so->table && entry->key == startkey) {
133                 if (cmp > 0)
134                     break;
135             }
136             else {
137                 /* The compare did major nasty stuff to the
138                  * set:  start over.
139                  */
140                 return set_lookkey(so, key, hash);
141             }
142         }
143         else if (entry->key == dummy && freeslot == NULL)
144             freeslot = entry;
145     }
146     return entry;
147 }
148 
149 /*
150  * Hacked up version of set_lookkey which can assume keys are always strings;
151  * This means we can always use _PyString_Eq directly and not have to check to
152  * see if the comparison altered the table.
153  */
154 static setentry *
set_lookkey_string(PySetObject * so,PyObject * key,register long hash)155 set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
156 {
157     register Py_ssize_t i;
158     register size_t perturb;
159     register setentry *freeslot;
160     register size_t mask = so->mask;
161     setentry *table = so->table;
162     register setentry *entry;
163 
164     /* Make sure this function doesn't have to handle non-string keys,
165        including subclasses of str; e.g., one reason to subclass
166        strings is to override __eq__, and for speed we don't cater to
167        that here. */
168     if (!PyString_CheckExact(key)) {
169         so->lookup = set_lookkey;
170         return set_lookkey(so, key, hash);
171     }
172     i = hash & mask;
173     entry = &table[i];
174     if (entry->key == NULL || entry->key == key)
175         return entry;
176     if (entry->key == dummy)
177         freeslot = entry;
178     else {
179         if (entry->hash == hash && _PyString_Eq(entry->key, key))
180             return entry;
181         freeslot = NULL;
182     }
183 
184     /* In the loop, key == dummy is by far (factor of 100s) the
185        least likely outcome, so test for that last. */
186     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
187         i = (i << 2) + i + perturb + 1;
188         entry = &table[i & mask];
189         if (entry->key == NULL)
190             return freeslot == NULL ? entry : freeslot;
191         if (entry->key == key
192             || (entry->hash == hash
193             && entry->key != dummy
194             && _PyString_Eq(entry->key, key)))
195             return entry;
196         if (entry->key == dummy && freeslot == NULL)
197             freeslot = entry;
198     }
199     assert(0);          /* NOT REACHED */
200     return 0;
201 }
202 
203 /*
204 Internal routine to insert a new key into the table.
205 Used by the public insert routine.
206 Eats a reference to key.
207 */
208 static int
set_insert_key(register PySetObject * so,PyObject * key,long hash)209 set_insert_key(register PySetObject *so, PyObject *key, long hash)
210 {
211     register setentry *entry;
212 
213     assert(so->lookup != NULL);
214     entry = so->lookup(so, key, hash);
215     if (entry == NULL)
216         return -1;
217     if (entry->key == NULL) {
218         /* UNUSED */
219         so->fill++;
220         entry->key = key;
221         entry->hash = hash;
222         so->used++;
223     } else if (entry->key == dummy) {
224         /* DUMMY */
225         entry->key = key;
226         entry->hash = hash;
227         so->used++;
228         Py_DECREF(dummy);
229     } else {
230         /* ACTIVE */
231         Py_DECREF(key);
232     }
233     return 0;
234 }
235 
236 /*
237 Internal routine used by set_table_resize() to insert an item which is
238 known to be absent from the set.  This routine also assumes that
239 the set contains no deleted entries.  Besides the performance benefit,
240 using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209).
241 Note that no refcounts are changed by this routine; if needed, the caller
242 is responsible for incref'ing `key`.
243 */
244 static void
set_insert_clean(register PySetObject * so,PyObject * key,long hash)245 set_insert_clean(register PySetObject *so, PyObject *key, long hash)
246 {
247     register size_t i;
248     register size_t perturb;
249     register size_t mask = (size_t)so->mask;
250     setentry *table = so->table;
251     register setentry *entry;
252 
253     i = hash & mask;
254     entry = &table[i];
255     for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) {
256         i = (i << 2) + i + perturb + 1;
257         entry = &table[i & mask];
258     }
259     so->fill++;
260     entry->key = key;
261     entry->hash = hash;
262     so->used++;
263 }
264 
265 /*
266 Restructure the table by allocating a new table and reinserting all
267 keys again.  When entries have been deleted, the new table may
268 actually be smaller than the old one.
269 */
270 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)271 set_table_resize(PySetObject *so, Py_ssize_t minused)
272 {
273     Py_ssize_t newsize;
274     setentry *oldtable, *newtable, *entry;
275     Py_ssize_t i;
276     int is_oldtable_malloced;
277     setentry small_copy[PySet_MINSIZE];
278 
279     assert(minused >= 0);
280 
281     /* Find the smallest table size > minused. */
282     for (newsize = PySet_MINSIZE;
283          newsize <= minused && newsize > 0;
284          newsize <<= 1)
285         ;
286     if (newsize <= 0) {
287         PyErr_NoMemory();
288         return -1;
289     }
290 
291     /* Get space for a new table. */
292     oldtable = so->table;
293     assert(oldtable != NULL);
294     is_oldtable_malloced = oldtable != so->smalltable;
295 
296     if (newsize == PySet_MINSIZE) {
297         /* A large table is shrinking, or we can't get any smaller. */
298         newtable = so->smalltable;
299         if (newtable == oldtable) {
300             if (so->fill == so->used) {
301                 /* No dummies, so no point doing anything. */
302                 return 0;
303             }
304             /* We're not going to resize it, but rebuild the
305                table anyway to purge old dummy entries.
306                Subtle:  This is *necessary* if fill==size,
307                as set_lookkey needs at least one virgin slot to
308                terminate failing searches.  If fill < size, it's
309                merely desirable, as dummies slow searches. */
310             assert(so->fill > so->used);
311             memcpy(small_copy, oldtable, sizeof(small_copy));
312             oldtable = small_copy;
313         }
314     }
315     else {
316         newtable = PyMem_NEW(setentry, newsize);
317         if (newtable == NULL) {
318             PyErr_NoMemory();
319             return -1;
320         }
321     }
322 
323     /* Make the set empty, using the new table. */
324     assert(newtable != oldtable);
325     so->table = newtable;
326     so->mask = newsize - 1;
327     memset(newtable, 0, sizeof(setentry) * newsize);
328     so->used = 0;
329     i = so->fill;
330     so->fill = 0;
331 
332     /* Copy the data over; this is refcount-neutral for active entries;
333        dummy entries aren't copied over, of course */
334     for (entry = oldtable; i > 0; entry++) {
335         if (entry->key == NULL) {
336             /* UNUSED */
337             ;
338         } else if (entry->key == dummy) {
339             /* DUMMY */
340             --i;
341             assert(entry->key == dummy);
342             Py_DECREF(entry->key);
343         } else {
344             /* ACTIVE */
345             --i;
346             set_insert_clean(so, entry->key, entry->hash);
347         }
348     }
349 
350     if (is_oldtable_malloced)
351         PyMem_DEL(oldtable);
352     return 0;
353 }
354 
355 /* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
356 
357 static int
set_add_entry(register PySetObject * so,setentry * entry)358 set_add_entry(register PySetObject *so, setentry *entry)
359 {
360     register Py_ssize_t n_used;
361     PyObject *key = entry->key;
362     long hash = entry->hash;
363 
364     assert(so->fill <= so->mask);  /* at least one empty slot */
365     n_used = so->used;
366     Py_INCREF(key);
367     if (set_insert_key(so, key, hash) == -1) {
368         Py_DECREF(key);
369         return -1;
370     }
371     if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
372         return 0;
373     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
374 }
375 
376 static int
set_add_key(register PySetObject * so,PyObject * key)377 set_add_key(register PySetObject *so, PyObject *key)
378 {
379     register long hash;
380     register Py_ssize_t n_used;
381 
382     if (!PyString_CheckExact(key) ||
383         (hash = ((PyStringObject *) key)->ob_shash) == -1) {
384         hash = PyObject_Hash(key);
385         if (hash == -1)
386             return -1;
387     }
388     assert(so->fill <= so->mask);  /* at least one empty slot */
389     n_used = so->used;
390     Py_INCREF(key);
391     if (set_insert_key(so, key, hash) == -1) {
392         Py_DECREF(key);
393         return -1;
394     }
395     if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
396         return 0;
397     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
398 }
399 
400 #define DISCARD_NOTFOUND 0
401 #define DISCARD_FOUND 1
402 
403 static int
set_discard_entry(PySetObject * so,setentry * oldentry)404 set_discard_entry(PySetObject *so, setentry *oldentry)
405 {       register setentry *entry;
406     PyObject *old_key;
407 
408     entry = (so->lookup)(so, oldentry->key, oldentry->hash);
409     if (entry == NULL)
410         return -1;
411     if (entry->key == NULL  ||  entry->key == dummy)
412         return DISCARD_NOTFOUND;
413     old_key = entry->key;
414     Py_INCREF(dummy);
415     entry->key = dummy;
416     so->used--;
417     Py_DECREF(old_key);
418     return DISCARD_FOUND;
419 }
420 
421 static int
set_discard_key(PySetObject * so,PyObject * key)422 set_discard_key(PySetObject *so, PyObject *key)
423 {
424     register long hash;
425     register setentry *entry;
426     PyObject *old_key;
427 
428     assert (PyAnySet_Check(so));
429     if (!PyString_CheckExact(key) ||
430         (hash = ((PyStringObject *) key)->ob_shash) == -1) {
431         hash = PyObject_Hash(key);
432         if (hash == -1)
433             return -1;
434     }
435     entry = (so->lookup)(so, key, hash);
436     if (entry == NULL)
437         return -1;
438     if (entry->key == NULL  ||  entry->key == dummy)
439         return DISCARD_NOTFOUND;
440     old_key = entry->key;
441     Py_INCREF(dummy);
442     entry->key = dummy;
443     so->used--;
444     Py_DECREF(old_key);
445     return DISCARD_FOUND;
446 }
447 
448 static int
set_clear_internal(PySetObject * so)449 set_clear_internal(PySetObject *so)
450 {
451     setentry *entry, *table;
452     int table_is_malloced;
453     Py_ssize_t fill;
454     setentry small_copy[PySet_MINSIZE];
455 #ifdef Py_DEBUG
456     Py_ssize_t i, n;
457     assert (PyAnySet_Check(so));
458 
459     n = so->mask + 1;
460     i = 0;
461 #endif
462 
463     table = so->table;
464     assert(table != NULL);
465     table_is_malloced = table != so->smalltable;
466 
467     /* This is delicate.  During the process of clearing the set,
468      * decrefs can cause the set to mutate.  To avoid fatal confusion
469      * (voice of experience), we have to make the set empty before
470      * clearing the slots, and never refer to anything via so->ref while
471      * clearing.
472      */
473     fill = so->fill;
474     if (table_is_malloced)
475         EMPTY_TO_MINSIZE(so);
476 
477     else if (fill > 0) {
478         /* It's a small table with something that needs to be cleared.
479          * Afraid the only safe way is to copy the set entries into
480          * another small table first.
481          */
482         memcpy(small_copy, table, sizeof(small_copy));
483         table = small_copy;
484         EMPTY_TO_MINSIZE(so);
485     }
486     /* else it's a small table that's already empty */
487 
488     /* Now we can finally clear things.  If C had refcounts, we could
489      * assert that the refcount on table is 1 now, i.e. that this function
490      * has unique access to it, so decref side-effects can't alter it.
491      */
492     for (entry = table; fill > 0; ++entry) {
493 #ifdef Py_DEBUG
494         assert(i < n);
495         ++i;
496 #endif
497         if (entry->key) {
498             --fill;
499             Py_DECREF(entry->key);
500         }
501 #ifdef Py_DEBUG
502         else
503             assert(entry->key == NULL);
504 #endif
505     }
506 
507     if (table_is_malloced)
508         PyMem_DEL(table);
509     return 0;
510 }
511 
512 /*
513  * Iterate over a set table.  Use like so:
514  *
515  *     Py_ssize_t pos;
516  *     setentry *entry;
517  *     pos = 0;   # important!  pos should not otherwise be changed by you
518  *     while (set_next(yourset, &pos, &entry)) {
519  *              Refer to borrowed reference in entry->key.
520  *     }
521  *
522  * CAUTION:  In general, it isn't safe to use set_next in a loop that
523  * mutates the table.
524  */
525 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)526 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
527 {
528     Py_ssize_t i;
529     Py_ssize_t mask;
530     register setentry *table;
531 
532     assert (PyAnySet_Check(so));
533     i = *pos_ptr;
534     assert(i >= 0);
535     table = so->table;
536     mask = so->mask;
537     while (i <= mask && (table[i].key == NULL || table[i].key == dummy))
538         i++;
539     *pos_ptr = i+1;
540     if (i > mask)
541         return 0;
542     assert(table[i].key != NULL);
543     *entry_ptr = &table[i];
544     return 1;
545 }
546 
547 static void
set_dealloc(PySetObject * so)548 set_dealloc(PySetObject *so)
549 {
550     register setentry *entry;
551     Py_ssize_t fill = so->fill;
552     /* bpo-31095: UnTrack is needed before calling any callbacks */
553     PyObject_GC_UnTrack(so);
554     Py_TRASHCAN_SAFE_BEGIN(so)
555     if (so->weakreflist != NULL)
556         PyObject_ClearWeakRefs((PyObject *) so);
557 
558     for (entry = so->table; fill > 0; entry++) {
559         if (entry->key) {
560             --fill;
561             Py_DECREF(entry->key);
562         }
563     }
564     if (so->table != so->smalltable)
565         PyMem_DEL(so->table);
566     if (numfree < PySet_MAXFREELIST && PyAnySet_CheckExact(so))
567         free_list[numfree++] = so;
568     else
569         Py_TYPE(so)->tp_free(so);
570     Py_TRASHCAN_SAFE_END(so)
571 }
572 
573 static int
set_tp_print(PySetObject * so,FILE * fp,int flags)574 set_tp_print(PySetObject *so, FILE *fp, int flags)
575 {
576     setentry *entry;
577     Py_ssize_t pos=0;
578     char *emit = "";            /* No separator emitted on first pass */
579     char *separator = ", ";
580     int status = Py_ReprEnter((PyObject*)so);
581 
582     if (status != 0) {
583         if (status < 0)
584             return status;
585         Py_BEGIN_ALLOW_THREADS
586         fprintf(fp, "%s(...)", Py_TYPE(so)->tp_name);
587         Py_END_ALLOW_THREADS
588         return 0;
589     }
590 
591     Py_BEGIN_ALLOW_THREADS
592     fprintf(fp, "%s([", Py_TYPE(so)->tp_name);
593     Py_END_ALLOW_THREADS
594     while (set_next(so, &pos, &entry)) {
595         Py_BEGIN_ALLOW_THREADS
596         fputs(emit, fp);
597         Py_END_ALLOW_THREADS
598         emit = separator;
599         if (PyObject_Print(entry->key, fp, 0) != 0) {
600             Py_ReprLeave((PyObject*)so);
601             return -1;
602         }
603     }
604     Py_BEGIN_ALLOW_THREADS
605     fputs("])", fp);
606     Py_END_ALLOW_THREADS
607     Py_ReprLeave((PyObject*)so);
608     return 0;
609 }
610 
611 static PyObject *
set_repr(PySetObject * so)612 set_repr(PySetObject *so)
613 {
614     PyObject *keys, *result=NULL, *listrepr;
615     int status = Py_ReprEnter((PyObject*)so);
616 
617     if (status != 0) {
618         if (status < 0)
619             return NULL;
620         return PyString_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
621     }
622 
623     keys = PySequence_List((PyObject *)so);
624     if (keys == NULL)
625         goto done;
626     listrepr = PyObject_Repr(keys);
627     Py_DECREF(keys);
628     if (listrepr == NULL)
629         goto done;
630 
631     result = PyString_FromFormat("%s(%s)", Py_TYPE(so)->tp_name,
632         PyString_AS_STRING(listrepr));
633     Py_DECREF(listrepr);
634 done:
635     Py_ReprLeave((PyObject*)so);
636     return result;
637 }
638 
639 static Py_ssize_t
set_len(PyObject * so)640 set_len(PyObject *so)
641 {
642     return ((PySetObject *)so)->used;
643 }
644 
645 static int
set_merge(PySetObject * so,PyObject * otherset)646 set_merge(PySetObject *so, PyObject *otherset)
647 {
648     PySetObject *other;
649     PyObject *key;
650     long hash;
651     register Py_ssize_t i;
652     register setentry *entry;
653 
654     assert (PyAnySet_Check(so));
655     assert (PyAnySet_Check(otherset));
656 
657     other = (PySetObject*)otherset;
658     if (other == so || other->used == 0)
659         /* a.update(a) or a.update({}); nothing to do */
660         return 0;
661     /* Do one big resize at the start, rather than
662      * incrementally resizing as we insert new keys.  Expect
663      * that there will be no (or few) overlapping keys.
664      */
665     if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
666        if (set_table_resize(so, (so->used + other->used)*2) != 0)
667            return -1;
668     }
669     for (i = 0; i <= other->mask; i++) {
670         entry = &other->table[i];
671         key = entry->key;
672         hash = entry->hash;
673         if (key != NULL &&
674             key != dummy) {
675             Py_INCREF(key);
676             if (set_insert_key(so, key, hash) == -1) {
677                 Py_DECREF(key);
678                 return -1;
679             }
680         }
681     }
682     return 0;
683 }
684 
685 static int
set_contains_key(PySetObject * so,PyObject * key)686 set_contains_key(PySetObject *so, PyObject *key)
687 {
688     long hash;
689     setentry *entry;
690 
691     if (!PyString_CheckExact(key) ||
692         (hash = ((PyStringObject *) key)->ob_shash) == -1) {
693         hash = PyObject_Hash(key);
694         if (hash == -1)
695             return -1;
696     }
697     entry = (so->lookup)(so, key, hash);
698     if (entry == NULL)
699         return -1;
700     key = entry->key;
701     return key != NULL && key != dummy;
702 }
703 
704 static int
set_contains_entry(PySetObject * so,setentry * entry)705 set_contains_entry(PySetObject *so, setentry *entry)
706 {
707     PyObject *key;
708     setentry *lu_entry;
709 
710     lu_entry = (so->lookup)(so, entry->key, entry->hash);
711     if (lu_entry == NULL)
712         return -1;
713     key = lu_entry->key;
714     return key != NULL && key != dummy;
715 }
716 
717 static PyObject *
set_pop(PySetObject * so)718 set_pop(PySetObject *so)
719 {
720     register Py_ssize_t i = 0;
721     register setentry *entry;
722     PyObject *key;
723 
724     assert (PyAnySet_Check(so));
725     if (so->used == 0) {
726         PyErr_SetString(PyExc_KeyError, "pop from an empty set");
727         return NULL;
728     }
729 
730     /* Set entry to "the first" unused or dummy set entry.  We abuse
731      * the hash field of slot 0 to hold a search finger:
732      * If slot 0 has a value, use slot 0.
733      * Else slot 0 is being used to hold a search finger,
734      * and we use its hash value as the first index to look.
735      */
736     entry = &so->table[0];
737     if (entry->key == NULL || entry->key == dummy) {
738         i = entry->hash;
739         /* The hash field may be a real hash value, or it may be a
740          * legit search finger, or it may be a once-legit search
741          * finger that's out of bounds now because it wrapped around
742          * or the table shrunk -- simply make sure it's in bounds now.
743          */
744         if (i > so->mask || i < 1)
745             i = 1;              /* skip slot 0 */
746         while ((entry = &so->table[i])->key == NULL || entry->key==dummy) {
747             i++;
748             if (i > so->mask)
749                 i = 1;
750         }
751     }
752     key = entry->key;
753     Py_INCREF(dummy);
754     entry->key = dummy;
755     so->used--;
756     so->table[0].hash = i + 1;  /* next place to start */
757     return key;
758 }
759 
760 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
761 Raises KeyError if the set is empty.");
762 
763 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)764 set_traverse(PySetObject *so, visitproc visit, void *arg)
765 {
766     Py_ssize_t pos = 0;
767     setentry *entry;
768 
769     while (set_next(so, &pos, &entry))
770         Py_VISIT(entry->key);
771     return 0;
772 }
773 
774 static long
frozenset_hash(PyObject * self)775 frozenset_hash(PyObject *self)
776 {
777     PySetObject *so = (PySetObject *)self;
778     long h, hash = 1927868237L;
779     setentry *entry;
780     Py_ssize_t pos = 0;
781 
782     if (so->hash != -1)
783         return so->hash;
784 
785     hash *= PySet_GET_SIZE(self) + 1;
786     while (set_next(so, &pos, &entry)) {
787         /* Work to increase the bit dispersion for closely spaced hash
788            values.  This is important because some use cases have many
789            combinations of a small number of elements with nearby
790            hashes so that many distinct combinations collapse to only
791            a handful of distinct hash values. */
792         h = entry->hash;
793         hash ^= (h ^ (h << 16) ^ 89869747L)  * 3644798167u;
794     }
795     hash = hash * 69069L + 907133923L;
796     if (hash == -1)
797         hash = 590923713L;
798     so->hash = hash;
799     return hash;
800 }
801 
802 /***** Set iterator type ***********************************************/
803 
804 typedef struct {
805     PyObject_HEAD
806     PySetObject *si_set; /* Set to NULL when iterator is exhausted */
807     Py_ssize_t si_used;
808     Py_ssize_t si_pos;
809     Py_ssize_t len;
810 } setiterobject;
811 
812 static void
setiter_dealloc(setiterobject * si)813 setiter_dealloc(setiterobject *si)
814 {
815     /* bpo-31095: UnTrack is needed before calling any callbacks */
816     _PyObject_GC_UNTRACK(si);
817     Py_XDECREF(si->si_set);
818     PyObject_GC_Del(si);
819 }
820 
821 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)822 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
823 {
824     Py_VISIT(si->si_set);
825     return 0;
826 }
827 
828 static PyObject *
setiter_len(setiterobject * si)829 setiter_len(setiterobject *si)
830 {
831     Py_ssize_t len = 0;
832     if (si->si_set != NULL && si->si_used == si->si_set->used)
833         len = si->len;
834     return PyInt_FromLong(len);
835 }
836 
837 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
838 
839 static PyMethodDef setiter_methods[] = {
840     {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
841     {NULL,              NULL}           /* sentinel */
842 };
843 
setiter_iternext(setiterobject * si)844 static PyObject *setiter_iternext(setiterobject *si)
845 {
846     PyObject *key;
847     register Py_ssize_t i, mask;
848     register setentry *entry;
849     PySetObject *so = si->si_set;
850 
851     if (so == NULL)
852         return NULL;
853     assert (PyAnySet_Check(so));
854 
855     if (si->si_used != so->used) {
856         PyErr_SetString(PyExc_RuntimeError,
857                         "Set changed size during iteration");
858         si->si_used = -1; /* Make this state sticky */
859         return NULL;
860     }
861 
862     i = si->si_pos;
863     assert(i>=0);
864     entry = so->table;
865     mask = so->mask;
866     while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
867         i++;
868     si->si_pos = i+1;
869     if (i > mask)
870         goto fail;
871     si->len--;
872     key = entry[i].key;
873     Py_INCREF(key);
874     return key;
875 
876 fail:
877     si->si_set = NULL;
878     Py_DECREF(so);
879     return NULL;
880 }
881 
882 static PyTypeObject PySetIter_Type = {
883     PyVarObject_HEAD_INIT(&PyType_Type, 0)
884     "setiterator",                              /* tp_name */
885     sizeof(setiterobject),                      /* tp_basicsize */
886     0,                                          /* tp_itemsize */
887     /* methods */
888     (destructor)setiter_dealloc,                /* tp_dealloc */
889     0,                                          /* tp_print */
890     0,                                          /* tp_getattr */
891     0,                                          /* tp_setattr */
892     0,                                          /* tp_compare */
893     0,                                          /* tp_repr */
894     0,                                          /* tp_as_number */
895     0,                                          /* tp_as_sequence */
896     0,                                          /* tp_as_mapping */
897     0,                                          /* tp_hash */
898     0,                                          /* tp_call */
899     0,                                          /* tp_str */
900     PyObject_GenericGetAttr,                    /* tp_getattro */
901     0,                                          /* tp_setattro */
902     0,                                          /* tp_as_buffer */
903     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
904     0,                                          /* tp_doc */
905     (traverseproc)setiter_traverse,             /* tp_traverse */
906     0,                                          /* tp_clear */
907     0,                                          /* tp_richcompare */
908     0,                                          /* tp_weaklistoffset */
909     PyObject_SelfIter,                          /* tp_iter */
910     (iternextfunc)setiter_iternext,             /* tp_iternext */
911     setiter_methods,                            /* tp_methods */
912     0,
913 };
914 
915 static PyObject *
set_iter(PySetObject * so)916 set_iter(PySetObject *so)
917 {
918     setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
919     if (si == NULL)
920         return NULL;
921     Py_INCREF(so);
922     si->si_set = so;
923     si->si_used = so->used;
924     si->si_pos = 0;
925     si->len = so->used;
926     _PyObject_GC_TRACK(si);
927     return (PyObject *)si;
928 }
929 
930 static int
set_update_internal(PySetObject * so,PyObject * other)931 set_update_internal(PySetObject *so, PyObject *other)
932 {
933     PyObject *key, *it;
934 
935     if (PyAnySet_Check(other))
936         return set_merge(so, other);
937 
938     if (PyDict_CheckExact(other)) {
939         PyObject *value;
940         Py_ssize_t pos = 0;
941         long hash;
942         Py_ssize_t dictsize = PyDict_Size(other);
943 
944         /* Do one big resize at the start, rather than
945         * incrementally resizing as we insert new keys.  Expect
946         * that there will be no (or few) overlapping keys.
947         */
948         if (dictsize == -1)
949             return -1;
950         if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
951             if (set_table_resize(so, (so->used + dictsize)*2) != 0)
952                 return -1;
953         }
954         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
955             setentry an_entry;
956 
957             an_entry.hash = hash;
958             an_entry.key = key;
959             if (set_add_entry(so, &an_entry) == -1)
960                 return -1;
961         }
962         return 0;
963     }
964 
965     it = PyObject_GetIter(other);
966     if (it == NULL)
967         return -1;
968 
969     while ((key = PyIter_Next(it)) != NULL) {
970         if (set_add_key(so, key) == -1) {
971             Py_DECREF(it);
972             Py_DECREF(key);
973             return -1;
974         }
975         Py_DECREF(key);
976     }
977     Py_DECREF(it);
978     if (PyErr_Occurred())
979         return -1;
980     return 0;
981 }
982 
983 static PyObject *
set_update(PySetObject * so,PyObject * args)984 set_update(PySetObject *so, PyObject *args)
985 {
986     Py_ssize_t i;
987 
988     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
989         PyObject *other = PyTuple_GET_ITEM(args, i);
990         if (set_update_internal(so, other) == -1)
991             return NULL;
992     }
993     Py_RETURN_NONE;
994 }
995 
996 PyDoc_STRVAR(update_doc,
997 "Update a set with the union of itself and others.");
998 
999 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)1000 make_new_set(PyTypeObject *type, PyObject *iterable)
1001 {
1002     register PySetObject *so = NULL;
1003 
1004     if (dummy == NULL) { /* Auto-initialize dummy */
1005         dummy = PyString_FromString("<dummy key>");
1006         if (dummy == NULL)
1007             return NULL;
1008     }
1009 
1010     /* create PySetObject structure */
1011     if (numfree &&
1012         (type == &PySet_Type  ||  type == &PyFrozenSet_Type)) {
1013         so = free_list[--numfree];
1014         assert (so != NULL && PyAnySet_CheckExact(so));
1015         Py_TYPE(so) = type;
1016         _Py_NewReference((PyObject *)so);
1017         EMPTY_TO_MINSIZE(so);
1018         PyObject_GC_Track(so);
1019     } else {
1020         so = (PySetObject *)type->tp_alloc(type, 0);
1021         if (so == NULL)
1022             return NULL;
1023         /* tp_alloc has already zeroed the structure */
1024         assert(so->table == NULL && so->fill == 0 && so->used == 0);
1025         INIT_NONZERO_SET_SLOTS(so);
1026     }
1027 
1028     so->lookup = set_lookkey_string;
1029     so->weakreflist = NULL;
1030 
1031     if (iterable != NULL) {
1032         if (set_update_internal(so, iterable) == -1) {
1033             Py_DECREF(so);
1034             return NULL;
1035         }
1036     }
1037 
1038     return (PyObject *)so;
1039 }
1040 
1041 /* The empty frozenset is a singleton */
1042 static PyObject *emptyfrozenset = NULL;
1043 
1044 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1045 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1046 {
1047     PyObject *iterable = NULL, *result;
1048 
1049     if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset()", kwds))
1050         return NULL;
1051 
1052     if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1053         return NULL;
1054 
1055     if (type != &PyFrozenSet_Type)
1056         return make_new_set(type, iterable);
1057 
1058     if (iterable != NULL) {
1059         /* frozenset(f) is idempotent */
1060         if (PyFrozenSet_CheckExact(iterable)) {
1061             Py_INCREF(iterable);
1062             return iterable;
1063         }
1064         result = make_new_set(type, iterable);
1065         if (result == NULL || PySet_GET_SIZE(result))
1066             return result;
1067         Py_DECREF(result);
1068     }
1069     /* The empty frozenset is a singleton */
1070     if (emptyfrozenset == NULL)
1071         emptyfrozenset = make_new_set(type, NULL);
1072     Py_XINCREF(emptyfrozenset);
1073     return emptyfrozenset;
1074 }
1075 
1076 void
PySet_Fini(void)1077 PySet_Fini(void)
1078 {
1079     PySetObject *so;
1080 
1081     while (numfree) {
1082         numfree--;
1083         so = free_list[numfree];
1084         PyObject_GC_Del(so);
1085     }
1086     Py_CLEAR(dummy);
1087     Py_CLEAR(emptyfrozenset);
1088 }
1089 
1090 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1091 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1092 {
1093     if (type == &PySet_Type && !_PyArg_NoKeywords("set()", kwds))
1094         return NULL;
1095 
1096     return make_new_set(type, NULL);
1097 }
1098 
1099 /* set_swap_bodies() switches the contents of any two sets by moving their
1100    internal data pointers and, if needed, copying the internal smalltables.
1101    Semantically equivalent to:
1102 
1103      t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1104 
1105    The function always succeeds and it leaves both objects in a stable state.
1106    Useful for creating temporary frozensets from sets for membership testing
1107    in __contains__(), discard(), and remove().  Also useful for operations
1108    that update in-place (by allowing an intermediate result to be swapped
1109    into one of the original inputs).
1110 */
1111 
1112 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1113 set_swap_bodies(PySetObject *a, PySetObject *b)
1114 {
1115     Py_ssize_t t;
1116     setentry *u;
1117     setentry *(*f)(PySetObject *so, PyObject *key, long hash);
1118     setentry tab[PySet_MINSIZE];
1119     long h;
1120 
1121     t = a->fill;     a->fill   = b->fill;        b->fill  = t;
1122     t = a->used;     a->used   = b->used;        b->used  = t;
1123     t = a->mask;     a->mask   = b->mask;        b->mask  = t;
1124 
1125     u = a->table;
1126     if (a->table == a->smalltable)
1127         u = b->smalltable;
1128     a->table  = b->table;
1129     if (b->table == b->smalltable)
1130         a->table = a->smalltable;
1131     b->table = u;
1132 
1133     f = a->lookup;   a->lookup = b->lookup;      b->lookup = f;
1134 
1135     if (a->table == a->smalltable || b->table == b->smalltable) {
1136         memcpy(tab, a->smalltable, sizeof(tab));
1137         memcpy(a->smalltable, b->smalltable, sizeof(tab));
1138         memcpy(b->smalltable, tab, sizeof(tab));
1139     }
1140 
1141     if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type)  &&
1142         PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1143         h = a->hash;     a->hash = b->hash;  b->hash = h;
1144     } else {
1145         a->hash = -1;
1146         b->hash = -1;
1147     }
1148 }
1149 
1150 static PyObject *
set_copy(PySetObject * so)1151 set_copy(PySetObject *so)
1152 {
1153     return make_new_set(Py_TYPE(so), (PyObject *)so);
1154 }
1155 
1156 static PyObject *
frozenset_copy(PySetObject * so)1157 frozenset_copy(PySetObject *so)
1158 {
1159     if (PyFrozenSet_CheckExact(so)) {
1160         Py_INCREF(so);
1161         return (PyObject *)so;
1162     }
1163     return set_copy(so);
1164 }
1165 
1166 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1167 
1168 static PyObject *
set_clear(PySetObject * so)1169 set_clear(PySetObject *so)
1170 {
1171     set_clear_internal(so);
1172     Py_RETURN_NONE;
1173 }
1174 
1175 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1176 
1177 static PyObject *
set_union(PySetObject * so,PyObject * args)1178 set_union(PySetObject *so, PyObject *args)
1179 {
1180     PySetObject *result;
1181     PyObject *other;
1182     Py_ssize_t i;
1183 
1184     result = (PySetObject *)set_copy(so);
1185     if (result == NULL)
1186         return NULL;
1187 
1188     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1189         other = PyTuple_GET_ITEM(args, i);
1190         if ((PyObject *)so == other)
1191             continue;
1192         if (set_update_internal(result, other) == -1) {
1193             Py_DECREF(result);
1194             return NULL;
1195         }
1196     }
1197     return (PyObject *)result;
1198 }
1199 
1200 PyDoc_STRVAR(union_doc,
1201  "Return the union of sets as a new set.\n\
1202 \n\
1203 (i.e. all elements that are in either set.)");
1204 
1205 static PyObject *
set_or(PySetObject * so,PyObject * other)1206 set_or(PySetObject *so, PyObject *other)
1207 {
1208     PySetObject *result;
1209 
1210     if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1211         Py_INCREF(Py_NotImplemented);
1212         return Py_NotImplemented;
1213     }
1214 
1215     result = (PySetObject *)set_copy(so);
1216     if (result == NULL)
1217         return NULL;
1218     if ((PyObject *)so == other)
1219         return (PyObject *)result;
1220     if (set_update_internal(result, other) == -1) {
1221         Py_DECREF(result);
1222         return NULL;
1223     }
1224     return (PyObject *)result;
1225 }
1226 
1227 static PyObject *
set_ior(PySetObject * so,PyObject * other)1228 set_ior(PySetObject *so, PyObject *other)
1229 {
1230     if (!PyAnySet_Check(other)) {
1231         Py_INCREF(Py_NotImplemented);
1232         return Py_NotImplemented;
1233     }
1234     if (set_update_internal(so, other) == -1)
1235         return NULL;
1236     Py_INCREF(so);
1237     return (PyObject *)so;
1238 }
1239 
1240 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1241 set_intersection(PySetObject *so, PyObject *other)
1242 {
1243     PySetObject *result;
1244     PyObject *key, *it, *tmp;
1245 
1246     if ((PyObject *)so == other)
1247         return set_copy(so);
1248 
1249     result = (PySetObject *)make_new_set(Py_TYPE(so), NULL);
1250     if (result == NULL)
1251         return NULL;
1252 
1253     if (PyAnySet_Check(other)) {
1254         Py_ssize_t pos = 0;
1255         setentry *entry;
1256 
1257         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1258             tmp = (PyObject *)so;
1259             so = (PySetObject *)other;
1260             other = tmp;
1261         }
1262 
1263         while (set_next((PySetObject *)other, &pos, &entry)) {
1264             int rv = set_contains_entry(so, entry);
1265             if (rv == -1) {
1266                 Py_DECREF(result);
1267                 return NULL;
1268             }
1269             if (rv) {
1270                 if (set_add_entry(result, entry) == -1) {
1271                     Py_DECREF(result);
1272                     return NULL;
1273                 }
1274             }
1275         }
1276         return (PyObject *)result;
1277     }
1278 
1279     it = PyObject_GetIter(other);
1280     if (it == NULL) {
1281         Py_DECREF(result);
1282         return NULL;
1283     }
1284 
1285     while ((key = PyIter_Next(it)) != NULL) {
1286         int rv;
1287         setentry entry;
1288         long hash = PyObject_Hash(key);
1289 
1290         if (hash == -1) {
1291             Py_DECREF(it);
1292             Py_DECREF(result);
1293             Py_DECREF(key);
1294             return NULL;
1295         }
1296         entry.hash = hash;
1297         entry.key = key;
1298         rv = set_contains_entry(so, &entry);
1299         if (rv == -1) {
1300             Py_DECREF(it);
1301             Py_DECREF(result);
1302             Py_DECREF(key);
1303             return NULL;
1304         }
1305         if (rv) {
1306             if (set_add_entry(result, &entry) == -1) {
1307                 Py_DECREF(it);
1308                 Py_DECREF(result);
1309                 Py_DECREF(key);
1310                 return NULL;
1311             }
1312         }
1313         Py_DECREF(key);
1314     }
1315     Py_DECREF(it);
1316     if (PyErr_Occurred()) {
1317         Py_DECREF(result);
1318         return NULL;
1319     }
1320     return (PyObject *)result;
1321 }
1322 
1323 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1324 set_intersection_multi(PySetObject *so, PyObject *args)
1325 {
1326     Py_ssize_t i;
1327     PyObject *result = (PyObject *)so;
1328 
1329     if (PyTuple_GET_SIZE(args) == 0)
1330         return set_copy(so);
1331 
1332     Py_INCREF(so);
1333     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1334         PyObject *other = PyTuple_GET_ITEM(args, i);
1335         PyObject *newresult = set_intersection((PySetObject *)result, other);
1336         if (newresult == NULL) {
1337             Py_DECREF(result);
1338             return NULL;
1339         }
1340         Py_DECREF(result);
1341         result = newresult;
1342     }
1343     return result;
1344 }
1345 
1346 PyDoc_STRVAR(intersection_doc,
1347 "Return the intersection of two or more sets as a new set.\n\
1348 \n\
1349 (i.e. elements that are common to all of the sets.)");
1350 
1351 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1352 set_intersection_update(PySetObject *so, PyObject *other)
1353 {
1354     PyObject *tmp;
1355 
1356     tmp = set_intersection(so, other);
1357     if (tmp == NULL)
1358         return NULL;
1359     set_swap_bodies(so, (PySetObject *)tmp);
1360     Py_DECREF(tmp);
1361     Py_RETURN_NONE;
1362 }
1363 
1364 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1365 set_intersection_update_multi(PySetObject *so, PyObject *args)
1366 {
1367     PyObject *tmp;
1368 
1369     tmp = set_intersection_multi(so, args);
1370     if (tmp == NULL)
1371         return NULL;
1372     set_swap_bodies(so, (PySetObject *)tmp);
1373     Py_DECREF(tmp);
1374     Py_RETURN_NONE;
1375 }
1376 
1377 PyDoc_STRVAR(intersection_update_doc,
1378 "Update a set with the intersection of itself and another.");
1379 
1380 static PyObject *
set_and(PySetObject * so,PyObject * other)1381 set_and(PySetObject *so, PyObject *other)
1382 {
1383     if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1384         Py_INCREF(Py_NotImplemented);
1385         return Py_NotImplemented;
1386     }
1387     return set_intersection(so, other);
1388 }
1389 
1390 static PyObject *
set_iand(PySetObject * so,PyObject * other)1391 set_iand(PySetObject *so, PyObject *other)
1392 {
1393     PyObject *result;
1394 
1395     if (!PyAnySet_Check(other)) {
1396         Py_INCREF(Py_NotImplemented);
1397         return Py_NotImplemented;
1398     }
1399     result = set_intersection_update(so, other);
1400     if (result == NULL)
1401         return NULL;
1402     Py_DECREF(result);
1403     Py_INCREF(so);
1404     return (PyObject *)so;
1405 }
1406 
1407 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1408 set_isdisjoint(PySetObject *so, PyObject *other)
1409 {
1410     PyObject *key, *it, *tmp;
1411 
1412     if ((PyObject *)so == other) {
1413         if (PySet_GET_SIZE(so) == 0)
1414             Py_RETURN_TRUE;
1415         else
1416             Py_RETURN_FALSE;
1417     }
1418 
1419     if (PyAnySet_CheckExact(other)) {
1420         Py_ssize_t pos = 0;
1421         setentry *entry;
1422 
1423         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1424             tmp = (PyObject *)so;
1425             so = (PySetObject *)other;
1426             other = tmp;
1427         }
1428         while (set_next((PySetObject *)other, &pos, &entry)) {
1429             int rv = set_contains_entry(so, entry);
1430             if (rv == -1)
1431                 return NULL;
1432             if (rv)
1433                 Py_RETURN_FALSE;
1434         }
1435         Py_RETURN_TRUE;
1436     }
1437 
1438     it = PyObject_GetIter(other);
1439     if (it == NULL)
1440         return NULL;
1441 
1442     while ((key = PyIter_Next(it)) != NULL) {
1443         int rv;
1444         setentry entry;
1445         long hash = PyObject_Hash(key);
1446 
1447         if (hash == -1) {
1448             Py_DECREF(key);
1449             Py_DECREF(it);
1450             return NULL;
1451         }
1452         entry.hash = hash;
1453         entry.key = key;
1454         rv = set_contains_entry(so, &entry);
1455         Py_DECREF(key);
1456         if (rv == -1) {
1457             Py_DECREF(it);
1458             return NULL;
1459         }
1460         if (rv) {
1461             Py_DECREF(it);
1462             Py_RETURN_FALSE;
1463         }
1464     }
1465     Py_DECREF(it);
1466     if (PyErr_Occurred())
1467         return NULL;
1468     Py_RETURN_TRUE;
1469 }
1470 
1471 PyDoc_STRVAR(isdisjoint_doc,
1472 "Return True if two sets have a null intersection.");
1473 
1474 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1475 set_difference_update_internal(PySetObject *so, PyObject *other)
1476 {
1477     if ((PyObject *)so == other)
1478         return set_clear_internal(so);
1479 
1480     if (PyAnySet_Check(other)) {
1481         setentry *entry;
1482         Py_ssize_t pos = 0;
1483 
1484         while (set_next((PySetObject *)other, &pos, &entry))
1485             if (set_discard_entry(so, entry) == -1)
1486                 return -1;
1487     } else {
1488         PyObject *key, *it;
1489         it = PyObject_GetIter(other);
1490         if (it == NULL)
1491             return -1;
1492 
1493         while ((key = PyIter_Next(it)) != NULL) {
1494             if (set_discard_key(so, key) == -1) {
1495                 Py_DECREF(it);
1496                 Py_DECREF(key);
1497                 return -1;
1498             }
1499             Py_DECREF(key);
1500         }
1501         Py_DECREF(it);
1502         if (PyErr_Occurred())
1503             return -1;
1504     }
1505     /* If more than 1/5 are dummies, then resize them away. */
1506     if ((so->fill - so->used) * 5 < so->mask)
1507         return 0;
1508     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1509 }
1510 
1511 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1512 set_difference_update(PySetObject *so, PyObject *args)
1513 {
1514     Py_ssize_t i;
1515 
1516     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1517         PyObject *other = PyTuple_GET_ITEM(args, i);
1518         if (set_difference_update_internal(so, other) == -1)
1519             return NULL;
1520     }
1521     Py_RETURN_NONE;
1522 }
1523 
1524 PyDoc_STRVAR(difference_update_doc,
1525 "Remove all elements of another set from this set.");
1526 
1527 static PyObject *
set_difference(PySetObject * so,PyObject * other)1528 set_difference(PySetObject *so, PyObject *other)
1529 {
1530     PyObject *result;
1531     setentry *entry;
1532     Py_ssize_t pos = 0;
1533 
1534     if (!PyAnySet_Check(other)  && !PyDict_CheckExact(other)) {
1535         result = set_copy(so);
1536         if (result == NULL)
1537             return NULL;
1538         if (set_difference_update_internal((PySetObject *)result, other) != -1)
1539             return result;
1540         Py_DECREF(result);
1541         return NULL;
1542     }
1543 
1544     result = make_new_set(Py_TYPE(so), NULL);
1545     if (result == NULL)
1546         return NULL;
1547 
1548     if (PyDict_CheckExact(other)) {
1549         while (set_next(so, &pos, &entry)) {
1550             setentry entrycopy;
1551             int rv;
1552             entrycopy.hash = entry->hash;
1553             entrycopy.key = entry->key;
1554             rv = _PyDict_Contains(other, entry->key, entry->hash);
1555             if (rv < 0) {
1556                 Py_DECREF(result);
1557                 return NULL;
1558             }
1559             if (!rv) {
1560                 if (set_add_entry((PySetObject *)result, &entrycopy) == -1) {
1561                     Py_DECREF(result);
1562                     return NULL;
1563                 }
1564             }
1565         }
1566         return result;
1567     }
1568 
1569     while (set_next(so, &pos, &entry)) {
1570         int rv = set_contains_entry((PySetObject *)other, entry);
1571         if (rv == -1) {
1572             Py_DECREF(result);
1573             return NULL;
1574         }
1575         if (!rv) {
1576             if (set_add_entry((PySetObject *)result, entry) == -1) {
1577                 Py_DECREF(result);
1578                 return NULL;
1579             }
1580         }
1581     }
1582     return result;
1583 }
1584 
1585 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1586 set_difference_multi(PySetObject *so, PyObject *args)
1587 {
1588     Py_ssize_t i;
1589     PyObject *result, *other;
1590 
1591     if (PyTuple_GET_SIZE(args) == 0)
1592         return set_copy(so);
1593 
1594     other = PyTuple_GET_ITEM(args, 0);
1595     result = set_difference(so, other);
1596     if (result == NULL)
1597         return NULL;
1598 
1599     for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1600         other = PyTuple_GET_ITEM(args, i);
1601         if (set_difference_update_internal((PySetObject *)result, other) == -1) {
1602             Py_DECREF(result);
1603             return NULL;
1604         }
1605     }
1606     return result;
1607 }
1608 
1609 PyDoc_STRVAR(difference_doc,
1610 "Return the difference of two or more sets as a new set.\n\
1611 \n\
1612 (i.e. all elements that are in this set but not the others.)");
1613 static PyObject *
set_sub(PySetObject * so,PyObject * other)1614 set_sub(PySetObject *so, PyObject *other)
1615 {
1616     if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1617         Py_INCREF(Py_NotImplemented);
1618         return Py_NotImplemented;
1619     }
1620     return set_difference(so, other);
1621 }
1622 
1623 static PyObject *
set_isub(PySetObject * so,PyObject * other)1624 set_isub(PySetObject *so, PyObject *other)
1625 {
1626     if (!PyAnySet_Check(other)) {
1627         Py_INCREF(Py_NotImplemented);
1628         return Py_NotImplemented;
1629     }
1630     if (set_difference_update_internal(so, other) == -1)
1631         return NULL;
1632     Py_INCREF(so);
1633     return (PyObject *)so;
1634 }
1635 
1636 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1637 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1638 {
1639     PySetObject *otherset;
1640     PyObject *key;
1641     Py_ssize_t pos = 0;
1642     setentry *entry;
1643 
1644     if ((PyObject *)so == other)
1645         return set_clear(so);
1646 
1647     if (PyDict_CheckExact(other)) {
1648         PyObject *value;
1649         int rv;
1650         long hash;
1651         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1652             setentry an_entry;
1653 
1654             Py_INCREF(key);
1655             an_entry.hash = hash;
1656             an_entry.key = key;
1657 
1658             rv = set_discard_entry(so, &an_entry);
1659             if (rv == -1) {
1660                 Py_DECREF(key);
1661                 return NULL;
1662             }
1663             if (rv == DISCARD_NOTFOUND) {
1664                 if (set_add_entry(so, &an_entry) == -1) {
1665                     Py_DECREF(key);
1666                     return NULL;
1667                 }
1668             }
1669             Py_DECREF(key);
1670         }
1671         Py_RETURN_NONE;
1672     }
1673 
1674     if (PyAnySet_Check(other)) {
1675         Py_INCREF(other);
1676         otherset = (PySetObject *)other;
1677     } else {
1678         otherset = (PySetObject *)make_new_set(Py_TYPE(so), other);
1679         if (otherset == NULL)
1680             return NULL;
1681     }
1682 
1683     while (set_next(otherset, &pos, &entry)) {
1684         int rv = set_discard_entry(so, entry);
1685         if (rv == -1) {
1686             Py_DECREF(otherset);
1687             return NULL;
1688         }
1689         if (rv == DISCARD_NOTFOUND) {
1690             if (set_add_entry(so, entry) == -1) {
1691                 Py_DECREF(otherset);
1692                 return NULL;
1693             }
1694         }
1695     }
1696     Py_DECREF(otherset);
1697     Py_RETURN_NONE;
1698 }
1699 
1700 PyDoc_STRVAR(symmetric_difference_update_doc,
1701 "Update a set with the symmetric difference of itself and another.");
1702 
1703 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1704 set_symmetric_difference(PySetObject *so, PyObject *other)
1705 {
1706     PyObject *rv;
1707     PySetObject *otherset;
1708 
1709     otherset = (PySetObject *)make_new_set(Py_TYPE(so), other);
1710     if (otherset == NULL)
1711         return NULL;
1712     rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1713     if (rv == NULL) {
1714         Py_DECREF(otherset);
1715         return NULL;
1716     }
1717     Py_DECREF(rv);
1718     return (PyObject *)otherset;
1719 }
1720 
1721 PyDoc_STRVAR(symmetric_difference_doc,
1722 "Return the symmetric difference of two sets as a new set.\n\
1723 \n\
1724 (i.e. all elements that are in exactly one of the sets.)");
1725 
1726 static PyObject *
set_xor(PySetObject * so,PyObject * other)1727 set_xor(PySetObject *so, PyObject *other)
1728 {
1729     if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1730         Py_INCREF(Py_NotImplemented);
1731         return Py_NotImplemented;
1732     }
1733     return set_symmetric_difference(so, other);
1734 }
1735 
1736 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1737 set_ixor(PySetObject *so, PyObject *other)
1738 {
1739     PyObject *result;
1740 
1741     if (!PyAnySet_Check(other)) {
1742         Py_INCREF(Py_NotImplemented);
1743         return Py_NotImplemented;
1744     }
1745     result = set_symmetric_difference_update(so, other);
1746     if (result == NULL)
1747         return NULL;
1748     Py_DECREF(result);
1749     Py_INCREF(so);
1750     return (PyObject *)so;
1751 }
1752 
1753 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1754 set_issubset(PySetObject *so, PyObject *other)
1755 {
1756     setentry *entry;
1757     Py_ssize_t pos = 0;
1758 
1759     if (!PyAnySet_Check(other)) {
1760         PyObject *tmp, *result;
1761         tmp = make_new_set(&PySet_Type, other);
1762         if (tmp == NULL)
1763             return NULL;
1764         result = set_issubset(so, tmp);
1765         Py_DECREF(tmp);
1766         return result;
1767     }
1768     if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1769         Py_RETURN_FALSE;
1770 
1771     while (set_next(so, &pos, &entry)) {
1772         int rv = set_contains_entry((PySetObject *)other, entry);
1773         if (rv == -1)
1774             return NULL;
1775         if (!rv)
1776             Py_RETURN_FALSE;
1777     }
1778     Py_RETURN_TRUE;
1779 }
1780 
1781 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1782 
1783 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1784 set_issuperset(PySetObject *so, PyObject *other)
1785 {
1786     PyObject *tmp, *result;
1787 
1788     if (!PyAnySet_Check(other)) {
1789         tmp = make_new_set(&PySet_Type, other);
1790         if (tmp == NULL)
1791             return NULL;
1792         result = set_issuperset(so, tmp);
1793         Py_DECREF(tmp);
1794         return result;
1795     }
1796     return set_issubset((PySetObject *)other, (PyObject *)so);
1797 }
1798 
1799 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1800 
1801 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1802 set_richcompare(PySetObject *v, PyObject *w, int op)
1803 {
1804     PyObject *r1;
1805     int r2;
1806 
1807     if(!PyAnySet_Check(w)) {
1808         Py_INCREF(Py_NotImplemented);
1809         return Py_NotImplemented;
1810     }
1811     switch (op) {
1812     case Py_EQ:
1813         if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1814             Py_RETURN_FALSE;
1815         if (v->hash != -1  &&
1816             ((PySetObject *)w)->hash != -1 &&
1817             v->hash != ((PySetObject *)w)->hash)
1818             Py_RETURN_FALSE;
1819         return set_issubset(v, w);
1820     case Py_NE:
1821         r1 = set_richcompare(v, w, Py_EQ);
1822         if (r1 == NULL)
1823             return NULL;
1824         r2 = PyObject_IsTrue(r1);
1825         Py_DECREF(r1);
1826         if (r2 < 0)
1827             return NULL;
1828         return PyBool_FromLong(!r2);
1829     case Py_LE:
1830         return set_issubset(v, w);
1831     case Py_GE:
1832         return set_issuperset(v, w);
1833     case Py_LT:
1834         if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1835             Py_RETURN_FALSE;
1836         return set_issubset(v, w);
1837     case Py_GT:
1838         if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1839             Py_RETURN_FALSE;
1840         return set_issuperset(v, w);
1841     }
1842     Py_INCREF(Py_NotImplemented);
1843     return Py_NotImplemented;
1844 }
1845 
1846 static int
set_nocmp(PyObject * self,PyObject * other)1847 set_nocmp(PyObject *self, PyObject *other)
1848 {
1849     PyErr_SetString(PyExc_TypeError, "cannot compare sets using cmp()");
1850     return -1;
1851 }
1852 
1853 static PyObject *
set_add(PySetObject * so,PyObject * key)1854 set_add(PySetObject *so, PyObject *key)
1855 {
1856     if (set_add_key(so, key) == -1)
1857         return NULL;
1858     Py_RETURN_NONE;
1859 }
1860 
1861 PyDoc_STRVAR(add_doc,
1862 "Add an element to a set.\n\
1863 \n\
1864 This has no effect if the element is already present.");
1865 
1866 static int
set_contains(PySetObject * so,PyObject * key)1867 set_contains(PySetObject *so, PyObject *key)
1868 {
1869     PyObject *tmpkey;
1870     int rv;
1871 
1872     rv = set_contains_key(so, key);
1873     if (rv == -1) {
1874         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1875             return -1;
1876         PyErr_Clear();
1877         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1878         if (tmpkey == NULL)
1879             return -1;
1880         rv = set_contains_key(so, tmpkey);
1881         Py_DECREF(tmpkey);
1882     }
1883     return rv;
1884 }
1885 
1886 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1887 set_direct_contains(PySetObject *so, PyObject *key)
1888 {
1889     long result;
1890 
1891     result = set_contains(so, key);
1892     if (result == -1)
1893         return NULL;
1894     return PyBool_FromLong(result);
1895 }
1896 
1897 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1898 
1899 static PyObject *
set_remove(PySetObject * so,PyObject * key)1900 set_remove(PySetObject *so, PyObject *key)
1901 {
1902     PyObject *tmpkey;
1903     int rv;
1904 
1905     rv = set_discard_key(so, key);
1906     if (rv == -1) {
1907         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1908             return NULL;
1909         PyErr_Clear();
1910         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1911         if (tmpkey == NULL)
1912             return NULL;
1913         rv = set_discard_key(so, tmpkey);
1914         Py_DECREF(tmpkey);
1915         if (rv == -1)
1916             return NULL;
1917     }
1918 
1919     if (rv == DISCARD_NOTFOUND) {
1920         set_key_error(key);
1921         return NULL;
1922     }
1923     Py_RETURN_NONE;
1924 }
1925 
1926 PyDoc_STRVAR(remove_doc,
1927 "Remove an element from a set; it must be a member.\n\
1928 \n\
1929 If the element is not a member, raise a KeyError.");
1930 
1931 static PyObject *
set_discard(PySetObject * so,PyObject * key)1932 set_discard(PySetObject *so, PyObject *key)
1933 {
1934     PyObject *tmpkey;
1935     int rv;
1936 
1937     rv = set_discard_key(so, key);
1938     if (rv == -1) {
1939         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1940             return NULL;
1941         PyErr_Clear();
1942         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1943         if (tmpkey == NULL)
1944             return NULL;
1945         rv = set_discard_key(so, tmpkey);
1946         Py_DECREF(tmpkey);
1947         if (rv == -1)
1948             return NULL;
1949     }
1950     Py_RETURN_NONE;
1951 }
1952 
1953 PyDoc_STRVAR(discard_doc,
1954 "Remove an element from a set if it is a member.\n\
1955 \n\
1956 If the element is not a member, do nothing.");
1957 
1958 static PyObject *
set_reduce(PySetObject * so)1959 set_reduce(PySetObject *so)
1960 {
1961     PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1962 
1963     keys = PySequence_List((PyObject *)so);
1964     if (keys == NULL)
1965         goto done;
1966     args = PyTuple_Pack(1, keys);
1967     if (args == NULL)
1968         goto done;
1969     dict = PyObject_GetAttrString((PyObject *)so, "__dict__");
1970     if (dict == NULL) {
1971         PyErr_Clear();
1972         dict = Py_None;
1973         Py_INCREF(dict);
1974     }
1975     result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1976 done:
1977     Py_XDECREF(args);
1978     Py_XDECREF(keys);
1979     Py_XDECREF(dict);
1980     return result;
1981 }
1982 
1983 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
1984 
1985 static PyObject *
set_sizeof(PySetObject * so)1986 set_sizeof(PySetObject *so)
1987 {
1988     Py_ssize_t res;
1989 
1990     res = _PyObject_SIZE(Py_TYPE(so));
1991     if (so->table != so->smalltable)
1992         res = res + (so->mask + 1) * sizeof(setentry);
1993     return PyInt_FromSsize_t(res);
1994 }
1995 
1996 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1997 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1998 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1999 {
2000     PyObject *iterable = NULL;
2001 
2002     if (!PyAnySet_Check(self))
2003         return -1;
2004     if (PySet_Check(self) && !_PyArg_NoKeywords("set()", kwds))
2005         return -1;
2006     if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
2007         return -1;
2008     set_clear_internal(self);
2009     self->hash = -1;
2010     if (iterable == NULL)
2011         return 0;
2012     return set_update_internal(self, iterable);
2013 }
2014 
2015 static PySequenceMethods set_as_sequence = {
2016     set_len,                            /* sq_length */
2017     0,                                  /* sq_concat */
2018     0,                                  /* sq_repeat */
2019     0,                                  /* sq_item */
2020     0,                                  /* sq_slice */
2021     0,                                  /* sq_ass_item */
2022     0,                                  /* sq_ass_slice */
2023     (objobjproc)set_contains,           /* sq_contains */
2024 };
2025 
2026 /* set object ********************************************************/
2027 
2028 #ifdef Py_DEBUG
2029 static PyObject *test_c_api(PySetObject *so);
2030 
2031 PyDoc_STRVAR(test_c_api_doc, "Exercises C API.  Returns True.\n\
2032 All is well if assertions don't fail.");
2033 #endif
2034 
2035 static PyMethodDef set_methods[] = {
2036     {"add",             (PyCFunction)set_add,           METH_O,
2037      add_doc},
2038     {"clear",           (PyCFunction)set_clear,         METH_NOARGS,
2039      clear_doc},
2040     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2041      contains_doc},
2042     {"copy",            (PyCFunction)set_copy,          METH_NOARGS,
2043      copy_doc},
2044     {"discard",         (PyCFunction)set_discard,       METH_O,
2045      discard_doc},
2046     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2047      difference_doc},
2048     {"difference_update",       (PyCFunction)set_difference_update,     METH_VARARGS,
2049      difference_update_doc},
2050     {"intersection",(PyCFunction)set_intersection_multi,        METH_VARARGS,
2051      intersection_doc},
2052     {"intersection_update",(PyCFunction)set_intersection_update_multi,          METH_VARARGS,
2053      intersection_update_doc},
2054     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2055      isdisjoint_doc},
2056     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2057      issubset_doc},
2058     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2059      issuperset_doc},
2060     {"pop",             (PyCFunction)set_pop,           METH_NOARGS,
2061      pop_doc},
2062     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2063      reduce_doc},
2064     {"remove",          (PyCFunction)set_remove,        METH_O,
2065      remove_doc},
2066     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2067      sizeof_doc},
2068     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2069      symmetric_difference_doc},
2070     {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update,        METH_O,
2071      symmetric_difference_update_doc},
2072 #ifdef Py_DEBUG
2073     {"test_c_api",      (PyCFunction)test_c_api,        METH_NOARGS,
2074      test_c_api_doc},
2075 #endif
2076     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2077      union_doc},
2078     {"update",          (PyCFunction)set_update,        METH_VARARGS,
2079      update_doc},
2080     {NULL,              NULL}   /* sentinel */
2081 };
2082 
2083 static PyNumberMethods set_as_number = {
2084     0,                                  /*nb_add*/
2085     (binaryfunc)set_sub,                /*nb_subtract*/
2086     0,                                  /*nb_multiply*/
2087     0,                                  /*nb_divide*/
2088     0,                                  /*nb_remainder*/
2089     0,                                  /*nb_divmod*/
2090     0,                                  /*nb_power*/
2091     0,                                  /*nb_negative*/
2092     0,                                  /*nb_positive*/
2093     0,                                  /*nb_absolute*/
2094     0,                                  /*nb_nonzero*/
2095     0,                                  /*nb_invert*/
2096     0,                                  /*nb_lshift*/
2097     0,                                  /*nb_rshift*/
2098     (binaryfunc)set_and,                /*nb_and*/
2099     (binaryfunc)set_xor,                /*nb_xor*/
2100     (binaryfunc)set_or,                 /*nb_or*/
2101     0,                                  /*nb_coerce*/
2102     0,                                  /*nb_int*/
2103     0,                                  /*nb_long*/
2104     0,                                  /*nb_float*/
2105     0,                                  /*nb_oct*/
2106     0,                                  /*nb_hex*/
2107     0,                                  /*nb_inplace_add*/
2108     (binaryfunc)set_isub,               /*nb_inplace_subtract*/
2109     0,                                  /*nb_inplace_multiply*/
2110     0,                                  /*nb_inplace_divide*/
2111     0,                                  /*nb_inplace_remainder*/
2112     0,                                  /*nb_inplace_power*/
2113     0,                                  /*nb_inplace_lshift*/
2114     0,                                  /*nb_inplace_rshift*/
2115     (binaryfunc)set_iand,               /*nb_inplace_and*/
2116     (binaryfunc)set_ixor,               /*nb_inplace_xor*/
2117     (binaryfunc)set_ior,                /*nb_inplace_or*/
2118 };
2119 
2120 PyDoc_STRVAR(set_doc,
2121 "set() -> new empty set object\n\
2122 set(iterable) -> new set object\n\
2123 \n\
2124 Build an unordered collection of unique elements.");
2125 
2126 PyTypeObject PySet_Type = {
2127     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2128     "set",                              /* tp_name */
2129     sizeof(PySetObject),                /* tp_basicsize */
2130     0,                                  /* tp_itemsize */
2131     /* methods */
2132     (destructor)set_dealloc,            /* tp_dealloc */
2133     (printfunc)set_tp_print,            /* tp_print */
2134     0,                                  /* tp_getattr */
2135     0,                                  /* tp_setattr */
2136     set_nocmp,                          /* tp_compare */
2137     (reprfunc)set_repr,                 /* tp_repr */
2138     &set_as_number,                     /* tp_as_number */
2139     &set_as_sequence,                   /* tp_as_sequence */
2140     0,                                  /* tp_as_mapping */
2141     (hashfunc)PyObject_HashNotImplemented,      /* tp_hash */
2142     0,                                  /* tp_call */
2143     0,                                  /* tp_str */
2144     PyObject_GenericGetAttr,            /* tp_getattro */
2145     0,                                  /* tp_setattro */
2146     0,                                  /* tp_as_buffer */
2147     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
2148         Py_TPFLAGS_BASETYPE,            /* tp_flags */
2149     set_doc,                            /* tp_doc */
2150     (traverseproc)set_traverse,         /* tp_traverse */
2151     (inquiry)set_clear_internal,        /* tp_clear */
2152     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2153     offsetof(PySetObject, weakreflist),         /* tp_weaklistoffset */
2154     (getiterfunc)set_iter,      /* tp_iter */
2155     0,                                  /* tp_iternext */
2156     set_methods,                        /* tp_methods */
2157     0,                                  /* tp_members */
2158     0,                                  /* tp_getset */
2159     0,                                  /* tp_base */
2160     0,                                  /* tp_dict */
2161     0,                                  /* tp_descr_get */
2162     0,                                  /* tp_descr_set */
2163     0,                                  /* tp_dictoffset */
2164     (initproc)set_init,                 /* tp_init */
2165     PyType_GenericAlloc,                /* tp_alloc */
2166     set_new,                            /* tp_new */
2167     PyObject_GC_Del,                    /* tp_free */
2168 };
2169 
2170 /* frozenset object ********************************************************/
2171 
2172 
2173 static PyMethodDef frozenset_methods[] = {
2174     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2175      contains_doc},
2176     {"copy",            (PyCFunction)frozenset_copy,    METH_NOARGS,
2177      copy_doc},
2178     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2179      difference_doc},
2180     {"intersection",(PyCFunction)set_intersection_multi,        METH_VARARGS,
2181      intersection_doc},
2182     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2183      isdisjoint_doc},
2184     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2185      issubset_doc},
2186     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2187      issuperset_doc},
2188     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2189      reduce_doc},
2190     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2191      sizeof_doc},
2192     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2193      symmetric_difference_doc},
2194     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2195      union_doc},
2196     {NULL,              NULL}   /* sentinel */
2197 };
2198 
2199 static PyNumberMethods frozenset_as_number = {
2200     0,                                  /*nb_add*/
2201     (binaryfunc)set_sub,                /*nb_subtract*/
2202     0,                                  /*nb_multiply*/
2203     0,                                  /*nb_divide*/
2204     0,                                  /*nb_remainder*/
2205     0,                                  /*nb_divmod*/
2206     0,                                  /*nb_power*/
2207     0,                                  /*nb_negative*/
2208     0,                                  /*nb_positive*/
2209     0,                                  /*nb_absolute*/
2210     0,                                  /*nb_nonzero*/
2211     0,                                  /*nb_invert*/
2212     0,                                  /*nb_lshift*/
2213     0,                                  /*nb_rshift*/
2214     (binaryfunc)set_and,                /*nb_and*/
2215     (binaryfunc)set_xor,                /*nb_xor*/
2216     (binaryfunc)set_or,                 /*nb_or*/
2217 };
2218 
2219 PyDoc_STRVAR(frozenset_doc,
2220 "frozenset() -> empty frozenset object\n\
2221 frozenset(iterable) -> frozenset object\n\
2222 \n\
2223 Build an immutable unordered collection of unique elements.");
2224 
2225 PyTypeObject PyFrozenSet_Type = {
2226     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2227     "frozenset",                        /* tp_name */
2228     sizeof(PySetObject),                /* tp_basicsize */
2229     0,                                  /* tp_itemsize */
2230     /* methods */
2231     (destructor)set_dealloc,            /* tp_dealloc */
2232     (printfunc)set_tp_print,            /* tp_print */
2233     0,                                  /* tp_getattr */
2234     0,                                  /* tp_setattr */
2235     set_nocmp,                          /* tp_compare */
2236     (reprfunc)set_repr,                 /* tp_repr */
2237     &frozenset_as_number,               /* tp_as_number */
2238     &set_as_sequence,                   /* tp_as_sequence */
2239     0,                                  /* tp_as_mapping */
2240     frozenset_hash,                     /* tp_hash */
2241     0,                                  /* tp_call */
2242     0,                                  /* tp_str */
2243     PyObject_GenericGetAttr,            /* tp_getattro */
2244     0,                                  /* tp_setattro */
2245     0,                                  /* tp_as_buffer */
2246     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
2247         Py_TPFLAGS_BASETYPE,            /* tp_flags */
2248     frozenset_doc,                      /* tp_doc */
2249     (traverseproc)set_traverse,         /* tp_traverse */
2250     (inquiry)set_clear_internal,        /* tp_clear */
2251     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2252     offsetof(PySetObject, weakreflist),         /* tp_weaklistoffset */
2253     (getiterfunc)set_iter,              /* tp_iter */
2254     0,                                  /* tp_iternext */
2255     frozenset_methods,                  /* tp_methods */
2256     0,                                  /* tp_members */
2257     0,                                  /* tp_getset */
2258     0,                                  /* tp_base */
2259     0,                                  /* tp_dict */
2260     0,                                  /* tp_descr_get */
2261     0,                                  /* tp_descr_set */
2262     0,                                  /* tp_dictoffset */
2263     0,                                  /* tp_init */
2264     PyType_GenericAlloc,                /* tp_alloc */
2265     frozenset_new,                      /* tp_new */
2266     PyObject_GC_Del,                    /* tp_free */
2267 };
2268 
2269 
2270 /***** C API functions *************************************************/
2271 
2272 PyObject *
PySet_New(PyObject * iterable)2273 PySet_New(PyObject *iterable)
2274 {
2275     return make_new_set(&PySet_Type, iterable);
2276 }
2277 
2278 PyObject *
PyFrozenSet_New(PyObject * iterable)2279 PyFrozenSet_New(PyObject *iterable)
2280 {
2281     return make_new_set(&PyFrozenSet_Type, iterable);
2282 }
2283 
2284 Py_ssize_t
PySet_Size(PyObject * anyset)2285 PySet_Size(PyObject *anyset)
2286 {
2287     if (!PyAnySet_Check(anyset)) {
2288         PyErr_BadInternalCall();
2289         return -1;
2290     }
2291     return PySet_GET_SIZE(anyset);
2292 }
2293 
2294 int
PySet_Clear(PyObject * set)2295 PySet_Clear(PyObject *set)
2296 {
2297     if (!PySet_Check(set)) {
2298         PyErr_BadInternalCall();
2299         return -1;
2300     }
2301     return set_clear_internal((PySetObject *)set);
2302 }
2303 
2304 int
PySet_Contains(PyObject * anyset,PyObject * key)2305 PySet_Contains(PyObject *anyset, PyObject *key)
2306 {
2307     if (!PyAnySet_Check(anyset)) {
2308         PyErr_BadInternalCall();
2309         return -1;
2310     }
2311     return set_contains_key((PySetObject *)anyset, key);
2312 }
2313 
2314 int
PySet_Discard(PyObject * set,PyObject * key)2315 PySet_Discard(PyObject *set, PyObject *key)
2316 {
2317     if (!PySet_Check(set)) {
2318         PyErr_BadInternalCall();
2319         return -1;
2320     }
2321     return set_discard_key((PySetObject *)set, key);
2322 }
2323 
2324 int
PySet_Add(PyObject * anyset,PyObject * key)2325 PySet_Add(PyObject *anyset, PyObject *key)
2326 {
2327     if (!PySet_Check(anyset) &&
2328         (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2329         PyErr_BadInternalCall();
2330         return -1;
2331     }
2332     return set_add_key((PySetObject *)anyset, key);
2333 }
2334 
2335 int
_PySet_Next(PyObject * set,Py_ssize_t * pos,PyObject ** key)2336 _PySet_Next(PyObject *set, Py_ssize_t *pos, PyObject **key)
2337 {
2338     setentry *entry_ptr;
2339 
2340     if (!PyAnySet_Check(set)) {
2341         PyErr_BadInternalCall();
2342         return -1;
2343     }
2344     if (set_next((PySetObject *)set, pos, &entry_ptr) == 0)
2345         return 0;
2346     *key = entry_ptr->key;
2347     return 1;
2348 }
2349 
2350 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,long * hash)2351 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, long *hash)
2352 {
2353     setentry *entry;
2354 
2355     if (!PyAnySet_Check(set)) {
2356         PyErr_BadInternalCall();
2357         return -1;
2358     }
2359     if (set_next((PySetObject *)set, pos, &entry) == 0)
2360         return 0;
2361     *key = entry->key;
2362     *hash = entry->hash;
2363     return 1;
2364 }
2365 
2366 PyObject *
PySet_Pop(PyObject * set)2367 PySet_Pop(PyObject *set)
2368 {
2369     if (!PySet_Check(set)) {
2370         PyErr_BadInternalCall();
2371         return NULL;
2372     }
2373     return set_pop((PySetObject *)set);
2374 }
2375 
2376 int
_PySet_Update(PyObject * set,PyObject * iterable)2377 _PySet_Update(PyObject *set, PyObject *iterable)
2378 {
2379     if (!PySet_Check(set)) {
2380         PyErr_BadInternalCall();
2381         return -1;
2382     }
2383     return set_update_internal((PySetObject *)set, iterable);
2384 }
2385 
2386 #ifdef Py_DEBUG
2387 
2388 /* Test code to be called with any three element set.
2389    Returns True and original set is restored. */
2390 
2391 #define assertRaises(call_return_value, exception)              \
2392     do {                                                        \
2393         assert(call_return_value);                              \
2394         assert(PyErr_ExceptionMatches(exception));              \
2395         PyErr_Clear();                                          \
2396     } while(0)
2397 
2398 static PyObject *
test_c_api(PySetObject * so)2399 test_c_api(PySetObject *so)
2400 {
2401     Py_ssize_t count;
2402     char *s;
2403     Py_ssize_t i;
2404     PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x;
2405     PyObject *ob = (PyObject *)so;
2406     PyObject *str;
2407 
2408     /* Verify preconditions */
2409     assert(PyAnySet_Check(ob));
2410     assert(PyAnySet_CheckExact(ob));
2411     assert(!PyFrozenSet_CheckExact(ob));
2412 
2413     /* so.clear(); so |= set("abc"); */
2414     str = PyString_FromString("abc");
2415     if (str == NULL)
2416         return NULL;
2417     set_clear_internal(so);
2418     if (set_update_internal(so, str) == -1) {
2419         Py_DECREF(str);
2420         return NULL;
2421     }
2422     Py_DECREF(str);
2423 
2424     /* Exercise type/size checks */
2425     assert(PySet_Size(ob) == 3);
2426     assert(PySet_GET_SIZE(ob) == 3);
2427 
2428     /* Raise TypeError for non-iterable constructor arguments */
2429     assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2430     assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2431 
2432     /* Raise TypeError for unhashable key */
2433     dup = PySet_New(ob);
2434     assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2435     assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2436     assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2437 
2438     /* Exercise successful pop, contains, add, and discard */
2439     elem = PySet_Pop(ob);
2440     assert(PySet_Contains(ob, elem) == 0);
2441     assert(PySet_GET_SIZE(ob) == 2);
2442     assert(PySet_Add(ob, elem) == 0);
2443     assert(PySet_Contains(ob, elem) == 1);
2444     assert(PySet_GET_SIZE(ob) == 3);
2445     assert(PySet_Discard(ob, elem) == 1);
2446     assert(PySet_GET_SIZE(ob) == 2);
2447     assert(PySet_Discard(ob, elem) == 0);
2448     assert(PySet_GET_SIZE(ob) == 2);
2449 
2450     /* Exercise clear */
2451     dup2 = PySet_New(dup);
2452     assert(PySet_Clear(dup2) == 0);
2453     assert(PySet_Size(dup2) == 0);
2454     Py_DECREF(dup2);
2455 
2456     /* Raise SystemError on clear or update of frozen set */
2457     f = PyFrozenSet_New(dup);
2458     assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2459     assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2460     assert(PySet_Add(f, elem) == 0);
2461     Py_INCREF(f);
2462     assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2463     Py_DECREF(f);
2464     Py_DECREF(f);
2465 
2466     /* Exercise direct iteration */
2467     i = 0, count = 0;
2468     while (_PySet_Next((PyObject *)dup, &i, &x)) {
2469         s = PyString_AsString(x);
2470         assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2471         count++;
2472     }
2473     assert(count == 3);
2474 
2475     /* Exercise updates */
2476     dup2 = PySet_New(NULL);
2477     assert(_PySet_Update(dup2, dup) == 0);
2478     assert(PySet_Size(dup2) == 3);
2479     assert(_PySet_Update(dup2, dup) == 0);
2480     assert(PySet_Size(dup2) == 3);
2481     Py_DECREF(dup2);
2482 
2483     /* Raise SystemError when self argument is not a set or frozenset. */
2484     t = PyTuple_New(0);
2485     assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2486     assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2487     Py_DECREF(t);
2488 
2489     /* Raise SystemError when self argument is not a set. */
2490     f = PyFrozenSet_New(dup);
2491     assert(PySet_Size(f) == 3);
2492     assert(PyFrozenSet_CheckExact(f));
2493     assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2494     assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2495     Py_DECREF(f);
2496 
2497     /* Raise KeyError when popping from an empty set */
2498     assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2499     Py_DECREF(ob);
2500     assert(PySet_GET_SIZE(ob) == 0);
2501     assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2502 
2503     /* Restore the set from the copy using the PyNumber API */
2504     assert(PyNumber_InPlaceOr(ob, dup) == ob);
2505     Py_DECREF(ob);
2506 
2507     /* Verify constructors accept NULL arguments */
2508     f = PySet_New(NULL);
2509     assert(f != NULL);
2510     assert(PySet_GET_SIZE(f) == 0);
2511     Py_DECREF(f);
2512     f = PyFrozenSet_New(NULL);
2513     assert(f != NULL);
2514     assert(PyFrozenSet_CheckExact(f));
2515     assert(PySet_GET_SIZE(f) == 0);
2516     Py_DECREF(f);
2517 
2518     Py_DECREF(elem);
2519     Py_DECREF(dup);
2520     Py_RETURN_TRUE;
2521 }
2522 
2523 #undef assertRaises
2524 
2525 #endif
2526