1 /* Accumulator struct implementation */
2 
3 #include "Python.h"
4 #include "accu.h"
5 
6 static PyObject *
join_list_unicode(PyObject * lst)7 join_list_unicode(PyObject *lst)
8 {
9     /* return ''.join(lst) */
10     PyObject *sep, *ret;
11     sep = PyUnicode_FromStringAndSize("", 0);
12     ret = PyUnicode_Join(sep, lst);
13     Py_DECREF(sep);
14     return ret;
15 }
16 
17 int
_PyAccu_Init(_PyAccu * acc)18 _PyAccu_Init(_PyAccu *acc)
19 {
20     /* Lazily allocated */
21     acc->large = NULL;
22     acc->small = PyList_New(0);
23     if (acc->small == NULL)
24         return -1;
25     return 0;
26 }
27 
28 static int
flush_accumulator(_PyAccu * acc)29 flush_accumulator(_PyAccu *acc)
30 {
31     Py_ssize_t nsmall = PyList_GET_SIZE(acc->small);
32     if (nsmall) {
33         int ret;
34         PyObject *joined;
35         if (acc->large == NULL) {
36             acc->large = PyList_New(0);
37             if (acc->large == NULL)
38                 return -1;
39         }
40         joined = join_list_unicode(acc->small);
41         if (joined == NULL)
42             return -1;
43         if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
44             Py_DECREF(joined);
45             return -1;
46         }
47         ret = PyList_Append(acc->large, joined);
48         Py_DECREF(joined);
49         return ret;
50     }
51     return 0;
52 }
53 
54 int
_PyAccu_Accumulate(_PyAccu * acc,PyObject * unicode)55 _PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
56 {
57     Py_ssize_t nsmall;
58     assert(PyUnicode_Check(unicode));
59 
60     if (PyList_Append(acc->small, unicode))
61         return -1;
62     nsmall = PyList_GET_SIZE(acc->small);
63     /* Each item in a list of unicode objects has an overhead (in 64-bit
64      * builds) of:
65      *   - 8 bytes for the list slot
66      *   - 56 bytes for the header of the unicode object
67      * that is, 64 bytes.  100000 such objects waste more than 6 MiB
68      * compared to a single concatenated string.
69      */
70     if (nsmall < 100000)
71         return 0;
72     return flush_accumulator(acc);
73 }
74 
75 PyObject *
_PyAccu_FinishAsList(_PyAccu * acc)76 _PyAccu_FinishAsList(_PyAccu *acc)
77 {
78     int ret;
79     PyObject *res;
80 
81     ret = flush_accumulator(acc);
82     Py_CLEAR(acc->small);
83     if (ret) {
84         Py_CLEAR(acc->large);
85         return NULL;
86     }
87     res = acc->large;
88     acc->large = NULL;
89     return res;
90 }
91 
92 PyObject *
_PyAccu_Finish(_PyAccu * acc)93 _PyAccu_Finish(_PyAccu *acc)
94 {
95     PyObject *list, *res;
96     if (acc->large == NULL) {
97         list = acc->small;
98         acc->small = NULL;
99     }
100     else {
101         list = _PyAccu_FinishAsList(acc);
102         if (!list)
103             return NULL;
104     }
105     res = join_list_unicode(list);
106     Py_DECREF(list);
107     return res;
108 }
109 
110 void
_PyAccu_Destroy(_PyAccu * acc)111 _PyAccu_Destroy(_PyAccu *acc)
112 {
113     Py_CLEAR(acc->small);
114     Py_CLEAR(acc->large);
115 }
116