1 /*
2  * Copyright © 2012  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #ifndef HB_SET_PRIVATE_HH
28 #define HB_SET_PRIVATE_HH
29 
30 #include "hb-private.hh"
31 #include "hb-object-private.hh"
32 
33 
34 /*
 * The set digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like Bloom
 * filters and quotient filters, but much smaller, faster, and
 * designed to fit the requirements of our glyph coverage
 * queries.
40  *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but become fully flooded and ineffective if coverage
 * is all over the place.
44  *
45  * The frozen-set can be used instead of a digest, to trade more
46  * memory for 100% accuracy, but in practice, that doesn't look like
47  * an attractive trade-off.
48  */
49 
50 template <typename mask_t, unsigned int shift>
51 struct hb_set_digest_lowest_bits_t
52 {
53   ASSERT_POD ();
54 
55   static const unsigned int mask_bytes = sizeof (mask_t);
56   static const unsigned int mask_bits = sizeof (mask_t) * 8;
57   static const unsigned int num_bits = 0
58 				     + (mask_bytes >= 1 ? 3 : 0)
59 				     + (mask_bytes >= 2 ? 1 : 0)
60 				     + (mask_bytes >= 4 ? 1 : 0)
61 				     + (mask_bytes >= 8 ? 1 : 0)
62 				     + (mask_bytes >= 16? 1 : 0)
63 				     + 0;
64 
65   ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
66   ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);
67 
inithb_set_digest_lowest_bits_t68   inline void init (void) {
69     mask = 0;
70   }
71 
addhb_set_digest_lowest_bits_t72   inline void add (hb_codepoint_t g) {
73     mask |= mask_for (g);
74   }
75 
add_rangehb_set_digest_lowest_bits_t76   inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
77     if ((b >> shift) - (a >> shift) >= mask_bits - 1)
78       mask = (mask_t) -1;
79     else {
80       mask_t ma = mask_for (a);
81       mask_t mb = mask_for (b);
82       mask |= mb + (mb - ma) - (mb < ma);
83     }
84   }
85 
may_havehb_set_digest_lowest_bits_t86   inline bool may_have (hb_codepoint_t g) const {
87     return !!(mask & mask_for (g));
88   }
89 
90   private:
91 
mask_forhb_set_digest_lowest_bits_t92   static inline mask_t mask_for (hb_codepoint_t g) {
93     return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
94   }
95   mask_t mask;
96 };
97 
98 template <typename head_t, typename tail_t>
99 struct hb_set_digest_combiner_t
100 {
101   ASSERT_POD ();
102 
inithb_set_digest_combiner_t103   inline void init (void) {
104     head.init ();
105     tail.init ();
106   }
107 
addhb_set_digest_combiner_t108   inline void add (hb_codepoint_t g) {
109     head.add (g);
110     tail.add (g);
111   }
112 
add_rangehb_set_digest_combiner_t113   inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
114     head.add_range (a, b);
115     tail.add_range (a, b);
116   }
117 
may_havehb_set_digest_combiner_t118   inline bool may_have (hb_codepoint_t g) const {
119     return head.may_have (g) && tail.may_have (g);
120   }
121 
122   private:
123   head_t head;
124   tail_t tail;
125 };
126 
127 
128 /*
129  * hb_set_digest_t
130  *
131  * This is a combination of digests that performs "best".
132  * There is not much science to this: it's a result of intuition
133  * and testing.
134  */
135 typedef hb_set_digest_combiner_t
136 <
137   hb_set_digest_lowest_bits_t<unsigned long, 4>,
138   hb_set_digest_combiner_t
139   <
140     hb_set_digest_lowest_bits_t<unsigned long, 0>,
141     hb_set_digest_lowest_bits_t<unsigned long, 9>
142   >
143 > hb_set_digest_t;
144 
145 
146 
147 /*
148  * hb_set_t
149  */
150 
151 
/* TODO Make this faster and more memory-efficient. */
153 
154 struct hb_set_t
155 {
156   friend struct hb_frozen_set_t;
157 
158   hb_object_header_t header;
159   ASSERT_POD ();
160   bool in_error;
161 
inithb_set_t162   inline void init (void) {
163     hb_object_init (this);
164     clear ();
165   }
finihb_set_t166   inline void fini (void) {
167   }
clearhb_set_t168   inline void clear (void) {
169     if (unlikely (hb_object_is_inert (this)))
170       return;
171     in_error = false;
172     memset (elts, 0, sizeof elts);
173   }
is_emptyhb_set_t174   inline bool is_empty (void) const {
175     for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
176       if (elts[i])
177         return false;
178     return true;
179   }
addhb_set_t180   inline void add (hb_codepoint_t g)
181   {
182     if (unlikely (in_error)) return;
183     if (unlikely (g == INVALID)) return;
184     if (unlikely (g > MAX_G)) return;
185     elt (g) |= mask (g);
186   }
add_rangehb_set_t187   inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
188   {
189     if (unlikely (in_error)) return;
190     /* TODO Speedup */
191     for (unsigned int i = a; i < b + 1; i++)
192       add (i);
193   }
delhb_set_t194   inline void del (hb_codepoint_t g)
195   {
196     if (unlikely (in_error)) return;
197     if (unlikely (g > MAX_G)) return;
198     elt (g) &= ~mask (g);
199   }
del_rangehb_set_t200   inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
201   {
202     if (unlikely (in_error)) return;
203     /* TODO Speedup */
204     for (unsigned int i = a; i < b + 1; i++)
205       del (i);
206   }
hashb_set_t207   inline bool has (hb_codepoint_t g) const
208   {
209     if (unlikely (g > MAX_G)) return false;
210     return !!(elt (g) & mask (g));
211   }
intersectshb_set_t212   inline bool intersects (hb_codepoint_t first,
213 			  hb_codepoint_t last) const
214   {
215     if (unlikely (first > MAX_G)) return false;
216     if (unlikely (last  > MAX_G)) last = MAX_G;
217     unsigned int end = last + 1;
218     for (hb_codepoint_t i = first; i < end; i++)
219       if (has (i))
220         return true;
221     return false;
222   }
is_equalhb_set_t223   inline bool is_equal (const hb_set_t *other) const
224   {
225     for (unsigned int i = 0; i < ELTS; i++)
226       if (elts[i] != other->elts[i])
227         return false;
228     return true;
229   }
sethb_set_t230   inline void set (const hb_set_t *other)
231   {
232     if (unlikely (in_error)) return;
233     for (unsigned int i = 0; i < ELTS; i++)
234       elts[i] = other->elts[i];
235   }
union_hb_set_t236   inline void union_ (const hb_set_t *other)
237   {
238     if (unlikely (in_error)) return;
239     for (unsigned int i = 0; i < ELTS; i++)
240       elts[i] |= other->elts[i];
241   }
intersecthb_set_t242   inline void intersect (const hb_set_t *other)
243   {
244     if (unlikely (in_error)) return;
245     for (unsigned int i = 0; i < ELTS; i++)
246       elts[i] &= other->elts[i];
247   }
subtracthb_set_t248   inline void subtract (const hb_set_t *other)
249   {
250     if (unlikely (in_error)) return;
251     for (unsigned int i = 0; i < ELTS; i++)
252       elts[i] &= ~other->elts[i];
253   }
symmetric_differencehb_set_t254   inline void symmetric_difference (const hb_set_t *other)
255   {
256     if (unlikely (in_error)) return;
257     for (unsigned int i = 0; i < ELTS; i++)
258       elts[i] ^= other->elts[i];
259   }
inverthb_set_t260   inline void invert (void)
261   {
262     if (unlikely (in_error)) return;
263     for (unsigned int i = 0; i < ELTS; i++)
264       elts[i] = ~elts[i];
265   }
nexthb_set_t266   inline bool next (hb_codepoint_t *codepoint) const
267   {
268     if (unlikely (*codepoint == INVALID)) {
269       hb_codepoint_t i = get_min ();
270       if (i != INVALID) {
271         *codepoint = i;
272 	return true;
273       } else {
274 	*codepoint = INVALID;
275         return false;
276       }
277     }
278     for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
279       if (has (i)) {
280         *codepoint = i;
281 	return true;
282       }
283     *codepoint = INVALID;
284     return false;
285   }
next_rangehb_set_t286   inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
287   {
288     hb_codepoint_t i;
289 
290     i = *last;
291     if (!next (&i))
292     {
293       *last = *first = INVALID;
294       return false;
295     }
296 
297     *last = *first = i;
298     while (next (&i) && i == *last + 1)
299       (*last)++;
300 
301     return true;
302   }
303 
get_populationhb_set_t304   inline unsigned int get_population (void) const
305   {
306     unsigned int count = 0;
307     for (unsigned int i = 0; i < ELTS; i++)
308       count += _hb_popcount32 (elts[i]);
309     return count;
310   }
get_minhb_set_t311   inline hb_codepoint_t get_min (void) const
312   {
313     for (unsigned int i = 0; i < ELTS; i++)
314       if (elts[i])
315 	for (unsigned int j = 0; j < BITS; j++)
316 	  if (elts[i] & (1u << j))
317 	    return i * BITS + j;
318     return INVALID;
319   }
get_maxhb_set_t320   inline hb_codepoint_t get_max (void) const
321   {
322     for (unsigned int i = ELTS; i; i--)
323       if (elts[i - 1])
324 	for (unsigned int j = BITS; j; j--)
325 	  if (elts[i - 1] & (1u << (j - 1)))
326 	    return (i - 1) * BITS + (j - 1);
327     return INVALID;
328   }
329 
330   typedef uint32_t elt_t;
331   static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
332   static const unsigned int SHIFT = 5;
333   static const unsigned int BITS = (1 << SHIFT);
334   static const unsigned int MASK = BITS - 1;
335   static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
336   static  const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
337 
elthb_set_t338   elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
elthb_set_t339   elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
maskhb_set_t340   elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
341 
342   elt_t elts[ELTS]; /* XXX 8kb */
343 
344   ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
345   ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
346 };
347 
348 struct hb_frozen_set_t
349 {
350   static const unsigned int SHIFT = hb_set_t::SHIFT;
351   static const unsigned int BITS = hb_set_t::BITS;
352   static const unsigned int MASK = hb_set_t::MASK;
353   typedef hb_set_t::elt_t elt_t;
354 
inithb_frozen_set_t355   inline void init (const hb_set_t &set)
356   {
357     start = count = 0;
358     elts = NULL;
359 
360     unsigned int max = set.get_max ();
361     if (max == set.INVALID)
362       return;
363     unsigned int min = set.get_min ();
364     const elt_t &min_elt = set.elt (min);
365 
366     start = min & ~MASK;
367     count = max - start + 1;
368     unsigned int num_elts = (count + BITS - 1) / BITS;
369     unsigned int elts_size = num_elts * sizeof (elt_t);
370     elts = (elt_t *) malloc (elts_size);
371     if (unlikely (!elts))
372     {
373       start = count = 0;
374       return;
375     }
376     memcpy (elts, &min_elt, elts_size);
377   }
378 
finihb_frozen_set_t379   inline void fini (void)
380   {
381     if (elts)
382       free (elts);
383   }
384 
hashb_frozen_set_t385   inline bool has (hb_codepoint_t g) const
386   {
387     /* hb_codepoint_t is unsigned. */
388     g -= start;
389     if (unlikely (g > count)) return false;
390     return !!(elt (g) & mask (g));
391   }
392 
elthb_frozen_set_t393   elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
maskhb_frozen_set_t394   elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
395 
396   private:
397   hb_codepoint_t start, count;
398   elt_t *elts;
399 };
400 
401 
402 #endif /* HB_SET_PRIVATE_HH */
403