1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2012-2014, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * collationkeys.h
9 *
10 * created on: 2012sep02
11 * created by: Markus W. Scherer
12 */
13 
14 #ifndef __COLLATIONKEYS_H__
15 #define __COLLATIONKEYS_H__
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_COLLATION
20 
21 #include "unicode/bytestream.h"
22 #include "unicode/ucol.h"
23 #include "charstr.h"
24 #include "collation.h"
25 
26 U_NAMESPACE_BEGIN
27 
// Forward declarations: in this header these types are only used by reference
// (writeSortKeyUpToQuaternary() parameters) or in a friend declaration,
// so their complete definitions are not needed here.
class CollationIterator;
struct CollationDataReader;
struct CollationSettings;
31 
32 class SortKeyByteSink : public ByteSink {
33 public:
SortKeyByteSink(char * dest,int32_t destCapacity)34     SortKeyByteSink(char *dest, int32_t destCapacity)
35             : buffer_(dest), capacity_(destCapacity),
36               appended_(0), ignore_(0) {}
37     virtual ~SortKeyByteSink();
38 
IgnoreBytes(int32_t numIgnore)39     void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }
40 
41     virtual void Append(const char *bytes, int32_t n);
Append(uint32_t b)42     void Append(uint32_t b) {
43         if (ignore_ > 0) {
44             --ignore_;
45         } else {
46             if (appended_ < capacity_ || Resize(1, appended_)) {
47                 buffer_[appended_] = (char)b;
48             }
49             ++appended_;
50         }
51     }
52     virtual char *GetAppendBuffer(int32_t min_capacity,
53                                   int32_t desired_capacity_hint,
54                                   char *scratch, int32_t scratch_capacity,
55                                   int32_t *result_capacity);
NumberOfBytesAppended()56     int32_t NumberOfBytesAppended() const { return appended_; }
57 
58     /**
59      * @return how many bytes can be appended (including ignored ones)
60      *         without reallocation
61      */
GetRemainingCapacity()62     int32_t GetRemainingCapacity() const {
63         // Either ignore_ or appended_ should be 0.
64         return ignore_ + capacity_ - appended_;
65     }
66 
Overflowed()67     UBool Overflowed() const { return appended_ > capacity_; }
68     /** @return FALSE if memory allocation failed */
IsOk()69     UBool IsOk() const { return buffer_ != NULL; }
70 
71 protected:
72     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
73     virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;
74 
SetNotOk()75     void SetNotOk() {
76         buffer_ = NULL;
77         capacity_ = 0;
78     }
79 
80     char *buffer_;
81     int32_t capacity_;
82     int32_t appended_;
83     int32_t ignore_;
84 
85 private:
86     SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
87     SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
88 };
89 
90 class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
91 public:
92     class LevelCallback : public UMemory {
93     public:
94         virtual ~LevelCallback();
95         /**
96          * @param level The next level about to be written to the ByteSink.
97          * @return TRUE if the level is to be written
98          *         (the base class implementation always returns TRUE)
99          */
100         virtual UBool needToWrite(Collation::Level level);
101     };
102 
103     /**
104      * Writes the sort key bytes for minLevel up to the iterator data's strength.
105      * Optionally writes the case level.
106      * Stops writing levels when callback.needToWrite(level) returns FALSE.
107      * Separates levels with the LEVEL_SEPARATOR_BYTE
108      * but does not write a TERMINATOR_BYTE.
109      */
110     static void writeSortKeyUpToQuaternary(CollationIterator &iter,
111                                            const UBool *compressibleBytes,
112                                            const CollationSettings &settings,
113                                            SortKeyByteSink &sink,
114                                            Collation::Level minLevel, LevelCallback &callback,
115                                            UBool preflight, UErrorCode &errorCode);
116 private:
117     friend struct CollationDataReader;
118 
119     CollationKeys();  // no instantiation
120 
121     // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
122     static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
123     static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
124     static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
125     static const int32_t SEC_COMMON_MAX_COUNT = 0x21;
126 
127     // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
128     static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
129     static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
130     static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
131     static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
132 
133     // Case level, upperFirst: Compress up to 13 common weights as 3..15.
134     static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
135     static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
136     static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
137 
138     // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
139     static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
140     static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
141     static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
142     static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;
143 
144     // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
145     static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
146     static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
147     static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
148     static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
149 
150     // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
151     static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
152     static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
153     static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
154     static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
155 
156     // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
157     static const uint32_t QUAT_COMMON_LOW = 0x1c;
158     static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
159     static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
160     static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;
161     // Primary weights shifted to quaternary level must be encoded with
162     // a lead byte below the common-weight compression range.
163     static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1;  // 0x1b
164 };
165 
166 U_NAMESPACE_END
167 
168 #endif  // !UCONFIG_NO_COLLATION
169 #endif  // __COLLATIONKEYS_H__
170