1 /*
2  **********************************************************************
3  * Copyright (c) 2006-2007, Google and others.  All Rights Reserved.
4  **********************************************************************
5  * Author: Mark Davis
6  **********************************************************************
7  */
8 package org.unicode.cldr.util;
9 
10 import java.util.Iterator;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.TreeMap;
14 
15 import org.unicode.cldr.util.Dictionary.Matcher;
16 import org.unicode.cldr.util.Dictionary.Matcher.Status;
17 
18 public class DictionaryStringByteConverter extends StringByteConverter {
19     private final Dictionary<String> dictionary;
20     private final Matcher<String> matcher;
21     private final StringByteConverter byteMaker;
22     private final StringBuilder buffer = new StringBuilder();
23     private final int maxBytesPerChar;
24     private Matcher<CharSequence> backMatcher = null;
25 
getDictionary()26     public Dictionary<String> getDictionary() {
27         return dictionary;
28     }
29 
DictionaryStringByteConverter(Dictionary<String> dictionary, StringByteConverter byteMaker)30     public DictionaryStringByteConverter(Dictionary<String> dictionary, StringByteConverter byteMaker) {
31         super();
32         this.dictionary = dictionary;
33         matcher = dictionary.getMatcher();
34         matcher.setText(buffer);
35         this.byteMaker = byteMaker;
36         int mBytesPerChar = 0;
37         for (Iterator<Entry<CharSequence, String>> m = dictionary.getMapping(); m.hasNext();) {
38             Entry<CharSequence, String> entry = m.next();
39             // System.out.println("** " + key + "\t\t" + value);
40             int bytesPerChar = entry.getValue().length() * byteMaker.getMaxBytesPerChar(); // all bytes are generated
41             // from last char
42             if (mBytesPerChar < bytesPerChar) {
43                 mBytesPerChar = bytesPerChar;
44             }
45         }
46         maxBytesPerChar = mBytesPerChar;
47     }
48 
49     @Override
getMaxBytesPerChar()50     public int getMaxBytesPerChar() {
51         return maxBytesPerChar;
52     }
53 
54     @Override
toBytes(char ch, byte[] output, int bytePosition)55     public int toBytes(char ch, byte[] output, int bytePosition) {
56         buffer.append(ch);
57         return toBytes(output, bytePosition, true);
58     }
59 
60     @Override
toBytes(byte[] output, int bytePosition)61     public int toBytes(byte[] output, int bytePosition) {
62         return toBytes(output, bytePosition, false);
63     }
64 
toBytes(byte[] output, int bytePosition, boolean stopOnFinalPartial)65     public int toBytes(byte[] output, int bytePosition, boolean stopOnFinalPartial) {
66         // keep converting until the buffer is empty, or unless we get a PARTIAL at the end
67         while (buffer.length() != 0) {
68             matcher.setText(buffer); // reset the matcher to the start
69             // find last, best status
70             Status status = Status.NONE;
71             int bestEnd = 0;
72             String bestValue = null;
73             main: while (true) {
74                 Status tempStatus = matcher.next();
75                 switch (tempStatus) {
76                 case NONE:
77                     break main;
78                 case PARTIAL:
79                     // if the partial is at the end, then wait for more input
80                     if (stopOnFinalPartial && matcher.getMatchEnd() == buffer.length()) {
81                         if (true) matcher.nextUniquePartial(); // for debugging
82                         return bytePosition;
83                     }
84                     continue; // otherwise ignore
85                 default:
86                     // MATCH
87                     status = tempStatus;
88                     bestEnd = matcher.getMatchEnd();
89                     bestValue = matcher.getMatchValue();
90                     break;
91                 }
92             }
93             // we've now come out, and have either MATCH or not
94             // so replace what we came up with, and continue
95             switch (status) {
96             case MATCH:
97                 bytePosition = byteMaker.toBytes(bestValue, output, bytePosition);
98                 buffer.delete(0, bestEnd);
99                 break;
100             default:
101                 bytePosition = byteMaker.toBytes(buffer.charAt(0), output, bytePosition);
102                 buffer.delete(0, 1);
103                 break;
104             }
105         }
106         return bytePosition;
107     }
108 
109     @Override
fromBytes(byte[] input, int byteStart, int byteLength, Appendable result)110     public Appendable fromBytes(byte[] input, int byteStart, int byteLength, Appendable result) {
111         // first convert from bytes
112         StringBuffer internal = new StringBuffer();
113         byteMaker.fromBytes(input, byteStart, byteLength, internal);
114         // then convert using dictionary
115         if (backMatcher == null) {
116             Map<CharSequence, CharSequence> back = new TreeMap<>(
117                 Dictionary.CHAR_SEQUENCE_COMPARATOR);
118             for (Iterator<Entry<CharSequence, String>> m = dictionary.getMapping(); m.hasNext();) {
119                 Entry<CharSequence, String> entry = m.next();
120                 if (entry.getValue().length() != 0) {
121                     if (!back.containsKey(entry.getValue())) {// may lose info
122                         back.put(entry.getValue(), entry.getKey());
123                     }
124                 }
125             }
126             backMatcher = new StateDictionaryBuilder<CharSequence>().make(back).getMatcher();
127         }
128         backMatcher.setText(internal).convert(result);
129         return result;
130     }
131 }