1 /* 2 * Copyright 2001-2004 The Apache Software Foundation. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.apache.commons.codec.language; 18 19 import org.apache.commons.codec.EncoderException; 20 import org.apache.commons.codec.StringEncoder; 21 22 /** 23 * Encodes a string into a Refined Soundex value. A refined soundex code is 24 * optimized for spell checking words. Soundex method originally developed by 25 * <CITE>Margaret Odell</CITE> and <CITE>Robert Russell</CITE>. 26 * 27 * @author Apache Software Foundation 28 * @version $Id: RefinedSoundex.java,v 1.21 2004/06/05 18:32:04 ggregory Exp $ 29 * 30 * @deprecated Please use {@link java.net.URL#openConnection} instead. 31 * Please visit <a href="http://android-developers.blogspot.com/2011/09/androids-http-clients.html">this webpage</a> 32 * for further details. 33 */ 34 @Deprecated 35 public class RefinedSoundex implements StringEncoder { 36 37 /** 38 * This static variable contains an instance of the RefinedSoundex using 39 * the US_ENGLISH mapping. 40 */ 41 public static final RefinedSoundex US_ENGLISH = new RefinedSoundex(); 42 43 /** 44 * RefinedSoundex is *refined* for a number of reasons one being that the 45 * mappings have been altered. This implementation contains default 46 * mappings for US English. 47 */ 48 public static final char[] US_ENGLISH_MAPPING = "01360240043788015936020505".toCharArray(); 49 50 /** 51 * Every letter of the alphabet is "mapped" to a numerical value. This char 52 * array holds the values to which each letter is mapped. This 53 * implementation contains a default map for US_ENGLISH 54 */ 55 private char[] soundexMapping; 56 57 /** 58 * Creates an instance of the RefinedSoundex object using the default US 59 * English mapping. 60 */ RefinedSoundex()61 public RefinedSoundex() { 62 this(US_ENGLISH_MAPPING); 63 } 64 65 /** 66 * Creates a refined soundex instance using a custom mapping. This 67 * constructor can be used to customize the mapping, and/or possibly 68 * provide an internationalized mapping for a non-Western character set. 69 * 70 * @param mapping 71 * Mapping array to use when finding the corresponding code for 72 * a given character 73 */ RefinedSoundex(char[] mapping)74 public RefinedSoundex(char[] mapping) { 75 this.soundexMapping = mapping; 76 } 77 78 // BEGIN android-note 79 // Removed @see reference to SoundexUtils below, since the class isn't 80 // public. 81 // END android-note 82 /** 83 * Returns the number of characters in the two encoded Strings that are the 84 * same. This return value ranges from 0 to the length of the shortest 85 * encoded String: 0 indicates little or no similarity, and 4 out of 4 (for 86 * example) indicates strong similarity or identical values. For refined 87 * Soundex, the return value can be greater than 4. 88 * 89 * @param s1 90 * A String that will be encoded and compared. 91 * @param s2 92 * A String that will be encoded and compared. 93 * @return The number of characters in the two encoded Strings that are the 94 * same from 0 to to the length of the shortest encoded String. 95 * 96 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> 97 * MS T-SQL DIFFERENCE</a> 98 * 99 * @throws EncoderException 100 * if an error occurs encoding one of the strings 101 * @since 1.3 102 */ difference(String s1, String s2)103 public int difference(String s1, String s2) throws EncoderException { 104 return SoundexUtils.difference(this, s1, s2); 105 } 106 107 /** 108 * Encodes an Object using the refined soundex algorithm. This method is 109 * provided in order to satisfy the requirements of the Encoder interface, 110 * and will throw an EncoderException if the supplied object is not of type 111 * java.lang.String. 112 * 113 * @param pObject 114 * Object to encode 115 * @return An object (or type java.lang.String) containing the refined 116 * soundex code which corresponds to the String supplied. 117 * @throws EncoderException 118 * if the parameter supplied is not of type java.lang.String 119 */ encode(Object pObject)120 public Object encode(Object pObject) throws EncoderException { 121 if (!(pObject instanceof java.lang.String)) { 122 throw new EncoderException("Parameter supplied to RefinedSoundex encode is not of type java.lang.String"); 123 } 124 return soundex((String) pObject); 125 } 126 127 /** 128 * Encodes a String using the refined soundex algorithm. 129 * 130 * @param pString 131 * A String object to encode 132 * @return A Soundex code corresponding to the String supplied 133 */ encode(String pString)134 public String encode(String pString) { 135 return soundex(pString); 136 } 137 138 /** 139 * Returns the mapping code for a given character. The mapping codes are 140 * maintained in an internal char array named soundexMapping, and the 141 * default values of these mappings are US English. 142 * 143 * @param c 144 * char to get mapping for 145 * @return A character (really a numeral) to return for the given char 146 */ getMappingCode(char c)147 char getMappingCode(char c) { 148 if (!Character.isLetter(c)) { 149 return 0; 150 } 151 return this.soundexMapping[Character.toUpperCase(c) - 'A']; 152 } 153 154 /** 155 * Retreives the Refined Soundex code for a given String object. 156 * 157 * @param str 158 * String to encode using the Refined Soundex algorithm 159 * @return A soundex code for the String supplied 160 */ soundex(String str)161 public String soundex(String str) { 162 if (str == null) { 163 return null; 164 } 165 str = SoundexUtils.clean(str); 166 if (str.length() == 0) { 167 return str; 168 } 169 170 StringBuffer sBuf = new StringBuffer(); 171 sBuf.append(str.charAt(0)); 172 173 char last, current; 174 last = '*'; 175 176 for (int i = 0; i < str.length(); i++) { 177 178 current = getMappingCode(str.charAt(i)); 179 if (current == last) { 180 continue; 181 } else if (current != 0) { 182 sBuf.append(current); 183 } 184 185 last = current; 186 187 } 188 189 return sBuf.toString(); 190 } 191 } 192