/* * Copyright (C) 2009 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License */ package com.android.providers.contacts; import android.content.ContentValues; import android.provider.ContactsContract.CommonDataKinds.StructuredName; import android.provider.ContactsContract.FullNameStyle; import android.provider.ContactsContract.PhoneticNameStyle; import android.text.TextUtils; import com.android.providers.contacts.util.NeededForTesting; import java.lang.Character.UnicodeBlock; import java.util.HashSet; import java.util.Locale; import java.util.StringTokenizer; /** * The purpose of this class is to split a full name into given names and last * name. The logic only supports having a single last name. If the full name has * multiple last names the output will be incorrect. *
* Core algorithm: *
* [prefix] given name(s) [[middle name] family name] [, suffix] * [prefix] family name, given name [middle name] [,suffix] **/ private void splitWesternName(Name name, String fullName) { NameTokenizer tokens = new NameTokenizer(fullName); parsePrefix(name, tokens); // If the name consists of just one or two tokens, treat them as first/last name, // not as suffix. Example: John Ma; Ma is last name, not "M.A.". if (tokens.mEndPointer > 2) { parseSuffix(name, tokens); } if (name.prefix == null && tokens.mEndPointer - tokens.mStartPointer == 1) { name.givenNames = tokens.mTokens[tokens.mStartPointer]; } else { parseLastName(name, tokens); parseMiddleName(name, tokens); parseGivenNames(name, tokens); } } /** * Splits a full name composed according to the Chinese tradition: *
* [family name [middle name]] given name **/ private void splitChineseName(Name name, String fullName) { StringTokenizer tokenizer = new StringTokenizer(fullName); while (tokenizer.hasMoreTokens()) { String token = tokenizer.nextToken(); if (name.givenNames == null) { name.givenNames = token; } else if (name.familyName == null) { name.familyName = name.givenNames; name.givenNames = token; } else if (name.middleName == null) { name.middleName = name.givenNames; name.givenNames = token; } else { name.middleName = name.middleName + name.givenNames; name.givenNames = token; } } // If a single word parse that word up. if (name.givenNames != null && name.familyName == null && name.middleName == null) { int length = fullName.length(); if (length == 2) { name.familyName = fullName.substring(0, 1); name.givenNames = fullName.substring(1); } else if (length == 3) { name.familyName = fullName.substring(0, 1); name.middleName = fullName.substring(1, 2); name.givenNames = fullName.substring(2); } else if (length == 4) { name.familyName = fullName.substring(0, 2); name.middleName = fullName.substring(2, 3); name.givenNames = fullName.substring(3); } } } /** * Splits a full name composed according to the Japanese tradition: *
* [family name] given name(s) **/ private void splitJapaneseName(Name name, String fullName) { StringTokenizer tokenizer = new StringTokenizer(fullName); while (tokenizer.hasMoreTokens()) { String token = tokenizer.nextToken(); if (name.givenNames == null) { name.givenNames = token; } else if (name.familyName == null) { name.familyName = name.givenNames; name.givenNames = token; } else { name.givenNames += " " + token; } } } /** * Splits a full name composed according to the Korean tradition: *
* [family name] given name(s) **/ private void splitKoreanName(Name name, String fullName) { StringTokenizer tokenizer = new StringTokenizer(fullName); if (tokenizer.countTokens() > 1) { // Each name can be identified by separators. while (tokenizer.hasMoreTokens()) { String token = tokenizer.nextToken(); if (name.givenNames == null) { name.givenNames = token; } else if (name.familyName == null) { name.familyName = name.givenNames; name.givenNames = token; } else { name.givenNames += " " + token; } } } else { // There is no separator. Try to guess family name. // The length of most family names is 1. int familyNameLength = 1; // Compare with 2-length family names. for (String twoLengthFamilyName : KOREAN_TWO_CHARCTER_FAMILY_NAMES) { if (fullName.startsWith(twoLengthFamilyName)) { familyNameLength = 2; break; } } name.familyName = fullName.substring(0, familyNameLength); if (fullName.length() > familyNameLength) { name.givenNames = fullName.substring(familyNameLength); } } } /** * Concatenates components of a name according to the rules dictated by the name style. * * @param givenNameFirst is ignored for CJK display name styles */ public String join(Name name, boolean givenNameFirst, boolean includePrefix) { String prefix = includePrefix ? name.prefix : null; switch (name.fullNameStyle) { case FullNameStyle.CJK: case FullNameStyle.CHINESE: case FullNameStyle.KOREAN: return join(prefix, name.familyName, name.middleName, name.givenNames, name.suffix, false, false, false); case FullNameStyle.JAPANESE: return join(prefix, name.familyName, name.middleName, name.givenNames, name.suffix, true, false, false); default: if (givenNameFirst) { return join(prefix, name.givenNames, name.middleName, name.familyName, name.suffix, true, false, true); } else { return join(prefix, name.familyName, name.givenNames, name.middleName, name.suffix, true, true, true); } } } /** * Concatenates components of the phonetic name following the CJK tradition: * family name + middle name + given name(s). */ public String joinPhoneticName(Name name) { return join(null, name.phoneticFamilyName, name.phoneticMiddleName, name.phoneticGivenName, null, true, false, false); } /** * Concatenates parts of a full name inserting spaces and commas as specified. */ private String join(String prefix, String part1, String part2, String part3, String suffix, boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) { prefix = prefix == null ? null: prefix.trim(); part1 = part1 == null ? null: part1.trim(); part2 = part2 == null ? null: part2.trim(); part3 = part3 == null ? null: part3.trim(); suffix = suffix == null ? null: suffix.trim(); boolean hasPrefix = !TextUtils.isEmpty(prefix); boolean hasPart1 = !TextUtils.isEmpty(part1); boolean hasPart2 = !TextUtils.isEmpty(part2); boolean hasPart3 = !TextUtils.isEmpty(part3); boolean hasSuffix = !TextUtils.isEmpty(suffix); boolean isSingleWord = true; String singleWord = null; if (hasPrefix) { singleWord = prefix; } if (hasPart1) { if (singleWord != null) { isSingleWord = false; } else { singleWord = part1; } } if (hasPart2) { if (singleWord != null) { isSingleWord = false; } else { singleWord = part2; } } if (hasPart3) { if (singleWord != null) { isSingleWord = false; } else { singleWord = part3; } } if (hasSuffix) { if (singleWord != null) { isSingleWord = false; } else { singleWord = normalizedSuffix(suffix); } } if (isSingleWord) { return singleWord; } StringBuilder sb = new StringBuilder(); if (hasPrefix) { sb.append(prefix); } if (hasPart1) { if (hasPrefix) { sb.append(' '); } sb.append(part1); } if (hasPart2) { if (hasPrefix || hasPart1) { if (useCommaAfterPart1) { sb.append(','); } if (useSpace) { sb.append(' '); } } sb.append(part2); } if (hasPart3) { if (hasPrefix || hasPart1 || hasPart2) { if (useSpace) { sb.append(' '); } } sb.append(part3); } if (hasSuffix) { if (hasPrefix || hasPart1 || hasPart2 || hasPart3) { if (useCommaAfterPart3) { sb.append(','); } if (useSpace) { sb.append(' '); } } sb.append(normalizedSuffix(suffix)); } return sb.toString(); } /** * Puts a dot after the supplied suffix if that is the accepted form of the suffix, * e.g. "Jr." and "Sr.", but not "I", "II" and "III". */ private String normalizedSuffix(String suffix) { int length = suffix.length(); if (length == 0 || suffix.charAt(length - 1) == '.') { return suffix; } String withDot = suffix + '.'; if (mSuffixesSet.contains(withDot.toUpperCase())) { return withDot; } else { return suffix; } } /** * If the supplied name style is undefined, returns a default based on the language, * otherwise returns the supplied name style itself. * * @param nameStyle See {@link FullNameStyle}. */ public int getAdjustedFullNameStyle(int nameStyle) { if (nameStyle == FullNameStyle.UNDEFINED) { if (JAPANESE_LANGUAGE.equals(mLanguage)) { return FullNameStyle.JAPANESE; } else if (KOREAN_LANGUAGE.equals(mLanguage)) { return FullNameStyle.KOREAN; } else if (CHINESE_LANGUAGE.equals(mLanguage)) { return FullNameStyle.CHINESE; } else { return FullNameStyle.WESTERN; } } else if (nameStyle == FullNameStyle.CJK) { if (JAPANESE_LANGUAGE.equals(mLanguage)) { return FullNameStyle.JAPANESE; } else if (KOREAN_LANGUAGE.equals(mLanguage)) { return FullNameStyle.KOREAN; } else { return FullNameStyle.CHINESE; } } return nameStyle; } /** * Parses the first word from the name if it is a prefix. */ private void parsePrefix(Name name, NameTokenizer tokens) { if (tokens.mStartPointer == tokens.mEndPointer) { return; } String firstToken = tokens.mTokens[tokens.mStartPointer]; if (mPrefixesSet.contains(firstToken.toUpperCase())) { if (tokens.hasDot(tokens.mStartPointer)) { firstToken += '.'; } name.prefix = firstToken; tokens.mStartPointer++; } } /** * Parses the last word(s) from the name if it is a suffix. */ private void parseSuffix(Name name, NameTokenizer tokens) { if (tokens.mStartPointer == tokens.mEndPointer) { return; } String lastToken = tokens.mTokens[tokens.mEndPointer - 1]; // Take care of an explicit comma-separated suffix if (tokens.mEndPointer - tokens.mStartPointer > 2 && tokens.hasComma(tokens.mEndPointer - 2)) { if (tokens.hasDot(tokens.mEndPointer - 1)) { lastToken += '.'; } name.suffix = lastToken; tokens.mEndPointer--; return; } if (lastToken.length() > mMaxSuffixLength) { return; } String normalized = lastToken.toUpperCase(); if (mSuffixesSet.contains(normalized)) { name.suffix = lastToken; tokens.mEndPointer--; return; } if (tokens.hasDot(tokens.mEndPointer - 1)) { lastToken += '.'; } normalized += "."; // Take care of suffixes like M.D. and D.D.S. int pos = tokens.mEndPointer - 1; while (normalized.length() <= mMaxSuffixLength) { if (mSuffixesSet.contains(normalized)) { name.suffix = lastToken; tokens.mEndPointer = pos; return; } if (pos == tokens.mStartPointer) { break; } pos--; if (tokens.hasDot(pos)) { lastToken = tokens.mTokens[pos] + "." + lastToken; } else { lastToken = tokens.mTokens[pos] + " " + lastToken; } normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized; } } private void parseLastName(Name name, NameTokenizer tokens) { if (tokens.mStartPointer == tokens.mEndPointer) { return; } // If the first word is followed by a comma, assume that it's the family name if (tokens.hasComma(tokens.mStartPointer)) { name.familyName = tokens.mTokens[tokens.mStartPointer]; tokens.mStartPointer++; return; } // If the second word is followed by a comma and the first word // is a last name prefix as in "de Sade" and "von Cliburn", treat // the first two words as the family name. if (tokens.mStartPointer + 1 < tokens.mEndPointer && tokens.hasComma(tokens.mStartPointer + 1) && isFamilyNamePrefix(tokens.mTokens[tokens.mStartPointer])) { String familyNamePrefix = tokens.mTokens[tokens.mStartPointer]; if (tokens.hasDot(tokens.mStartPointer)) { familyNamePrefix += '.'; } name.familyName = familyNamePrefix + " " + tokens.mTokens[tokens.mStartPointer + 1]; tokens.mStartPointer += 2; return; } // Finally, assume that the last word is the last name name.familyName = tokens.mTokens[tokens.mEndPointer - 1]; tokens.mEndPointer--; // Take care of last names like "de Sade" and "von Cliburn" if ((tokens.mEndPointer - tokens.mStartPointer) > 0) { String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1]; if (isFamilyNamePrefix(lastNamePrefix)) { if (tokens.hasDot(tokens.mEndPointer - 1)) { lastNamePrefix += '.'; } name.familyName = lastNamePrefix + " " + name.familyName; tokens.mEndPointer--; } } } /** * Returns true if the supplied word is an accepted last name prefix, e.g. "von", "de" */ private boolean isFamilyNamePrefix(String word) { final String normalized = word.toUpperCase(); return mLastNamePrefixesSet.contains(normalized) || mLastNamePrefixesSet.contains(normalized + "."); } private void parseMiddleName(Name name, NameTokenizer tokens) { if (tokens.mStartPointer == tokens.mEndPointer) { return; } if ((tokens.mEndPointer - tokens.mStartPointer) > 1) { if ((tokens.mEndPointer - tokens.mStartPointer) == 2 || !mConjuctions.contains(tokens.mTokens[tokens.mEndPointer - 2]. toUpperCase())) { name.middleName = tokens.mTokens[tokens.mEndPointer - 1]; if (tokens.hasDot(tokens.mEndPointer - 1)) { name.middleName += '.'; } tokens.mEndPointer--; } } } private void parseGivenNames(Name name, NameTokenizer tokens) { if (tokens.mStartPointer == tokens.mEndPointer) { return; } if ((tokens.mEndPointer - tokens.mStartPointer) == 1) { name.givenNames = tokens.mTokens[tokens.mStartPointer]; } else { StringBuilder sb = new StringBuilder(); for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) { if (i != tokens.mStartPointer) { sb.append(' '); } sb.append(tokens.mTokens[i]); if (tokens.hasDot(i)) { sb.append('.'); } } name.givenNames = sb.toString(); } } /** * Makes the best guess at the expected full name style based on the character set * used in the supplied name. If the phonetic name is also supplied, tries to * differentiate between Chinese, Japanese and Korean based on the alphabet used * for the phonetic name. */ public void guessNameStyle(Name name) { guessFullNameStyle(name); guessPhoneticNameStyle(name); name.fullNameStyle = getAdjustedNameStyleBasedOnPhoneticNameStyle(name.fullNameStyle, name.phoneticNameStyle); } /** * Updates the display name style according to the phonetic name style if we * were unsure about display name style based on the name components, but * phonetic name makes it more definitive. */ public int getAdjustedNameStyleBasedOnPhoneticNameStyle(int nameStyle, int phoneticNameStyle) { if (phoneticNameStyle != PhoneticNameStyle.UNDEFINED) { if (nameStyle == FullNameStyle.UNDEFINED || nameStyle == FullNameStyle.CJK) { if (phoneticNameStyle == PhoneticNameStyle.JAPANESE) { return FullNameStyle.JAPANESE; } else if (phoneticNameStyle == PhoneticNameStyle.KOREAN) { return FullNameStyle.KOREAN; } if (nameStyle == FullNameStyle.CJK && phoneticNameStyle == PhoneticNameStyle.PINYIN) { return FullNameStyle.CHINESE; } } } return nameStyle; } /** * Makes the best guess at the expected full name style based on the character set * used in the supplied name. */ private void guessFullNameStyle(NameSplitter.Name name) { if (name.fullNameStyle != FullNameStyle.UNDEFINED) { return; } int bestGuess = guessFullNameStyle(name.givenNames); // A mix of Hanzi and latin chars are common in China, so we have to go through all names // if the name is not JANPANESE or KOREAN. if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK && bestGuess != FullNameStyle.WESTERN) { name.fullNameStyle = bestGuess; return; } int guess = guessFullNameStyle(name.familyName); if (guess != FullNameStyle.UNDEFINED) { if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) { name.fullNameStyle = guess; return; } bestGuess = guess; } guess = guessFullNameStyle(name.middleName); if (guess != FullNameStyle.UNDEFINED) { if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) { name.fullNameStyle = guess; return; } bestGuess = guess; } guess = guessFullNameStyle(name.prefix); if (guess != FullNameStyle.UNDEFINED) { if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) { name.fullNameStyle = guess; return; } bestGuess = guess; } guess = guessFullNameStyle(name.suffix); if (guess != FullNameStyle.UNDEFINED) { if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) { name.fullNameStyle = guess; return; } bestGuess = guess; } name.fullNameStyle = bestGuess; } public int guessFullNameStyle(String name) { if (name == null) { return FullNameStyle.UNDEFINED; } int nameStyle = FullNameStyle.UNDEFINED; int length = name.length(); int offset = 0; while (offset < length) { int codePoint = Character.codePointAt(name, offset); if (Character.isLetter(codePoint)) { UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); if (!isLatinUnicodeBlock(unicodeBlock)) { if (isCJKUnicodeBlock(unicodeBlock)) { // We don't know if this is Chinese, Japanese or Korean - // trying to figure out by looking at other characters in the name return guessCJKNameStyle(name, offset + Character.charCount(codePoint)); } if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { return FullNameStyle.JAPANESE; } if (isKoreanUnicodeBlock(unicodeBlock)) { return FullNameStyle.KOREAN; } } nameStyle = FullNameStyle.WESTERN; } offset += Character.charCount(codePoint); } return nameStyle; } private int guessCJKNameStyle(String name, int offset) { int length = name.length(); while (offset < length) { int codePoint = Character.codePointAt(name, offset); if (Character.isLetter(codePoint)) { UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { return FullNameStyle.JAPANESE; } if (isKoreanUnicodeBlock(unicodeBlock)) { return FullNameStyle.KOREAN; } } offset += Character.charCount(codePoint); } return FullNameStyle.CJK; } private void guessPhoneticNameStyle(NameSplitter.Name name) { if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) { return; } int bestGuess = guessPhoneticNameStyle(name.phoneticFamilyName); if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK) { name.phoneticNameStyle = bestGuess; return; } int guess = guessPhoneticNameStyle(name.phoneticGivenName); if (guess != FullNameStyle.UNDEFINED) { if (guess != FullNameStyle.CJK) { name.phoneticNameStyle = guess; return; } bestGuess = guess; } guess = guessPhoneticNameStyle(name.phoneticMiddleName); if (guess != FullNameStyle.UNDEFINED) { if (guess != FullNameStyle.CJK) { name.phoneticNameStyle = guess; return; } bestGuess = guess; } } public int guessPhoneticNameStyle(String name) { if (name == null) { return PhoneticNameStyle.UNDEFINED; } int nameStyle = PhoneticNameStyle.UNDEFINED; int length = name.length(); int offset = 0; while (offset < length) { int codePoint = Character.codePointAt(name, offset); if (Character.isLetter(codePoint)) { UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { return PhoneticNameStyle.JAPANESE; } if (isKoreanUnicodeBlock(unicodeBlock)) { return PhoneticNameStyle.KOREAN; } if (isLatinUnicodeBlock(unicodeBlock)) { return PhoneticNameStyle.PINYIN; } } offset += Character.charCount(codePoint); } return nameStyle; } private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) { return unicodeBlock == UnicodeBlock.BASIC_LATIN || unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT || unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A || unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B || unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL; } private static boolean isCJKUnicodeBlock(UnicodeBlock block) { return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT || block == UnicodeBlock.CJK_COMPATIBILITY || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT; } private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) { return unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES || unicodeBlock == UnicodeBlock.HANGUL_JAMO || unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO; } private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) { return unicodeBlock == UnicodeBlock.KATAKANA || unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS || unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS || unicodeBlock == UnicodeBlock.HIRAGANA; } }