From 69ae7985ce26709b0acd9c376c751b8fb0ba4528 Mon Sep 17 00:00:00 2001 From: Yorke Lee Date: Thu, 21 Mar 2013 18:22:18 -0700 Subject: Fix name matching bug with non space separators Bug: 8435819 Change-Id: I69c32207d123e5da4ae5421a5fe83ffeee4e5070 --- .../dialer/dialpad/SmartDialNameMatcher.java | 38 ++++++++++++++-------- .../dialer/dialpad/SmartDialNameMatcherTest.java | 24 ++++++++++++-- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/src/com/android/dialer/dialpad/SmartDialNameMatcher.java b/src/com/android/dialer/dialpad/SmartDialNameMatcher.java index 381e74773..f805abff1 100644 --- a/src/com/android/dialer/dialpad/SmartDialNameMatcher.java +++ b/src/com/android/dialer/dialpad/SmartDialNameMatcher.java @@ -66,6 +66,8 @@ public class SmartDialNameMatcher { * This gives us a way to map characters containing accents/diacritics to their * alphabetic equivalents. The unidecode library can be found at: * http://pypi.python.org/pypi/Unidecode/0.04.1 + * + * Also remaps all upper case latin characters to their lower case equivalents. */ public static char remapAccentedChars(char c) { switch (c) { @@ -471,7 +473,12 @@ public class SmartDialNameMatcher { * This function iterates through each token in the display name, trying to match the query * to the numeric equivalent of the token. * - * A token is defined as a range in the display name delimited by whitespace. For example, + * A token is defined as a range in the display name delimited by characters that have no + * latin alphabet equivalents (e.g. spaces - ' ', commas - ',', underscores - '_' or chinese + * characters - '王'). Transliteration from non-latin characters to latin characters will be + * done on a best effort basis - e.g. 'Ü' - 'u'. + * + * For example, * the display name "Phillips Thomas Jr" contains three tokens: "phillips", "thomas", and "jr". * * A match must begin at the start of a token. 
@@ -520,25 +527,19 @@ public class SmartDialNameMatcher { int seperatorCount = 0; ArrayList partial = new ArrayList(); - // Keep going until we reach the end of displayName while (nameStart < nameLength && queryStart < queryLength) { char ch = displayName.charAt(nameStart); // Strip diacritics from accented characters if any ch = remapAccentedChars(ch); - if ((ch >= 'A') && (ch <= 'Z')) { - // Simply change the ascii code to the lower case version instead of using - // toLowerCase for efficiency - ch += 32; - } - if ((ch >= 'a') && (ch <= 'z')) { + if (isLowercaseLatin(ch)) { // a starts at index 0 if (LATIN_LETTERS_TO_DIGITS[ch - 'a'] != query.charAt(queryStart)) { // we did not find a match queryStart = 0; seperatorCount = 0; while (nameStart < nameLength && - !Character.isWhitespace(displayName.charAt(nameStart))) { + isLowercaseLatin(remapAccentedChars(displayName.charAt(nameStart)))) { nameStart++; } nameStart++; @@ -555,12 +556,14 @@ public class SmartDialNameMatcher { // we matched the first character. // branch off and see if we can find another match with the remaining // characters in the query string and the remaining tokens - //find the next space in the query string - int j = nameStart; - while (j < nameLength && displayName.charAt(j) != ' ') { - j++; + // find the next separator in the display name + int j; + for (j = nameStart; j < nameLength; j++) { + if (!isLowercaseLatin(remapAccentedChars(displayName.charAt(j)))) { + break; + } } - // this means there is at least one character left after the space + // this means there is at least one character left after the separator if (j < nameLength - 1) { final String remainder = displayName.substring(j + 1); final ArrayList partialTemp = @@ -609,6 +612,13 @@ public class SmartDialNameMatcher { return false; } + /* + * Returns true if the character is a lowercase latin character (i.e. non-separator). 
+ */ + private boolean isLowercaseLatin(char ch) { + return ch >= 'a' && ch <= 'z'; + } + public boolean matches(String displayName) { mMatchPositions.clear(); return matchesCombination(displayName, mQuery, mMatchPositions); diff --git a/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java b/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java index 8b7ee03f6..08939b48e 100644 --- a/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java +++ b/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java @@ -80,8 +80,19 @@ public class SmartDialNameMatcherTest extends TestCase { checkMatches("William John Smith", "5764", true, 15, 16, 22, 25); } - // TODO: Do we want to make these pass anymore? - @Suppress + public void testMatches_InitialWithSeparator() { + // wjs matches (W)illiam (J)ohn-(S)mith + checkMatches("William John-Smith", "957", true, 0, 1, 8, 9, 13, 14); + // wjoshe matches (W)illiam (J)ohn-(OShe)a + checkMatches("William John-O'Shea", "956743", true, 0, 1, 8, 9, 13, 18); + // wjohn matches (W)illiam-(John) Smith + checkMatches("William-John Smith", "95646", true, 0, 1, 8, 12); + // jsmi matches William (J)ohn-(Smi)th + checkMatches("William John-Smith", "5764", true, 8, 9, 13, 16); + // make sure multiple separators don't mess things up + checkMatches("William John---Smith", "5764", true, 15, 16, 22, 25); + } + public void testMatches_repeatedSeparators() { // Simple match for single token checkMatches("John,,,,,Doe", "5646", true, 0, 4); @@ -91,6 +102,15 @@ public class SmartDialNameMatcherTest extends TestCase { checkMatches("John,,,,,Doe", "363", true, 9, 12); } + public void testMatches_LatinMix() { + // Latin + Chinese characters + checkMatches("Lee王力Wang宏", "59264", true, 0, 1, 5, 9); + // Latin + Japanese characters + checkMatches("千Abcd佳智Efgh佳IJKL", "222333444555", true, 1, 16); + // Latin + Arabic characters + checkMatches("Peterعبد الرحمنJames", "752637", true, 0, 1, 15, 20); + } + public void 
testMatches_umlaut() { checkMatches("ÄÖÜäöü", "268268", true, 0, 6); } -- cgit v1.2.3