summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorke Lee <yorkelee@google.com>2013-03-21 18:22:18 -0700
committerYorke Lee <yorkelee@google.com>2013-04-08 11:56:03 -0700
commit69ae7985ce26709b0acd9c376c751b8fb0ba4528 (patch)
tree7f8e4a06e6bd13fd386d104c63d089aeaef64c68
parentf5bc56a398b20eed03df82f5dacd86aebc35d954 (diff)
Fix name matching bug with non space separators
Bug: 8435819 Change-Id: I69c32207d123e5da4ae5421a5fe83ffeee4e5070
-rw-r--r--src/com/android/dialer/dialpad/SmartDialNameMatcher.java38
-rw-r--r--tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java24
2 files changed, 46 insertions, 16 deletions
diff --git a/src/com/android/dialer/dialpad/SmartDialNameMatcher.java b/src/com/android/dialer/dialpad/SmartDialNameMatcher.java
index 381e74773..f805abff1 100644
--- a/src/com/android/dialer/dialpad/SmartDialNameMatcher.java
+++ b/src/com/android/dialer/dialpad/SmartDialNameMatcher.java
@@ -66,6 +66,8 @@ public class SmartDialNameMatcher {
* This gives us a way to map characters containing accents/diacritics to their
* alphabetic equivalents. The unidecode library can be found at:
* http://pypi.python.org/pypi/Unidecode/0.04.1
+ *
+ * Also remaps all upper case latin characters to their lower case equivalents.
*/
public static char remapAccentedChars(char c) {
switch (c) {
@@ -471,7 +473,12 @@ public class SmartDialNameMatcher {
* This function iterates through each token in the display name, trying to match the query
* to the numeric equivalent of the token.
*
- * A token is defined as a range in the display name delimited by whitespace. For example,
+ * A token is defined as a range in the display name delimited by characters that have no
+ * latin alphabet equivalents (e.g. spaces - ' ', commas - ',', underscores - '_' or chinese
+ * characters - '王'). Transliteration from non-latin characters to latin characters will be
+ * done on a best effort basis - e.g. 'Ü' - 'u'.
+ *
+ * For example,
* the display name "Phillips Thomas Jr" contains three tokens: "phillips", "thomas", and "jr".
*
* A match must begin at the start of a token.
@@ -520,25 +527,19 @@ public class SmartDialNameMatcher {
int seperatorCount = 0;
ArrayList<SmartDialMatchPosition> partial = new ArrayList<SmartDialMatchPosition>();
-
// Keep going until we reach the end of displayName
while (nameStart < nameLength && queryStart < queryLength) {
char ch = displayName.charAt(nameStart);
// Strip diacritics from accented characters if any
ch = remapAccentedChars(ch);
- if ((ch >= 'A') && (ch <= 'Z')) {
- // Simply change the ascii code to the lower case version instead of using
- // toLowerCase for efficiency
- ch += 32;
- }
- if ((ch >= 'a') && (ch <= 'z')) {
+ if (isLowercaseLatin(ch)) {
// a starts at index 0
if (LATIN_LETTERS_TO_DIGITS[ch - 'a'] != query.charAt(queryStart)) {
// we did not find a match
queryStart = 0;
seperatorCount = 0;
while (nameStart < nameLength &&
- !Character.isWhitespace(displayName.charAt(nameStart))) {
+ isLowercaseLatin(remapAccentedChars(displayName.charAt(nameStart)))) {
nameStart++;
}
nameStart++;
@@ -555,12 +556,14 @@ public class SmartDialNameMatcher {
// we matched the first character.
// branch off and see if we can find another match with the remaining
// characters in the query string and the remaining tokens
- //find the next space in the query string
- int j = nameStart;
- while (j < nameLength && displayName.charAt(j) != ' ') {
- j++;
+ // find the next separator in the display name
+ int j;
+ for (j = nameStart; j < nameLength; j++) {
+ if (!isLowercaseLatin(remapAccentedChars(displayName.charAt(j)))) {
+ break;
+ }
}
- // this means there is at least one character left after the space
+ // this means there is at least one character left after the separator
if (j < nameLength - 1) {
final String remainder = displayName.substring(j + 1);
final ArrayList<SmartDialMatchPosition> partialTemp =
@@ -609,6 +612,13 @@ public class SmartDialNameMatcher {
return false;
}
+ /*
+ * Returns true if ch lies in the range 'a'..'z', i.e. it is a lowercase latin letter that is part of a token (any other character is treated as a separator).
+ */
+ private boolean isLowercaseLatin(char ch) {
+ return ch >= 'a' && ch <= 'z';
+ }
+
public boolean matches(String displayName) {
mMatchPositions.clear();
return matchesCombination(displayName, mQuery, mMatchPositions);
diff --git a/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java b/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java
index 8b7ee03f6..08939b48e 100644
--- a/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java
+++ b/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java
@@ -80,8 +80,19 @@ public class SmartDialNameMatcherTest extends TestCase {
checkMatches("William John Smith", "5764", true, 15, 16, 22, 25);
}
- // TODO: Do we want to make these pass anymore?
- @Suppress
+ public void testMatches_InitialWithSeparator() {
+ // wjs matches (W)illiam (J)ohn-(S)mith
+ checkMatches("William John-Smith", "957", true, 0, 1, 8, 9, 13, 14);
+ // wjoshe matches (W)illiam (J)ohn-(O'She)a
+ checkMatches("William John-O'Shea", "956743", true, 0, 1, 8, 9, 13, 18);
+ // wjohn matches (W)illiam-(John) Smith
+ checkMatches("William-John Smith", "95646", true, 0, 1, 8, 12);
+ // jsmi matches William (J)ohn-(Smi)th
+ checkMatches("William John-Smith", "5764", true, 8, 9, 13, 16);
+ // make sure repeated separators don't mess things up (TODO confirm: offsets 15/22 suggest a longer run of spaces before John than shown here)
+ checkMatches("William John---Smith", "5764", true, 15, 16, 22, 25);
+ }
+
public void testMatches_repeatedSeparators() {
// Simple match for single token
checkMatches("John,,,,,Doe", "5646", true, 0, 4);
@@ -91,6 +102,15 @@ public class SmartDialNameMatcherTest extends TestCase {
checkMatches("John,,,,,Doe", "363", true, 9, 12);
}
+ public void testMatches_LatinMix() {
+ // Latin + Chinese characters: lwang matches (L)ee王力(Wang)宏
+ checkMatches("Lee王力Wang宏", "59264", true, 0, 1, 5, 9);
+ // Latin + Japanese characters: abcdefghijkl is reported as one span from index 1 to 16
+ checkMatches("千Abcd佳智Efgh佳IJKL", "222333444555", true, 1, 16);
+ // Latin + Arabic characters: pjames matches the leading (P) and the trailing (James)
+ checkMatches("Peterعبد الرحمنJames", "752637", true, 0, 1, 15, 20);
+ }
+
public void testMatches_umlaut() {
checkMatches("ÄÖÜäöü", "268268", true, 0, 6);
}