M pyphonetics/phonetics/soundex.py => pyphonetics/phonetics/soundex.py +12 -3
@@ 32,10 32,19 @@ class Soundex(PhoneticAlgorithm):
tail = ''.join(self.translations[char] for char in word
if self.translations[char] != 'D')
- # Dropping first code's letter if duplicate
+ # Drop every leading code that equals the first letter's code (American Soundex rule)
if len(tail):
- if tail[0] == self.translations[first_letter]:
- tail = tail[1:]
+ print(word)
+ for i, char in enumerate(tail):
+ if char != self.translations[first_letter] and len(tail) > 1:
+ tail = tail[i:]
+ break
+ if len(tail) == 1:
+ if tail[0] == self.translations[first_letter]:
+ tail = tail[1:]
+ break
+ if tail[i+1:] == '':
+ tail = tail[i+1:]
code = squeeze(tail).replace('0', '')
return self.pad(first_letter + code)
M tests/test_corner_cases.py => tests/test_corner_cases.py +12 -1
@@ 5,9 5,17 @@ from pyphonetics.exceptions import EmptyStringError
def test_soundex():
soundex = Soundex()
-
+
assert soundex.phonetics('h') == 'H000'
+ assert soundex.phonetics('hh') == 'H000'
+ assert soundex.phonetics('hhh') == 'H000'
assert soundex.phonetics('d') == 'D000'
+ assert soundex.phonetics('dd') == 'D000'
+ assert soundex.phonetics('ddd') == 'D000'
+ assert soundex.phonetics('ddm') == 'D500'
+ assert soundex.phonetics('ddmmmm') == 'D500'
+ assert soundex.phonetics('Pffister') == 'P236'
+ assert soundex.phonetics('Pfister') == 'P236'
with pytest.raises(EmptyStringError):
soundex.phonetics('')
@@ 26,5 34,8 @@ def test_refined_soundex():
def test_fuzzy_soundex():
soundex = FuzzySoundex()
+ assert soundex.phonetics('Catharine') == 'K365'
+ assert soundex.phonetics('Katharine') == 'K365'
+
with pytest.raises(EmptyStringError):
soundex.phonetics('')
M tests/test_phonetics.py => tests/test_phonetics.py +1 -1
@@ 28,7 28,7 @@ def test_soundex():
('A261', 'Ashcroft'),
('A261', 'Ashcraft'),
('T522', 'Tymczak'),
- ('P123', 'Pfister'),
+ ('P236', 'Pfister'),
('A536', 'Andrew'),
('W252', 'Wozniak'),
('C423', 'Callister'),