From 63915e4dce58f8bb8c89156a0dcecadc3b972a60 Mon Sep 17 00:00:00 2001 From: Jordan <37647414+linuxgoose@users.noreply.github.com> Date: Thu, 27 Mar 2025 22:53:58 +0000 Subject: [PATCH] Fixing of not dropping all leading instances of the first character matching the next in line --- pyphonetics/phonetics/soundex.py | 15 ++++++++++++--- tests/test_corner_cases.py | 13 ++++++++++++- tests/test_phonetics.py | 2 +- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/pyphonetics/phonetics/soundex.py b/pyphonetics/phonetics/soundex.py index 3b6934e46504fb6ef4fc860a5c13c0e08808505e..8349ba8d62f1f5182a5f3a30bdfffa1ebd744349 100644 --- a/pyphonetics/phonetics/soundex.py +++ b/pyphonetics/phonetics/soundex.py @@ -32,10 +32,19 @@ class Soundex(PhoneticAlgorithm): tail = ''.join(self.translations[char] for char in word if self.translations[char] != 'D') - # Dropping first code's letter if duplicate + # Dropping all leading code's letters if same as first letter - AMERICAN SOUNDEX RULE if len(tail): - if tail[0] == self.translations[first_letter]: - tail = tail[1:] + print(word) + for i, char in enumerate(tail): + if char != self.translations[first_letter] and len(tail) > 1: + tail = tail[i:] + break + if len(tail) == 1: + if tail[0] == self.translations[first_letter]: + tail = tail[1:] + break + if tail[i+1:] == '': + tail = tail[i+1:] code = squeeze(tail).replace('0', '') return self.pad(first_letter + code) diff --git a/tests/test_corner_cases.py b/tests/test_corner_cases.py index 6eb9ca95174fa8f25d1d02f1205111c189ffbc34..9ce4606f434c7e3d922bc74515460ce2beba6b48 100644 --- a/tests/test_corner_cases.py +++ b/tests/test_corner_cases.py @@ -5,9 +5,17 @@ from pyphonetics.exceptions import EmptyStringError def test_soundex(): soundex = Soundex() - + assert soundex.phonetics('h') == 'H000' + assert soundex.phonetics('hh') == 'H000' + assert soundex.phonetics('hhh') == 'H000' assert soundex.phonetics('d') == 'D000' + assert soundex.phonetics('dd') == 'D000' + assert soundex.phonetics('ddd') == 'D000' + assert soundex.phonetics('ddm') == 'D500' + assert soundex.phonetics('ddmmmm') == 'D500' + assert soundex.phonetics('Pffister') == 'P236' + assert soundex.phonetics('Pfister') == 'P236' with pytest.raises(EmptyStringError): soundex.phonetics('') @@ -26,5 +34,8 @@ def test_refined_soundex(): def test_fuzzy_soundex(): soundex = FuzzySoundex() + assert soundex.phonetics('Catharine') == 'K365' + assert soundex.phonetics('Katharine') == 'K365' + with pytest.raises(EmptyStringError): soundex.phonetics('') diff --git a/tests/test_phonetics.py b/tests/test_phonetics.py index 834d3ed1fbcf615be3331889c9dcdd14040a8119..293ffae5739edf38b123dced3c3cc49d40076e20 100644 --- a/tests/test_phonetics.py +++ b/tests/test_phonetics.py @@ -28,7 +28,7 @@ def test_soundex(): ('A261', 'Ashcroft'), ('A261', 'Ashcraft'), ('T522', 'Tymczak'), - ('P123', 'Pfister'), + ('P236', 'Pfister'), ('A536', 'Andrew'), ('W252', 'Wozniak'), ('C423', 'Callister'),