From 63915e4dce58f8bb8c89156a0dcecadc3b972a60 Mon Sep 17 00:00:00 2001
From: Jordan <37647414+linuxgoose@users.noreply.github.com>
Date: Thu, 27 Mar 2025 22:53:58 +0000
Subject: [PATCH] Soundex: drop all leading codes that match the first
 letter's code

---
 pyphonetics/phonetics/soundex.py |  9 +++++----
 tests/test_corner_cases.py       | 13 ++++++++++++-
 tests/test_phonetics.py          |  2 +-
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/pyphonetics/phonetics/soundex.py b/pyphonetics/phonetics/soundex.py
index 3b6934e46504fb6ef4fc860a5c13c0e08808505e..8349ba8d62f1f5182a5f3a30bdfffa1ebd744349 100644
--- a/pyphonetics/phonetics/soundex.py
+++ b/pyphonetics/phonetics/soundex.py
@@ -32,10 +32,11 @@ class Soundex(PhoneticAlgorithm):
         tail = ''.join(self.translations[char] for char in word
                        if self.translations[char] != 'D')
 
-        # Dropping first code's letter if duplicate
-        if len(tail):
-            if tail[0] == self.translations[first_letter]:
-                tail = tail[1:]
+        # American Soundex: letters next to the first letter that share its
+        # code are not encoded again, so strip the whole leading run of that
+        # code (e.g. 'Pffister' -> 'P236'). Codes equal to 'D' are filtered
+        # out of tail above, so this is a no-op when the first code is 'D'.
+        tail = tail.lstrip(self.translations[first_letter])
 
         code = squeeze(tail).replace('0', '')
         return self.pad(first_letter + code)
diff --git a/tests/test_corner_cases.py b/tests/test_corner_cases.py
index 6eb9ca95174fa8f25d1d02f1205111c189ffbc34..9ce4606f434c7e3d922bc74515460ce2beba6b48 100644
--- a/tests/test_corner_cases.py
+++ b/tests/test_corner_cases.py
@@ -5,9 +5,17 @@ from pyphonetics.exceptions import EmptyStringError
 
 def test_soundex():
     soundex = Soundex()
-
+    
     assert soundex.phonetics('h') == 'H000'
+    assert soundex.phonetics('hh') == 'H000'
+    assert soundex.phonetics('hhh') == 'H000'
     assert soundex.phonetics('d') == 'D000'
+    assert soundex.phonetics('dd') == 'D000'
+    assert soundex.phonetics('ddd') == 'D000'
+    assert soundex.phonetics('ddm') == 'D500'
+    assert soundex.phonetics('ddmmmm') == 'D500'
+    assert soundex.phonetics('Pffister') == 'P236'
+    assert soundex.phonetics('Pfister') == 'P236'
 
     with pytest.raises(EmptyStringError):
         soundex.phonetics('')
@@ -26,5 +34,8 @@ def test_refined_soundex():
 def test_fuzzy_soundex():
     soundex = FuzzySoundex()
 
+    assert soundex.phonetics('Catharine') == 'K365'
+    assert soundex.phonetics('Katharine') == 'K365'
+
     with pytest.raises(EmptyStringError):
         soundex.phonetics('')
diff --git a/tests/test_phonetics.py b/tests/test_phonetics.py
index 834d3ed1fbcf615be3331889c9dcdd14040a8119..293ffae5739edf38b123dced3c3cc49d40076e20 100644
--- a/tests/test_phonetics.py
+++ b/tests/test_phonetics.py
@@ -28,7 +28,7 @@ def test_soundex():
         ('A261', 'Ashcroft'),
         ('A261', 'Ashcraft'),
         ('T522', 'Tymczak'),
-        ('P123', 'Pfister'),
+        ('P236', 'Pfister'),
         ('A536', 'Andrew'),
         ('W252', 'Wozniak'),
         ('C423', 'Callister'),