~linuxgoose/linguistics-robin

63915e4dce58f8bb8c89156a0dcecadc3b972a60 — Jordan 8 months ago c5ab0a6
Fix Soundex so that all leading letters sharing the first letter's code are dropped, not just the first one
3 files changed, 25 insertions(+), 5 deletions(-)

M pyphonetics/phonetics/soundex.py
M tests/test_corner_cases.py
M tests/test_phonetics.py
M pyphonetics/phonetics/soundex.py => pyphonetics/phonetics/soundex.py +12 -3
@@ 32,10 32,19 @@ class Soundex(PhoneticAlgorithm):
        tail = ''.join(self.translations[char] for char in word
                       if self.translations[char] != 'D')

        # Dropping first code's letter if duplicate
        # Dropping all leading code's letters if same as first letter - AMERICAN SOUNDEX RULE
        if len(tail):
            if tail[0] == self.translations[first_letter]:
                tail = tail[1:]
            print(word)
            for i, char in enumerate(tail):
                if char != self.translations[first_letter] and len(tail) > 1:
                    tail = tail[i:]
                    break
                if len(tail) == 1:
                    if tail[0] == self.translations[first_letter]:
                        tail = tail[1:]
                        break
                if tail[i+1:] == '':
                    tail = tail[i+1:]

        code = squeeze(tail).replace('0', '')
        return self.pad(first_letter + code)

M tests/test_corner_cases.py => tests/test_corner_cases.py +12 -1
@@ 5,9 5,17 @@ from pyphonetics.exceptions import EmptyStringError

def test_soundex():
    """Check Soundex corner cases: repeated leading letters and empty input."""
    encoder = Soundex()

    # (input word, expected code) pairs, evaluated in order.
    expectations = (
        ('h', 'H000'),
        ('hh', 'H000'),
        ('hhh', 'H000'),
        ('d', 'D000'),
        ('dd', 'D000'),
        ('ddd', 'D000'),
        ('ddm', 'D500'),
        ('ddmmmm', 'D500'),
        ('Pffister', 'P236'),
        ('Pfister', 'P236'),
    )
    for word, expected in expectations:
        assert encoder.phonetics(word) == expected

    # An empty string must be rejected rather than encoded.
    with pytest.raises(EmptyStringError):
        encoder.phonetics('')


@@ 26,5 34,8 @@ def test_refined_soundex():
def test_fuzzy_soundex():
    """Check FuzzySoundex corner cases: C/K spellings and empty input."""
    encoder = FuzzySoundex()

    # Both spellings are expected to produce the same code.
    for name in ('Catharine', 'Katharine'):
        assert encoder.phonetics(name) == 'K365'

    # An empty string must be rejected rather than encoded.
    with pytest.raises(EmptyStringError):
        encoder.phonetics('')

M tests/test_phonetics.py => tests/test_phonetics.py +1 -1
@@ 28,7 28,7 @@ def test_soundex():
        ('A261', 'Ashcroft'),
        ('A261', 'Ashcraft'),
        ('T522', 'Tymczak'),
        ('P123', 'Pfister'),
        ('P236', 'Pfister'),
        ('A536', 'Andrew'),
        ('W252', 'Wozniak'),
        ('C423', 'Callister'),