# ~linuxgoose/linguistics-robin (81a0dc8236096147eb60a03e19b6585a3c9dd3c6): tests/test_phonetics.py

from linguistics_robin import Metaphone, Soundex, MatchingRatingApproach,\
    FuzzySoundex, Lein, RefinedSoundex


def test_metaphone():
    """Check Metaphone codes for a set of sample English words."""
    # Maps each input word to the code the encoder is expected to produce.
    expected_codes = {
        'discrimination': 'TSKRMNXN',
        'hello': 'HL',
        'droid': 'TRT',
        'hypocrite': 'HPKRT',
        'well': 'WL',
        'am': 'AM',
        'say': 'S',
        'pheasant': 'FSNT',
        'god': 'KT',
    }

    encoder = Metaphone()
    for word, code in expected_codes.items():
        assert encoder.phonetics(word) == code


def test_soundex():
    """Check Soundex codes for a set of sample surnames and words."""
    # Maps each input name to the code the encoder is expected to produce.
    expected_codes = {
        'Rupert': 'R163',
        'Robert': 'R163',
        'Rubin': 'R150',
        'Ashcroft': 'A261',
        'Ashcraft': 'A261',
        'Tymczak': 'T522',
        'Pfister': 'P236',
        'Andrew': 'A536',
        'Wozniak': 'W252',
        'Callister': 'C423',
        'Hello': 'H400',
        'Martin': 'M635',
        'Bernard': 'B656',
        'Faure': 'F600',
        'Perez': 'P620',
        'Gros': 'G620',
        'Chapuis': 'C120',
        'Boyer': 'B600',
        'Gauthier': 'G360',
        'Rey': 'R000',
        'Barthélémy': 'B634',
        'Henry': 'H560',
        'Moulin': 'M450',
        'Rousseau': 'R200',
    }

    encoder = Soundex()
    for name, code in expected_codes.items():
        assert encoder.phonetics(name) == code


def test_soundex_refined():
    """Check RefinedSoundex codes for a set of sample words."""
    # Maps each input word to the code the encoder is expected to produce;
    # 'testing' and 'TESTING' are expected to yield the same code.
    expected_codes = {
        'testing': 'T6036084',
        'TESTING': 'T6036084',
        'The': 'T60',
        'quick': 'Q503',
        'brown': 'B1908',
        'fox': 'F205',
        'jumped': 'J408106',
        'over': 'O0209',
        'lazy': 'L7050',
        'dogs': 'D6043',
    }

    encoder = RefinedSoundex()
    for word, code in expected_codes.items():
        assert encoder.phonetics(word) == code
        
        
def test_soundex_homophones():
    """Check that every name within a homophone group shares one Soundex code."""
    homophone_groups = [
        ('Braz', 'Broz'),
        (
            'Caren', 'Caron', 'Carren', 'Charon', 'Corain', 'Coram',
            'Corran', 'Corrin', 'Corwin', 'Curran', 'Curreen', 'Currin',
            'Currom', 'Currum', 'Curwen',
        ),
        ('Hairs', 'Hark', 'Hars', 'Hayers', 'Heers', 'Hiers'),
        (
            'Lambard', 'Lambart', 'Lambert', 'Lambird', 'Lampaert',
            'Lampard', 'Lampart', 'Lamperd', 'Lampert', 'Lamport',
            'Limbert', 'Lombard',
        ),
        ('Nolton', 'Noulton'),
    ]

    encoder = Soundex()
    for group in homophone_groups:
        # Collapsing the codes into a set leaves a single element only when
        # every name in the group encodes identically.
        distinct_codes = {encoder.phonetics(name) for name in group}
        assert len(distinct_codes) == 1


def test_mra():
    """Check MatchingRatingApproach codes for a set of sample names."""
    # Maps each input name to the code the encoder is expected to produce.
    expected_codes = {
        'Byrne': 'BYRN',
        'Boern': 'BRN',
        'Smith': 'SMTH',
        'Smyth': 'SMYTH',
        'Catherine': 'CTHRN',
        'Kathryn': 'KTHRYN',
    }

    encoder = MatchingRatingApproach()
    for name, code in expected_codes.items():
        assert encoder.phonetics(name) == code


def test_fuzzy_soundex():
    """Check FuzzySoundex codes for a set of sample names and words."""
    # Maps each input word to the code the encoder is expected to produce.
    expected_codes = {
        'Kristen': 'K6935',
        'Krissy': 'K69',
        'Christen': 'K6935',
        'peter': 'P36',
        'pete': 'P3',
        'pedro': 'P36',
        'stephen': 'S315',
        'steve': 'S31',
        'smith': 'S53',
        'smythe': 'S53',
        'gail': 'G4',
        'gayle': 'G4',
        'guillaume': 'G45',
        'christine': 'K6935',
        'christina': 'K6935',
        'kristina': 'K6935',
        'Wight': 'W3',
        'Hardt': 'H6',
        'Knight': 'N3',
        'Czech': 'S7',
        'Tsech': 'S7',
        'gnomic': 'N59',
        'Wright': 'R3',
        'Hrothgar': 'R376',
        'Hwaet': 'W3',
        'Grant': 'G63',
        'Hart': 'H6',
    }

    encoder = FuzzySoundex()
    for word, code in expected_codes.items():
        assert encoder.phonetics(word) == code


def test_lein():
    """Check Lein codes for a set of sample names, including accented ones."""
    # Maps each input name to the code the encoder is expected to produce.
    expected_codes = {
        'Guillaume': 'G320',
        'Dabbs': 'D450',
        'Daves': 'D450',
        'Davies': 'D450',
        'Davis': 'D450',
        'Debaca': 'D450',
        'Debose': 'D450',
        'Debus': 'D450',
        'Defazio': 'D450',
        'Defigh': 'D450',
        'Deveaux': 'D450',
        'Devese': 'D450',
        'Devies': 'D450',
        'Devos': 'D450',
        'Dipiazza': 'D450',
        'Divish': 'D450',
        'Dobak': 'D450',
        'Dobbs': 'D450',
        'Dobis': 'D450',
        'Dobish': 'D450',
        'Dobosh': 'D450',
        'Doepke': 'D450',
        'Dopps': 'D450',
        'Doubek': 'D450',
        'Doviak': 'D450',
        'Dubbs': 'D450',
        'Dubke': 'D450',
        'Dubois': 'D450',
        'Duboise': 'D450',
        'Dubose': 'D450',
        'Dubs': 'D450',
        'Dubukey': 'D450',
        'Dubus': 'D450',
        'Dufek': 'D450',
        'Duffek': 'D450',
        'Dupas': 'D450',
        'Dupois': 'D450',
        'Dupuis': 'D450',
        'Arlène': 'A332',
        'Lüdenscheidt': 'L125',
    }

    encoder = Lein()
    for name, code in expected_codes.items():
        assert encoder.phonetics(name) == code