~linuxgoose/linguistics-robin

ref: f61602823a31a9fe5eeddb01debbcb4188d35056 linguistics-robin/linguistics_robin/phonetics/refined_soundex.py -rw-r--r-- 912 bytes
f6160282 — Jordan Robinson Merge pull request #25 from linuxgoose/23-caverphone-2-__vowels-undefined-error 8 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import re
from unidecode import unidecode

from ..utils import translation, squeeze, check_str, check_empty
from .phonetic_algorithm import PhoneticAlgorithm


class RefinedSoundex(PhoneticAlgorithm):
    """
    The Refined Soundex algorithm.

    The key produced by :meth:`phonetics` is the first letter of the
    normalised word followed by the per-letter codes of the whole word
    (first letter included), post-processed by ``squeeze``.

    [Reference]: https://en.wikipedia.org/wiki/Soundex
    [Authors]: Robert C. Russell, Margaret King Odell
    """
    def __init__(self):
        super().__init__()

        # Letter -> code table, indexed by upper-case letter in phonetics().
        # All 26 letters A-Z are present. Vowels and Y share the code '0';
        # W and H carry the marker 'D', which phonetics() filters out so
        # they contribute nothing to the key.
        # NOTE(review): assumes translation() pairs the two strings
        # position by position -- confirm against utils.
        self.translations = translation(
            'AEIOUYWHBPFVCKSGJQXZDTLMNR',
            '000000DD112233344555667889'
        )

    def phonetics(self, word):
        """
        Return the Refined Soundex key of ``word``.

        The word is validated with ``check_str`` and ``check_empty``,
        transliterated with ``unidecode``, upper-cased and stripped of
        every character outside A-Z before encoding.

        :param word: the string to encode.
        :return: the first remaining letter followed by the squeezed
            code string.
        """
        check_str(word)
        check_empty(word)

        word = re.sub(r'[^A-Z]', r'', unidecode(word).upper())
        # Input such as '123' or '!!!' has no letters left at this point.
        # Validate again so it goes through the same emptiness check as the
        # raw input instead of failing with a bare IndexError on word[0].
        check_empty(word)

        first_letter = word[0]
        # Look each letter up once, then drop the 'D' (W/H) markers.
        codes = (self.translations[char] for char in word)
        tail = ''.join(code for code in codes if code != 'D')

        return first_letter + squeeze(tail)