import re from unidecode import unidecode from ..utils import translation, squeeze, check_str, check_empty from .phonetic_algorithm import PhoneticAlgorithm class RefinedSoundex(PhoneticAlgorithm): """ The Refined Soundex algorithm. [Reference]: https://en.wikipedia.org/wiki/Soundex [Authors]: Robert C. Russel, Margaret King Odell """ def __init__(self): super().__init__() self.translations = translation( 'AEIOUYWHBPFVCKSGJQXZDTLMNR', '000000DD112233344555667889' ) def phonetics(self, word): check_str(word) check_empty(word) word = unidecode(word).upper() word = re.sub(r'[^A-Z]', r'', word) first_letter = word[0] tail = ''.join(self.translations[char] for char in word if self.translations[char] != 'D') return first_letter + squeeze(tail)