import re
from unidecode import unidecode

from ..utils import squeeze, translation, check_str, check_empty
from .phonetic_algorithm import PhoneticAlgorithm


class Lein(PhoneticAlgorithm):
    """
    The Lein name coding procedure.

    Encodes a word as its first letter followed by digits derived from the
    remaining consonants, padded with zeros (or cut) to exactly 4 characters.

    [Reference]: http://naldc.nal.usda.gov/download/27833/PDF
    """
    def __init__(self):
        super().__init__()

        # Letter -> digit table; `phonetics` reads it through `.get(char, char)`.
        # NOTE(review): `translation` comes from ..utils; presumably it pairs
        # the two strings position by position -- confirm against the helper.
        self.translations = translation(
            'DTMNLRBFPVCJKGQSXZ',
            '112233444455555555'
        )

        # Right-pad with zeros, then cut to exactly 4 characters.
        self.pad = lambda code: '{}0000'.format(code)[:4]

    def phonetics(self, word):
        """
        Return the 4-character Lein code of `word`.

        :param word: the string to encode.
        :return: the first letter of the cleaned word followed by digits,
            zero-padded / truncated to 4 characters.
        :raises: whatever `check_str` / `check_empty` raise for invalid input;
            `check_empty` is applied again once non-letters have been removed.
        """
        check_str(word)
        check_empty(word)

        word = unidecode(word).upper()

        # Strip everything that is not A-Z (punctuation, digits, whitespace).
        # The previous pattern `[^A-Z]\s` only matched a non-letter that was
        # followed by whitespace, so characters such as "'" or "-" leaked
        # into the resulting code.
        word = re.sub(r'[^A-Z]', r'', word)

        # The cleanup may have removed every character (e.g. "123"); validate
        # again so `word[0]` below cannot fail with an IndexError.
        check_empty(word)

        # Keep the 1st letter
        first, code = word[0], word[1:]

        # Drop vowels and Y, W & H
        code = re.sub(r'[AEIOUYWH]', r'', code)

        # Drop consecutive duplicates and truncate to 4 chars
        # (`pad` below trims the final result to 4 characters overall).
        code = squeeze(code)[0: 4]

        # Translations
        code = ''.join(self.translations.get(char, char) for char in code)

        return self.pad(first + code)