import re from unidecode import unidecode from ..utils import squeeze, check_empty, check_str from .phonetic_algorithm import PhoneticAlgorithm class MatchingRatingApproach(PhoneticAlgorithm): """ Functions related to the computation of the Match Rating Approach codex. [Reference]: https://en.wikipedia.org/wiki/Match_rating_approach [Article]: Moore, G B.; Kuhns, J L.; Treffzs, J L.; Montgomery, C A. (Feb 1, 1977). Accessing Individual Records from Personal Data Files Using Nonunique Identifiers. US National Institute of Standards and Technology. p. 17. NIST SP - 500-2. """ def __init__(self): super().__init__() def phonetics(self, word): check_str(word) check_empty(word) codex = unidecode(word).upper() codex = re.sub(r'[^A-Z]', r'', codex) # Dropping non - leading vowels codex = codex[0] + re.sub(r'[AEIOU]', r'', codex[1:]) # Dropping consecutive consonants codex = squeeze(codex) # Returning the codex offset = min(3, len(codex) - 3) return codex[:3] + codex[len(codex) - offset:offset + len(codex)]