from typing import List
import string
from ..utils import check_str, check_empty
from .phonetic_algorithm import PhoneticAlgorithm
class NYSIIS(PhoneticAlgorithm):
    """
    The NYSIIS (New York State Identification and Intelligence System) algorithm.

    Encodes a name as an upper-case phonetic key. Keys longer than six
    characters are returned as the first six characters followed by the
    remainder in square brackets, e.g. ``ABCDEF[GH]``.

    [Reference]: https://en.wikipedia.org/wiki/New_York_State_Identification_and_Intelligence_System & http://www.dropby.com/NYSIIS.html
    [Authors]:
    """

    def __init__(self):
        super().__init__()

    # Vowels are defined once at class level so they are not rebuilt on every call.
    __vowels: List[str] = ["A", "E", "I", "O", "U"]

    def __replaceAt(self, input: str, index: int, replace: str = "") -> str:
        """
        Return ``input`` with the characters at ``index`` overwritten by ``replace``.

        ``len(replace)`` characters are overwritten; an empty ``replace``
        deletes exactly one character at ``index``.
        """
        return input[:index] + replace + input[(len(replace) or 1) + index:]

    def phonetics(self, word: str) -> str | None:
        """
        Compute the NYSIIS key of ``word``.

        :param word: the name to encode. It is first passed to the project
            helpers ``check_str`` and ``check_empty`` (their behaviour is
            defined outside this class), then trimmed and upper-cased.
        :return: the phonetic key, or ``None`` when ``word`` is empty or
            contains only whitespace.
        """
        check_str(word)
        check_empty(word)

        # Fail fast if there isn't an input value to code (code defensively).
        if not word:
            return None

        # The rules below are written for trimmed, upper-case text
        # (the wiki algorithm doesn't mention this as a first step).
        name = word.strip().upper()
        if not name:
            return None

        # Remembered for step 10, before any rule rewrites the name.
        first_letter = name[0]

        # Step 1. Rewrite a leading letter group; only the first match applies,
        # so "KN" must be tested before the single "K".
        prefix_rules = (
            ("MAC", "MCC"),
            ("KN", "NN"),
            ("K", "C"),
            ("PH", "FF"),
            ("PF", "FF"),
            ("SCH", "SSS"),
        )
        for prefix, replacement in prefix_rules:
            if name.startswith(prefix):
                name = replacement + name[len(prefix):]
                break

        # Step 2. Rewrite a trailing letter group; exactly one rule applies so a
        # freshly produced "D" is never combined with the letter before it.
        if name.endswith(("EE", "IE")):
            name = name[:-2] + "Y"
        elif name.endswith(("DT", "RT", "RD", "NT", "ND")):
            name = name[:-2] + "D"

        # Steps 3-4. The first character is kept as is; scanning starts at the second.
        vowels = self.__vowels
        idx = 1
        while idx < len(name):
            current = name[idx]

            # Step 5. (1) Only letters are coded: drop anything else (spaces,
            # punctuation, digits) and re-examine the same index.
            if current not in string.ascii_letters:
                name = self.__replaceAt(name, idx)
                continue

            previous = name[idx - 1]
            # Empty string when ``current`` is the last character.
            following = name[idx + 1:idx + 2]

            # Step 5. (2) Vowels collapse to "A", except "EV" which becomes "AF".
            if current in vowels:
                if name[idx:idx + 2] == "EV":
                    name = self.__replaceAt(name, idx, "AF")
                else:
                    name = self.__replaceAt(name, idx, "A")
            # Step 5. (3)
            elif current == "Q":
                name = self.__replaceAt(name, idx, "G")
            elif current == "Z":
                name = self.__replaceAt(name, idx, "S")
            elif current == "M":
                name = self.__replaceAt(name, idx, "N")
            # Step 5. (4) "K" becomes "N" before an "N", otherwise "C".
            elif current == "K":
                name = self.__replaceAt(name, idx, "N" if following == "N" else "C")
            # Step 5. (5)
            elif name[idx:idx + 3] == "SCH":
                name = self.__replaceAt(name, idx, "SSS")
            elif name[idx:idx + 2] == "PH":
                name = self.__replaceAt(name, idx, "FF")
            # Step 5. (6) "H" is kept only between two vowels; otherwise it
            # takes the value of the preceding letter.
            elif current == "H" and (previous not in vowels or following not in vowels):
                name = self.__replaceAt(name, idx, previous)
            # Step 5. (7)
            elif current == "W" and previous in vowels:
                name = self.__replaceAt(name, idx, previous)

            # Step 6. Drop a letter equal to the one before it and re-examine
            # the same index; otherwise move on.
            if name[idx] == name[idx - 1]:
                name = self.__replaceAt(name, idx)
                continue
            idx += 1

        # Step 7.
        if name.endswith("S"):
            name = name[:-1]
        # Step 8. A trailing "AY" becomes "Y".
        if name.endswith("AY"):
            name = name[:-2] + "Y"
        # Step 9.
        if name.endswith("A"):
            name = name[:-1]
        # Step 10. Ensure the output includes at minimum the first letter of the input.
        if not name:
            name = first_letter

        return f"{name[:6]}[{name[6:]}]" if len(name) > 6 else name