A .gitignore => .gitignore +198 -0
@@ 0,0 1,198 @@
+# Created by .ignore support plugin (hsz.mobi)
+### VirtualEnv template
+# Virtualenv
+# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
+.Python
+[Bb]in
+[Ii]nclude
+[Ll]ib
+[Ll]ib64
+[Ll]ocal
+[Ss]cripts
+pyvenv.cfg
+.venv
+pip-selfcheck.json
+
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+### JetBrains template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn. Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
M LICENSE.rst => LICENSE.rst +1 -1
@@ 1,6 1,6 @@
The MIT License (MIT)
-Copyright (c) 2016 Guillaume Plique (Yomguithereal)
+Copyright (c) 2020 Ilias Koutsakis
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
D README.rst => README.rst +0 -54
@@ 1,54 0,0 @@
-===========
-Pyphonetics
-===========
-
-Pyphonetics is a Python 3 library for phonetic algorithms. Right now, the following algorithms are implemented and supported:
-
- * Soundex
- * Metaphone
- * Refined Soundex
- * Fuzzy Soundex
- * Lein
- * Matching Rating Approach
-
-More will be added in the future.
-
-Instalation
-***********
-
-The module is available in PyPI, just use `pip install pyphonetics`.
-
-
-Usage
-*****
-
- >>> from pyphonetics import Soundex
- >>> soundex = Soundex()
- >>> soundex.phonetics('Rupert')
- 'R163'
- >>> soundex.phonetics('Robert')
- 'R163'
- >>> soundex.sounds_like('Robert', 'Rupert')
- True
-
-
-The same API applies to every algorithm, e.g:
-
- >>> from pyphonetics import Metaphone
- >>> metaphone = Metaphone()
- >>> metaphone.phonetics('discrimination')
- 'TSKRMNXN'
-
-You can also use the `distance(word1, word2, metric='levenshtein')` method to find the distance between 2 phonetic representations.
-
- >>> from pyphonetics import RefinedSoundex
- >>> rs = RefinedSoundex()
- >>> rs.distance('Rupert', 'Robert')
- 0
- >>> rs.distance('assign', 'assist', metric='hamming')
- 2
-
-Credits
-=======
-
-The module was largely based on the implementation of phonetic algorithms found in the Talisman.js (https://github.com/Yomguithereal/talisman) Node NLP library.>
\ No newline at end of file
D flit.ini => flit.ini +0 -18
@@ 1,18 0,0 @@
-[metadata]
-module=pyphonetics
-author=Lilykos
-author-email=lilykosk@gmail.com
-home-page=http://github.com/Lilykos/pyphonetics
-requires=unidecode
- pytest
-requires-python= >=3
-description-file=README.rst
-classifiers=Intended Audience :: Developers
- Programming Language :: Python :: 3
- Topic :: Software Development :: Libraries :: Python Modules
-
-# If you want command line scripts, this is how to declare them.
-# If not, you can leave this section out completely.
-# [scripts]
-# # foobar:main means the script will do: from foobar import main; main()
-# foobar=foobar:main>
\ No newline at end of file
M pyphonetics/__init__.py => pyphonetics/__init__.py +1 -1
@@ 6,4 6,4 @@ from .phonetics import (Soundex,
Lein,
RefinedSoundex)
-__version__ = '0.4.1'
+__version__ = '0.5'
M pyphonetics/exceptions.py => pyphonetics/exceptions.py +4 -0
@@ 8,3 8,7 @@ class WrongLengthException(Exception):
class DistanceMetricError(Exception):
pass
+
+
+class EmptyStringError(Exception):  # raised by utils.check_empty() for zero-length input
+ pass
M pyphonetics/phonetics/fuzzy_soundex.py => pyphonetics/phonetics/fuzzy_soundex.py +3 -7
@@ 1,8 1,7 @@
import re
from unidecode import unidecode
-from ..utils import squeeze, translation
-from ..exceptions import UnicodeException
+from ..utils import squeeze, translation, check_empty, check_str
from .phonetic_algorithm import PhoneticAlgorithm
@@ 53,11 52,8 @@ class FuzzySoundex(PhoneticAlgorithm):
self.set4 = 'HWY'
def phonetics(self, word):
- if not isinstance(word, str):
- raise UnicodeException('Expected a unicode string!')
-
- if not word:
- return ''
+ check_str(word)
+ check_empty(word)
word = unidecode(word).upper()
M pyphonetics/phonetics/lein.py => pyphonetics/phonetics/lein.py +3 -4
@@ 1,8 1,7 @@
import re
from unidecode import unidecode
-from ..utils import squeeze, translation
-from ..exceptions import UnicodeException
+from ..utils import squeeze, translation, check_str, check_empty
from .phonetic_algorithm import PhoneticAlgorithm
@@ 23,8 22,8 @@ class Lein(PhoneticAlgorithm):
self.pad = lambda code: '{}0000'.format(code)[:4]
def phonetics(self, word):
- if not isinstance(word, str):
- raise UnicodeException('Expected a unicode string!')
+ check_str(word)
+ check_empty(word)
word = unidecode(word).upper()
word = re.sub(r'[^A-Z]\s', r'', word)
M pyphonetics/phonetics/metaphone.py => pyphonetics/phonetics/metaphone.py +3 -3
@@ 1,7 1,7 @@
import re
from unidecode import unidecode
-from ..exceptions import UnicodeException
+from ..utils import check_str, check_empty
from .phonetic_algorithm import PhoneticAlgorithm
@@ 46,8 46,8 @@ class Metaphone(PhoneticAlgorithm):
]
def phonetics(self, word):
- if not isinstance(word, str):
- raise UnicodeException('Expected a unicode string!')
+ check_str(word)
+ check_empty(word)
code = unidecode(word).lower()
for item in self.rules:
M pyphonetics/phonetics/mra.py => pyphonetics/phonetics/mra.py +3 -4
@@ 1,8 1,7 @@
import re
from unidecode import unidecode
-from ..utils import squeeze
-from ..exceptions import UnicodeException
+from ..utils import squeeze, check_empty, check_str
from .phonetic_algorithm import PhoneticAlgorithm
@@ 19,8 18,8 @@ class MatchingRatingApproach(PhoneticAlgorithm):
super().__init__()
def phonetics(self, word):
- if not isinstance(word, str):
- raise UnicodeException('Expected a unicode string!')
+ check_str(word)
+ check_empty(word)
codex = unidecode(word).upper()
codex = re.sub(r'[^A-Z]', r'', codex)
M pyphonetics/phonetics/refined_soundex.py => pyphonetics/phonetics/refined_soundex.py +3 -4
@@ 1,8 1,7 @@
import re
from unidecode import unidecode
-from ..utils import translation, squeeze
-from ..exceptions import UnicodeException
+from ..utils import translation, squeeze, check_str, check_empty
from .phonetic_algorithm import PhoneticAlgorithm
@@ 22,8 21,8 @@ class RefinedSoundex(PhoneticAlgorithm):
)
def phonetics(self, word):
- if not isinstance(word, str):
- raise UnicodeException('Expected a unicode string!')
+ check_str(word)
+ check_empty(word)
word = unidecode(word).upper()
word = re.sub(r'[^A-Z]', r'', word)
M pyphonetics/phonetics/soundex.py => pyphonetics/phonetics/soundex.py +6 -6
@@ 1,8 1,7 @@
import re
from unidecode import unidecode
-from ..utils import translation, squeeze
-from ..exceptions import UnicodeException
+from ..utils import translation, squeeze, check_str, check_empty
from .phonetic_algorithm import PhoneticAlgorithm
@@ 23,8 22,8 @@ class Soundex(PhoneticAlgorithm):
self.pad = lambda code: '{}0000'.format(code)[:4]
def phonetics(self, word):
- if not isinstance(word, str):
- raise UnicodeException('Expected a unicode string!')
+ check_str(word)
+ check_empty(word)
word = unidecode(word).upper()
word = re.sub(r'[^A-Z]', r'', word)
@@ 34,8 33,9 @@ class Soundex(PhoneticAlgorithm):
if self.translations[char] != 'D')
# Dropping first code's letter if duplicate
- if tail[0] == self.translations[first_letter]:
- tail = tail[1:]
+ if len(tail):
+ if tail[0] == self.translations[first_letter]:
+ tail = tail[1:]
code = squeeze(tail).replace('0', '')
return self.pad(first_letter + code)
M pyphonetics/utils.py => pyphonetics/utils.py +14 -1
@@ 1,6 1,7 @@
from itertools import groupby
-from .exceptions import WrongLengthException
+from .exceptions import WrongLengthException, UnicodeException, \
+ EmptyStringError
def translation(first, second):
@@ 13,3 14,15 @@ def translation(first, second):
def squeeze(word):
"""Squeeze the given sequence by dropping consecutive duplicates."""
return ''.join(x[0] for x in groupby(word))
+
+
+def check_str(word):
+ """Raise UnicodeException unless `word` is a `str` instance."""
+ if not isinstance(word, str):
+ raise UnicodeException('Expected a unicode string!')
+
+
+def check_empty(word):
+ """Raise EmptyStringError when `word` is the empty string."""
+ if not word:
+ raise EmptyStringError('The given string is empty.')
A pyproject.toml => pyproject.toml +25 -0
@@ 0,0 1,25 @@
+[build-system]
+# Only what is needed to *build* the package belongs here; runtime and test
+# dependencies are declared under [tool.flit.metadata] below.
+requires = ["flit_core >=2,<3"]
+build-backend = "flit_core.buildapi"
+
+[tool.flit.metadata]
+module = "pyphonetics"
+author = "Lilykos"
+author-email = "ilias.koutsakis@gmail.com"
+home-page = "https://github.com/Lilykos/pyphonetics"
+# Runtime dependency: the phonetics modules import unidecode.
+requires = ["unidecode"]
+# Carried over from the removed flit.ini.
+requires-python = ">=3"
+classifiers = [
+ "License :: OSI Approved :: MIT License",
+ "Programming Language :: Python :: 3",
+ "Intended Audience :: Developers",
+ "Topic :: Software Development :: Libraries :: Python Modules"
+]
+
+[tool.flit.metadata.requires-extra]
+# pytest is only needed to run the test suite.
+test = ["pytest"]
A tests/test_corner_cases.py => tests/test_corner_cases.py +30 -0
@@ 0,0 1,30 @@
+import pytest
+from pyphonetics import Soundex, RefinedSoundex, FuzzySoundex
+from pyphonetics.exceptions import EmptyStringError
+
+
+def test_soundex():  # one-letter input is zero-padded to 4 chars; '' raises EmptyStringError
+ soundex = Soundex()
+
+ assert soundex.phonetics('h') == 'H000'
+ assert soundex.phonetics('d') == 'D000'
+
+ with pytest.raises(EmptyStringError):
+ soundex.phonetics('')
+
+
+def test_refined_soundex():  # one-letter input: 'h' yields 'H', 'd' yields 'D6'; '' raises
+ soundex = RefinedSoundex()
+
+ assert soundex.phonetics('h') == 'H'
+ assert soundex.phonetics('d') == 'D6'
+
+ with pytest.raises(EmptyStringError):
+ soundex.phonetics('')
+
+
+def test_fuzzy_soundex():  # '' raises EmptyStringError (this commit removes the old `return ''`)
+ soundex = FuzzySoundex()
+
+ with pytest.raises(EmptyStringError):
+ soundex.phonetics('')
M tests/test_phonetics.py => tests/test_phonetics.py +0 -1
@@ 106,7 106,6 @@ def test_mra():
def test_fuzzy_soundex():
tests = [
- ('', ''),
('Kristen', 'K6935'),
('Krissy', 'K69'),
('Christen', 'K6935'),