~linuxgoose/linguistics-robin: general updates

16 files changed, 288 insertions(+), 104 deletions(-)

A .gitignore
M LICENSE.rst
D README.rst
D flit.ini
M pyphonetics/__init__.py
M pyphonetics/exceptions.py
M pyphonetics/phonetics/fuzzy_soundex.py
M pyphonetics/phonetics/lein.py
M pyphonetics/phonetics/metaphone.py
M pyphonetics/phonetics/mra.py
M pyphonetics/phonetics/refined_soundex.py
M pyphonetics/phonetics/soundex.py
M pyphonetics/utils.py
A pyproject.toml
A tests/test_corner_cases.py
M tests/test_phonetics.py

A .gitignore => .gitignore +198 -0

@@ 0,0 1,198 @@
+# Created by .ignore support plugin (hsz.mobi)
+### VirtualEnv template
+# Virtualenv
+# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
+.Python
+[Bb]in
+[Ii]nclude
+[Ll]ib
+[Ll]ib64
+[Ll]ocal
+[Ss]cripts
+pyvenv.cfg
+.venv
+pip-selfcheck.json
+
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+### JetBrains template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+

M LICENSE.rst => LICENSE.rst +1 -1

@@ 1,6 1,6 @@
 The MIT License (MIT)
 
-Copyright (c) 2016 Guillaume Plique (Yomguithereal)
+Copyright (c) 2020 Ilias Koutsakis
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

D README.rst => README.rst +0 -54

@@ 1,54 0,0 @@
-===========
-Pyphonetics
-===========
-
-Pyphonetics is a Python 3 library for phonetic algorithms. Right now, the following algorithms are implemented and supported:
-
- * Soundex
- * Metaphone
- * Refined Soundex
- * Fuzzy Soundex
- * Lein
- * Matching Rating Approach
-
-More will be added in the future.
-
-Instalation
-***********
-
-The module is available in PyPI, just use `pip install pyphonetics`.
-
-
-Usage
-*****
-
-    >>> from pyphonetics import Soundex
-    >>> soundex = Soundex()
-    >>> soundex.phonetics('Rupert')
-    'R163'
-    >>> soundex.phonetics('Robert')
-    'R163'
-    >>> soundex.sounds_like('Robert', 'Rupert')
-    True
-
-
-The same API applies to every algorithm, e.g:
-
-    >>> from pyphonetics import Metaphone
-    >>> metaphone = Metaphone()
-    >>> metaphone.phonetics('discrimination')
-    'TSKRMNXN'
-
-You can also use the `distance(word1, word2, metric='levenshtein')` method to find the distance between 2 phonetic representations.
-
-    >>> from pyphonetics import RefinedSoundex
-    >>> rs = RefinedSoundex()
-    >>> rs.distance('Rupert', 'Robert')
-    0
-    >>> rs.distance('assign', 'assist', metric='hamming')
-    2
-
-Credits
-=======
-
-The module was largely based on the implementation of phonetic algorithms found in the Talisman.js (https://github.com/Yomguithereal/talisman) Node NLP library.>
\ No newline at end of file

D flit.ini => flit.ini +0 -18

@@ 1,18 0,0 @@
-[metadata]
-module=pyphonetics
-author=Lilykos
-author-email=lilykosk@gmail.com
-home-page=http://github.com/Lilykos/pyphonetics
-requires=unidecode
-    pytest
-requires-python= >=3
-description-file=README.rst
-classifiers=Intended Audience :: Developers
-    Programming Language :: Python :: 3
-    Topic :: Software Development :: Libraries :: Python Modules
-
-# If you want command line scripts, this is how to declare them.
-# If not, you can leave this section out completely.
-# [scripts]
-# # foobar:main means the script will do: from foobar import main; main()
-# foobar=foobar:main>
\ No newline at end of file

M pyphonetics/__init__.py => pyphonetics/__init__.py +1 -1

@@ 6,4 6,4 @@ from .phonetics import (Soundex,
                         Lein,
                         RefinedSoundex)
 
-__version__ = '0.4.1'
+__version__ = '0.5'

M pyphonetics/exceptions.py => pyphonetics/exceptions.py +4 -0

@@ 8,3 8,7 @@ class WrongLengthException(Exception):
 
 class DistanceMetricError(Exception):
     pass
+
+
+class EmptyStringError(Exception):  # raised by check_empty() on empty input
+    pass

M pyphonetics/phonetics/fuzzy_soundex.py => pyphonetics/phonetics/fuzzy_soundex.py +3 -7

@@ 1,8 1,7 @@
 import re
 from unidecode import unidecode
 
-from ..utils import squeeze, translation
-from ..exceptions import UnicodeException
+from ..utils import squeeze, translation, check_empty, check_str
 from .phonetic_algorithm import PhoneticAlgorithm
 
 


@@ 53,11 52,8 @@ class FuzzySoundex(PhoneticAlgorithm):
         self.set4 = 'HWY'
 
     def phonetics(self, word):
-        if not isinstance(word, str):
-            raise UnicodeException('Expected a unicode string!')
-
-        if not word:
-            return ''
+        check_str(word)
+        check_empty(word)
 
         word = unidecode(word).upper()

M pyphonetics/phonetics/lein.py => pyphonetics/phonetics/lein.py +3 -4

@@ 1,8 1,7 @@
 import re
 from unidecode import unidecode
 
-from ..utils import squeeze, translation
-from ..exceptions import UnicodeException
+from ..utils import squeeze, translation, check_str, check_empty
 from .phonetic_algorithm import PhoneticAlgorithm
 
 


@@ 23,8 22,8 @@ class Lein(PhoneticAlgorithm):
         self.pad = lambda code: '{}0000'.format(code)[:4]
 
     def phonetics(self, word):
-        if not isinstance(word, str):
-            raise UnicodeException('Expected a unicode string!')
+        check_str(word)
+        check_empty(word)
 
         word = unidecode(word).upper()
         word = re.sub(r'[^A-Z]\s', r'', word)

M pyphonetics/phonetics/metaphone.py => pyphonetics/phonetics/metaphone.py +3 -3

@@ 1,7 1,7 @@
 import re
 from unidecode import unidecode
 
-from ..exceptions import UnicodeException
+from ..utils import check_str, check_empty
 from .phonetic_algorithm import PhoneticAlgorithm
 
 


@@ 46,8 46,8 @@ class Metaphone(PhoneticAlgorithm):
         ]
 
     def phonetics(self, word):
-        if not isinstance(word, str):
-            raise UnicodeException('Expected a unicode string!')
+        check_str(word)
+        check_empty(word)
 
         code = unidecode(word).lower()
         for item in self.rules:

M pyphonetics/phonetics/mra.py => pyphonetics/phonetics/mra.py +3 -4

@@ 1,8 1,7 @@
 import re
 from unidecode import unidecode
 
-from ..utils import squeeze
-from ..exceptions import UnicodeException
+from ..utils import squeeze, check_empty, check_str
 from .phonetic_algorithm import PhoneticAlgorithm
 
 


@@ 19,8 18,8 @@ class MatchingRatingApproach(PhoneticAlgorithm):
         super().__init__()
 
     def phonetics(self, word):
-        if not isinstance(word, str):
-            raise UnicodeException('Expected a unicode string!')
+        check_str(word)
+        check_empty(word)
 
         codex = unidecode(word).upper()
         codex = re.sub(r'[^A-Z]', r'', codex)

M pyphonetics/phonetics/refined_soundex.py => pyphonetics/phonetics/refined_soundex.py +3 -4

@@ 1,8 1,7 @@
 import re
 from unidecode import unidecode
 
-from ..utils import translation, squeeze
-from ..exceptions import UnicodeException
+from ..utils import translation, squeeze, check_str, check_empty
 from .phonetic_algorithm import PhoneticAlgorithm
 
 


@@ 22,8 21,8 @@ class RefinedSoundex(PhoneticAlgorithm):
         )
 
     def phonetics(self, word):
-        if not isinstance(word, str):
-            raise UnicodeException('Expected a unicode string!')
+        check_str(word)
+        check_empty(word)
 
         word = unidecode(word).upper()
         word = re.sub(r'[^A-Z]', r'', word)

M pyphonetics/phonetics/soundex.py => pyphonetics/phonetics/soundex.py +6 -6

@@ 1,8 1,7 @@
 import re
 from unidecode import unidecode
 
-from ..utils import translation, squeeze
-from ..exceptions import UnicodeException
+from ..utils import translation, squeeze, check_str, check_empty
 from .phonetic_algorithm import PhoneticAlgorithm
 
 


@@ 23,8 22,8 @@ class Soundex(PhoneticAlgorithm):
         self.pad = lambda code: '{}0000'.format(code)[:4]
 
     def phonetics(self, word):
-        if not isinstance(word, str):
-            raise UnicodeException('Expected a unicode string!')
+        check_str(word)
+        check_empty(word)
 
         word = unidecode(word).upper()
         word = re.sub(r'[^A-Z]', r'', word)


@@ 34,8 33,9 @@ class Soundex(PhoneticAlgorithm):
                        if self.translations[char] != 'D')
 
         # Dropping first code's letter if duplicate
-        if tail[0] == self.translations[first_letter]:
-            tail = tail[1:]
+        if len(tail):
+            if tail[0] == self.translations[first_letter]:
+                tail = tail[1:]
 
         code = squeeze(tail).replace('0', '')
         return self.pad(first_letter + code)

M pyphonetics/utils.py => pyphonetics/utils.py +14 -1

@@ 1,6 1,7 @@
 from itertools import groupby
 
-from .exceptions import WrongLengthException
+from .exceptions import WrongLengthException, UnicodeException, \
+    EmptyStringError
 
 
 def translation(first, second):


@@ 13,3 14,15 @@ def translation(first, second):
 def squeeze(word):
     """Squeeze the given sequence by dropping consecutive duplicates."""
     return ''.join(x[0] for x in groupby(word))
+
+
+def check_str(word):
+    """Raise UnicodeException unless ``word`` is a ``str`` instance."""
+    if not isinstance(word, str):
+        raise UnicodeException('Expected a unicode string!')
+
+
+def check_empty(word):
+    """Raise EmptyStringError when ``len(word)`` is zero."""
+    if not len(word):
+        raise EmptyStringError('The given string is empty.')

A pyproject.toml => pyproject.toml +19 -0

@@ 0,0 1,19 @@
+[build-system]
+requires = [
+    "flit_core >=2,<3",
+    "unidecode",  # NOTE(review): runtime dep; flit.ini had it in requires=
+    "pytest"  # NOTE(review): test-only dep; needed at build time?
+]
+build-backend = "flit_core.buildapi"
+
+[tool.flit.metadata]
+module = "pyphonetics"
+author = "Lilykos"
+author-email = "ilias.koutsakis@gmail.com"
+home-page = "https://github.com/Lilykos/pyphonetics"
+classifiers = [
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Intended Audience :: Developers",
+    "Topic :: Software Development :: Libraries :: Python Modules"
+]

A tests/test_corner_cases.py => tests/test_corner_cases.py +30 -0

@@ 0,0 1,30 @@
+import pytest
+from pyphonetics import Soundex, RefinedSoundex, FuzzySoundex
+from pyphonetics.exceptions import EmptyStringError
+
+
+def test_soundex():  # corner cases: one-letter words and empty input
+    soundex = Soundex()
+
+    assert soundex.phonetics('h') == 'H000'  # letter kept, zero-padded to 4
+    assert soundex.phonetics('d') == 'D000'
+
+    with pytest.raises(EmptyStringError):  # '' is rejected, not encoded
+        soundex.phonetics('')
+
+
+def test_refined_soundex():  # corner cases: one-letter words, empty input
+    soundex = RefinedSoundex()
+
+    assert soundex.phonetics('h') == 'H'  # expected code has no digit
+    assert soundex.phonetics('d') == 'D6'  # letter followed by one digit
+
+    with pytest.raises(EmptyStringError):  # '' is rejected, not encoded
+        soundex.phonetics('')
+
+
+def test_fuzzy_soundex():  # '' used to encode to ''; it now raises
+    soundex = FuzzySoundex()
+
+    with pytest.raises(EmptyStringError):
+        soundex.phonetics('')

M tests/test_phonetics.py => tests/test_phonetics.py +0 -1

@@ 106,7 106,6 @@ def test_mra():
 
 def test_fuzzy_soundex():
     tests = [
-        ('', ''),
         ('Kristen', 'K6935'),
         ('Krissy', 'K69'),
         ('Christen', 'K6935'),