# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0
"""
Thai soundex
Has three systems to choose from: Udom83 (default), LK82, and MetaSound
"""
from pythainlp.soundex import DEFAULT_SOUNDEX_ENGINE
from pythainlp.soundex.lk82 import lk82
from pythainlp.soundex.metasound import metasound
from pythainlp.soundex.prayut_and_somchaip import prayut_and_somchaip
from pythainlp.soundex.udom83 import udom83
# Other Thai soundex systems (not implemented yet): Arun91, KSS97
# [KSS97] https://linux.thai.net/~thep/soundex/soundex.html
[docs]
def soundex(
text: str, engine: str = DEFAULT_SOUNDEX_ENGINE, length: int = 4
) -> str:
"""
This function converts Thai text into phonetic code.
:param str text: word
:param str engine: soundex engine
:param int length: preferred length of the Soundex code (default is 4)\
for metasound and prayut_and_somchaip only
:return: Soundex code
:rtype: str
:Options for engine:
* *udom83* (default) - Thai soundex algorithm proposed
by Vichit Lorchirachoonkul [#udom83]_
* *lk82* - Thai soundex algorithm proposed by
Wannee Udompanich [#lk82]_
* *metasound* - Thai soundex algorithm based on a combination
of Metaphone and Soundex proposed by Snae & Brückner [#metasound]_
* *prayut_and_somchaip* - Thai-English Cross-Language Transliterated
Word Retrieval using Soundex Technique [#prayut_and_somchaip]_
:Example:
::
from pythainlp.soundex import soundex
soundex("ลัก"), soundex("ลัก", engine='lk82'), \\
soundex("ลัก", engine='metasound')
# output: ('ร100000', 'ร1000', 'ล100')
soundex("รัก"), soundex("รัก", engine='lk82'), \\
soundex("รัก", engine='metasound')
# output: ('ร100000', 'ร1000', 'ร100')
soundex("รักษ์"), soundex("รักษ์", engine='lk82'), \\
soundex("รักษ์", engine='metasound')
# output: ('ร100000', 'ร1000', 'ร100')
soundex("บูรณการ"), soundex("บูรณการ", engine='lk82'), \\
soundex("บูรณการ", engine='metasound')
# output: ('บ931900', 'บE419', 'บ551')
soundex("ปัจจุบัน"), soundex("ปัจจุบัน", engine='lk82'), \\
soundex("ปัจจุบัน", engine='metasound')
# output: ('ป775300', 'ป3E54', 'ป223')
soundex("vp", engine="prayut_and_somchaip")
# output: '11'
soundex("วีพี", engine="prayut_and_somchaip")
# output: '11'
"""
if engine == "lk82":
_soundex = lk82(text)
elif engine == "prayut_and_somchaip":
_soundex = prayut_and_somchaip(text, length=length)
elif engine == "metasound":
_soundex = metasound(text, length=length)
else: # default, use "udom83"
_soundex = udom83(text)
return _soundex