# -*- coding: utf-8 -*-
from pythainlp.corpus import get_corpus
__all__ = [
"countries",
"provinces",
"thai_negations",
"thai_stopwords",
"thai_syllables",
"thai_words",
]
_THAI_COUNTRIES = set()
_THAI_COUNTRIES_FILENAME = "countries_th.txt"
_THAI_THAILAND_PROVINCES = set()
_THAI_THAILAND_PROVINCES_FILENAME = "thailand_provinces_th.txt"
_THAI_SYLLABLES = set()
_THAI_SYLLABLES_FILENAME = "syllables_th.txt"
_THAI_WORDS = set()
_THAI_WORDS_FILENAME = "words_th.txt"
_THAI_STOPWORDS = set()
_THAI_STOPWORDS_FILENAME = "stopwords_th.txt"
_THAI_NEGATIONS = set()
_THAI_NEGATIONS_FILENAME = "negations_th.txt"
[docs]def countries() -> frozenset:
"""
Return a frozenset of country names in Thai
"""
global _THAI_COUNTRIES
if not _THAI_COUNTRIES:
_THAI_COUNTRIES = get_corpus(_THAI_COUNTRIES_FILENAME)
return _THAI_COUNTRIES
[docs]def provinces() -> frozenset:
"""
Return a frozenset of Thailand province names in Thai
"""
global _THAI_THAILAND_PROVINCES
if not _THAI_THAILAND_PROVINCES:
_THAI_THAILAND_PROVINCES = get_corpus(_THAI_THAILAND_PROVINCES_FILENAME)
return _THAI_THAILAND_PROVINCES
[docs]def thai_syllables() -> frozenset:
"""
Return a frozenset of Thai syllables
"""
global _THAI_SYLLABLES
if not _THAI_SYLLABLES:
_THAI_SYLLABLES = get_corpus(_THAI_SYLLABLES_FILENAME)
return _THAI_SYLLABLES
[docs]def thai_words() -> frozenset:
"""
Return a frozenset of Thai words
"""
global _THAI_WORDS
if not _THAI_WORDS:
_THAI_WORDS = get_corpus(_THAI_WORDS_FILENAME)
return _THAI_WORDS
[docs]def thai_stopwords() -> frozenset:
"""
Return a frozenset of Thai stopwords
"""
global _THAI_STOPWORDS
if not _THAI_STOPWORDS:
_THAI_STOPWORDS = get_corpus(_THAI_STOPWORDS_FILENAME)
return _THAI_STOPWORDS
[docs]def thai_negations() -> frozenset:
"""
Return a frozenset of Thai negation words
"""
global _THAI_NEGATIONS
if not _THAI_NEGATIONS:
_THAI_NEGATIONS = get_corpus(_THAI_NEGATIONS_FILENAME)
return _THAI_NEGATIONS