Interactive online version: Binder badge Google Colab badge

ค้นหาคำที่มีเสียงคล้องจองภาษาไทยด้วย PyThaiNLP

ใน PyThaiNLP เราได้จัดทำฟังก์ชันสำหรับหาคำที่มีเสียงคล้องจองในพจนานุกรมภาษาไทย

[15]:
# Install PyThaiNLP (--pre lets pip pick a pre-release build) and python-crfsuite.
# NOTE(review): python-crfsuite is presumably needed by the syllable tokenizer
# used later in this notebook — confirm.
!pip install --pre pythainlp
!pip install python-crfsuite
Requirement already satisfied: pythainlp in /usr/local/lib/python3.10/dist-packages (4.1.0b4)
Requirement already satisfied: requests>=2.22.0 in /usr/local/lib/python3.10/dist-packages (from pythainlp) (2.31.0)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (3.2.0)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (2.0.4)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (2023.7.22)
Collecting python-crfsuite
  Downloading python_crfsuite-0.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (993 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 993.5/993.5 kB 6.6 MB/s eta 0:00:00
Installing collected packages: python-crfsuite
Successfully installed python-crfsuite-0.9.9
[16]:
from pythainlp.corpus import thai_words
from pythainlp.tokenize import syllable_tokenize
[17]:
# Keep only the dictionary entries that syllable_tokenize returns as exactly
# one token, i.e. the entries it does not split any further.
all_thai_words_dict = list(
    filter(lambda entry: len(syllable_tokenize(entry)) == 1, thai_words())
)
[18]:
from pythainlp.khavee import KhaveeVerifier
# Create a single KhaveeVerifier instance; its is_sumpus method is called
# further down to compare pairs of words.
kv = KhaveeVerifier()
[19]:
# Display the first entry of the filtered list as a quick sanity check.
all_thai_words_dict[0]
[19]:
'เทอญ'
[21]:
word = "จีบ"  # Target word: find the words that rhyme with "จีบ".

# Collect every entry of all_thai_words_dict for which kv.is_sumpus(word, entry)
# is truthy, leaving out the target word itself.
list_sumpus = []
for candidate in all_thai_words_dict:
    if candidate == word:
        continue
    try:
        rhymes = kv.is_sumpus(word, candidate)
    except Exception:
        # Best effort: skip entries that is_sumpus fails on. Catching
        # Exception (not a bare `except:`) lets KeyboardInterrupt/SystemExit
        # propagate, so the scan can still be interrupted.
        continue
    if rhymes:
        list_sumpus.append(candidate)
print(list_sumpus)
['กลีบ', 'อีฟ', 'ถีบ', 'รีฟ', 'ตีบ', 'ชีพ', 'หลีบ', 'บีบ', 'ตี้บ', 'ลีบ', 'ทวีป', 'งีบ', 'หีบ', 'คีบ', 'ปี๊บ', 'หนีบ', 'รีบ', 'ทีป', 'จี๊ป', 'ปีบ', 'ครีบ', 'กีบ']
[ ]: