# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: Apache-2.0
"""Spell out time as Thai words.
Convert time string or time object to Thai words.
"""
from __future__ import annotations
from datetime import datetime, time
from functools import lru_cache
from pythainlp.tokenize import Tokenizer
from pythainlp.util.numtoword import num_to_thaiword
from pythainlp.util.wordtonum import thaiword_to_num
# strptime patterns for time strings; time_to_thaiword() tries the pattern
# with seconds first and falls back to the one without.
_TIME_FORMAT_WITH_SEC = "%H:%M:%S"
_TIME_FORMAT_WITHOUT_SEC = "%H:%M"
# Thai words that can appear in a spoken time, mapped to the number each one
# stands for when thaiword_to_time() parses a phrase.
# The keys are also used as the custom dictionary of the tokenizer built by
# _thai_time_cut().
_DICT_THAI_TIME = {
    "ศูนย์": 0,
    "หนึ่ง": 1,
    "สอง": 2,
    "ยี่": 2,  # form of "two" used in ยี่สิบ (twenty)
    "สาม": 3,
    "สี่": 4,
    "ห้า": 5,
    "หก": 6,
    "เจ็ด": 7,
    "แปด": 8,
    "เก้า": 9,
    "สิบ": 10,
    "เอ็ด": 1,  # form of "one" used in สิบเอ็ด (eleven)
    # set the value of the time unit
    "โมงเช้า": 6,  # start counting at 7:00 a.m.
    "โมงเย็น": 13,
    "บ่าย": 13,
    "บ่ายโมง": 13,
    "ตี": 0,
    "เที่ยงวัน": 12,
    "เที่ยงคืน": 0,
    "เที่ยง": 12,
    "ทุ่ม": 18,
    "นาฬิกา": 0,
    "ครึ่ง": 30,  # "half": counted as 30 when it appears in the minute part
}
@lru_cache
def _thai_time_cut():
    """Return the newmm tokenizer used to split Thai time phrases.

    The tokenizer's custom dictionary is the list of keys of
    ``_DICT_THAI_TIME``. Because of ``lru_cache``, the tokenizer is built on
    the first call only and the same instance is returned afterwards.
    """
    time_vocabulary = list(_DICT_THAI_TIME)
    return Tokenizer(custom_dict=time_vocabulary, engine="newmm")
# Words that mark the end of the hour part of a spoken time.
# thaiword_to_time() checks them in this order and splits the phrase at the
# first one it finds, so an affix that contains a shorter one must come first
# (e.g. "โมงเช้า" before "โมง", "เที่ยงคืน" before "เที่ยง").
_THAI_TIME_AFFIX = [
    "โมงเช้า",
    "บ่ายโมง",
    "โมงเย็น",
    "โมง",
    "นาฬิกา",
    "ทุ่ม",
    "ตี",
    "เที่ยงคืน",
    "เที่ยงวัน",
    "เที่ยง",
]
def _format_6h(h: int) -> str:
"""Thai time (6-hour clock)."""
text = ""
if h == 0:
text += "เที่ยงคืน"
elif h < 7:
text += "ตี" + num_to_thaiword(h)
elif h < 12:
text += num_to_thaiword(h - 6) + "โมงเช้า"
elif h == 12:
text += "เที่ยง"
elif h < 18:
if h == 13:
text += "บ่ายโมง"
else:
text += "บ่าย" + num_to_thaiword(h - 12) + "โมง"
elif h == 18:
text += "หกโมงเย็น"
else:
text += num_to_thaiword(h - 18) + "ทุ่ม"
return text
def _format_m6h(h: int) -> str:
"""Thai time (modified 6-hour clock)."""
text = ""
if h == 0:
text += "เที่ยงคืน"
elif h < 6:
text += "ตี" + num_to_thaiword(h)
elif h < 12:
text += num_to_thaiword(h) + "โมง"
elif h == 12:
text += "เที่ยง"
elif h < 19:
text += num_to_thaiword(h - 12) + "โมง"
else:
text += num_to_thaiword(h - 18) + "ทุ่ม"
return text
def _format_24h(h: int) -> str:
    """Spell out the hour ``h`` in the 24-hour clock.

    :param int h: hour on the 24-hour clock
    :return: the number in Thai words followed by "นาฬิกา"
    :rtype: str
    """
    hour_words = num_to_thaiword(h)
    return hour_words + "นาฬิกา"
def _format(
h: int,
m: int,
s: int,
fmt: str = "24h",
precision: str | None = None,
) -> str:
text = ""
if fmt == "6h":
text = _format_6h(h)
elif fmt == "m6h":
text = _format_m6h(h)
elif fmt == "24h":
text = _format_24h(h)
else:
raise NotImplementedError(f"Time format not supported: {fmt}")
if precision in ("m", "s"):
if m == 30 and (s == 0 or precision == "m") and (fmt in ("6h", "m6h")):
text += "ครึ่ง"
else:
text += num_to_thaiword(m) + "นาที"
if precision == "s":
text += num_to_thaiword(s) + "วินาที"
else:
if m:
if m == 30 and s == 0 and (fmt in ("6h", "m6h")):
text += "ครึ่ง"
else:
text += num_to_thaiword(m) + "นาที"
if s:
text += num_to_thaiword(s) + "วินาที"
return text
def time_to_thaiword(
    time_data: time | datetime | str,
    fmt: str = "24h",
    precision: str | None = None,
) -> str:
    """Spell out time as Thai words.

    :param time_data: time input, can be a datetime.time object
        or a datetime.datetime object
        or a string (in H:M or H:M:S format, using 24-hour clock)
    :type time_data: datetime.time or datetime.datetime or str
    :param str fmt: time output format
        * *24h* - 24-hour clock (default)
        * *6h* - 6-hour clock
        * *m6h* - Modified 6-hour clock
    :param str precision: precision of the spell out time
        * *m* - always spell out at minute level
        * *s* - always spell out at second level
        * None - spell out only non-zero parts
    :return: Time spelled out as Thai words
    :rtype: str
    :raises TypeError: if ``time_data`` is not a time, datetime or string
    :raises ValueError: if ``time_data`` is an empty string or a string
        that matches neither H:M:S nor H:M
    :raises NotImplementedError: if ``fmt`` is not a supported format

    :Example:
    ::

        time_to_thaiword("8:17")
        # output:
        # แปดนาฬิกาสิบเจ็ดนาที

        time_to_thaiword("8:17", "6h")
        # output:
        # สองโมงเช้าสิบเจ็ดนาที

        time_to_thaiword("8:17", "m6h")
        # output:
        # แปดโมงสิบเจ็ดนาที

        time_to_thaiword("18:30", fmt="m6h")
        # output:
        # หกโมงครึ่ง

        time_to_thaiword(datetime.time(12, 3, 0))
        # output:
        # สิบสองนาฬิกาสามนาที

        time_to_thaiword(datetime.time(12, 3, 0), precision="s")
        # output:
        # สิบสองนาฬิกาสามนาทีศูนย์วินาที
    """
    if isinstance(time_data, (time, datetime)):
        moment = time_data
    elif not isinstance(time_data, str):
        raise TypeError(
            "Time input must be a datetime.time object, "
            "a datetime.datetime object, or a string."
        )
    elif not time_data:
        raise ValueError("Time string cannot be empty.")
    else:
        moment = None
        # Try the more specific pattern (with seconds) first.
        for pattern in (_TIME_FORMAT_WITH_SEC, _TIME_FORMAT_WITHOUT_SEC):
            try:
                moment = datetime.strptime(time_data, pattern)
            except ValueError:
                continue
            break
        # Compare against None explicitly so the outcome never depends on
        # the truthiness of the parsed object.
        if moment is None:
            raise ValueError(
                f"Time string '{time_data}' does not match H:M or H:M:S format."
            )

    return _format(moment.hour, moment.minute, moment.second, fmt, precision)
# Hours spoken with the "ตี" prefix. They are matched as whole words to find
# where the hour ends, because "ตี" comes before the number instead of after.
# "ตีหก" is included because time_to_thaiword(fmt="6h") produces it for 06:00.
_THAI_TI_HOURS = ("ตีหนึ่ง", "ตีสอง", "ตีสาม", "ตีสี่", "ตีห้า", "ตีหก")


def _thai_tokens_to_int(tokens) -> int:
    """Add up Thai number tokens, treating "สิบ" as the tens marker."""
    total = 0
    for token in tokens:
        if token not in _DICT_THAI_TIME:
            continue  # not a known word, e.g. the unit "นาที"
        if token != "สิบ":
            total += _DICT_THAI_TIME[token]
        elif total:
            total *= 10  # "สามสิบ": 3 -> 30
        else:
            total = 10  # a leading "สิบ" is ten
    return total


def thaiword_to_time(text: str, padding: bool = True) -> str:
    """Convert Thai time in words into time (H:M).

    Only the hour and the minute are understood: every number word found
    after the hour is counted towards the minute.

    :param str text: Thai time in words
    :param bool padding: Zero pad the hour if True
    :return: time string
    :rtype: str
    :raises ValueError: if no time affix or no hour can be found in ``text``

    :Example:
    ::

        thaiword_to_time("บ่ายโมงครึ่ง")
        # output:
        # 13:30

        thaiword_to_time("แปดโมงสิบเจ็ดนาที")
        # output:
        # 08:17
    """
    text = text.replace("กว่า", "").replace("ๆ", "").replace(" ", "")

    # Put a "|" right after the hour part so the phrase can be split there.
    marked = ""
    for affix in _THAI_TIME_AFFIX:
        if affix not in text:
            continue
        if affix != "ตี":
            marked = text.replace(affix, affix + "|")
            break
        for ti_hour in _THAI_TI_HOURS:
            if ti_hour in text:
                marked = text.replace(ti_hour, ti_hour + "|")
                break

    if "|" not in marked:
        raise ValueError("Cannot find any Thai word for time affix.")

    pieces = marked.split("|")
    tokenizer = _thai_time_cut()
    hour = tokenizer.word_tokenize(pieces[0])
    minute_text = pieces[1]
    minute = tokenizer.word_tokenize(minute_text) if len(minute_text) > 1 else []

    head = hour[0]
    unit = hour[-1]
    # Tokens that spell the hour number; a lone token stands for itself.
    number_tokens = hour[:-1] or hour

    # determine hour
    hour_text = ""
    if unit == "นาฬิกา" and head in _DICT_THAI_TIME and hour[:-1]:
        hour_text = str(thaiword_to_num("".join(hour[:-1])))
    elif head == "ตี" and len(hour) > 1 and hour[1] in _DICT_THAI_TIME:
        hour_text = str(_DICT_THAI_TIME[hour[1]])
    elif unit == "โมงเช้า" and head in _DICT_THAI_TIME:
        # Read every number token so that "สิบเอ็ด" is 11, not 10.
        value = _thai_tokens_to_int(number_tokens)
        # One to five count from 6:00 (หนึ่งโมงเช้า is 7:00); larger numbers
        # are already the hour on the clock.
        hour_text = str(value + 6 if value < 6 else value)
    elif unit in ("โมงเย็น", "โมง") and head == "บ่าย":
        hour_text = str(_DICT_THAI_TIME[hour[1]] + 12)
    elif unit in ("โมงเย็น", "โมง") and head in _DICT_THAI_TIME:
        value = _thai_tokens_to_int(number_tokens)
        if unit == "โมง" and 7 <= value <= 11:
            # Plain "โมง" with seven to eleven is a morning hour (the "m6h"
            # style of time_to_thaiword); 19:00-23:00 are said with "ทุ่ม".
            hour_text = str(value)
        else:
            hour_text = str(value + 12)
    elif unit == "เที่ยงคืน":
        hour_text = "0"
    elif unit in ("เที่ยงวัน", "เที่ยง"):
        hour_text = "12"
    elif head == "บ่ายโมง":
        hour_text = "13"
    elif unit == "ทุ่ม":
        if len(hour) == 1:
            hour_text = "19"  # "ทุ่ม" on its own means the first evening hour
        else:
            hour_text = str(_DICT_THAI_TIME[head] + 18)

    if not hour_text:
        raise ValueError("Cannot find any Thai word for hour.")

    if padding and len(hour_text) == 1:
        hour_text = "0" + hour_text

    # determine minute (always two digits; "00" when nothing follows the hour)
    minute_value = _thai_tokens_to_int(minute)
    return f"{hour_text}:{minute_value:02d}"