# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2016-2025 PyThaiNLP Project
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: Apache-2.0
"""
Check if it is Thai text
"""
import string
from collections import defaultdict
from typing import List, Tuple

from pythainlp import (
    thai_above_vowels,
    thai_below_vowels,
    thai_consonants,
    thai_digits,
    thai_follow_vowels,
    thai_lead_vowels,
    thai_punctuations,
    thai_signs,
    thai_tonemarks,
    thai_vowels,
)
# Characters skipped by default when measuring the share of Thai text:
# ASCII whitespace, ASCII digits and ASCII punctuation, in that order.
_DEFAULT_IGNORE_CHARS = "".join(
    (string.whitespace, string.digits, string.punctuation)
)
# Inclusive code-point bounds used by the Thai character range test.
# Despite the "_ASCII" suffix these are Unicode code points
# (U+0E00 and U+0E7F), not ASCII values.
_TH_FIRST_CHAR_ASCII = 0x0E00
_TH_LAST_CHAR_ASCII = 0x0E7F
# Map of Thai characters to the label used when reporting them.
# Consonants, digits, ฤ, ฦ and the baht sign map to themselves; vowels,
# tone marks, signs and punctuation map to a descriptive Thai name.
THAI_CHAR_NAMES = {
    # Consonants (each one is its own label)
    **{char: char for char in thai_consonants},
    # Vowels and Signs
    "\u0e24": "ฤ",
    "\u0e26": "ฦ",
    "\u0e30": "สระ อะ",
    "\u0e31": "ไม้หันอากาศ",
    "\u0e32": "สระ อา",
    "\u0e33": "สระ อำ",
    "\u0e34": "สระ อิ",
    "\u0e35": "สระ อี",
    "\u0e36": "สระ อึ",
    "\u0e37": "สระ อือ",
    "\u0e38": "สระ อุ",
    "\u0e39": "สระ อู",
    "\u0e40": "สระ เอ",
    "\u0e41": "สระ แอ",
    "\u0e42": "สระ โอ",
    "\u0e43": "สระ ใอ",
    "\u0e44": "สระ ไอ",
    # U+0E45 is THAI CHARACTER LAKKHANGYAO (ๅ). It was previously labelled
    # "ไม้ม้วน", which is the name of ใ (U+0E43), not of this character.
    "\u0e45": "ลากข้างยาว",
    "\u0e4d": "นฤคหิต",
    "\u0e47": "ไม้ไต่คู้",
    # Tone Marks
    "\u0e48": "ไม้เอก",
    "\u0e49": "ไม้โท",
    "\u0e4a": "ไม้ตรี",
    "\u0e4b": "ไม้จัตวา",
    # Other Signs
    "\u0e2f": "ไปยาลน้อย",
    "\u0e3a": "พินทุ",
    "\u0e46": "ไม้ยมก",
    "\u0e4c": "การันต์",
    "\u0e4e": "ยามักการ",
    # Punctuation
    "\u0e4f": "ฟองมัน",
    "\u0e5a": "อังคั่นคู่",
    "\u0e5b": "โคมุต",
    # Digits (each one is its own label)
    **{char: char for char in thai_digits},
    # Symbol
    "\u0e3f": "฿",
}
[docs]
def isthaichar(ch: str) -> bool:
    """Tell whether a single character falls in the Thai code-point range.

    The test is a plain range check: the code point of ``ch`` must lie
    between ``_TH_FIRST_CHAR_ASCII`` and ``_TH_LAST_CHAR_ASCII``, both
    bounds included.

    :param ch: input character; it is passed to :func:`ord`, so it must
        be exactly one character long
    :type ch: str
    :return: True if ch is inside the Thai range, otherwise False.
    :rtype: bool

    :Example:
    ::

        from pythainlp.util import isthaichar

        isthaichar("ก")  # THAI CHARACTER KO KAI
        # output: True

        isthaichar("๕")  # THAI DIGIT FIVE
        # output: True
    """
    return _TH_FIRST_CHAR_ASCII <= ord(ch) <= _TH_LAST_CHAR_ASCII
[docs]
def isthai(text: str, ignore_chars: str = ".") -> bool:
    """Tell whether a string consists only of Thai characters.

    Characters listed in ``ignore_chars`` are skipped; every remaining
    character must satisfy :func:`isthaichar`. An empty ``text`` yields
    True because there is no character that fails the check.

    :param text: input text
    :type text: str
    :param ignore_chars: characters to be ignored, defaults to "."; a
        falsy value (for example None or "") means nothing is ignored
    :type ignore_chars: str, optional
    :return: True if every non-ignored character of the input is Thai,
             otherwise False.
    :rtype: bool

    :Example:
    ::

        from pythainlp.util import isthai

        isthai("กาลเวลา")
        # output: True

        isthai("กาลเวลา.")
        # output: True

        isthai("กาล-เวลา")
        # output: False

        isthai("กาล-เวลา +66", ignore_chars="01234567890+-., ")
        # output: True
    """
    skipped = ignore_chars if ignore_chars else ""
    return all(ch in skipped or isthaichar(ch) for ch in text)
[docs]
def countthai(text: str, ignore_chars: str = _DEFAULT_IGNORE_CHARS) -> float:
    """Compute the percentage of Thai characters in a text.

    Characters listed in ``ignore_chars`` are left out of both the
    numerator and the denominator. The result is 0.0 when ``text`` is
    empty, is not a string, or contains only ignored characters.

    :param text: input text
    :type text: str
    :param ignore_chars: characters to be ignored, defaults to whitespace,
        digits, and punctuation marks; a falsy value means nothing is
        ignored
    :type ignore_chars: str, optional
    :return: proportion of Thai characters in the text (percentage)
    :rtype: float

    :Example:
    ::

        from pythainlp.util import countthai

        countthai("ไทยเอ็นแอลพี 3.0")
        # output: 100.0

        countthai("PyThaiNLP 3.0")
        # output: 0.0

        countthai("ใช้งาน PyThaiNLP 3.0")
        # output: 40.0

        countthai("ใช้งาน PyThaiNLP 3.0", ignore_chars="")
        # output: 30.0
    """
    if not text or not isinstance(text, str):
        return 0.0

    skipped = ignore_chars or ""
    # Only characters outside the ignore list take part in the ratio.
    counted = [ch for ch in text if ch not in skipped]
    if not counted:
        return 0.0

    thai_total = sum(1 for ch in counted if isthaichar(ch))
    return (thai_total / len(counted)) * 100
[docs]
def display_thai_char(ch: str) -> str:
    """Make an above-the-line vowel or a tone mark easier to read.

    Such characters are returned with a leading underscore (_); every
    other input is returned unchanged.

    :param ch: input character
    :type ch: str
    :return: "_" + ch if ch is an above vowel, a tone mark, or one of
        Sara Am, Thanthakhat, Nikhahit, Yamakkan; otherwise ch itself
    :rtype: str

    :Example:
    ::

        from pythainlp.util import display_thai_char

        display_thai_char("้")
        # output: "_้"
    """
    prefixed_groups = (
        thai_above_vowels,
        thai_tonemarks,
        # Sara Am, Thanthakhat, Nikhahit, Yamakkan
        "\u0e33\u0e4c\u0e4d\u0e4e",
    )
    needs_prefix = any(ch in group for group in prefixed_groups)
    return "_" + ch if needs_prefix else ch
[docs]
def thai_word_tone_detector(word: str) -> List[Tuple[str, str]]:
    """
    Thai tone detector for word.

    The word is converted to its pronunciation with
    pythainlp.transliterate.pronunciate, the result is split on "-" into
    syllables, and a tone is detected for each syllable.

    :param str word: Thai word.
    :return: list of (syllable, tone) pairs, one per syllable of the
        pronunciation; tone is l, m, h, r, f or empty if it cannot be
        detected
    :rtype: List[Tuple[str, str]]

    :Example:
    ::

        from pythainlp.util import thai_word_tone_detector

        print(thai_word_tone_detector("คนดี"))
        # output: [('คน', 'm'), ('ดี', 'm')]

        print(thai_word_tone_detector("มือถือ"))
        # output: [('มือ', 'm'), ('ถือ', 'r')]
    """
    # Imported here rather than at module level, as in the original code
    # (presumably to avoid an import cycle -- confirm before moving).
    from ..transliterate import pronunciate
    from ..util.syllable import tone_detector

    syllables = pronunciate(word).split("-")
    # The tone is detected on a copy of the syllable in which "ห" followed
    # by phinthu is replaced by a plain "ห"; the returned syllable text
    # itself is left as produced by pronunciate.
    return [
        (syllable, tone_detector(syllable.replace("หฺ", "ห")))
        for syllable in syllables
    ]
[docs]
def count_thai_chars(text: str) -> dict:
    """
    Count Thai characters by type

    This function will give you numbers of Thai characters by type
    (consonants, vowels, lead_vowels, follow_vowels, above_vowels,
    below_vowels, tonemarks, signs, thai_digits, punctuations, non_thai)

    Every vowel is counted under "vowels" and, when it also belongs to
    one of the lead/follow/above/below groups, under that group as well.
    All other characters are counted under exactly one key; the first
    matching group in the order listed in the code wins. A character that
    is a vowel but belongs to none of the finer groups is counted under
    "vowels" only and is never reported as "non_thai".

    :param str text: Text
    :return: Dict with numbers of Thai characters by type
    :rtype: dict

    :Example:
    ::

        from pythainlp.util import count_thai_chars

        count_thai_chars("ทดสอบภาษาไทย")
        # output: {
        # 'vowels': 3,
        # 'lead_vowels': 1,
        # 'follow_vowels': 2,
        # 'above_vowels': 0,
        # 'below_vowels': 0,
        # 'consonants': 9,
        # 'tonemarks': 0,
        # 'signs': 0,
        # 'thai_digits': 0,
        # 'punctuations': 0,
        # 'non_thai': 0
        # }
    """
    _dict = {
        "vowels": 0,
        "lead_vowels": 0,
        "follow_vowels": 0,
        "above_vowels": 0,
        "below_vowels": 0,
        "consonants": 0,
        "tonemarks": 0,
        "signs": 0,
        "thai_digits": 0,
        "punctuations": 0,
        "non_thai": 0,
    }
    for c in text:
        # "vowels" is an umbrella count kept in addition to the finer
        # classification below, hence the separate `if`.
        is_vowel = c in thai_vowels
        if is_vowel:
            _dict["vowels"] += 1

        if c in thai_lead_vowels:
            _dict["lead_vowels"] += 1
        elif c in thai_follow_vowels:
            _dict["follow_vowels"] += 1
        elif c in thai_above_vowels:
            _dict["above_vowels"] += 1
        elif c in thai_below_vowels:
            _dict["below_vowels"] += 1
        elif c in thai_consonants:
            _dict["consonants"] += 1
        elif c in thai_tonemarks:
            _dict["tonemarks"] += 1
        elif c in thai_signs:
            _dict["signs"] += 1
        elif c in thai_digits:
            _dict["thai_digits"] += 1
        elif c in thai_punctuations:
            _dict["punctuations"] += 1
        elif not is_vowel:
            # Only characters matched by none of the Thai groups are
            # non-Thai; a vowel already counted above must not be
            # reported a second time here.
            _dict["non_thai"] += 1
    return _dict
[docs]
def analyze_thai_text(text: str) -> dict:
    """
    Count the characters of a text, keyed by their descriptive names.

    The text is processed character by character. A character found in
    THAI_CHAR_NAMES is counted under the name it maps to; any other
    character is counted under the character itself. Keys appear in the
    order in which they are first encountered.

    :param str text: The Thai text string to be analyzed.
    :return: A dictionary mapping each name (or raw character) to the
        number of times it occurs in the text.
    :rtype: dict

    :Example:
    ::

        from pythainlp.util import analyze_thai_text

        analyze_thai_text("คนดี")
        # output: {'ค': 1, 'น': 1, 'ด': 1, 'สระ อี': 1}

        analyze_thai_text("เล่น")
        # output: {'สระ เอ': 1, 'ล': 1, 'ไม้เอก': 1, 'น': 1}
    """
    tally = defaultdict(int)
    for symbol in text:
        # Unmapped characters fall back to themselves as the key.
        label = THAI_CHAR_NAMES.get(symbol, symbol)
        tally[label] += 1
    return dict(tally)