# -*- coding: utf-8 -*-
"""
Check if it is Thai text
"""
import string
from pythainlp import thai_above_vowels, thai_tonemarks
# Characters that countthai() leaves out of its percentage by default:
# ASCII whitespace (including newlines), ASCII digits and ASCII punctuation.
_DEFAULT_IGNORE_CHARS = string.whitespace + string.digits + string.punctuation
# Inclusive bounds of the code point range that isthaichar() accepts.
# NOTE: despite the "_ASCII" suffix these are Unicode code points as returned
# by ord(), not ASCII values: 3584 == 0x0E00 and 3711 == 0x0E7F.
_TH_FIRST_CHAR_ASCII = 3584
_TH_LAST_CHAR_ASCII = 3711
def isthaichar(ch: str) -> bool:
    """
    Check whether the input character is a Thai character.

    A character counts as Thai when its code point lies between
    ``_TH_FIRST_CHAR_ASCII`` and ``_TH_LAST_CHAR_ASCII`` (both inclusive).
    This is a pure range check, so Thai digits and Thai symbols inside
    that range are accepted as well.

    :param str ch: input character (a string of length one)
    :return: returns **True** if the input character is a Thai character,
             otherwise returns **False**
    :rtype: bool
    :raises TypeError: if *ch* is not a single character (raised by
                       :func:`ord`)

    :Example:
    ::

        from pythainlp.util import isthaichar

        isthaichar("ก")  # THAI CHARACTER KO KAI
        # output: True

        isthaichar("๐")  # THAI DIGIT ZERO
        # output: True

        isthaichar("๕")  # THAI DIGIT FIVE
        # output: True
    """
    return _TH_FIRST_CHAR_ASCII <= ord(ch) <= _TH_LAST_CHAR_ASCII
def isthai(word: str, ignore_chars: str = ".") -> bool:
    """
    Check whether every character in the input string is a Thai character.

    Characters listed in *ignore_chars* are skipped, i.e. they are treated
    as if they were Thai. An empty *word* yields **True** because there is
    no character that could fail the check.

    :param str word: input text
    :param str ignore_chars: characters to be ignored
                             (i.e. will be considered as Thai);
                             an empty or ``None`` value ignores nothing
    :return: returns **True** if the input text consists only of Thai
             characters (and ignored characters),
             otherwise returns **False**
    :rtype: bool

    :Example:

    Check if all characters are Thai characters. By default,
    only the full stop (".") is ignored::

        from pythainlp.util import isthai

        isthai("กาลเวลา")
        # output: True

        isthai("กาลเวลา.")
        # output: True

    Explicitly ignore digits, whitespace, and the following characters
    ("-", ".", "$", ",")::

        from pythainlp.util import isthai

        isthai("กาลเวลา, การเวลา-ก,  3.75$", ignore_chars="1234567890.-,$ ")
        # output: True
    """
    # Normalise a falsy value (e.g. None) so the membership test below works.
    if not ignore_chars:
        ignore_chars = ""

    # all() stops at the first character that is neither ignored nor Thai.
    return all(ch in ignore_chars or isthaichar(ch) for ch in word)
def countthai(text: str, ignore_chars: str = _DEFAULT_IGNORE_CHARS) -> float:
    """
    Calculate the percentage of Thai characters in the text,
    with an option to ignore some characters.

    Ignored characters are removed from both the numerator and the
    denominator, so the result is the share of Thai characters among the
    characters that are actually counted.

    :param str text: input text
    :param str ignore_chars: string of characters to ignore from counting.
                             By default, the ignored characters are whitespace,
                             newline, digits, and punctuation.
    :return: percentage of Thai characters in the text (0.0 to 100.0);
             0.0 if *text* is empty, is not a string, or contains only
             ignored characters
    :rtype: float

    :Example:

    Find the percentage of Thai characters in the text with the default
    ignored characters set (whitespace, newline character,
    punctuation and digits)::

        from pythainlp.util import countthai

        countthai("ดอนัลด์ จอห์น ทรัมป์ English: Donald John Trump")
        # output: 45.0

        countthai("(English: Donald John Trump)")
        # output: 0.0

    Find the percentage of Thai characters in the text while ignoring
    only punctuation but not whitespace, newline character and digits::

        import string

        string.punctuation
        # output: !"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~

        countthai(
            "ดอนัลด์ จอห์น ทรัมป์ English: Donald John Trump",
            ignore_chars=string.punctuation,
        )
        # output: 39.130434782608695

        countthai(
            "ดอนัลด์ จอห์น ทรัมป์ (English: Donald John Trump)",
            ignore_chars=string.punctuation,
        )
        # output: 39.130434782608695
    """
    if not text or not isinstance(text, str):
        return 0.0

    # Normalise a falsy value (e.g. None) so the membership test below works.
    if not ignore_chars:
        ignore_chars = ""

    # Only characters that are not ignored take part in the percentage.
    counted = [ch for ch in text if ch not in ignore_chars]
    if not counted:
        # Everything was ignored; avoid dividing by zero.
        return 0.0

    num_thai = sum(1 for ch in counted if isthaichar(ch))
    return (num_thai / len(counted)) * 100
def display_thai_char(char: str) -> str:
    """
    Add an underscore (_) prefix to high-position vowels, tone marks and
    a few other marks, to ease readability when they are shown on their own.

    :param str char: input character
    :return: the character prefixed with "_" if it is found in
             ``thai_above_vowels``, in ``thai_tonemarks``, or is one of
             Sara Am, Thanthakhat, Nikhahit or Yamakkan;
             otherwise the input character unchanged
    :rtype: str

    :Example:
    ::

        display_thai_char("้")
        # output: "_้"
    """
    # The literal in the last condition is Sara Am (U+0E33),
    # Thanthakhat (U+0E4C), Nikhahit (U+0E4D) and Yamakkan (U+0E4E).
    if (
        char in thai_above_vowels
        or char in thai_tonemarks
        or char in "\u0e33\u0e4c\u0e4d\u0e4e"
    ):
        return "_" + char
    return char