# SPDX-FileCopyrightText: 2016-2026 PyThaiNLP Project
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: Apache-2.0
"""Thai braille conversion core functionality."""
from __future__ import annotations
import re
from pythainlp.tokenize import word_tokenize
from pythainlp.util import Trie
# Thai character to braille pattern mapping
# Braille patterns are represented as dot numbers (1-8)
# Following international braille standards
#
# Each key is a piece of input text (normally a single character) and each
# value is a list of one or more braille cells.  A cell is written as a
# string of raised-dot numbers, e.g. "1245".  The pseudo-cell "-1" is the
# key that Braille.db maps to an ordinary space.
thai_braille_mapping_dict: dict[str, list[str]] = {
    # Consonants
    "ก": ["1245"],
    "ข": ["13"],
    "ฃ": ["356", "13"],
    "ค": ["136"],
    "ฅ": ["36", "136"],
    "ฆ": ["6", "136"],
    "ง": ["12456"],
    "จ": ["245"],
    "ฉ": ["34"],
    "ช": ["346"],
    "ซ": ["2346"],
    "ฌ": ["6", "346"],
    "ญ": ["6", "13456"],
    "ฎ": ["6", "145"],
    "ฏ": ["6", "1256"],
    "ฐ": ["6", "2345"],
    "ฑ": ["6", "23456"],
    "ฒ": ["36", "23456"],
    "ณ": ["6", "1345"],
    "ด": ["145"],
    "ต": ["1256"],
    "ถ": ["2345"],
    "ท": ["23456"],
    "ธ": ["356", "23456"],
    "น": ["1345"],
    "บ": ["1236"],
    "ป": ["12346"],
    "ผ": ["1234"],
    "ฝ": ["1346"],
    "พ": ["1456"],
    "ฟ": ["1246"],
    "ภ": ["6", "1456"],
    "ม": ["134"],
    "ย": ["13456"],
    "ร": ["1235"],
    "ล": ["123"],
    "ว": ["2456"],
    "ศ": ["6", "234"],
    "ษ": ["36", "234"],
    "ส": ["234"],
    "ห": ["125"],
    "ฬ": ["6", "123"],
    "อ": ["135"],
    "ฮ": ["123456"],
    # ฤ and ฦ
    "ฤ": ["1235", "2"],
    "ฦ": ["123", "2"],
    # Number prefix and digits (ASCII digits, then Thai digits)
    "<N>": ["3456"],  # Number prefix (used by replace_number function)
    "1": ["1"],
    "2": ["12"],
    "3": ["14"],
    "4": ["145"],
    "5": ["15"],
    "6": ["124"],
    "7": ["1245"],
    "8": ["125"],
    "9": ["24"],
    "0": ["245"],
    "๑": ["1"],
    "๒": ["12"],
    "๓": ["14"],
    "๔": ["145"],
    "๕": ["15"],
    "๖": ["124"],
    "๗": ["1245"],
    "๘": ["125"],
    "๙": ["24"],
    "๐": ["245"],
    # Vowel signs
    "ะ": ["1"],
    "า": ["16"],
    "ิ": ["12"],
    "ี": ["23"],
    "ุ": ["14"],
    "ู": ["25"],
    "ึ": ["246"],
    "ื": ["26"],
    "เ": ["124"],
    "โ": ["24"],
    "ั": ["345"],
    "ำ": ["1356"],
    "แ": ["126"],
    "ไ": ["156"],
    "ใ": ["156", "2"],
    # Tone marks and thanthakhat
    "่": ["35"],
    "้": ["256"],
    "๊": ["2356"],
    "๋": ["236"],
    "์": ["356"],
    # Repetition mark, space and abbreviation marks
    "ๆ": ["2"],
    " ": ["-1"],
    "ฯ": ["56", "23"],  # Thai abbreviation mark
    "ฯลฯ": ["56", "123"],  # Thai abbreviation (special multi-character case)
    # Punctuation and symbols
    ".": ["456", "256"],
    "@": ["1", "1"],
    "?": ["456", "236"],
    "!": ["456", "235"],
    ";": ["456", "23"],
    ":": ["456", "25"],
    "/": ["456", "34"],
    "\\": ["456", "16"],
    "-": ["36"],
    "=": ["56", "2356"],
    "%": ["4", "356"],
    '"': ["5", "2"],
    "(": ["5", "126"],
    ")": ["5", "345"],
}
# Compound vowels that wrap around a consonant.  In each key the "-" marks
# the consonant position; _replace_vowels rewrites matching text into these
# literal keys so the tokenizer can look them up as a single unit.
_dict_2: dict[str, list[str]] = {
    "เ-อ": ["146"],
    "เ-ีย": ["12356"],
    "เ-ือ": ["12345"],
    "-ัว": ["15"],
    "เ-า": ["235"],
    "เ-าะ": ["135", "1"],
}

# Fold the compound vowels into the main table (appended after the
# single-character entries).
thai_braille_mapping_dict.update(_dict_2)
# Compound-vowel templates that also carry a tone mark: "-" stands for the
# consonant and "t" for the tone mark written above it.
_v1: list[str] = [
    "เ-tอ",
    "เ-ีtย",
    "เ-ืtอ",
    "-ัtว",
    "เ-tา",
    "เ-tาะ",
]

# Vocabulary handed to the tokenizer as its custom dictionary: every mapping
# key, the tone templates, plus the space and number-prefix tokens.
char_trie = Trie([*thai_braille_mapping_dict, *_v1, " ", "<N>"])
# Build the (regex, replacement) rules used by _replace_vowels.
#
# Every template is turned into a regex by replacing "-" with a capturing
# consonant class and, for the tone-carrying templates in _v1, "t" with a
# capturing tone-mark class.  The replacement moves the consonant in front of
# the literal template key (which is what the mapping table is keyed on) and
# puts the tone mark, if any, after it:  "เก่อ" -> "ก" + "เ-อ" + "่".
#
# Within each group the longer templates are tried first.  Otherwise a short
# template that is a prefix of a longer one ("เ-tา" vs "เ-tาะ") consumes the
# text first and the longer rule can never match.
_VOWELS: list[tuple[str, str]] = [
    (
        template.replace("-", "([ก-ฮ])").replace("t", "([่้๊๋])"),
        r"\1" + template.replace("t", "") + r"\2",
    )
    for template in sorted(_v1, key=len, reverse=True)
]
_VOWELS += [
    (template.replace("-", "([ก-ฮ])"), r"\1" + template)
    for template in sorted(_dict_2, key=len, reverse=True)
]

# Same rules in the historical "pattern,replacement" string form.
_vowel_patterns: list[str] = [
    f"{pattern},{replacement}" for pattern, replacement in _VOWELS
]
def replace_number(word: str) -> str:
    """Prepend the ``<N>`` number marker when the word begins with a digit.

    Both ASCII digits and Thai digits count.  Only the first character is
    inspected; an empty string is returned unchanged.

    :param str word: Word to check
    :return: ``"<N>" + word`` if the first character is a digit,
        otherwise ``word`` itself
    :rtype: str
    """
    digit_prefixes = tuple("1234567890๐๑๒๓๔๕๖๗๘๙")
    return f"<N>{word}" if word.startswith(digit_prefixes) else word
def _replace_vowels(word: str) -> str:
    """Rewrite compound Thai vowels into their mapping-table keys.

    Every ``(regex, replacement)`` rule in ``_VOWELS`` is applied in turn,
    each one operating on the output of the previous rule.

    :param str word: Word containing Thai vowels
    :return: Word with every rule applied
    :rtype: str
    """
    rewritten = word
    for regex, template in _VOWELS:
        rewritten = re.sub(regex, template, rewritten)
    return rewritten
def thai_word_braille(word: str) -> str:
    """Convert a Thai word to braille representation.

    The word is first normalised (compound vowels rewritten, number prefix
    added), then split into mapping-table keys with the dictionary-based
    tokenizer.  Each key contributes one or more braille cells, and every
    cell is converted individually so that multi-cell characters (for
    example ``ฆ`` -> dots 6 then dots 136) keep their cells and their order.

    Tokens that have no entry in the mapping table are skipped.

    :param str word: Thai word to convert
    :return: Braille representation of the word; ``""`` for empty input or
        when nothing in the word could be converted
    :rtype: str

    :Example:
    ::

        from pythainlp.braille import thai_word_braille

        thai_word_braille("กก")
        # Output: '⠛⠛'

        thai_word_braille("น้ำ")
        # Output: '⠝⠲⠵'
    """
    if not word:
        return ""

    word = replace_number(_replace_vowels(word))

    cells: list[str] = []
    for token in word_tokenize(word, custom_dict=char_trie, engine="mm"):
        # A run of several whitespace characters can come back as a single
        # token; look each character up on its own in that case.
        units = token if token.isspace() and len(token) > 1 else (token,)
        for unit in units:
            # extend, not append: a table entry is a list of separate cells.
            cells.extend(thai_braille_mapping_dict.get(unit, ()))

    # One Braille object per cell: a plain dot string is always treated as
    # exactly one cell, so the cells can neither be merged nor reordered.
    return "".join(Braille(cell).tobraille() for cell in cells)
def thai_text_braille(text: str) -> list[str]:
    """Convert Thai text to braille, one result per token.

    The text is split with ``word_tokenize`` using its default settings and
    every resulting token is passed to ``thai_word_braille``.

    :param str text: Thai text to convert
    :return: Braille strings, one per token and in token order
    :rtype: list[str]

    :Example:
    ::

        from pythainlp.braille import thai_text_braille

        thai_text_braille("สวัสดี ครับ")
        # Output: a list with one braille string per token
    """
    return [thai_word_braille(token) for token in word_tokenize(text)]
def _build_braille_table() -> dict[str, str]:
    """Build the dot-string to Unicode braille character lookup table.

    Keys are the raised dot numbers in ascending order (``"1245"``).  The
    value is the matching character of the Unicode Braille Patterns block,
    in which dot *n* sets bit *n - 1* above U+2800.  All 255 non-empty
    combinations of dots 1-8 are generated, plus two special keys.
    """
    table: dict[str, str] = {
        "-1": " ",  # pseudo-cell used for an ordinary space
        "0": "\u2800",  # blank braille cell (no dots raised)
    }
    for mask in range(1, 256):
        dots = "".join(str(bit + 1) for bit in range(8) if (mask >> bit) & 1)
        table[dots] = chr(0x2800 + mask)
    return table


class Braille:
    """Braille pattern converter.

    Converts dot number patterns to Unicode braille characters.

    International standard Braille numbering:

    * Dots 1,2,3 = left column (top, middle, bottom)
    * Dots 4,5,6 = right column (top, middle, bottom)
    * Dots 7,8 = bottom row (left, right) for 8-dot Braille

    Accepted input shapes:

    * ``str`` - the dots of a single cell, in any order (``"1245"``).
    * ``list[str]`` - a sequence of cells.
    * ``list[list[str]]`` - groups of cells (e.g. one group per character);
      the groups are flattened in order.

    The cell ``"-1"`` stands for an ordinary space and ``"0"`` for a blank
    braille cell.
    """

    # Lookup table shared by all instances: sorted dot string -> character.
    db: dict[str, str] = _build_braille_table()

    # Dot renumbering for mirroring a cell left-to-right:
    # 1<->4, 2<->5, 3<->6, 7<->8.
    _MIRROR_DOTS = str.maketrans("12345678", "45612387")

    def __init__(self, data: list[list[str]] | list[str] | str) -> None:
        """Initialize Braille converter.

        :param data: Braille dot patterns as list or string
        :type data: list[list[str]] | list[str] | str
        """
        self.inputdata: list[list[str]] | list[str] | str = data
        # Flat list of cells, each a dot string sorted into table-key order.
        self.data: list[str] = self._to_cells(data)

    @staticmethod
    def _to_cells(data: list[list[str]] | list[str] | str) -> list[str]:
        """Flatten *data* into a list of cells with their dots sorted."""
        if not data:
            return []
        if isinstance(data, str):
            # Sorting the characters makes "2145" and "1245" the same key;
            # "-1" is unaffected because "-" sorts before the digits.
            return ["".join(sorted(data))]
        cells: list[str] = []
        for item in data:
            cells.extend(Braille._to_cells(item))
        return cells

    def tobraille(self) -> str:
        """Convert dot patterns to braille Unicode characters.

        Cells that are not valid dot patterns are skipped.

        :return: Unicode braille representation (``""`` for empty input)
        :rtype: str
        """
        return "".join(self.db.get(cell, "") for cell in self.data)

    def printbraille(self) -> str:
        """Mirror dot patterns for physical braille printing.

        International standard: swap 1↔4, 2↔5, 3↔6, 7↔8.  Every cell is
        mirrored and the order of the cells is reversed.  Space and blank
        cells are symmetric and pass through unchanged; cells that are not
        valid dot patterns are skipped.

        :return: Mirrored braille for printing (``""`` for empty input)
        :rtype: str
        """
        return "".join(self._mirror_cell(cell) for cell in reversed(self.data))

    def _mirror_cell(self, cell: str) -> str:
        """Return the character for *cell* mirrored left-to-right."""
        if cell in ("-1", "0"):
            return self.db[cell]
        mirrored = "".join(sorted(cell.translate(self._MIRROR_DOTS)))
        return self.db.get(mirrored, "")