# Source code for pythainlp.parse.core

# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0

from typing import List, Union

# Module-level cache of the most recently built parser. Loading a parser is
# delegated to the engine modules, so it is reused across calls as long as
# both the engine and the model stay the same.
_tagger = None
_tagger_name = ""
_tagger_model = None


def dependency_parsing(
    text: str,
    model: Union[str, None] = None,
    tag: str = "str",
    engine: str = "esupar",
) -> Union[List[List[str]], str]:
    """
    Dependency Parsing

    The parser object is cached at module level and rebuilt only when
    *engine* or *model* differs from the previous call.

    :param str text: text to apply dependency parsing to
    :param str model: model for using with engine \
        (for esupar, transformers_ud and ud_goeswith; \
        ignored by spacy_thai)
    :param str tag: output type (str or list)
    :param str engine: the name of dependency parser
    :return: str (conllu) or List
    :rtype: Union[List[List[str]], str]
    :raises NotImplementedError: if *engine* is not one of the \
        supported engines

    **Options for engine**
        * *esupar* (default) - Tokenizer, POS tagger and Dependency parser \
            using BERT/RoBERTa/DeBERTa models. `GitHub \
            <https://github.com/KoichiYasuoka/esupar>`_
        * *spacy_thai* - Tokenizer, POS tagger, and dependency parser \
            for the Thai language, using Universal Dependencies. \
            `GitHub <https://github.com/KoichiYasuoka/spacy-thai>`_
        * *transformers_ud* - TransformersUD \
            `GitHub <https://github.com/KoichiYasuoka/>`_
        * *ud_goeswith* - POS tagging and dependency parsing \
            using `goeswith` for subwords

    **Options for model (esupar engine)**
        * *th* (default) - KoichiYasuoka/roberta-base-thai-spm-upos model \
            `Huggingface \
            <https://huggingface.co/KoichiYasuoka/roberta-base-thai-spm-upos>`_
        * *KoichiYasuoka/deberta-base-thai-upos* - DeBERTa(V2) model \
            pre-trained on Thai Wikipedia texts for POS tagging and \
            dependency parsing `Huggingface \
            <https://huggingface.co/KoichiYasuoka/deberta-base-thai-upos>`_
        * *KoichiYasuoka/roberta-base-thai-syllable-upos* - RoBERTa model \
            pre-trained on Thai Wikipedia texts for POS tagging and \
            dependency parsing. (syllable level) `Huggingface \
            <https://huggingface.co/KoichiYasuoka/roberta-base-thai-syllable-upos>`_
        * *KoichiYasuoka/roberta-base-thai-char-upos* - RoBERTa model \
            pre-trained on Thai Wikipedia texts for POS tagging \
            and dependency parsing. (char level) `Huggingface \
            <https://huggingface.co/KoichiYasuoka/roberta-base-thai-char-upos>`_

    If you want to train models for esupar, you can read \
    `GitHub <https://github.com/KoichiYasuoka/esupar>`_

    **Options for model (transformers_ud engine)**
        * *KoichiYasuoka/deberta-base-thai-ud-head* (default) - \
            DeBERTa(V2) model pretrained on Thai Wikipedia texts \
            for dependency parsing (head-detection using Universal \
            Dependencies) and question-answering, derived from \
            deberta-base-thai. \
            trained by th_blackboard.conll. `Huggingface \
            <https://huggingface.co/KoichiYasuoka/deberta-base-thai-ud-head>`_
        * *KoichiYasuoka/roberta-base-thai-spm-ud-head* - \
            roberta model pretrained on Thai Wikipedia texts \
            for dependency parsing. `Huggingface \
            <https://huggingface.co/KoichiYasuoka/roberta-base-thai-spm-ud-head>`_

    **Options for model (ud_goeswith engine)**
        * *KoichiYasuoka/deberta-base-thai-ud-goeswith* (default) - \
            This is a DeBERTa(V2) model pre-trained on Thai Wikipedia \
            texts for POS tagging and dependency parsing \
            (using goeswith for subwords) \
            `Huggingface <https://huggingface.co/KoichiYasuoka/deberta-base-thai-ud-goeswith>`_

    :Example:
    ::

        from pythainlp.parse import dependency_parsing

        print(dependency_parsing("ผมเป็นคนดี", engine="esupar"))
        # output:
        # 1 ผม _ PRON _ _ 3 nsubj _ SpaceAfter=No
        # 2 เป็น _ VERB _ _ 3 cop _ SpaceAfter=No
        # 3 คน _ NOUN _ _ 0 root _ SpaceAfter=No
        # 4 ดี _ VERB _ _ 3 acl _ SpaceAfter=No

        print(dependency_parsing("ผมเป็นคนดี", engine="spacy_thai"))
        # output:
        # 1 ผม PRON PPRS _ 2 nsubj _ SpaceAfter=No
        # 2 เป็น VERB VSTA _ 0 ROOT _ SpaceAfter=No
        # 3 คนดี NOUN NCMN _ 2 obj _ SpaceAfter=No
    """
    global _tagger, _tagger_name, _tagger_model

    # spacy_thai is constructed without a model, so a changed ``model``
    # argument must not force a reload for that engine.
    cache_model = None if engine == "spacy_thai" else model

    # Rebuild when nothing is loaded yet, or when the requested engine or
    # model differs from the cached one. Comparing the engine name alone
    # would keep serving the parser built for a previous model.
    if (
        _tagger is None
        or _tagger_name != engine
        or _tagger_model != cache_model
    ):
        # Engine modules are imported lazily so that only the dependencies
        # of the selected engine have to be installed.
        if engine == "esupar":
            from pythainlp.parse.esupar_engine import Parse

            new_tagger = Parse(model=model)
        elif engine == "transformers_ud":
            from pythainlp.parse.transformers_ud import Parse

            new_tagger = Parse(model=model)
        elif engine == "spacy_thai":
            from pythainlp.parse.spacy_thai_engine import Parse

            new_tagger = Parse()
        elif engine == "ud_goeswith":
            from pythainlp.parse.ud_goeswith import Parse

            new_tagger = Parse(model=model)
        else:
            raise NotImplementedError("The engine doesn't support.")

        # Commit the cache only after the parser was built successfully,
        # so a failed load never leaves the key and the parser out of sync.
        _tagger = new_tagger
        _tagger_name = engine
        _tagger_model = cache_model

    return _tagger(text, tag=tag)