Source code for pythainlp.coref.core

# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0
from typing import List

_MODEL = None


[docs] def coreference_resolution( texts: List[str], model_name: str = "han-coref-v1.0", device: str = "cpu" ): """ Coreference Resolution :param List[str] texts: list of texts to apply coreference resolution to :param str model_name: coreference resolution model :param str device: device for running coreference resolution model on\ ("cpu", "cuda", and others) :return: List of texts with coreference resolution :rtype: List[dict] :Options for model_name: * *han-coref-v1.0* - (default) Han-Coref: Thai coreference resolution\ by PyThaiNLP v1.0 :Example: :: from pythainlp.coref import coreference_resolution print( coreference_resolution( ["Bill Gates ได้รับวัคซีน COVID-19 เข็มแรกแล้ว ระบุ ผมรู้สึกสบายมาก"] ) ) # output: # [ # {'text': 'Bill Gates ได้รับวัคซีน COVID-19 เข็มแรกแล้ว ระบุ ผมรู้สึกสบายมาก', # 'clusters_string': [['Bill Gates', 'ผม']], # 'clusters': [[(0, 10), (50, 52)]]} # ] """ global _MODEL if isinstance(texts, str): texts = [texts] if _MODEL is None and model_name == "han-coref-v1.0": from pythainlp.coref.han_coref import HanCoref _MODEL = HanCoref(device=device) if _MODEL: return _MODEL.predict(texts) return [ {"text": text, "clusters_string": [], "clusters": []} for text in texts ]