Source code for architxt.nlp.model

from dataclasses import dataclass
from os.path import commonprefix

from architxt.tree import TREE_POS

__all__ = ['AnnotatedSentence', 'Entity', 'Relation', 'TreeEntity', 'TreeRel']


[docs] @dataclass(slots=True) class Entity: """A named entity.""" name: str start: int end: int id: str def __post_init__(self) -> None: if self.start < 0: msg = "Start cannot be negative." raise ValueError(msg) if self.start >= self.end: msg = "Start cannot be larger than end." raise ValueError(msg) def __len__(self) -> int: return self.end - self.start def __lt__(self, other: 'Entity') -> bool: return self.start < other.start
[docs] @dataclass(slots=True) class TreeEntity: """An entity in a tree, the name is associate with a list of leaf tree position.""" name: str positions: list[TREE_POS] @property def root_pos(self) -> tuple[int, ...]: """Get the position that covers every position of the entity.""" prefix = commonprefix(self.positions) return tuple(prefix) if prefix != self.positions[0] else tuple(prefix[:-1]) def __post_init__(self) -> None: if not self.positions: msg = "Cannot have empty list of positions." raise ValueError(msg) def __len__(self) -> int: return len(self.positions)
[docs] @dataclass(slots=True) class Relation: """A relation between two entities.""" src: str # Ent id dst: str # Ent id name: str
[docs] @dataclass(slots=True) class TreeRel: """A relation between two entities in a tree.""" pos_start: TREE_POS pos_end: TREE_POS name: str
[docs] @dataclass(slots=True) class AnnotatedSentence: """A sentence with Entity/Relation annotations.""" txt: str entities: list[Entity] rels: list[Relation]