# Source code for architxt.nlp.entity_extractor
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING
from aiostream import pipe, stream
if TYPE_CHECKING:
from collections.abc import AsyncIterable, AsyncIterator, Iterable
from architxt.nlp.model import AnnotatedSentence
__all__ = ['EntityExtractor']
[docs]
class EntityExtractor(ABC):
    """
    Abstract base class for callables that annotate a sentence with entities.

    Subclasses implement :meth:`__call__` for a single sentence; :meth:`batch`
    and :meth:`enrich` apply it over synchronous or asynchronous iterables.
    """

    @property
    def name(self) -> str:
        """Return the name of the concrete extractor class."""
        return self.__class__.__name__

    @abstractmethod
    def __call__(self, sentence: str) -> AnnotatedSentence:
        """
        Extract entities from a single sentence.

        :param sentence: The raw sentence text to process.
        :return: The annotated sentence produced by the extractor.
        """
        ...

    async def batch(
        self,
        sentences: Iterable[str] | AsyncIterable[str],
    ) -> AsyncIterator[AnnotatedSentence]:
        """
        Apply the extractor to every sentence of an iterable.

        :param sentences: A synchronous or asynchronous iterable of raw sentences.
        :yield: The result of calling the extractor on each sentence.
        """
        # The bound ``__call__`` method is handed to aiostream explicitly rather
        # than the instance itself, so the mapped callable is a plain method.
        source = stream.iterate(sentences)
        annotated_stream = source | pipe.map(self.__call__)

        async with annotated_stream.stream() as streamer:
            async for annotated in streamer:
                yield annotated

    async def enrich(
        self,
        sentences: Iterable[AnnotatedSentence] | AsyncIterable[AnnotatedSentence],
    ) -> AsyncIterator[AnnotatedSentence]:
        """
        Add the entities found by this extractor to already annotated sentences.

        Each incoming sentence is modified in place: the entities extracted from
        its ``txt`` are appended to its existing ``entities`` list, and the same
        object is then yielded.

        :param sentences: A synchronous or asynchronous iterable of annotated sentences.
        :yield: Each input sentence, with the newly extracted entities appended.
        """

        def _with_extra_entities(target: AnnotatedSentence) -> AnnotatedSentence:
            # Run the extractor on the raw text, then extend the existing list
            # in place so entities already present are kept.
            found = self(target.txt).entities
            target.entities.extend(found)
            return target

        source = stream.iterate(sentences)
        enriched_stream = source | pipe.map(_with_extra_entities)

        async with enriched_stream.stream() as streamer:
            async for enriched in streamer:
                yield enriched