architxt.cli.loader

architxt.cli.loader#

Functions

load_corpus([corpus_path, language, ...])

Load a corpus and print the database schema as a CFG.

load_document([file, raw, root_name, ...])

Read and parse a document file into a structured tree.

load_graph([uri, username, password, ...])

load_sql([uri, simplify_association, ...])

Extract the database schema and relations to a tree format.

architxt.cli.loader.load_corpus(corpus_path=typer.Argument(..., exists=True, readable=True, help='Path to the input corpus.'), *, language=typer.Option(['French'], help='Language of the input corpus.'), corenlp_url=typer.Option('http://localhost:9000', help='URL of the CoreNLP server.'), sample=typer.Option(None, help='Number of sentences to sample from the corpus.', min=1), resolver=typer.Option(None, help='The entity resolver to use when loading the corpus.', click_type=click.Choice(['umls', 'mesh', 'rxnorm', 'go', 'hpo'], case_sensitive=False)), output=typer.Option(None, help='Path to save the result.'), merge_existing=typer.Option(False, help='Should we merge data if output file already exist'), cache=typer.Option(True, help='Enable caching of the analyzed corpus to prevent re-parsing.'), log=typer.Option(False, help='Enable logging to MLFlow.'))[source]#

Load a corpus and print the database schema as a CFG.

Return type:

None

architxt.cli.loader.load_document(file=typer.Argument(..., exists=True, readable=True, help='The document file to read.'), *, raw=typer.Option(False, help='Enable row reading, skipping any transformation to convert it to the metamodel.'), root_name=typer.Option('ROOT', help='The root node name.'), sample=typer.Option(None, help='Number of element to sample from the document.', min=1), output=typer.Option(None, help='Path to save the result.'), merge_existing=typer.Option(False, help='Should we merge data if output file already exist'))[source]#

Read and parse a document file into a structured tree.

Return type:

None

architxt.cli.loader.load_graph(uri=typer.Argument(..., help='Database connection string.'), *, username=typer.Option('neo4j', help='Username to use for authentication.'), password=typer.Option(None, help='Password to use for authentication.'), sample=typer.Option(None, help='Number of sentences to sample from the corpus.', min=1), output=typer.Option(None, help='Path to save the result.'), merge_existing=typer.Option(False, help='Should we merge data if output file already exist'))[source]#

Return type:

None

architxt.cli.loader.load_sql(uri=typer.Argument(..., help='Database connection string.'), *, simplify_association=typer.Option(True, help='Simplify association tables.'), sample=typer.Option(None, help='Number of sentences to sample from the corpus.', min=1), output=typer.Option(None, help='Path to save the result.'), merge_existing=typer.Option(False, help='Should we merge data if output file already exist'))[source]#

Extract the database schema and relations to a tree format.

Return type:

None