architxt.tree#

Functions

has_type()

Check if the given tree object has the specified type(s).

is_sub_tree(tree)

Determine whether the given Tree instance is a subtree.

Classes

NodeLabel(label_type[, label])

NodeType(*values)

Tree(label[, children, metadata, oid])

class architxt.tree.NodeLabel(label_type, label='')[source]#

Bases: str

classmethod fromstring(label)[source]#
Return type:

Union[NodeLabel, str]

name#

Type:    str

type#

Type:    NodeType

class architxt.tree.NodeType(*values)[source]#

Bases: str, Enum

COLL = 'COLL'#

Type:    str

ENT = 'ENT'#

Type:    str

GROUP = 'GROUP'#

Type:    str

REL = 'REL'#

Type:    str

class architxt.tree.Tree(label, children=None, metadata=None, oid=None)[source]#

Bases: PersistentList[_SubTree | str]

classmethod fromstring(text)[source]#

Read a tree from a LISP-style notation.

Trees are represented as nested bracketings, such as:

(S (NP (NNP John)) (VP (V runs)))

Parameters:

text (str) – The string to read

Return type:

Tree

Returns:

A tree corresponding to the string representation text.

>>> t = Tree.fromstring('(S (X xxx) (Y yyy))')
>>> print(t)
(S (X xxx) (Y yyy))
append(child)[source]#

S.append(value) – append value to the end of the sequence

Return type:

None

clear()[source]#

Remove all items from the list.

Changed in version 4.5.2: Now marks the list as changed.

Return type:

None

copy()[source]#

Copy an entire tree.

Return type:

Tree

Returns:

A new copy of the tree.

detach()[source]#

Detach a subtree from its parent.

Return type:

Tree

Returns:

The detached tree.

>>> t = Tree.fromstring('(S (A xxx) (B yyy))')
>>> detached = t[0].detach()
>>> print(detached.root)
(A xxx)
>>> print(t)
(S (B yyy))
entities()[source]#

Get a tuple of subtrees that are entities.

>>> t = Tree.fromstring('(S (X (ENT::person Alice) (ENT::fruit apple)) (Y (ENT::person Bob) (ENT::animal rabbit)))')
>>> list(t.entities()) == [t[0, 0], t[0, 1], t[1, 0], t[1, 1]]
True
>>> del t[0]
>>> list(t.entities()) == [t[0, 0], t[0, 1]]
True
>>> list(t[0, 0].entities()) == [t[0, 0]]
True
Return type:

tuple[_TypedTree, …]

entity_label_count()[source]#

Return a Counter object that counts the labels of entity subtrees.

>>> t = Tree.fromstring('(S (X (ENT::person Alice) (ENT::fruit apple)) (Y (ENT::person Bob) (ENT::animal rabbit)))')
>>> t.entity_label_count()
Counter({'person': 2, 'fruit': 1, 'animal': 1})
Return type:

Counter[str]

entity_labels()[source]#

Get the set of entity labels present in the tree.

>>> t = Tree.fromstring('(S (X (ENT::person Alice) (ENT::fruit apple)) (Y (ENT::person Bob) (ENT::animal rabbit)))')
>>> sorted(t.entity_labels())
['animal', 'fruit', 'person']
>>> sorted(t[0].entity_labels())
['fruit', 'person']
>>> del t[0]
>>> sorted(t.entity_labels())
['animal', 'person']
Return type:

set[str]

extend(children)[source]#

S.extend(iterable) – extend sequence by appending elements from the iterable

Return type:

None

group_instances(group_name)[source]#

Get a DataFrame containing all instances of a specified group within the tree.

Each row in the DataFrame represents an instance of the group, and each column represents an entity in that group, with the value being a concatenated string of that entity’s leaves.

Parameters:

group_name (str) – The name of the group to search for.

Return type:

DataFrame

Returns:

A pandas DataFrame containing instances of the specified group.

>>> t = Tree.fromstring('(S (GROUP::A (ENT::person Alice) (ENT::fruit apple)) '
...                     '(GROUP::A (ENT::person Bob) (ENT::fruit banana)) '
...                     '(GROUP::B (ENT::person Charlie) (ENT::animal dog)))')
>>> t.group_instances("A")
  person   fruit
0  Alice   apple
1    Bob  banana
>>> t.group_instances("B")
    person animal
0  Charlie    dog
>>> t.group_instances("C")
Empty DataFrame
Columns: []
Index: []
>>> t[0].group_instances("A")
  person  fruit
0  Alice  apple
groups()[source]#

Get the set of group names present within the tree.

Return type:

set[str]

Returns:

A set of unique group names within the tree.

>>> t = Tree.fromstring('(S (GROUP::A x) (GROUP::B y) (X (GROUP::C z)))')
>>> sorted(t.groups())
['A', 'B', 'C']
>>> sorted(t[0].groups())
['A']
has_duplicate_entity()[source]#

Check if there are duplicate entity labels.

>>> t = Tree.fromstring('(S (X (ENT::person Alice) (ENT::fruit apple)) (Y (ENT::person Bob) (ENT::animal rabbit)))')
>>> t.has_duplicate_entity()
True
>>> t[0].has_duplicate_entity()
False
Return type:

bool

has_entity_child()[source]#

Check if at least one direct child is an entity.

>>> t = Tree.fromstring('(S (X (ENT::person Alice) (ENT::fruit apple)) (Y (ENT::person Bob) (ENT::animal rabbit)))')
>>> t.has_entity_child()
False
>>> t[0].has_entity_child()
True
Return type:

bool

has_unlabelled_nodes()[source]#

Check if any child has a non-typed label.

Return type:

bool

Returns:

A boolean indicating if the node contains any non-typed label.

>>> t = Tree.fromstring('(S (X xxx) (Y yyy) (Z zzz))')
>>> t.has_unlabelled_nodes()
True
>>> t = Tree.fromstring('(S (ENT::X xxx) (REL::Y yyy) (COLL::Z zzz))')
>>> t.has_unlabelled_nodes()
False
insert(pos, child)[source]#

S.insert(index, value) – insert value before index

Return type:

None

leaf_position(index)[source]#

Return the tree position of the index-th leaf in this tree.

The tree position is a tuple of indices that corresponds to the location of the index-th leaf in the tree structure. If tp = self.leaf_position(i), then self[tp] should be the same as self.leaves()[i].

Parameters:

index (int) – The index of the leaf for which to find the tree position.

Return type:

tuple[int, …]

Returns:

A tuple representing the tree position of the index-th leaf.

Raises:

IndexError – If index is negative or if there are fewer than index + 1 leaves in the tree.

>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
>>> t.leaf_position(0)
(0, 0, 0)
>>> t.leaf_position(4)
(1, 1, 1, 0)
leaves()[source]#

Return the leaves of the tree.

>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
>>> list(t.leaves())
['the', 'dog', 'chased', 'the', 'cat']
Yield:

This tree’s leaves. The order reflects the order of the leaves in the tree’s hierarchical structure.

Return type:

list[str]

merge(tree)[source]#

Merge two trees into one.

The roots of both trees are merged into a single root, while maintaining the level of each subtree.

Return type:

Tree

pformat(margin=None, indent=0)[source]#

Get a pretty-printed string representation of this tree.

Parameters:
  • margin (Optional[int]) – The right margin at which to do line-wrapping.

  • indent (int) – The indentation level at which printing begins.

Return type:

str

Returns:

A pretty-printed string representation of this tree.

>>> t = Tree('S', [Tree('X', ['xxx']), Tree('Y', ['yyy'])])
>>> t.pformat()
'(S (X xxx) (Y yyy))'
pop(pos=-1, *, recursive=True)[source]#

Delete an element from the tree at the specified position pos.

If recursive is True and the parent tree becomes empty after the deletion, the empty parent nodes are recursively deleted as well.

Parameters:
  • pos (int) – The position (index) of the element to delete in the tree.

  • recursive (bool) – If an empty tree should be removed from the parent.

Return type:

Union[Tree, str]

Returns:

The element at the position. The function modifies the tree in place.

>>> t = Tree.fromstring("(S (NP Alice) (VP (VB like) (NP (NNS apples))))")
>>> print(t[(1, 1)])
(NP (NNS apples))
>>> subtree = t[1, 1].pop(0)
>>> print(t)
(S (NP Alice) (VP (VB like)))
>>> subtree = t.pop(0)
>>> print(t)
(S (VP (VB like)))
>>> subtree = t[0].pop(0, recursive=False)
>>> print(t)
(S (VP ))
positions(*, order='preorder')[source]#

Get all the positions in the tree.

>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
>>> list(t.positions())
[(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0), (1, 0, 0), ...]
>>> for pos in t.positions(order='leaves'):
...     t[pos] = t[pos][::-1].upper()
>>> print(t)
(S (NP (D EHT) (N GOD)) (VP (V DESAHC) (NP (D EHT) (N TAC))))
Parameters:

order (Literal['preorder', 'postorder', 'bothorder', 'leaves']) – One of: preorder, postorder, bothorder, leaves.

Yield:

All positions in the tree in the given order

Return type:

Generator[tuple[int, …], None, None]

pretty_print(highlight=(), stream=None, maxwidth=32)[source]#

Pretty-print this tree as ASCII or Unicode art.

It relies on nltk.tree.prettyprinter.TreePrettyPrinter.

Parameters:
  • stream (Optional[TextIO]) – The file to print to.

  • highlight (Sequence[Union[Tree, int]]) – Optionally, a sequence of Tree objects in tree which should be highlighted. Has the effect of only applying colors to nodes in this sequence.

  • maxwidth (int) – Maximum number of characters before a label starts to wrap. Leaves with more than maxwidth characters will be truncated.

Return type:

None

productions()[source]#

Generate the productions that correspond to the non-terminal nodes of the tree.

For each subtree of the form (P: C1 C2 … Cn) this produces a production of the form P -> C1 C2 … Cn.

>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
>>> t.productions()
[S -> NP VP, NP -> D N, D -> 'the', N -> 'dog', VP -> V NP, V -> 'chased',
NP -> D N, D -> 'the', N -> 'cat']
Return type:

list[Production]

reduce(skip_types=None)[source]#

Attempt to reduce this subtree, lifting the children into the parent node, replacing the subtree.

Reduction happens if the tree has exactly one child and its label is not in skip_types (if skip_types is given).

Parameters:

skip_types (Optional[set[Union[str, NodeType]]]) – A set of node types that should be kept, or None to reduce all single-child nodes.

Return type:

bool

Returns:

True if the subtree was reduced, False otherwise.

>>> t = Tree.fromstring("(S (NP Alice) (VP (VB like) (NP (NNS apples))))")
>>> t[1, 1].reduce()
True
>>> print(t)
(S (NP Alice) (VP (VB like) (NNS apples)))
>>> t[0].reduce()
True
>>> print(t)
(S Alice (VP (VB like) (NNS apples)))
reduce_all(skip_types=None)[source]#

Recursively reduces all reducible subtrees in the tree.

The reduction process continues until no further reductions are possible. Subtrees can be skipped if their types are listed in skip_types.

Parameters:

skip_types (Optional[set[Union[str, NodeType]]]) – A set of node types that should be kept, or None to reduce all single-child nodes.

>>> t = Tree.fromstring("(S (X (Y (Z (NP Alice)))) (VP (VB likes) (NP (NNS apples))))")
>>> t.reduce_all()
>>> print(t)
(S Alice (VP likes apples))
Return type:

None

remove(child, *, recursive=True)[source]#

S.remove(value) – remove first occurrence of value. Raise ValueError if the value is not present.

Return type:

None

subtrees(filter_fn=None)[source]#

Get all the subtrees of this tree, optionally restricted to trees matching the filter function.

Parameters:

filter_fn (Optional[Callable[[Tree], bool]]) – The function used to filter the subtrees; only subtrees for which it returns True are yielded.

>>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
>>> for s in t.subtrees(lambda t: t.height == 2):
...     print(s)
(D the)
(N dog)
(V chased)
(D the)
(N cat)
Return type:

Generator[Tree, None, None]

to_svg(highlight=())[source]#

Pretty-print this tree as SVG.

It relies on nltk.tree.prettyprinter.TreePrettyPrinter.

Parameters:

highlight (Sequence[Union[Tree, int]]) – Optionally, a sequence of Tree objects in tree which should be highlighted. Has the effect of only applying colors to nodes in this sequence.

Return type:

str

property depth#

Get the depth of the tree.

>>> t = Tree.fromstring('(S (X (ENT::person Alice) (ENT::fruit apple)) (Y (ENT::person Bob) (ENT::animal rabbit)))')
>>> t.depth
1
>>> t[0].depth
2
>>> t[0, 0].depth
3
Return type:

int

property height#

Get the height of the tree.

>>> t = Tree.fromstring('(S (X (ENT::person Alice) (ENT::fruit apple)) (Y (ENT::person Bob) (ENT::animal rabbit)))')
>>> t.height
4
>>> t[0].height
3
>>> t[0, 0].height
2
Return type:

int

property label#

The label of this tree.

Return type:

Union[NodeLabel, str]

property metadata#
Return type:

MutableMapping[str, Any]

property oid#
Return type:

UUID

property parent#

The parent of this tree, or None if it has no parent.

>>> t = Tree.fromstring('(S (A xxx) (A xxx))')
>>> t.parent
>>> t[0].parent is t
True
>>> t[1].parent is t
True
Return type:

Optional[Tree]

property parent_index#

The index of this tree in its parent.

I.e., tree.parent[tree.parent_index] is tree. Note that tree.parent_index is not necessarily equal to tree.parent.index(tree), since the index() method returns the first child that is equal to its argument.

>>> t = Tree.fromstring('(S (A xxx) (A xxx))')
>>> t.parent_index
>>> t[0].parent_index
0
>>> t[1].parent_index
1
Return type:

Optional[int]

property position#

The tree position of this tree, relative to the root of the tree.

>>> t = Tree.fromstring('(S (X (ENT::person Alice) (ENT::fruit apple)) (Y (ENT::person Bob) (ENT::animal rabbit)))')
>>> t.position
()
>>> t[1, 0].position
(1, 0)
Return type:

tuple[int, …]

property root#

The root of this tree.

I.e., the unique ancestor of this tree whose parent is None. If tree.parent is None, then tree is its own root.

>>> t = Tree.fromstring('(S (X (ENT::person Alice) (ENT::fruit apple)) (Y (ENT::person Bob) (ENT::animal rabbit)))')
>>> t[0, 0].root is t
True
Return type:

Tree

architxt.tree.has_type(t: _SubTree, types: set[architxt.tree.NodeType | str] | architxt.tree.NodeType | str | None = None) -> TypeGuard['_TypedSubTree'][source]#
architxt.tree.has_type(t: architxt.tree.Tree, types: set[architxt.tree.NodeType | str] | architxt.tree.NodeType | str | None = None) -> TypeGuard['_TypedTree']
architxt.tree.has_type(t, types: set[architxt.tree.NodeType | str] | architxt.tree.NodeType | str | None = None)
architxt.tree.has_type(t, types=None)

Check if the given tree object has the specified type(s).

Parameters:
  • t (Any) – The object to check type for (can be a Tree, Production, or NodeLabel).

  • types (Union[set[Union[NodeType, str]], NodeType, str, None]) – The types to check for (can be a set of strings, a string, or None).

Return type:

bool

Returns:

True if the object has the specified type(s), False otherwise.

>>> tree = Tree.fromstring('(S (ENT Alice) (REL Bob))')
>>> has_type(tree, NodeType.ENT)
False
>>> has_type(tree[0], NodeType.ENT)
True
>>> has_type(tree[0], 'ENT')
True
>>> has_type(tree[1], NodeType.ENT)
False
>>> has_type(tree[1], {NodeType.ENT, NodeType.REL})
True
architxt.tree.is_sub_tree(tree)[source]#

Determine whether the given Tree instance is a subtree.

This helper function serves as a type guard to assist static type checkers like mypy in refining the type of tree when the function returns True.

Parameters:

tree (Tree) – The tree instance to check.

Return type:

TypeGuard[_SubTree]

Returns:

True if tree is a subtree (i.e., has a parent), False otherwise.

>>> t = Tree.fromstring('(S (X xxx) (Y yyy))')
>>> is_sub_tree(t)
False
>>> is_sub_tree(t[0])
True