Source code for architxt.simplification.tree_rewriting.operations.reductions

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

from architxt.tree import NodeType, Tree, has_type

from .operation import Operation

if TYPE_CHECKING:
    from collections.abc import Iterable

    from architxt.similarity import TREE_CLUSTER
    from architxt.tree import _SubTree

# Names exported by a star-import of this module.
# Only the two concrete operations are listed; the abstract `ReduceOperation` base is not.
__all__ = [
    'ReduceBottomOperation',
    'ReduceTopOperation',
]


class ReduceOperation(Operation, ABC):
    """
    Shared machinery for the reduction operations.

    Subclasses choose which subtrees are reduced by implementing
    :meth:`subtrees_to_reduce`. :meth:`apply` then replaces every selected
    subtree, inside its parent, by the subtree's own children.
    """

    @abstractmethod
    def subtrees_to_reduce(self, tree: Tree) -> Iterable[_SubTree]:
        """
        Select the subtrees of ``tree`` that must be reduced.

        :param tree: The tree to inspect.
        :return: The subtrees that :meth:`apply` will replace by their children.
        """

    def apply(self, tree: Tree, *, equiv_subtrees: TREE_CLUSTER) -> bool:  # noqa: ARG002
        """
        Replace each subtree selected by :meth:`subtrees_to_reduce` with its children.

        :param tree: The tree to rewrite in place.
        :param equiv_subtrees: Unused by this operation.
        :return: ``True`` if at least one subtree was reduced, ``False`` otherwise.
        """

        def child_labels(node: Tree) -> tuple[str, ...]:
            # Snapshot of the labels of the direct children of `node`, as strings.
            return tuple(str(child.label) for child in node)

        changed = False

        for target in self.subtrees_to_reduce(tree):
            # Read everything that describes the subtree before the parent is modified.
            owner = target.parent
            target_position = target.position
            target_label = target.label
            labels_before = child_labels(owner)

            # Convert the subtree's children into independent nodes.
            # `target[:]` is copied right away; `detach()` itself only runs when the
            # generator is consumed by the slice assignment below.
            detached_children = (child.detach() for child in target[:])

            # Splice the children into the parent, in the slot the subtree occupied.
            slot = target.parent_index
            owner[slot : slot + 1] = detached_children

            labels_after = child_labels(owner)

            self._log_to_mlflow(
                {
                    'label': str(target_label),
                    'position': target_position,
                    'labels.old': labels_before,
                    'labels.new': labels_after,
                }
            )
            changed = True

        return changed
class ReduceBottomOperation(ReduceOperation):
    """
    Reduce the unlabelled nodes found at the bottom level of a tree.

    A subtree is selected when it has no specific type and all of its children
    are of type `ENT`. The children of such a subtree are then moved directly
    under the subtree's parent, which flattens the tree at this level.
    """

    def subtrees_to_reduce(self, tree: Tree) -> Iterable[_SubTree]:
        """
        Collect the untyped subtrees of ``tree`` whose children are all `ENT` nodes.

        The root itself is never a candidate (``include_self=False``).

        :param tree: The tree to inspect.
        :return: A list of the matching subtrees, in the order yielded by
            ``tree.subtrees(include_self=False, reverse=True)``.
        """
        selected = []

        for candidate in tree.subtrees(include_self=False, reverse=True):
            # Typed nodes are kept as they are.
            if has_type(candidate):
                continue

            if all(has_type(child, NodeType.ENT) for child in candidate):
                selected.append(candidate)

        return selected
class ReduceTopOperation(ReduceOperation):
    """
    Reduce the unlabelled nodes found at the top level of a tree.

    Direct children of the tree that are themselves trees without a specific
    type are selected. Their children are then moved directly under the parent
    node, which flattens the tree at this level.
    """

    def subtrees_to_reduce(self, tree: Tree) -> Iterable[_SubTree]:
        """
        Collect the direct children of ``tree`` that are untyped trees.

        :param tree: The tree whose direct children are inspected.
        :return: A list of the matching children, in iteration order.
        """

        def is_untyped_tree(node: object) -> bool:
            # Only `Tree` children qualify; any other child is ignored.
            return isinstance(node, Tree) and not has_type(node)

        return list(filter(is_untyped_tree, tree))