Skip to content

visitors

BindVisitor

Bases: BooleanExpressionVisitor[BooleanExpression]

Rewrites a boolean expression by replacing unbound references with references to fields in a struct schema.

Parameters:

Name Type Description Default
schema Schema

A schema to use when binding the expression.

required
case_sensitive bool

Whether to consider case when binding a reference to a field in a schema. The constructor declares no default, so a value must always be passed.

required

Raises:

Type Description
TypeError

In the case a predicate is already bound.

Source code in pyiceberg/expressions/visitors.py
class BindVisitor(BooleanExpressionVisitor[BooleanExpression]):
    """Rewrites a boolean expression by replacing unbound references with references to fields in a struct schema.

    Args:
        schema (Schema): A schema to use when binding the expression.
        case_sensitive (bool): Whether to consider case when binding a reference to a field in a schema.
            The constructor declares no default, so a value must always be passed.

    Raises:
        TypeError: In the case a predicate is already bound.
    """

    # Schema handed to ``predicate.bind`` for every unbound predicate.
    schema: Schema
    # Forwarded to ``predicate.bind`` as the ``case_sensitive`` keyword.
    case_sensitive: bool

    def __init__(self, schema: Schema, case_sensitive: bool) -> None:
        """Store the schema and the case-sensitivity flag used when binding."""
        self.schema = schema
        self.case_sensitive = case_sensitive

    def visit_true(self) -> BooleanExpression:
        """Return a new ``AlwaysTrue`` expression."""
        return AlwaysTrue()

    def visit_false(self) -> BooleanExpression:
        """Return a new ``AlwaysFalse`` expression."""
        return AlwaysFalse()

    def visit_not(self, child_result: BooleanExpression) -> BooleanExpression:
        """Wrap the already-visited child in a ``Not`` expression."""
        return Not(child=child_result)

    def visit_and(self, left_result: BooleanExpression, right_result: BooleanExpression) -> BooleanExpression:
        """Combine the already-visited left and right sides in an ``And`` expression."""
        return And(left=left_result, right=right_result)

    def visit_or(self, left_result: BooleanExpression, right_result: BooleanExpression) -> BooleanExpression:
        """Combine the already-visited left and right sides in an ``Or`` expression."""
        return Or(left=left_result, right=right_result)

    def visit_unbound_predicate(self, predicate: UnboundPredicate[L]) -> BooleanExpression:
        """Bind the predicate by calling its ``bind`` method with this visitor's schema and case-sensitivity flag."""
        return predicate.bind(self.schema, case_sensitive=self.case_sensitive)

    def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> BooleanExpression:
        """Reject a predicate that is already bound.

        Raises:
            TypeError: Always, since this visitor only accepts unbound predicates.
        """
        raise TypeError(f"Found already bound predicate: {predicate}")

BooleanExpressionVisitor

Bases: Generic[T], ABC

Source code in pyiceberg/expressions/visitors.py
class BooleanExpressionVisitor(Generic[T], ABC):
    """Abstract base class for visitors over boolean expressions.

    ``T`` is the type every visit method returns.
    """

    @abstractmethod
    def visit_true(self) -> T:
        """Visit method for an AlwaysTrue boolean expression.

        Note: This visit method has no arguments since AlwaysTrue instances have no context.

        Returns:
            T: The visitor's result for the AlwaysTrue expression.
        """

    @abstractmethod
    def visit_false(self) -> T:
        """Visit method for an AlwaysFalse boolean expression.

        Note: This visit method has no arguments since AlwaysFalse instances have no context.

        Returns:
            T: The visitor's result for the AlwaysFalse expression.
        """

    @abstractmethod
    def visit_not(self, child_result: T) -> T:
        """Visit method for a Not boolean expression.

        Args:
            child_result (T): The result of visiting the child of the Not boolean expression.

        Returns:
            T: The visitor's result for the Not expression.
        """

    @abstractmethod
    def visit_and(self, left_result: T, right_result: T) -> T:
        """Visit method for an And boolean expression.

        Args:
            left_result (T): The result of visiting the left side of the expression.
            right_result (T): The result of visiting the right side of the expression.

        Returns:
            T: The visitor's result for the And expression.
        """

    @abstractmethod
    def visit_or(self, left_result: T, right_result: T) -> T:
        """Visit method for an Or boolean expression.

        Args:
            left_result (T): The result of visiting the left side of the expression.
            right_result (T): The result of visiting the right side of the expression.

        Returns:
            T: The visitor's result for the Or expression.
        """

    @abstractmethod
    def visit_unbound_predicate(self, predicate: UnboundPredicate[L]) -> T:
        """Visit method for an unbound predicate in an expression tree.

        Args:
            predicate (UnboundPredicate[L]): An instance of an UnboundPredicate.

        Returns:
            T: The visitor's result for the unbound predicate.
        """

    @abstractmethod
    def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> T:
        """Visit method for a bound predicate in an expression tree.

        Args:
            predicate (BoundPredicate[L]): An instance of a BoundPredicate.

        Returns:
            T: The visitor's result for the bound predicate.
        """

visit_and(left_result, right_result) abstractmethod

Visit method for an And boolean expression.

Parameters:

Name Type Description Default
left_result T

The result of visiting the left side of the expression.

required
right_result T

The result of visiting the right side of the expression.

required
Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_and(self, left_result: T, right_result: T) -> T:
    """Visit method for an And boolean expression.

    Args:
        left_result (T): The result of visiting the left side of the expression.
        right_result (T): The result of visiting the right side of the expression.

    Returns:
        T: The visitor's result for the And expression.
    """

visit_bound_predicate(predicate) abstractmethod

Visit method for a bound predicate in an expression tree.

Parameters:

Name Type Description Default
predicate BoundPredicate[L]

An instance of a BoundPredicate.

required
Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> T:
    """Visit method for a bound predicate in an expression tree.

    Args:
        predicate (BoundPredicate[L]): An instance of a BoundPredicate.

    Returns:
        T: The visitor's result for the bound predicate.
    """

visit_false() abstractmethod

Visit method for an AlwaysFalse boolean expression.

Note: This visit method has no arguments since AlwaysFalse instances have no context.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_false(self) -> T:
    """Visit method for an AlwaysFalse boolean expression.

    Note: This visit method has no arguments since AlwaysFalse instances have no context.

    Returns:
        T: The visitor's result for the AlwaysFalse expression.
    """

visit_not(child_result) abstractmethod

Visit method for a Not boolean expression.

Parameters:

Name Type Description Default
child_result T

The result of visiting the child of the Not boolean expression.

required
Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_not(self, child_result: T) -> T:
    """Visit method for a Not boolean expression.

    Args:
        child_result (T): The result of visiting the child of the Not boolean expression.

    Returns:
        T: The visitor's result for the Not expression.
    """

visit_or(left_result, right_result) abstractmethod

Visit method for an Or boolean expression.

Parameters:

Name Type Description Default
left_result T

The result of visiting the left side of the expression.

required
right_result T

The result of visiting the right side of the expression.

required
Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_or(self, left_result: T, right_result: T) -> T:
    """Visit method for an Or boolean expression.

    Args:
        left_result (T): The result of visiting the left side of the expression.
        right_result (T): The result of visiting the right side of the expression.

    Returns:
        T: The visitor's result for the Or expression.
    """

visit_true() abstractmethod

Visit method for an AlwaysTrue boolean expression.

Note: This visit method has no arguments since AlwaysTrue instances have no context.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_true(self) -> T:
    """Visit method for an AlwaysTrue boolean expression.

    Note: This visit method has no arguments since AlwaysTrue instances have no context.

    Returns:
        T: The visitor's result for the AlwaysTrue expression.
    """

visit_unbound_predicate(predicate) abstractmethod

Visit method for an unbound predicate in an expression tree.

Parameters:

Name Type Description Default
predicate UnboundPredicate[L]

An instance of an UnboundPredicate.

required
Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_unbound_predicate(self, predicate: UnboundPredicate[L]) -> T:
    """Visit method for an unbound predicate in an expression tree.

    Args:
        predicate (UnboundPredicate[L]): An instance of an UnboundPredicate.

    Returns:
        T: The visitor's result for the unbound predicate.
    """

BoundBooleanExpressionVisitor

Bases: BooleanExpressionVisitor[T], ABC

Source code in pyiceberg/expressions/visitors.py
class BoundBooleanExpressionVisitor(BooleanExpressionVisitor[T], ABC):
    """Abstract visitor for boolean expressions whose predicates are already bound.

    Unbound predicates are rejected with a ``TypeError``. Bound predicates are handed to the
    module-level ``visit_bound_predicate`` function together with this visitor; that function is
    not part of this class (presumably it dispatches to the ``visit_*`` methods declared here).
    """

    @abstractmethod
    def visit_in(self, term: BoundTerm[L], literals: Set[L]) -> T:
        """Visit a bound In predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.
            literals (Set[L]): The set of literal values of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_not_in(self, term: BoundTerm[L], literals: Set[L]) -> T:
        """Visit a bound NotIn predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.
            literals (Set[L]): The set of literal values of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_is_nan(self, term: BoundTerm[L]) -> T:
        """Visit a bound IsNan predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_not_nan(self, term: BoundTerm[L]) -> T:
        """Visit a bound NotNan predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_is_null(self, term: BoundTerm[L]) -> T:
        """Visit a bound IsNull predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_not_null(self, term: BoundTerm[L]) -> T:
        """Visit a bound NotNull predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_equal(self, term: BoundTerm[L], literal: Literal[L]) -> T:
        """Visit a bound Equal predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.
            literal (Literal[L]): The literal of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_not_equal(self, term: BoundTerm[L], literal: Literal[L]) -> T:
        """Visit a bound NotEqual predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.
            literal (Literal[L]): The literal of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_greater_than_or_equal(self, term: BoundTerm[L], literal: Literal[L]) -> T:
        """Visit a bound GreaterThanOrEqual predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.
            literal (Literal[L]): The literal of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_greater_than(self, term: BoundTerm[L], literal: Literal[L]) -> T:
        """Visit a bound GreaterThan predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.
            literal (Literal[L]): The literal of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_less_than(self, term: BoundTerm[L], literal: Literal[L]) -> T:
        """Visit a bound LessThan predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.
            literal (Literal[L]): The literal of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_less_than_or_equal(self, term: BoundTerm[L], literal: Literal[L]) -> T:
        """Visit a bound LessThanOrEqual predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.
            literal (Literal[L]): The literal of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_true(self) -> T:
        """Visit a bound True predicate.

        Returns:
            T: The visitor's result for this expression.
        """

    @abstractmethod
    def visit_false(self) -> T:
        """Visit a bound False predicate.

        Returns:
            T: The visitor's result for this expression.
        """

    @abstractmethod
    def visit_not(self, child_result: T) -> T:
        """Visit a bound Not predicate.

        Args:
            child_result (T): The result of visiting the child of the Not expression.

        Returns:
            T: The visitor's result for the Not expression.
        """

    @abstractmethod
    def visit_and(self, left_result: T, right_result: T) -> T:
        """Visit a bound And predicate.

        Args:
            left_result (T): The result of visiting the left side of the expression.
            right_result (T): The result of visiting the right side of the expression.

        Returns:
            T: The visitor's result for the And expression.
        """

    @abstractmethod
    def visit_or(self, left_result: T, right_result: T) -> T:
        """Visit a bound Or predicate.

        Args:
            left_result (T): The result of visiting the left side of the expression.
            right_result (T): The result of visiting the right side of the expression.

        Returns:
            T: The visitor's result for the Or expression.
        """

    @abstractmethod
    def visit_starts_with(self, term: BoundTerm[L], literal: Literal[L]) -> T:
        """Visit a bound StartsWith predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.
            literal (Literal[L]): The literal of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    @abstractmethod
    def visit_not_starts_with(self, term: BoundTerm[L], literal: Literal[L]) -> T:
        """Visit a bound NotStartsWith predicate.

        Args:
            term (BoundTerm[L]): The bound term of the predicate.
            literal (Literal[L]): The literal of the predicate.

        Returns:
            T: The visitor's result for this predicate.
        """

    def visit_unbound_predicate(self, predicate: UnboundPredicate[L]) -> T:
        """Visit an unbound predicate.

        Args:
            predicate (UnboundPredicate[L]): An unbound predicate.

        Raises:
            TypeError: This always raises since an unbound predicate is not expected in a bound boolean expression.
        """
        raise TypeError(f"Not a bound predicate: {predicate}")

    def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> T:
        """Visit a bound predicate.

        Args:
            predicate (BoundPredicate[L]): A bound predicate.

        Returns:
            T: Whatever the module-level ``visit_bound_predicate`` function returns for this predicate.
        """
        # Class scope is skipped when resolving names inside a method body, so this bare name
        # is the module-level ``visit_bound_predicate`` function, not a recursive call.
        return visit_bound_predicate(predicate, self)

visit_and(left_result, right_result) abstractmethod

Visit a bound And predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_and(self, left_result: T, right_result: T) -> T:
    """Visit a bound And predicate.

    Args:
        left_result (T): The result of visiting the left side of the expression.
        right_result (T): The result of visiting the right side of the expression.

    Returns:
        T: The visitor's result for the And expression.
    """

visit_bound_predicate(predicate)

Visit a bound predicate.

Parameters:

Name Type Description Default
predicate BoundPredicate[L]

A bound predicate.

required
Source code in pyiceberg/expressions/visitors.py
def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> T:
    """Visit a bound predicate.

    Args:
        predicate (BoundPredicate[L]): A bound predicate.

    Returns:
        T: Whatever the module-level ``visit_bound_predicate`` function returns for this predicate.
    """
    # The bare name refers to the module-level ``visit_bound_predicate`` function,
    # which receives this visitor as its second argument; it is not a recursive call.
    return visit_bound_predicate(predicate, self)

visit_equal(term, literal) abstractmethod

Visit a bound Equal predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_equal(self, term: BoundTerm[L], literal: Literal[L]) -> T:
    """Visit a bound Equal predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.
        literal (Literal[L]): The literal of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_false() abstractmethod

Visit a bound False predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_false(self) -> T:
    """Visit a bound False predicate.

    Returns:
        T: The visitor's result for this expression.
    """

visit_greater_than(term, literal) abstractmethod

Visit a bound GreaterThan predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_greater_than(self, term: BoundTerm[L], literal: Literal[L]) -> T:
    """Visit a bound GreaterThan predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.
        literal (Literal[L]): The literal of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_greater_than_or_equal(term, literal) abstractmethod

Visit a bound GreaterThanOrEqual predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_greater_than_or_equal(self, term: BoundTerm[L], literal: Literal[L]) -> T:
    """Visit a bound GreaterThanOrEqual predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.
        literal (Literal[L]): The literal of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_in(term, literals) abstractmethod

Visit a bound In predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_in(self, term: BoundTerm[L], literals: Set[L]) -> T:
    """Visit a bound In predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.
        literals (Set[L]): The set of literal values of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_is_nan(term) abstractmethod

Visit a bound IsNan predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_is_nan(self, term: BoundTerm[L]) -> T:
    """Visit a bound IsNan predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_is_null(term) abstractmethod

Visit a bound IsNull predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_is_null(self, term: BoundTerm[L]) -> T:
    """Visit a bound IsNull predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_less_than(term, literal) abstractmethod

Visit a bound LessThan predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_less_than(self, term: BoundTerm[L], literal: Literal[L]) -> T:
    """Visit a bound LessThan predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.
        literal (Literal[L]): The literal of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_less_than_or_equal(term, literal) abstractmethod

Visit a bound LessThanOrEqual predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_less_than_or_equal(self, term: BoundTerm[L], literal: Literal[L]) -> T:
    """Visit a bound LessThanOrEqual predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.
        literal (Literal[L]): The literal of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_not(child_result) abstractmethod

Visit a bound Not predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_not(self, child_result: T) -> T:
    """Visit a bound Not predicate.

    Args:
        child_result (T): The result of visiting the child of the Not expression.

    Returns:
        T: The visitor's result for the Not expression.
    """

visit_not_equal(term, literal) abstractmethod

Visit a bound NotEqual predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_not_equal(self, term: BoundTerm[L], literal: Literal[L]) -> T:
    """Visit a bound NotEqual predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.
        literal (Literal[L]): The literal of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_not_in(term, literals) abstractmethod

Visit a bound NotIn predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_not_in(self, term: BoundTerm[L], literals: Set[L]) -> T:
    """Visit a bound NotIn predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.
        literals (Set[L]): The set of literal values of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_not_nan(term) abstractmethod

Visit a bound NotNan predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_not_nan(self, term: BoundTerm[L]) -> T:
    """Visit a bound NotNan predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_not_null(term) abstractmethod

Visit a bound NotNull predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_not_null(self, term: BoundTerm[L]) -> T:
    """Visit a bound NotNull predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_not_starts_with(term, literal) abstractmethod

Visit bound NotStartsWith predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_not_starts_with(self, term: BoundTerm[L], literal: Literal[L]) -> T:
    """Visit a bound NotStartsWith predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.
        literal (Literal[L]): The literal of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_or(left_result, right_result) abstractmethod

Visit a bound Or predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_or(self, left_result: T, right_result: T) -> T:
    """Visit a bound Or predicate.

    Args:
        left_result (T): The result of visiting the left side of the expression.
        right_result (T): The result of visiting the right side of the expression.

    Returns:
        T: The visitor's result for the Or expression.
    """

visit_starts_with(term, literal) abstractmethod

Visit bound StartsWith predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_starts_with(self, term: BoundTerm[L], literal: Literal[L]) -> T:
    """Visit a bound StartsWith predicate.

    Args:
        term (BoundTerm[L]): The bound term of the predicate.
        literal (Literal[L]): The literal of the predicate.

    Returns:
        T: The visitor's result for this predicate.
    """

visit_true() abstractmethod

Visit a bound True predicate.

Source code in pyiceberg/expressions/visitors.py
@abstractmethod
def visit_true(self) -> T:
    """Visit a bound True predicate.

    Returns:
        T: The visitor's result for this expression.
    """

visit_unbound_predicate(predicate)

Visit an unbound predicate.

Parameters:

Name Type Description Default
predicate UnboundPredicate[L]

An unbound predicate.

required

Raises: TypeError: This always raises since an unbound predicate is not expected in a bound boolean expression.

Source code in pyiceberg/expressions/visitors.py
def visit_unbound_predicate(self, predicate: UnboundPredicate[L]) -> T:
    """Visit an unbound predicate.

    Args:
        predicate (UnboundPredicate[L]): An unbound predicate.

    Raises:
        TypeError: This always raises since an unbound predicate is not expected in a bound boolean expression.
    """
    raise TypeError(f"Not a bound predicate: {predicate}")

bind(schema, expression, case_sensitive)

Traverse an expression to bind the predicates to the schema.

Parameters:

Name Type Description Default
schema Schema

A schema to use when binding the expression.

required
expression BooleanExpression

An expression containing UnboundPredicates that can be bound.

required
case_sensitive bool

Whether to consider case when binding a reference to a field in a schema. The function declares no default, so a value must always be passed.

required

Raises:

Type Description
TypeError

In the case a predicate is already bound.

Source code in pyiceberg/expressions/visitors.py
def bind(schema: Schema, expression: BooleanExpression, case_sensitive: bool) -> BooleanExpression:
    """Traverse an expression and bind its predicates to the schema.

    Args:
        schema (Schema): A schema to use when binding the expression.
        expression (BooleanExpression): An expression containing UnboundPredicates that can be bound.
        case_sensitive (bool): Whether to consider case when binding a reference to a field in a schema.

    Returns:
        BooleanExpression: The result of visiting ``expression`` with a ``BindVisitor``.

    Raises:
        TypeError: In the case a predicate is already bound.
    """
    binder = BindVisitor(schema, case_sensitive)
    return visit(expression, binder)

expression_to_plain_format(expressions, cast_int_to_datetime=False)

Format a Disjunctive Normal Form expression.

These are the formats that the expression can be fed into:

  • https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html
  • https://docs.dask.org/en/stable/generated/dask.dataframe.read_parquet.html

Unlike general DNF, which may contain Not expressions, the expressions passed here should already have their Not expressions rewritten. This can be done using rewrite_not(...).

Keep in mind that this is only used for page skipping, and still needs to filter on a row level.

Parameters:

Name Type Description Default
expressions Tuple[BooleanExpression, ...]

Expression in Disjunctive Normal Form.

required

Returns:

Type Description
List[List[Tuple[str, str, Any]]]

Formatted filter compatible with Dask and PyArrow.

Source code in pyiceberg/expressions/visitors.py
def expression_to_plain_format(
    expressions: Tuple[BooleanExpression, ...], cast_int_to_datetime: bool = False
) -> List[List[Tuple[str, str, Any]]]:
    """Format a Disjunctive Normal Form expression as a plain filter list.

    The result can be fed into:

    - https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html
    - https://docs.dask.org/en/stable/generated/dask.dataframe.read_parquet.html

    Unlike general DNF, which may contain Not expressions, the expressions passed
    here should already have their Not expressions rewritten. This can be done
    using ``rewrite_not(...)``.

    Keep in mind that this is only used for page skipping, and still needs to filter
    on a row level.

    Args:
        expressions: Expression in Disjunctive Normal Form.
        cast_int_to_datetime: Passed unchanged to the ``ExpressionToPlainFormat`` visitor.

    Returns:
        Formatted filter compatible with Dask and PyArrow.
    """
    formatter = ExpressionToPlainFormat(cast_int_to_datetime)
    # The tuple is read as a disjunction: expr1 ∨ expr2 ∨ ... ∨ exprN,
    # so each element is visited on its own with the same visitor instance.
    return [visit(disjunct, formatter) for disjunct in expressions]

visit(obj, visitor)

Apply a boolean expression visitor to any point within an expression.

The function traverses the expression in post-order fashion.

Parameters:

Name Type Description Default
obj BooleanExpression

An instance of a BooleanExpression.

required
visitor BooleanExpressionVisitor[T]

An instance of an implementation of the generic BooleanExpressionVisitor base class.

required

Raises:

Type Description
NotImplementedError

If attempting to visit an unsupported expression.

Source code in pyiceberg/expressions/visitors.py
@singledispatch
def visit(obj: BooleanExpression, visitor: BooleanExpressionVisitor[T]) -> T:
    """Apply a boolean expression visitor to any point within an expression.

    The expression is traversed in post-order fashion. This is the single-dispatch
    fallback, reached when no implementation is registered for the type of ``obj``.

    Args:
        obj (BooleanExpression): An instance of a BooleanExpression.
        visitor (BooleanExpressionVisitor[T]): An instance of an implementation of the generic BooleanExpressionVisitor base class.

    Raises:
        NotImplementedError: If attempting to visit an unsupported expression.
    """
    message = f"Cannot visit unsupported expression: {obj}"
    raise NotImplementedError(message)