Skip to content

statistics

UpdateStatistics

Bases: UpdateTableMetadata['UpdateStatistics']

Run statistics management operations using APIs.

APIs include the set_statistics and remove_statistics operations.

Use table.update_statistics().<operation>().commit() to run a specific operation. Use table.update_statistics().<operation-one>().<operation-two>().commit() to run multiple operations.

Pending changes are applied on commit.

We can also use context managers to make more changes. For example:

    with table.update_statistics() as update:
        update.set_statistics(statistics_file=statistics_file)
        update.remove_statistics(snapshot_id=2)

Source code in pyiceberg/table/update/statistics.py
class UpdateStatistics(UpdateTableMetadata["UpdateStatistics"]):
    """
    Run statistics management operations using APIs.

    APIs include the set_statistics and remove_statistics operations.

    Use table.update_statistics().<operation>().commit() to run a specific operation.
    Use table.update_statistics().<operation-one>().<operation-two>().commit() to run multiple operations.

    Every operation is queued in call order; pending changes are applied on commit.

    We can also use context managers to make more changes. For example:

    with table.update_statistics() as update:
        update.set_statistics(statistics_file=statistics_file)
        update.remove_statistics(snapshot_id=2)
    """

    # Pending updates, in the order the operations were called. The class-level
    # default is an immutable empty tuple, so `+=` on an instance rebinds a new
    # tuple on that instance and never mutates state shared between instances.
    _updates: Tuple[TableUpdate, ...] = ()

    def __init__(self, transaction: "Transaction") -> None:
        """Create an update builder bound to the given transaction.

        Args:
            transaction: The transaction handed to the base class initializer.
        """
        super().__init__(transaction)

    def set_statistics(self, statistics_file: StatisticsFile) -> "UpdateStatistics":
        """Queue a SetStatisticsUpdate for the given statistics file.

        Args:
            statistics_file: The statistics file to pass as `statistics` to the update.

        Returns:
            This instance, so that operations can be chained.
        """
        self._updates += (
            SetStatisticsUpdate(
                statistics=statistics_file,
            ),
        )

        return self

    def remove_statistics(self, snapshot_id: int) -> "UpdateStatistics":
        """Queue a RemoveStatisticsUpdate for the given snapshot id.

        Args:
            snapshot_id: The snapshot id passed to the update.

        Returns:
            This instance, so that operations can be chained.
        """
        # Append rather than assign: a plain assignment here would discard every
        # operation queued earlier in the same chain or context manager.
        self._updates += (
            RemoveStatisticsUpdate(
                snapshot_id=snapshot_id,
            ),
        )

        return self

    def _commit(self) -> UpdatesAndRequirements:
        """Return the queued updates paired with an empty tuple of requirements."""
        return self._updates, ()