Skip to content

snapshots

Operation

Bases: Enum

Describes the operation.

Possible operation values are:
  • append: Only data files were added and no files were removed.
  • replace: Data and delete files were added and removed without changing table data; i.e., compaction, changing the data file format, or relocating data files.
  • overwrite: Data and delete files were added and removed in a logical overwrite operation.
  • delete: Data files were removed and their contents logically deleted and/or delete files were added to delete rows.
Source code in pyiceberg/table/snapshots.py
class Operation(Enum):
    """Kind of change recorded for a snapshot.

    Members, listed by their string value:
        - append: Only data files were added and no files were removed.
        - replace: Data and delete files were added and removed without
          changing table data; i.e., compaction, changing the data file
          format, or relocating data files.
        - overwrite: Data and delete files were added and removed in a
          logical overwrite operation.
        - delete: Data files were removed and their contents logically
          deleted and/or delete files were added to delete rows.
    """

    APPEND = "append"
    REPLACE = "replace"
    OVERWRITE = "overwrite"
    DELETE = "delete"

    def __repr__(self) -> str:
        """Render the member as ``Operation.<NAME>``."""
        return "Operation.{}".format(self.name)

__repr__()

Return the string representation of the Operation class.

Source code in pyiceberg/table/snapshots.py
def __repr__(self) -> str:
    """Render the member as ``Operation.<NAME>``."""
    return "Operation.{}".format(self.name)

Snapshot

Bases: IcebergBaseModel

Source code in pyiceberg/table/snapshots.py
class Snapshot(IcebergBaseModel):
    """Model of a single snapshot entry.

    Every aliased field is bound to its hyphenated name (for example
    ``snapshot-id``). Only ``snapshot_id`` has no default.
    """

    snapshot_id: int = Field(alias="snapshot-id")
    parent_snapshot_id: Optional[int] = Field(alias="parent-snapshot-id", default=None)
    sequence_number: Optional[int] = Field(alias="sequence-number", default=INITIAL_SEQUENCE_NUMBER)
    # Defaults to the current wall-clock time, in milliseconds, when not supplied.
    timestamp_ms: int = Field(alias="timestamp-ms", default_factory=lambda: int(time.time() * 1000))
    manifest_list: Optional[str] = Field(
        alias="manifest-list", description="Location of the snapshot's manifest list file", default=None
    )
    summary: Optional[Summary] = Field(default=None)
    schema_id: Optional[int] = Field(alias="schema-id", default=None)

    def __str__(self) -> str:
        """Return a compact one-line description of the snapshot.

        The shape is ``"<operation>: id=<snapshot_id>, parent_id=<id>, schema_id=<id>"``.
        The operation prefix is omitted when there is no summary; the parent and
        schema parts are omitted when the respective id is ``None``.
        """
        operation = f"{self.summary.operation}: " if self.summary else ""
        # Compare against None explicitly (as done for schema_id) so that a
        # parent id of 0 is still reported instead of being treated as absent.
        parent_id = f", parent_id={self.parent_snapshot_id}" if self.parent_snapshot_id is not None else ""
        schema_id = f", schema_id={self.schema_id}" if self.schema_id is not None else ""
        return f"{operation}id={self.snapshot_id}{parent_id}{schema_id}"

    def manifests(self, io: FileIO) -> List[ManifestFile]:
        """Return the manifests for the given snapshot.

        Args:
            io: File IO handed to the manifest-list reader.

        Returns:
            The manifests read from ``manifest_list``, or an empty list when no
            manifest list location is set.
        """
        if self.manifest_list:
            return list(_manifests(io, self.manifest_list))
        return []

__str__()

Return the string representation of the Snapshot class.

Source code in pyiceberg/table/snapshots.py
def __str__(self) -> str:
    """Return a compact one-line description of the snapshot.

    The shape is ``"<operation>: id=<snapshot_id>, parent_id=<id>, schema_id=<id>"``.
    The operation prefix is omitted when there is no summary; the parent and
    schema parts are omitted when the respective id is ``None``.
    """
    operation = f"{self.summary.operation}: " if self.summary else ""
    # Compare against None explicitly (as done for schema_id) so that a
    # parent id of 0 is still reported instead of being treated as absent.
    parent_id = f", parent_id={self.parent_snapshot_id}" if self.parent_snapshot_id is not None else ""
    schema_id = f", schema_id={self.schema_id}" if self.schema_id is not None else ""
    return f"{operation}id={self.snapshot_id}{parent_id}{schema_id}"

manifests(io)

Return the manifests for the given snapshot.

Source code in pyiceberg/table/snapshots.py
def manifests(self, io: FileIO) -> List[ManifestFile]:
    """Read the snapshot's manifest list into a list of manifests.

    An unset or empty ``manifest_list`` yields an empty list without
    touching ``io``.
    """
    location = self.manifest_list
    if not location:
        return []
    return list(_manifests(io, location))

Summary

Bases: IcebergBaseModel, Mapping[str, str]

A class that stores the summary information for a Snapshot.

The snapshot summary’s operation field is used by some operations, like snapshot expiration, to skip processing certain snapshots.

Source code in pyiceberg/table/snapshots.py
class Summary(IcebergBaseModel, Mapping[str, str]):
    """Summary of a Snapshot: an operation plus additional string properties.

    The snapshot summary’s operation field is used by some operations,
    like snapshot expiration, to skip processing certain snapshots.
    """

    operation: Operation = Field()
    _additional_properties: Dict[str, str] = PrivateAttr()

    def __init__(self, operation: Optional[Operation] = None, **data: Any) -> None:
        """Create a summary; a missing operation triggers a warning and becomes overwrite."""
        if operation is not None:
            resolved = operation
        else:
            warnings.warn("Encountered invalid snapshot summary: operation is missing, defaulting to overwrite")
            resolved = Operation.OVERWRITE
        super().__init__(operation=resolved, **data)
        # Everything passed besides the operation is kept as the extra properties.
        self._additional_properties = data

    def __getitem__(self, __key: str) -> Optional[Any]:  # type: ignore
        """Look up an entry by key, map-style; unknown keys yield ``None``."""
        if __key.lower() != "operation":
            return self._additional_properties.get(__key)
        return self.operation

    def __setitem__(self, key: str, value: Any) -> None:
        """Store an entry by key, map-style."""
        if key.lower() != "operation":
            self._additional_properties[key] = value
            return
        self.operation = value

    def __len__(self) -> int:
        """Return how many entries the summary holds."""
        # The mandatory operation counts as one entry on top of the extras.
        return len(self._additional_properties) + 1

    @model_serializer
    def ser_model(self) -> Dict[str, str]:
        """Serialize to a flat dict: the operation value followed by the extra properties."""
        serialized = {"operation": str(self.operation.value)}
        serialized.update(self._additional_properties)
        return serialized

    @property
    def additional_properties(self) -> Dict[str, str]:
        """The properties stored alongside the operation."""
        return self._additional_properties

    def __repr__(self) -> str:
        """Return a constructor-like rendering of the summary."""
        extras = self._additional_properties
        suffix = f", **{extras!r}" if extras else ""
        return f"Summary({self.operation!r}{suffix})"

    def __eq__(self, other: Any) -> bool:
        """Return True only for another Summary with equal operation and properties."""
        if not isinstance(other, Summary):
            return False
        return self.operation == other.operation and self.additional_properties == other.additional_properties

__eq__(other)

Compare if the summary is equal to another summary.

Source code in pyiceberg/table/snapshots.py
def __eq__(self, other: Any) -> bool:
    """Return True only for another Summary with equal operation and properties."""
    if not isinstance(other, Summary):
        return False
    return self.operation == other.operation and self.additional_properties == other.additional_properties

__getitem__(__key)

Return a key as it is a map.

Source code in pyiceberg/table/snapshots.py
def __getitem__(self, __key: str) -> Optional[Any]:  # type: ignore
    """Look up a summary entry by key, map-style.

    ``operation`` (matched case-insensitively) yields the operation itself;
    any other key is read from the additional properties and yields ``None``
    when it is not present.
    """
    if __key.lower() != "operation":
        return self._additional_properties.get(__key)
    return self.operation

__len__()

Return the number of keys in the summary.

Source code in pyiceberg/table/snapshots.py
def __len__(self) -> int:
    """Return how many entries the summary holds."""
    # The mandatory operation counts as one entry on top of the extras.
    return len(self._additional_properties) + 1

__repr__()

Return the string representation of the Summary class.

Source code in pyiceberg/table/snapshots.py
def __repr__(self) -> str:
    """Return a constructor-like rendering of the summary.

    The additional properties are appended as ``**{...}`` only when there
    are any.
    """
    extras = self._additional_properties
    suffix = f", **{extras!r}" if extras else ""
    return f"Summary({self.operation!r}{suffix})"

__setitem__(key, value)

Set a key as it is a map.

Source code in pyiceberg/table/snapshots.py
def __setitem__(self, key: str, value: Any) -> None:
    """Store a summary entry by key, map-style.

    ``operation`` (matched case-insensitively) replaces the operation; any
    other key is written into the additional properties.
    """
    if key.lower() != "operation":
        self._additional_properties[key] = value
        return
    self.operation = value

ancestors_of(current_snapshot, table_metadata)

Get the ancestors of and including the given snapshot.

Source code in pyiceberg/table/snapshots.py
def ancestors_of(current_snapshot: Optional[Snapshot], table_metadata: TableMetadata) -> Iterable[Snapshot]:
    """Lazily yield the given snapshot, then each ancestor in turn.

    The walk follows ``parent_snapshot_id`` through
    ``table_metadata.snapshot_by_id`` and ends when a snapshot has no parent
    id or the lookup returns ``None``. Nothing is yielded for a ``None``
    starting snapshot.
    """
    node = current_snapshot
    while node is not None:
        yield node
        parent_id = node.parent_snapshot_id
        node = None if parent_id is None else table_metadata.snapshot_by_id(parent_id)