Skip to content

scan_planning

CountMap

Bases: KeyValueMap[int]

Map of field IDs to counts.

Source code in pyiceberg/catalog/rest/scan_planning.py
class CountMap(KeyValueMap[int]):
    """Map of field IDs to counts.

    Specializes ``KeyValueMap`` with ``int`` values; it adds no fields or
    behavior of its own.
    """

FetchScanTasksRequest

Bases: IcebergBaseModel

Request body for the fetch scan tasks endpoint.

Source code in pyiceberg/catalog/rest/scan_planning.py
class FetchScanTasksRequest(IcebergBaseModel):
    """Request body for the fetch scan tasks endpoint.

    Holds the single plan task string that the request refers to.
    """

    # Required (no default); aliased as "plan-task".
    plan_task: str = Field(..., alias="plan-task")

KeyValueMap

Bases: IcebergBaseModel, Generic[V]

Map serialized as parallel key/value arrays for column statistics.

Source code in pyiceberg/catalog/rest/scan_planning.py
class KeyValueMap(IcebergBaseModel, Generic[V]):
    """Generic map represented as two parallel lists, ``keys`` and ``values``.

    Entry ``i`` of ``keys`` pairs with entry ``i`` of ``values``. An "after"
    model validator rejects instances whose two lists differ in length.
    """

    # Both lists default to empty, so an empty map is valid.
    keys: list[int] = Field(default_factory=list)
    values: list[V] = Field(default_factory=list)

    @model_validator(mode="after")
    def _validate_lengths_match(self) -> KeyValueMap[V]:
        """Check that ``keys`` and ``values`` have the same number of entries.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If the two lists differ in length.
        """
        lengths_agree = len(self.keys) == len(self.values)
        if not lengths_agree:
            raise ValueError(f"keys and values must have same length: {len(self.keys)} != {len(self.values)}")
        return self

    def to_dict(self) -> dict[int, V]:
        """Return the entries as a dictionary keyed by field ID.

        ``zip`` runs with ``strict=True``, so a length mismatch raises
        ``ValueError`` instead of silently truncating.
        """
        paired = zip(self.keys, self.values, strict=True)
        return dict(paired)

to_dict()

Convert to dictionary mapping field ID to value.

Source code in pyiceberg/catalog/rest/scan_planning.py
def to_dict(self) -> dict[int, V]:
    """Convert to dictionary mapping field ID to value."""
    return dict(zip(self.keys, self.values, strict=True))

PlanCancelled

Bases: IcebergBaseModel

Planning was cancelled.

Source code in pyiceberg/catalog/rest/scan_planning.py
class PlanCancelled(IcebergBaseModel):
    """Planning result indicating that the plan was cancelled."""

    # Only the literal "cancelled" is accepted; it is also the default.
    status: Literal["cancelled"] = Field(default="cancelled")

PlanCompleted

Bases: ScanTasks

Completed scan plan result.

Source code in pyiceberg/catalog/rest/scan_planning.py
class PlanCompleted(ScanTasks):
    """Planning result for a plan that has completed.

    Extends ``ScanTasks`` with a status marker, an optional plan ID and
    optional storage credentials.
    """

    # Only the literal "completed" is accepted; it is also the default.
    status: Literal["completed"] = Field(default="completed")
    # Both optional fields default to None and use hyphenated aliases.
    plan_id: str | None = Field(default=None, alias="plan-id")
    storage_credentials: list[StorageCredential] | None = Field(default=None, alias="storage-credentials")

PlanFailed

Bases: IcebergBaseModel

Planning failed with error.

Source code in pyiceberg/catalog/rest/scan_planning.py
class PlanFailed(IcebergBaseModel):
    """Planning result for a plan that failed, carrying the error payload."""

    # Only the literal "failed" is accepted; it is also the default.
    status: Literal["failed"] = Field(default="failed")
    # Required: the error details for the failure.
    error: ErrorResponseMessage = Field(...)

PlanSubmitted

Bases: IcebergBaseModel

Scan plan submitted, poll for completion.

Source code in pyiceberg/catalog/rest/scan_planning.py
class PlanSubmitted(IcebergBaseModel):
    """Planning result for a submitted plan; the caller polls for completion."""

    # Only the literal "submitted" is accepted; it is also the default.
    status: Literal["submitted"] = Field(default="submitted")
    # Optional plan identifier, aliased as "plan-id".
    plan_id: str | None = Field(default=None, alias="plan-id")

PlanTableScanRequest

Bases: IcebergBaseModel

Request body for planning a REST scan.

Source code in pyiceberg/catalog/rest/scan_planning.py
class PlanTableScanRequest(IcebergBaseModel):
    """Request body for planning a table scan through the REST API.

    Every field is optional. After construction a model validator enforces
    two rules on the snapshot fields:

    * ``start_snapshot_id`` may only be set together with ``end_snapshot_id``.
    * ``snapshot_id`` and ``start_snapshot_id`` are mutually exclusive.
    """

    snapshot_id: int | None = Field(default=None, alias="snapshot-id")
    select: list[str] | None = None
    filter: SerializableBooleanExpression | None = None
    case_sensitive: bool = Field(default=True, alias="case-sensitive")
    use_snapshot_schema: bool = Field(default=False, alias="use-snapshot-schema")
    start_snapshot_id: int | None = Field(default=None, alias="start-snapshot-id")
    end_snapshot_id: int | None = Field(default=None, alias="end-snapshot-id")
    stats_fields: list[str] | None = Field(default=None, alias="stats-fields")
    min_rows_requested: int | None = Field(default=None, alias="min-rows-requested")

    @model_validator(mode="after")
    def _validate_snapshot_fields(self) -> PlanTableScanRequest:
        """Reject inconsistent combinations of the snapshot ID fields.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If ``start_snapshot_id`` is set without
                ``end_snapshot_id``, or if both ``snapshot_id`` and
                ``start_snapshot_id`` are set.
        """
        has_start = self.start_snapshot_id is not None
        # The missing-end check runs first, so it wins when both rules are violated.
        if has_start and self.end_snapshot_id is None:
            raise ValueError("end-snapshot-id is required when start-snapshot-id is specified")
        if has_start and self.snapshot_id is not None:
            raise ValueError("Cannot specify both snapshot-id and start-snapshot-id")
        return self

RESTContentFile

Bases: IcebergBaseModel

Base model for data and delete files from REST API.

Source code in pyiceberg/catalog/rest/scan_planning.py
class RESTContentFile(IcebergBaseModel):
    """Fields shared by data files and delete files returned by the REST API.

    Multi-word fields are exposed under hyphenated aliases (for example
    ``file-path``).
    """

    # Required fields (no default), except `partition`, which defaults to an empty list.
    spec_id: int = Field(..., alias="spec-id")
    partition: list[PrimitiveTypeValue] = Field(default_factory=list)
    content: Literal["data", "position-deletes", "equality-deletes"]
    file_path: str = Field(..., alias="file-path")
    file_format: FileFormat = Field(..., alias="file-format")
    file_size_in_bytes: int = Field(..., alias="file-size-in-bytes")
    record_count: int = Field(..., alias="record-count")

    # Optional fields; each defaults to None.
    key_metadata: str | None = Field(default=None, alias="key-metadata")
    split_offsets: list[int] | None = Field(default=None, alias="split-offsets")
    sort_order_id: int | None = Field(default=None, alias="sort-order-id")

RESTDataFile

Bases: RESTContentFile

Data file from REST API.

Source code in pyiceberg/catalog/rest/scan_planning.py
class RESTDataFile(RESTContentFile):
    """Data file entry returned by the REST API.

    Narrows ``content`` to ``"data"`` and adds an optional first row ID plus
    optional per-field maps (``CountMap`` / ``ValueMap``).
    """

    # Narrows the base-class literal; "data" is the only accepted value and the default.
    content: Literal["data"] = "data"
    first_row_id: int | None = Field(default=None, alias="first-row-id")

    # Optional integer maps keyed by field ID.
    column_sizes: CountMap | None = Field(default=None, alias="column-sizes")
    value_counts: CountMap | None = Field(default=None, alias="value-counts")
    null_value_counts: CountMap | None = Field(default=None, alias="null-value-counts")
    nan_value_counts: CountMap | None = Field(default=None, alias="nan-value-counts")

    # Optional primitive-value maps keyed by field ID.
    lower_bounds: ValueMap | None = Field(default=None, alias="lower-bounds")
    upper_bounds: ValueMap | None = Field(default=None, alias="upper-bounds")

RESTEqualityDeleteFile

Bases: RESTContentFile

Equality delete file from REST API.

Source code in pyiceberg/catalog/rest/scan_planning.py
class RESTEqualityDeleteFile(RESTContentFile):
    """Equality delete file entry returned by the REST API."""

    # Narrows the base-class literal; "equality-deletes" is the only accepted value.
    content: Literal["equality-deletes"] = "equality-deletes"
    # Optional list of integer IDs, aliased as "equality-ids".
    equality_ids: list[int] | None = Field(default=None, alias="equality-ids")

RESTFileScanTask

Bases: IcebergBaseModel

A file scan task from the REST server.

Source code in pyiceberg/catalog/rest/scan_planning.py
class RESTFileScanTask(IcebergBaseModel):
    """Single file scan task as returned by the REST server.

    Pairs a required data file with optional integer delete-file references
    and an optional residual filter expression.
    """

    # Required: the data file this task covers.
    data_file: RESTDataFile = Field(..., alias="data-file")
    # Optional integer references; `ScanTasks` validates them as indices
    # into its `delete_files` list.
    delete_file_references: list[int] | None = Field(default=None, alias="delete-file-references")
    residual_filter: BooleanExpression | None = Field(default=None, alias="residual-filter")

RESTPositionDeleteFile

Bases: RESTContentFile

Position delete file from REST API.

Source code in pyiceberg/catalog/rest/scan_planning.py
class RESTPositionDeleteFile(RESTContentFile):
    """Position delete file entry returned by the REST API."""

    # Narrows the base-class literal; "position-deletes" is the only accepted value.
    content: Literal["position-deletes"] = "position-deletes"

    # Optional fields; each defaults to None.
    referenced_data_file: str | None = Field(default=None, alias="referenced-data-file")
    content_offset: int | None = Field(default=None, alias="content-offset")
    content_size_in_bytes: int | None = Field(default=None, alias="content-size-in-bytes")

ScanTasks

Bases: IcebergBaseModel

Container for scan tasks returned by the server.

Source code in pyiceberg/catalog/rest/scan_planning.py
class ScanTasks(IcebergBaseModel):
    """Container for scan tasks returned by the server.

    Holds three lists, each defaulting to empty: delete files, file scan
    tasks and plan task strings. A model validator checks that the delete
    file references carried by the file scan tasks are consistent with
    ``delete_files``.
    """

    delete_files: list[RESTDeleteFile] = Field(alias="delete-files", default_factory=list)
    file_scan_tasks: list[RESTFileScanTask] = Field(alias="file-scan-tasks", default_factory=list)
    plan_tasks: list[str] = Field(alias="plan-tasks", default_factory=list)

    @model_validator(mode="after")
    def _validate_delete_file_references(self) -> ScanTasks:
        """Validate delete file references against ``delete_files``.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If any task references an index outside
                ``delete_files``, or if delete files are present without any
                file scan tasks.
        """
        num_delete_files = len(self.delete_files)
        for task in self.file_scan_tasks:
            for idx in task.delete_file_references or []:
                if 0 <= idx < num_delete_files:
                    continue
                # With no delete files there is no valid range to report; the
                # previous message rendered this case as "valid range: 0--1".
                if num_delete_files == 0:
                    detail = "no delete files were returned"
                else:
                    detail = f"valid range: 0-{num_delete_files - 1}"
                raise ValueError(f"Invalid delete file reference: {idx} ({detail})")

        if self.delete_files and not self.file_scan_tasks:
            raise ValueError("Invalid response: deleteFiles should only be returned with fileScanTasks that reference them")

        return self

StorageCredential

Bases: IcebergBaseModel

Storage credential for accessing content files.

Source code in pyiceberg/catalog/rest/scan_planning.py
class StorageCredential(IcebergBaseModel):
    """Credential used to access content files in storage.

    Pairs a required location prefix with a string-to-string configuration
    mapping that defaults to empty.
    """

    prefix: str = Field(..., description="Storage location prefix this credential applies to")
    config: dict[str, str] = Field(default_factory=dict)

ValueMap

Bases: KeyValueMap[PrimitiveTypeValue]

Map of field IDs to primitive values (for lower/upper bounds).

Source code in pyiceberg/catalog/rest/scan_planning.py
class ValueMap(KeyValueMap[PrimitiveTypeValue]):
    """Map of field IDs to primitive values (for lower/upper bounds).

    Specializes ``KeyValueMap`` with ``PrimitiveTypeValue`` values; it adds
    no fields or behavior of its own.
    """