schema

`UpdateSchema` ¶

Bases: UpdateTableMetadata['UpdateSchema']

Source code in pyiceberg/table/update/schema.py

class UpdateSchema(UpdateTableMetadata["UpdateSchema"]):
    """Collect column additions, updates, deletions and moves for a table schema.

    The public methods stage changes in the private containers declared below;
    ``_apply`` combines them with the original schema into a new ``Schema``.
    """

    # Schema the staged changes are applied to.
    _schema: Schema
    # Counter that hands out the IDs for newly added fields.
    _last_column_id: itertools.count[int]
    # Names of the identifier fields; resolved to field IDs when the changes are applied.
    _identifier_field_names: set[str]

    # NOTE: the mutable defaults below live on the class and would be shared between
    # instances; __init__ rebinds every one of them to a fresh container per instance.

    # Added fields, keyed by the field ID of their parent (TABLE_ROOT_ID for top-level columns).
    _adds: dict[int, list[NestedField]] = {}
    # Replacement definitions for existing fields, keyed by field ID.
    _updates: dict[int, NestedField] = {}
    # Field IDs staged for deletion.
    _deletes: set[int] = set()
    # Staged moves, keyed by the field ID of the parent struct (TABLE_ROOT_ID for top-level columns).
    _moves: dict[int, list[_Move]] = {}

    # Full dotted name of every column added in this update -> its newly assigned field ID.
    _added_name_to_id: dict[str, int] = {}
    # Part of https://github.com/apache/iceberg/pull/8393
    # Field ID -> dotted column name of its parent ("" for columns added at the top level).
    _id_to_parent: dict[int, str] = {}
    # When true, changes that are rejected by default are permitted.
    _allow_incompatible_changes: bool
    # Whether column-name lookups distinguish upper and lower case.
    _case_sensitive: bool

    def __init__(
        self,
        transaction: Transaction,
        allow_incompatible_changes: bool = False,
        case_sensitive: bool = True,
        schema: Schema | None = None,
        name_mapping: NameMapping | None = None,
    ) -> None:
        """Create an update with no staged changes.

        Args:
            transaction: Transaction this schema update belongs to.
            allow_incompatible_changes: When true, changes that are rejected by default
                (for example adding a required column without a default) are permitted.
            case_sensitive: Whether column-name lookups distinguish upper and lower case.
            schema: Schema to start from. When this is not a ``Schema`` instance, the schema
                and last column ID of the transaction's table metadata are used instead.
            name_mapping: Optional name mapping; when set, an updated mapping is emitted on commit.

        Raises:
            ValueError: If a parent field ID of the schema cannot be resolved to a column name.
        """
        super().__init__(transaction)

        # Pick the base schema and the first free field ID.
        if not isinstance(schema, Schema):
            self._schema = self._transaction.table_metadata.schema()
            first_free_id = 1 + self._transaction.table_metadata.last_column_id
        else:
            self._schema = schema
            first_free_id = 1 + schema.highest_field_id
        self._last_column_id = itertools.count(first_free_id)

        self._name_mapping = name_mapping
        self._identifier_field_names = self._schema.identifier_field_names()

        # Every instance starts with its own empty containers for staged changes.
        self._adds = {}
        self._updates = {}
        self._deletes = set()
        self._moves = {}
        self._added_name_to_id = {}

        # Translate the schema's child-ID -> parent-ID index into child-ID -> parent column name.
        parent_names: dict[int, str] = {}
        for field_id, parent_field_id in self._schema._lazy_id_to_parent.items():
            column_name = self._schema.find_column_name(column_id=parent_field_id)
            if column_name is None:
                raise ValueError(f"Could not find field-id: {parent_field_id}")
            parent_names[field_id] = column_name
        self._id_to_parent = parent_names

        self._allow_incompatible_changes = allow_incompatible_changes
        self._case_sensitive = case_sensitive
        self._transaction = transaction

    def case_sensitive(self, case_sensitive: bool) -> UpdateSchema:
        """Configure whether column-name lookups distinguish upper and lower case.

        Args:
            case_sensitive: Pass False to compare column names without regard to case.

        Returns:
            This instance, so that calls can be chained.
        """
        self._case_sensitive = case_sensitive
        return self

    def union_by_name(
        # TODO: Move TableProperties.DEFAULT_FORMAT_VERSION to separate file and set that as format_version default.
        self,
        new_schema: Schema | pa.Schema,
        format_version: TableVersion = 2,
    ) -> UpdateSchema:
        """Visit ``new_schema`` side by side with the current schema using ``_UnionByNameVisitor``.

        The visitor receives this update object (presumably to stage the additions and
        updates needed for a by-name union; see ``_UnionByNameVisitor``).

        Args:
            new_schema: Iceberg or PyArrow schema; it is passed through
                ``Catalog._convert_schema_if_needed`` before being visited.
            format_version: Table format version handed to the schema conversion.

        Returns:
            This instance, so that calls can be chained.
        """
        from pyiceberg.catalog import Catalog

        incoming = Catalog._convert_schema_if_needed(new_schema, format_version=format_version)
        visitor = _UnionByNameVisitor(update_schema=self, existing_schema=self._schema, case_sensitive=self._case_sensitive)
        accessor = PartnerIdByNameAccessor(partner_schema=self._schema, case_sensitive=self._case_sensitive)

        # The second argument (-1) is the partner value for the schema root -- TODO confirm.
        visit_with_partner(incoming, -1, visitor, accessor)  # type: ignore
        return self

    def add_column(
        self,
        path: str | tuple[str, ...],
        field_type: IcebergType,
        doc: str | None = None,
        required: bool = False,
        default_value: L | None = None,
    ) -> UpdateSchema:
        """Add a new top-level column, or add a new column to a nested struct.

        Because "." may be interpreted as a column path separator or may be used in field names, it
        is not allowed to add nested column by passing in a string. To add to nested structures or
        to add fields with names that contain "." use a tuple instead to indicate the path.

        If type is a nested type, its field IDs are reassigned when added to the existing schema.

        A rejected call does not consume field IDs: the required-column check runs before any
        ID is drawn, and the IDs drawn for a column whose default value turns out to be invalid
        are handed back to the counter.

        Args:
            path: Name for the new column.
            field_type: Type for the new column.
            doc: Documentation string for the new column.
            required: Whether the new column is required.
            default_value: Default value for the new column.

        Returns:
            This for method chaining.

        Raises:
            ValueError: If a string path contains ".", if the parent is neither a struct nor a
                map/list whose value/element is a struct, if the name already exists and is not
                staged for deletion, if the default value is invalid for the type, or if a
                required column without a default is added while incompatible changes are not
                allowed.
        """
        if isinstance(path, str):
            if "." in path:
                raise ValueError(f"Cannot add column with ambiguous name: {path}, provide a tuple instead")
            path = (path,)

        name = path[-1]
        parent = path[:-1]

        full_name = ".".join(path)
        parent_full_path = ".".join(parent)

        # Stays None for a top-level column.
        parent_field = None
        if len(parent) > 0:
            parent_field = self._schema.find_field(parent_full_path, self._case_sensitive)
            parent_type = parent_field.field_type
            # Columns are added to the struct held by a map value or a list element.
            if isinstance(parent_type, MapType):
                parent_field = parent_type.value_field
            elif isinstance(parent_type, ListType):
                parent_field = parent_type.element_field

            if not parent_field.field_type.is_struct:
                raise ValueError(f"Cannot add column '{name}' to non-struct type: {parent_full_path}")

        existing_field = None
        try:
            existing_field = self._schema.find_field(full_name, self._case_sensitive)
        except ValueError:
            # The name is free; nothing to do.
            pass

        if existing_field is not None and existing_field.field_id not in self._deletes:
            raise ValueError(f"Cannot add column, name already exists: {full_name}")

        # Validate before drawing IDs so that a rejected column does not leave a gap in the
        # field-ID sequence. No default value means no initial default will be set.
        if required and default_value is None and not self._allow_incompatible_changes:
            # Table format version 1 and 2 cannot add required column because there is no initial value
            raise ValueError(f"Incompatible change: cannot add required column: {full_name}")

        parent_id: int = TABLE_ROOT_ID if parent_field is None else parent_field.field_id

        # assign new IDs in order
        new_id = self.assign_new_column_id()
        new_type = assign_fresh_schema_ids(field_type, self.assign_new_column_id)

        initial_default = None
        if default_value is not None:
            try:
                # To make sure that the value is valid for the type
                initial_default = literal(default_value).to(new_type).value
            except ValueError as e:
                # Hand back the IDs drawn above; the column is not going to be added.
                self._last_column_id = itertools.count(new_id)
                raise ValueError(f"Invalid default value: {e}") from e

        # update tracking for moves
        self._added_name_to_id[full_name] = new_id
        self._id_to_parent[new_id] = parent_full_path

        field = NestedField(
            field_id=new_id,
            name=name,
            field_type=new_type,
            required=required,
            doc=doc,
            initial_default=initial_default,
            write_default=initial_default,
        )

        self._adds.setdefault(parent_id, []).append(field)

        return self

    def delete_column(self, path: str | tuple[str, ...]) -> UpdateSchema:
        """Stage the removal of a column.

        Args:
            path: The path to the column, as a dotted string or a tuple of path segments.

        Returns:
            The UpdateSchema with the delete operation staged.

        Raises:
            ValueError: If the column already has additions or updates staged in this update.
        """
        full_name = path if isinstance(path, str) else ".".join(path)
        target_id = self._schema.find_field(full_name, case_sensitive=self._case_sensitive).field_id

        # A column that is being extended or modified in this update cannot be dropped as well.
        if target_id in self._adds:
            raise ValueError(f"Cannot delete a column that has additions: {full_name}")
        if target_id in self._updates:
            raise ValueError(f"Cannot delete a column that has updates: {full_name}")

        self._deletes.add(target_id)
        return self

    def set_default_value(self, path: str | tuple[str, ...], default_value: L | None) -> UpdateSchema:
        """Stage a new write default for a column.

        Args:
            path: The path to the column.
            default_value: The new default value; None clears the default.

        Returns:
            The UpdateSchema with the default-value change staged.
        """
        self._set_column_default_value(path, default_value)
        return self

    def rename_column(self, path_from: str | tuple[str, ...], new_name: str) -> UpdateSchema:
        """Stage a new name for a column.

        Args:
            path_from: The path to the column to be renamed.
            new_name: The new name of the column (the last path segment only).

        Returns:
            The UpdateSchema with the rename operation staged.

        Raises:
            ValueError: If the column is staged for deletion.
        """
        if isinstance(path_from, tuple):
            path_from = ".".join(path_from)

        field_from = self._schema.find_field(path_from, self._case_sensitive)
        target_id = field_from.field_id

        if target_id in self._deletes:
            raise ValueError(f"Cannot rename a column that will be deleted: {path_from}")

        # Build on an update that is already staged for this field, if there is one.
        base = self._updates.get(target_id) or field_from
        self._updates[target_id] = NestedField(
            field_id=base.field_id,
            name=new_name,
            field_type=base.field_type,
            doc=base.doc,
            required=base.required,
            initial_default=base.initial_default,
            write_default=base.write_default,
        )

        # Lookup the field because of casing
        stored_name = self._schema.find_column_name(target_id)
        if stored_name in self._identifier_field_names:
            # Keep the identifier fields pointing at the column under its new name.
            self._identifier_field_names.remove(stored_name)
            parent_prefix = stored_name[: -len(field_from.name)]
            self._identifier_field_names.add(f"{parent_prefix}{new_name}")

        return self

    def make_column_optional(self, path: str | tuple[str, ...]) -> UpdateSchema:
        """Stage a change that turns a column into an optional one.

        Args:
            path: The path to the field.

        Returns:
            The UpdateSchema with the requirement change staged.
        """
        self._set_column_requirement(path, required=False)
        return self

    def set_identifier_fields(self, *fields: str) -> None:
        """Replace the identifier fields with the given column names.

        The names are resolved against the resulting schema when the changes are applied.

        Args:
            *fields: Names of the columns that make up the identifier; duplicates collapse.
        """
        self._identifier_field_names = {*fields}

    def _set_column_requirement(self, path: str | tuple[str, ...], required: bool) -> None:
        path = (path,) if isinstance(path, str) else path
        name = ".".join(path)

        field = self._schema.find_field(name, self._case_sensitive)

        if (field.required and required) or (field.optional and not required):
            # if the change is a noop, allow it even if allowIncompatibleChanges is false
            return

        if not self._allow_incompatible_changes and required:
            raise ValueError(f"Cannot change column nullability: {name}: optional -> required")

        if field.field_id in self._deletes:
            raise ValueError(f"Cannot update a column that will be deleted: {name}")

        if updated := self._updates.get(field.field_id):
            self._updates[field.field_id] = NestedField(
                field_id=updated.field_id,
                name=updated.name,
                field_type=updated.field_type,
                doc=updated.doc,
                required=required,
                initial_default=updated.initial_default,
                write_default=updated.write_default,
            )
        else:
            self._updates[field.field_id] = NestedField(
                field_id=field.field_id,
                name=field.name,
                field_type=field.field_type,
                doc=field.doc,
                required=required,
                initial_default=field.initial_default,
                write_default=field.write_default,
            )

    def _set_column_default_value(self, path: str | tuple[str, ...], default_value: Any) -> None:
        path = (path,) if isinstance(path, str) else path
        name = ".".join(path)

        field = self._schema.find_field(name, self._case_sensitive)

        if default_value is not None:
            try:
                # To make sure that the value is valid for the type
                default_value = literal(default_value).to(field.field_type).value
            except ValueError as e:
                raise ValueError(f"Invalid default value: {e}") from e

        if field.required and default_value == field.write_default:
            # if the change is a noop, allow it even if allowIncompatibleChanges is false
            return

        if not self._allow_incompatible_changes and field.required and default_value is None:
            raise ValueError("Cannot change change default-value of a required column to None")

        if field.field_id in self._deletes:
            raise ValueError(f"Cannot update a column that will be deleted: {name}")

        if updated := self._updates.get(field.field_id):
            self._updates[field.field_id] = NestedField(
                field_id=updated.field_id,
                name=updated.name,
                field_type=updated.field_type,
                doc=updated.doc,
                required=updated.required,
                initial_default=updated.initial_default,
                write_default=default_value,
            )
        else:
            self._updates[field.field_id] = NestedField(
                field_id=field.field_id,
                name=field.name,
                field_type=field.field_type,
                doc=field.doc,
                required=field.required,
                initial_default=field.initial_default,
                write_default=default_value,
            )

    def update_column(
        self,
        path: str | tuple[str, ...],
        field_type: IcebergType | None = None,
        required: bool | None = None,
        doc: str | None = None,
    ) -> UpdateSchema:
        """Stage a change of a column's type, documentation and/or required flag.

        Arguments left at None keep the current value; when all of them are None the
        call is a no-op.

        Args:
            path: The path to the field.
            field_type: The new type
            required: If the field should be required
            doc: Documentation describing the column

        Returns:
            The UpdateSchema with the update staged.

        Raises:
            ValueError: If the column is staged for deletion.
            ValidationError: If the current type is not a primitive, or if the type change
                is not a valid promotion while incompatible changes are not allowed.
        """
        if isinstance(path, str):
            path = (path,)
        full_name = ".".join(path)

        nothing_requested = field_type is None and required is None and doc is None
        if nothing_requested:
            return self

        field = self._schema.find_field(full_name, self._case_sensitive)
        target_id = field.field_id

        if target_id in self._deletes:
            raise ValueError(f"Cannot update a column that will be deleted: {full_name}")

        if field_type is not None:
            if not field.field_type.is_primitive:
                raise ValidationError(f"Cannot change column type: {field.field_type} is not a primitive")

            if not self._allow_incompatible_changes and field.field_type != field_type:
                # Only type promotions are accepted unless incompatible changes are allowed.
                try:
                    promote(field.field_type, field_type)
                except ResolveError as e:
                    raise ValidationError(f"Cannot change column type: {full_name}: {field.field_type} -> {field_type}") from e

        # if other updates for the same field exist in one transaction, build on them
        base = self._updates.get(target_id) or field
        self._updates[target_id] = NestedField(
            field_id=base.field_id,
            name=base.name,
            field_type=field_type or base.field_type,
            doc=base.doc if doc is None else doc,
            required=base.required,
            initial_default=base.initial_default,
            write_default=base.write_default,
        )

        # The required flag goes through its own validation.
        if required is not None:
            self._set_column_requirement(path, required=required)

        return self

    def _find_for_move(self, name: str) -> int | None:
        try:
            return self._schema.find_field(name, self._case_sensitive).field_id
        except ValueError:
            pass

        return self._added_name_to_id.get(name)

    def _move(self, move: _Move) -> None:
        if parent_name := self._id_to_parent.get(move.field_id):
            parent_field = self._schema.find_field(parent_name, case_sensitive=self._case_sensitive)
            if not parent_field.field_type.is_struct:
                raise ValueError(f"Cannot move fields in non-struct type: {parent_field.field_type}")

            if move.op == _MoveOperation.After or move.op == _MoveOperation.Before:
                if move.other_field_id is None:
                    raise ValueError("Expected other field when performing before/after move")

                if self._id_to_parent.get(move.field_id) != self._id_to_parent.get(move.other_field_id):
                    raise ValueError(f"Cannot move field {move.full_name} to a different struct")

            self._moves[parent_field.field_id] = self._moves.get(parent_field.field_id, []) + [move]
        else:
            # In the top level field
            if move.op == _MoveOperation.After or move.op == _MoveOperation.Before:
                if move.other_field_id is None:
                    raise ValueError("Expected other field when performing before/after move")

                if other_struct := self._id_to_parent.get(move.other_field_id):
                    raise ValueError(f"Cannot move field {move.full_name} to a different struct: {other_struct}")

            self._moves[TABLE_ROOT_ID] = self._moves.get(TABLE_ROOT_ID, []) + [move]

    def move_first(self, path: str | tuple[str, ...]) -> UpdateSchema:
        """Stage a move of a field to the first position of its parent struct.

        Args:
            path: The path to the field.

        Returns:
            The UpdateSchema with the move operation staged.

        Raises:
            ValueError: If the field is neither in the schema nor added in this update.
        """
        full_name = path
        if isinstance(path, tuple):
            full_name = ".".join(path)

        field_id = self._find_for_move(full_name)
        if field_id is None:
            raise ValueError(f"Cannot move missing column: {full_name}")

        staged_move = _Move(field_id=field_id, full_name=full_name, op=_MoveOperation.First)
        self._move(staged_move)
        return self

    def move_before(self, path: str | tuple[str, ...], before_path: str | tuple[str, ...]) -> UpdateSchema:
        """Stage a move of a field to the position directly before another field.

        Args:
            path: The path to the field.
            before_path: The path to the field that ``path`` is placed in front of.

        Returns:
            The UpdateSchema with the move operation staged.

        Raises:
            ValueError: If either field cannot be found, or if both paths name the same field.
        """
        full_name = path
        if isinstance(path, tuple):
            full_name = ".".join(path)

        field_id = self._find_for_move(full_name)
        if field_id is None:
            raise ValueError(f"Cannot move missing column: {full_name}")

        before_full_name = before_path
        if isinstance(before_path, tuple):
            before_full_name = ".".join(before_path)

        before_field_id = self._find_for_move(before_full_name)
        if before_field_id is None:
            raise ValueError(f"Cannot move {full_name} before missing column: {before_full_name}")

        if field_id == before_field_id:
            raise ValueError(f"Cannot move {full_name} before itself")

        staged_move = _Move(
            field_id=field_id,
            full_name=full_name,
            other_field_id=before_field_id,
            op=_MoveOperation.Before,
        )
        self._move(staged_move)
        return self

    def move_after(self, path: str | tuple[str, ...], after_name: str | tuple[str, ...]) -> UpdateSchema:
        """Stage a move of a field to the position directly after another field.

        Args:
            path: The path to the field.
            after_name: The path to the field that ``path`` is placed behind.

        Returns:
            The UpdateSchema with the move operation staged.

        Raises:
            ValueError: If either field cannot be found, or if both paths name the same field.
        """
        full_name = path
        if isinstance(path, tuple):
            full_name = ".".join(path)

        field_id = self._find_for_move(full_name)
        if field_id is None:
            raise ValueError(f"Cannot move missing column: {full_name}")

        after_path = after_name
        if isinstance(after_name, tuple):
            after_path = ".".join(after_name)

        after_field_id = self._find_for_move(after_path)
        if after_field_id is None:
            raise ValueError(f"Cannot move {full_name} after missing column: {after_path}")

        if field_id == after_field_id:
            raise ValueError(f"Cannot move {full_name} after itself")

        staged_move = _Move(
            field_id=field_id,
            full_name=full_name,
            other_field_id=after_field_id,
            op=_MoveOperation.After,
        )
        self._move(staged_move)
        return self

    def _commit(self) -> UpdatesAndRequirements:
        """Apply the pending changes and describe them as table updates and requirements.

        Returns:
            A ``(updates, requirements)`` pair of tuples. Both are empty when the resulting
            schema is the one that is already current.
        """
        from pyiceberg.table import TableProperties

        new_schema = self._apply()

        # Reuse the ID of an equal schema the table already has, if any.
        existing_schema_id = None
        for known_schema in self._transaction.table_metadata.schemas:
            if known_schema == new_schema:
                existing_schema_id = known_schema.schema_id
                break

        # Check if it is different current schema ID
        if existing_schema_id == self._schema.schema_id:
            return (), ()

        requirements: tuple[TableRequirement, ...] = (AssertCurrentSchemaId(current_schema_id=self._schema.schema_id),)

        updates: tuple[TableUpdate, ...]
        if existing_schema_id is None:
            # The schema is new: add it and make it current.
            # schema_id=-1 presumably refers to the schema added just before -- TODO confirm.
            updates = (
                AddSchemaUpdate(schema=new_schema),
                SetCurrentSchemaUpdate(schema_id=-1),
            )
        else:
            updates = (SetCurrentSchemaUpdate(schema_id=existing_schema_id),)

        name_mapping = self._name_mapping
        if name_mapping:
            # Keep the stored name mapping in step with the staged updates and additions.
            updated_name_mapping = update_mapping(name_mapping, self._updates, self._adds)
            new_properties = {TableProperties.DEFAULT_NAME_MAPPING: updated_name_mapping.model_dump_json()}
            updates += (SetPropertiesUpdate(updates=new_properties),)

        return updates, requirements

    def _apply(self) -> Schema:
        """Apply the pending changes to the original schema and return the result.

        Returns:
            The Schema that results when all pending changes are applied, carrying the next
            schema ID and the resolved identifier field IDs.

        Raises:
            ValueError: If the changes cannot be applied, or if an identifier field cannot be
                found in the resulting schema.
        """
        change_visitor = _ApplyChanges(self._adds, self._updates, self._deletes, self._moves)
        struct = visit(self._schema, change_visitor)
        if struct is None:
            # Should never happen
            raise ValueError("Could not apply changes")

        # Check the field-ids
        new_schema = Schema(*struct.fields)
        from pyiceberg.partitioning import validate_partition_name

        # Check every partition field name of the table against the resulting schema.
        for spec in self._transaction.table_metadata.partition_specs:
            for partition_field in spec.fields:
                validate_partition_name(
                    partition_field.name, partition_field.transform, partition_field.source_id, new_schema, set()
                )

        # Resolve the identifier field names against the resulting schema.
        field_ids = set()
        for name in self._identifier_field_names:
            try:
                identifier_field = new_schema.find_field(name, case_sensitive=self._case_sensitive)
            except ValueError as e:
                raise ValueError(
                    f"Cannot find identifier field {name}. In case of deletion, update the identifier fields first."
                ) from e
            field_ids.add(identifier_field.field_id)

        # The new schema gets the ID after the highest one known to the table.
        txn = self._transaction
        if not txn:
            next_schema_id = 0
        elif txn.table_metadata is None:
            next_schema_id = 1
        else:
            next_schema_id = 1 + max(schema.schema_id for schema in txn.table_metadata.schemas)

        return Schema(*struct.fields, schema_id=next_schema_id, identifier_field_ids=field_ids)

    def assign_new_column_id(self) -> int:
        """Draw the next unused field ID from the column-ID counter.

        Returns:
            The field ID; every call yields a new one.
        """
        fresh_id = next(self._last_column_id)
        return fresh_id

`add_column(path, field_type, doc=None, required=False, default_value=None)` ¶

Add a new top-level column, or add a new column to a nested struct.

Because "." may be interpreted as a column path separator or may be used in field names, it is not allowed to add nested column by passing in a string. To add to nested structures or to add fields with names that contain "." use a tuple instead to indicate the path.

If type is a nested type, its field IDs are reassigned when added to the existing schema.

Parameters:

Name	Type	Description	Default
`path`	`str \| tuple[str, ...]`	Name for the new column.	required
`field_type`	`IcebergType`	Type for the new column.	required
`doc`	`str \| None`	Documentation string for the new column.	`None`
`required`	`bool`	Whether the new column is required.	`False`
`default_value`	`L \| None`	Default value for the new column.	`None`

Returns:

Type	Description
`UpdateSchema`	This for method chaining.

Source code in pyiceberg/table/update/schema.py

def add_column(
    self,
    path: str | tuple[str, ...],
    field_type: IcebergType,
    doc: str | None = None,
    required: bool = False,
    default_value: L | None = None,
) -> UpdateSchema:
    """Add a new top-level column, or add a new column to a nested struct.

    Because "." may be interpreted as a column path separator or may be used in field names, it
    is not allowed to add nested column by passing in a string. To add to nested structures or
    to add fields with names that contain "." use a tuple instead to indicate the path.

    If type is a nested type, its field IDs are reassigned when added to the existing schema.

    A rejected call does not consume field IDs: the required-column check runs before any
    ID is drawn, and the IDs drawn for a column whose default value turns out to be invalid
    are handed back to the counter.

    Args:
        path: Name for the new column.
        field_type: Type for the new column.
        doc: Documentation string for the new column.
        required: Whether the new column is required.
        default_value: Default value for the new column.

    Returns:
        This for method chaining.

    Raises:
        ValueError: If a string path contains ".", if the parent is neither a struct nor a
            map/list whose value/element is a struct, if the name already exists and is not
            staged for deletion, if the default value is invalid for the type, or if a
            required column without a default is added while incompatible changes are not
            allowed.
    """
    if isinstance(path, str):
        if "." in path:
            raise ValueError(f"Cannot add column with ambiguous name: {path}, provide a tuple instead")
        path = (path,)

    name = path[-1]
    parent = path[:-1]

    full_name = ".".join(path)
    parent_full_path = ".".join(parent)

    # Stays None for a top-level column.
    parent_field = None
    if len(parent) > 0:
        parent_field = self._schema.find_field(parent_full_path, self._case_sensitive)
        parent_type = parent_field.field_type
        # Columns are added to the struct held by a map value or a list element.
        if isinstance(parent_type, MapType):
            parent_field = parent_type.value_field
        elif isinstance(parent_type, ListType):
            parent_field = parent_type.element_field

        if not parent_field.field_type.is_struct:
            raise ValueError(f"Cannot add column '{name}' to non-struct type: {parent_full_path}")

    existing_field = None
    try:
        existing_field = self._schema.find_field(full_name, self._case_sensitive)
    except ValueError:
        # The name is free; nothing to do.
        pass

    if existing_field is not None and existing_field.field_id not in self._deletes:
        raise ValueError(f"Cannot add column, name already exists: {full_name}")

    # Validate before drawing IDs so that a rejected column does not leave a gap in the
    # field-ID sequence. No default value means no initial default will be set.
    if required and default_value is None and not self._allow_incompatible_changes:
        # Table format version 1 and 2 cannot add required column because there is no initial value
        raise ValueError(f"Incompatible change: cannot add required column: {full_name}")

    parent_id: int = TABLE_ROOT_ID if parent_field is None else parent_field.field_id

    # assign new IDs in order
    new_id = self.assign_new_column_id()
    new_type = assign_fresh_schema_ids(field_type, self.assign_new_column_id)

    initial_default = None
    if default_value is not None:
        try:
            # To make sure that the value is valid for the type
            initial_default = literal(default_value).to(new_type).value
        except ValueError as e:
            # Hand back the IDs drawn above; the column is not going to be added.
            self._last_column_id = itertools.count(new_id)
            raise ValueError(f"Invalid default value: {e}") from e

    # update tracking for moves
    self._added_name_to_id[full_name] = new_id
    self._id_to_parent[new_id] = parent_full_path

    field = NestedField(
        field_id=new_id,
        name=name,
        field_type=new_type,
        required=required,
        doc=doc,
        initial_default=initial_default,
        write_default=initial_default,
    )

    self._adds.setdefault(parent_id, []).append(field)

    return self

`case_sensitive(case_sensitive)` ¶

Determine if the case of schema needs to be considered when comparing column names.

Parameters:

Name	Type	Description	Default
`case_sensitive`	`bool`	When false case is not considered in column name comparisons.	required

Returns:

Type	Description
`UpdateSchema`	This for method chaining

Source code in pyiceberg/table/update/schema.py

def case_sensitive(self, case_sensitive: bool) -> UpdateSchema:
    """Configure whether column-name lookups distinguish upper and lower case.

    Args:
        case_sensitive: Pass False to compare column names without regard to case.

    Returns:
        This instance, so that calls can be chained.
    """
    self._case_sensitive = case_sensitive
    return self

`delete_column(path)` ¶

Delete a column from a table.

Parameters:

Name	Type	Description	Default
`path`	`str \| tuple[str, ...]`	The path to the column.	required

Returns:

Type	Description
`UpdateSchema`	The UpdateSchema with the delete operation staged.

Source code in pyiceberg/table/update/schema.py

def delete_column(self, path: str | tuple[str, ...]) -> UpdateSchema:
    """Stage the removal of a column.

    Args:
        path: The path to the column, as a dotted string or a tuple of path segments.

    Returns:
        The UpdateSchema with the delete operation staged.

    Raises:
        ValueError: If the column already has additions or updates staged in this update.
    """
    full_name = path if isinstance(path, str) else ".".join(path)
    target_id = self._schema.find_field(full_name, case_sensitive=self._case_sensitive).field_id

    # A column that is being extended or modified in this update cannot be dropped as well.
    if target_id in self._adds:
        raise ValueError(f"Cannot delete a column that has additions: {full_name}")
    if target_id in self._updates:
        raise ValueError(f"Cannot delete a column that has updates: {full_name}")

    self._deletes.add(target_id)
    return self

`make_column_optional(path)` ¶

Make a column optional.

Parameters:

Name	Type	Description	Default
`path`	`str \| tuple[str, ...]`	The path to the field.	required

Returns:

Type	Description
`UpdateSchema`	The UpdateSchema with the requirement change staged.

Source code in pyiceberg/table/update/schema.py

def make_column_optional(self, path: str | tuple[str, ...]) -> UpdateSchema:
    """Stage a change that turns a column into an optional one.

    Args:
        path: The path to the field.

    Returns:
        The UpdateSchema with the requirement change staged.
    """
    self._set_column_requirement(path, required=False)
    return self

`move_after(path, after_name)` ¶

Move the field to after another field.

Parameters:

Name	Type	Description	Default
`path`	`str \| tuple[str, ...]`	The path to the field.	required
`after_name`	`str \| tuple[str, ...]`	The path to the field that `path` is placed after.	required

Returns:

Type	Description
`UpdateSchema`	The UpdateSchema with the move operation staged.

Source code in pyiceberg/table/update/schema.py

def move_after(self, path: str | tuple[str, ...], after_name: str | tuple[str, ...]) -> UpdateSchema:
    """Stage moving a field to the position after another field.

    Args:
        path: The field to move, as a dotted name or a tuple of name parts.
        after_name: The field the moved field should be placed after, in the same format.

    Returns:
        This UpdateSchema, so that further changes can be chained.

    Raises:
        ValueError: If either field cannot be found, or if both resolve to the same field.
    """
    if isinstance(path, tuple):
        full_name = ".".join(path)
    else:
        full_name = path

    moved_id = self._find_for_move(full_name)
    if moved_id is None:
        raise ValueError(f"Cannot move missing column: {full_name}")

    # The reference field is resolved only after the moved field is known to exist.
    after_path = after_name if not isinstance(after_name, tuple) else ".".join(after_name)
    anchor_id = self._find_for_move(after_path)
    if anchor_id is None:
        raise ValueError(f"Cannot move {full_name} after missing column: {after_path}")

    if moved_id == anchor_id:
        raise ValueError(f"Cannot move {full_name} after itself")

    staged_move = _Move(field_id=moved_id, full_name=full_name, other_field_id=anchor_id, op=_MoveOperation.After)
    self._move(staged_move)
    return self

`move_before(path, before_path)` ¶

Move the field to before another field.

Parameters:

Name	Type	Description	Default
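`path`	`str \| tuple[str, ...]`	The path to the field.	required
`before_path`	`str \| tuple[str, ...]`	The path to the field that the moved field should be placed before.	required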

Returns:

Type	Description
`UpdateSchema`	The UpdateSchema with the move operation staged.

Source code in pyiceberg/table/update/schema.py

def move_before(self, path: str | tuple[str, ...], before_path: str | tuple[str, ...]) -> UpdateSchema:
    """Stage moving a field to the position before another field.

    Args:
        path: The field to move, as a dotted name or a tuple of name parts.
        before_path: The field the moved field should be placed before, in the same format.

    Returns:
        This UpdateSchema, so that further changes can be chained.

    Raises:
        ValueError: If either field cannot be found, or if both resolve to the same field.
    """
    if isinstance(path, tuple):
        full_name = ".".join(path)
    else:
        full_name = path

    moved_id = self._find_for_move(full_name)
    if moved_id is None:
        raise ValueError(f"Cannot move missing column: {full_name}")

    # The reference field is resolved only after the moved field is known to exist.
    before_full_name = before_path if not isinstance(before_path, tuple) else ".".join(before_path)
    anchor_id = self._find_for_move(before_full_name)
    if anchor_id is None:
        raise ValueError(f"Cannot move {full_name} before missing column: {before_full_name}")

    if moved_id == anchor_id:
        raise ValueError(f"Cannot move {full_name} before itself")

    staged_move = _Move(field_id=moved_id, full_name=full_name, other_field_id=anchor_id, op=_MoveOperation.Before)
    self._move(staged_move)
    return self

`move_first(path)` ¶

Move the field to the first position of the parent struct.

Parameters:

Name	Type	Description	Default
`path`	`str \| tuple[str, ...]`	The path to the field.	required

Returns:

Type	Description
`UpdateSchema`	The UpdateSchema with the move operation staged.

Source code in pyiceberg/table/update/schema.py

def move_first(self, path: str | tuple[str, ...]) -> UpdateSchema:
    """Stage moving a field to the first position of its parent struct.

    Args:
        path: The field to move, as a dotted name or a tuple of name parts.

    Returns:
        This UpdateSchema, so that further changes can be chained.

    Raises:
        ValueError: If the field cannot be found.
    """
    if isinstance(path, tuple):
        full_name = ".".join(path)
    else:
        full_name = path

    moved_id = self._find_for_move(full_name)
    if moved_id is None:
        raise ValueError(f"Cannot move missing column: {full_name}")

    # A "first" move needs no reference field, so only the moved field is passed.
    staged_move = _Move(field_id=moved_id, full_name=full_name, op=_MoveOperation.First)
    self._move(staged_move)
    return self

`rename_column(path_from, new_name)` ¶

Update the name of a column.

Parameters:

Name	Type	Description	Default
`path_from`	`str \| tuple[str, ...]`	The path to the column to be renamed.	required
`new_name`	`str`	The new name of the column.	required

Returns:

Type	Description
`UpdateSchema`	The UpdateSchema with the rename operation staged.

Source code in pyiceberg/table/update/schema.py

def rename_column(self, path_from: str | tuple[str, ...], new_name: str) -> UpdateSchema:
    """Stage a rename of a column.

    Args:
        path_from: The column to rename, as a dotted name or a tuple of name parts.
        new_name: The new name of the column; it replaces only the field's own name.

    Returns:
        This UpdateSchema, so that further changes can be chained.

    Raises:
        ValueError: If the column is already staged for deletion.
    """
    if isinstance(path_from, tuple):
        path_from = ".".join(path_from)

    field_from = self._schema.find_field(path_from, self._case_sensitive)
    target_id = field_from.field_id

    if target_id in self._deletes:
        raise ValueError(f"Cannot rename a column that will be deleted: {path_from}")

    # Build on an update already staged for this field, if any, so that its
    # other pending changes are kept; otherwise start from the schema's field.
    base = self._updates.get(target_id) or field_from
    self._updates[target_id] = NestedField(
        field_id=base.field_id,
        name=new_name,
        field_type=base.field_type,
        doc=base.doc,
        required=base.required,
        initial_default=base.initial_default,
        write_default=base.write_default,
    )

    # Identifier fields are tracked by full column name in the schema's own
    # casing, so resolve that name by field id before comparing.
    from_field_correct_casing = self._schema.find_column_name(target_id)
    if from_field_correct_casing in self._identifier_field_names:
        self._identifier_field_names.remove(from_field_correct_casing)
        # Keep the parent prefix and swap only the trailing field name.
        new_identifier_path = f"{from_field_correct_casing[: -len(field_from.name)]}{new_name}"
        self._identifier_field_names.add(new_identifier_path)

    return self

`set_default_value(path, default_value)` ¶

Set the default value of a column.

Parameters:

Name	Type	Description	Default
`path`	`str \| tuple[str, ...]`	The path to the column.	required
`default_value`	`L \| None`	The default value to set for the column.	required

Returns:

Type	Description
`UpdateSchema`	The UpdateSchema with the default value change staged.

Source code in pyiceberg/table/update/schema.py

def set_default_value(self, path: str | tuple[str, ...], default_value: L | None) -> UpdateSchema:
    """Set the default value of a column.

    Args:
        path: The path to the column.
        default_value: The default value to set for the column.

    Returns:
        The UpdateSchema with the default value change staged.
    """
    # Validation and staging are handled by the shared default-value helper.
    self._set_column_default_value(path, default_value)

    return self

`update_column(path, field_type=None, required=None, doc=None)` ¶

Update the type, requirement, or documentation of a column.

Parameters:

Name	Type	Description	Default
`path`	`str \| tuple[str, ...]`	The path to the field.	required
`field_type`	`IcebergType \| None`	The new type	`None`
`required`	`bool \| None`	If the field should be required	`None`
`doc`	`str \| None`	Documentation describing the column	`None`

Returns:

Type	Description
`UpdateSchema`	The UpdateSchema with the type update staged.

Source code in pyiceberg/table/update/schema.py

def update_column(
    self,
    path: str | tuple[str, ...],
    field_type: IcebergType | None = None,
    required: bool | None = None,
    doc: str | None = None,
) -> UpdateSchema:
    """Update the type, requirement, or documentation of a column.

    Arguments left as ``None`` keep the column's current value. If all three
    are ``None`` the call is a no-op.

    Args:
        path: The path to the field.
        field_type: The new type
        required: If the field should be required
        doc: Documentation describing the column

    Returns:
        The UpdateSchema with the update staged.

    Raises:
        ValueError: If the column is already staged for deletion.
        ValidationError: If a new type is given and the current type is not a
            primitive, or if the type change is not a valid promotion while
            incompatible changes are not allowed.
    """
    path = (path,) if isinstance(path, str) else path
    full_name = ".".join(path)

    if field_type is None and required is None and doc is None:
        return self

    field = self._schema.find_field(full_name, self._case_sensitive)

    if field.field_id in self._deletes:
        raise ValueError(f"Cannot update a column that will be deleted: {full_name}")

    if field_type is not None:
        if not field.field_type.is_primitive:
            raise ValidationError(f"Cannot change column type: {field.field_type} is not a primitive")

        if not self._allow_incompatible_changes and field.field_type != field_type:
            try:
                promote(field.field_type, field_type)
            except ResolveError as e:
                raise ValidationError(f"Cannot change column type: {full_name}: {field.field_type} -> {field_type}") from e

    # If other updates for the same field exist in this transaction, build on
    # them so their changes are kept; otherwise start from the schema's field.
    current = self._updates.get(field.field_id) or field
    self._updates[field.field_id] = NestedField(
        field_id=current.field_id,
        name=current.name,
        # Explicit None check (not truthiness), matching the guards above, so a
        # type object that happens to evaluate as falsy is still applied.
        field_type=field_type if field_type is not None else current.field_type,
        doc=doc if doc is not None else current.doc,
        required=current.required,
        initial_default=current.initial_default,
        write_default=current.write_default,
    )

    # The requirement change is staged by the shared helper on top of the
    # update recorded above.
    if required is not None:
        self._set_column_requirement(path, required=required)

    return self

schema

UpdateSchema ¶

add_column(path, field_type, doc=None, required=False, default_value=None) ¶

case_sensitive(case_sensitive) ¶

delete_column(path) ¶

make_column_optional(path) ¶

move_after(path, after_name) ¶

move_before(path, before_path) ¶

move_first(path) ¶

rename_column(path_from, new_name) ¶

set_default_value(path, default_value) ¶

update_column(path, field_type=None, required=None, doc=None) ¶

`UpdateSchema` ¶

`add_column(path, field_type, doc=None, required=False, default_value=None)` ¶

`case_sensitive(case_sensitive)` ¶

`delete_column(path)` ¶

`make_column_optional(path)` ¶

`move_after(path, after_name)` ¶

`move_before(path, before_path)` ¶

`move_first(path)` ¶

`rename_column(path_from, new_name)` ¶

`set_default_value(path, default_value)` ¶

`update_column(path, field_type=None, required=None, doc=None)` ¶