catalog

`Catalog` ¶

Bases: ABC

Base Catalog for table operations like - create, drop, load, list and others.

The catalog table APIs accept a table identifier, which is a fully qualified table name. The identifier can be a string or a tuple of strings. If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.

The catalog namespace APIs follow a similar convention wherein they also accept a namespace identifier that can be a string or tuple of strings.

Attributes:

Name	Type	Description
`name`	`str`	Name of the catalog.
`properties`	`Properties`	Catalog properties.

Source code in pyiceberg/catalog/__init__.py

class Catalog(ABC):
    """Abstract base for catalogs: table, view and namespace operations such as create, drop, load and list.

    The catalog table APIs accept a table identifier, which is a fully qualified table name. The identifier can be a
    string or a tuple of strings. If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it
    is used as-is.

    The catalog namespace APIs follow the same convention: they accept a namespace identifier that can be a string or
    a tuple of strings.

    Attributes:
        name (str): Name of the catalog.
        properties (Properties): Catalog properties.
    """

    name: str
    properties: Properties

    def __init__(self, name: str, **properties: str):
        """Store the catalog name and its properties.

        Args:
            name (str): Name of the catalog.
            **properties (str): Catalog properties; kept on the instance as a dictionary.
        """
        self.name = name
        self.properties = properties

    @abstractmethod
    def create_table(
        self,
        identifier: Union[str, Identifier],
        schema: Union[Schema, "pa.Schema"],
        location: Optional[str] = None,
        partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
        sort_order: SortOrder = UNSORTED_SORT_ORDER,
        properties: Properties = EMPTY_DICT,
    ) -> Table:
        """Create a table.

        Args:
            identifier (str | Identifier): Table identifier.
            schema (Schema | pa.Schema): Table's schema.
            location (str | None): Location for the table. Optional Argument.
            partition_spec (PartitionSpec): PartitionSpec for the table.
            sort_order (SortOrder): SortOrder for the table.
            properties (Properties): Table properties that can be a string based dictionary.

        Returns:
            Table: the created table instance.

        Raises:
            TableAlreadyExistsError: If a table with the name already exists.
        """

    @abstractmethod
    def create_table_transaction(
        self,
        identifier: Union[str, Identifier],
        schema: Union[Schema, "pa.Schema"],
        location: Optional[str] = None,
        partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
        sort_order: SortOrder = UNSORTED_SORT_ORDER,
        properties: Properties = EMPTY_DICT,
    ) -> CreateTableTransaction:
        """Create a CreateTableTransaction.

        Args:
            identifier (str | Identifier): Table identifier.
            schema (Schema | pa.Schema): Table's schema.
            location (str | None): Location for the table. Optional Argument.
            partition_spec (PartitionSpec): PartitionSpec for the table.
            sort_order (SortOrder): SortOrder for the table.
            properties (Properties): Table properties that can be a string based dictionary.

        Returns:
            CreateTableTransaction: the CreateTableTransaction instance.
        """

    def create_table_if_not_exists(
        self,
        identifier: Union[str, Identifier],
        schema: Union[Schema, "pa.Schema"],
        location: Optional[str] = None,
        partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
        sort_order: SortOrder = UNSORTED_SORT_ORDER,
        properties: Properties = EMPTY_DICT,
    ) -> Table:
        """Create a table if it does not exist.

        Args:
            identifier (str | Identifier): Table identifier.
            schema (Schema | pa.Schema): Table's schema.
            location (str | None): Location for the table. Optional Argument.
            partition_spec (PartitionSpec): PartitionSpec for the table.
            sort_order (SortOrder): SortOrder for the table.
            properties (Properties): Table properties that can be a string based dictionary.

        Returns:
            Table: the created table instance if the table does not exist, else the existing
            table instance.
        """
        try:
            return self.create_table(identifier, schema, location, partition_spec, sort_order, properties)
        except TableAlreadyExistsError:
            # The table is already there: hand back the existing one instead of failing.
            return self.load_table(identifier)

    @abstractmethod
    def load_table(self, identifier: Union[str, Identifier]) -> Table:
        """Load the table's metadata and return the table instance.

        You can also use this method to check for table existence using
        'try catalog.load_table() except NoSuchTableError'.
        Note: This method doesn't scan data stored in the table.

        Args:
            identifier (str | Identifier): Table identifier.

        Returns:
            Table: the table instance with its metadata.

        Raises:
            NoSuchTableError: If a table with the name does not exist.
        """

    @abstractmethod
    def table_exists(self, identifier: Union[str, Identifier]) -> bool:
        """Check if a table exists.

        Args:
            identifier (str | Identifier): Table identifier.

        Returns:
            bool: True if the table exists, False otherwise.
        """

    @abstractmethod
    def view_exists(self, identifier: Union[str, Identifier]) -> bool:
        """Check if a view exists.

        Args:
            identifier (str | Identifier): View identifier.

        Returns:
            bool: True if the view exists, False otherwise.
        """

    @abstractmethod
    def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table:
        """Register a new table using existing metadata.

        Args:
            identifier (str | Identifier): Table identifier for the table.
            metadata_location (str): The location of the metadata.

        Returns:
            Table: The newly registered table.

        Raises:
            TableAlreadyExistsError: If the table already exists.
        """

    @abstractmethod
    def drop_table(self, identifier: Union[str, Identifier]) -> None:
        """Drop a table.

        Args:
            identifier (str | Identifier): Table identifier.

        Raises:
            NoSuchTableError: If a table with the name does not exist.
        """

    @abstractmethod
    def purge_table(self, identifier: Union[str, Identifier]) -> None:
        """Drop a table and purge all data and metadata files.

        Note: This method only logs a warning rather than raising an exception when a file deletion fails.

        Args:
            identifier (str | Identifier): Table identifier.

        Raises:
            NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
        """

    @abstractmethod
    def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table:
        """Rename a table, given its fully qualified name.

        Args:
            from_identifier (str | Identifier): Existing table identifier.
            to_identifier (str | Identifier): New table identifier.

        Returns:
            Table: the updated table instance with its metadata.

        Raises:
            NoSuchTableError: If a table with the name does not exist.
        """

    @abstractmethod
    def commit_table(
        self, table: Table, requirements: Tuple[TableRequirement, ...], updates: Tuple[TableUpdate, ...]
    ) -> CommitTableResponse:
        """Commit updates to a table.

        Args:
            table (Table): The table to be updated.
            requirements (Tuple[TableRequirement, ...]): Table requirements.
            updates (Tuple[TableUpdate, ...]): Table updates.

        Returns:
            CommitTableResponse: The updated metadata.

        Raises:
            NoSuchTableError: If a table with the given identifier does not exist.
            CommitFailedException: Requirement not met, or a conflict with a concurrent commit.
            CommitStateUnknownException: Failed due to an internal exception on the side of the catalog.
        """

    @abstractmethod
    def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None:
        """Create a namespace in the catalog.

        Args:
            namespace (str | Identifier): Namespace identifier.
            properties (Properties): A string dictionary of properties for the given namespace.

        Raises:
            NamespaceAlreadyExistsError: If a namespace with the given name already exists.
        """

    def create_namespace_if_not_exists(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None:
        """Create a namespace if it does not exist.

        Args:
            namespace (str | Identifier): Namespace identifier.
            properties (Properties): A string dictionary of properties for the given namespace.
        """
        try:
            self.create_namespace(namespace, properties)
        except NamespaceAlreadyExistsError:
            # Deliberately ignored: an existing namespace is the desired end state.
            pass

    @abstractmethod
    def drop_namespace(self, namespace: Union[str, Identifier]) -> None:
        """Drop a namespace.

        Args:
            namespace (str | Identifier): Namespace identifier.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
            NamespaceNotEmptyError: If the namespace is not empty.
        """

    @abstractmethod
    def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]:
        """List tables under the given namespace in the catalog.

        Args:
            namespace (str | Identifier): Namespace identifier to search.

        Returns:
            List[Identifier]: list of table identifiers.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
        """

    @abstractmethod
    def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]:
        """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog.

        Args:
            namespace (str | Identifier): Namespace identifier to search.

        Returns:
            List[Identifier]: a list of namespace identifiers.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
        """

    @abstractmethod
    def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]:
        """List views under the given namespace in the catalog.

        Args:
            namespace (str | Identifier): Namespace identifier to search.

        Returns:
            List[Identifier]: list of view identifiers.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
        """

    @abstractmethod
    def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties:
        """Get properties for a namespace.

        Args:
            namespace (str | Identifier): Namespace identifier.

        Returns:
            Properties: Properties for the given namespace.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
        """

    @abstractmethod
    def update_namespace_properties(
        self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = EMPTY_DICT
    ) -> PropertiesUpdateSummary:
        """Remove the provided property keys and update properties for a namespace.

        Args:
            namespace (str | Identifier): Namespace identifier.
            removals (Set[str]): Set of property keys that need to be removed. Optional Argument.
            updates (Properties): Properties to be updated for the given namespace.

        Returns:
            PropertiesUpdateSummary: the summary of the property update.

        Raises:
            NoSuchNamespaceError: If a namespace with the given name does not exist.
            ValueError: If removals and updates have overlapping keys.
        """

    @abstractmethod
    def drop_view(self, identifier: Union[str, Identifier]) -> None:
        """Drop a view.

        Args:
            identifier (str | Identifier): View identifier.

        Raises:
            NoSuchViewError: If a view with the given name does not exist.
        """

    @staticmethod
    def identifier_to_tuple(identifier: Union[str, Identifier]) -> Identifier:
        """Parse an identifier to a tuple.

        If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.

        Args:
            identifier (str | Identifier): an identifier, either a string or tuple of strings.

        Returns:
            Identifier: a tuple of strings.
        """
        return identifier if isinstance(identifier, tuple) else tuple(str.split(identifier, "."))

    @staticmethod
    def table_name_from(identifier: Union[str, Identifier]) -> str:
        """Extract table name from a table identifier.

        The table name is the last element of the identifier tuple.

        Args:
            identifier (str | Identifier): a table identifier.

        Returns:
            str: Table name.
        """
        return Catalog.identifier_to_tuple(identifier)[-1]

    @staticmethod
    def namespace_from(identifier: Union[str, Identifier]) -> Identifier:
        """Extract table namespace from a table identifier.

        The namespace is every element of the identifier tuple except the last one.

        Args:
            identifier (str | Identifier): a table identifier.

        Returns:
            Identifier: Namespace identifier.
        """
        return Catalog.identifier_to_tuple(identifier)[:-1]

    @staticmethod
    def namespace_to_string(
        identifier: Union[str, Identifier], err: Union[Type[ValueError], Type[NoSuchNamespaceError]] = ValueError
    ) -> str:
        """Transform a namespace identifier into a string.

        Args:
            identifier (str | Identifier): a namespace identifier.
            err (Type[ValueError] | Type[NoSuchNamespaceError]): the error type to raise when the identifier is
                empty or contains an empty or whitespace-only segment.

        Returns:
            str: the segments of the namespace, each stripped of surrounding whitespace, joined with '.'.

        Raises:
            err: If the identifier has no segments, or any segment is empty or whitespace-only.
        """
        tuple_identifier = Catalog.identifier_to_tuple(identifier)
        if len(tuple_identifier) < 1:
            raise err("Empty namespace identifier")

        # Check if any segment of the tuple is an empty string
        if any(segment.strip() == "" for segment in tuple_identifier):
            raise err("Namespace identifier contains an empty segment or a segment with only whitespace")

        return ".".join(segment.strip() for segment in tuple_identifier)

    @staticmethod
    def identifier_to_database(
        identifier: Union[str, Identifier], err: Union[Type[ValueError], Type[NoSuchNamespaceError]] = ValueError
    ) -> str:
        """Extract the database name from a single-level identifier.

        Args:
            identifier (str | Identifier): an identifier that must have exactly one element.
            err (Type[ValueError] | Type[NoSuchNamespaceError]): the error type to raise for an invalid identifier.

        Returns:
            str: the only element of the identifier.

        Raises:
            err: If the identifier does not have exactly one element.
        """
        tuple_identifier = Catalog.identifier_to_tuple(identifier)
        if len(tuple_identifier) != 1:
            raise err(f"Invalid database, hierarchical namespaces are not supported: {identifier}")

        return tuple_identifier[0]

    @staticmethod
    def identifier_to_database_and_table(
        identifier: Union[str, Identifier],
        err: Union[Type[ValueError], Type[NoSuchTableError], Type[NoSuchNamespaceError]] = ValueError,
    ) -> Tuple[str, str]:
        """Split a two-level identifier into its database and table names.

        Args:
            identifier (str | Identifier): an identifier that must have exactly two elements.
            err (Type[ValueError] | Type[NoSuchTableError] | Type[NoSuchNamespaceError]): the error type to raise
                for an invalid identifier.

        Returns:
            Tuple[str, str]: the first and second element of the identifier.

        Raises:
            err: If the identifier does not have exactly two elements.
        """
        tuple_identifier = Catalog.identifier_to_tuple(identifier)
        if len(tuple_identifier) != 2:
            raise err(f"Invalid path, hierarchical namespaces are not supported: {identifier}")

        return tuple_identifier[0], tuple_identifier[1]

    def _load_file_io(self, properties: Properties = EMPTY_DICT, location: Optional[str] = None) -> FileIO:
        """Load a FileIO from the catalog properties merged with the given properties.

        Args:
            properties (Properties): extra properties; on a key clash they take precedence over the catalog's.
            location (str | None): location passed through to ``load_file_io``.

        Returns:
            FileIO: the result of ``load_file_io``.
        """
        return load_file_io({**self.properties, **properties}, location)

    @staticmethod
    def _convert_schema_if_needed(schema: Union[Schema, "pa.Schema"]) -> Schema:
        """Return an Iceberg schema for the given Iceberg or pyarrow schema.

        Args:
            schema (Schema | pa.Schema): the schema to convert.

        Returns:
            Schema: ``schema`` itself when it already is a ``Schema``, otherwise the result of visiting the
            pyarrow schema with ``_ConvertToIcebergWithoutIDs``.

        Raises:
            ValueError: If ``schema`` is neither a ``Schema`` nor a ``pyarrow.Schema`` (including the case where
                the pyarrow support modules cannot be imported).
        """
        if isinstance(schema, Schema):
            return schema
        # Only the optional imports are guarded, so a ModuleNotFoundError raised while converting is not
        # silently turned into the misleading ValueError below.
        try:
            import pyarrow as pa

            from pyiceberg.io.pyarrow import _ConvertToIcebergWithoutIDs, visit_pyarrow
        except ModuleNotFoundError:
            pass
        else:
            if isinstance(schema, pa.Schema):
                downcast_ns_timestamp_to_us = Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
                return visit_pyarrow(  # type: ignore
                    schema, _ConvertToIcebergWithoutIDs(downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us)
                )
        raise ValueError(f"{type(schema)=}, but it must be pyiceberg.schema.Schema or pyarrow.Schema")

    @staticmethod
    def _delete_old_metadata(io: FileIO, base: TableMetadata, metadata: TableMetadata) -> None:
        """Delete metadata files dropped from the metadata log, if the table property enables it.

        Args:
            io (FileIO): FileIO handed to ``delete_files``.
            base (TableMetadata): metadata before the commit.
            metadata (TableMetadata): metadata after the commit; its properties decide whether deletion runs.
        """
        delete_after_commit: bool = property_as_bool(
            metadata.properties,
            TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED,
            TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT,
        )

        if delete_after_commit:
            # Files listed in the base log but no longer in the new log are the ones to remove.
            removed_previous_metadata_files: set[str] = {log.metadata_file for log in base.metadata_log}
            current_metadata_files: set[str] = {log.metadata_file for log in metadata.metadata_log}
            removed_previous_metadata_files.difference_update(current_metadata_files)
            delete_files(io, removed_previous_metadata_files, METADATA)

    def __repr__(self) -> str:
        """Return the string representation of the Catalog class."""
        return f"{self.name} ({self.__class__})"

`repr()` ¶

Return the string representation of the Catalog class.

Source code in pyiceberg/catalog/__init__.py

def __repr__(self) -> str:
    """Render the catalog as its name followed by its class in parentheses."""
    catalog_name = self.name
    catalog_class = self.__class__
    return f"{catalog_name} ({catalog_class})"

`_delete_old_metadata(io, base, metadata)` `staticmethod` ¶

Delete oldest metadata if config is set to true.

Source code in pyiceberg/catalog/__init__.py

@staticmethod
def _delete_old_metadata(io: FileIO, base: TableMetadata, metadata: TableMetadata) -> None:
    """Remove metadata files that left the metadata log, when the table property enables it.

    Args:
        io (FileIO): FileIO handed to ``delete_files``.
        base (TableMetadata): metadata before the commit.
        metadata (TableMetadata): metadata after the commit; its properties decide whether deletion runs.
    """
    enabled: bool = property_as_bool(
        metadata.properties,
        TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED,
        TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT,
    )
    if not enabled:
        return

    # Start from everything the base log referenced, then drop what the new log still references.
    stale_files: set[str] = {entry.metadata_file for entry in base.metadata_log}
    live_files: set[str] = {entry.metadata_file for entry in metadata.metadata_log}
    stale_files -= live_files
    delete_files(io, stale_files, METADATA)

`commit_table(table, requirements, updates)` `abstractmethod` ¶

Commit updates to a table.

Parameters:

Name	Type	Description	Default
`table`	`Table`	The table to be updated.	required
`requirements`	`Tuple[TableRequirement, ...]`	Table requirements.	required
`updates`	`Tuple[TableUpdate, ...]`	Table updates.	required

Returns:

Name	Type	Description
`CommitTableResponse`	`CommitTableResponse`	The updated metadata.

Raises:

Type	Description
`NoSuchTableError`	If a table with the given identifier does not exist.
`CommitFailedException`	Requirement not met, or a conflict with a concurrent commit.
`CommitStateUnknownException`	Failed due to an internal exception on the side of the catalog.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def commit_table(
    self, table: Table, requirements: Tuple[TableRequirement, ...], updates: Tuple[TableUpdate, ...]
) -> CommitTableResponse:
    """Apply a set of updates to a table, subject to the given requirements.

    Args:
        table (Table): The table to be updated.
        requirements (Tuple[TableRequirement, ...]): Table requirements.
        updates (Tuple[TableUpdate, ...]): Table updates.

    Returns:
        CommitTableResponse: The updated metadata.

    Raises:
        NoSuchTableError: If a table with the given identifier does not exist.
        CommitFailedException: Requirement not met, or a conflict with a concurrent commit.
        CommitStateUnknownException: Failed due to an internal exception on the side of the catalog.
    """

`create_namespace(namespace, properties=EMPTY_DICT)` `abstractmethod` ¶

Create a namespace in the catalog.

Parameters:

Name	Type	Description	Default
`namespace`	`str \| Identifier`	Namespace identifier.	required
`properties`	`Properties`	A string dictionary of properties for the given namespace.	`EMPTY_DICT`

Raises:

Type	Description
`NamespaceAlreadyExistsError`	If a namespace with the given name already exists.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None:
    """Add a new namespace to the catalog.

    Args:
        namespace (str | Identifier): Namespace identifier.
        properties (Properties): A string dictionary of properties for the given namespace.

    Raises:
        NamespaceAlreadyExistsError: If a namespace with the given name already exists.
    """

`create_namespace_if_not_exists(namespace, properties=EMPTY_DICT)` ¶

Create a namespace if it does not exist.

Parameters:

Name	Type	Description	Default
`namespace`	`str \| Identifier`	Namespace identifier.	required
`properties`	`Properties`	A string dictionary of properties for the given namespace.	`EMPTY_DICT`

Source code in pyiceberg/catalog/__init__.py

def create_namespace_if_not_exists(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None:
    """Create the namespace unless it is already present.

    Args:
        namespace (str | Identifier): Namespace identifier.
        properties (Properties): A string dictionary of properties for the given namespace.
    """
    try:
        self.create_namespace(namespace, properties)
    except NamespaceAlreadyExistsError:
        # An existing namespace is the desired end state, so there is nothing left to do.
        return None

`create_table(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT)` `abstractmethod` ¶

Create a table.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	Table identifier.	required
`schema`	`Schema`	Table's schema.	required
`location`	`str \| None`	Location for the table. Optional Argument.	`None`
`partition_spec`	`PartitionSpec`	PartitionSpec for the table.	`UNPARTITIONED_PARTITION_SPEC`
`sort_order`	`SortOrder`	SortOrder for the table.	`UNSORTED_SORT_ORDER`
`properties`	`Properties`	Table properties that can be a string based dictionary.	`EMPTY_DICT`

Returns:

Name	Type	Description
`Table`	`Table`	the created table instance.

Raises:

Type	Description
`TableAlreadyExistsError`	If a table with the name already exists.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def create_table(
    self,
    identifier: Union[str, Identifier],
    schema: Union[Schema, "pa.Schema"],
    location: Optional[str] = None,
    partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
    sort_order: SortOrder = UNSORTED_SORT_ORDER,
    properties: Properties = EMPTY_DICT,
) -> Table:
    """Create a new table in the catalog.

    Args:
        identifier (str | Identifier): Table identifier.
        schema (Schema | pa.Schema): Schema of the table.
        location (str | None): Location for the table. Optional Argument.
        partition_spec (PartitionSpec): PartitionSpec for the table.
        sort_order (SortOrder): SortOrder for the table.
        properties (Properties): Table properties that can be a string based dictionary.

    Returns:
        Table: the created table instance.

    Raises:
        TableAlreadyExistsError: If a table with the name already exists.
    """

`create_table_if_not_exists(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT)` ¶

Create a table if it does not exist.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	Table identifier.	required
`schema`	`Schema`	Table's schema.	required
`location`	`str \| None`	Location for the table. Optional Argument.	`None`
`partition_spec`	`PartitionSpec`	PartitionSpec for the table.	`UNPARTITIONED_PARTITION_SPEC`
`sort_order`	`SortOrder`	SortOrder for the table.	`UNSORTED_SORT_ORDER`
`properties`	`Properties`	Table properties that can be a string based dictionary.	`EMPTY_DICT`

Returns:

Name	Type	Description
`Table`	`Table`	the created table instance if the table does not exist, else the existing table instance.

Source code in pyiceberg/catalog/__init__.py

def create_table_if_not_exists(
    self,
    identifier: Union[str, Identifier],
    schema: Union[Schema, "pa.Schema"],
    location: Optional[str] = None,
    partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
    sort_order: SortOrder = UNSORTED_SORT_ORDER,
    properties: Properties = EMPTY_DICT,
) -> Table:
    """Create the table, or load it when it is already present.

    Args:
        identifier (str | Identifier): Table identifier.
        schema (Schema | pa.Schema): Schema of the table.
        location (str | None): Location for the table. Optional Argument.
        partition_spec (PartitionSpec): PartitionSpec for the table.
        sort_order (SortOrder): SortOrder for the table.
        properties (Properties): Table properties that can be a string based dictionary.

    Returns:
        Table: the newly created table, or the existing table when creation reports
        that it already exists.
    """
    try:
        table = self.create_table(identifier, schema, location, partition_spec, sort_order, properties)
    except TableAlreadyExistsError:
        # Creation lost to an existing table: fall back to loading that one.
        table = self.load_table(identifier)
    return table

`create_table_transaction(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT)` `abstractmethod` ¶

Create a CreateTableTransaction.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	Table identifier.	required
`schema`	`Schema`	Table's schema.	required
`location`	`str \| None`	Location for the table. Optional Argument.	`None`
`partition_spec`	`PartitionSpec`	PartitionSpec for the table.	`UNPARTITIONED_PARTITION_SPEC`
`sort_order`	`SortOrder`	SortOrder for the table.	`UNSORTED_SORT_ORDER`
`properties`	`Properties`	Table properties that can be a string based dictionary.	`EMPTY_DICT`

Returns:

Name	Type	Description
`CreateTableTransaction`	`CreateTableTransaction`	createTableTransaction instance.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def create_table_transaction(
    self,
    identifier: Union[str, Identifier],
    schema: Union[Schema, "pa.Schema"],
    location: Optional[str] = None,
    partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
    sort_order: SortOrder = UNSORTED_SORT_ORDER,
    properties: Properties = EMPTY_DICT,
) -> CreateTableTransaction:
    """Build a CreateTableTransaction for a new table.

    Args:
        identifier (str | Identifier): Table identifier.
        schema (Schema | pa.Schema): Schema of the table.
        location (str | None): Location for the table. Optional Argument.
        partition_spec (PartitionSpec): PartitionSpec for the table.
        sort_order (SortOrder): SortOrder for the table.
        properties (Properties): Table properties that can be a string based dictionary.

    Returns:
        CreateTableTransaction: the CreateTableTransaction instance.
    """

`drop_namespace(namespace)` `abstractmethod` ¶

Drop a namespace.

Parameters:

Name	Type	Description	Default
`namespace`	`str \| Identifier`	Namespace identifier.	required

Raises:

Type	Description
`NoSuchNamespaceError`	If a namespace with the given name does not exist.
`NamespaceNotEmptyError`	If the namespace is not empty.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def drop_namespace(self, namespace: Union[str, Identifier]) -> None:
    """Remove a namespace from the catalog.

    Args:
        namespace (str | Identifier): Namespace identifier.

    Raises:
        NoSuchNamespaceError: If a namespace with the given name does not exist.
        NamespaceNotEmptyError: If the namespace is not empty.
    """

`drop_table(identifier)` `abstractmethod` ¶

Drop a table.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	Table identifier.	required

Raises:

Type	Description
`NoSuchTableError`	If a table with the name does not exist.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def drop_table(self, identifier: Union[str, Identifier]) -> None:
    """Remove a table from the catalog.

    Args:
        identifier (str | Identifier): Table identifier.

    Raises:
        NoSuchTableError: If a table with the name does not exist.
    """

`drop_view(identifier)` `abstractmethod` ¶

Drop a view.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	View identifier.	required

Raises:

Type	Description
`NoSuchViewError`	If a view with the given name does not exist.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def drop_view(self, identifier: Union[str, Identifier]) -> None:
    """Remove a view from the catalog.

    Args:
        identifier (str | Identifier): View identifier.

    Raises:
        NoSuchViewError: If a view with the given name does not exist.
    """

`identifier_to_tuple(identifier)` `staticmethod` ¶

Parse an identifier to a tuple.

If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	an identifier, either a string or tuple of strings.	required

Returns:

Name	Type	Description
`Identifier`	`Identifier`	a tuple of strings.

Source code in pyiceberg/catalog/__init__.py

@staticmethod
def identifier_to_tuple(identifier: Union[str, Identifier]) -> Identifier:
    """Normalize an identifier into a tuple of strings.

    A tuple is returned unchanged; a string is split on '.' and the parts are returned as a tuple.

    Args:
        identifier (str | Identifier): an identifier, either a string or tuple of strings.

    Returns:
        Identifier: a tuple of strings.
    """
    if isinstance(identifier, tuple):
        return identifier
    parts = str.split(identifier, ".")
    return tuple(parts)

`list_namespaces(namespace=())` `abstractmethod` ¶

List namespaces from the given namespace. If not given, list top-level namespaces from the catalog.

Parameters:

Name	Type	Description	Default
`namespace`	`str \| Identifier`	Namespace identifier to search.	`()`

Returns:

Type	Description
`List[Identifier]`	A list of namespace identifiers.

Raises:

Type	Description
`NoSuchNamespaceError`	If a namespace with the given name does not exist.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]:
    """List the namespaces under the given namespace, or the top-level namespaces when none is given.

    Args:
        namespace (str | Identifier): Namespace identifier to search.

    Returns:
        List[Identifier]: a list of namespace identifiers.

    Raises:
        NoSuchNamespaceError: If a namespace with the given name does not exist.
    """

`list_tables(namespace)` `abstractmethod` ¶

List tables under the given namespace in the catalog.

Parameters:

Name	Type	Description	Default
`namespace`	`str \| Identifier`	Namespace identifier to search.	required

Returns:

Type	Description
`List[Identifier]`	A list of table identifiers.

Raises:

Type	Description
`NoSuchNamespaceError`	If a namespace with the given name does not exist.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]:
    """List the tables found under the given namespace.

    Args:
        namespace (str | Identifier): Namespace identifier to search.

    Returns:
        List[Identifier]: list of table identifiers.

    Raises:
        NoSuchNamespaceError: If a namespace with the given name does not exist.
    """

`list_views(namespace)` `abstractmethod` ¶

List views under the given namespace in the catalog.

Parameters:

Name	Type	Description	Default
`namespace`	`str \| Identifier`	Namespace identifier to search.	required

Returns:

Type	Description
`List[Identifier]`	List[Identifier]: list of view identifiers.

Raises:

Type	Description
`NoSuchNamespaceError`	If a namespace with the given name does not exist.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]:
    """List views under the given namespace in the catalog.

    This is an abstract method; concrete catalogs provide the implementation.

    Args:
        namespace (str | Identifier): Namespace identifier to search, as a string or a tuple of strings.

    Returns:
        List[Identifier]: list of view identifiers.

    Raises:
        NoSuchNamespaceError: If a namespace with the given name does not exist.
    """

`load_namespace_properties(namespace)` `abstractmethod` ¶

Get properties for a namespace.

Parameters:

Name	Type	Description	Default
`namespace`	`str \| Identifier`	Namespace identifier.	required

Returns:

Name	Type	Description
`Properties`	`Properties`	Properties for the given namespace.

Raises:

Type	Description
`NoSuchNamespaceError`	If a namespace with the given name does not exist.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties:
    """Get properties for a namespace.

    This is an abstract method; concrete catalogs provide the implementation.

    Args:
        namespace (str | Identifier): Namespace identifier, as a string or a tuple of strings.

    Returns:
        Properties: Properties for the given namespace.

    Raises:
        NoSuchNamespaceError: If a namespace with the given name does not exist.
    """

`load_table(identifier)` `abstractmethod` ¶

Load the table's metadata and returns the table instance.

You can also use this method to check for table existence using 'try catalog.load_table() except NoSuchTableError'. Note: This method doesn't scan data stored in the table.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	Table identifier.	required

Returns:

Name	Type	Description
`Table`	`Table`	the table instance with its metadata.

Raises:

Type	Description
`NoSuchTableError`	If a table with the name does not exist.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def load_table(self, identifier: Union[str, Identifier]) -> Table:
    """Load the table's metadata and return the table instance.

    You can also use this method to check for table existence using
    'try catalog.load_table() except NoSuchTableError'.
    Note: This method doesn't scan data stored in the table.

    Args:
        identifier (str | Identifier): Table identifier, as a string or a tuple of strings.

    Returns:
        Table: the table instance with its metadata.

    Raises:
        NoSuchTableError: If a table with the name does not exist.
    """

`namespace_from(identifier)` `staticmethod` ¶

Extract table namespace from a table identifier.

Parameters:

Name	Type	Description	Default
`identifier`	`Union[str, Identifier]`	a table identifier.	required

Returns:

Name	Type	Description
`Identifier`	`Identifier`	Namespace identifier.

Source code in pyiceberg/catalog/__init__.py

@staticmethod
def namespace_from(identifier: Union[str, Identifier]) -> Identifier:
    """Return the namespace portion of a table identifier.

    The identifier is first normalized to a tuple; the namespace is every element
    except the last one (the last element being the table name).

    Args:
        identifier (Union[str, Identifier]): a table identifier.

    Returns:
        Identifier: Namespace identifier.
    """
    parts = Catalog.identifier_to_tuple(identifier)
    # Drop the trailing element, which is the table name.
    return parts[:-1]

`namespace_to_string(identifier, err=ValueError)` `staticmethod` ¶

Transform a namespace identifier into a string.

Parameters:

Name	Type	Description	Default
`identifier`	`Union[str, Identifier]`	a namespace identifier.	required
`err`	`Union[Type[ValueError], Type[NoSuchNamespaceError]]`	the error type to raise when identifier is empty.	`ValueError`

Returns:

Name	Type	Description
`str`	`str`	Namespace identifier as a dot-separated string.

Source code in pyiceberg/catalog/__init__.py

@staticmethod
def namespace_to_string(
    identifier: Union[str, Identifier], err: Union[Type[ValueError], Type[NoSuchNamespaceError]] = ValueError
) -> str:
    """Render a namespace identifier as a dot-separated string.

    Each segment is stripped of surrounding whitespace before being joined with '.'.

    Args:
        identifier (Union[str, Identifier]): a namespace identifier.
        err (Union[Type[ValueError], Type[NoSuchNamespaceError]]): the error type to raise when the identifier
            is empty or contains a blank segment.

    Returns:
        str: the namespace segments joined with '.'.

    Raises:
        err: If the identifier has no segments, or any segment is empty or whitespace-only.
    """
    segments = Catalog.identifier_to_tuple(identifier)
    if not segments:
        raise err("Empty namespace identifier")

    # Reject blank segments one at a time, stopping at the first offender.
    for segment in segments:
        if not segment.strip():
            raise err("Namespace identifier contains an empty segment or a segment with only whitespace")

    return ".".join(segment.strip() for segment in segments)

`purge_table(identifier)` `abstractmethod` ¶

Drop a table and purge all data and metadata files.

Note: This method only logs warning rather than raise exception when encountering file deletion failure.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	Table identifier.	required

Raises:

Type	Description
`NoSuchTableError`	If a table with the name does not exist, or the identifier is invalid.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def purge_table(self, identifier: Union[str, Identifier]) -> None:
    """Drop a table and purge all data and metadata files.

    This is an abstract method; concrete catalogs provide the implementation.

    Note: This method only logs warning rather than raise exception when encountering file deletion failure.

    Args:
        identifier (str | Identifier): Table identifier, as a string or a tuple of strings.

    Raises:
        NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
    """

`register_table(identifier, metadata_location)` `abstractmethod` ¶

Register a new table using existing metadata.

Parameters:

Name	Type	Description	Default
`identifier`	`Union[str, Identifier]`	Table identifier for the table	required
`metadata_location`	`str`	The location to the metadata	required

Returns:

Name	Type	Description
`Table`	`Table`	The newly registered table

Raises:

Type	Description
`TableAlreadyExistsError`	If the table already exists

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table:
    """Register a new table using existing metadata.

    This is an abstract method; concrete catalogs provide the implementation.

    Args:
        identifier (Union[str, Identifier]): Table identifier for the table.
        metadata_location (str): The location to the metadata.

    Returns:
        Table: The newly registered table.

    Raises:
        TableAlreadyExistsError: If the table already exists.
    """

`rename_table(from_identifier, to_identifier)` `abstractmethod` ¶

Rename a fully classified table name.

Parameters:

Name	Type	Description	Default
`from_identifier`	`str \| Identifier`	Existing table identifier.	required
`to_identifier`	`str \| Identifier`	New table identifier.	required

Returns:

Name	Type	Description
`Table`	`Table`	the updated table instance with its metadata.

Raises:

Type	Description
`NoSuchTableError`	If a table with the name does not exist.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table:
    """Rename a fully classified table name.

    This is an abstract method; concrete catalogs provide the implementation.

    Args:
        from_identifier (str | Identifier): Existing table identifier.
        to_identifier (str | Identifier): New table identifier.

    Returns:
        Table: the updated table instance with its metadata.

    Raises:
        NoSuchTableError: If a table with the name does not exist.
    """

`table_exists(identifier)` `abstractmethod` ¶

Check if a table exists.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	Table identifier.	required

Returns:

Name	Type	Description
`bool`	`bool`	True if the table exists, False otherwise.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def table_exists(self, identifier: Union[str, Identifier]) -> bool:
    """Check if a table exists.

    This is an abstract method; concrete catalogs provide the implementation.

    Args:
        identifier (str | Identifier): Table identifier, as a string or a tuple of strings.

    Returns:
        bool: True if the table exists, False otherwise.
    """

`table_name_from(identifier)` `staticmethod` ¶

Extract table name from a table identifier.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	a table identifier.	required

Returns:

Name	Type	Description
`str`	`str`	Table name.

Source code in pyiceberg/catalog/__init__.py

@staticmethod
def table_name_from(identifier: Union[str, Identifier]) -> str:
    """Return the table-name portion of a table identifier.

    The identifier is first normalized to a tuple; the table name is its last element.

    Args:
        identifier (str | Identifier): a table identifier.

    Returns:
        str: Table name.
    """
    parts = Catalog.identifier_to_tuple(identifier)
    # The final element of the identifier tuple is the table name.
    return parts[-1]

`update_namespace_properties(namespace, removals=None, updates=EMPTY_DICT)` `abstractmethod` ¶

Remove provided property keys and updates properties for a namespace.

Parameters:

Name	Type	Description	Default
`namespace`	`str \| Identifier`	Namespace identifier.	required
`removals`	`Set[str]`	Set of property keys that need to be removed. Optional Argument.	`None`
`updates`	`Properties`	Properties to be updated for the given namespace.	`EMPTY_DICT`

Raises:

Type	Description
`NoSuchNamespaceError`	If a namespace with the given name does not exist.
`ValueError`	If removals and updates have overlapping keys.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def update_namespace_properties(
    self, namespace: Union[str, Identifier], removals: Optional[Set[str]] = None, updates: Properties = EMPTY_DICT
) -> PropertiesUpdateSummary:
    """Remove provided property keys and update properties for a namespace.

    This is an abstract method; concrete catalogs provide the implementation.

    Args:
        namespace (str | Identifier): Namespace identifier.
        removals (Set[str]): Set of property keys that need to be removed. Optional Argument, defaults to None.
        updates (Properties): Properties to be updated for the given namespace. Defaults to EMPTY_DICT.

    Returns:
        PropertiesUpdateSummary: Summary of the changes that were applied.

    Raises:
        NoSuchNamespaceError: If a namespace with the given name does not exist.
        ValueError: If removals and updates have overlapping keys.
    """

`view_exists(identifier)` `abstractmethod` ¶

Check if a view exists.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	View identifier.	required

Returns:

Name	Type	Description
`bool`	`bool`	True if the view exists, False otherwise.

Source code in pyiceberg/catalog/__init__.py

@abstractmethod
def view_exists(self, identifier: Union[str, Identifier]) -> bool:
    """Check if a view exists.

    This is an abstract method; concrete catalogs provide the implementation.

    Args:
        identifier (str | Identifier): View identifier, as a string or a tuple of strings.

    Returns:
        bool: True if the view exists, False otherwise.
    """

`MetastoreCatalog` ¶

Bases: Catalog, ABC

Source code in pyiceberg/catalog/__init__.py

class MetastoreCatalog(Catalog, ABC):
    """Abstract Catalog base class with shared table-staging and housekeeping helpers.

    On top of the ``Catalog`` interface this class implements ``table_exists`` and ``purge_table``
    in terms of ``load_table`` and ``drop_table``, and offers private helpers for staging tables,
    resolving table locations, merging property updates and parsing metadata file versions.
    """

    def __init__(self, name: str, **properties: str):
        """Initialize the catalog by forwarding the name and properties to the parent constructor.

        Args:
            name (str): Name of the catalog.
            **properties (str): Catalog properties.
        """
        super().__init__(name, **properties)

    def create_table_transaction(
        self,
        identifier: Union[str, Identifier],
        schema: Union[Schema, "pa.Schema"],
        location: Optional[str] = None,
        partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
        sort_order: SortOrder = UNSORTED_SORT_ORDER,
        properties: Properties = EMPTY_DICT,
    ) -> CreateTableTransaction:
        """Stage a new table and wrap it in a CreateTableTransaction.

        The arguments are passed unchanged to ``_create_staged_table``.

        Args:
            identifier (str | Identifier): Table identifier.
            schema (Schema): Table's schema.
            location (str | None): Location for the table. Optional Argument.
            partition_spec (PartitionSpec): PartitionSpec for the table.
            sort_order (SortOrder): SortOrder for the table.
            properties (Properties): Table properties that can be a string based dictionary.

        Returns:
            CreateTableTransaction: a transaction built around the staged table.
        """
        return CreateTableTransaction(
            self._create_staged_table(identifier, schema, location, partition_spec, sort_order, properties)
        )

    def table_exists(self, identifier: Union[str, Identifier]) -> bool:
        """Check if a table exists by attempting to load it.

        Args:
            identifier (str | Identifier): Table identifier.

        Returns:
            bool: True if ``load_table`` succeeds, False if it raises NoSuchTableError.
        """
        try:
            self.load_table(identifier)
            return True
        except NoSuchTableError:
            return False

    def purge_table(self, identifier: Union[str, Identifier]) -> None:
        """Drop a table and delete its data, manifest, manifest-list and metadata files.

        File deletion goes through ``delete_data_files`` and ``delete_files``.

        Args:
            identifier (str | Identifier): Table identifier.
        """
        # Load the table before dropping it: its metadata is needed below to find the files to delete.
        table = self.load_table(identifier)
        self.drop_table(identifier)
        io = load_file_io(self.properties, table.metadata_location)
        metadata = table.metadata
        manifest_lists_to_delete = set()
        manifests_to_delete: List[ManifestFile] = []
        # Collect the manifest list and the manifests of every snapshot recorded in the metadata.
        for snapshot in metadata.snapshots:
            manifests_to_delete += snapshot.manifests(io)
            manifest_lists_to_delete.add(snapshot.manifest_list)

        # Sets de-duplicate manifest paths and previous metadata files.
        manifest_paths_to_delete = {manifest.manifest_path for manifest in manifests_to_delete}
        prev_metadata_files = {log.metadata_file for log in metadata.metadata_log}

        # Data files are deleted first, while the manifests that reference them have not been deleted yet.
        delete_data_files(io, manifests_to_delete)
        delete_files(io, manifest_paths_to_delete, MANIFEST)
        delete_files(io, manifest_lists_to_delete, MANIFEST_LIST)
        delete_files(io, prev_metadata_files, PREVIOUS_METADATA)
        delete_files(io, {table.metadata_location}, METADATA)

    def _create_staged_table(
        self,
        identifier: Union[str, Identifier],
        schema: Union[Schema, "pa.Schema"],
        location: Optional[str] = None,
        partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
        sort_order: SortOrder = UNSORTED_SORT_ORDER,
        properties: Properties = EMPTY_DICT,
    ) -> StagedTable:
        """Create a table and return the table instance without committing the changes.

        Args:
            identifier (str | Identifier): Table identifier.
            schema (Schema): Table's schema.
            location (str | None): Location for the table. Optional Argument.
            partition_spec (PartitionSpec): PartitionSpec for the table.
            sort_order (SortOrder): SortOrder for the table.
            properties (Properties): Table properties that can be a string based dictionary.

        Returns:
            StagedTable: the created staged table instance.
        """
        schema: Schema = self._convert_schema_if_needed(schema)  # type: ignore

        database_name, table_name = self.identifier_to_database_and_table(identifier)

        # Use the explicit location when given, otherwise the default warehouse location.
        location = self._resolve_table_location(location, database_name, table_name)
        provider = load_location_provider(location, properties)
        metadata_location = provider.new_table_metadata_file_location()
        metadata = new_table_metadata(
            location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties
        )
        io = self._load_file_io(properties=properties, location=metadata_location)
        return StagedTable(
            identifier=(database_name, table_name),
            metadata=metadata,
            metadata_location=metadata_location,
            io=io,
            catalog=self,
        )

    def _update_and_stage_table(
        self,
        current_table: Optional[Table],
        table_identifier: Identifier,
        requirements: Tuple[TableRequirement, ...],
        updates: Tuple[TableUpdate, ...],
    ) -> StagedTable:
        """Validate requirements, apply updates and return the result as a staged table.

        Args:
            current_table (Table | None): The existing table, or None when there is no current table.
            table_identifier (Identifier): Identifier assigned to the staged table.
            requirements (Tuple[TableRequirement, ...]): Requirements validated against the current metadata.
            updates (Tuple[TableUpdate, ...]): Updates applied to the base metadata.

        Returns:
            StagedTable: a staged table carrying the updated metadata and a new metadata file location.
        """
        # Requirements are validated against None when there is no current table.
        for requirement in requirements:
            requirement.validate(current_table.metadata if current_table else None)

        # Without a current table the updates are applied on top of an empty metadata instance.
        updated_metadata = update_table_metadata(
            base_metadata=current_table.metadata if current_table else self._empty_table_metadata(),
            updates=updates,
            enforce_validation=current_table is None,
            metadata_location=current_table.metadata_location if current_table else None,
        )

        # Parsed as `(version + 1) if current_table else 0`: a table with no predecessor starts at version 0.
        new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1 if current_table else 0
        provider = load_location_provider(updated_metadata.location, updated_metadata.properties)
        new_metadata_location = provider.new_table_metadata_file_location(new_metadata_version)

        return StagedTable(
            identifier=table_identifier,
            metadata=updated_metadata,
            metadata_location=new_metadata_location,
            io=self._load_file_io(properties=updated_metadata.properties, location=new_metadata_location),
            catalog=self,
        )

    def _get_updated_props_and_update_summary(
        self, current_properties: Properties, removals: Optional[Set[str]], updates: Properties
    ) -> Tuple[PropertiesUpdateSummary, Properties]:
        """Apply removals and updates to a copy of the properties and summarize what changed.

        Args:
            current_properties (Properties): The existing properties; this mapping is copied, not mutated.
            removals (Set[str] | None): Property keys to remove.
            updates (Properties): Properties to set.

        Returns:
            Tuple[PropertiesUpdateSummary, Properties]: the update summary and the resulting properties.

        Raises:
            ValueError: If removals and updates have overlapping keys.
        """
        self._check_for_overlap(updates=updates, removals=removals)
        updated_properties = dict(current_properties)

        removed: Set[str] = set()
        updated: Set[str] = set()

        if removals:
            for key in removals:
                # Only keys that are actually present count as removed.
                if key in updated_properties:
                    updated_properties.pop(key)
                    removed.add(key)
        if updates:
            for key, value in updates.items():
                updated_properties[key] = value
                updated.add(key)

        # Keys requested for removal that were not present are reported as missing.
        expected_to_change = (removals or set()).difference(removed)
        properties_update_summary = PropertiesUpdateSummary(
            removed=list(removed or []), updated=list(updated or []), missing=list(expected_to_change)
        )

        return properties_update_summary, updated_properties

    def _resolve_table_location(self, location: Optional[str], database_name: str, table_name: str) -> str:
        """Return the table location, falling back to the default warehouse location.

        Args:
            location (str | None): Explicit table location; None or an empty string triggers the fallback.
            database_name (str): Database name used for the default location.
            table_name (str): Table name used for the default location.

        Returns:
            str: the explicit location with trailing slashes removed, or the default warehouse location.
        """
        if not location:
            return self._get_default_warehouse_location(database_name, table_name)
        return location.rstrip("/")

    def _get_default_warehouse_location(self, database_name: str, table_name: str) -> str:
        """Derive a default table location from the namespace or catalog properties.

        The namespace's LOCATION property takes precedence and yields ``<location>/<table_name>``.
        Otherwise the catalog's WAREHOUSE_LOCATION property yields
        ``<warehouse>/<database_name>.db/<table_name>``.

        Args:
            database_name (str): Database whose namespace properties are consulted.
            table_name (str): Table name appended to the base path.

        Returns:
            str: the default location for the table.

        Raises:
            ValueError: If neither property is set.
        """
        database_properties = self.load_namespace_properties(database_name)
        if database_location := database_properties.get(LOCATION):
            database_location = database_location.rstrip("/")
            return f"{database_location}/{table_name}"

        if warehouse_path := self.properties.get(WAREHOUSE_LOCATION):
            warehouse_path = warehouse_path.rstrip("/")
            return f"{warehouse_path}/{database_name}.db/{table_name}"

        raise ValueError("No default path is set, please specify a location when creating a table")

    @staticmethod
    def _write_metadata(metadata: TableMetadata, io: FileIO, metadata_path: str) -> None:
        """Write the table metadata to the output file that ``io`` creates for ``metadata_path``.

        Args:
            metadata (TableMetadata): The metadata to write.
            io (FileIO): The FileIO used to create the output file.
            metadata_path (str): Destination path of the metadata file.
        """
        ToOutputFile.table_metadata(metadata, io.new_output(metadata_path))

    @staticmethod
    def _parse_metadata_version(metadata_location: str) -> int:
        """Parse the version from the metadata location.

        The version is the first part of the file name, before the first dash.
        For example, the version of the metadata file
        `s3://bucket/db/tb/metadata/00001-6c97e413-d51b-4538-ac70-12fe2a85cb83.metadata.json`
        is 1.
        If the path does not comply with the pattern, the version is defaulted to be -1, ensuring
        that the next metadata file is treated as having version 0.

        Args:
            metadata_location (str): The location of the metadata file.

        Returns:
            int: The version of the metadata file. -1 if the file name does not have valid version string
        """
        file_name = metadata_location.split("/")[-1]
        if file_name_match := TABLE_METADATA_FILE_NAME_REGEX.fullmatch(file_name):
            try:
                # The second group must parse as a UUID; otherwise the name is treated as non-compliant.
                uuid.UUID(file_name_match.group(2))
            except ValueError:
                return -1
            return int(file_name_match.group(1))
        else:
            return -1

    @staticmethod
    def _check_for_overlap(removals: Optional[Set[str]], updates: Properties) -> None:
        """Ensure that no property key is both removed and updated.

        Args:
            removals (Set[str] | None): Property keys to remove.
            updates (Properties): Properties to set.

        Raises:
            ValueError: If at least one key appears in both removals and updates.
        """
        if updates and removals:
            overlap = set(removals) & set(updates.keys())
            if overlap:
                raise ValueError(f"Updates and deletes have an overlap: {overlap}")

    @staticmethod
    def _empty_table_metadata() -> TableMetadata:
        """Return an empty TableMetadata instance.

        It is used to build a TableMetadata from a sequence of initial TableUpdates.
        It is a V1 TableMetadata because there will be a UpgradeFormatVersionUpdate in
        initial changes to bump the metadata to the target version.

        Returns:
            TableMetadata: An empty TableMetadata instance.
        """
        return TableMetadataV1.model_construct(last_column_id=-1, schema=Schema())

`_create_staged_table(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT)` ¶

Create a table and return the table instance without committing the changes.

Parameters:

Name	Type	Description	Default
`identifier`	`str \| Identifier`	Table identifier.	required
`schema`	`Schema`	Table's schema.	required
`location`	`str \| None`	Location for the table. Optional Argument.	`None`
`partition_spec`	`PartitionSpec`	PartitionSpec for the table.	`UNPARTITIONED_PARTITION_SPEC`
`sort_order`	`SortOrder`	SortOrder for the table.	`UNSORTED_SORT_ORDER`
`properties`	`Properties`	Table properties that can be a string based dictionary.	`EMPTY_DICT`

Returns:

Name	Type	Description
`StagedTable`	`StagedTable`	the created staged table instance.

Source code in pyiceberg/catalog/__init__.py

def _create_staged_table(
    self,
    identifier: Union[str, Identifier],
    schema: Union[Schema, "pa.Schema"],
    location: Optional[str] = None,
    partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
    sort_order: SortOrder = UNSORTED_SORT_ORDER,
    properties: Properties = EMPTY_DICT,
) -> StagedTable:
    """Build a staged (uncommitted) table from the given definition.

    Nothing is committed to the catalog here; the caller receives a StagedTable holding the
    new metadata, the metadata file location chosen for it and a FileIO for that location.

    Args:
        identifier (str | Identifier): Table identifier.
        schema (Schema): Table's schema.
        location (str | None): Location for the table. Optional Argument.
        partition_spec (PartitionSpec): PartitionSpec for the table.
        sort_order (SortOrder): SortOrder for the table.
        properties (Properties): Table properties that can be a string based dictionary.

    Returns:
        StagedTable: the created staged table instance.
    """
    iceberg_schema: Schema = self._convert_schema_if_needed(schema)  # type: ignore

    database_name, table_name = self.identifier_to_database_and_table(identifier)

    # Explicit location when given, otherwise the default warehouse location.
    table_location = self._resolve_table_location(location, database_name, table_name)
    location_provider = load_location_provider(table_location, properties)
    metadata_location = location_provider.new_table_metadata_file_location()

    table_metadata = new_table_metadata(
        location=table_location,
        schema=iceberg_schema,
        partition_spec=partition_spec,
        sort_order=sort_order,
        properties=properties,
    )
    file_io = self._load_file_io(properties=properties, location=metadata_location)

    return StagedTable(
        identifier=(database_name, table_name),
        metadata=table_metadata,
        metadata_location=metadata_location,
        io=file_io,
        catalog=self,
    )

`_empty_table_metadata()` `staticmethod` ¶

Return an empty TableMetadata instance.

It is used to build a TableMetadata from a sequence of initial TableUpdates. It is a V1 TableMetadata because there will be a UpgradeFormatVersionUpdate in initial changes to bump the metadata to the target version.

Returns:

Name	Type	Description
`TableMetadata`	`TableMetadata`	An empty TableMetadata instance.

Source code in pyiceberg/catalog/__init__.py

@staticmethod
def _empty_table_metadata() -> TableMetadata:
    """Build a blank TableMetadata to serve as the base for initial TableUpdates.

    The instance is a V1 TableMetadata because the initial changes are expected to contain an
    UpgradeFormatVersionUpdate that bumps the metadata to the target version.

    Returns:
        TableMetadata: An empty TableMetadata instance.
    """
    blank_schema = Schema()
    return TableMetadataV1.model_construct(last_column_id=-1, schema=blank_schema)

`_parse_metadata_version(metadata_location)` `staticmethod` ¶

Parse the version from the metadata location.

The version is the first part of the file name, before the first dash. For example, the version of the metadata file s3://bucket/db/tb/metadata/00001-6c97e413-d51b-4538-ac70-12fe2a85cb83.metadata.json is 1. If the path does not comply with the pattern, the version is defaulted to be -1, ensuring that the next metadata file is treated as having version 0.

Parameters:

Name	Type	Description	Default
`metadata_location`	`str`	The location of the metadata file.	required

Returns:

Name	Type	Description
`int`	`int`	The version of the metadata file. -1 if the file name does not have valid version string

Source code in pyiceberg/catalog/__init__.py

@staticmethod
def _parse_metadata_version(metadata_location: str) -> int:
    """Extract the metadata version encoded in a metadata file location.

    The version is the leading part of the file name, before the first dash. For
    `s3://bucket/db/tb/metadata/00001-6c97e413-d51b-4538-ac70-12fe2a85cb83.metadata.json`
    the version is 1.
    A file name that does not follow the pattern yields -1, so that the next metadata file
    is treated as having version 0.

    Args:
        metadata_location (str): The location of the metadata file.

    Returns:
        int: The version of the metadata file. -1 if the file name does not have valid version string
    """
    # Only the last path component (the file name) carries the version.
    file_name = metadata_location.rsplit("/", 1)[-1]

    name_match = TABLE_METADATA_FILE_NAME_REGEX.fullmatch(file_name)
    if name_match is None:
        return -1

    try:
        # The second group has to be a well-formed UUID for the name to count as compliant.
        uuid.UUID(name_match.group(2))
    except ValueError:
        return -1

    return int(name_match.group(1))

`delete_data_files(io, manifests_to_delete)` ¶

Delete data files linked to given manifests.

Log warnings if failing to delete any file.

Parameters:

Name	Type	Description	Default
`io`	`FileIO`	The FileIO used to delete the object.	required
`manifests_to_delete`	`List[ManifestFile]`	A list of manifest contains paths of data files to be deleted.	required

Source code in pyiceberg/catalog/__init__.py

def delete_data_files(io: FileIO, manifests_to_delete: List[ManifestFile]) -> None:
    """Delete every data file referenced by the given manifests.

    Each distinct path is attempted once, even if several manifest entries reference it.
    A failed deletion (OSError) is logged as a warning and does not stop the loop.

    Args:
        io: The FileIO used to delete the object.
        manifests_to_delete: A list of manifest contains paths of data files to be deleted.
    """
    attempted_paths = set()
    for manifest in manifests_to_delete:
        for manifest_entry in manifest.fetch_manifest_entry(io, discard_deleted=False):
            data_path = manifest_entry.data_file.file_path
            if data_path in attempted_paths:
                # Already handled through an earlier entry.
                continue
            try:
                io.delete(data_path)
            except OSError as exc:
                logger.warning(msg=f"Failed to delete data file {data_path}", exc_info=exc)
            # Recorded whether or not the delete succeeded, so a failing path is not retried.
            attempted_paths.add(data_path)

`delete_files(io, files_to_delete, file_type)` ¶

Delete files.

Log warnings if failing to delete any file.

Parameters:

Name	Type	Description	Default
`io`	`FileIO`	The FileIO used to delete the object.	required
`files_to_delete`	`Set[str]`	A set of file paths to be deleted.	required
`file_type`	`str`	The type of the file.	required

Source code in pyiceberg/catalog/__init__.py

def delete_files(io: FileIO, files_to_delete: Set[str], file_type: str) -> None:
    """Delete each of the given files.

    A failed deletion (OSError) is logged as a warning that names the file type and path;
    the remaining files are still processed.

    Args:
        io: The FileIO used to delete the object.
        files_to_delete: A set of file paths to be deleted.
        file_type: The type of the file.
    """
    for path in files_to_delete:
        try:
            io.delete(path)
        except OSError as exc:
            logger.warning(msg=f"Failed to delete {file_type} file {path}", exc_info=exc)

`infer_catalog_type(name, catalog_properties)` ¶

Try to infer the type based on the dict.

Parameters:

Name	Type	Description	Default
`name`	`str`	Name of the catalog.	required
`catalog_properties`	`RecursiveDict`	Catalog properties.	required

Returns:

Type	Description
`Optional[CatalogType]`	The inferred type based on the provided properties.

Raises:

Type	Description
`ValueError`	Raises a ValueError in case properties are missing, or the wrong type.

Source code in pyiceberg/catalog/__init__.py

def infer_catalog_type(name: str, catalog_properties: RecursiveDict) -> Optional[CatalogType]:
    """Infer the catalog type from the scheme prefix of the "uri" property.

    URIs starting with "http" map to REST, "thrift" to HIVE, and "sqlite" or "postgresql" to SQL.

    Args:
        name: Name of the catalog.
        catalog_properties: Catalog properties.

    Returns:
        The inferred type based on the provided properties.

    Raises:
        ValueError: Raises a ValueError in case properties are missing, or the wrong type.
    """
    uri = catalog_properties.get("uri")
    if not uri:
        raise ValueError(
            f"URI missing, please provide using --uri, the config or environment variable PYICEBERG_CATALOG__{name.upper()}__URI"
        )
    if not isinstance(uri, str):
        raise ValueError(f"Expects the URI to be a string, got: {type(uri)}")

    if uri.startswith("http"):
        return CatalogType.REST
    if uri.startswith("thrift"):
        return CatalogType.HIVE
    if uri.startswith(("sqlite", "postgresql")):
        return CatalogType.SQL

    raise ValueError(f"Could not infer the catalog type from the uri: {uri}")

`load_catalog(name=None, **properties)` ¶

Load the catalog based on the properties.

Will look up the properties from the config, based on the name.

Parameters:

Name	Type	Description	Default
`name`	`Optional[str]`	The name of the catalog.	`None`
`properties`	`Optional[str]`	The properties that are used next to the configuration.	`{}`

Returns:

Type	Description
`Catalog`	An initialized Catalog.

Raises:

Type	Description
`ValueError`	Raises a ValueError in case properties are missing or malformed, or if it could not determine the catalog based on the properties.

Source code in pyiceberg/catalog/__init__.py

def load_catalog(name: Optional[str] = None, **properties: Optional[str]) -> Catalog:
    """Build a catalog from the stored configuration merged with ``properties``.

    The configuration registered under ``name`` is fetched from ``_ENV_CONFIG``
    and combined with the keyword properties via ``merge_config``. The catalog
    is then created either from a custom implementation (``PY_CATALOG_IMPL``)
    or from one of the entries in ``AVAILABLE_CATALOGS``.

    Args:
        name: The name of the catalog. When ``None``, the default catalog name
            reported by the configuration is used.
        properties: The properties that are used next to the configuration.

    Returns:
        An initialized Catalog.

    Raises:
        ValueError: If both a catalog type and a custom implementation are
            given, if the custom implementation cannot be initialized, if the
            properties are missing or malformed, or if no catalog type could
            be determined from them.
    """
    resolved_name = _ENV_CONFIG.get_default_catalog_name() if name is None else name

    env_config = _ENV_CONFIG.get_catalog_config(resolved_name)
    conf: RecursiveDict = merge_config(env_config or {}, cast(RecursiveDict, properties))
    configured_type = conf.get(TYPE)

    # NOTE(review): the custom implementation is read from the caller-supplied
    # properties only, not from the merged config, and only those properties are
    # forwarded to _import_catalog. Confirm this is intended.
    custom_impl = properties.get(PY_CATALOG_IMPL)
    if custom_impl:
        if configured_type:
            raise ValueError(
                "Must not set both catalog type and py-catalog-impl configurations, "
                f"but found type {configured_type} and py-catalog-impl {custom_impl}"
            )

        custom_catalog = _import_catalog(resolved_name, custom_impl, properties)
        if not custom_catalog:
            raise ValueError(f"Could not initialize Catalog: {custom_impl}")
        logger.info("Loaded Catalog: %s", custom_impl)
        return custom_catalog

    catalog_type: Optional[CatalogType] = None
    if not configured_type:
        # No explicit type configured: fall back to inference.
        catalog_type = infer_catalog_type(resolved_name, conf)
    elif isinstance(configured_type, str):
        catalog_type = CatalogType(configured_type.lower())
    # A truthy, non-string type leaves catalog_type as None and fails below.

    if not catalog_type:
        raise ValueError(f"Could not initialize catalog with the following properties: {properties}")

    return AVAILABLE_CATALOGS[catalog_type](resolved_name, cast(Dict[str, str], conf))

catalog

Catalog ¶

__repr__() ¶

_delete_old_metadata(io, base, metadata) staticmethod ¶

commit_table(table, requirements, updates) abstractmethod ¶

create_namespace(namespace, properties=EMPTY_DICT) abstractmethod ¶

create_namespace_if_not_exists(namespace, properties=EMPTY_DICT) ¶

create_table(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT) abstractmethod ¶

create_table_if_not_exists(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT) ¶

create_table_transaction(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT) abstractmethod ¶

drop_namespace(namespace) abstractmethod ¶

drop_table(identifier) abstractmethod ¶

drop_view(identifier) abstractmethod ¶

identifier_to_tuple(identifier) staticmethod ¶

list_namespaces(namespace=()) abstractmethod ¶

list_tables(namespace) abstractmethod ¶

list_views(namespace) abstractmethod ¶

load_namespace_properties(namespace) abstractmethod ¶

load_table(identifier) abstractmethod ¶

namespace_from(identifier) staticmethod ¶

namespace_to_string(identifier, err=ValueError) staticmethod ¶

purge_table(identifier) abstractmethod ¶

register_table(identifier, metadata_location) abstractmethod ¶

rename_table(from_identifier, to_identifier) abstractmethod ¶

table_exists(identifier) abstractmethod ¶

table_name_from(identifier) staticmethod ¶

update_namespace_properties(namespace, removals=None, updates=EMPTY_DICT) abstractmethod ¶

view_exists(identifier) abstractmethod ¶

MetastoreCatalog ¶

_create_staged_table(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT) ¶

_empty_table_metadata() staticmethod ¶

_parse_metadata_version(metadata_location) staticmethod ¶

delete_data_files(io, manifests_to_delete) ¶

delete_files(io, files_to_delete, file_type) ¶

infer_catalog_type(name, catalog_properties) ¶

load_catalog(name=None, **properties) ¶

`Catalog` ¶

`__repr__()` ¶

`_delete_old_metadata(io, base, metadata)` `staticmethod` ¶

`commit_table(table, requirements, updates)` `abstractmethod` ¶

`create_namespace(namespace, properties=EMPTY_DICT)` `abstractmethod` ¶

`create_namespace_if_not_exists(namespace, properties=EMPTY_DICT)` ¶

`create_table(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT)` `abstractmethod` ¶

`create_table_if_not_exists(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT)` ¶

`create_table_transaction(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT)` `abstractmethod` ¶

`drop_namespace(namespace)` `abstractmethod` ¶

`drop_table(identifier)` `abstractmethod` ¶

`drop_view(identifier)` `abstractmethod` ¶

`identifier_to_tuple(identifier)` `staticmethod` ¶

`list_namespaces(namespace=())` `abstractmethod` ¶

`list_tables(namespace)` `abstractmethod` ¶

`list_views(namespace)` `abstractmethod` ¶

`load_namespace_properties(namespace)` `abstractmethod` ¶

`load_table(identifier)` `abstractmethod` ¶

`namespace_from(identifier)` `staticmethod` ¶

`namespace_to_string(identifier, err=ValueError)` `staticmethod` ¶

`purge_table(identifier)` `abstractmethod` ¶

`register_table(identifier, metadata_location)` `abstractmethod` ¶

`rename_table(from_identifier, to_identifier)` `abstractmethod` ¶

`table_exists(identifier)` `abstractmethod` ¶

`table_name_from(identifier)` `staticmethod` ¶

`update_namespace_properties(namespace, removals=None, updates=EMPTY_DICT)` `abstractmethod` ¶

`view_exists(identifier)` `abstractmethod` ¶

`MetastoreCatalog` ¶

`_create_staged_table(identifier, schema, location=None, partition_spec=UNPARTITIONED_PARTITION_SPEC, sort_order=UNSORTED_SORT_ORDER, properties=EMPTY_DICT)` ¶

`_empty_table_metadata()` `staticmethod` ¶

`_parse_metadata_version(metadata_location)` `staticmethod` ¶

`delete_data_files(io, manifests_to_delete)` ¶

`delete_files(io, files_to_delete, file_type)` ¶

`infer_catalog_type(name, catalog_properties)` ¶

`load_catalog(name=None, **properties)` ¶