# Source code for ursa.catalog.exceptions
"""Exception types for catalog operations.

The PK-collision taxonomy has two layers:

* :class:`CatalogRowExists` carries structured ``table`` + ``primary_key``
  attributes so callers (notably :mod:`ursa.register`) can introspect the
  failed insert without parsing a string message.
* :class:`CatalogPKConflict` is the catalog-internal raise site (used by
  :meth:`~ursa.catalog.Catalog.add` and ``_reject_existing_pks``). It
  inherits from :class:`CatalogRowExists`, so ``except CatalogRowExists``
  catches both, and ``Catalog.add()`` populates the structured attrs at
  the existing-row collision site.
"""
from __future__ import annotations
# Public API of this module: every exception class it defines, listed
# alphabetically.
__all__ = [
    "CatalogError",
    "CatalogNotInitialized",
    "CatalogPKConflict",
    "CatalogRowExists",
    "CatalogSchemaError",
]
[docs]
class CatalogError(Exception):
    """Common root of every exception raised by the catalog layer.

    Catch this type to handle any catalog failure without enumerating the
    concrete subclasses.
    """
[docs]
class CatalogRowExists(CatalogError):
    """Raised when an insert violates a table's primary-key uniqueness.

    Phase 1a (M2) is non-idempotent — a re-register with the same primary
    key always raises. Idempotent re-register semantics are tracked in
    `ENG-1074 <https://linear.app/constellationlab/issue/ENG-1074>`_.

    Instances survive ``pickle`` / ``copy`` round-trips: the exception is
    rebuilt from its stored ``args`` and attributes instead of by calling
    ``__init__`` again, because ``args`` holds only the formatted message
    and does not match the ``(table, primary_key)`` constructor signature.

    Attributes
    ----------
    table:
        Logical catalog table name (e.g. ``"recordings"``, ``"events"``).
    primary_key:
        Tuple of ``(field, value)`` pairs that identify the conflicting
        row. Tuple-of-pairs rather than ``dict`` so the natural ordering
        from ``__primary_key__`` is preserved in the error message.
    """

    def __init__(self, table: str, primary_key: tuple[tuple[str, object], ...]) -> None:
        """Store the structured collision details and build the message.

        Parameters
        ----------
        table:
            Name of the table the insert collided in.
        primary_key:
            ``(field, value)`` pairs identifying the row that already exists.
        """
        # Materialise once so a list or one-shot iterable is not left
        # exhausted on the attribute after the message has been formatted.
        primary_key = tuple(primary_key)
        self.table = table
        self.primary_key = primary_key
        pk_str = ", ".join(f"{k}={v!r}" for k, v in primary_key)
        super().__init__(f"row already exists in {table!r}: {pk_str}")

    @classmethod
    def _rebuild(cls, args: tuple[object, ...], state: dict[str, object]) -> "CatalogRowExists":
        """Recreate an instance from pickled ``args`` and attribute state.

        ``__init__`` is bypassed on purpose: ``BaseException.__new__`` already
        records ``args`` (and therefore the message), and the attributes are
        restored verbatim from ``state``.
        """
        exc = cls.__new__(cls, *args)
        exc.__dict__.update(state)
        return exc

    def __reduce__(self) -> tuple[object, ...]:
        """Return pickling instructions that do not re-invoke ``__init__``.

        The default ``BaseException.__reduce__`` would call ``cls(*self.args)``,
        i.e. with the single formatted message, which raises ``TypeError``
        because ``primary_key`` is a required argument.
        """
        return (type(self)._rebuild, (self.args, dict(self.__dict__)))
[docs]
class CatalogPKConflict(CatalogRowExists):
    """Duplicate-primary-key error raised by :meth:`~ursa.catalog.Catalog.add`.

    Because this derives from :class:`CatalogRowExists`, code further up the
    stack (notably :mod:`ursa.register`) can write ``except CatalogRowExists``
    and handle register-layer and catalog-internal raises alike.

    Two call shapes are supported:

    * ``CatalogPKConflict(table, primary_key)`` — the structured shape used
      by ``Catalog.add()``'s existing-row collision path. The parent
      initialiser fills in ``table`` / ``primary_key``.
    * ``CatalogPKConflict(message)`` — the legacy free-text shape used by
      the within-batch duplicate path, whose message includes
      ``"duplicate PK …"``. ``table`` and ``primary_key`` are left empty.
    """

    def __init__(
        self,
        table_or_message: str,
        primary_key: tuple[tuple[str, object], ...] | None = None,
    ) -> None:
        """Initialise from either the structured or the free-text form.

        Parameters
        ----------
        table_or_message:
            The table name when ``primary_key`` is given, otherwise the
            complete error message.
        primary_key:
            ``(field, value)`` pairs of the conflicting row, or ``None`` to
            select the free-text form.
        """
        if primary_key is not None:
            # Structured form: the parent formats the message and sets attrs.
            CatalogRowExists.__init__(self, table_or_message, primary_key)
            return

        # Free-text form: go straight to the base initialiser so the
        # caller's message is kept as-is instead of being auto-formatted
        # by CatalogRowExists.__init__.
        CatalogError.__init__(self, table_or_message)
        self.table, self.primary_key = "", ()
[docs]
class CatalogSchemaError(CatalogError):
    """Writer-side error for rows that break an Arrow-level schema invariant.

    These are invariants the Pydantic model does not enforce itself. The
    canonical example is a ``TimeWindow`` or ``EmbeddingSource`` instance
    that carries extra fields: Pydantic permits ``extra="allow"`` on these
    submodels (so legacy reads don't break), whereas Arrow struct columns
    are fixed-schema, so unknown nested keys cannot be persisted.
    """
[docs]
class CatalogNotInitialized(CatalogError):
    """Error for operations attempted on a catalog that was never opened.

    "Not opened" means there is no underlying lancedb connection. This is
    mostly a defensive check — ``Catalog.open()`` runs ``_ensure_tables()``,
    so a fresh instance is always usable.
    """