Source code for ursa.catalog.exceptions

"""Exception types for catalog operations.

The PK-collision taxonomy has two layers:

* :class:`CatalogRowExists` carries structured ``table`` + ``primary_key``
  attributes so callers (notably :mod:`ursa.register`) can introspect the
  failed insert without parsing a string message.
* :class:`CatalogPKConflict` is the exception raised at the catalog-internal
  sites (:meth:`~ursa.catalog.Catalog.add` and ``_reject_existing_pks``). It
  inherits from :class:`CatalogRowExists`, so ``except CatalogRowExists``
  catches both, and ``Catalog.add()`` populates the structured attrs at
  the existing-row collision site.
"""

from __future__ import annotations

# Names exported by ``from <this module> import *``; one entry per exception
# class defined below, kept in alphabetical order.
__all__ = [
    "CatalogError",
    "CatalogNotInitialized",
    "CatalogPKConflict",
    "CatalogRowExists",
    "CatalogSchemaError",
]


class CatalogError(Exception):
    """Root of the catalog-layer exception hierarchy.

    Every error type defined for the catalog layer derives from this class.
    """
class CatalogRowExists(CatalogError):
    """Raised when an insert violates a table's primary-key uniqueness.

    Phase 1a (M2) is non-idempotent — a re-register with the same primary
    key always raises. Idempotent re-register semantics are tracked in
    `ENG-1074 <https://linear.app/constellationlab/issue/ENG-1074>`_.

    Attributes
    ----------
    table:
        Logical catalog table name (e.g. ``"recordings"``, ``"events"``).
    primary_key:
        Tuple of ``(field, value)`` pairs that identify the conflicting
        row. Tuple-of-pairs rather than ``dict`` so the natural ordering
        from ``__primary_key__`` is preserved in the error message.
    """

    def __init__(self, table: str, primary_key: tuple[tuple[str, object], ...]) -> None:
        self.table = table
        self.primary_key = primary_key
        pk_str = ", ".join(f"{k}={v!r}" for k, v in primary_key)
        # Only the formatted message goes into ``args``; the structured
        # values live on the instance attributes set above.
        super().__init__(f"row already exists in {table!r}: {pk_str}")

    def __reduce__(self):
        """Support ``pickle`` / ``copy`` round-trips.

        The default ``BaseException.__reduce__`` rebuilds the instance as
        ``cls(*self.args)``. ``args`` holds only the formatted message, so
        that call fails with ``TypeError`` (``primary_key`` is missing).
        Rebuilding through ``__new__`` stores ``args`` without invoking
        ``__init__``; the instance ``__dict__`` (``table``, ``primary_key``)
        is then restored as pickle state. Because ``__init__`` is never
        replayed, subclasses with a different constructor signature
        round-trip the same way.
        """
        cls = type(self)
        return (cls.__new__, (cls, *self.args), dict(vars(self)))
class CatalogPKConflict(CatalogRowExists):
    """Raised by :meth:`~ursa.catalog.Catalog.add` on duplicate primary key.

    Derives from :class:`CatalogRowExists` so that callers up the stack
    (notably :mod:`ursa.register`) can write ``except CatalogRowExists`` and
    catch both the register-layer raises and the catalog-internal ones.

    The constructor accepts two call shapes:

    * ``CatalogPKConflict(table, primary_key)`` — structured form, used by
      ``Catalog.add()``'s existing-row collision path. The parent class
      fills in the ``table`` / ``primary_key`` attributes.
    * ``CatalogPKConflict(message)`` — legacy free-text form, used by the
      within-batch duplicate path where the message includes
      ``"duplicate PK …"``. ``table`` defaults to ``""`` and
      ``primary_key`` to ``()``.
    """

    def __init__(
        self,
        table_or_message: str,
        primary_key: tuple[tuple[str, object], ...] | None = None,
    ) -> None:
        # Structured form: the parent formats the message and sets attrs.
        if primary_key is not None:
            super().__init__(table_or_message, primary_key)
            return

        # Legacy free-text form. CatalogRowExists.__init__ is deliberately
        # bypassed so the caller's string is not auto-formatted; it is handed
        # straight to the base exception as the message.
        self.table = ""
        self.primary_key = ()
        CatalogError.__init__(self, table_or_message)
class CatalogSchemaError(CatalogError):
    """Raised by the writer for Arrow-level schema violations that the
    Pydantic model does not enforce.

    The canonical case is a ``TimeWindow`` or ``EmbeddingSource`` instance
    carrying extra fields. Pydantic allows ``extra="allow"`` on these
    submodels (so legacy reads don't break), but Arrow struct columns are
    fixed-schema, so unknown nested keys cannot be persisted.
    """
class CatalogNotInitialized(CatalogError):
    """Raised when an operation reaches a catalog that has not been opened.

    "Not opened" means there is no underlying lancedb connection. The check
    is mostly defensive — ``Catalog.open()`` runs ``_ensure_tables()``, so a
    fresh instance is always usable.
    """