Source code for ursa.filters

"""Query-filter primitives for processed-store row-level filtering.

These live on :class:`~ursa._query_types.UrsaQuery` (``time_filters`` /
``metadata_filters``) and are resolved at :meth:`DataInterface.query` time —
``time_filters`` into recording-relative-second windows stamped onto
:attr:`QueryResult.time_selection`, ``metadata_filters`` into row-level
predicates applied during catalog resolution.

Units note: :class:`IntervalSelector` intervals are **recording-relative
seconds** (0 = recording start, same coordinate space as
``ModalityRow.domain_intervals``), whereas ``UrsaQuery.time_range`` is an
**absolute epoch-nanosecond** window across recordings. When both are set,
``query()`` intersects them per recording.
"""

from __future__ import annotations

from typing import Any, Literal

from pydantic import BaseModel, ConfigDict, Field, field_validator

from ursa.catalog.schemas import TimeWindow

# Public names exported by ``from ursa.filters import *`` (kept in alphabetical order).
__all__ = [
    "AroundEvent",
    "IntervalSelector",
    "MetadataFilter",
    "TimeFilter",
]


class TimeFilter(BaseModel):
    """Common base for the server-side temporal selectors in ``UrsaQuery.time_filters``.

    The model is frozen so that filters can key dicts and round-trip through
    ``UrsaQuery.query_hash``; unknown fields are rejected (``extra="forbid"``).

    Each concrete subclass carries a ``kind`` discriminator. ``query()``
    resolves the filters into recording-relative-second windows on
    :attr:`QueryResult.time_selection`.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")
class IntervalSelector(TimeFilter):
    """Explicit multi-window selector — N disjoint recording-relative-second intervals.

    Event-independent: generalizes ``time_range`` to a *list* of windows
    expressed in recording-relative seconds (so it applies per recording,
    unlike the absolute-ns ``time_range``). ``query()`` tiles/clips to the
    selected intervals; :meth:`DataInterface.stream` lays its window grid
    within each interval independently (never across a gap).

    ``intervals`` accepts ``TimeWindow`` instances or ``(start_s, end_s)``
    tuples (coerced), e.g. ``IntervalSelector(intervals=[(0, 10), (20, 30)])``.
    At least one interval is required (``min_length=1``). This class itself
    does not check that the intervals are disjoint or ordered.
    """

    kind: Literal["interval"] = "interval"
    # NOTE(review): ``list`` is unhashable, and pydantic's frozen-model
    # ``__hash__`` hashes the field values, so ``hash(IntervalSelector(...))``
    # presumably raises ``TypeError`` despite the base class being frozen
    # "so filters key dicts" — confirm, and consider ``tuple[TimeWindow, ...]``.
    intervals: list[TimeWindow] = Field(min_length=1)

    @field_validator("intervals", mode="before")
    @classmethod
    def _coerce_tuples(cls, v: Any) -> Any:
        """Coerce ``(start_s, end_s)`` pairs to :class:`TimeWindow` for ergonomics.

        Runs before pydantic's own validation of ``intervals``. Only
        two-element tuples/lists are converted; every other item (including
        an already-built ``TimeWindow``) passes through unchanged so
        ``TimeWindow``'s own ``end > start`` validator still fires uniformly.
        A value that is not a list/tuple at all is returned untouched and left
        for pydantic to reject.

        Raises:
            ValueError: if a pair's bounds cannot be converted with
                ``float()``. Pydantic v2 only turns ``ValueError`` /
                ``AssertionError`` into ``ValidationError``; a bare
                ``TypeError`` (e.g. from ``float(None)``) would otherwise
                escape to the caller unwrapped.
        """
        if not isinstance(v, (list, tuple)):
            return v
        out: list[Any] = []
        for iv in v:
            if isinstance(iv, (tuple, list)) and len(iv) == 2:
                try:
                    start_s, end_s = float(iv[0]), float(iv[1])
                except (TypeError, OverflowError) as exc:
                    # Re-raise as ValueError so the failure surfaces as a
                    # ValidationError like every other bad-input case.
                    raise ValueError(
                        f"interval bounds must be real numbers, got {iv!r}"
                    ) from exc
                out.append(TimeWindow(start_seconds=start_s, end_seconds=end_s))
            else:
                out.append(iv)
        return out
class AroundEvent(TimeFilter):
    """Event-relative window selector — **deferred (ENG-906)**.

    Designed as a clean drop-in for the ``time_filters`` framework. The query
    surface accepts an ``AroundEvent`` today, but resolving it into
    ``time_selection`` is not implemented yet: only the dispatch branch
    exists, and ``query()`` raises ``NotImplementedError(ENG-906)`` for it.

    Two things are needed before it can be implemented:

    1. ``EventRow``s populated for the recording, and
    2. the events-table join in ``_resolve_time_filters``.
    """

    kind: Literal["around_event"] = "around_event"
    # Event type to anchor the window on (required; no default).
    event_type: str
    # Window extent on either side of the event; both default to 0.0.
    # NOTE(review): presumably seconds, matching the other time filters —
    # confirm the unit and sign convention when ENG-906 is implemented.
    before: float = 0.0
    after: float = 0.0
class MetadataFilter(BaseModel):
    """Row-level predicate on modality (or recording) metadata / channel_spec.

    The predicate is applied at ``query()`` time, after status/worker_ids
    routing and before the time-domain prune.

    ``field`` is a dotted path; the namespace it is looked up in depends on
    ``target``:

    * ``target="modality"`` (default) reads the :class:`ModalityRow`'s
      ``metadata`` (e.g. ``"device"``) or its ``channel_spec`` (e.g.
      ``"channel_spec.units"`` / ``"channel_spec.reference"``). The channel
      count is ``len(channel_spec["names"])``; there is no ``n_channels`` key.
    * ``target="recording"`` reads **only** the parent :class:`RecordingRow`'s
      ``metadata``. ``channel_spec`` is not available at recording scope, and
      a failing recording-level predicate drops all of that recording's rows.

    There is deliberately no generic ``Filter`` alias — the name says
    *metadata*, since ``TimeFilter`` subclasses are equally "filters".

    Missing-field convention: a field absent from the namespace fails **every**
    predicate other than ``exists``, and ``exists`` itself returns ``False``
    for it. In particular ``op="ne"`` / ``op="contains"`` against a missing
    field drop the row rather than keeping it — mirroring SQL's ``NULL != x``
    being not-true. Use ``op="exists"`` if you need to test for presence.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    # Dotted path into the namespace selected by ``target``.
    field: str
    # Comparison operator; restricted to this closed set of names.
    op: Literal["eq", "ne", "gt", "ge", "lt", "le", "in", "contains", "exists"]
    # Operand for ``op``. It is not validated here; defaults to ``None``.
    value: Any = None
    # Which row's namespace ``field`` is resolved against.
    target: Literal["modality", "recording"] = "modality"