# Source code for ursa.raw

"""M2 raw-modality payload (ENG-890).

:class:`RawBytes` is a thin carrier surfaced on :class:`ursa.Data` under
``data.modalities[name]`` for modalities with ``ingestion_status="raw"``.
No time domain, no parsing — :func:`ursa.get` reads the segment files at
``ModalityRow.raw_storage_uri`` and packages them as ``(key, bytes)``
tuples in store-listing order.

ENG-1093 replaces this with array-bearing ``temporaldata`` subclasses
once Virgo's ingestion node populates the processed store; this module
goes away then.
"""

from __future__ import annotations

from dataclasses import dataclass

from ursa.catalog.schemas import ModalityRow

__all__ = ["RawBytes"]


[docs] @dataclass(frozen=True, slots=True) class RawBytes: """List-of-segments raw-modality payload. ``segments`` holds ``(object_key, bytes)`` tuples in sorted-key order (the get-path applies ``sorted(..., key=meta.key)`` so callers don't have to). ``metadata`` is the same :class:`ModalityRow` the catalog returned via :class:`QueryResult`. A list of segments — rather than one concatenated blob — preserves framed formats (video, parquet, lance) faithfully. Callers that want one byte string can do ``b"".join(b for _, b in rb.segments)`` for formats where naïve concat is meaningful (CSV / JSONL / raw binary). """ segments: tuple[tuple[str, bytes], ...] metadata: ModalityRow