# Source code for ursa.raw
"""M2 raw-modality payload (ENG-890).

:class:`RawBytes` is a thin carrier surfaced on :class:`ursa.Data` under
``data.modalities[name]`` for modalities with ``ingestion_status="raw"``.
No time domain, no parsing — :func:`ursa.get` reads the segment files at
``ModalityRow.raw_storage_uri`` and packages them as ``(key, bytes)``
tuples in store-listing order.

ENG-1093 replaces this with array-bearing ``temporaldata`` subclasses
once Virgo's ingestion node populates the processed store; this module
goes away then.
"""
from __future__ import annotations
from dataclasses import dataclass
from ursa.catalog.schemas import ModalityRow
__all__ = ["RawBytes"]
# [docs]
@dataclass(frozen=True, slots=True)
class RawBytes:
"""List-of-segments raw-modality payload.
``segments`` holds ``(object_key, bytes)`` tuples in sorted-key order
(the get-path applies ``sorted(..., key=meta.key)`` so callers don't
have to). ``metadata`` is the same :class:`ModalityRow` the catalog
returned via :class:`QueryResult`.
A list of segments — rather than one concatenated blob — preserves
framed formats (video, parquet, lance) faithfully. Callers that want
one byte string can do ``b"".join(b for _, b in rb.segments)`` for
formats where naïve concat is meaningful (CSV / JSONL / raw binary).
"""
segments: tuple[tuple[str, bytes], ...]
metadata: ModalityRow