Source code for ursa.store.uri
"""URI → ``(role, key)`` resolution for the M2 read path (ENG-890).
:func:`parse_storage_uri` splits an ``r2://bucket/key`` URI into the
``role`` name a caller passes to :func:`ursa.get_store` plus the
prefix-relative ``key``. M2 is read-only; write callers compose their
own role names because the bucket → role mapping has four entries (RW
and RO variants) and write-side semantics live in
:func:`ursa.register.modality`.
The bucket constants come from :mod:`ursa.layout` so a future bucket
rename touches one module. The role names match the literals in
:data:`ursa.store.config.R2Creds`.
Reused by :func:`ursa.get` (this ticket) and :func:`ursa.download`
(ENG-1091).
"""
from __future__ import annotations
from urllib.parse import urlparse
from ursa.layout import active_buckets
__all__ = ["parse_storage_uri"]
[docs]
def parse_storage_uri(uri: str) -> tuple[str, str]:
    """Split an ``r2://bucket/key`` URI into ``(role, key)`` for *read*.

    Mapping (M2), profile-aware via :func:`ursa.layout.active_buckets`:

    - active raw bucket (``constellation-data`` / ``constellation-data-test``)
      → role ``"raw_ro"``
    - active assets bucket (``constellation-assets`` /
      ``constellation-assets-test``) → role ``"assets_ro"``

    Write-side mappings (``raw_rw``, ``assets_rw``) are intentionally
    omitted: the catalog-registered ``raw_storage_uri`` is read-only
    territory, and ``register`` callers already know which credential they
    want.

    Parameters
    ----------
    uri
        A storage URI that conforms to :data:`ursa.catalog.schemas.StorageURI`
        (Pydantic validates that upstream — this helper trusts shape and
        only checks semantics). Trailing slashes are preserved in the
        returned ``key`` so callers can pass it to ``store.list(prefix=key)``
        without re-appending the delimiter.

    Returns
    -------
    (role, key)
        ``role`` is one of the literals in
        :data:`ursa.store.config.R2Creds`. ``key`` is everything after the
        bucket with the leading ``/`` stripped so it composes cleanly with
        :class:`ursa.store.ObjectStore` keys (which are prefix-relative).
        ``?`` and ``#`` are treated as ordinary key characters, not as
        query/fragment delimiters, so a key such as ``clips/take#2.wav``
        is returned in full.

    Raises
    ------
    NotImplementedError
        For non-``r2://`` schemes. ``s3``, ``gcs``, and ``file`` are
        valid per the :data:`ursa.catalog.schemas.URI_PATTERN` regex but
        have no read-store mapping in M2.
    ValueError
        For ``r2://`` URIs whose bucket is not in the active profile's
        mapping. A common cause is profile-incoherence between the writer
        (e.g. data-engine) and reader (this process) — check
        ``CONSTELLATION_PROFILE`` on both ends.
    """
    parsed = urlparse(uri)
    if parsed.scheme != "r2":
        raise NotImplementedError(
            f"parse_storage_uri only supports r2:// in M2; got scheme "
            f"{parsed.scheme!r} (uri={uri!r}). Other schemes are valid "
            "per URI_PATTERN but have no store-role mapping yet."
        )

    # Built per call (not at import time) so the mapping always reflects
    # whatever ``active_buckets()`` reports for the current profile.
    buckets = active_buckets()
    read_role_by_bucket: dict[str, str] = {
        buckets.raw: "raw_ro",
        buckets.assets: "assets_ro",
    }

    bucket = parsed.netloc
    role = read_role_by_bucket.get(bucket)
    if role is None:
        raise ValueError(
            f"unknown bucket {bucket!r} in storage URI {uri!r}; expected one "
            f"of {sorted(read_role_by_bucket)} (active CONSTELLATION_PROFILE). "
            f"If the URI was built under a different profile, check that the "
            f"writer and reader agree on CONSTELLATION_PROFILE."
        )

    # ``urlparse`` splits ``?query`` and ``#fragment`` off ``path`` for
    # every scheme, but a storage URI has neither: both characters are
    # just part of the object key. Re-attach them so such keys are not
    # silently truncated. (A delimiter with nothing after it, e.g. a
    # trailing ``?``, is still dropped — ``urlparse`` reports it the same
    # way as an absent component.)
    remainder = parsed.path
    if parsed.query:
        remainder += "?" + parsed.query
    if parsed.fragment:
        remainder += "#" + parsed.fragment

    # urlparse keeps the leading ``/`` in ``path``; strip it so the
    # returned key is prefix-relative like the rest of the ObjectStore
    # surface.
    key = remainder.lstrip("/")
    return role, key