# Source code for ursa.store.uri
"""URI → ``(role, key)`` resolution for the M2 read path (ENG-890).
:func:`parse_storage_uri` splits an ``r2://bucket/key`` URI into the
``role`` name a caller passes to :func:`ursa.get_store` plus the
prefix-relative ``key``. M2 is read-only; write callers compose their
own role names because the bucket → role mapping has four entries (RW
and RO variants) and write-side semantics live in
:func:`ursa.register.modality`.
The bucket constants come from :mod:`ursa.layout` so a future bucket
rename touches one module. The role names match the literals in
:data:`ursa.store.config.R2Creds`.
Reused by :func:`ursa.get` (this ticket) and :func:`ursa.download`
(ENG-1091).
"""
from __future__ import annotations
from urllib.parse import urlparse
from ursa.layout import ASSETS_BUCKET, RAW_BUCKET
__all__ = ["parse_storage_uri"]
# Read-only bucket → role map, keyed by the bucket constants imported
# from ``ursa.layout``. ``parse_storage_uri`` looks the URI's bucket up
# here and lists the known buckets in its "unknown bucket" error.
# Write-side mappings (``raw_rw``, ``assets_rw``) are intentionally
# omitted: the catalog-registered ``raw_storage_uri`` is read-only
# territory, and ``register`` callers already know which credential
# they want.
_READ_ROLE_BY_BUCKET: dict[str, str] = {
    RAW_BUCKET: "raw_ro",
    ASSETS_BUCKET: "assets_ro",
}
# [docs]
def parse_storage_uri(uri: str) -> tuple[str, str]:
    """Split an ``r2://bucket/key`` URI into ``(role, key)`` for *read*.

    Mapping (M2):

    - bucket :data:`ursa.layout.RAW_BUCKET` (``constellation-data``)
      → role ``"raw_ro"``
    - bucket :data:`ursa.layout.ASSETS_BUCKET` (``constellation-assets``)
      → role ``"assets_ro"``

    Parameters
    ----------
    uri
        A storage URI that conforms to
        :data:`ursa.catalog.schemas.StorageURI` (Pydantic validates that
        upstream — this helper trusts shape and only checks semantics).
        Trailing slashes are preserved in the returned ``key`` so callers
        can pass it to ``store.list(prefix=key)`` without re-appending
        the delimiter.

    Returns
    -------
    (role, key)
        ``role`` is one of the literals in
        :data:`ursa.store.config.R2Creds`. ``key`` is everything after
        the bucket with leading ``/`` characters stripped, so it
        composes cleanly with :class:`ursa.store.ObjectStore` keys
        (which are prefix-relative). The key is taken verbatim from
        ``uri``: ``?`` and ``#`` are ordinary key characters here, not
        query/fragment delimiters, and nothing is percent-decoded. A
        URI with no key part (``r2://bucket``) yields ``""``.

    Raises
    ------
    NotImplementedError
        For non-``r2://`` schemes. ``s3``, ``gcs``, and ``file`` are
        valid per the :data:`ursa.catalog.schemas.URI_PATTERN` regex but
        have no read-store mapping in M2.
    ValueError
        For ``r2://`` URIs whose bucket is not in the M2 mapping.
    """
    # urlparse is used only to validate the scheme and extract the
    # bucket (authority); the key is sliced from the raw string below.
    parsed = urlparse(uri)
    if parsed.scheme != "r2":
        raise NotImplementedError(
            f"parse_storage_uri only supports r2:// in M2; got scheme "
            f"{parsed.scheme!r} (uri={uri!r}). Other schemes are valid "
            "per URI_PATTERN but have no store-role mapping yet."
        )

    bucket = parsed.netloc
    role = _READ_ROLE_BY_BUCKET.get(bucket)
    if role is None:
        raise ValueError(
            f"unknown bucket {bucket!r} in storage URI {uri!r}; "
            f"expected one of {sorted(_READ_ROLE_BY_BUCKET)}"
        )

    # Do NOT use ``parsed.path`` for the key: urlparse cuts the path at
    # the first ``?`` or ``#`` (moving the rest into ``query`` /
    # ``fragment``), which would silently truncate object keys that
    # contain those characters. Slice the key out of the raw URI
    # instead: everything after the first ``/`` that follows the
    # ``scheme://bucket`` authority.
    _, _, after_scheme = uri.partition("://")
    _, _, raw_key = after_scheme.partition("/")

    # Strip leading slashes so the returned key is prefix-relative like
    # the rest of the ObjectStore surface.
    key = raw_key.lstrip("/")
    return role, key