Skip to content
18 changes: 18 additions & 0 deletions src/azul/service/index_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
CatalogName,
config,
)
from azul.drs import (
CompactDRSURI,
DRSURI,
)
from azul.filters import (
Filters,
)
Expand Down Expand Up @@ -86,16 +90,30 @@ def prepare_request(self, request: Search) -> Search:
return request

def _file_url(self, *, uuid: str, version: str, drs_uri: str | None) -> str | None:
    """
    Return the URL of the file with the given UUID and version, as produced
    by `self.file_url_func`, or `None` if no URL should be exposed.

    `None` is returned when the file has no DRS URI, and also when the
    current catalog belongs to the `lungmap` atlas and the DRS URI parses as
    a `CompactDRSURI`.
    """
    # FIXME: Redundant implementations of file URLs and mirror URIs
    # https://github.com/DataBiosphere/azul/issues/8042
    if drs_uri is None:
        # A file can't be downloaded without its DRS URI
        return None
    atlas = config.catalogs[self.catalog].atlas
    if atlas == 'lungmap' and isinstance(DRSURI.parse(drs_uri), CompactDRSURI):
        # LungMAP contains files not hosted on TDR. Downloading these files
        # requires authentication that can't be provided by Azul, rendering
        # our file URLs non-functional. If a user tries to follow such a
        # URL, the request fails with a 401 status, so we avoid exposing
        # them wherever possible.
        return None
    url = self.file_url_func(catalog=self.catalog,
                             fetch=False,
                             file_uuid=uuid,
                             version=version)
    return str(url)

def _file_mirror_uri(self, source: SourceRef, file: JSON) -> str | None:
    """
    Return the result of asking the current catalog's mirror service for the
    mirror URI of the given file in the given source, using the plugin's
    file class.
    """
    # FIXME: Redundant implementations of file URLs and mirror URIs
    # https://github.com/DataBiosphere/azul/issues/8042
    file_class = self.plugin.file_class
    return self.service.mirror_service(self.catalog).mirror_uri(source, file_class, file)
Expand Down
18 changes: 18 additions & 0 deletions src/azul/service/manifest_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@
from azul.deployment import (
aws,
)
from azul.drs import (
CompactDRSURI,
DRSURI,
)
from azul.field_type import (
FieldType,
FieldTypes,
Expand Down Expand Up @@ -1139,9 +1143,21 @@ def _azul_file_url(self,
file: JSON,
args: Mapping = frozendict()
) -> str | None:
# FIXME: Redundant implementations of file URLs and mirror URIs
# https://github.com/DataBiosphere/azul/issues/8042
if file['drs_uri'] is None:
# To download a file we need its DRS URI
return None
elif (
config.catalogs[self.catalog].atlas == 'lungmap'
and isinstance(DRSURI.parse(json_str(file['drs_uri'])), CompactDRSURI)
):
# LungMAP contains files not hosted on TDR. Downloading these files
# requires authentication that can't be provided by Azul, rendering
# our file URLs non-functional. If a user tries to follow such a
# URL, the request fails with a 401 status, so we avoid exposing
# them wherever possible.
return None
else:
special_fields = self.metadata_plugin.special_fields
return str(self.file_url_func(catalog=self.catalog,
Expand All @@ -1151,6 +1167,8 @@ def _azul_file_url(self,
**args))

def _azul_mirror_uri(self, source: SourceRef, file: JSON) -> str | None:
    """
    Return the result of asking the mirror service for the mirror URI of the
    given file in the given source, using the metadata plugin's file class.
    """
    # FIXME: Redundant implementations of file URLs and mirror URIs
    # https://github.com/DataBiosphere/azul/issues/8042
    file_class = self.metadata_plugin.file_class
    mirror_uri = self.mirror_service.mirror_uri(source, file_class, file)
    return mirror_uri

Expand Down
24 changes: 12 additions & 12 deletions src/azul/service/query_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
Self,
)

import attr
import attrs
from furl import (
furl,
)
Expand Down Expand Up @@ -119,7 +119,7 @@ def process_response(self, response: R1) -> R2:
raise NotImplementedError


@attr.s(frozen=True, auto_attribs=True, kw_only=True)
@attrs.frozen(kw_only=True)
class OpenSearchChain[R0, R1, R2](OpenSearchStage[R0, R2]):
"""
The result of wrapping a stage or chain in another stage.
Expand Down Expand Up @@ -150,7 +150,7 @@ def stages(self) -> Iterable[OpenSearchStage]:
yield self.inner


@attr.s(frozen=True, auto_attribs=True, kw_only=True)
@attrs.frozen(kw_only=True)
class _OpenSearchStage[R1, R2](OpenSearchStage[R1, R2], metaclass=ABCMeta):
"""
A base implementation of a stage.
Expand All @@ -170,7 +170,7 @@ def wrap[R0](self, other: OpenSearchStage[R0, R1]) -> OpenSearchChain[R0, R1, R2
TranslatedFilters = Mapping[FieldPath, Mapping[str, Sequence[PrimitiveJSON]]]


@attr.s(frozen=True, auto_attribs=True, kw_only=True)
@attrs.frozen(kw_only=True)
class FilterStage(_OpenSearchStage[Response, Response]):
"""
Converts the given filters to an OpenSearch query and adds that query as
Expand Down Expand Up @@ -268,7 +268,7 @@ def prepare_query(self, skip_field_paths: tuple[FieldPath] = ()) -> Query:
return Q('bool', must=query_list)


@attr.s(frozen=True, auto_attribs=True, kw_only=True)
@attrs.frozen(kw_only=True)
class AggregationStage(_OpenSearchStage[MutableJSON, MutableJSON]):
"""
Cooperate with the given filter stage to augment the request with an
Expand Down Expand Up @@ -421,7 +421,7 @@ def _populate_accessible(self, aggs: MutableJSON) -> None:
aggs[special_fields.accessible.name] = agg


@attr.s(frozen=True, auto_attribs=True, kw_only=True)
@attrs.frozen(kw_only=True)
class SlicingStage(_OpenSearchStage[Response, Response]):
"""
Augments the request with a document slice (known as a *source filter* in
Expand Down Expand Up @@ -450,7 +450,7 @@ def _prepared_slice(self) -> DocumentSlice | None:
# FIXME: Eliminate reliance on Elasticsearch DSL
# https://github.com/DataBiosphere/azul/issues/4111

@attr.s(frozen=True, auto_attribs=True, kw_only=True)
@attrs.frozen(kw_only=True)
class ToDictStage(_OpenSearchStage[Response, MutableJSON]):

def prepare_request(self, request: Search) -> Search:
Expand All @@ -477,7 +477,7 @@ def sort_key_to_json(s: SortKey) -> AnyJSON:
return list(s)


@attr.s(auto_attribs=True, kw_only=True, frozen=True)
@attrs.frozen(kw_only=True)
class Pagination:
order: str
size: int
Expand All @@ -490,9 +490,9 @@ def advance(self,
search_before: SortKey | None,
search_after: SortKey | None
) -> Self:
return attr.evolve(self,
search_before=search_before,
search_after=search_after)
return attrs.evolve(self,
search_before=search_before,
search_after=search_after)

def link(self, *, previous: bool, **params: str) -> furl | None:
"""
Expand Down Expand Up @@ -521,7 +521,7 @@ class ResponsePagination(JSONTypedDict):
ResponseTriple = tuple[JSONs, ResponsePagination, JSON]


@attr.s(frozen=True, auto_attribs=True, kw_only=True)
@attrs.frozen(kw_only=True)
class PaginationStage(_OpenSearchStage[JSON, ResponseTriple]):
"""
Handles the pagination of search results
Expand Down
45 changes: 38 additions & 7 deletions test/azul_test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import (
Callable,
Iterable,
Mapping,
Optional,
)
from unittest import (
Expand Down Expand Up @@ -54,6 +55,9 @@
from azul.deployment import (
aws,
)
from azul.lib.types import (
JSON,
)
from azul.logging import (
configure_test_logging,
get_test_logger,
Expand Down Expand Up @@ -509,6 +513,13 @@ def _patch_source_cache(cls):
)
cls.addClassPatch(patch_source_cache(hit=[cls.source.ref.id]))

@classmethod
def _source_config(cls) -> Mapping[str, JSON]:
    """
    Map the string form of each test source's spec to the JSON form of that
    source's configuration, for every source yielded by `cls._sources()`.
    """
    configs = {}
    for source in cls._sources():
        # Compute the key before the value, as a dict comprehension would
        spec = str(source.ref.spec)
        configs[spec] = source.config.to_json()
    return configs


class DCP2TestCase(TDRTestCase):
source = Source(
Expand All @@ -522,18 +533,39 @@ class DCP2TestCase(TDRTestCase):

@classmethod
def catalog_config(cls) -> dict[CatalogName, Config.Catalog]:
sources = {
str(source.ref.spec): source.config.to_json()
for source in cls._sources()
}
return {
cls.catalog: config.Catalog(name=cls.catalog,
atlas='hca',
internal=False,
mirror_limit=None,
plugins=dict(metadata=config.Catalog.Plugin(name='hca'),
repository=config.Catalog.Plugin(name='tdr_hca')),
sources=sources)
sources=cls._source_config())
}


class LungmapTestCase(TDRTestCase):
    """
    A TDR test case whose catalog belongs to the `lungmap` atlas and whose
    source is configured with mirroring disabled.
    """
    source_spec = ('tdr:bigquery:gcp:datarepo-dev-5d9526e0:'
                   'lungmap_dev_1bdcecde16be420888f478cd2133d11d__20220401_20220404')
    source = Source(
        config=SourceConfig(mirror=False),
        ref=TDRSourceRef(
            id='96c6482b-7949-4d6e-894b-371149e85134',
            spec=TDRSourceSpec.parse(source_spec),
            prefix=Prefix.of_everything
        )
    )

    @classmethod
    def catalog_config(cls) -> dict[CatalogName, Config.Catalog]:
        """
        Return a single-entry mapping from the name of the test catalog to
        its configuration: atlas `lungmap`, the `hca` metadata plugin and
        the `tdr_hca` repository plugin.
        """
        plugins = dict(metadata=config.Catalog.Plugin(name='hca'),
                       repository=config.Catalog.Plugin(name='tdr_hca'))
        # NOTE(review): the other test cases visible in this module pass
        # `mirror_limit=None`; the meaning of -1 is defined elsewhere
        catalog = config.Catalog(name=cls.catalog,
                                 atlas='lungmap',
                                 internal=False,
                                 mirror_limit=-1,
                                 plugins=plugins,
                                 sources=cls._source_config())
        return {cls.catalog: catalog}


Expand All @@ -549,15 +581,14 @@ class AnvilTestCase(TDRTestCase):

@classmethod
def catalog_config(cls) -> dict[CatalogName, Config.Catalog]:
sources = {str(cls.source.ref.spec): cls.source.config.to_json()}
return {
cls.catalog: config.Catalog(name=cls.catalog,
atlas='anvil',
internal=False,
mirror_limit=None,
plugins=dict(metadata=config.Catalog.Plugin(name='anvil'),
repository=config.Catalog.Plugin(name='tdr_anvil')),
sources=sources)
sources=cls._source_config())
}


Expand Down
Loading
Loading