1+ import asyncio
12import logging
23
34import requests
5+ from aiocache import cached
6+
7+ from mavedb .lib .clingen .cache import CACHE_CLASS , CACHE_CONFIG , CACHE_TTL_SECONDS , clingen_cache_key_builder
48
# Module-scoped logger named after this module; its level is forced to DEBUG
# here, so every record emitted through it is passed on to the handlers.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Base URL for allele lookups; an allele ID is appended as the last path segment.
CLINGEN_API_URL = "https://reg.genome.network/allele"
913
1014
11- def get_canonical_pa_ids (clingen_allele_id : str ) -> list [str ]:
12- """ "Retrieve any canonical PA IDs from the ClinGen API for a given clingen allele ID."""
13- response = requests .get (f"{ CLINGEN_API_URL } /{ clingen_allele_id } " )
14- if response .status_code != 200 :
15- logger .error (f"Failed to query ClinGen API for { clingen_allele_id } : { response .status_code } " )
15+ @cached (ttl = CACHE_TTL_SECONDS , key_builder = clingen_cache_key_builder , cache = CACHE_CLASS , ** CACHE_CONFIG )
16+ async def get_canonical_pa_ids (clingen_allele_id : str ) -> list [str ]:
17+ """Retrieve canonical PA IDs from the ClinGen API for a given ClinGen allele ID.
18+
19+ Results are automatically cached for 24 hours using aiocache with configurable backend.
20+ This significantly reduces repeated API calls when processing multiple ClinVar control
21+ versions or running jobs that query the same alleles. Cache backend can be switched
22+ between Redis (production) and in-memory (testing) via CLINGEN_CACHE_BACKEND env var.
23+
24+ Args:
25+ clingen_allele_id: ClinGen allele ID to query (e.g., CA123456)
26+
27+ Returns:
28+ List of canonical PA IDs associated with the allele. Returns empty list if
29+ the allele has no MANE transcripts or if the allele doesn't exist (404).
30+
31+ Raises:
32+ requests.exceptions.HTTPError: If the API request fails with non-2xx status code
33+ (excluding 404, which returns empty list).
34+ """
35+ loop = asyncio .get_running_loop ()
36+ response = await loop .run_in_executor (None , requests .get , f"{ CLINGEN_API_URL } /{ clingen_allele_id } " )
37+
38+ # 404 means the allele doesn't exist in ClinGen's registry - treat as "no data" (cacheable)
39+ if response .status_code == 404 :
1640 return []
1741
42+ # All other non-2xx status codes raise exceptions (400, 429, 5xx, etc.)
43+ if response .status_code != 200 :
44+ response .raise_for_status ()
45+
1846 data = response .json ()
1947
2048 pa_ids = []
@@ -27,35 +55,92 @@ def get_canonical_pa_ids(clingen_allele_id: str) -> list[str]:
2755 return pa_ids
2856
2957
@cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG)
async def get_matching_registered_ca_ids(clingen_pa_id: str) -> list[str]:
    """Look up the registered transcript CA IDs that match a ClinGen PA ID.

    The result is cached by the ``cached`` decorator for ``CACHE_TTL_SECONDS``
    under the key produced by ``clingen_cache_key_builder``; the cache backend
    and its settings come from ``mavedb.lib.clingen.cache``. An exception
    propagates out of the function, so failed lookups are not stored.

    Args:
        clingen_pa_id: ClinGen protein allele ID to query (e.g., PA123456).

    Returns:
        The CA IDs collected from every ``matchingRegisteredTranscripts`` entry
        of every ``aminoAcidAlleles`` entry in the response, in response order.
        Empty when there are none or when the API answers 404.

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx status
            other than 404.
    """
    url = f"{CLINGEN_API_URL}/{clingen_pa_id}"

    # requests is blocking, so run the call on the loop's default executor.
    response = await asyncio.get_running_loop().run_in_executor(None, requests.get, url)

    # Unknown allele: a valid "no data" answer, returned (and cached) as an empty list.
    if response.status_code == 404:
        return []

    # Any other error status is surfaced to the caller.
    if response.status_code != 200:
        response.raise_for_status()

    payload = response.json()

    matched_ids: list[str] = []
    for amino_acid_allele in payload.get("aminoAcidAlleles") or ():
        transcripts = amino_acid_allele.get("matchingRegisteredTranscripts")
        if not transcripts:
            continue
        for transcript in transcripts:
            # "@id" is a URL whose final path component is the transcript CA ID.
            matched_ids.append(transcript["@id"].split("/")[-1])

    return matched_ids
47101
48102
@cached(ttl=CACHE_TTL_SECONDS, key_builder=clingen_cache_key_builder, cache=CACHE_CLASS, **CACHE_CONFIG)
async def get_associated_clinvar_allele_id(clingen_allele_id: str) -> str:
    """Retrieve the associated ClinVar Allele ID for a given ClinGen Allele ID.

    The result is cached by the ``cached`` decorator for ``CACHE_TTL_SECONDS``
    under the key produced by ``clingen_cache_key_builder``; the cache backend
    and its settings come from ``mavedb.lib.clingen.cache``.

    "No association" is reported as an empty string rather than ``None`` so
    that successful negative lookups are a concrete, cacheable value. Error
    responses raise instead of returning, so they are never stored and a
    later call can retry.

    Args:
        clingen_allele_id: ClinGen allele ID to query (e.g., CA123456).

    Returns:
        The ``alleleId`` of the first ``externalRecords.ClinVarAlleles`` entry,
        converted to ``str``. Empty string when the API answers 404 or when the
        response carries no usable ClinVar record (key missing, ``null``,
        empty list, or no ``alleleId``).

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx status
            other than 404.
    """
    url = f"{CLINGEN_API_URL}/{clingen_allele_id}"

    # requests is blocking, so run the call on the loop's default executor.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, requests.get, url)

    # 404 means the allele doesn't exist in ClinGen's registry - treat as "no data" (cacheable)
    if response.status_code == 404:
        return ""

    # No-op for success statuses; raises HTTPError for 4xx/5xx.
    response.raise_for_status()

    data = response.json()

    # `dict.get` defaults only cover *missing* keys. Normalise explicit nulls
    # and empty lists too, so "no ClinVar record" never raises
    # IndexError/AttributeError and is reported as "" like every other miss.
    external_records = data.get("externalRecords") or {}
    clinvar_alleles = external_records.get("ClinVarAlleles") or []
    if not clinvar_alleles:
        return ""

    clinvar_allele_id = clinvar_alleles[0].get("alleleId")
    if clinvar_allele_id:
        return str(clinvar_allele_id)

    return ""
0 commit comments