Skip to content

Commit e19fad2

Browse files
committed
make cleanup interval optional
1 parent f056acb commit e19fad2

2 files changed

Lines changed: 95 additions & 49 deletions

File tree

src/pybiocfilecache/cache.py

Lines changed: 91 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -135,11 +135,62 @@ def _validate_rname(self, rname: str) -> None:
135135
raise InvalidRnameError(f"Resource name '{rname}' doesn't match pattern " f"'{self.config.rname_pattern}'")
136136

137137
def _should_cleanup(self) -> bool:
    """Tell whether an automatic cache cleanup is currently due.

    Returns:
        False when `cleanup_interval` is None. Otherwise True only if the
        time elapsed since `_last_cleanup` is strictly greater than
        `cleanup_interval`.
    """
    interval = self.config.cleanup_interval
    if interval is None:
        # No interval configured: an automatic cleanup is never due.
        return False

    elapsed = datetime.now() - self._last_cleanup
    return elapsed > interval
140147

148+
def cleanup(self) -> int:
    """Remove expired resources from the cache.

    For every resource whose `expires` value is set and earlier than the
    current time, unlinks the file at the resource's `rpath` and deletes
    its database record. `_last_cleanup` is updated once the pass finishes.

    Returns:
        Number of resources removed.

    Note:
        - This method always runs when called explicitly, even if
          `cleanup_interval` is None. Whether an *automatic* cleanup is
          due is decided by `_should_cleanup()`; callers that want
          interval-gated behaviour should check that first.
        - Only removes resources with non-None expiration dates.
        - A resource whose removal raises is logged and skipped; it does
          not count towards the returned total and does not stop the pass.
    """
    # NOTE: there is intentionally no early return on `cleanup_interval`.
    # A guard here made explicit calls a silent no-op whenever the interval
    # was None (the default), so expired resources could never be purged.
    now = datetime.now()  # single reference time for the whole query
    removed = 0

    with self.get_session() as session:
        # Only query resources that have expiration dates.
        expired = (
            session.query(Resource)
            .filter(
                Resource.expires.isnot(None),  # resources without expiry never expire
                Resource.expires < now,
            )
            .all()
        )

        for resource in expired:
            try:
                # missing_ok: an already-deleted file must not block record removal.
                Path(resource.rpath).unlink(missing_ok=True)
                session.delete(resource)
                removed += 1
            except Exception as e:
                # Best effort: log and continue with the remaining resources.
                logger.error(f"Failed to remove expired resource: {resource.rname}", exc_info=e)

        session.commit()

    self._last_cleanup = datetime.now()
    return removed
185+
141186
def get(self, rname: str) -> Optional[Resource]:
142-
"""Get resource by name from cache."""
187+
"""Get resource by name from cache.
188+
189+
Args:
190+
rname:
191+
Name to identify the resource in cache.
192+
193+
"""
143194
with self.get_session() as session:
144195
resource = session.query(Resource).filter(Resource.rname == rname).first()
145196

@@ -175,19 +226,27 @@ def add(
175226
176227
Args:
177228
rname:
178-
Name of the resource to add to cache.
229+
Name to identify the resource in cache.
179230
180231
fpath:
181-
Location of the resource.
232+
Path to the source file.
182233
183234
rtype:
235+
Type of resource.
184236
One of ``local``, ``web``, or ``relative``.
185237
Defaults to ``local``.
186238
187239
action:
188-
Either ``copy``, ``move`` or ``asis``.
240+
How to handle the file ("copy", "move", or "asis").
189241
Defaults to ``copy``.
190242
243+
tags:
244+
Optional list of tags for categorization.
245+
246+
expires:
247+
Optional expiration datetime.
248+
If None, resource never expires.
249+
191250
ext:
192251
Whether to use filepath extension when storing in cache.
193252
Defaults to `False`.
@@ -271,10 +330,10 @@ def update(
271330
272331
Args:
273332
rname:
274-
Resource name.
333+
Name to identify the resource in cache.
275334
276-
rtype:
277-
Resource type.
335+
fpath:
336+
Path to the new source file.
278337
279338
action:
280339
Either ``copy``, ``move`` or ``asis``.
@@ -322,7 +381,7 @@ def remove(self, rname: str) -> None:
322381
323382
Args:
324383
rname:
325-
Name of the resource to remove
384+
Name to identify the resource in cache.
326385
327386
Raises:
328387
BiocCacheError: If resource removal fails
@@ -348,23 +407,24 @@ def remove(self, rname: str) -> None:
348407
def list_resources(
349408
self, tag: Optional[str] = None, rtype: Optional[str] = None, expired: Optional[bool] = None
350409
) -> List[Resource]:
351-
"""List resources in cache with optional filtering.
410+
"""List resources in the cache with optional filtering.
352411
353412
Args:
354413
tag:
355-
Filter by tag.
414+
Filter resources by tag.
356415
357416
rtype:
358-
Filter by resource type.
417+
Filter resources by type.
359418
360419
expired:
361420
Filter by expiration status
362421
True: only expired resources
363422
False: only non-expired resources
364423
None: all resources
424+
Note: Resources with no expiration are always considered non-expired.
365425
366426
Returns:
367-
List of matching Resource objects.
427+
List of Resource objects matching the filters
368428
"""
369429
with self.get_session() as session:
370430
query = session.query(Resource)
@@ -375,39 +435,19 @@ def list_resources(
375435
query = query.filter(Resource.rtype == rtype)
376436
if expired is not None:
377437
if expired:
378-
query = query.filter(Resource.expires < datetime.now())
438+
query = query.filter(
439+
Resource.expires.isnot(None), # Only check resources with expiration
440+
Resource.expires < datetime.now(),
441+
)
379442
else:
380-
query = query.filter((Resource.expires > datetime.now()) | (Resource.expires.is_(None)))
443+
query = query.filter(
444+
(Resource.expires.is_(None)) # Never expires
445+
| (Resource.expires > datetime.now()) # Not yet expired
446+
)
381447

382448
resources = query.all()
383449
return [self._get_detached_resource(session, r) for r in resources]
384450

385-
def cleanup(self) -> int:
386-
"""Remove expired resources and update cleanup timestamp.
387-
388-
Returns:
389-
Number of resources removed.
390-
391-
Note:
392-
Updates `_last_cleanup` timestamp after completion.
393-
"""
394-
removed = 0
395-
with self.get_session() as session:
396-
expired = session.query(Resource).filter(Resource.expires < datetime.now()).all()
397-
398-
for resource in expired:
399-
try:
400-
Path(resource.rpath).unlink(missing_ok=True)
401-
session.delete(resource)
402-
removed += 1
403-
except Exception as e:
404-
logger.error(f"Failed to remove expired resource: {resource.rname}", exc_info=e)
405-
406-
session.commit()
407-
408-
self._last_cleanup = datetime.now()
409-
return removed
410-
411451
def validate_resource(self, resource: Resource) -> bool:
412452
"""Validate resource integrity.
413453
@@ -509,14 +549,17 @@ def search(self, query: str, field: str = "rname", exact: bool = False) -> List[
509549
return [self._get_detached_resource(session, r) for r in resources]
510550

511551
def get_stats(self) -> Dict[str, Any]:
512-
"""Get cache statistics.
513-
514-
Returns:
515-
Dictionary of cache statistics.
516-
"""
552+
"""Get statistics about the cache."""
517553
with self.get_session() as session:
518554
total = session.query(Resource).count()
519-
expired = session.query(Resource).filter(Resource.expires < datetime.now()).count()
555+
expired = (
556+
session.query(Resource)
557+
.filter(
558+
Resource.expires.isnot(None), # Only check resources with expiration
559+
Resource.expires < datetime.now(),
560+
)
561+
.count()
562+
)
520563
types = dict(session.query(Resource.rtype, func.count(Resource.id)).group_by(Resource.rtype).all())
521564

522565
return {
@@ -525,6 +568,7 @@ def get_stats(self) -> Dict[str, Any]:
525568
"cache_size_bytes": self.get_cache_size(),
526569
"resource_types": types,
527570
"last_cleanup": self._last_cleanup.isoformat(),
571+
"cleanup_enabled": self.config.cleanup_interval is not None,
528572
}
529573

530574
def purge(self, force: bool = False) -> bool:

src/pybiocfilecache/config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ class CacheConfig:
1717
Directory to store cached files.
1818
1919
max_size_bytes:
20-
Maximum total size of cache (None for unlimited).
20+
Maximum total size of cache.
21+
None for unlimited.
2122
2223
cleanup_interval:
2324
How often to run expired resource cleanup.
25+
None for no cleanup.
2426
2527
rname_pattern:
2628
Regex pattern for valid resource names.
@@ -34,7 +36,7 @@ class CacheConfig:
3436

3537
cache_dir: Path
3638
max_size_bytes: Optional[int] = None
37-
cleanup_interval: timedelta = timedelta(days=30)
39+
cleanup_interval: Optional[timedelta] = None # timedelta(days=30)
3840
rname_pattern: str = r"^[a-zA-Z0-9_-]+$"
3941
hash_algorithm: str = "md5"
4042
compression: bool = False

0 commit comments

Comments
 (0)