Skip to content

Commit 9d5bc46

Browse files
committed
Removed helper function for inserting CLEM database entries
1 parent 4a4d8c5 commit 9d5bc46

1 file changed

Lines changed: 0 additions & 156 deletions

File tree

Lines changed: 0 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -1,156 +0,0 @@
1-
from __future__ import annotations
2-
3-
import logging
4-
import re
5-
from pathlib import Path
6-
from typing import Optional, Type, Union
7-
8-
from sqlalchemy.exc import NoResultFound
9-
from sqlmodel import Session, select
10-
11-
from murfey.util.config import get_machine_config
12-
from murfey.util.db import (
13-
ImagingSite,
14-
Session as MurfeySession,
15-
)
16-
17-
logger = logging.getLogger("murfey.workflows.clem")
18-
19-
20-
"""
21-
HELPER FUNCTIONS FOR CLEM DATABASE
22-
"""
23-
24-
25-
def _validate_and_sanitise(
    file: Path,
    session_id: int,
    db: Session,
) -> Path:
    """
    Performs validation and sanitisation on the incoming file path, ensuring that
    no forbidden characters are present, that the path points only to the allowed
    section of the file server, and that the file is of a permitted type.

    The instrument name is looked up from the Murfey session with the given
    'session_id', and the machine configuration for that instrument supplies the
    rsync base path that the file has to be located under.

    Returns the fully resolved file path as a Path object.

    Raises a ValueError if the resolved path contains characters that are not
    permitted, lies outside of the rsync base path, or has a file extension that is
    not permitted. Errors raised by the session lookup itself are not caught here.

    NOTE: Due to the instrument name query, 'db' now needs to be passed as an
    explicit variable to this function from within a FastAPI endpoint, as using the
    instance that was imported directly won't load it in the correct state.
    """

    valid_file_types = (
        ".lif",
        ".tif",
        ".tiff",
        ".xlif",
        ".xml",
    )

    # Resolve symlinks and directory changes to get full file path
    full_path = Path(file).resolve()

    # Use the machine configuration of the session's instrument to work out which
    # base path files are accepted from
    instrument_name = (
        db.exec(select(MurfeySession).where(MurfeySession.id == session_id))
        .one()
        .instrument_name
    )
    machine_config = get_machine_config(instrument_name=instrument_name)[
        instrument_name
    ]
    # If no base path is configured, Path("") resolves to the current working
    # directory
    rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve()

    # Check that full file path doesn't contain unallowed characters
    # Currently allows only:
    # - words (alphanumerics and "_"; \w),
    # - spaces (\s),
    # - periods,
    # - dashes,
    # - forward slashes ("/")
    if not re.fullmatch(r"^[\w\s\.\-/]+$", str(full_path)):
        raise ValueError(f"Unallowed characters present in {file}")

    # Check that it's not accessing somewhere it's not allowed.
    # The comparison is done on whole path components rather than on the string
    # prefix, so that a sibling such as "/data/base_other" is not accepted when
    # the base path is "/data/base".
    if full_path != rsync_basepath and rsync_basepath not in full_path.parents:
        raise ValueError(f"{file} points to a directory that is not permitted")

    # Check that it is of a permitted file type
    if full_path.suffix not in valid_file_types:
        raise ValueError(f"{full_path.suffix} is not a permitted file format")

    return full_path
84-
85-
86-
def get_db_entry(
    db: Session,
    # With the database search function having been moved out of the FastAPI
    # endpoint, the database now has to be explicitly passed within the FastAPI
    # endpoint function in order for it to be loaded in the correct state.
    table: Type[Union[ImagingSite,]],
    session_id: int,
    file_path: Optional[Path] = None,
    series_name: Optional[str] = None,
) -> Union[ImagingSite,]:
    """
    Searches the CLEM workflow-related tables in the Murfey database for an entry that
    matches the file path or series name within a given session. Returns the entry if
    a match is found, otherwise registers it as a new entry in the database, commits
    it, and returns the new entry.

    Exactly one of 'file_path' or 'series_name' has to be provided. A file path is
    validated with '_validate_and_sanitise' before it is used; a series name may
    only contain word characters, whitespace, periods, dashes, and forward slashes.

    Raises a ValueError if neither or both of 'file_path' and 'series_name' are
    given, or if the series name contains characters that are not permitted. Errors
    from the file path validation and from the database are propagated unchanged,
    so that their type and message remain available to the caller.
    """

    # Validate that parameters are provided correctly
    if file_path is None and series_name is None:
        raise ValueError(
            "One of either 'file_path' or 'series_name' has to be provided"
        )
    if file_path is not None and series_name is not None:
        raise ValueError("Only one of 'file_path' or 'series_name' should be provided")

    # Exactly one of the two branches below runs, so 'query' and 'new_entry_fields'
    # are always defined afterwards

    # Validate file path if provided, and search by it
    if file_path is not None:
        file_path = _validate_and_sanitise(file_path, session_id, db)
        query = (
            select(table)
            .where(table.session_id == session_id)
            .where(table.file_path == str(file_path))
        )
        new_entry_fields = {"file_path": str(file_path)}

    # Validate series name if provided, and search by it
    if series_name is not None:
        if not re.fullmatch(r"^[\w\s\.\-/]+$", series_name):
            raise ValueError("One or more characters in the string are not permitted")
        query = (
            select(table)
            .where(table.session_id == session_id)
            .where(table.series_name == series_name)
        )
        new_entry_fields = {"series_name": series_name}

    # Return database entry if it exists
    try:
        db_entry = db.exec(query).one()
    # Create and register new entry if not present
    except NoResultFound:
        db_entry = table(**new_entry_fields, session_id=session_id)
        db.add(db_entry)
        db.commit()

    return db_entry

0 commit comments

Comments
 (0)