Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
673 changes: 230 additions & 443 deletions notebooks/demo.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion notebooks/load_gt.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand Down
834 changes: 744 additions & 90 deletions notebooks/superpixel2embedding.ipynb

Large diffs are not rendered by default.

40 changes: 25 additions & 15 deletions prepare_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from skimage.feature import multiscale_basic_features
from cellcanvas_spp.segmentation import superpixels
import pickle
from tifffile import imread

try:
DATA_DIR = Path(os.environ["COPICK_DATA"])
Expand All @@ -16,47 +17,56 @@
"$ export COPICK_DATA=</path/to/copick/data> python <script>"
)

config_file = DATA_DIR / "copick_10439/synthetic_data_10439_dataportal.json"
# config_file = DATA_DIR / "copick_10439/synthetic_data_10439_dataportal.json"
config_file = "synthetic_data_10439.json"
root = copick.from_file(config_file)

particles = dict()
for po in root.config.pickable_objects:
particles[po.name] = po.label


list_of_runs = [root.get_run(s) for s in ["16191", ]] #"16193","16191"

data_dict = {}
for run in tqdm(root.runs[2:3]):
for run in tqdm(list_of_runs):

print(f"Preparing run {run.name}")
tomogram = run.get_voxel_spacing(10).get_tomogram('wbp')
_, array = list(zarr.open(tomogram.zarr()).arrays())[0]
tomogram = array[:]
mask = np.zeros(tomogram.shape)
segmentations = run.get_segmentations()

print("Calculating SK features...")
sk_features = multiscale_basic_features(
tomogram,
intensity=True,
edges=True,
texture=True,
sigma_min=0.5,
sigma_max=8.0
)
# print("Calculating SK features...")
# sk_features = multiscale_basic_features(
# tomogram,
# intensity=True,
# edges=True,
# texture=True,
# sigma_min=0.5,
# sigma_max=8.0
# )
#sk_features = np.moveaxis(features, -1, 0)

print("Calculating superpixels...")
segm = superpixels(tomogram, sigma=4, h_minima=0.0025)
# print("Calculating superpixels...")
# segm = superpixels(tomogram, sigma=4, h_minima=0.0005)

print("Loading superpixels...")
segm = imread('data/copick_10439/segm_' + run.name + '_10000.tif') #CHANGEEEE

print("Converting ground-truth labels...")
for seg in segmentations:
_, array = list(zarr.open(seg.zarr()).arrays())[0]
arr = np.array(array[:])
mask[arr==1] = particles[seg.name]

data_dict = {"image": tomogram,
"label": mask,
"sk_features": sk_features,
# "sk_features": sk_features,
"superpixels": segm}

print("Saving data to pickle file...")
with open(f'dataset_run_{run.name}.pickle', 'wb') as f: # 'wb' means write in binary mode
with open(f'data/copick_10439/dataset_run_{run.name}_loadedSegm10000.pickle', 'wb') as f: # 'wb' means write in binary mode
pickle.dump(data_dict, f)

13 changes: 8 additions & 5 deletions src/cellcanvas_spp/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from dataclasses import dataclass
from sklearn.metrics import confusion_matrix, cohen_kappa_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict
import pandas as pd

Expand All @@ -20,14 +21,16 @@
class EvaluationResult:
cohen_kappa: float
confusion_matrix: pd.DataFrame
y_pred: pd.DataFrame


def evaluate_spp_features(df: pd.DataFrame) -> EvaluationResult:
def evaluate_spp_features(df: pd.DataFrame,**kwargs) -> EvaluationResult:
"""
Evaluates superpixel features by calculating Cohen's kappa score and the
confusion matrix based on ground truth and predicted labels.
"""
estimator = SVC()
# estimator = SVC(**kwargs)
estimator = RandomForestClassifier(**kwargs)
x = df.drop(columns="ground_truth") # feature set
y_true = df["ground_truth"] # labels

Expand All @@ -37,7 +40,7 @@ def evaluate_spp_features(df: pd.DataFrame) -> EvaluationResult:
kappa_score = cohen_kappa_score(y_true, y_pred)
conf_matrix = confusion_matrix(y_true, y_pred)

print("Kappa:", kappa_score)
print("Confusion:", conf_matrix)
# print("Kappa:", kappa_score)
# print("Confusion:", conf_matrix)

return EvaluationResult(kappa_score, pd.DataFrame(conf_matrix))
return EvaluationResult(kappa_score, pd.DataFrame(conf_matrix), pd.DataFrame(y_pred,columns=['y_pred']))
20 changes: 13 additions & 7 deletions src/cellcanvas_spp/ground_truth.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,18 @@ def get_gt_label_per_super_pixel(row) :
# Function to go from the 8 label counts to a single label (the class with most pixels, or background)

counts = row.values
# if at least pixel in the superpixel has a gt-label, assign this label (1-7)
if np.max(counts[1:])>0:
idx = np.argmax(counts[1:])+1
# if no gt-label is present in superpixel, assign background (0)
else:
idx = 0

weights = np.ones_like(counts,dtype='float')
weights[0] = 0.3

idx = np.argmax(counts * weights)

# # if at least one pixel in the superpixel has a gt-label, assign this label (1-7)
# if np.max(counts[1:])>0:
# idx = np.argmax(counts[1:])+1
# # if no gt-label is present in superpixel, assign background (0)
# else:
# idx = 0
return idx

def ground_truth_count(
Expand Down Expand Up @@ -144,4 +150,4 @@ def ground_truth_stats(
"total",
]

return props_df
return props_df