cellcanvas · TeunHuijben · Sep 19, 2024 · Sep 19, 2024 · Sep 26, 2024 · Sep 26, 2024
diff --git a/notebooks/demo.ipynb b/notebooks/demo.ipynb
diff --git a/notebooks/load_gt.ipynb b/notebooks/load_gt.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [

diff --git a/notebooks/superpixel2embedding.ipynb b/notebooks/superpixel2embedding.ipynb
diff --git a/prepare_dataset.py b/prepare_dataset.py
@@ -7,6 +7,7 @@
 from skimage.feature import multiscale_basic_features
 from cellcanvas_spp.segmentation import superpixels
 import pickle
+from tifffile import imread
 
 try:
     DATA_DIR = Path(os.environ["COPICK_DATA"])
@@ -16,47 +17,56 @@
         "$ export COPICK_DATA=</path/to/copick/data> python <script>"
     )
 
-config_file = DATA_DIR / "copick_10439/synthetic_data_10439_dataportal.json"
+# config_file = DATA_DIR / "copick_10439/synthetic_data_10439_dataportal.json"
+config_file = "synthetic_data_10439.json"
 root = copick.from_file(config_file)
 
 particles = dict()
 for po in root.config.pickable_objects:
     particles[po.name] = po.label
 
 
+list_of_runs = [root.get_run(s) for s in ["16191", ]]   #"16193","16191"
+
 data_dict = {}
-for run in tqdm(root.runs[2:3]):
+for run in tqdm(list_of_runs):
+
     print(f"Preparing run {run.name}")
     tomogram = run.get_voxel_spacing(10).get_tomogram('wbp')
     _, array = list(zarr.open(tomogram.zarr()).arrays())[0]
     tomogram = array[:]
     mask = np.zeros(tomogram.shape)
     segmentations = run.get_segmentations()
 
-    print("Calculating SK features...")
-    sk_features = multiscale_basic_features(
-            tomogram,
-            intensity=True,
-            edges=True,
-            texture=True,
-            sigma_min=0.5,
-            sigma_max=8.0
-        )
+    # print("Calculating SK features...")
+    # sk_features = multiscale_basic_features(
+    #         tomogram,
+    #         intensity=True,
+    #         edges=True,
+    #         texture=True,
+    #         sigma_min=0.5,
+    #         sigma_max=8.0
+    #     )
     #sk_features = np.moveaxis(features, -1, 0)
 
-    print("Calculating superpixels...")
-    segm = superpixels(tomogram, sigma=4, h_minima=0.0025)
+    # print("Calculating superpixels...")
+    # segm = superpixels(tomogram, sigma=4, h_minima=0.0005)
+
+    print("Loading superpixels...")
+    segm = imread('data/copick_10439/segm_' + run.name + '_10000.tif')      # TODO: change this hard-coded path
+
+    print("Converting ground-truth labels...")
     for seg in segmentations:
         _, array = list(zarr.open(seg.zarr()).arrays())[0]
         arr = np.array(array[:])
         mask[arr==1] = particles[seg.name]
 
     data_dict = {"image": tomogram, 
                  "label": mask, 
-                 "sk_features": sk_features, 
+                #  "sk_features": sk_features,
                  "superpixels": segm}
 
     print("Saving data to pickle file...")
-    with open(f'dataset_run_{run.name}.pickle', 'wb') as f:  # 'wb' means write in binary mode
+    with open(f'data/copick_10439/dataset_run_{run.name}_loadedSegm10000.pickle', 'wb') as f:  # 'wb' means write in binary mode
         pickle.dump(data_dict, f)
 
diff --git a/src/cellcanvas_spp/eval.py b/src/cellcanvas_spp/eval.py
@@ -12,6 +12,7 @@
 from dataclasses import dataclass
 from sklearn.metrics import confusion_matrix, cohen_kappa_score
 from sklearn.svm import SVC
+from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import cross_val_predict
 import pandas as pd
 
@@ -20,14 +21,16 @@
 class EvaluationResult:
     cohen_kappa: float
     confusion_matrix: pd.DataFrame
+    y_pred: pd.DataFrame
 
 
-def evaluate_spp_features(df: pd.DataFrame) -> EvaluationResult:
+def evaluate_spp_features(df: pd.DataFrame,**kwargs) -> EvaluationResult:
     """
     Evaluates superpixel features by calculating Cohen's kappa score and the
     confusion matrix based on ground truth and predicted labels.
     """
-    estimator = SVC()
+    # estimator = SVC(**kwargs)
+    estimator = RandomForestClassifier(**kwargs)
     x = df.drop(columns="ground_truth")  # feature set
     y_true = df["ground_truth"]  # labels
 
@@ -37,7 +40,7 @@ def evaluate_spp_features(df: pd.DataFrame) -> EvaluationResult:
     kappa_score = cohen_kappa_score(y_true, y_pred)
     conf_matrix = confusion_matrix(y_true, y_pred)
 
-    print("Kappa:", kappa_score)
-    print("Confusion:", conf_matrix)
+    # print("Kappa:", kappa_score)
+    # print("Confusion:", conf_matrix)
 
-    return EvaluationResult(kappa_score, pd.DataFrame(conf_matrix))
+    return EvaluationResult(kappa_score, pd.DataFrame(conf_matrix), pd.DataFrame(y_pred,columns=['y_pred']))
diff --git a/src/cellcanvas_spp/ground_truth.py b/src/cellcanvas_spp/ground_truth.py
@@ -85,12 +85,18 @@ def get_gt_label_per_super_pixel(row) :
     # Function to from the 8 label counts to single label (the class with most pixels, or background)
 
     counts = row.values
-    # if at least pixel in the superpixel has a gt-label, assign this label (1-7)
-    if np.max(counts[1:])>0:
-        idx = np.argmax(counts[1:])+1
-    # if no gt-label is present in superpixel, assign background (0)
-    else:
-        idx = 0
+
+    weights = np.ones_like(counts,dtype='float')
+    weights[0] = 0.3
+
+    idx = np.argmax(counts * weights)
+
+    # # if at least one pixel in the superpixel has a gt-label, assign this label (1-7)
+    # if np.max(counts[1:])>0:
+    #     idx = np.argmax(counts[1:])+1
+    # # if no gt-label is present in superpixel, assign background (0)
+    # else:
+    #     idx = 0
     return idx
 
 def ground_truth_count(
@@ -144,4 +150,4 @@ def ground_truth_stats(
         "total",
     ]
 
-    return props_df
+    return props_df