from tqdm import tqdm
from glob import glob
import tifffile
import numpy as np
import os
from EmbedSeg.utils.preprocess_data import extract_data, split_train_val, split_train_test, get_data_properties
from EmbedSeg.utils.generate_crops import *
from EmbedSeg.utils.visualize import visualize_crop_3d
import json
from matplotlib.colors import ListedColormap
# --- Configuration and dataset preparation ----------------------------------
data_dir = '../../../data'
project_name = 'Platynereis-Nuclei-CBG'

# Download the demo dataset archive and unpack it under
# `<data_dir>/<project_name>`.
extract_data(
    zip_url='https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Platynereis-Nuclei-CBG.zip',
    data_dir=data_dir,
    project_name=project_name,
)

# Hold out 2 images (an absolute count, since `by_fraction=False`) from the
# training data as the test split; `seed` makes the selection reproducible.
split_train_test(
    data_dir=data_dir,
    project_name=project_name,
    train_test_name='train',
    subset=2,
    by_fraction=False,
    seed=0)

# Hold out a further 2 training images as the validation split.
split_train_val(
    data_dir=data_dir,
    project_name=project_name,
    train_val_name='train',
    subset=2,
    by_fraction=False,
    seed=0)

# Collect dataset statistics (e.g. average/stdev object sizes) over the
# splits; used below to choose crop sizes.
data_properties_dir = get_data_properties(data_dir, project_name, train_val_name=['train', 'val'],
                                          test_name=['test'], mode='3d')

# Metadata that cannot be derived from the images themselves.
data_properties_dir['data_type'] = '16-bit'
data_properties_dir['pixel_size_x_microns'] = 0.406  # voxel size (microns) in x dimension
data_properties_dir['pixel_size_y_microns'] = 0.406  # voxel size (microns) in y dimension
data_properties_dir['pixel_size_z_microns'] = 2.031  # voxel size (microns) in z dimension

# Persist the properties so later (training/inference) notebooks can reuse them.
with open('data_properties.json', 'w') as outfile:
    json.dump(data_properties_dir, outfile)
# Fixed typo in the original message: "properies" -> "properties".
print("Dataset properties of the `{}` dataset is saved to `data_properties.json`".format(project_name))

# Where each object's spatial-embedding target points: its medoid or centroid.
center = 'medoid'  # one of: 'medoid', 'centroid'
# Validate eagerly with an explicit exception; a bare `assert` (as in the
# original) is silently stripped when Python runs with `-O`.
if center not in {'medoid', 'centroid'}:
    raise ValueError('Please specify center as one of : {"medoid", "centroid"}')
print("Spatial Embedding Location chosen as : {}".format(center))

# Number of object-size standard deviations added to the mean object size
# when sizing crops below.
n_sigma = 5
def round_up_8(x):
    """Round ``x`` up to the nearest multiple of 8 and return it as an int.

    Bug fix: the original implementation truncated the fractional part first
    (``x.astype(int)``), so a size estimate such as 16.5 was rounded to 16
    instead of 24. Because the per-axis estimates (mean + n_sigma * stdev of
    object sizes) are floats that can fall just above a multiple of 8, this
    truncation is exactly the kind of boundary effect that yields
    inconsistent crop sizes (e.g. 16 vs 24) between runs/splits. Taking the
    ceiling first guarantees the result is always >= x.

    Parameters
    ----------
    x : float or numpy scalar
        Non-negative size estimate. (Also accepts plain Python numbers now,
        whereas the original required a numpy value with ``.astype``.)

    Returns
    -------
    int
        Smallest multiple of 8 that is >= ``x``.
    """
    # (m + 7) & -8 clears the low three bits of m + 7, i.e. rounds the
    # integer m up to the next multiple of 8.
    return (int(np.ceil(x)) + 7) & (-8)
# Destination for the generated crops and which splits to process.
crops_dir = './crops'
data_subsets = ['train', 'val']

# Size each crop to cover a typical object plus `n_sigma` standard
# deviations of its extent, rounded up to a multiple of 8.
crop_size_z = round_up_8(data_properties_dir['avg_object_size_z']
                         + n_sigma * data_properties_dir['stdev_object_size_z'])
# x and y share one square extent: the larger of the two lateral estimates.
_lateral_y = round_up_8(data_properties_dir['avg_object_size_y']
                        + n_sigma * data_properties_dir['stdev_object_size_y'])
_lateral_x = round_up_8(data_properties_dir['avg_object_size_x']
                        + n_sigma * data_properties_dir['stdev_object_size_x'])
crop_size_y = np.maximum(_lateral_y, _lateral_x)
crop_size_x = crop_size_y
print("Crop size in x and y will be set equal to {}. Crop size in z is set equal to {}".format(crop_size_x, crop_size_z))

# Ratio of axial (z) to lateral (x) voxel size; passed on to the crop
# generator so it can account for anisotropic sampling.
anisotropy_factor = data_properties_dir['pixel_size_z_microns'] / data_properties_dir['pixel_size_x_microns']
speed_up = 2

# Intensity-normalization scheme applied while generating crops.
norm = 'min-max-percentile'
try:
    assert norm in {'min-max-percentile', 'mean-std', 'absolute'}
    print("Normalization chosen as : {}".format(norm))
except AssertionError as e:
    e.args += ('Please specify norm as one of : {"min-max-percentile", "mean-std", "absolute"}', 42)
    raise

# Generate crops for every image/instance-mask pair in each subset.
for data_subset in data_subsets:
    subset_root = os.path.join(data_dir, project_name, data_subset)
    image_names = sorted(glob(os.path.join(subset_root, 'images', '*.tif')))
    instance_names = sorted(glob(os.path.join(subset_root, 'masks', '*.tif')))
    # Pair images with their masks by sorted order, as in the original loop.
    for image_name, instance_name in tqdm(list(zip(image_names, instance_names))):
        process_3d(image_name, instance_name, os.path.join(crops_dir, project_name), data_subset,
                   crop_size_x=crop_size_x, crop_size_y=crop_size_y, crop_size_z=crop_size_z,
                   center=center, anisotropy_factor=anisotropy_factor, speed_up=speed_up,
                   norm=norm, data_type=data_properties_dir['data_type'])
    print("Cropping of images, instances and centre_images for data_subset = `{}` done!".format(data_subset))

# Record the normalization settings so inference can reproduce them exactly.
normalization = {
    'data_type': data_properties_dir['data_type'],
    'norm': norm,
}
with open('normalization.json', 'w') as outfile:
    json.dump(normalization, outfile)
print("Normalization properties of the `{}` dataset is saved to `normalization.json`".format(project_name))
# ---------------------------------------------------------------------------
# NOTE(review): the text below is a pasted GitHub bug report, not Python
# code; it is commented out here so the file remains valid, runnable Python.
#
# Describe the bug:
# When running the data Jupyter notebook from the examples as a Python
# script, I get images with different crop sizes (about half being
# 16 x 80 x 80, the other half being 24 x 80 x 80). This results in an error
# during torch.stack when attempting training. Also, this seems different
# from the crop sizes in the paper
# (https://www.sciencedirect.com/science/article/pii/S1361841522001700),
# which lists 32 x 136 x 136 for this dataset. Can you comment on what might
# be going wrong? My apologies if I missed something.
#
# To Reproduce: (steps not provided)
#
# Expected behavior:
# Equal crop sizes for the whole dataset, as well as crop sizes matching the
# dimensions mentioned in the paper.
#
# Desktop (please complete the following information):