cet_toolbox/toolbox.py at master · thomasfrosio/cet_toolbox · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os
import math
import time
import struct
import subprocess
import multiprocessing
from threading import Semaphore, Lock
import argparse
import re
from glob import glob
import itertools
from datetime import datetime

import pandas as pd

"""
TODO:   1)  Denoising (Janni or Topaz? or something simpler like what we are currently doing in MATLAB...)
            The toolbox will generate binned SIRT-like tomogram, so it should be enough for visualization.
            Alister is working on a denoising solution.
        2)  Generate output graphs for alignment quality and Ctffind.
        3)  Unittest.
        4)  Send an email to David about the tilt axis offset.
        5)  Zhengyi noticed strange behavior with new version of MotionCor2. Need to investigate this.
"""

# Toolbox version string; shown by --version and written in generated input files and logs.
VERSION = '0.13.1'

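# The descriptor must be correctly formatted for the interactive mode and create_input_file to work.
# Format: <param1>//<type1>//<help1>//<default1>//<param2>//<type2>//<help2>//<default2>//...
# Newlines are removed before the text is split on '//', so one entry may span several lines.
# Parameters prefixed with 'ba_' are basic, the others ('ad_') are advanced.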
descriptor = f"""
ba_set_prefix2look4//str//Look for the movies/sums with this prefix. If '*', catch every mrc/tif image.
NB: This is used even if MotionCor is deactivated//*//
ba_set_prefix2add//str//Prefix to add to every output (motion corrected images, stacks, logs, etc.).
NB: prefix/suffix from the original images are removed//WT//
ba_set_field_nb//int//Field (sep: '_', counting from 0) containing stack number in the filename of raw images.
If you do not want to run MotionCor, this must correspond to the motion corrected images. Only numbers will
be kept, allowing to have tilt<nb>//1//
ba_set_field_tilt//int//Field (sep: '_', counting from 0) containing tilt angle in the filename of raw images.
If you do not want to run MotionCor, this must correspond to the motion corrected images//3//
ba_set_pixelsize//float|str//Pixel size of the raw images in Angstrom. If 'header', the pixel size is read
from the header//header//
ad_set_max_cpus//int//Number of processes used in parallel for creating and aligning the tilt-series. Default:
available logical cores//{multiprocessing.cpu_count()}//

ba_path_raw//str//Path of the raw images directory. Only used if MotionCor2 is activated. The path can end with
'*', meaning that the movies are grouped into sub-folders (i.e. raw/stack*)//../raw//
ad_path_motioncor//str//Where the MotionCor outputs will go. Will be created if doesn't exist//motioncor//
ad_path_stacks//str//Path of stacks and Ctffind outputs. Will be created if doesn't exist//stacks//
ad_path_mdocfiles//str//Path of mdoc files. Used to create the rawtlt file. File names must be
(path)/*_<stack_nb>.mrc.mdoc with 'stack_nb' being the stack number (zeros padded, 3 characters)//mdocs//
ad_path_logfile//str//Main log file name. Other log files (MotionCor2, Ctftinf, etc.) will be saved independently//
toolbox_{datetime.now():%d%b%Y}.log//

ba_run_motioncor//bool//Run MotionCor2 or not. If not, the motion corrected images must be in path_motioncor and
the ba_set settings must correspond to these images//1//
ba_run_ctffind//bool//Estimate the defocus of the lower tilt image of each stack using Ctffind//1//
ba_run_stack//bool//Create the stack from motion-corrected sums//1//
ba_run_batchruntomo//bool//Align the tilt-series using IMOD batchruntomo//1//
ba_run_onthefly//bool//Triggers on-the-fly processing//0//
ad_run_overwrite//bool//Will re-process every stack. If 0, will look at a file (toolbox_stack_processed.txt) and
skip the stacks that are registered inside this file (<nb>:<nb>:). The stack numbers can be padded with 0//0//
ad_run_nb//int|list//Process only these/this stack(s). Must correspond to the stack number at the field
ba_set_field_nb (+/- 0 padding). This is ignored when on-the-fly is activated. Default: Process everything//all//

ba_otf_max_images_per_stack//int//Expected number of images per stacks. Used to catch the last stack//37//
ba_otf_max_time2try//float//Tolerated time (min) of inactivity//20//

ba_mc_motioncor//str//Path of MotionCor2 program//
/apps/strubi/motioncorr/2-1.1.0-gcc5.4.0-cuda8.0-sm61/MotionCor2//
ba_mc_desired_pixelsize//float|str//Desired pixel size. If lower than current pixel size, Fourier cropping
will be done by MotionCor2. If 'current': no Ftbin applied, If 'ps_x2': Ftbin=2//ps_x2//
ad_mc_throw//int//Frame to remove, from the first frame. From 0//0//
ad_mc_trunc//int//Frame to remove, from the last frame. From 0//0//
ad_mc_tolerance//float//Tolerance of alignment accuracy: less than X pixel//0.5//
ad_mc_iter//int//Iterations after which the alignment stops (if tolerance not achieved already)//10//
ad_mc_patch//int,int(,int)//After global alignment, divides the corrected frames into X*X patches on which
the local motion is measured//5,5,20//
ad_mc_group//int//Equally divide the input stack into non-overlapping sub-groups. Instead of aligning individual
frames, the sums of these sub-groups are aligned. The shifts of individual frames are then interpolated and
extrapolated. Recommended for low-signal movie stacks//1//
ba_mc_gpu//int|str//GPU IDs. Can be a list of int separated by comas (ex: 0,1,2,3) or 'auto'. These must correspond
to the ID displayed using nvidia-smi. If 'auto', the program will select the visible GPUs
that do not have any process running//auto//
ba_mc_jobs_per_gpu//int//Number of MotionCor jobs per GPU. I recommend to try with one stack to see how many
memory is allocated//3//
ad_mc_gpu_mem_usage//float//GPU memory allocated to buffer the movie stacks. For multiple MotionCor2 jobs in one
GPU, it is recommended to set it to 0. Default=0.5//0.5//
ba_mc_tif//bool//If the raw images are in TIF//0//
ba_mc_gain//str//Gain reference for MotionCor2. Must have the corrected rotation and be a mrc file//nogain//

ba_ctf_ctffind//str//Path of Ctffind///apps/strubi/ctf/4.1.5/ctffind//
ad_ctf_voltage//float//Acceleration voltage (kV)//300//
ad_ctf_cs//float//Spherical aberration (mm)//2.7//
ad_ctf_amp_cont//float//Amplitude contrast (0 to 1)//0.8//
ad_ctf_size2compute//int//Size of amplitude spectrum to compute//512//
ad_ctf_min_res//float//Minimum resolution//30//
ad_ctf_max_res//float//Maximum resolution//5//
ad_ctf_min_def//float//Minimum defocus//5000//
ad_ctf_max_def//float//Maximum defocus//50000//
ad_ctf_step_def//float//Defocus search step//500//
ad_ctf_astig_type//str|float//Do you know what astigmatism is present?//no//
ad_ctf_exhaustive//str//Slower, more exhaustive search//no//
ad_ctf_astig_restraint//str//Use a restraint on astigmatism//no//
ad_ctf_phase_shift//float//Find additional phase shift//no//

ad_brt_adoc//str//Batchruntomo adoc file to use. Overwrites every ba_brt parameters except ad_brt_start
and ad_brt_end//default//
ba_brt_gold_size//float//Size of gold beads in nm//10//
ba_brt_rotation_angle//float//Initial angle of rotation in the plane of projection. This is the CCW positive
rotation from the vertical axis to the suspected tilt axis in the unaligned views//86//
ad_brt_bin_coarse//int//Bin used for coarsed alignment. If 'auto', set the binning to have the gold beads diameter
to ~12.5 pixel//auto//
ad_brt_target_nb_beads//int//(Generous) Target number of beads per projection. Usually 25 is fine//25//
ad_brt_bin_ali//int//Binning used for final stack and tomogram reconstruction//5//
ad_brt_start//int//Starts at this step. See batchruntomo documentation.//0//
ad_brt_end//int//Ends at this step. 12: stop after gold erase. 20: stop after tomogram generation and rotation//20"""

# Used by Batchruntomo as adoc file. <parameters> will be replaced by the actual inputs.
adoc = f"""
# INPUT FILE BATCHRUNTOMO #

# Setup #
setupset.copyarg.gold = <ba_brt_gold_size>
setupset.copyarg.rotation = <ba_brt_rotation_angle>
setupset.copyarg.dual = 0
setupset.copyarg.userawtlt = 1
setupset.scanHeader = 1


# Preprocessing #
runtime.Preprocessing.any.removeXrays = 1
runtime.Preprocessing.any.archiveOriginal = 0
runtime.Preprocessing.any.endExcludeCriterion = 1
runtime.Preprocessing.any.darkExcludeRatio = 0.17
runtime.Preprocessing.any.darkExcludeFraction = 0.33
runtime.Preprocessing.any.removeExcludedViews = 1


# Coarse alignment #
comparam.xcorr.tiltxcorr.ExcludeCentralPeak = 1
comparam.xcorr.tiltxcorr.FilterRadius2 = 0.15
comparam.prenewst.newstack.BinByFactor = <ad_brt_bin_coarse>
comparam.prenewst.newstack.AntialiasFilter = -1
comparam.prenewst.newstack.ModeToOutput =


# Seeding and tracking #
runtime.Fiducials.any.trackingMethod = 0
runtime.Fiducials.any.seedingMethod = 1

comparam.track.beadtrack.LocalAreaTracking = 1
comparam.track.beadtrack.SobelFilterCentering = 1
comparam.track.beadtrack.KernelSigmaForSobel = 1.5
comparam.track.beadtrack.RoundsOfTracking = 4
runtime.BeadTracking.any.numberOfRuns = 2

comparam.autofidseed.autofidseed.TargetNumberOfBeads = <ad_brt_target_nb_beads>
comparam.autofidseed.autofidseed.AdjustSizes = 1
comparam.autofidseed.autofidseed.TwoSurfaces = 0
comparam.autofidseed.autofidseed.MinGuessNumBeads = 3


# Tomogram positionning #
runtime.Positioning.any.sampleType = 2
runtime.Positioning.any.thickness = 2000
runtime.Positioning.any.hasGoldBeads = 1
comparam.cryoposition.cryoposition.BinningToApply = 5


# Alignment #
comparam.align.tiltalign.SurfacesToAnalyze = 1
comparam.align.tiltalign.LocalAlignments = 1
comparam.align.tiltalign.RobustFitting = 1

#comparam.align.tiltalign.MagOption = 0
#comparam.align.tiltalign.TiltOption = 0
#comparam.align.tiltalign.RotOption = -1
#comparam.align.tiltalign.BeamTiltOption = 2

runtime.TiltAlignment.any.enableStretching = 0
runtime.PatchTracking.any.adjustTiltAngles = 0


# Final aligned stack #
runtime.AlignedStack.any.correctCTF = 0
runtime.AlignedStack.any.eraseGold = 2
runtime.AlignedStack.any.filterStack = 0
runtime.AlignedStack.any.binByFactor = <ad_brt_bin_ali>
runtime.AlignedStack.any.linearInterpolation = 1
comparam.newst.newstack.AntialiasFilter = 1

runtime.GoldErasing.any.extraDiameter = 4
runtime.GoldErasing.any.thickness = 3300
comparam.golderaser.ccderaser.ExpandCircleIterations = 3


# Reconstruction #
comparam.tilt.tilt.THICKNESS = 1500
comparam.tilt.tilt.FakeSIRTiterations = 8
runtime.Trimvol.any.reorient = 2

"""

# Indentation (4 spaces) prepended to messages printed on the terminal.
TAB = ' ' * 4


class Colors:
    # ANSI escape sequences (SGR codes) used to decorate the terminal output.
    reset = "\033[0m"  # SGR 0: reset every attribute
    bold = "\033[1m"  # SGR 1: bold
    underline = "\033[4m"  # SGR 4: underline
    k = "\033[30m"  # SGR 30: black foreground
    r = "\033[91m"  # SGR 91: bright red foreground
    g = "\033[92m"  # SGR 92: bright green foreground
    b = "\033[96m"  # SGR 96: bright cyan foreground
    b_b = "\033[44m"  # SGR 44: blue background


class InputParameters:
    """
    Gather and manage all the inputs of the Toolbox.

    NB: With a few exceptions (see self._check_inputs), inputs are kept as
        strings, as they are usually used for sub-processing.

    NB: Add an user input:
            - Modify the descriptor. If it isn't in the descriptor,
              the parameters will not be accessible by InputInteractive and
              the collect_input_from_file will raise a ValueError.
            - Add the corresponding attribute(s) in __init__
              and check_inputs if necessary.
    """

    def __init__(self, defaults):
        """
        Parse the command line, split the descriptor and declare every parameter.

        If the command line asks for an input file to be created, the file is
        written and the program exits from within self.create_input_file.
        Otherwise every parameter is declared as None; self.get_inputs fills
        them in later.

        :type defaults:     str
        :param defaults:    descriptor. Newlines are removed and the text is
                            split on '//' into a flat list stored in self.defaults.
        """
        self.cmd_line = self._get_command_line()
        self.defaults = defaults.replace('\n', '').split('//')

        # this is a dead end: create_input_file() writes the file and calls exit()
        if self.cmd_line.create_input_file:
            self.create_input_file()

        # set attributes to None for now
        # self.get_inputs will update them using interactive mode or input file
        # NB: self.save2logfile iterates over self.__dict__, so the order of the
        #     assignments below is the order in which parameters are logged.
        self.ba_set_prefix2look4 = None
        self.ba_set_prefix2add = None
        self.ba_set_field_nb = None
        self.ba_set_field_tilt = None
        self.ba_set_pixelsize = None
        self.ad_set_max_cpus = None

        # paths
        self.ba_path_raw = None
        self.ad_path_motioncor = None
        self.ad_path_stacks = None
        self.ad_path_mdocfiles = None
        self.ad_path_logfile = None

        # on-the-fly
        self.ba_otf_max_images_per_stack = None
        self.ba_otf_max_time2try = None

        # MotionCor2
        self.ba_mc_motioncor = None
        self.ba_mc_desired_pixelsize = None
        self.ba_mc_gpu = None
        self.ba_mc_jobs_per_gpu = None
        self.ba_mc_tif = None
        self.ba_mc_gain = None
        self.ad_mc_throw = None
        self.ad_mc_trunc = None
        self.ad_mc_tolerance = None
        self.ad_mc_iter = None
        self.ad_mc_patch = None
        self.ad_mc_group = None
        self.ad_mc_gpu_mem_usage = None

        # Ctffind
        self.ba_ctf_ctffind = None
        self.ad_ctf_voltage = None
        self.ad_ctf_cs = None
        self.ad_ctf_amp_cont = None
        self.ad_ctf_size2compute = None
        self.ad_ctf_min_res = None
        self.ad_ctf_max_res = None
        self.ad_ctf_min_def = None
        self.ad_ctf_max_def = None
        self.ad_ctf_step_def = None
        self.ad_ctf_astig_type = None
        self.ad_ctf_exhaustive = None
        self.ad_ctf_astig_restraint = None
        self.ad_ctf_phase_shift = None

        # Batchruntomo
        self.ba_brt_gold_size = None
        self.ba_brt_rotation_angle = None
        self.ad_brt_adoc = None
        self.ad_brt_bin_coarse = None
        self.ad_brt_target_nb_beads = None
        self.ad_brt_bin_ali = None
        self.ad_brt_start = None
        self.ad_brt_end = None

        # steps to run
        self.ba_run_motioncor = None
        self.ba_run_ctffind = None
        self.ba_run_stack = None
        self.ba_run_batchruntomo = None
        self.ba_run_onthefly = None
        self.ad_run_overwrite = None
        self.ad_run_nb = None  # see self._set_stack

        # internal values, not exposed in the descriptor
        self.hidden_oft_gpu = None  # see Metadata._get_gpu_id
        self.hidden_mc_ftbin = None  # see self.set_pixelsize
        self.hidden_queue_filename = 'toolbox_stack_processed.txt'
        self.hidden_run_nb = None  # just for warning, remember positive restriction

    def get_inputs(self):
        """
        Collect the user inputs and store them as attributes.

        The inputs come from the input file given on the command line or,
        without one, from the interactive mode. Once stored, they are
        validated and formatted by self._check_inputs.
        """
        self.check_dependency('imod')

        # both sources return a dict {parameter name: value}
        if not self.cmd_line.input:
            print(f'{TAB}- Mode: Interactive.\n')
            collected = InputInteractive(self.defaults, self.cmd_line.advanced).get_inputs()
        else:
            print(f'{TAB}- Mode: Using inputs from {Colors.bold}{self.cmd_line.input}{Colors.reset}\n')
            collected = self._get_inputs_from_file()

        # every collected input becomes an attribute of the same name
        for name in collected:
            setattr(self, name, collected[name])

        # keep a copy of the GPU selection as entered by the user
        self.hidden_oft_gpu = self.ba_mc_gpu

        self._check_inputs()

    def set_bin_coarsed(self):
        """
        Set the binning used for coarsed alignment.
        The idea is to have a gold bead equals to 10-15 pixels.
        """
        if self.ad_brt_bin_coarse == 'auto':
            try:
                self.ad_brt_bin_coarse = round(float(self.ba_brt_gold_size) /
                                               (12.5 * (self.ba_mc_desired_pixelsize / 10)))
            except TypeError:
                raise

    def set_pixelsize(self, meta=None):
        """
        Modify pixel size if necessary, as well as hidden_mc_ftbin.

        :type meta:    DataFrame or None
        :param meta:   Metadata. Must contain raw, nb and tilt.

        If meta = None, convert current and desired pixel sizes to floats if possible.
        If meta = DataFrame, extract the current pixel size from header, adjust the desired
            pixel size if necessary ('current' or 'ps_x2').

        In any case, try to compute hidden_mc_ftbin.
        """
        if meta is not None and self.ba_set_pixelsize == 'header':
            # ba_set_pixelsize can only be a float or 'header'.
            # In the later, extract pixel size from header.
            self.ba_set_pixelsize = self._get_pixel_size_from_meta(meta)

        # first call has to go here
        else:
            if self.ba_set_pixelsize != 'header':
                try:
                    self.ba_set_pixelsize = float(self.ba_set_pixelsize)
                except ValueError:
                    raise ValueError(f"Pixel: 'ba_set_pixelsize' should be a float or 'header'.")

            # set desired pixel size
            if self.ba_mc_desired_pixelsize not in ('ps_x2', 'current'):
                try:
                    self.ba_mc_desired_pixelsize = float(self.ba_mc_desired_pixelsize)
                except ValueError:
                    raise ValueError(
                        f"Pixel: 'ba_mc_desired_pixelsize' should be a float or 'current' or 'ps_x2'.")

        self._set_desired_pixelsize()
        self._set_ftbin()

    def save2logfile(self):
        """
        Append the toolbox version and the user parameters to the main log file.

        Only the attributes whose name starts with 'ba_' or 'ad_' are saved,
        one per line, in the order they were declared. The log file
        (self.ad_path_logfile) is opened in append mode.
        """
        inputs = '\n\t'.join(f'{key}: {value}' for key, value in self.__dict__.items()
                             if key.startswith(('ba_', 'ad_')))
        with open(self.ad_path_logfile, 'a') as log_file:
            log_file.write(f'Toolbox version {VERSION}.\n'
                           f"Using parameters:\n\t{inputs}\n\n")

    def warnings(self):
        """
        Log a warning for each run setting that requires the user's attention:
        MotionCor deactivated, Newstack deactivated while Batchruntomo is
        active, and on-the-fly mode combined with a stack restriction.
        """
        notices = []

        if not self.ba_run_motioncor:
            notices.append(
                f"{Colors.r}WARNING: MotionCor deactivated.\n"
                f"'ba_set_field_nb', 'ba_set_field_tilt' must match the motion corrected images.{Colors.reset}\n")

        if self.ba_run_batchruntomo and not self.ba_run_stack:
            notices.append(
                f"{Colors.r}WARNING: Newstack deactivated.\n"
                f"Your stacks must be as followed: <ad_path_stacks>/stack<nb>/<ba_prefix2add>_<nb>.st, "
                f"with <nb> being the 3 digit stack number (padded with zeros).{Colors.reset}\n")

        if self.ba_run_onthefly and self.hidden_run_nb:
            notices.append(
                f"{Colors.r}WARNING: On-the-fly mode activated with positive restriction (--nb/ad_run_nb).\n"
                f"Positive restrictions are currently ignored in this mode.{Colors.reset}\n")

        for notice in notices:
            logger(notice)

    def create_input_file(self):
        """
        Write an input file filled with the default parameters, then exit.

        The file starts with a header and a description of each parameter,
        followed by one 'param=value' line per parameter. Advanced parameters
        are only included when --advanced was given on the command line.
        """
        advanced = self.cmd_line.advanced
        date_created = "Created: {0:%d-%b-%Y} | {0:%H:%M}".format(datetime.now())
        mode = 'advanced' if advanced else 'basic'
        header = ''.join(('-' * 60,
                          f'\nToolbox version {VERSION}.',
                          f"\nParameters: {mode}",
                          f'\n{date_created}\n\n',
                          'Description:\n'))

        # one entry = [name, type, help, default]
        entries = [self.defaults[start:start + 4] for start in range(0, len(self.defaults), 4)]
        if not advanced:
            # hide advanced parameters by default
            entries = [entry for entry in entries if 'ba_' in entry[0]]

        description_lines = []
        parameter_lines = []
        for name, kind, comment, default in entries:
            label = f'{name} ({kind})'
            # long help texts are wrapped and aligned under the first line
            if len(comment) <= 60:
                text = f'{comment}'
            else:
                text = self._format_input_description(comment, left=40, right=60)
            description_lines.append(f'{label:<38}: {text}\n')
            parameter_lines.append(f'{name}={default}\n')

        description = ''.join(description_lines)
        parameters = ''.join(parameter_lines)
        content = (f"{header + description}{'-' * 60}\n\n"
                   f"Parameters: (param=value, no whitespace; in-line comments are OK)\n{parameters}\n")

        with open(self.cmd_line.create_input_file, 'w') as output:
            output.write(content)

        print(f"Input file '{Colors.bold}{self.cmd_line.create_input_file}{Colors.reset}' was created.\n"
              f"Closing.")
        exit()

    @staticmethod
    def check_dependency(program):
        """If program (str) is not in PATH, raise OSError."""
        if not any(os.access(os.path.join(path, program), os.X_OK)
                   for path in os.environ["PATH"].split(os.pathsep)):
            raise OSError(f'Check dependency: {program} needs to be in PATH...\n')

    @staticmethod
    def _format_input_description(description, left, right):
        """Wrap string within left and right padding."""
        final_string = ''
        string = ''
        for i in description.split():
            size = len(string) + len(i)
            if size <= left + right:
                string += ' ' + i if string else i
            else:
                pad = ' ' * left
                final_string += '\n' + pad + string if final_string else string
                string = i

        if string:
            pad = ' ' * left
            final_string += '\n' + pad + string if final_string else string

        return final_string

    @staticmethod
    def _get_command_line():
        """
        Build the argument parser and parse the command line.

        :return:    argparse.Namespace holding input, create_input_file, fly,
                    logfile, nb, overwrite, adoc and advanced.
        """
        parser = argparse.ArgumentParser(prog='CET Toolbox',
                                         description='Program helping with CET data pre-processing.')

        # an input file is either read (-i) or created (-c), never both
        exclusive = parser.add_mutually_exclusive_group()
        exclusive.add_argument('-i', '--input',
                               nargs='?',
                               type=str,
                               help='Input file containing the parameters.')
        exclusive.add_argument('-c', '--create_input_file',
                               nargs='?',
                               const=f"Toolbox_inputs_{datetime.now():%d%b%Y}.txt",
                               type=str,
                               help='Create an input file from the default parameters.')

        # options overwriting the inputs: switches store True when given
        # without a value, the others expect a string
        switch = {'nargs': '?', 'const': True}
        text = {'nargs': '?', 'type': str}
        overrides = (
            ('--fly', switch, 'Enable on-the-fly processing.'),
            ('--logfile', text, 'Log file name.'),
            ('--nb', text, 'Stack number(s) to process. Integer or a list of (optionnaly zero padded) '
                           'integers separated by comas.'),
            ('--overwrite', switch, 'Ignore previous processing and overwrite everything.'),
            ('--adoc', text, 'Batchruntomo adoc file.'),
            ('--advanced', switch, 'Use with -c to create an input file with advanced parameters.'),
        )
        for option, settings, description in overrides:
            parser.add_argument(option, help=description, **settings)

        parser.add_argument('--version',
                            action='version',
                            version=f'%(prog)s {VERSION}',
                            help="Show program's version.")

        return parser.parse_args()

    def _get_inputs_from_file(self):
        """Extract the inputs from an input file."""
        try:
            with open(self.cmd_line.input, 'r') as f:
                lines = f.readlines()
        except IOError as err:
            raise IOError(f'Collect inputs from file: {err}')

        # Remove the header to be sure it is not going to be parsed
        head = 0
        for i, line in enumerate(lines):
            if '------' in line:
                head = i
        lines = lines[head:]

        # parse
        # empty parameters are accepted
        inputs_dict = {}
        r = re.compile(r'^\w+=(\S+|\s)')
        for line in lines:
            for m in r.finditer(line):
                key, value = m.group().split('=')
                inputs_dict[key] = value

        # make sure ba_ parameters are there and set ad_ parameters to default if not set.
        if inputs_dict != {}:
            inputs_dict = self._get_inputs_from_file_cleaning(inputs_dict)
        else:
            raise Exception(f'Collect inputs from file: No parameter detected in {self.cmd_line.input}.')
        return inputs_dict

    def _get_inputs_from_file_cleaning(self, inputs_dict):
        """Make sure required parameters are there and get default value for missing optional parameters."""
        param_key = (self.defaults[key] for key in range(0, len(self.defaults), 4))
        param_value = (self.defaults[value] for value in range(3, len(self.defaults) + 1, 4))

        for key, def_value in zip(param_key, param_value):
            if key not in inputs_dict:
                if 'ba_' in key:
                    raise ValueError(f'Collect inputs from file: {key} is missing.')
                else:
                    inputs_dict[key] = def_value
        return inputs_dict

    def _check_inputs(self):
        """Few sanity checks and format required inputs."""
        head = 'Check inputs:'

        # convert to int
        for _input in ('ba_set_field_nb',
                       'ba_set_field_tilt',
                       'ba_mc_jobs_per_gpu',
                       'ad_set_max_cpus'):
            try:
                setattr(self, _input, int(getattr(self, _input)))
            except ValueError:
                raise ValueError(f"{head} {_input} must be an integer.")

        # convert to bool
        for _input in ('ba_run_motioncor',
                       'ba_run_ctffind',
                       'ba_run_stack',
                       'ba_run_batchruntomo',
                       'ba_run_onthefly',
                       'ad_run_overwrite',
                       'ba_mc_tif'):
            try:
                setattr(self, _input, bool(int(getattr(self, _input))))
            except ValueError:
                raise ValueError(f'{head} {_input} must be a boolean.')

        self._check_inputs_priority()
        self._set_stack()
        self.set_pixelsize()

        if self.ba_run_motioncor and not glob(f'{self.ba_path_raw}/'):
            raise NotADirectoryError(f"{head} ba_path_raw ({self.ba_path_raw}) not found.")
        if self.ba_set_prefix2add == '':
            raise ValueError(f'{head} {self.ba_set_prefix2add} should not be empty.')

    def _check_inputs_priority(self):
        """
        When the command line gives access to an input,
        make sure it overwrites the inputs.
        """
        if self.cmd_line.fly:
            self.ba_run_onthefly = self.cmd_line.fly

        if self.cmd_line.logfile:
            self.ad_path_logfile = self.cmd_line.logfile

        if self.cmd_line.overwrite:
            self.ad_run_overwrite = self.cmd_line.overwrite

        if self.cmd_line.nb:
            self.ad_run_nb = self.cmd_line.nb

    def _set_stack(self):
        """
        ad_run_nb can be modified by the command line (--nb) or by the inputs (file or interactive).
        It will be read by Metadata, which expects a list of int or empty list.

        At this point, ad_run_nb is a string.
        If 'all', all the available stacks should be selected: Set it to [].
        If string of integers (separated by comas), restricts to specific stacks:
            Convert str of int -> list of int.

        NB: In addition to these possible restrictions, Metadata._exclude_queue can add a negative (process
            everything except these ones) priority restriction using the tool_processed.queue file.

        NB: When on-the-fly, self.ad_run_nb will be set to the queue.
        """
        tmp = []
        if isinstance(self.ad_run_nb, str) and self.ad_run_nb != 'all':
            if not self.ba_run_onthefly:
                try:
                    for nb in self.ad_run_nb.split(','):
                        tmp.append(int(nb))
                except ValueError:
                    raise ValueError("Restrict stack: ad_run_nb must be 'all' "
                                     "or list of integers separated by comas.")
            # just for self.warning
            else:
                self.hidden_run_nb = self.ad_run_nb
        self.ad_run_nb = tmp

    @staticmethod
    def _get_pixelsize_header(mrc_filename):
        """Parse the header and compute the pixel size."""
        size = 4
        offsets = [0, 4, 8, 40, 44, 48]
        structs = '3i3f'
        header = b''

        with open(mrc_filename, 'rb') as f:
            for offset in offsets:
                f.seek(offset)
                header += f.read(size)

        [nx, ny, nz,
         cellax, cellay, cellaz] = struct.unpack(structs, header)

        # make sure the pixel size is the same for each axis
        px, py, pz = cellax / nx, cellay / ny, cellaz / nz
        if math.isclose(px, py, rel_tol=1e-4) and math.isclose(px, pz, rel_tol=1e-4):
            return px
        else:
            raise Exception(f'Extract pixel size: {mrc_filename} has different pixel sizes in x, y and z.')

    def _get_pixel_size_from_meta(self, meta):
        """
        Catch the pixel from header.
        Check only for lowest tilt of every stack and make sure to
        have the same pixel size for every stacks.
        """
        pixelsizes = []
        for stack in meta['nb'].unique():
            meta_stack = meta[meta['nb'] == stack]
            image = meta_stack['raw'].loc[meta_stack['tilt'].abs().idxmin(axis=0)]

            pixelsizes.append(self._get_pixelsize_header(image))

        if len(set(pixelsizes)) == 1:
            return pixelsizes[0]
        else:
            raise Exception('Set pixel size: more than one pixel size was detected. '
                            'It is not supported at the moment, so stop here...')

    def _set_desired_pixelsize(self):
        """If the desired pixel size rely on the current pixel size, update it."""
        if not isinstance(self.ba_set_pixelsize, str):
            if not self.ba_run_motioncor:
                self.ba_mc_desired_pixelsize = self.ba_set_pixelsize
            else:
                if self.ba_mc_desired_pixelsize == 'ps_x2':
                    self.ba_mc_desired_pixelsize = self.ba_set_pixelsize * 2
                elif self.ba_mc_desired_pixelsize == 'current':
                    self.ba_mc_desired_pixelsize = self.ba_set_pixelsize

    def _set_ftbin(self):
        """Compute Ftbin for MotionCor2."""
        if isinstance(self.ba_set_pixelsize, float) and isinstance(self.ba_mc_desired_pixelsize, float):
            self.hidden_mc_ftbin = self.ba_mc_desired_pixelsize / self.ba_set_pixelsize


class InputInteractive:
    """
    Ask the user to set the required parameters.

    :param defaults:    Flat sequence describing the parameters, four consecutive entries
                        per parameter: name, type, help text and default value.
    :param advanced:    If False, the parameters whose name starts with 'ad_' are not
                        prompted and take their default value.
    """

    def __init__(self, defaults, advanced):
        self.defaults = defaults
        self.advanced = advanced
        self.padding_allowed = 55   # width of a prompt line, before the final ': '
        self.trimming_help = 5      # help lines are wrapped this much before padding_allowed

    def get_inputs(self):
        """
        Ask the user to enter the inputs one by one.
        The default parameters will be suggested and used if nothing else is given.
        Entering '+' will display the description for this parameter.

        Parameters are grouped by the second '_'-separated field of their name
        ('set', 'path', 'run', then one group per processing step). The parameters
        of a step that was not selected are not prompted and take their default.

        TODO: Live input checks?

        :return:    Dictionary {parameter name: answer (or default)}.
        """
        # extract the parameters from the defaults: [name, type, help, default]
        descriptor_param = [self.defaults[i:i + 4] for i in range(0, len(self.defaults), 4)]

        def select(group):
            """Parameters belonging to a group, in the order of the defaults."""
            return [param for param in descriptor_param if param[0].split('_')[1] == group]

        all_inputs = {}
        print("Tip: answering '+' will display the parameter description.")
        for title, group in (('--- Project ---', 'set'),
                             ('--- Paths ---', 'path'),
                             ('--- Steps ---', 'run')):
            print(title)
            all_inputs.update(self._get_inputs_collector(select(group)))

        # Run only the steps that were selected by the user.
        steps = (('--- MotionCor ---', 'ba_run_motioncor', 'mc'),
                 ('--- Ctffind ---', 'ba_run_ctffind', 'ctf'),
                 ('--- Batchruntomo ---', 'ba_run_batchruntomo', 'brt'),
                 ('--- On-the-fly ---', 'ba_run_onthefly', 'otf'))
        for title, switch, group in steps:
            selected = all_inputs[switch] in ('1', 'True')
            if selected:
                print(title)
            all_inputs.update(self._get_inputs_collector(select(group), use_default=not selected))

        return all_inputs

    def _get_inputs_collector(self, list_of_parameters, use_default=False):
        """
        Format a list of parameters to input.

        :param list_of_parameters:      Catch user inputs for these parameters.
        :param use_default:             If True, shortcut, use default.
                                        No need to ask the user as these parameters will not be used.
        :return:                        Dictionary gathering the answer for each parameter.
        """
        if use_default:
            return {param[0]: param[3] for param in list_of_parameters}

        answers = {}
        for parameter in list_of_parameters:
            name, default = parameter[0], parameter[3]

            # 'ad_' is a prefix: a substring test would also hide basic parameters
            # that merely contain it (e.g. 'ba_brt_bead_size').
            if not self.advanced and name.startswith('ad_'):
                answers[name] = default
                continue

            answer = input(self._format_input_header(parameter))
            # '+' asks for the description; keep showing it as long as it is requested
            # so that '+' itself is never stored as the value of the parameter.
            while answer == '+':
                answer = input(self._format_input_help(parameter[2]))

            # an empty answer selects the default
            answers[name] = answer or default
        return answers

    def _format_input_header(self, parameter):
        """
        For a given parameter, format the string to print.
        Format:
        <param_name> (<param_type>) [<param_default>]:

        If everything does not fit within self.padding_allowed, the default value
        is displayed on its own line.
        """
        name, kind, default = parameter[0], parameter[1], parameter[3]

        # + 6: the two spaces, the parentheses and the brackets
        padding_left = self.padding_allowed - (len(name + kind + default) + 6)
        if padding_left > 0:
            newline_or_space = ' '
        else:
            newline_or_space = '\n'
            padding_left = self.padding_allowed - len(default) - 2

        def_value = f'{Colors.g}[{default}]{Colors.reset}'
        param = f'{Colors.b}{name}{Colors.reset}'

        return f"{param} ({kind}){newline_or_space}{def_value}{' ' * padding_left}: "

    def _format_input_help(self, param_help):
        """
        Wrap the description of a parameter and turn it into a prompt.

        Words are accumulated on a line while its length stays under
        self.padding_allowed - self.trimming_help. Every line is then padded with
        spaces up to self.padding_allowed and ': ' is appended to the last one.
        """
        width = self.padding_allowed
        limit = width - self.trimming_help

        lines = []
        current = ''
        for word in param_help.split():
            if len(current) + len(word) < limit:
                current = f'{current} {word}' if current else word
            else:
                # the line is full: close it (if any) and start a new one with this word
                if current:
                    lines.append(current)
                current = word
        if current:
            lines.append(current)

        return '\n'.join(line.ljust(width) for line in lines) + ': '


class OnTheFly:
    """When the microscope is done with a stack, send it to pre-processing."""

    def __init__(self, inputs):
        self.path = inputs.ba_path_raw if inputs.ba_run_motioncor else inputs.ad_path_motioncor
        self.prefix = inputs.ba_set_prefix2look4
        self.extension = 'tif' if inputs.ba_mc_tif and inputs.ba_run_motioncor else 'mrc'
        self.field_nb = inputs.ba_set_field_nb

        # refresh
        self.time_between_checks = 5

        # Queue of stacks. Processed stacks are saved and not reprocessed.
        self.queue_filename = inputs.hidden_queue_filename
        self.processed = self._exclude_queue() if not inputs.ad_run_overwrite else []
        self.queue = None

        # tolerate some inactivity
        self.buffer = 0
        try:
            self.buffer_tolerance_sec = int(inputs.ba_otf_max_time2try) * 60
        except ValueError as err:
            raise ValueError(f'On-the-fly: {err}')
        self.buffer_tolerance = self.buffer_tolerance_sec // self.time_between_checks

        # catch the available raw files
        self.data = None
        self.data_stacks_available = None
        self.data_current = None

        # compute the current stack
        self.stack_current = None
        self.len_current_stack_previous_check = 0
        self.len_current_stack = 0
        try:
            self.len_expected = int(inputs.ba_otf_max_images_per_stack)
        except ValueError as err:
            raise ValueError(f'On-the-fly: {err}')

    def run(self, inputs):
        """
        On-the-fly: run preprocessing while data is being written...

        How does it works:
            (loop every n seconds).
            1) Catch mrc|tif files in path (raw or motioncor).
            2) Group the files in tilt-series.
            3) Split stack in two: old stack and current stack. If only one stack, old stack is not defined.
                - old stack: added to the queue if not already processed.
            4) Decide if current stack is finished or not. If so, add to the queue if not already processed.
            5) Send the queue to pre-processing and clear the queue.

        Current stack:
            - The program use a buffer to "remember" how long it's been since the last change in the raw files.
            - When a tilt-series is being written, the buffer will be reset every time a new image (from the
              same stack) is detected. Therefore, the program will tolerate having nothing to send to
              processing for a long time (a tilt-series can be acquired in more than 40min).
            - The user has to specify the expected number of images per stack. The program will send the stack
              to processing if this number is reached.
              NB:   When a tilt-series is send to processing, the program tag this stack as processed and
                    will no longer touch it.
                    If a stack has more images than expected, it becomes ambiguous so the program will stop
                    by raising an AssertationError.

            - Stack with less images than expected:
                - If new images of this stack are detected (the microscope is doing the acquisition of this stack),
                  then the program will wait for it to finish.
                - The tolerated time between images is set by the user. If nothing is written after this
                  tolerated time of inactivity, the tilt-series is send to processing (no matter the number
                  of images) and the program stops. It is the only way I found to process the last tilt-series
                  of an acquisition with missing images...

        NB: If an old stack has less images than expected, the program should handle this without any difficulty.
        NB: Stack that are already processed (toolbox_stack_processed.txt) are already in self.processed (__init__),
            so this function will not send them to pre-processing.
        NB: --stack is ignored: positive selection cannot be used with --fly
        """
        running = True
        while running:
            print(f"\rFly: Buffer = "
                  f"{round(self.buffer * self.time_between_checks)} /{self.buffer_tolerance_sec}sec",
                  end='')

            time.sleep(self.time_between_checks)
            self._get_files()

            # the goal is to identify the tilt-series that are finished and register them in this list
            self.queue = []

            # split the files into the current stack and old stacks if any
            len_avail = len(self.data_stacks_available)
            if len_avail == 1:
                self.stack_current = self.data_stacks_available[0]
            elif len_avail > 1:
                self._get_old_stacks()
            else:
                self.buffer += 1
                if self.buffer == self.buffer_tolerance:
                    running = False
                continue

            # it is more tricky to know what to do with the last stack
            self._analyse_last_stack()

            # if the buffer reaches the limit, it means nothing is happening for too long, so stop
            if self.buffer == self.buffer_tolerance:
                if self.stack_current not in self.processed:
                    self.queue.append(self.stack_current)
                    self.processed.append(self.stack_current)
                running = False

            # send to preprocessing
            if self.queue:
                print('\n')
                inputs.ad_run_nb = self.queue
                preprocessing(inputs)

            # reset the length if necessary
            self.len_current_stack_previous_check = self.len_current_stack

    def _exclude_queue(self):
        """Extract the stacks already processed from inputs.ba_hidden_queue_filename."""
        try:
            with open(self.queue_filename, 'r') as f:
                remove_stack = f.readlines()

                list2remove = []
                for line in remove_stack:
                    line = [int(i) for i in line.strip('\n').strip(' ').strip(':').split(':') if i != '']
                    list2remove += line
                return list2remove

        except IOError:
            # first time running
            return []

    @staticmethod
    def _set_ordered(iterable2clean):
        """Remove redundant values while preserving the order."""
        cleaned = []
        for item in iterable2clean:
            if item not in cleaned:
                cleaned.append(item)

        return cleaned

    def _get_files_number(self, file):
        filename_split = file.split('/')[-1].split('_')
        return int(''.join(i for i in filename_split[self.field_nb] if i.isdigit()))

    def _get_files(self):
        """
        Catch the raw files in path, order them by time of writing and set the
        number of the stack.
        """
        files = sorted(glob(f'{self.path}/{self.prefix}*.{self.extension}'), key=os.path.getmtime)
        self.data = pd.DataFrame(dict(raw=files))
        self.data['nb'] = self.data['raw'].map(self._get_files_number)
        self.data_stacks_available = self._set_ordered(self.data['nb'])

    def _get_old_stacks(self):
        """
        At this point, we know there is more than one stack.
        Therefore, old stack are finished and can be processed.
        """
        stack_current = self.data_stacks_available[-1]
        if stack_current != self.stack_current:
            self.len_current_stack_previous_check = 0
        self.stack_current = stack_current