fiberseq · mtcicero26 · May 7, 2026 · May 7, 2026 · May 7, 2026 · May 7, 2026
diff --git a/docs/REFACTOR_AUDIT.md b/docs/REFACTOR_AUDIT.md
diff --git a/fiberhmm/__init__.py b/fiberhmm/__init__.py
@@ -5,6 +5,16 @@
 
 __version__ = "2.11.0"
 
-from fiberhmm.core.hmm import FiberHMM
 from fiberhmm.core.bam_reader import ContextEncoder, FiberRead, read_bam
-from fiberhmm.core.model_io import load_model, save_model, load_model_with_metadata
+from fiberhmm.core.hmm import FiberHMM
+from fiberhmm.core.model_io import load_model, load_model_with_metadata, save_model
+
+__all__ = [
+    "ContextEncoder",
+    "FiberHMM",
+    "FiberRead",
+    "load_model",
+    "load_model_with_metadata",
+    "read_bam",
+    "save_model",
+]
diff --git a/fiberhmm/cli/apply.py b/fiberhmm/cli/apply.py
@@ -4,22 +4,23 @@
 Applies trained HMM to call chromatin footprints from fiber-seq BAM files.
 """
 
+import argparse
 import os
 import sys
-import glob
-import shutil
-import tempfile
-import argparse
-import numpy as np
+
 import pandas as pd
 
-from fiberhmm.core.model_io import load_model_with_metadata
-from fiberhmm.inference.parallel import process_bam_for_footprints
-from fiberhmm.inference.stats import FootprintStats, collect_stats_from_bam
 from fiberhmm.cli.common import (
-    add_mode_args, add_filter_args, add_edge_trim_args,
-    add_parallel_args, add_stats_args, add_version_args,
+    add_edge_trim_args,
+    add_filter_args,
+    add_mode_args,
+    add_parallel_args,
+    add_stats_args,
+    add_version_args,
 )
+from fiberhmm.core.model_io import load_model_with_metadata
+from fiberhmm.inference.parallel import process_bam_for_footprints
+from fiberhmm.inference.stats import collect_stats_from_bam
 
 
 def parse_args():
@@ -178,7 +179,7 @@ def main():
     # Load model with metadata
     print(f"Loading model from {model_path}")
     model, model_context_size, model_mode = load_model_with_metadata(model_path)
-    print(f"Model loaded successfully")
+    print("Model loaded successfully")
     print(f"  Start probs: {model.startprob_}")
     print(f"  Transition matrix:\n{model.transmat_}")
 
@@ -206,9 +207,9 @@ def main():
     # Show optimization status
     from fiberhmm.core.hmm import HAS_NUMBA
     if HAS_NUMBA:
-        print(f"  Numba JIT: enabled (fast)")
+        print("  Numba JIT: enabled (fast)")
     else:
-        print(f"  Numba JIT: disabled (pip install numba for ~10x speedup)")
+        print("  Numba JIT: disabled (pip install numba for ~10x speedup)")
 
     # Determine context size (command line overrides model)
     if args.context_size is not None:
@@ -274,21 +275,21 @@ def main():
     print(f"  Min MAPQ: {args.min_mapq}")
     print(f"  Mod prob threshold: {args.prob_threshold}/255")
     if args.circular:
-        print(f"  Circular mode: enabled")
+        print("  Circular mode: enabled")
     if with_scores:
-        print(f"  Confidence scores: enabled")
+        print("  Confidence scores: enabled")
     if args.scores_db:
         print(f"  Scores database: {db_path}")
     if mode == 'daf':
-        print(f"  Strand detection: automatic (C=+, G=-)")
+        print("  Strand detection: automatic (C=+, G=-)")
     elif mode == 'nanopore-fiber':
-        print(f"  Strand detection: none (A-centered only)")
+        print("  Strand detection: none (A-centered only)")
     if args.no_msps:
-        print(f"  MSP output: disabled (--no-msps)")
+        print("  MSP output: disabled (--no-msps)")
     else:
         print(f"  MSP min size: {msp_min_size} bp")
     if args.stats:
-        print(f"  Stats: enabled")
+        print("  Stats: enabled")
     print()
 
     # Parse chromosomes
@@ -394,7 +395,7 @@ def main():
         print("\nDone!", file=sys.stderr)
     else:
         print("\nDone!")
-        print(f"\nTo extract BED12/bigBed for browser visualization:")
+        print("\nTo extract BED12/bigBed for browser visualization:")
         print(f"  fiberhmm-extract-tags -i {output_bam}")
 
 

diff --git a/fiberhmm/cli/call.py b/fiberhmm/cli/call.py
@@ -22,16 +22,16 @@
   fiberhmm-call -i in.bam -o - --enzyme hia5 --seq pacbio | ft fire - -
 """
 import argparse
-import os
 import sys
 
 from fiberhmm.core.model_io import load_model_with_metadata
-from fiberhmm.models import SUPPORTED_ENZYMES, get_model_path as _get_bundled_model
 from fiberhmm.inference.parallel import (
-    _process_bam_streaming_pipeline_fused,
     _process_bam_region_parallel_fused,
+    _process_bam_streaming_pipeline_fused,
 )
 from fiberhmm.inference.tf_recaller import ENZYME_PRESETS
+from fiberhmm.models import SUPPORTED_ENZYMES
+from fiberhmm.models import get_model_path as _get_bundled_model
 
 
 def parse_args():

diff --git a/fiberhmm/cli/common.py b/fiberhmm/cli/common.py
@@ -27,9 +27,9 @@ def add_filter_args(parser: argparse.ArgumentParser,
     """Add read filtering arguments (--min-mapq, --prob-threshold, --min-read-length)."""
     parser.add_argument(
         '--min-mapq', '-q', type=int, default=min_mapq,
-        help=f"Minimum mapping quality; reads below this are written to output "
-             f"unchanged without footprint/nucleosome tags. Default 0 (call on "
-             f"all mapped reads). Pass a positive value to filter."
+        help="Minimum mapping quality; reads below this are written to output "
+             "unchanged without footprint/nucleosome tags. Default 0 (call on "
+             "all mapped reads). Pass a positive value to filter."
     )
     parser.add_argument(
         '--prob-threshold', type=int, default=prob_threshold,

diff --git a/fiberhmm/cli/consensus_tfs.py b/fiberhmm/cli/consensus_tfs.py
@@ -39,11 +39,18 @@
 from scipy.ndimage import gaussian_filter1d
 from scipy.signal import find_peaks
 
-from fiberhmm.io.ma_tags import parse_ma_tag, parse_aq_array
-
+from fiberhmm.io.ma_tags import parse_aq_array, parse_ma_tag
 
 # ── Query → reference coordinate conversion ──────────────────────────────────
 
+def _ref_interval_from_map(ref_map, q_start: int, length: int) -> Tuple[int, int] | Tuple[None, None]:
+    """Return half-open reference coordinates for a query interval."""
+    mapped = [p for p in ref_map[q_start: q_start + length] if p is not None]
+    if not mapped:  # no entry in the query window carries a reference position
+        return None, None  # sentinel pair: there is no interval to report
+    return mapped[0], mapped[-1] + 1   # half-open [start, end)
+
+
 def _ref_interval(read: pysam.AlignedSegment,
                   q_start: int, length: int) -> Tuple[int, int] | Tuple[None, None]:
     """Return (ref_start, ref_end) in half-open BED format for a query interval.
@@ -52,11 +59,7 @@ def _ref_interval(read: pysam.AlignedSegment,
     and takes min/max of non-None entries.  Returns (None, None) if the entire
     footprint falls inside an insertion relative to the reference (rare).
     """
-    ref_map = read.get_reference_positions(full_length=True)
-    mapped = [p for p in ref_map[q_start: q_start + length] if p is not None]
-    if not mapped:
-        return None, None
-    return mapped[0], mapped[-1] + 1   # half-open [start, end)
+    return _ref_interval_from_map(read.get_reference_positions(full_length=True), q_start, length)
 
 
 # ── TF call extraction ────────────────────────────────────────────────────────
@@ -73,7 +76,7 @@ def _iter_tf_calls(read: pysam.AlignedSegment,
     except KeyError:
         return
     try:
-        aq = list(read.get_tag('AQ'))
+        aq = read.get_tag('AQ')
     except KeyError:
         aq = []
 
@@ -91,6 +94,7 @@ def _iter_tf_calls(read: pysam.AlignedSegment,
     # ann_idx must increment for ALL annotations (nuc, msp, tf) to stay
     # in sync with the flat per_ann list built by parse_aq_array.
     ann_idx = 0
+    ref_map = None
     for name, _strand_field, _qspec, intervals in parsed['raw_types']:
         for (q_start, length) in intervals:
             quals = per_ann[ann_idx] if ann_idx < len(per_ann) else []
@@ -100,7 +104,9 @@ def _iter_tf_calls(read: pysam.AlignedSegment,
             tq = int(quals[0]) if len(quals) >= 1 else 0
             if tq < min_tq:
                 continue
-            ref_start, ref_end = _ref_interval(read, q_start, length)
+            if ref_map is None:
+                ref_map = read.get_reference_positions(full_length=True)
+            ref_start, ref_end = _ref_interval_from_map(ref_map, q_start, length)
             if ref_start is None:
                 continue
             yield ref_start, ref_end, strand, tq
@@ -186,8 +192,10 @@ def _process_chrom(chrom: str, chrom_len: int,
             continue
         idx = int(np.searchsorted(peak_pos, center))
         candidates: list[tuple[int, int]] = []
-        if idx > 0:              candidates.append((int(peaks_arr[idx - 1]), idx - 1))
-        if idx < len(peaks_arr): candidates.append((int(peaks_arr[idx]),     idx))
+        if idx > 0:
+            candidates.append((int(peaks_arr[idx - 1]), idx - 1))
+        if idx < len(peaks_arr):
+            candidates.append((int(peaks_arr[idx]), idx))
         if not candidates:
             continue
         closest_val, closest_idx = min(candidates, key=lambda x: abs(x[0] - center))
@@ -269,7 +277,9 @@ def main() -> None:
         bed_out = open(args.output, 'w')
 
     try:
-        _banner = lambda msg: print(f"[fiberhmm-consensus-tfs] {msg}", file=sys.stderr)
+        def _banner(msg):
+            print(f"[fiberhmm-consensus-tfs] {msg}", file=sys.stderr)
+
         _banner(f"input:         {args.input}")
         _banner(f"min-mapq:      {args.min_mapq}")
         _banner(f"min-tq:        {args.min_tq}")

diff --git a/fiberhmm/cli/daf_encode.py b/fiberhmm/cli/daf_encode.py
@@ -5,7 +5,6 @@
 and encodes them as IUPAC Y/R with an st:Z tag for fiberhmm-apply --mode daf.
 """
 
-import sys
 import argparse
 
 from fiberhmm.cli.common import add_version_args

diff --git a/fiberhmm/cli/export_posteriors.py b/fiberhmm/cli/export_posteriors.py
@@ -21,26 +21,31 @@
 
 import argparse
 import os
-import sys
 import time
+from concurrent.futures import ProcessPoolExecutor
+from typing import Dict, List, Optional, Set, Tuple
+
 import numpy as np
-from typing import Dict, List, Optional, Tuple, Set
-from concurrent.futures import ProcessPoolExecutor, as_completed
 import pysam
 from tqdm import tqdm
 
+from fiberhmm.cli.common import (
+    add_edge_trim_args,
+    add_mode_args,
+    add_parallel_args,
+    add_verbose_args,
+    add_version_args,
+)
+
 # Package imports
 from fiberhmm.core.bam_reader import (
-    encode_from_query_sequence, detect_daf_strand,
-    get_reference_positions, ContextEncoder
+    detect_daf_strand,
+    encode_from_query_sequence,
+    get_reference_positions,
 )
-from fiberhmm.core.model_io import load_model_with_metadata
 from fiberhmm.core.hmm import FiberHMM
+from fiberhmm.core.model_io import freeze_model_for_inference, load_model_with_metadata
 from fiberhmm.inference.parallel import _get_genome_regions
-from fiberhmm.cli.common import (
-    add_mode_args, add_parallel_args, add_edge_trim_args,
-    add_verbose_args, add_version_args,
-)
 
 
 def _detect_format(output_path: str, format_arg: str) -> str:
@@ -154,14 +159,15 @@ def _init_worker(model_path: str, params: dict):
     os.environ['NUMBA_CACHE_DIR'] = ''
 
     _worker_model, _, _ = load_model_with_metadata(model_path, normalize=True)
+    _worker_model = freeze_model_for_inference(_worker_model)
     _worker_params = params
 
     # Warmup numba JIT with a dummy sequence
     dummy = np.zeros(100, dtype=np.int32)
     try:
         _worker_model.predict(dummy)
         _worker_model.predict_proba(dummy)
-    except:
+    except Exception:
         pass  # OK if warmup fails
 
 
@@ -345,9 +351,10 @@ def on_results(chrom, results):
                 fp_sizes=fiber['footprint_sizes'],
             )
 
-    _process_regions(regions, input_bam, model_path, params, n_cores, verbose, on_results)
-
-    total = writer.close()
+    try:
+        _process_regions(regions, input_bam, model_path, params, n_cores, verbose, on_results)
+    finally:
+        total = writer.close()
 
     if verbose:
         out_file = writer.output_path
@@ -603,7 +610,6 @@ def get_fibers_spanning(self, chrom: str, start: int, end: int) -> List['FiberPo
         return self._load_fibers(chrom, indices)
 
     def _load_fibers(self, chrom: str, indices: np.ndarray) -> List['FiberPosterior']:
-        import h5py
         grp = self.h5[chrom]
         ids = grp['fiber_ids']
         starts = grp['fiber_starts']