code · pull · Mar 1, 2026 · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026
diff --git a/app/frontend_management.py b/app/frontend_management.py
@@ -17,7 +17,7 @@
 import requests
 from typing_extensions import NotRequired
 
-from utils.install_util import get_missing_requirements_message, requirements_path
+from utils.install_util import get_missing_requirements_message, get_required_packages_versions
 
 from comfy.cli_args import DEFAULT_VERSION_STRING
 import app.logger
@@ -45,25 +45,7 @@ def get_installed_frontend_version():
 
 
 def get_required_frontend_version():
-    """Get the required frontend version from requirements.txt."""
-    try:
-        with open(requirements_path, "r", encoding="utf-8") as f:
-            for line in f:
-                line = line.strip()
-                if line.startswith("comfyui-frontend-package=="):
-                    version_str = line.split("==")[-1]
-                    if not is_valid_version(version_str):
-                        logging.error(f"Invalid version format in requirements.txt: {version_str}")
-                        return None
-                    return version_str
-            logging.error("comfyui-frontend-package not found in requirements.txt")
-            return None
-    except FileNotFoundError:
-        logging.error("requirements.txt not found. Cannot determine required frontend version.")
-        return None
-    except Exception as e:
-        logging.error(f"Error reading requirements.txt: {e}")
-        return None
+    return get_required_packages_versions().get("comfyui-frontend-package", None)
 
 
 def check_frontend_version():
@@ -217,25 +199,7 @@ def get_installed_templates_version(cls) -> str:
 
     @classmethod
     def get_required_templates_version(cls) -> str:
-        """Get the required workflow templates version from requirements.txt."""
-        try:
-            with open(requirements_path, "r", encoding="utf-8") as f:
-                for line in f:
-                    line = line.strip()
-                    if line.startswith("comfyui-workflow-templates=="):
-                        version_str = line.split("==")[-1]
-                        if not is_valid_version(version_str):
-                            logging.error(f"Invalid templates version format in requirements.txt: {version_str}")
-                            return None
-                        return version_str
-                logging.error("comfyui-workflow-templates not found in requirements.txt")
-                return None
-        except FileNotFoundError:
-            logging.error("requirements.txt not found. Cannot determine required templates version.")
-            return None
-        except Exception as e:
-            logging.error(f"Error reading requirements.txt: {e}")
-            return None
+        return get_required_packages_versions().get("comfyui-workflow-templates", None)
 
     @classmethod
     def default_frontend_path(cls) -> str:

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
@@ -146,6 +146,7 @@ def from_string(cls, value: str):
 
 parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS", help="Use async weight offloading. An optional argument controls the amount of offload streams. Default is 2. Enabled by default on Nvidia.")
 parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.")
+parser.add_argument("--disable-dynamic-vram", action="store_true", help="Disable dynamic VRAM and use estimate based model loading.")
 
 parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.")
 
@@ -159,7 +160,6 @@ class PerformanceFeature(enum.Enum):
     Fp8MatrixMultiplication = "fp8_matrix_mult"
     CublasOps = "cublas_ops"
     AutoTune = "autotune"
-    DynamicVRAM = "dynamic_vram"
 
 parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. This is used to test new features so using it might crash your comfyui. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: {}".format(" ".join(map(lambda c: c.value, PerformanceFeature))))
 
@@ -260,4 +260,4 @@ def is_valid_directory(path: str) -> str:
     args.fast = set(args.fast)
 
 def enables_dynamic_vram():
-    return PerformanceFeature.DynamicVRAM in args.fast and not args.highvram and not args.gpu_only
+    return not args.disable_dynamic_vram and not args.highvram and not args.gpu_only and not args.novram and not args.cpu
diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py
@@ -1621,3 +1621,118 @@ def block_wrap(args):
         # unpatchify
         x = self.unpatchify(x, grid_sizes)
         return x
+
+class SCAILWanModel(WanModel):
+    def __init__(self, model_type="scail", patch_size=(1, 2, 2), in_dim=20, dim=5120, operations=None, device=None, dtype=None, **kwargs):
+        super().__init__(model_type='i2v', patch_size=patch_size, in_dim=in_dim, dim=dim, operations=operations, device=device, dtype=dtype, **kwargs)
+
+        self.patch_embedding_pose = operations.Conv3d(in_dim, dim, kernel_size=patch_size, stride=patch_size, device=device, dtype=torch.float32)
+
+    def forward_orig(self, x, t, context, clip_fea=None, freqs=None, transformer_options={}, pose_latents=None, reference_latent=None, **kwargs):
+
+        if reference_latent is not None:
+            x = torch.cat((reference_latent, x), dim=2)
+
+        # embeddings
+        x = self.patch_embedding(x.float()).to(x.dtype)
+        grid_sizes = x.shape[2:]
+        transformer_options["grid_sizes"] = grid_sizes
+        x = x.flatten(2).transpose(1, 2)
+
+        scail_pose_seq_len = 0
+        if pose_latents is not None:
+            scail_x = self.patch_embedding_pose(pose_latents.float()).to(x.dtype)
+            scail_x = scail_x.flatten(2).transpose(1, 2)
+            scail_pose_seq_len = scail_x.shape[1]
+            x = torch.cat([x, scail_x], dim=1)
+            del scail_x
+
+        # time embeddings
+        e = self.time_embedding(sinusoidal_embedding_1d(self.freq_dim, t.flatten()).to(dtype=x[0].dtype))
+        e = e.reshape(t.shape[0], -1, e.shape[-1])
+        e0 = self.time_projection(e).unflatten(2, (6, self.dim))
+
+        # context
+        context = self.text_embedding(context)
+
+        context_img_len = None
+        if clip_fea is not None:
+            if self.img_emb is not None:
+                context_clip = self.img_emb(clip_fea)  # bs x 257 x dim
+                context = torch.cat([context_clip, context], dim=1)
+            context_img_len = clip_fea.shape[-2]
+
+        patches_replace = transformer_options.get("patches_replace", {})
+        blocks_replace = patches_replace.get("dit", {})
+        transformer_options["total_blocks"] = len(self.blocks)
+        transformer_options["block_type"] = "double"
+        for i, block in enumerate(self.blocks):
+            transformer_options["block_index"] = i
+            if ("double_block", i) in blocks_replace:
+                def block_wrap(args):
+                    out = {}
+                    out["img"] = block(args["img"], context=args["txt"], e=args["vec"], freqs=args["pe"], context_img_len=context_img_len, transformer_options=args["transformer_options"])
+                    return out
+                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": e0, "pe": freqs, "transformer_options": transformer_options}, {"original_block": block_wrap})
+                x = out["img"]
+            else:
+                x = block(x, e=e0, freqs=freqs, context=context, context_img_len=context_img_len, transformer_options=transformer_options)
+
+        # head
+        x = self.head(x, e)
+
+        if scail_pose_seq_len > 0:
+            x = x[:, :-scail_pose_seq_len]
+
+        # unpatchify
+        x = self.unpatchify(x, grid_sizes)
+
+        if reference_latent is not None:
+            x = x[:, :, reference_latent.shape[2]:]
+
+        return x
+
+    def rope_encode(self, t, h, w, t_start=0, steps_t=None, steps_h=None, steps_w=None, device=None, dtype=None, pose_latents=None, reference_latent=None, transformer_options={}):
+        main_freqs = super().rope_encode(t, h, w, t_start=t_start, steps_t=steps_t, steps_h=steps_h, steps_w=steps_w, device=device, dtype=dtype, transformer_options=transformer_options)
+
+        if pose_latents is None:
+            return main_freqs
+
+        ref_t_patches = 0
+        if reference_latent is not None:
+            ref_t_patches = (reference_latent.shape[2] + (self.patch_size[0] // 2)) // self.patch_size[0]
+
+        F_pose, H_pose, W_pose = pose_latents.shape[-3], pose_latents.shape[-2], pose_latents.shape[-1]
+
+        # if pose is at half resolution, scale_y/scale_x=2 stretches the position range to cover the same RoPE extent as the main frames
+        h_scale = h / H_pose
+        w_scale = w / W_pose
+
+        # 120 w-offset and shift 0.5 to place positions at midpoints (0.5, 2.5, ...) to match the original code
+        h_shift = (h_scale - 1) / 2
+        w_shift = (w_scale - 1) / 2
+        pose_transformer_options = {"rope_options": {"shift_y": h_shift, "shift_x": 120.0 + w_shift, "scale_y": h_scale, "scale_x": w_scale}}
+        pose_freqs = super().rope_encode(F_pose, H_pose, W_pose, t_start=t_start+ref_t_patches, device=device, dtype=dtype, transformer_options=pose_transformer_options)
+
+        return torch.cat([main_freqs, pose_freqs], dim=1)
+
+    def _forward(self, x, timestep, context, clip_fea=None, time_dim_concat=None, transformer_options={}, pose_latents=None, **kwargs):
+        bs, c, t, h, w = x.shape
+        x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size)
+
+        if pose_latents is not None:
+            pose_latents = comfy.ldm.common_dit.pad_to_patch_size(pose_latents, self.patch_size)
+
+        t_len = t
+        if time_dim_concat is not None:
+            time_dim_concat = comfy.ldm.common_dit.pad_to_patch_size(time_dim_concat, self.patch_size)
+            x = torch.cat([x, time_dim_concat], dim=2)
+            t_len = x.shape[2]
+
+        reference_latent = None
+        if "reference_latent" in kwargs:
+            reference_latent = comfy.ldm.common_dit.pad_to_patch_size(kwargs.pop("reference_latent"), self.patch_size)
+            t_len += reference_latent.shape[2]
+
+        freqs = self.rope_encode(t_len, h, w, device=x.device, dtype=x.dtype, transformer_options=transformer_options, pose_latents=pose_latents, reference_latent=reference_latent)
+        return self.forward_orig(x, timestep, context, clip_fea=clip_fea, freqs=freqs, transformer_options=transformer_options, pose_latents=pose_latents, reference_latent=reference_latent, **kwargs)[:, :, :t, :h, :w]
diff --git a/comfy/model_base.py b/comfy/model_base.py
@@ -1502,6 +1502,44 @@ def __init__(self, model_config, model_type=ModelType.IMG_TO_IMG_FLOW, image_to_
         super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel)
         self.image_to_video = image_to_video
 
+class WAN21_SCAIL(WAN21):
+    def __init__(self, model_config, model_type=ModelType.FLOW, image_to_video=False, device=None):
+        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.SCAILWanModel)
+        self.memory_usage_factor_conds = ("reference_latent", "pose_latents")
+        self.memory_usage_shape_process = {"pose_latents": lambda shape: [shape[0], shape[1], 1.5, shape[-2], shape[-1]]}
+        self.image_to_video = image_to_video
+
+    def extra_conds(self, **kwargs):
+        out = super().extra_conds(**kwargs)
+
+        reference_latents = kwargs.get("reference_latents", None)
+        if reference_latents is not None:
+            ref_latent = self.process_latent_in(reference_latents[-1])
+            ref_mask = torch.ones_like(ref_latent[:, :4])
+            ref_latent = torch.cat([ref_latent, ref_mask], dim=1)
+            out['reference_latent'] = comfy.conds.CONDRegular(ref_latent)
+
+        pose_latents = kwargs.get("pose_video_latent", None)
+        if pose_latents is not None:
+            pose_latents = self.process_latent_in(pose_latents)
+            pose_mask = torch.ones_like(pose_latents[:, :4])
+            pose_latents = torch.cat([pose_latents, pose_mask], dim=1)
+            out['pose_latents'] = comfy.conds.CONDRegular(pose_latents)
+
+        return out
+
+    def extra_conds_shapes(self, **kwargs):
+        out = {}
+        ref_latents = kwargs.get("reference_latents", None)
+        if ref_latents is not None:
+            out['reference_latent'] = list([1, 20, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
+
+        pose_latents = kwargs.get("pose_video_latent", None)
+        if pose_latents is not None:
+            out['pose_latents'] = [pose_latents.shape[0], 20, *pose_latents.shape[2:]]
+
+        return out
+
 class Hunyuan3Dv2(BaseModel):
     def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan3d.model.Hunyuan3Dv2)

diff --git a/comfy/model_detection.py b/comfy/model_detection.py
@@ -498,6 +498,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
             dit_config["model_type"] = "humo"
         elif '{}face_adapter.fuser_blocks.0.k_norm.weight'.format(key_prefix) in state_dict_keys:
             dit_config["model_type"] = "animate"
+        elif '{}patch_embedding_pose.weight'.format(key_prefix) in state_dict_keys:
+            dit_config["model_type"] = "scail"
         else:
             if '{}img_emb.proj.0.bias'.format(key_prefix) in state_dict_keys:
                 dit_config["model_type"] = "i2v"

diff --git a/comfy/model_management.py b/comfy/model_management.py
@@ -180,6 +180,14 @@ def is_ixuca():
         return True
     return False
 
+def is_wsl():
+    version = platform.uname().release
+    if version.endswith("-Microsoft"):
+        return True
+    elif version.endswith("microsoft-standard-WSL2"):
+        return True
+    return False
+
 def get_torch_device():
     global directml_enabled
     global cpu_state

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
@@ -308,15 +308,22 @@ def lowvram_patch_counter(self):
     def get_free_memory(self, device):
         return comfy.model_management.get_free_memory(device)
 
-    def clone(self, disable_dynamic=False):
+    def get_clone_model_override(self):
+        return self.model, (self.backup, self.object_patches_backup, self.pinned)
+
+    def clone(self, disable_dynamic=False, model_override=None):
         class_ = self.__class__
-        model = self.model
         if self.is_dynamic() and disable_dynamic:
             class_ = ModelPatcher
-            temp_model_patcher = self.cached_patcher_init[0](*self.cached_patcher_init[1], disable_dynamic=True)
-            model = temp_model_patcher.model
-
-        n = class_(model, self.load_device, self.offload_device, self.model_size(), weight_inplace_update=self.weight_inplace_update)
+            if model_override is None:
+                if self.cached_patcher_init is None:
+                    raise RuntimeError("Cannot create non-dynamic delegate: cached_patcher_init is not initialized.")
+                temp_model_patcher = self.cached_patcher_init[0](*self.cached_patcher_init[1], disable_dynamic=True)
+                model_override = temp_model_patcher.get_clone_model_override()
+        if model_override is None:
+            model_override = self.get_clone_model_override()
+
+        n = class_(model_override[0], self.load_device, self.offload_device, self.model_size(), weight_inplace_update=self.weight_inplace_update)
         n.patches = {}
         for k in self.patches:
             n.patches[k] = self.patches[k][:]
@@ -325,13 +332,12 @@ def clone(self, disable_dynamic=False):
         n.object_patches = self.object_patches.copy()
         n.weight_wrapper_patches = self.weight_wrapper_patches.copy()
         n.model_options = comfy.utils.deepcopy_list_dict(self.model_options)
-        n.backup = self.backup
-        n.object_patches_backup = self.object_patches_backup
         n.parent = self
-        n.pinned = self.pinned
 
         n.force_cast_weights = self.force_cast_weights
 
+        n.backup, n.object_patches_backup, n.pinned = model_override[1]
+
         # attachments
         n.attachments = {}
         for k in self.attachments:
@@ -1435,6 +1441,7 @@ def __init__(self, model, load_device, offload_device, size=0, weight_inplace_up
             del self.model.model_loaded_weight_memory
         if not hasattr(self.model, "dynamic_vbars"):
             self.model.dynamic_vbars = {}
+        self.non_dynamic_delegate_model = None
         assert load_device is not None
 
     def is_dynamic(self):
@@ -1669,4 +1676,10 @@ def patch_hook_weight_to_device(self, hooks: comfy.hooks.HookGroup, combined_pat
     def unpatch_hooks(self, whitelist_keys_set: set[str]=None) -> None:
         pass
 
+    def get_non_dynamic_delegate(self):
+        model_patcher = self.clone(disable_dynamic=True, model_override=self.non_dynamic_delegate_model)
+        self.non_dynamic_delegate_model = model_patcher.get_clone_model_override()
+        return model_patcher
+
+
 CoreModelPatcher = ModelPatcher
diff --git a/comfy/sampler_helpers.py b/comfy/sampler_helpers.py
@@ -66,6 +66,18 @@ def convert_cond(cond):
         out.append(temp)
     return out
 
+def cond_has_hooks(cond):
+    for c in cond:
+        temp = c[1]
+        if "hooks" in temp:
+            return True
+        if "control" in temp:
+            control = temp["control"]
+            extra_hooks = control.get_extra_hooks()
+            if len(extra_hooks) > 0:
+                return True
+    return False
+
 def get_additional_models(conds, dtype):
     """loads additional models in conditioning"""
     cnets: list[ControlBase] = []

diff --git a/comfy/samplers.py b/comfy/samplers.py
@@ -946,6 +946,8 @@ def set_cfg(self, cfg):
 
     def inner_set_conds(self, conds):
         for k in conds:
+            if self.model_patcher.is_dynamic() and comfy.sampler_helpers.cond_has_hooks(conds[k]):
+                self.model_patcher = self.model_patcher.get_non_dynamic_delegate()
             self.original_conds[k] = comfy.sampler_helpers.convert_cond(conds[k])
 
     def __call__(self, *args, **kwargs):