-
Notifications
You must be signed in to change notification settings - Fork 13.5k
security: prevent path traversal in dataset nodes and use weights_only for torch.load #14166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,13 @@ | |
| from comfy_api.latest import ComfyExtension, io | ||
|
|
||
|
|
||
| def _sanitize_folder_name(base_dir, folder_name): | ||
| resolved = os.path.abspath(os.path.join(base_dir, folder_name)) | ||
| if not resolved.startswith(os.path.abspath(base_dir)): | ||
| raise ValueError("Path traversal detected in folder_name") | ||
| return resolved | ||
|
|
||
|
|
||
| def load_and_process_images(image_files, input_dir): | ||
| """Utility function to load and process a list of images. | ||
|
|
||
|
|
@@ -252,8 +259,8 @@ def execute(cls, images, folder_name, filename_prefix, mode): | |
| filename_prefix = filename_prefix[0] | ||
| mode = mode[0] | ||
|
|
||
| output_dir = os.path.join(folder_paths.get_output_directory(), folder_name) | ||
| saved_files = save_images_to_folder(images, output_dir, filename_prefix, mode=='overwrite') | ||
| output_dir = _sanitize_folder_name(folder_paths.get_output_directory(), folder_name) | ||
| saved_files = save_images_to_folder(images, output_dir, filename_prefix) | ||
|
Comment on lines
260
to
+263
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
🐛 Proposed fix- saved_files = save_images_to_folder(images, output_dir, filename_prefix)
+ saved_files = save_images_to_folder(images, output_dir, filename_prefix, overwrite=(mode == "overwrite"))🤖 Prompt for AI Agents |
||
|
|
||
| logging.info(f"Saved {len(saved_files)} images to {output_dir}.") | ||
| return io.NodeOutput() | ||
|
|
@@ -306,8 +313,8 @@ def execute(cls, images, folder_name, filename_prefix, mode, texts=None): | |
| filename_prefix = filename_prefix[0] | ||
| mode = mode[0] | ||
|
|
||
| output_dir = os.path.join(folder_paths.get_output_directory(), folder_name) | ||
| saved_files = save_images_to_folder(images, output_dir, filename_prefix, mode=='overwrite') | ||
| output_dir = _sanitize_folder_name(folder_paths.get_output_directory(), folder_name) | ||
| saved_files = save_images_to_folder(images, output_dir, filename_prefix) | ||
|
|
||
| # Save captions | ||
| if texts: | ||
|
|
@@ -1456,7 +1463,7 @@ def execute(cls, latents, conditioning, folder_name, shard_size): | |
| ) | ||
|
|
||
| # Create output directory | ||
| output_dir = os.path.join(folder_paths.get_output_directory(), folder_name) | ||
| output_dir = _sanitize_folder_name(folder_paths.get_output_directory(), folder_name) | ||
| os.makedirs(output_dir, exist_ok=True) | ||
|
|
||
| # Prepare data pairs | ||
|
|
@@ -1538,7 +1545,7 @@ def define_schema(cls): | |
| @classmethod | ||
| def execute(cls, folder_name): | ||
| # Get dataset directory | ||
| dataset_dir = os.path.join(folder_paths.get_output_directory(), folder_name) | ||
| dataset_dir = _sanitize_folder_name(folder_paths.get_output_directory(), folder_name) | ||
|
|
||
| if not os.path.exists(dataset_dir): | ||
| raise ValueError(f"Dataset directory not found: {dataset_dir}") | ||
|
|
@@ -1565,7 +1572,7 @@ def execute(cls, folder_name): | |
| shard_path = os.path.join(dataset_dir, shard_file) | ||
|
|
||
| with open(shard_path, "rb") as f: | ||
| shard_data = torch.load(f) | ||
| shard_data = torch.load(f, weights_only=True) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🌐 Web query:
💡 Result: In PyTorch, the weights_only=True option for torch.load is a security feature designed to prevent arbitrary code execution by restricting the unpickler to a limited set of safe types [1][2]. As of PyTorch 2.6, this setting is the default [2][3][4]. When loading a checkpoint with weights_only=True, if the file contains unsupported types or classes (often triggered when serializing custom objects, modules, or certain third-party types like those from NumPy), an UnpicklingError will occur, specifically identifying an "Unsupported global" [2][5][6]. Supported Types and Allowlisting By default, the weights_only unpickler allows [1][7]: - Tensors - Primitive Python types (e.g., int, float, str, bool) - Dictionaries - Basic collections (like lists and tuples) To allow additional types—such as custom model classes or specific third-party objects—you must explicitly add them to an allowlist using torch.serialization.add_safe_globals [1][8]. How to use add_safe_globals: 1. Identify the missing type from the error message (e.g., GLOBAL main.MyModel) [2][6]. 2. Add the class or function to the safe allowlist before calling torch.load [2][9]. Example: import torch from my_module import MyCustomClass # Register the class as safe torch.serialization.add_safe_globals([MyCustomClass]) # Now the load should succeed ckpt = torch.load("checkpoint.pth", weights_only=True) Best Practices: - Only allowlist types you explicitly trust, as this bypasses the security protections of weights_only=True [2]. - Avoid saving entire model objects; it is best practice to save and load only the state_dict [10]. - If you frequently encounter many missing globals, ensure your checkpoint was saved using standard PyTorch types rather than complex, custom-serialized structures [10]. If you are certain that a file is from a trusted source and it is impractical to allowlist all necessary globals, you may set weights_only=False, though this exposes your environment to potential security vulnerabilities [2][5]. Citations:
🏁 Script executed: #!/bin/bash
set -euo pipefail
# 1) Show context around the load call in question
sed -n '1520,1605p' comfy_extras/nodes_dataset.py | cat -n
# 2) Find where shard files are created/saved (torch.save usage near dataset/shard logic)
rg -n "torch\.save\(" comfy_extras/nodes_dataset.py
rg -n "shard" comfy_extras/nodes_dataset.py | head -n 80
# 3) Find conditioning generation path (encode_from_tokens_scheduled)
rg -n "encode_from_tokens_scheduled" comfy_extras/nodes_dataset.py
# 4) Search for add_safe_globals usage anywhere in repo
rg -n "add_safe_globals" -S .Repository: Comfy-Org/ComfyUI Length of output: 5830 🏁 Script executed: #!/bin/bash
set -euo pipefail
sed -n '1360,1415p' comfy_extras/nodes_dataset.py | cat -n
echo "----"
sed -n '1415,1505p' comfy_extras/nodes_dataset.py | cat -n
echo "----"
sed -n '1490,1575p' comfy_extras/nodes_dataset.py | cat -n
echo "----"
sed -n '1,120p' comfy/utils.py | cat -nRepository: Comfy-Org/ComfyUI Length of output: 14925 🏁 Script executed: #!/bin/bash
set -euo pipefail
# 1) Locate the implementation of encode_from_tokens_scheduled
rg -n "def encode_from_tokens_scheduled|encode_from_tokens_scheduled\s*\(" -S .
# 2) Print a focused excerpt around the implementation (first match)
python3 - <<'PY'
import subprocess, re, sys
res = subprocess.check_output(["bash","-lc","rg -n \"def encode_from_tokens_scheduled|encode_from_tokens_scheduled\\s*\\(\" -S . | head -n 20"], text=True)
print(res)
PYRepository: Comfy-Org/ComfyUI Length of output: 4925 🏁 Script executed: #!/bin/bash
set -euo pipefail
# Read implementation of encode_from_tokens_scheduled
sed -n '250,430p' comfy/sd.py | cat -n
# Also inspect any helper used by encode_from_tokens_scheduled (quick scan nearby)
rg -n "encode_from_tokens_scheduled" -n comfy/sd.pyRepository: Comfy-Org/ComfyUI Length of output: 9983 🏁 Script executed: #!/bin/bash
set -euo pipefail
rg -n "apply_hooks_to_conds" comfy/sd.py comfy -S
rg -n "use_clip_schedule" comfy/sd.py comfy -S
rg -n "add_hooks_to_dict\(" -S comfy/sd.py comfyRepository: Comfy-Org/ComfyUI Length of output: 1389 🏁 Script executed: #!/bin/bash
set -euo pipefail
rg -n "apply_hooks_to_conds" comfy/sd.py comfy -S
rg -n "use_clip_schedule" comfy/sd.py comfy -S
rg -n "add_hooks_to_dict\(" -S comfy/sd.py comfyRepository: Comfy-Org/ComfyUI Length of output: 1389 🏁 Script executed: #!/bin/bash
set -euo pipefail
# Find encode_token_weights implementations
rg -n "def encode_token_weights" comfy -S
# Print a small excerpt around the first few matches to see return types/structure
rg -n "def encode_token_weights" comfy -S | head -n 5 | cut -d: -f1 | uniq | head -n 5 | while read -r f; do
echo "---- $f"
# show around the first occurrence
start=$(rg -n "def encode_token_weights" "$f" -S | head -n 1 | cut -d: -f2)
begin=$((start-20))
end=$((start+80))
sed -n "${begin},${end}p" "$f" | cat -n
doneRepository: Comfy-Org/ComfyUI Length of output: 2256 🏁 Script executed: #!/bin/bash
set -euo pipefail
# 1) Inspect MakeTrainingDataset to see what CLIP object it receives/uses
rg -n "class MakeTrainingDataset|MakeTrainingDataset" comfy_extras/nodes_dataset.py
start=$(rg -n "class MakeTrainingDataset" comfy_extras/nodes_dataset.py | head -n1 | cut -d: -f1)
sed -n "$((start-40)),$((start+220))p" comfy_extras/nodes_dataset.py | cat -n
# 2) Inspect encode_token_weights for common CLIP implementations (SD1 + SDXL)
for f in comfy/sd1_clip.py comfy/sdxl_clip.py comfy/text_encoders/*clip*.py; do
[ -f "$f" ] || continue
lines=$(rg -n "def encode_token_weights" "$f" 2>/dev/null | head -n 3 || true)
if [ -n "$lines" ]; then
echo "==== $f ===="
echo "$lines"
first=$(echo "$lines" | head -n1 | cut -d: -f1)
sed -n "$((first-30)),$((first+120))p" "$f" | cat -n
fi
doneRepository: Comfy-Org/ComfyUI Length of output: 14376 🏁 Script executed: #!/bin/bash
set -euo pipefail
# 1) Check whether use_clip_schedule is ever enabled elsewhere
rg -n "use_clip_schedule\s*=" comfy -S
# 2) For sd1_clip.py, inspect both encode_token_weights definitions
for line in $(rg -n "def encode_token_weights" comfy/sd1_clip.py | cut -d: -f1); do
begin=$((line-30)); if [ "$begin" -lt 1 ]; then begin=1; fi
end=$((line+160))
echo "==== comfy/sd1_clip.py around $line ===="
sed -n "${begin},${end}p" comfy/sd1_clip.py | cat -n
done
# 3) Inspect sdxl_clip.py encode_token_weights definition
for line in $(rg -n "def encode_token_weights" comfy/sdxl_clip.py | cut -d: -f1); do
begin=$((line-30)); if [ "$begin" -lt 1 ]; then begin=1; fi
end=$((line+160))
echo "==== comfy/sdxl_clip.py around $line ===="
sed -n "${begin},${end}p" comfy/sdxl_clip.py | cat -n
doneRepository: Comfy-Org/ComfyUI Length of output: 15149 🏁 Script executed: #!/bin/bash
set -euo pipefail
rg -n "use_clip_schedule" . -S
rg -n "forced_hooks" . -S
rg -n "apply_hooks_to_conds" . -SRepository: Comfy-Org/ComfyUI Length of output: 1674 🏁 Script executed: #!/bin/bash
set -euo pipefail
sed -n '220,290p' comfy_extras/nodes_hooks.py | cat -nRepository: Comfy-Org/ComfyUI Length of output: 3341 🏁 Script executed: #!/bin/bash
set -euo pipefail
rg -n "add_safe_globals" . -S
rg -n "safe_globals" comfy -S
rg -n "HookGroup" comfy -S | head -n 50Repository: Comfy-Org/ComfyUI Length of output: 5709 Hardening risk:
🤖 Prompt for AI Agents |
||
|
|
||
| all_latents.extend(shard_data["latents"]) | ||
| all_conditioning.extend(shard_data["conditioning"]) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
startswithcheck is bypassable by sibling directories sharing the base prefix.resolved.startswith(os.path.abspath(base_dir))is the classic prefix-match pitfall. Withbase = /…/output, afolder_nameof"../output_evil"resolves to/…/output_evil, which starts with/…/outputand so passes the check — yet it lives outside the base directory. Since path-traversal prevention is the whole point of this helper, gate on a trailing separator (oros.path.commonpath) instead.🛡️ Proposed fix
📝 Committable suggestion
🤖 Prompt for AI Agents