# Source: ComfyUI-BiRefNet/BiRefNet_node.py (twang3/ComfyUI-BiRefNet, main branch)

import os
import sys
import time

from comfy.model_management import get_torch_device
from comfy_extras.nodes_cache import load_model_cached

sys.path.insert(0, os.path.dirname(__file__))

from collections import defaultdict
import folder_paths
from models.baseline import BiRefNet
from config import Config

import cv2
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
from torchvision import transforms

from loguru import logger
from folder_paths import models_dir

# Module-wide configuration instance; BiRefNet_node.load passes it to
# BiRefNet_img_processor, which reads `config.size` for the input resolution.
config = Config()

class BiRefNet_img_processor:
    """Convert an image array into a normalized tensor for the model.

    The target resolution is ``config.size`` x ``config.size``.
    """

    def __init__(self, config):
        """Store *config* and build the torchvision transform pipeline.

        Args:
            config: Object exposing an integer-like ``size`` attribute.
        """
        self.config = config
        self.data_size = (config.size, config.size)
        self.transform_image = transforms.Compose([
            # The image is already resized by cv2 in __call__, so it arrives
            # here at the target size.
            transforms.Resize(self.data_size),
            transforms.ToTensor(),
            # These values match the commonly used ImageNet channel mean/std.
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

    def __call__(self, _image: np.ndarray):
        """Resize, quantize to 8 bit, and normalize *_image*.

        Args:
            _image: Image array scaled by 255 before the uint8 cast, i.e.
                presumably float values in [0, 1] — verify against caller.

        Returns:
            The tensor produced by the transform pipeline.
        """
        _image_rs = cv2.resize(
            _image,
            (self.config.size, self.config.size),
            interpolation=cv2.INTER_LINEAR,
        )
        # Clip before casting: a bare uint8 cast wraps around for values that
        # fall outside 0..255 (e.g. 256 becomes 0) instead of saturating.
        pixels = np.clip(_image_rs * 255, 0, 255).astype(np.uint8)
        _image_rs = Image.fromarray(pixels).convert('RGB')
        return self.transform_image(_image_rs)

class BiRefNet_node:
    """ComfyUI node that runs BiRefNet on an image and returns a mask tensor.

    The model is loaded lazily on the first ``matting`` call from
    ``<models_dir>/BiRefNet/BiRefNet-DIS_ep580.pth``.
    """

    def __init__(self):
        # Becomes True once `load` has populated self.model / self.processor.
        self.ready = False

    def load(self, weight_path, device, verbose=False):
        """Load the BiRefNet weights and build the image pre-processor.

        Args:
            weight_path: Checkpoint path handed to ``torch.load``.
            device: Device the model is moved to after loading.
            verbose: When true, emit a debug log line on success.

        Raises:
            RuntimeError: If anything fails while loading; the original
                exception is attached as ``__cause__``.
        """
        def load_model():
            started = time.perf_counter()
            model = BiRefNet()
            # Always deserialize onto the CPU: the checkpoint may have been
            # saved from a device that does not exist (or is not the selected
            # one) on this machine. The model is moved to `device` below.
            # NOTE(security): torch.load unpickles the file — only load
            # checkpoints from a trusted source.
            state_dict = torch.load(weight_path, map_location='cpu')
            # Strip the '_orig_mod.' key prefix (presumably left by a
            # torch.compile wrapper — confirm) so keys match the plain model.
            unwanted_prefix = '_orig_mod.'
            for key in list(state_dict):
                if key.startswith(unwanted_prefix):
                    state_dict[key[len(unwanted_prefix):]] = state_dict.pop(key)

            model.load_state_dict(state_dict)
            model.to(device)
            model.eval()
            print(f"BiRefNet model loaded in {time.perf_counter() - started:.2f}s")
            return model

        try:
            self.model = load_model_cached(weight_path, load_model, device)
            self.processor = BiRefNet_img_processor(config)
        except Exception as e:
            logger.error(f"Failed to load the model: {e}")
            self.ready = False
            # Chain the cause so the original traceback is not lost.
            raise RuntimeError(f"Model loading failed: {e}") from e
        else:
            self.ready = True
            if verbose:
                logger.debug("Model loaded successfully on device: {}".format(device))

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node inputs: an image and a device selector.

        The device choices are "auto", "cpu", "mps" plus one "cuda:<i>" entry
        per CUDA device reported by torch; the default is "auto".
        """
        cuda_devices = [f"cuda:{i}" for i in range(torch.cuda.device_count())]
        return {
            "required": {
                "image": ("IMAGE", {}),
                "device": (["auto", "cpu", "mps"] + cuda_devices, {"default": "auto"}),
            },
            "optional": {
                # No optional inputs.
            }
        }

    RETURN_TYPES = ("MASK", )
    RETURN_NAMES = ("mask", )
    FUNCTION = "matting"
    CATEGORY = "Fooocus"

    def matting(self, image, device):
        """Run BiRefNet on *image* and return the predicted mask.

        Args:
            image: Image tensor; after ``squeeze()`` its first two dimensions
                are used as the output size (assumes a single H x W x C image
                — TODO confirm against caller).
            device: One of the ``INPUT_TYPES`` device choices. "auto" picks
                MPS if available, else the ComfyUI torch device when CUDA is
                available, else the CPU.

        Returns:
            The sigmoid of the model's last output, bilinearly resized to the
            input's first two dimensions.

        Raises:
            RuntimeError: If the model has to be loaded and loading fails.
        """
        # Resolve the "auto" device choice.
        if device == "auto":
            if torch.backends.mps.is_available():
                device = "mps"
            elif torch.cuda.is_available():
                device = get_torch_device()
            else:
                device = "cpu"

        if not self.ready:
            weight_path = os.path.join(models_dir, "BiRefNet", "BiRefNet-DIS_ep580.pth")
            self.load(weight_path, device=device)

        # detach()/cpu() keep the NumPy conversion working when the tensor
        # lives on an accelerator or is tracked by autograd.
        image = image.squeeze().detach().cpu().numpy()
        img = self.processor(image)
        inputs = img[None, ...].to(device)
        logger.debug(f"{inputs.shape}")

        before = time.perf_counter()
        with torch.no_grad():
            self.model.to(device)  # Move the model to the selected device
            scaled_preds = self.model(inputs)[-1].sigmoid()
        print(f"BiRefNet model inference time: {time.perf_counter() - before:.2f}s")

        # Resize the first prediction back to the input's spatial size.
        res = nn.functional.interpolate(
            scaled_preds[0].unsqueeze(0),
            size=image.shape[:2],
            mode='bilinear',
            align_corners=True
        )
        # NOTE(review): this returns the bare tensor rather than a tuple
        # matching RETURN_TYPES; confirm that the executor indexing the result
        # with [0] yields the intended mask before changing the return type.
        return res


# Node identifier -> class implementing the node.
NODE_CLASS_MAPPINGS = {"BiRefNet": BiRefNet_node}

# Node identifier -> human-readable title shown for the node.
NODE_DISPLAY_NAME_MAPPINGS = {"BiRefNet": "BiRefNet Segmentation"}