Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions fastdeploy/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@
"FD_WORKER_ALIVE_TIMEOUT": lambda: int(os.getenv("FD_WORKER_ALIVE_TIMEOUT", "30")),
# File path for file storage backend
"FILE_BACKEND_STORAGE_DIR": lambda: str(os.getenv("FILE_BACKEND_STORAGE_DIR", "/tmp/fastdeploy")),

# Whether to log each model parameter's MD5 checksum and dump them to a JSON file
"MD5SUM_PRINT": lambda: bool(int(os.getenv("MD5SUM_PRINT", "0"))),
}


Expand Down
21 changes: 21 additions & 0 deletions fastdeploy/worker/gpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1399,6 +1399,27 @@
)
else:
self.dynamic_weight_manager = DynamicWeightManager(self.fd_config, self.model, self.local_rank)

# save model md5sum
if envs.MD5SUM_PRINT:
logger.info("MD5SUM_PRINT is set to True, saving model md5sum...")
self._save_model_md5sum()

def _save_model_md5sum(self) -> None:
    """Log and save the MD5 checksum of every entry in the model's state dict.

    For each ``(key, tensor)`` pair in ``self.model.state_dict()`` the MD5 hex
    digest of the tensor's raw bytes is computed and logged. The resulting
    ``{key: digest}`` mapping is then written as JSON to
    ``<envs.FD_LOG_DIR>/md5sum_rank_<rank>.json``, where ``<rank>`` is the value
    returned by ``paddle.distributed.get_rank()``.

    Raises:
        OSError: If the output directory cannot be created or the file cannot
            be written.
    """
    # Function-scope imports: the linter reported `json` and the previously
    # used `get_tensor_md5` helper as undefined in this module, so everything
    # needed for hashing and serialization is brought into scope here.
    import hashlib
    import json

    md5sum_dict = {}
    for key, param in self.model.state_dict().items():
        # Hash the raw bytes of the tensor's host-side copy.
        # NOTE(review): assumes every state-dict value exposes `.numpy()`
        # (true for paddle.Tensor) - confirm for custom/quantized weights.
        md5sum_dict[key] = hashlib.md5(param.numpy().tobytes()).hexdigest()
        logger.info(f">>>>>>>>>> {key} : {md5sum_dict[key]}")

    file_name = f"md5sum_rank_{paddle.distributed.get_rank()}.json"
    output_dir = envs.FD_LOG_DIR
    # exist_ok=True already tolerates an existing directory, so no separate
    # existence check is needed.
    os.makedirs(output_dir, exist_ok=True)

    # Mode "w" overwrites any file left over from a previous run.
    with open(os.path.join(output_dir, file_name), "w", encoding="utf-8") as f:
        # indent=4 keeps the JSON readable by eye; ensure_ascii=False writes
        # any non-ASCII characters in the keys verbatim instead of escaping.
        json.dump(md5sum_dict, f, indent=4, ensure_ascii=False)

Check failure on line 1421 in fastdeploy/worker/gpu_model_runner.py

View workflow job for this annotation

GitHub Actions / Pre Commit

Ruff (F821)

fastdeploy/worker/gpu_model_runner.py:1421:13: F821 Undefined name `json`


def get_model(self) -> nn.Layer:
"""Get current model"""
Expand Down
Loading