Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions lightllm/common/basemodel/basemodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,23 @@ def _init_kv_move_buffer(self):

def _check_mem_size(self):
self.max_total_token_num = self.mem_manager.size

assert (
self.max_total_token_num > self.batch_max_tokens
), "max_total_token_num must be greater than batch_max_tokens"
Comment on lines 216 to 218
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For consistency with the detailed error message added below, consider including the actual values of max_total_token_num and batch_max_tokens in this assertion message. This helps users diagnose configuration issues more easily.

Suggested change
assert (
self.max_total_token_num > self.batch_max_tokens
), "max_total_token_num must be greater than batch_max_tokens"
assert self.max_total_token_num > self.batch_max_tokens, (
f"max_total_token_num must be > batch_max_tokens, "
f"got max_total_token_num={self.max_total_token_num}, "
f"batch_max_tokens={self.batch_max_tokens}. "
f"Try setting --batch_max_tokens to a smaller value < {self.max_total_token_num}."
)


# 非个人性能模式下,需要保证 max_seq_length 小于等于 max_total_token_num,
# 这样才能得到完整的上下文长度的支持。个人模式主要是私有化场景,显卡显存不是
# 特别大,可能能分配的 kv 容量有限,无法支持 max_seq_length 的推理。所以个人模式下
# 可以适当放宽这个限制,不做这个校验。
if self.args.performance_mode != "personal":
assert self.max_seq_length <= self.max_total_token_num, (
f"max_total_token_num must be >= max_seq_length, "
f"got max_total_token_num={self.max_total_token_num}, "
f"max_seq_length={self.max_seq_length}. "
f"Try set --max_req_total_len a smaller value < {self.max_total_token_num}."
)
Comment on lines +225 to +230
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The hint at the end of the error message tells the user to pick a value `<` max_total_token_num, but the assertion itself checks `<=`. Since max_seq_length is allowed to equal max_total_token_num, the hint should say `<=` as well. Also, 'Try set' should be 'Try setting' for correct grammar.

Suggested change
assert self.max_seq_length <= self.max_total_token_num, (
f"max_total_token_num must be >= max_seq_length, "
f"got max_total_token_num={self.max_total_token_num}, "
f"max_seq_length={self.max_seq_length}. "
f"Try set --max_req_total_len a smaller value < {self.max_total_token_num}."
)
assert self.max_seq_length <= self.max_total_token_num, (
f"max_total_token_num must be >= max_seq_length, "
f"got max_total_token_num={self.max_total_token_num}, "
f"max_seq_length={self.max_seq_length}. "
f"Try setting --max_req_total_len to a value <= {self.max_total_token_num}."
)


return

def _init_req_manager(self):
Expand Down
Loading