Skip to content

Commit 969b45f

Browse files
committed
feat(handler): Support passing the add_generation_prompt parameter to MTMDChatHandler
- Allows disabling injection of the assistant part (the generation prompt), which is used to support the multimodal `assistant_prefill` functionality. Signed-off-by: JamePeng <jame_peng@sina.com>
1 parent 5068a80 commit 969b45f

2 files changed

Lines changed: 7 additions & 2 deletions

File tree

llama_cpp/llama.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2717,6 +2717,7 @@ def create_chat_completion(
27172717
logprobs: Optional[bool] = None,
27182718
top_logprobs: Optional[int] = None,
27192719
assistant_prefill: bool = False,
2720+
add_generation_prompt: bool = True,
27202721
) -> Union[
27212722
CreateChatCompletionResponse, Iterator[CreateChatCompletionStreamResponse]
27222723
]:
@@ -2829,6 +2830,7 @@ def create_chat_completion(
28292830
active_loras=active_loras,
28302831
control_vector=control_vector,
28312832
assistant_prefill=assistant_prefill,
2833+
add_generation_prompt=add_generation_prompt,
28322834
)
28332835

28342836
def create_chat_completion_openai_v1(

llama_cpp/llama_chat_format.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3079,6 +3079,7 @@ def _process_mtmd_prompt(
30793079
function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None,
30803080
tools: Optional[List[llama_types.ChatCompletionTool]] = None,
30813081
tool_choice: Optional[llama_types.ChatCompletionToolChoiceOption] = None,
3082+
add_generation_prompt: bool = True,
30823083
) -> Tuple[List[int], List[tuple], Any, List[Any]]:
30833084
"""
30843085
Core multimodal preprocessing pipeline.
@@ -3106,7 +3107,7 @@ def _process_mtmd_prompt(
31063107
# 2. Render the chat template and replace actual URLs with C++ media markers
31073108
text = self.chat_template.render(
31083109
messages=messages,
3109-
add_generation_prompt=True,
3110+
add_generation_prompt=add_generation_prompt,
31103111
eos_token=self.mtmd_eos_token,
31113112
bos_token=self.mtmd_bos_token,
31123113
functions=functions,
@@ -3306,6 +3307,7 @@ def __call__(
33063307
logit_bias: Optional[Dict[str, float]] = None,
33073308
logprobs: Optional[bool] = None,
33083309
top_logprobs: Optional[int] = None,
3310+
add_generation_prompt: bool = True,
33093311
**kwargs, # type: ignore
33103312
) -> Union[
33113313
llama_types.CreateChatCompletionResponse,
@@ -3322,7 +3324,8 @@ def __call__(
33223324
functions=functions,
33233325
function_call=function_call,
33243326
tools=tools,
3325-
tool_choice=tool_choice
3327+
tool_choice=tool_choice,
3328+
add_generation_prompt=add_generation_prompt,
33263329
)
33273330

33283331
if self.verbose:

0 commit comments

Comments
 (0)