Skip to content

Commit 57404bc

Browse files
author
LittleMouse
committed
[update] add internvl3-1B config file, update postprocess.
1 parent 92b10ac commit 57404bc

File tree

3 files changed

+100
-2
lines changed

3 files changed

+100
-2
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"mode": "internvl3-1B-ax630c",
3+
"type": "vlm",
4+
"homepage": "https://huggingface.co/AXERA-TECH/InternVL3-1B",
5+
"capabilities": [
6+
"text_generation",
7+
"chat"
8+
],
9+
"input_type": [
10+
"vlm.chat_completion",
11+
"vlm.chat_completion.stream"
12+
],
13+
"output_type": [
14+
"vlm.utf-8",
15+
"vlm.utf-8.stream"
16+
],
17+
"mode_param": {
18+
"tokenizer_type": 2,
19+
"url_tokenizer_model": "http://localhost:8080",
20+
"filename_tokens_embed": "model.embed_tokens.weight.bfloat16.bin",
21+
"filename_post_axmodel": "qwen2_post.axmodel",
22+
"template_filename_axmodel": "qwen2_p128_l%d_together.axmodel",
23+
"filename_image_encoder_axmodedl": "internvl3_1b_vit.axmodel",
24+
"b_bos": false,
25+
"b_eos": false,
26+
"axmodel_num": 24,
27+
"tokens_embed_num": 151674,
28+
"img_token_id": 151667,
29+
"tokens_embed_size": 896,
30+
"b_use_mmap_load_embed": true,
31+
"b_dynamic_load_axmodel_layer": false,
32+
"precompute_len": 1024,
33+
"ext_scripts": [
34+
"tokenizer_internvl3-1B-ax630c.py"
35+
]
36+
}
37+
}

projects/llm_framework/main_vlm/src/runner/LLM.hpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -801,11 +801,29 @@ class LLM_CTX {
801801
_attr.prefill_max_kv_cache_num_grp[_attr.prefill_max_kv_cache_num_grp.size() - 1];
802802
ALOGI("prefill_max_token_num : %d", _attr.prefill_max_token_num);
803803
}
804+
nlohmann::json dynamic_config;
805+
806+
dynamic_config["enable_temperature"] = _attr.enable_temperature;
807+
dynamic_config["temperature"] = _attr.temperature;
808+
809+
dynamic_config["enable_repetition_penalty"] = _attr.enable_repetition_penalty;
810+
dynamic_config["repetition_penalty"] = _attr.repetition_penalty;
811+
dynamic_config["penalty_window"] = _attr.penalty_window;
812+
813+
dynamic_config["enable_top_p_sampling"] = _attr.enable_top_p_sampling;
814+
dynamic_config["top_p"] = _attr.top_p;
815+
816+
dynamic_config["enable_top_k_sampling"] = _attr.enable_top_k_sampling;
817+
dynamic_config["top_k"] = _attr.top_k;
804818

805819
if (!postprocess.load_config(attr.post_config_path)) {
806820
ALOGW("load postprocess config(%s) failed", attr.post_config_path.c_str());
807821
}
808822

823+
if (!postprocess.load_config(dynamic_config)) {
824+
ALOGW("load postprocess config(%s) failed", dynamic_config.dump(4).c_str());
825+
}
826+
809827
ALOGI("LLM init ok");
810828
return true;
811829
}
@@ -1338,7 +1356,6 @@ class LLM_CTX {
13381356
return 0;
13391357
}
13401358

1341-
13421359
int Encode(std::vector<std::vector<unsigned short>> &imgs_embed, std::vector<unsigned short> &out_embed,
13431360
std::string prompt, std::vector<int> &tokens_ids, std::vector<int> &tokens_diff)
13441361
{

projects/llm_framework/main_vlm/src/runner/LLMPostprocess.hpp

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,10 +242,11 @@ class LLMPostprocess
242242
this->temperature = temperature;
243243
}
244244

245-
void set_repetition_penalty(bool enable, float penalty)
245+
void set_repetition_penalty(bool enable, float penalty, int penalty_window)
246246
{
247247
enable_repetition_penalty = enable;
248248
this->repetition_penalty = penalty;
249+
this->penalty_window = penalty_window;
249250
}
250251

251252
void set_diversity_penalty(bool enable, const std::vector<int> &common_phrases, float penalty)
@@ -295,6 +296,49 @@ class LLMPostprocess
295296
return true;
296297
}
297298

299+
/**
 * Merge sampling/post-processing options from an already-parsed JSON object
 * (in-memory overload of the file-based load_config).
 *
 * Only keys present in `config` are applied; absent keys leave the current
 * member values untouched, so this can be layered on top of a file config.
 *
 * Recognized keys: enable_temperature, temperature,
 * enable_repetition_penalty, repetition_penalty, penalty_window,
 * enable_top_p_sampling, top_p, enable_top_k_sampling, top_k.
 *
 * @param config JSON object with zero or more of the keys above.
 * @return true on success; false when config is null/invalid or a present
 *         key holds a value of the wrong JSON type.
 */
bool load_config(const nlohmann::json& config)
{
    if (config.is_null()) {
        ALOGE("config is null or invalid");
        return false;
    }

    ALOGI("load config: \n%s\n", config.dump(4).c_str());

    // get<T>() throws nlohmann::json::type_error when the stored value does
    // not match T (e.g. "temperature": "high"). Catch json exceptions so a
    // malformed config is reported through the bool contract instead of
    // propagating out of the loader.
    try {
        if (config.contains("enable_temperature")) {
            enable_temperature = config["enable_temperature"].get<bool>();
        }
        if (config.contains("temperature")) {
            temperature = config["temperature"].get<float>();
        }

        if (config.contains("enable_repetition_penalty")) {
            enable_repetition_penalty = config["enable_repetition_penalty"].get<bool>();
        }
        if (config.contains("repetition_penalty")) {
            repetition_penalty = config["repetition_penalty"].get<float>();
        }
        if (config.contains("penalty_window")) {
            penalty_window = config["penalty_window"].get<int>();
        }

        if (config.contains("enable_top_p_sampling")) {
            enable_top_p_sampling = config["enable_top_p_sampling"].get<bool>();
        }
        if (config.contains("top_p")) {
            top_p = config["top_p"].get<float>();
        }

        if (config.contains("enable_top_k_sampling")) {
            enable_top_k_sampling = config["enable_top_k_sampling"].get<bool>();
        }
        if (config.contains("top_k")) {
            top_k = config["top_k"].get<int>();
        }
    } catch (const nlohmann::json::exception& e) {
        ALOGE("invalid value type in config: %s", e.what());
        return false;
    }

    return true;
}
341+
298342
int apply(std::vector<float> &logits, const std::vector<int> &history)
299343
{
300344
if (enable_temperature)

0 commit comments

Comments
 (0)