Skip to content

Commit 57404bc

Browse files
author
LittleMouse
committed
[update] add internvl3-1B config file, update postprocess.
1 parent 92b10ac commit 57404bc

File tree

3 files changed

+100
-2
lines changed

3 files changed

+100
-2
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"mode": "internvl3-1B-ax630c",
3+
"type": "vlm",
4+
"homepage": "https://huggingface.co/AXERA-TECH/InternVL3-1B",
5+
"capabilities": [
6+
"text_generation",
7+
"chat"
8+
],
9+
"input_type": [
10+
"vlm.chat_completion",
11+
"vlm.chat_completion.stream"
12+
],
13+
"output_type": [
14+
"vlm.utf-8",
15+
"vlm.utf-8.stream"
16+
],
17+
"mode_param": {
18+
"tokenizer_type": 2,
19+
"url_tokenizer_model": "http://localhost:8080",
20+
"filename_tokens_embed": "model.embed_tokens.weight.bfloat16.bin",
21+
"filename_post_axmodel": "qwen2_post.axmodel",
22+
"template_filename_axmodel": "qwen2_p128_l%d_together.axmodel",
23+
"filename_image_encoder_axmodedl": "internvl3_1b_vit.axmodel",
24+
"b_bos": false,
25+
"b_eos": false,
26+
"axmodel_num": 24,
27+
"tokens_embed_num": 151674,
28+
"img_token_id": 151667,
29+
"tokens_embed_size": 896,
30+
"b_use_mmap_load_embed": true,
31+
"b_dynamic_load_axmodel_layer": false,
32+
"precompute_len": 1024,
33+
"ext_scripts": [
34+
"tokenizer_internvl3-1B-ax630c.py"
35+
]
36+
}
37+
}

projects/llm_framework/main_vlm/src/runner/LLM.hpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -801,11 +801,29 @@ class LLM_CTX {
801801
_attr.prefill_max_kv_cache_num_grp[_attr.prefill_max_kv_cache_num_grp.size() - 1];
802802
ALOGI("prefill_max_token_num : %d", _attr.prefill_max_token_num);
803803
}
804+
nlohmann::json dynamic_config;
805+
806+
dynamic_config["enable_temperature"] = _attr.enable_temperature;
807+
dynamic_config["temperature"] = _attr.temperature;
808+
809+
dynamic_config["enable_repetition_penalty"] = _attr.enable_repetition_penalty;
810+
dynamic_config["repetition_penalty"] = _attr.repetition_penalty;
811+
dynamic_config["penalty_window"] = _attr.penalty_window;
812+
813+
dynamic_config["enable_top_p_sampling"] = _attr.enable_top_p_sampling;
814+
dynamic_config["top_p"] = _attr.top_p;
815+
816+
dynamic_config["enable_top_k_sampling"] = _attr.enable_top_k_sampling;
817+
dynamic_config["top_k"] = _attr.top_k;
804818

805819
if (!postprocess.load_config(attr.post_config_path)) {
806820
ALOGW("load postprocess config(%s) failed", attr.post_config_path.c_str());
807821
}
808822

823+
if (!postprocess.load_config(dynamic_config)) {
824+
ALOGW("load postprocess config(%s) failed", dynamic_config.dump(4).c_str());
825+
}
826+
809827
ALOGI("LLM init ok");
810828
return true;
811829
}
@@ -1338,7 +1356,6 @@ class LLM_CTX {
13381356
return 0;
13391357
}
13401358

1341-
13421359
int Encode(std::vector<std::vector<unsigned short>> &imgs_embed, std::vector<unsigned short> &out_embed,
13431360
std::string prompt, std::vector<int> &tokens_ids, std::vector<int> &tokens_diff)
13441361
{

projects/llm_framework/main_vlm/src/runner/LLMPostprocess.hpp

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,10 +242,11 @@ class LLMPostprocess
242242
this->temperature = temperature;
243243
}
244244

245-
void set_repetition_penalty(bool enable, float penalty)
245+
void set_repetition_penalty(bool enable, float penalty, int penalty_window)
246246
{
247247
enable_repetition_penalty = enable;
248248
this->repetition_penalty = penalty;
249+
this->penalty_window = penalty_window;
249250
}
250251

251252
void set_diversity_penalty(bool enable, const std::vector<int> &common_phrases, float penalty)
@@ -295,6 +296,49 @@ class LLMPostprocess
295296
return true;
296297
}
297298

299+
/**
 * Merge sampling/post-processing options from an already-parsed JSON object
 * (in-memory overload of the file-based load_config).
 *
 * Only keys present in `config` are applied; absent keys leave the current
 * member values untouched, so this can be layered on top of a file config.
 *
 * Recognized keys: enable_temperature, temperature,
 * enable_repetition_penalty, repetition_penalty, penalty_window,
 * enable_top_p_sampling, top_p, enable_top_k_sampling, top_k.
 *
 * @param config JSON object with zero or more of the keys above.
 * @return true on success; false when config is null/invalid or a present
 *         key holds a value of the wrong JSON type.
 */
bool load_config(const nlohmann::json& config)
{
    if (config.is_null()) {
        ALOGE("config is null or invalid");
        return false;
    }

    ALOGI("load config: \n%s\n", config.dump(4).c_str());

    // get<T>() throws nlohmann::json::type_error when the stored value does
    // not match T (e.g. "temperature": "high"). Catch json exceptions so a
    // malformed config is reported through the bool contract instead of
    // propagating out of the loader.
    try {
        if (config.contains("enable_temperature")) {
            enable_temperature = config["enable_temperature"].get<bool>();
        }
        if (config.contains("temperature")) {
            temperature = config["temperature"].get<float>();
        }

        if (config.contains("enable_repetition_penalty")) {
            enable_repetition_penalty = config["enable_repetition_penalty"].get<bool>();
        }
        if (config.contains("repetition_penalty")) {
            repetition_penalty = config["repetition_penalty"].get<float>();
        }
        if (config.contains("penalty_window")) {
            penalty_window = config["penalty_window"].get<int>();
        }

        if (config.contains("enable_top_p_sampling")) {
            enable_top_p_sampling = config["enable_top_p_sampling"].get<bool>();
        }
        if (config.contains("top_p")) {
            top_p = config["top_p"].get<float>();
        }

        if (config.contains("enable_top_k_sampling")) {
            enable_top_k_sampling = config["enable_top_k_sampling"].get<bool>();
        }
        if (config.contains("top_k")) {
            top_k = config["top_k"].get<int>();
        }
    } catch (const nlohmann::json::exception& e) {
        ALOGE("invalid value type in config: %s", e.what());
        return false;
    }

    return true;
}
341+
298342
int apply(std::vector<float> &logits, const std::vector<int> &history)
299343
{
300344
if (enable_temperature)

0 commit comments

Comments
 (0)