Skip to content

Commit eff3a47

Browse files
author
LittleMouse
committed
[update] perf llm-asr, kws add buttons control.
1 parent 3f608d2 commit eff3a47

File tree

3 files changed

+225
-13
lines changed

3 files changed

+225
-13
lines changed

projects/llm_framework/main_asr/src/main.cpp

Lines changed: 67 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,23 @@ class llm_task {
8585
ENGINE_ONLINE = 3,
8686
} engine_type_ = ENGINE_NCNN;
8787

88+
// Sample rate handed to the offline stream's AcceptWaveform().
static constexpr int kSampleRate = 16000;
// Samples per frame: 160 samples is 10 ms at kSampleRate.
static constexpr int kFrameSamples = 160;
// Length of the pre-roll history in frames. PushPreRollPcm() caps the buffer
// at pre_roll_frames_ * kFrameSamples samples (30 * 160 = 4800 by default).
int pre_roll_frames_{30};
// Most recent raw PCM samples, oldest at the front.
std::deque<int16_t> pre_roll_pcm_{};
// VAD speech flag seen on the previously processed chunk; compared with the
// current flag to find the chunk on which speech starts.
bool prev_vad_detected_{false};
93+
94+
private:
95+
void PushPreRollPcm(const int16_t *pcm, size_t n)
96+
{
97+
pre_roll_pcm_.insert(pre_roll_pcm_.end(), pcm, pcm + n);
98+
99+
const size_t max_samples = (size_t)pre_roll_frames_ * kFrameSamples;
100+
while (pre_roll_pcm_.size() > max_samples) {
101+
pre_roll_pcm_.pop_front();
102+
}
103+
}
104+
88105
public:
89106
std::string model_;
90107
std::string response_format_;
@@ -529,48 +546,86 @@ class llm_task {
529546

530547
void sys_pcm_on_data_onnx(const std::string &raw)
531548
{
549+
if (raw.size() >= sizeof(int16_t)) {
550+
const int16_t *pcm16 = reinterpret_cast<const int16_t *>(raw.data());
551+
size_t n16 = raw.size() / sizeof(int16_t);
552+
PushPreRollPcm(pcm16, n16);
553+
}
554+
532555
static int count = 0;
533556
if (count < delay_audio_frame_) {
534557
buffer_write_char(pcmdata, raw.data(), raw.length());
535558
count++;
536559
return;
537560
}
561+
538562
buffer_write_char(pcmdata, raw.data(), raw.length());
539563
buffer_position_set(pcmdata, 0);
540564

541565
std::vector<float> floatSamples;
566+
floatSamples.reserve((delay_audio_frame_ + 1) * kFrameSamples);
567+
542568
int16_t audio_val;
543569
while (buffer_read_i16(pcmdata, &audio_val, 1)) {
544-
float normalizedSample = static_cast<float>(audio_val) / INT16_MAX;
545-
floatSamples.push_back(normalizedSample);
570+
floatSamples.push_back(static_cast<float>(audio_val) / 32768.0f);
546571
}
572+
547573
buffer_resize(pcmdata, 0);
548574
count = 0;
549575

550576
vad_->AcceptWaveform(floatSamples.data(), floatSamples.size());
577+
578+
bool detected = vad_->IsSpeechDetected();
579+
bool speech_start = (!prev_vad_detected_ && detected);
580+
prev_vad_detected_ = detected;
581+
551582
while (!vad_->Empty()) {
552583
const auto &segment = vad_->Front();
553-
if (!offline_stream_) offline_stream_ = onnx_recognizer_->CreateStream();
554-
offline_stream_->AcceptWaveform(onnx_asr_config_.feat_config.sampling_rate, segment.samples.data(),
555-
segment.samples.size());
584+
585+
if (!offline_stream_) {
586+
offline_stream_ = onnx_recognizer_->CreateStream();
587+
}
588+
589+
if (speech_start && !pre_roll_pcm_.empty()) {
590+
std::vector<float> pre;
591+
pre.reserve(pre_roll_pcm_.size());
592+
for (int16_t s : pre_roll_pcm_) {
593+
pre.push_back(static_cast<float>(s) / 32768.0f);
594+
}
595+
596+
std::vector<float> merged;
597+
merged.reserve(pre.size() + segment.samples.size());
598+
merged.insert(merged.end(), pre.begin(), pre.end());
599+
merged.insert(merged.end(), segment.samples.begin(), segment.samples.end());
600+
601+
offline_stream_->AcceptWaveform(kSampleRate, merged.data(), merged.size());
602+
603+
pre_roll_pcm_.clear();
604+
speech_start = false;
605+
} else {
606+
offline_stream_->AcceptWaveform(kSampleRate, segment.samples.data(), segment.samples.size());
607+
}
608+
556609
onnx_recognizer_->DecodeStream(offline_stream_.get());
610+
557611
const auto &result = offline_stream_->GetResult();
558612
if (!result.text.empty() && out_callback_) {
559613
out_callback_(result.text, true);
560614
}
615+
561616
vad_->Pop();
617+
562618
offline_stream_.reset();
563619
}
564620

565621
{
566-
bool detected = vad_->IsSpeechDetected();
567622
float chunk_ms = (delay_audio_frame_ + 1) * 10.0f;
568-
569623
if (detected) {
570624
silence_ms_accum_ = 0.0f;
571625
} else {
572626
silence_ms_accum_ += chunk_ms;
573627
}
628+
574629
if (silence_ms_accum_ >= silence_timeout) {
575630
if (ensleep_) {
576631
if (pause) pause();
@@ -1034,6 +1089,11 @@ class llm_asr : public StackFlow {
10341089
++it;
10351090
}
10361091
}
1092+
1093+
if (data.find("sys") != std::string::npos) {
1094+
llm_task_obj->audio_flage_ = false;
1095+
}
1096+
10371097
send("None", "None", LLM_NO_ERROR, work_id);
10381098
}
10391099

projects/llm_framework/main_kws/src/main.cpp

Lines changed: 157 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ class llm_task {
8181
int count_frames_ = 0;
8282
long long last_trigger_time_ms_ = -1e9;
8383
long long frame_index_global_ = 0;
84+
int last_btn_204_state = -1;
8485

8586
public:
8687
inline const std::string &model() const
@@ -294,9 +295,9 @@ class llm_task {
294295
#undef CONFIG_AUTO_SET_SHERPA
295296

296297
// Assign axera_config_.<key> from JSON. A value present in config_body takes
// precedence; otherwise the value from obj is used when obj has the key. When
// neither contains the key, the field is left untouched.
#define CONFIG_AUTO_SET_AXERA(obj, key)        \
    if (config_body.contains(#key)) {          \
        axera_config_.key = config_body[#key]; \
    } else if (obj.contains(#key)) {           \
        axera_config_.key = obj[#key];         \
    }
301302

302303
#define OPTS_AUTO_SET(obj, key) \
@@ -537,9 +538,27 @@ class llm_task {
537538
}
538539
}
539540

540-
void trigger()
541+
void trigger_wakeup()
541542
{
542-
if (out_callback_) out_callback_("", true);
543+
if (enwake_audio_ && (!wake_wav_file_.empty()) && play_awake_wav) {
544+
play_awake_wav(wake_wav_file_);
545+
}
546+
if (out_callback_) {
547+
if (enoutput_json_)
548+
out_callback_("{\"reason\":\"button_204\"}", true);
549+
else
550+
out_callback_("", true);
551+
}
552+
}
553+
554+
void set_btn_204_state(int state)
555+
{
556+
last_btn_204_state = state;
557+
}
558+
559+
int get_btn_204_state()
560+
{
561+
return last_btn_204_state;
543562
}
544563

545564
bool delete_model()
@@ -790,6 +809,40 @@ class llm_kws : public StackFlow {
790809
llm_task_obj->sys_pcm_on_data((*next_data));
791810
}
792811

812+
void task_buttons_data(const std::weak_ptr<llm_task> llm_task_obj_weak,
813+
const std::weak_ptr<llm_channel_obj> llm_channel_weak, const std::string &object,
814+
const std::string &data)
815+
{
816+
auto llm_task_obj = llm_task_obj_weak.lock();
817+
auto llm_channel = llm_channel_weak.lock();
818+
if (!(llm_task_obj && llm_channel)) {
819+
return;
820+
}
821+
if (data.empty() || (data == "None")) return;
822+
823+
try {
824+
std::string user_msg = sample_unescapeString(data);
825+
nlohmann::json btn_json = nlohmann::json::parse(user_msg);
826+
827+
if (btn_json.contains("code") && btn_json.contains("vale")) {
828+
int current_code = btn_json["code"];
829+
int current_vale = btn_json["vale"];
830+
831+
if (current_vale == 204) {
832+
int last_code = llm_task_obj->get_btn_204_state();
833+
834+
if (last_code == 0 && current_code == 1) {
835+
llm_task_obj->trigger_wakeup();
836+
}
837+
838+
llm_task_obj->set_btn_204_state(current_code);
839+
}
840+
}
841+
} catch (const std::exception &e) {
842+
SLOGE("Button data JSON parse error: %s", e.what());
843+
}
844+
}
845+
793846
int setup(const std::string &work_id, const std::string &object, const std::string &data) override
794847
{
795848
nlohmann::json error_body;
@@ -836,6 +889,17 @@ class llm_kws : public StackFlow {
836889
llm_channel->subscriber_work_id("", std::bind(&llm_kws::task_user_data, this, _llm_task_obj,
837890
std::weak_ptr<llm_channel_obj>(llm_channel),
838891
std::placeholders::_1, std::placeholders::_2));
892+
} else if (input.find("buttons_thread") != std::string::npos) {
893+
std::string socket_url = "ipc:///tmp/llm/ec_prox.event.socket";
894+
auto business_logic = std::bind(
895+
&llm_kws::task_buttons_data, this, std::weak_ptr<llm_task>(llm_task_obj),
896+
std::weak_ptr<llm_channel_obj>(llm_channel), std::placeholders::_1, std::placeholders::_2);
897+
898+
llm_channel->subscriber(
899+
socket_url, [llm_channel, business_logic](StackFlows::pzmq *p,
900+
const std::shared_ptr<StackFlows::pzmq_data> &d) {
901+
llm_channel->subscriber_event_call(business_logic, p, d);
902+
});
839903
}
840904
}
841905
llm_task_[work_id_num] = llm_task_obj;
@@ -851,6 +915,94 @@ class llm_kws : public StackFlow {
851915
}
852916
}
853917

918+
void link(const std::string &work_id, const std::string &object, const std::string &data) override
919+
{
920+
SLOGI("llm_kws::link:%s", data.c_str());
921+
int ret = 0;
922+
nlohmann::json error_body;
923+
924+
int work_id_num = sample_get_work_id_num(work_id);
925+
if (llm_task_.find(work_id_num) == llm_task_.end()) {
926+
error_body["code"] = -6;
927+
error_body["message"] = "Unit Does Not Exist";
928+
send("None", "None", error_body, work_id);
929+
return;
930+
}
931+
932+
auto llm_channel = get_channel(work_id);
933+
auto llm_task_obj = llm_task_[work_id_num];
934+
935+
if (data.find("sys") != std::string::npos) {
936+
if (audio_url_.empty()) audio_url_ = unit_call("audio", "cap", data);
937+
938+
std::weak_ptr<llm_task> _llm_task_obj = llm_task_obj;
939+
llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr<pzmq_data> &raw) {
940+
if (auto p = _llm_task_obj.lock()) p->sys_pcm_on_data(raw->string());
941+
});
942+
943+
llm_task_obj->audio_flage_ = true;
944+
llm_task_obj->inputs_.push_back(data);
945+
} else if (data.find("buttons_thread") != std::string::npos) {
946+
std::string socket_url = "ipc:///tmp/llm/ec_prox.event.socket";
947+
auto business_logic =
948+
std::bind(&llm_kws::task_buttons_data, this, std::weak_ptr<llm_task>(llm_task_obj),
949+
std::weak_ptr<llm_channel_obj>(llm_channel), std::placeholders::_1, std::placeholders::_2);
950+
951+
llm_channel->subscriber(
952+
socket_url,
953+
[llm_channel, business_logic](StackFlows::pzmq *p, const std::shared_ptr<StackFlows::pzmq_data> &d) {
954+
llm_channel->subscriber_event_call(business_logic, p, d);
955+
});
956+
957+
llm_task_obj->inputs_.push_back(data);
958+
} else {
959+
error_body["code"] = -22;
960+
error_body["message"] = "unsupported link target";
961+
send("None", "None", error_body, work_id);
962+
return;
963+
}
964+
965+
if (ret) {
966+
error_body["code"] = -20;
967+
error_body["message"] = "link false";
968+
send("None", "None", error_body, work_id);
969+
return;
970+
}
971+
send("None", "None", LLM_NO_ERROR, work_id);
972+
}
973+
974+
void unlink(const std::string &work_id, const std::string &object, const std::string &data) override
975+
{
976+
SLOGI("llm_kws::unlink:%s", data.c_str());
977+
nlohmann::json error_body;
978+
979+
int work_id_num = sample_get_work_id_num(work_id);
980+
if (llm_task_.find(work_id_num) == llm_task_.end()) {
981+
error_body["code"] = -6;
982+
error_body["message"] = "Unit Does Not Exist";
983+
send("None", "None", error_body, work_id);
984+
return;
985+
}
986+
987+
auto llm_channel = get_channel(work_id);
988+
auto llm_task_obj = llm_task_[work_id_num];
989+
990+
llm_channel->stop_subscriber_work_id(data);
991+
992+
for (auto it = llm_task_obj->inputs_.begin(); it != llm_task_obj->inputs_.end();) {
993+
if (*it == data)
994+
it = llm_task_obj->inputs_.erase(it);
995+
else
996+
++it;
997+
}
998+
999+
if (data.find("sys") != std::string::npos) {
1000+
llm_task_obj->audio_flage_ = false;
1001+
}
1002+
1003+
send("None", "None", LLM_NO_ERROR, work_id);
1004+
}
1005+
8541006
void taskinfo(const std::string &work_id, const std::string &object, const std::string &data) override
8551007
{
8561008
SLOGI("llm_kws::taskinfo:%s", data.c_str());
@@ -937,7 +1089,7 @@ class llm_kws : public StackFlow {
9371089
_zmq.send_data(out);
9381090
return LLM_NONE;
9391091
}
940-
llm_task_[work_id_num]->trigger();
1092+
llm_task_[work_id_num]->trigger_wakeup();
9411093
return LLM_NONE;
9421094
}
9431095

projects/llm_framework/tools/llm_pack.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ def create_bin_deb(package_name, version, src_folder, revision = 'm5stack1', dep
480480
'llm-model-qwen3-vl-2B-Int4-ax630c':[create_data_deb,'llm-model-qwen3-vl-2B-Int4-ax630c', '0.5', src_folder, revision],
481481
## AX650
482482
'llm-model-qwen2.5-0.5B-Int4-ax650':[create_data_deb,'llm-model-qwen2.5-0.5B-Int4-ax650', '0.4', src_folder, revision],
483-
'llm-model-qwen2.5-HA-0.5B-ctx-ax650':[create_data_deb,'llm-model-qwen2.5-HA-0.5B-ctx-ax650', '0.5', src_folder, revision],
483+
'llm-model-qwen2.5-HA-0.5B-ctx-ax650':[create_data_deb,'llm-model-qwen2.5-HA-0.5B-ctx-ax650', '0.6', src_folder, revision],
484484
'llm-model-qwen2.5-1.5B-Int4-ax650':[create_data_deb,'llm-model-qwen2.5-1.5B-Int4-ax650', '0.4', src_folder, revision],
485485
'llm-model-qwen2.5-3B-Int4-ax650':[create_data_deb,'llm-model-qwen2.5-3B-Int4-ax650', '0.4', src_folder, revision],
486486
'llm-model-qwen2.5-7B-Int4-ax650':[create_data_deb,'llm-model-qwen2.5-7B-Int4-ax650', '0.4', src_folder, revision],

0 commit comments

Comments
 (0)