pytorch · psiddh · May 6, 2026 · May 6, 2026
@@ -102,6 +102,9 @@ class LhdTokenGenerator : public TokenGenerator<T> {
       AttentionSinkRopeRunner* attention_sink_rope_runner) override;
 
  private:
+  // Bring the base class's virtual prepare_io into scope so the overload
+  // declared below does not hide it (silences -Woverloaded-virtual).
+  using TokenGenerator<T>::prepare_io;
   /**
    * @brief Fill in I/O buffers with prompt token and position.
    * @param cur_token Current token.

@@ -108,6 +108,9 @@ class MultimodalLhdTokenGenerator
       AttentionSinkRopeRunner* attention_sink_rope_runner) override;
 
  private:
+  // Bring the base class's virtual prepare_io into scope so the overload
+  // declared below does not hide it (silences -Woverloaded-virtual).
+  using TokenGenerator<T>::prepare_io;
   /**
    * @brief Fill in I/O buffers with prompt token and position.
    * @param cur_token Current token.

@@ -40,6 +40,8 @@ class PromptProcessor {
       const std::string& method_name,
       Metadata metadata);
 
+  virtual ~PromptProcessor() = default;
+
   /**
    * @brief Initialize I/O tensor and allocate I/O data buffer.
    * @param buffer_manager Pointer to IMemAlloc instance; by default, it uses a

@@ -102,6 +102,7 @@ Runner<T>::Runner(
     std::unique_ptr<tokenizers::Tokenizer> tokenizer,
     std::unique_ptr<executorch::extension::Module> attention_sink_rope_module)
     : module_(std::move(module)),
+      attention_sink_rope_module_(std::move(attention_sink_rope_module)),
       ngram_(ngram),
       window_(window),
       gcap_(gcap),
@@ -111,8 +112,7 @@ Runner<T>::Runner(
       temperature_(temperature),
       eval_mode_(static_cast<EvalMode>(eval_mode)),
       shared_buffer_(shared_buffer),
-      tokenizer_(std::move(tokenizer)),
-      attention_sink_rope_module_(std::move(attention_sink_rope_module)) {
+      tokenizer_(std::move(tokenizer)) {
   stats_.reset();
 
   if (decoder_model_version == "llama2") {