Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion bin/pytorch_inference/CResultWriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ void CResultWriter::writeError(const std::string_view& requestId, const std::str
void CResultWriter::wrapAndWriteInnerResponse(const std::string& innerResponse,
const std::string& requestId,
bool isCacheHit,
std::uint64_t timeMs) {
std::uint64_t timeMs,
std::size_t residentSize) {
core::CBoostJsonConcurrentLineWriter jsonWriter{m_WrappedOutputStream};
jsonWriter.onObjectBegin();
jsonWriter.onKey(CCommandParser::REQUEST_ID);
Expand All @@ -67,6 +68,11 @@ void CResultWriter::wrapAndWriteInnerResponse(const std::string& innerResponse,
jsonWriter.onBool(isCacheHit);
jsonWriter.onKey(TIME_MS);
jsonWriter.onUint64(timeMs);
jsonWriter.onKey(PROCESS_STATS);
jsonWriter.onObjectBegin();
jsonWriter.onKey(MEMORY_RESIDENT_SET_SIZE);
jsonWriter.onUint64(residentSize);
jsonWriter.onObjectEnd();
jsonWriter.rawKeyAndValue(innerResponse);
jsonWriter.onObjectEnd();
}
Expand Down
3 changes: 2 additions & 1 deletion bin/pytorch_inference/CResultWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ class CResultWriter : public TStringBufWriter {
void wrapAndWriteInnerResponse(const std::string& innerResponse,
const std::string& requestId,
bool isCacheHit,
std::uint64_t timeMs);
std::uint64_t timeMs,
std::size_t residentSize);

//! Write the prediction portion of an inference result.
template<std::size_t N>
Expand Down
8 changes: 5 additions & 3 deletions bin/pytorch_inference/Main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,11 @@ bool handleRequest(ml::torch::CCommandParser::CRequestCacheInterface& cache,
}
},
[&](const auto& innerResponseJson_, bool isCacheHit) {
resultWriter.wrapAndWriteInnerResponse(innerResponseJson_,
requestId, isCacheHit,
stopWatch.stop());
std::size_t residentSetSize =
ml::core::CProcessStats::residentSetSize();
resultWriter.wrapAndWriteInnerResponse(
innerResponseJson_, requestId, isCacheHit,
stopWatch.stop(), residentSetSize);
});
});
return true;
Expand Down
9 changes: 5 additions & 4 deletions bin/pytorch_inference/unittest/CResultWriterTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,12 @@ BOOST_AUTO_TEST_CASE(testWrapAndWriteInferenceResult) {
std::ostringstream output;
{
ml::torch::CResultWriter resultWriter{output};
resultWriter.wrapAndWriteInnerResponse(innerPortion, "req4", true, 123);
resultWriter.wrapAndWriteInnerResponse(innerPortion, "req4", true, 123, 111);
}
std::string expected = "[{\"request_id\":\"req4\",\"cache_hit\":true,"
"\"time_ms\":123,\"result\":{\"inference\":"
"[[[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0]]]}}\n]";
std::string expected =
"[{\"request_id\":\"req4\",\"cache_hit\":true,"
"\"time_ms\":123,\"process_stats\":{\"memory_rss\":111},\"result\":{\"inference\":"
"[[[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0],[1.0,1.0,1.0]]]}}\n]";
std::string actual = output.str();

LOG_INFO(<< "expected: " << expected);
Expand Down
1 change: 1 addition & 0 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
=== Enhancements

* Downgrade log severity for a batch of recoverable errors. (See {ml-pull}[#2889].)
* Add pytorch_process RSS memory stat to the inference response. (See {ml-pull}[#2896].)

== {es} version 9.2.0

Expand Down