Changes from all commits (35 commits)
896170b
i think i added python sentry
victoria-yining-huang Dec 9, 2025
60b4035
add
victoria-yining-huang Dec 16, 2025
bd3f9f1
add test_sdk
victoria-yining-huang Dec 16, 2025
1de7afe
add progress
victoria-yining-huang Dec 17, 2025
a0f034f
worked jan 7
victoria-yining-huang Jan 7, 2026
b091cd6
make customer logs pass through
victoria-yining-huang Jan 9, 2026
1058263
exit main process if sub fails
victoria-yining-huang Jan 9, 2026
89b6507
remove test script
victoria-yining-huang Jan 9, 2026
7ccbdfc
remove return code check
victoria-yining-huang Jan 9, 2026
a5bb343
move sentry sdk into main
victoria-yining-huang Jan 9, 2026
e65a942
change env var
victoria-yining-huang Jan 9, 2026
2c97503
add tests
victoria-yining-huang Jan 12, 2026
490fada
lol did not add tests now added
victoria-yining-huang Jan 12, 2026
f8a20a5
add test on customer print
victoria-yining-huang Jan 12, 2026
e2fed79
rename file
victoria-yining-huang Jan 12, 2026
6133766
make dsn come from yaml
victoria-yining-huang Jan 21, 2026
9a64a32
only read yaml once
victoria-yining-huang Jan 21, 2026
024c09e
use multiprocessing module
victoria-yining-huang Jan 22, 2026
a346fea
remove module
victoria-yining-huang Jan 22, 2026
97c9720
remove redundant raises
victoria-yining-huang Jan 22, 2026
6b8736e
remove third party multiprocessing
victoria-yining-huang Jan 23, 2026
29b794e
all tests verified and pass
victoria-yining-huang Jan 23, 2026
9e65bc6
just one conn'
victoria-yining-huang Jan 23, 2026
7823864
remove lib
victoria-yining-huang Jan 23, 2026
8810710
reset metrics backend in tests
victoria-yining-huang Jan 23, 2026
b68fb44
that child close was not needed
victoria-yining-huang Jan 23, 2026
1476cc8
typing annotations
victoria-yining-huang Jan 23, 2026
071c87e
remove daed code
victoria-yining-huang Jan 23, 2026
06f7e3a
add more detailed tests
victoria-yining-huang Jan 23, 2026
5a8c1ec
refine error paths
victoria-yining-huang Jan 26, 2026
8466979
fix return flow
victoria-yining-huang Jan 26, 2026
7e20803
remove code by mistake
victoria-yining-huang Jan 26, 2026
280fd9b
use multiprocessing pool
victoria-yining-huang Jan 26, 2026
98c86f1
typecheck
victoria-yining-huang Jan 26, 2026
c440385
match rust func signatures
victoria-yining-huang Jan 27, 2026
3 changes: 3 additions & 0 deletions sentry_streams/sentry_streams/config.json
@@ -13,6 +13,9 @@
},
"metrics": {
"$ref": "#/definitions/Metrics"
},
"streaming_platform_dsn": {
"type": "string"
Collaborator, on lines +16 to +18:

The DSN is not enough. At times we need to pass parameters to the sentry integration.
Please make this an object, with the DSN as one of its fields. For now you do not have to add additional fields, but we certainly will later. Turning a string into an object will be tricky, as it will break existing configs; adding a field to an object will be trivial.
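For illustration, a minimal sketch of the requested object form, written as the Python-dict equivalent of the config.json definition; only the dsn field is grounded in this PR, and the wrapper shape is an assumption:

# Hypothetical schema fragment: wrap the DSN in an object so more
# integration options can be added later without breaking existing configs.
sentry_platform_definition = {
    "type": "object",
    "properties": {
        "dsn": {"type": "string"},
    },
    "required": ["dsn"],
}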

}
}
},
73 changes: 53 additions & 20 deletions sentry_streams/sentry_streams/runner.py
@@ -1,10 +1,13 @@
import importlib
import json
import logging
from typing import Any, Optional, cast
import multiprocessing
import sys
from typing import Any, Mapping, Optional, cast

import click
import jsonschema
import sentry_sdk
import yaml

from sentry_streams.adapters.loader import load_adapter
@@ -59,13 +62,36 @@ def iterate_edges(
step_streams[branch_name] = next_step_stream[branch_name]


def _load_pipeline_in_process(application: str) -> Pipeline[Any]:
Collaborator:

nit: this method does not know it runs in a separate process; it can work without one. Just call it _load_pipeline, and explain the subprocess rationale in the docstring.

"""
Worker function that runs in a separate process to load the pipeline.
Returns the Pipeline object directly, or raises an exception on error.

Customer code exceptions are allowed to propagate naturally so that the customer's
Sentry SDK (if initialized) can capture them.
"""
import contextlib

pipeline_globals: dict[str, Any] = {}

with contextlib.redirect_stdout(sys.stderr):
with open(application, "r") as f:
exec(f.read(), pipeline_globals)

if "pipeline" not in pipeline_globals:
raise ValueError("Application file must define a 'pipeline' variable")

pipeline = cast(Pipeline[Any], pipeline_globals["pipeline"])
return pipeline


def load_runtime(
name: str,
log_level: str,
adapter: str,
config: str,
segment_id: Optional[str],
application: str,
environment_config: Mapping[str, Any],
Automated review comment:

Rust caller uses incompatible old function signature (High Severity)

The load_runtime function signature changed: the config parameter was removed, environment_config: Mapping[str, Any] was added, and parameter order shifted. However, the Rust code in run.rs still calls this function with the old positional arguments (name, log_level, adapter_name, config_file, segment_id, application_name). This causes a type mismatch where config_file (a string path) gets passed where segment_id is expected, and application_name (a string) gets passed where environment_config (a mapping) is expected, breaking the Rust integration entirely.


Automated review comment:

Rust caller uses outdated load_runtime function signature (High Severity)

The load_runtime function signature changed from (name, log_level, adapter, config, segment_id, application) to (name, log_level, adapter, segment_id, application, environment_config), but the Rust caller in run.rs was not updated. The Rust code passes config_file where segment_id is now expected, segment_id where application is expected, and application_name (a string) where environment_config (a Mapping) is expected. This will crash when environment_config.get("metrics", {}) is called because strings don't have a .get() method.

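A minimal sketch of the failure mode both findings describe, using the signatures quoted above:

from typing import Any, Mapping, Optional

def load_runtime(
    name: str,
    log_level: str,
    adapter: str,
    segment_id: Optional[str],
    application: str,
    environment_config: Mapping[str, Any],
) -> Any:
    # First place this PR's version of the function touches the mapping.
    return environment_config.get("metrics", {})

# The Rust caller still uses the old positional order
# (name, log_level, adapter_name, config_file, segment_id, application_name):
load_runtime("name", "INFO", "arroyo", "config.yaml", "segment-0", "app.py")
# AttributeError: 'str' object has no attribute 'get'
# ("config.yaml" lands in segment_id; "app.py" lands in environment_config)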

) -> Any:

logging.basicConfig(
@@ -74,22 +100,13 @@ def load_runtime(
datefmt="%Y-%m-%d %H:%M:%S",
)

pipeline_globals: dict[str, Any] = {}

with open(application) as f:
exec(f.read(), pipeline_globals)

with open(config, "r") as config_file:
environment_config = yaml.safe_load(config_file)

config_template = importlib.resources.files("sentry_streams") / "config.json"
with config_template.open("r") as file:
schema = json.load(file)

try:
jsonschema.validate(environment_config, schema)
except Exception:
raise
# Execute the application in a subprocess to build the pipeline
# The subprocess will return the pipeline object or raise an exception
# Note: Customer print() and logging statements (redirected to stderr)
# do not trigger platform Sentry alerts.
with multiprocessing.Pool(processes=1) as pool:
pipeline: Pipeline[Any] = pool.apply(_load_pipeline_in_process, (application,))
Automated review comment:

Subprocess pickle breaks pipelines containing lambdas (Medium Severity)

The new multiprocessing.Pool.apply() approach requires the Pipeline object to be picklable when returning from the subprocess. However, Pipeline objects commonly contain Step instances with lambda functions (e.g., Map(function=lambda msg: ...)), and Python lambdas are not picklable. Existing examples like billing.py with aggregate_func=lambda: OutcomesBuffer() and tests using function=lambda msg: ... will fail with PicklingError at runtime.

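A standard-library reproduction of this concern, assuming a pipeline step holds a lambda as in the cited examples:

import multiprocessing

def build_pipeline() -> dict:
    # Stand-in for executing an application file whose pipeline contains
    # a lambda step, e.g. Map(function=lambda msg: ...).
    return {"function": lambda msg: msg}

if __name__ == "__main__":
    with multiprocessing.Pool(processes=1) as pool:
        # The worker must pickle the return value to send it back to the
        # parent; lambdas are not picklable, so this raises a
        # multiprocessing.pool.MaybeEncodingError wrapping the PicklingError.
        pool.apply(build_pipeline)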

logger.info("Successfully loaded pipeline from subprocess")

metric_config = environment_config.get("metrics", {})
if metric_config.get("type") == "datadog":
Expand All @@ -113,7 +130,6 @@ def load_runtime(
metric_config = {}

assigned_segment_id = int(segment_id) if segment_id else None
pipeline: Pipeline[Any] = pipeline_globals["pipeline"]
runtime: Any = load_adapter(adapter, environment_config, assigned_segment_id, metric_config)
translator = RuntimeTranslator(runtime)
Collaborator:

I think there is a use case we did not consider, and it is an issue.
The pipeline definition is coupled with the configuration file. If I rename a step in the pipeline definition and the config file references it, the config may become invalid, though we would only find out at this step, and that error would go to streaming.

Though if we changed that and sent config file mistakes to product, the system would be wrong anyway, as streaming owns the config file content.

Do we have a plan to address this issue?

Contributor (author):

I did not address this issue yet. The configuration YAML file should have its own validation step, which has not been built yet. In any case, as you said, it will be a problem if this step is the first place an invalid deployment config file errors out.

Member:

@fpacifici not sure what the course of action here is, though. A mismatch between deployment config and pipeline settings could be either team's responsibility; if a team renames its steps and does not update the config file in advance, then we should not have to react to that.

I think we should probably try to prevent this issue in CI if it becomes a big one.

Collaborator:

> I think we should probably try to prevent this issue in CI if it becomes a big one.

I think we do not have a precise story for how to manage this use case in a CI/CD environment. If I change the application, the change may be breaking with respect to what the config says. The way this works today would require us to switch the configuration at the same time the new sha is deployed, which is not something we can really enforce now.

I think this should be discussed in a follow-up.

@@ -177,7 +193,24 @@ def main(
segment_id: Optional[str],
application: str,
) -> None:
runtime = load_runtime(name, log_level, adapter, config, segment_id, application)
with open(config, "r") as config_file:
environment_config = yaml.safe_load(config_file)
config_template = importlib.resources.files("sentry_streams") / "config.json"
with config_template.open("r") as file:
schema = json.load(file)

try:
jsonschema.validate(environment_config, schema)
except Exception:
raise

streaming_platform_dsn = environment_config.get("streaming_platform_dsn")
if streaming_platform_dsn:
sentry_sdk.init(
dsn=streaming_platform_dsn,
send_default_pii=True,
Collaborator:

Why is this True? Usually we should not send PII.

)
runtime = load_runtime(name, log_level, adapter, segment_id, application, environment_config)
runtime.run()


19 changes: 18 additions & 1 deletion sentry_streams/src/run.rs
@@ -74,16 +74,33 @@ pub fn run(args: Args) -> Result<(), Box<dyn std::error::Error>> {
})?;

let runtime: Py<PyAny> = traced_with_gil!(|py| {
// Read and parse the config file as YAML to create environment_config
let yaml_module = py.import("yaml")?;
let config_path = runtime_config
.config_file
.to_str()
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("Invalid config file path"))?;

let config_file = std::fs::File::open(config_path).map_err(|e| {
pyo3::exceptions::PyIOError::new_err(format!("Failed to open config file: {}", e))
})?;
let config_reader = std::io::BufReader::new(config_file);
let config_str = std::io::read_to_string(config_reader).map_err(|e| {
pyo3::exceptions::PyIOError::new_err(format!("Failed to read config file: {}", e))
})?;

let environment_config = yaml_module.getattr("safe_load")?.call1((config_str,))?;

Collaborator, on lines +77 to +93:

Please let's not duplicate this logic.
Every time we duplicate logic between Python and Rust, we make changes harder, because two places have to be updated.

If you are making this change because load_runtime now takes the config structure instead of the config file name, please have two methods: one takes the file name, reads it, instantiates the SDK, and then calls the other method, passing the parsed config. This Rust function would call the first method.

More importantly, how do we initialize the platform SDK when we start the runner with this CLI?
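A hedged sketch of that split, in Python; the wrapper name is an assumption, and the Rust entry point would call it instead of load_runtime, which would also restore schema validation and platform SDK initialization on the Rust path (the two findings below):

import importlib.resources
import json
from typing import Any, Mapping, Optional

import jsonschema
import sentry_sdk
import yaml

# Assumes load_runtime(name, log_level, adapter, segment_id, application,
# environment_config) as defined elsewhere in runner.py in this PR.

def load_runtime_from_file(
    name: str,
    log_level: str,
    adapter: str,
    segment_id: Optional[str],
    application: str,
    config: str,
) -> Any:
    # Read and validate the YAML config once, initialize the platform SDK,
    # then delegate to load_runtime with the parsed mapping.
    with open(config, "r") as config_file:
        environment_config: Mapping[str, Any] = yaml.safe_load(config_file)

    config_template = importlib.resources.files("sentry_streams") / "config.json"
    with config_template.open("r") as file:
        schema = json.load(file)
    jsonschema.validate(environment_config, schema)

    dsn = environment_config.get("streaming_platform_dsn")
    if dsn:
        sentry_sdk.init(dsn=dsn)

    return load_runtime(name, log_level, adapter, segment_id, application, environment_config)

main() would then shrink to a call to this wrapper, and run.rs could drop its duplicated YAML parsing.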

let runtime = py
.import("sentry_streams.runner")?
.getattr("load_runtime")?
.call1((
runtime_config.name,
runtime_config.log_level,
runtime_config.adapter_name,
runtime_config.config_file,
runtime_config.segment_id,
runtime_config.application_name,
environment_config,
Automated review comment:

Config validation skipped when using Rust runner (Medium Severity)

The jsonschema.validate() call for config validation was moved from load_runtime() to main(). However, the Rust runner (run.rs) calls load_runtime() directly, bypassing main(). This means config files are no longer validated against the JSON schema when using the Rust entry point, which is a regression from the previous behavior where validation happened inside load_runtime() regardless of entry point.


Automated review comment:

Sentry SDK not initialized when using Rust runner (Medium Severity)

The Sentry SDK initialization code for platform observability was added only to the Python CLI's main() function. The Rust runner (run.rs) calls load_runtime() directly, bypassing main(), so sentry_sdk.init() is never called. This means platform errors occurring when using the Rust entry point won't be reported to Sentry, defeating the purpose of the Sentry integration feature described in the PR.


))?
.unbind();
PyResult::Ok(runtime)
148 changes: 148 additions & 0 deletions sentry_streams/tests/test_sentry_transports.py
Collaborator:

I am not sure I get what you are trying to validate with this test.

  • The test says test_sentry_transport, but there is no assertion on whether the mock Transport provided is ever used.
  • Having a test for the load_runtime method is a good idea (we do not seem to have one), but then I would assert the properties of the returned runtime using the Dummy runtime, rather than mocking the graph construction methods, which are the logic you are testing.

@@ -0,0 +1,148 @@
from typing import Any, Generator, List, Optional
from unittest.mock import patch

import pytest
import sentry_sdk
from sentry_sdk.transport import Transport

from sentry_streams.pipeline.pipeline import Pipeline
from sentry_streams.runner import load_runtime


class CaptureTransport(Transport):

def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
self.events: List[Any] = []
self.envelopes: List[Any] = []

def capture_event(self, event: Any) -> None:
self.events.append(event)
return None

def capture_envelope(self, envelope: Any) -> None:
self.envelopes.append(envelope)
return None

def flush(self, timeout: float, callback: Optional[Any] = None) -> None:
"""Flush is called when SDK shuts down."""
pass


@pytest.fixture
def temp_fixture_dir(tmp_path: Any) -> Any:
fixture_dir = tmp_path / "fixtures"
fixture_dir.mkdir()
return fixture_dir


@pytest.fixture(autouse=True)
def reset_metrics_backend() -> Generator[None, None, None]:
Collaborator, on lines +12 to +40:

These do not seem to be relevant for the test. No assertion is run on any instance of these.
If you want to test load_runtime, then you can remove these and just test that load_runtime returns the right runtime when the pipeline is good and that it raises an exception when it is not.

If you want to test the two separate SDK initializations, then you will have to check that the Transport actually catches events when the load fails.

"""Reset the global metrics backend between tests."""
from sentry_streams import metrics

try:
from arroyo.utils import metrics as arroyo_metrics

has_arroyo = True
except ImportError:
has_arroyo = False

# Reset before each test
metrics.metrics._metrics_backend = None
if has_arroyo:
arroyo_metrics._metrics_backend = None

yield

# Reset to None after each test
metrics.metrics._metrics_backend = None
if has_arroyo:
arroyo_metrics._metrics_backend = None


@pytest.fixture
def platform_transport() -> CaptureTransport:
transport = CaptureTransport()
# Clear any existing Sentry client
sentry_sdk.get_client().close()
return transport


def test_multiprocess_pipe_communication_success(
platform_transport: CaptureTransport, temp_fixture_dir: Any
) -> None:
sentry_sdk.init(
dsn="https://platform@example.com/456",
transport=platform_transport,
)

app_file = temp_fixture_dir / "simple_app.py"
app_file.write_text(
"""
from sentry_streams.pipeline import streaming_source
pipeline = streaming_source(name="test", stream_name="test-stream")
"""
Collaborator, on lines +81 to +85:

Please create a module containing the pipeline you want to test; do not generate a Python file on the fly. There is no gain in doing so, while there is the downside that type checking will not understand this string.
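A sketch of the suggested fixture module (the path is hypothetical), which type checkers can analyze:

# tests/fixtures/simple_app.py (hypothetical location)
# Same pipeline as the generated string, but importable and type-checkable.
from sentry_streams.pipeline import streaming_source

pipeline = streaming_source(name="test", stream_name="test-stream")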

)

with (
patch("sentry_streams.runner.load_adapter") as mock_load_adapter,
patch("sentry_streams.runner.iterate_edges") as mock_iterate_edges,
):
mock_runtime = type(
"MockRuntime",
(),
{
"run": lambda self: None,
"source": lambda self, step: "mock_stream",
"complex_step_override": lambda self: {},
},
)()
mock_load_adapter.return_value = mock_runtime
Collaborator, on lines +88 to +101:

Please use the dummyAdapter rather than mocking everything
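A hedged sketch of that suggestion, reusing the fixture module proposed above; the "dummy" adapter name is an assumption about how the dummy adapter is registered with load_adapter:

from sentry_streams.runner import load_runtime

def test_load_runtime_with_dummy_adapter() -> None:
    # Exercises the real graph construction instead of mocking
    # load_adapter and iterate_edges.
    runtime = load_runtime(
        name="test",
        log_level="INFO",
        adapter="dummy",  # assumed registered name of the dummy adapter
        segment_id=None,
        application="tests/fixtures/simple_app.py",  # module sketched earlier
        environment_config={"metrics": {"type": "dummy"}},
    )
    # Assert properties of the returned runtime rather than mock call counts.
    assert runtime is not None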


runtime = load_runtime(
name="test",
log_level="INFO",
adapter="arroyo",
segment_id=None,
application=str(app_file),
environment_config={"metrics": {"type": "dummy"}},
)

assert runtime is not None

mock_iterate_edges.assert_called_once()
pipeline_arg = mock_iterate_edges.call_args[0][0] # First positional argument
assert isinstance(pipeline_arg, Pipeline)


def test_subprocess_sends_error_status_with_details(
platform_transport: CaptureTransport, temp_fixture_dir: Any
) -> None:
"""Test that detailed error messages are captured when subprocess sends status='error'."""
sentry_sdk.init(
dsn="https://platform@example.com/456",
transport=platform_transport,
)

# Create an app file that doesn't define 'pipeline' variable
app_file = temp_fixture_dir / "missing_pipeline.py"
app_file.write_text(
"""
from sentry_streams.pipeline import streaming_source
# Intentionally not defining 'pipeline' variable
my_pipeline = streaming_source(name="test", stream_name="test-stream")
"""
)

with pytest.raises(ValueError) as exc_info:
load_runtime(
name="test",
log_level="INFO",
adapter="arroyo",
segment_id=None,
application=str(app_file),
environment_config={"metrics": {"type": "dummy"}},
)

assert "Application file must define a 'pipeline' variable" in str(exc_info.value)