Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions docs/configuration/sinks/slack.rst
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,31 @@ Add the following to your ``values.yaml`` file and upgrade:
value: "https://api.robusta.dev/slack/"


Handling Slack Rate Limits
-------------------------------------------------------------------

Slack's Web API rate-limits calls to ``chat.postMessage`` (roughly one message per
second per channel). When the limit is exceeded, Slack returns HTTP ``429 Too Many
Requests`` with a ``Retry-After`` header (in seconds). Robusta's Slack sink uses
the ``RateLimitErrorRetryHandler`` built into the official ``slack-sdk``, which
transparently waits for the duration specified by ``Retry-After`` and retries the request.

If you see the error ``ratelimited`` in the runner logs, or notifications are being
dropped during alert bursts, you can increase the number of retry attempts with the
``SLACK_RATE_LIMIT_RETRIES`` environment variable on the runner pod (default: ``2``).

.. code-block:: yaml

    runner:
      additional_env_vars:
      - name: SLACK_RATE_LIMIT_RETRIES
        value: "5"

Higher values make the sink more resilient to sustained rate limiting at the cost
of longer delays between a notification being generated and being delivered — each
retry waits for the ``Retry-After`` value returned by Slack (typically a few seconds,
but can be longer under heavy throttling).

Redirect to Platform
-------------------------------------------------------------------

Expand Down
1 change: 1 addition & 0 deletions src/robusta/core/model/env_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def load_bool(env_var, default: bool):
# Telemetry period in seconds; the default of 60 * 60 * 24 is one day.
TELEMETRY_PERIODIC_SEC = int(os.environ.get("TELEMETRY_PERIODIC_SEC", 60 * 60 * 24)) # 24H

# Handed to the Slack WebClient as its `timeout` argument (default 90).
SLACK_REQUEST_TIMEOUT = int(os.environ.get("SLACK_REQUEST_TIMEOUT", 90))
# Passed as `max_retry_count` to slack-sdk's RateLimitErrorRetryHandler (default 2).
SLACK_RATE_LIMIT_RETRIES = int(os.environ.get("SLACK_RATE_LIMIT_RETRIES", 2))
# NOTE(review): presumably the maximum number of table columns rendered in Slack messages — confirm.
SLACK_TABLE_COLUMNS_LIMIT = int(os.environ.get("SLACK_TABLE_COLUMNS_LIMIT", 3))
# Optional (None when unset); when set, it replaces WebClient.BASE_URL as the Slack client's base URL.
SLACK_FORWARD_URL = os.environ.get("SLACK_FORWARD_URL") # forward endpoint "https://api.robusta.dev/slack/"
# NOTE(review): presumably the Discord counterpart of the Slack table column limit — confirm.
DISCORD_TABLE_COLUMNS_LIMIT = int(os.environ.get("DISCORD_TABLE_COLUMNS_LIMIT", 4))
Expand Down
13 changes: 12 additions & 1 deletion src/robusta/integrations/slack/sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,14 @@
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.http_retry import all_builtin_retry_handlers
from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler
from robusta.core.sinks.slack.templates.template_loader import template_loader

from robusta.core.model.env_vars import (
ADDITIONAL_CERTIFICATE,
HOLMES_ASK_SLACK_BUTTON_ENABLED,
HOLMES_ENABLED,
SLACK_RATE_LIMIT_RETRIES,
SLACK_REQUEST_TIMEOUT,
SLACK_TABLE_COLUMNS_LIMIT,
SLACK_FORWARD_URL,
Expand Down Expand Up @@ -57,6 +59,15 @@
MENTION_PATTERN = re.compile(r"<[^>]+>")


def _build_retry_handlers():
    """Return slack-sdk's built-in retry handlers with a configurable rate-limit budget.

    Every handler produced by ``all_builtin_retry_handlers()`` is kept as-is and
    in its original order, except that any ``RateLimitErrorRetryHandler`` is
    swapped for a new instance whose ``max_retry_count`` is
    ``SLACK_RATE_LIMIT_RETRIES``.

    Returns:
        A new list of retry handlers suitable for ``WebClient(retry_handlers=...)``.
    """
    retry_handlers = []
    for builtin in all_builtin_retry_handlers():
        if isinstance(builtin, RateLimitErrorRetryHandler):
            # Replace the stock rate-limit handler so the retry count follows
            # the environment-driven setting rather than the SDK default.
            builtin = RateLimitErrorRetryHandler(max_retry_count=SLACK_RATE_LIMIT_RETRIES)
        retry_handlers.append(builtin)
    return retry_handlers


class SlackSender:
verified_api_tokens: Set[str] = set()
channel_name_to_id = {}
Expand All @@ -77,7 +88,7 @@ def __init__(self, slack_token: str, account_id: str, cluster_name: str, signing
token=slack_token,
ssl=ssl_context,
timeout=SLACK_REQUEST_TIMEOUT,
retry_handlers=all_builtin_retry_handlers(),
retry_handlers=_build_retry_handlers(),
base_url=SLACK_FORWARD_URL or WebClient.BASE_URL
)
if SLACK_FORWARD_URL:
Expand Down
Loading