Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ optional-dependencies = { test-tools = [
"prometheus-client (>=0.0.16)",
"psycopg2-binary (>=2.9,<3)",
"requests",
"sentry-sdk (>=2.0.0,<3.0.0)",
"simplejson (>=3,<4)",
"structlog (>=24.4,<26)",
"typing_extensions",
], task-processor = [
"backoff (>=2.2.1,<3.0.0)",
"django (>4,<6)",
Expand Down
2 changes: 2 additions & 0 deletions src/common/core/constants.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
# Directory name for Prometheus multiprocess-mode metric files
# (used when PROMETHEUS_MULTIPROC_DIR is not set externally).
DEFAULT_PROMETHEUS_MULTIPROC_DIR_NAME = "flagsmith-prometheus"

# Default level for the root logger; keeps third-party library
# loggers quiet unless they emit WARNING or above.
LOGGING_DEFAULT_ROOT_LOG_LEVEL = "WARNING"
176 changes: 174 additions & 2 deletions src/common/core/logging.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,39 @@
import json
import logging
import logging.config
import os
import threading
from typing import Any

import structlog
import structlog.contextvars
import structlog.dev
import structlog.processors
import structlog.stdlib
from structlog.typing import EventDict, Processor, WrappedLogger
from typing_extensions import TypedDict

from common.core.constants import LOGGING_DEFAULT_ROOT_LOG_LEVEL

logger = logging.getLogger(__name__)


class JsonRecord(TypedDict, extra_items=Any, total=False):  # type: ignore[call-arg] # TODO https://github.com/python/mypy/issues/18176
    """Schema of a single JSON log record emitted by :class:`JsonFormatter`.

    ``total=False`` — all keys are optional; ``extra_items=Any`` allows
    arbitrary additional keys (e.g. structlog-bound context) to be merged in.
    """

    levelname: str  # log level name, e.g. "INFO", "ERROR"
    message: str  # fully interpolated log message
    timestamp: str  # formatted record time (see Formatter.formatTime)
    logger_name: str  # dotted logger name the record was emitted on
    pid: int | None  # emitting process id
    thread_name: str | None  # emitting thread name
    exc_info: str  # formatted exception traceback, when present


class JsonFormatter(logging.Formatter):
"""Custom formatter for json logs."""

def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]:
def get_json_record(self, record: logging.LogRecord) -> JsonRecord:
formatted_message = record.getMessage()
json_record = {
json_record: JsonRecord = {
"levelname": record.levelname,
"message": formatted_message,
"timestamp": self.formatTime(record, self.datefmt),
Expand All @@ -22,3 +47,150 @@ def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]:

def format(self, record: logging.LogRecord) -> str:
return json.dumps(self.get_json_record(record))


def setup_logging(
    log_level: str = "INFO",
    log_format: str = "generic",
    logging_configuration_file: str | None = None,
    application_loggers: list[str] | None = None,
) -> None:
    """
    Set up logging for the application.

    This should be called early, before Django settings are loaded, to ensure
    that all log output is properly formatted from the start.

    Args:
        log_level: The log level for application code (e.g. "DEBUG", "INFO").
            Case-insensitive.
        log_format: Either "generic" or "json".
        logging_configuration_file: Path to a JSON logging config file.
            If provided, this takes precedence over other format options.
        application_loggers: Top-level logger names for application packages.
            These loggers are set to ``log_level`` while the root logger uses
            ``max(log_level, WARNING)`` to suppress noise from third-party
            libraries. If ``log_level`` is DEBUG, everything logs at DEBUG.

    Raises:
        KeyError: If ``log_level`` is not a known logging level name.
    """
    if logging_configuration_file:
        # An explicit config file takes full precedence over the built-in setup.
        with open(logging_configuration_file) as f:
            config = json.load(f)
        logging.config.dictConfig(config)
    else:
        # Normalise the level name once: the integer lookup below requires
        # upper case, and dictConfig's string levels are case-sensitive too —
        # without this, log_level="info" would pass the lookup but make
        # dictConfig raise ValueError: Unknown level: 'info'.
        log_level = log_level.upper()
        log_level_int = logging.getLevelNamesMapping()[log_level]
        root_level_int = logging.getLevelNamesMapping()[LOGGING_DEFAULT_ROOT_LOG_LEVEL]
        # Suppress third-party noise at WARNING, but if the user requests
        # DEBUG, honour that for the entire process.
        effective_root_level = (
            log_level_int if log_level_int < logging.INFO else root_level_int
        )

        dict_config: dict[str, Any] = {
            "version": 1,
            "disable_existing_loggers": False,
            "handlers": {
                "console": {
                    "class": "logging.StreamHandler",
                    "stream": "ext://sys.stdout",
                    "level": log_level,
                },
            },
            "root": {
                "level": effective_root_level,
                "handlers": ["console"],
            },
            # Application loggers get the requested level and propagate to the
            # root handler, so only one console handler ever emits.
            "loggers": {
                name: {"level": log_level, "handlers": [], "propagate": True}
                for name in application_loggers or []
            },
        }
        logging.config.dictConfig(dict_config)

    setup_structlog(log_format=log_format)


def map_event_to_json_record(
    logger: WrappedLogger,
    method_name: str,
    event_dict: EventDict,
) -> EventDict:
    """Translate structlog event keys into the :class:`JsonFormatter` schema.

    Pops structlog's canonical keys (``event``, ``level``, ``logger``,
    ``exception``) and re-emits them under the stdlib-formatter field names,
    adding process and thread identification. Any remaining bound context
    keys are left in place so they still appear in the JSON output.
    """
    message = event_dict.pop("event", "")
    level_name = event_dict.pop("level", "")
    emitting_logger = event_dict.pop("logger", "")

    canonical: JsonRecord = {
        "message": message,
        "levelname": level_name.upper(),
        "logger_name": emitting_logger,
        "pid": os.getpid(),
        "thread_name": threading.current_thread().name,
    }
    if "exception" in event_dict:
        canonical["exc_info"] = event_dict.pop("exception")

    # Canonical fields win over any same-named bound context keys; everything
    # else the caller bound survives into the rendered JSON.
    event_dict.update(canonical)
    return event_dict


def build_processor_formatter(log_format: str) -> structlog.stdlib.ProcessorFormatter:
    """Construct the shared ``ProcessorFormatter`` for a given output format.

    Both :func:`setup_structlog` (root logger) and the Gunicorn logger
    classes use this, so every handler in the process renders log records
    the same way.
    """
    # Records arriving from plain stdlib loggers get this preparation chain
    # before rendering: context vars, logger name, level, ISO timestamp and
    # any `extra={...}` attributes.
    prepare_foreign_record: list[Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.stdlib.ExtraAdder(),
    ]

    if log_format == "json":
        render: list[Processor] = [
            map_event_to_json_record,
            structlog.processors.JSONRenderer(),
        ]
    else:
        render = [structlog.dev.ConsoleRenderer()]

    return structlog.stdlib.ProcessorFormatter(
        processors=[
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            *render,
        ],
        foreign_pre_chain=prepare_foreign_record,
    )


def setup_structlog(
    log_format: str,
) -> None:
    """Configure structlog to route through stdlib logging.

    Installs a :class:`~structlog.stdlib.ProcessorFormatter` on every
    existing root-logger handler, then configures structlog's own processor
    chain to hand records off to stdlib via ``wrap_for_formatter``.

    Args:
        log_format: Either "generic" or "json"; forwarded to
            :func:`build_processor_formatter`.
    """
    # Imported lazily so this module can be imported without sentry_sdk
    # being pulled in at module-import time.
    from common.core.sentry import sentry_processor

    formatter = build_processor_formatter(log_format)

    # Replace the formatter on existing root handlers with ProcessorFormatter.
    root = logging.getLogger()
    for handler in root.handlers:
        handler.setFormatter(formatter)

    structlog.configure(
        # NOTE: processors run in list order; wrap_for_formatter must stay
        # last so everything before it has shaped the event dict.
        processors=[
            structlog.contextvars.merge_contextvars,
            # Drop events below the stdlib logger's level early.
            structlog.stdlib.filter_by_level,
            structlog.stdlib.add_logger_name,
            structlog.stdlib.add_log_level,
            # Interpolate %-style positional args into the event string.
            structlog.stdlib.PositionalArgumentsFormatter(),
            structlog.processors.StackInfoRenderer(),
            structlog.processors.UnicodeDecoder(),
            # Renders exc_info into an "exception" key before the Sentry
            # processor and the formatter see the event.
            structlog.processors.format_exc_info,
            structlog.processors.TimeStamper(fmt="iso"),
            # Attaches the event's structured context to Sentry (see
            # common.core.sentry.sentry_processor).
            sentry_processor,
            # Hand off to the stdlib ProcessorFormatter installed above.
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )
15 changes: 14 additions & 1 deletion src/common/core/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
)

from common.core.cli import healthcheck
from common.core.logging import setup_logging

logger = logging.getLogger(__name__)

Expand All @@ -32,7 +33,19 @@ def ensure_cli_env() -> typing.Generator[None, None, None]:
"""
ctx = contextlib.ExitStack()

# TODO @khvn26 Move logging setup to here
# Set up logging early, before Django settings are loaded.
application_loggers_env = os.environ.get("APPLICATION_LOGGERS", "")
setup_logging(
log_level=os.environ.get("LOG_LEVEL", "INFO"),
log_format=os.environ.get("LOG_FORMAT", "generic"),
logging_configuration_file=os.environ.get("LOGGING_CONFIGURATION_FILE"),
application_loggers=[
logger_name
for name in application_loggers_env.split(",")
if (logger_name := name.strip())
]
or None,
)

# Prometheus multiproc support
if not os.environ.get("PROMETHEUS_MULTIPROC_DIR"):
Expand Down
22 changes: 22 additions & 0 deletions src/common/core/sentry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import sentry_sdk
from structlog.typing import EventDict, WrappedLogger

_SKIP_CONTEXT_FIELDS = frozenset({"event", "level", "timestamp", "_record"})


def sentry_processor(
    logger: WrappedLogger,
    method_name: str,
    event_dict: EventDict,
) -> EventDict:
    """
    Structlog processor that enriches Sentry with structured context and tags.

    Since structlog routes through stdlib, Sentry's LoggingIntegration
    automatically captures ERROR+ logs as Sentry events. This processor
    adds structured context on top of that.
    """
    # Copy every user-bound key into the Sentry "structlog" context,
    # leaving out the bookkeeping fields structlog itself manages.
    context: dict = {}
    for key, value in event_dict.items():
        if key in _SKIP_CONTEXT_FIELDS:
            continue
        context[key] = value
    sentry_sdk.set_context("structlog", context)

    return event_dict
60 changes: 50 additions & 10 deletions src/common/gunicorn/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Statsd as StatsdGunicornLogger,
)

from common.core.logging import JsonFormatter
from common.core.logging import JsonFormatter, JsonRecord, build_processor_formatter
from common.gunicorn import metrics
from common.gunicorn.constants import (
WSGI_EXTRA_PREFIX,
Expand All @@ -21,7 +21,30 @@
from common.gunicorn.utils import get_extra


class GunicornAccessLogJsonRecord(JsonRecord, extra_items=Any, total=False):  # type: ignore[call-arg] # TODO https://github.com/python/mypy/issues/18176
    """JSON record schema for Gunicorn access logs.

    Extends :class:`JsonRecord` with per-request fields populated from
    Gunicorn's access-log ``record.args`` dict.
    """

    time: str  # request timestamp, ISO 8601
    path: str  # request path (including query string, if logged)
    remote_ip: str  # client address
    route: str  # matched route pattern — presumably set via request extras; confirm against caller
    method: str  # HTTP method, e.g. "GET"
    status: str  # HTTP response status
    user_agent: str  # User-Agent request header
    duration_in_ms: int  # request duration in milliseconds
    response_size_in_bytes: int  # response body size; 0 when unknown


class GunicornAccessLogJsonFormatter(JsonFormatter):
"""JSON formatter for Gunicorn access logs.

Extends :class:`~common.core.logging.JsonFormatter` with request-specific
fields extracted from Gunicorn's ``LogRecord.args`` dict.

This uses a stdlib ``Formatter`` rather than a structlog
``ProcessorFormatter`` because Gunicorn populates ``record.args`` with
its own format variables — working with the ``LogRecord`` directly is
the natural abstraction for this data.
"""

def _get_extra(self, record_args: dict[str, Any]) -> dict[str, Any]:
ret: dict[str, dict[str, Any]] = {}

Expand All @@ -31,7 +54,7 @@ def _get_extra(self, record_args: dict[str, Any]) -> dict[str, Any]:
# Gunicorn's access log format string for
# request headers, response headers and environ variables
# without the % prefix, e.g. "{origin}i" or "{flagsmith.environment_id}e"
# https://docs.gunicorn.org/en/stable/settings.html#access-log-format
# https://gunicorn.org/reference/settings/#access_log_format
for extra_key in extra_items_to_log:
extra_key_lower = extra_key.lower()
if (
Expand All @@ -49,7 +72,7 @@ def _get_extra(self, record_args: dict[str, Any]) -> dict[str, Any]:

return ret

def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]:
def get_json_record(self, record: logging.LogRecord) -> GunicornAccessLogJsonRecord:
args = record.args

if TYPE_CHECKING:
Expand All @@ -61,6 +84,7 @@ def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]:

return {
**super().get_json_record(record),
**self._get_extra(args), # type: ignore[typeddict-item] # TODO https://github.com/python/mypy/issues/18176
"time": datetime.strptime(args["t"], "[%d/%b/%Y:%H:%M:%S %z]").isoformat(),
"path": url,
"remote_ip": args["h"],
Expand All @@ -70,11 +94,12 @@ def get_json_record(self, record: logging.LogRecord) -> dict[str, Any]:
"user_agent": args["a"],
"duration_in_ms": args["M"],
"response_size_in_bytes": args["B"] or 0,
**self._get_extra(args),
}


class PrometheusGunicornLogger(StatsdGunicornLogger): # type: ignore[misc]
"""Gunicorn logger that records Prometheus metrics on each access log entry."""

def access(
self,
resp: Response,
Expand Down Expand Up @@ -104,17 +129,32 @@ def access(


class GunicornJsonCapableLogger(PrometheusGunicornLogger):
"""Gunicorn logger that aligns formatting with the application logging setup.

Gunicorn manages its own loggers (``gunicorn.error``, ``gunicorn.access``)
with ``propagate=False``, so they bypass the root handler configured by
:func:`~common.core.logging.setup_logging`. This class bridges that gap:

* **Error log** — receives a ``ProcessorFormatter`` (via
:func:`~common.core.logging.build_processor_formatter`) so that
Gunicorn's operational messages share the same format and ISO timestamps
as application logs.
* **Access log (JSON mode)** — receives a
:class:`GunicornAccessLogJsonFormatter` that produces structured JSON
with request-specific fields. In generic mode the access log keeps
Gunicorn's default CLF format.
"""

def setup(self, cfg: Config) -> None:
super().setup(cfg)
if getattr(settings, "LOG_FORMAT", None) == "json":
self._set_handler(
self.error_log,
cfg.errorlog,
JsonFormatter(),
)
log_format = getattr(settings, "LOG_FORMAT", "generic")
if log_format == "json":
self._set_handler(
self.access_log,
cfg.accesslog,
GunicornAccessLogJsonFormatter(),
stream=sys.stdout,
)
formatter = build_processor_formatter(log_format)
for handler in self.error_log.handlers:
handler.setFormatter(formatter)
Loading
Loading