Skip to content
69 changes: 69 additions & 0 deletions .github/workflows/acquisition-validation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Converts the acquisition JSON Schemas (authored as YAML) and the datasource
# fixtures under pkg/acquisition to JSON, then validates the fixtures against
# the aggregate datasource schema with the `jv` command-line validator.
name: check acquisition datasource
on:
  push:
    branches:
      - master
      - docker/schema
  pull_request:
    branches:
      - master

# Every step only reads the checked-out repository, so the workflow token is
# restricted to read-only access.
permissions:
  contents: read

jobs:
  datasource-validation:
    runs-on: ubuntu-latest
    steps:
      - name: checkout repo
        uses: actions/checkout@v4

      # Go is needed later to `go install` the jv validator.
      - name: setup go
        uses: actions/setup-go@v5
        with:
          go-version: "stable"

      - name: Install yq
        uses: mikefarah/yq@v4.50.1
        with:
          cmd: yq --version

      # Drop any JSON already present so only freshly generated files are used.
      - name: cleanup json
        run: find ./pkg/acquisition -type f -name '*.json' -delete

      # Every *schema.yaml gets a sibling .json file with the same base name.
      - name: transform to json all datasource schema
        uses: mikefarah/yq@v4.50.1
        with:
          cmd: |
            set -euxo pipefail
            find './pkg/acquisition' -type f -name '*schema.yaml' -print0 |
              while IFS= read -r -d '' f; do
                yq -o=json "$f" > "${f%.yaml}.json"
              done

      # Fixture files may hold several YAML documents; each document N of
      # <name>.yaml is written to <name>.N.json.
      - name: split YAML to per-doc JSON (no jq)
        run: |
          set -euxo pipefail

          find . \
            \( -path './pkg/acquisition/schema/valid/*' -o -path './pkg/acquisition/schema/invalid/*' \) \
            -type f -name '*.yaml' -print0 |
            while IFS= read -r -d '' f; do
              base="${f%.yaml}"
              i=0
              while :; do
                out="${base}.${i}.json"
                # Selecting a document index past the end prints nothing and
                # still exits 0, so a non-zero status is a real yq failure
                # (e.g. malformed YAML) and must fail the job instead of
                # silently producing zero documents.
                yq -o=json 'select(documentIndex == '"$i"')' "$f" > "$out"
                # Empty output means there is no document at this index: stop.
                if [ ! -s "$out" ]; then rm -f "$out"; break; fi
                i=$((i+1))
              done
              echo "split $f -> ${i} JSON doc(s)"
            done

      # List only the tree this workflow generates files in.
      - name: debug
        run: |
          find ./pkg/acquisition

      - name: validate datasources against schemas
        run: |
          set -euxo pipefail
          # The aggregate schema references the module schemas by their .yaml
          # name; rewrite the reference to the generated .json files.
          sed -i 's/\.yaml/.json/' ./pkg/acquisition/schema/datasource_schema.json
          go install github.com/santhosh-tekuri/jsonschema/cmd/jv@latest
          JV="$(go env GOPATH)/bin/jv"
          # NOTE(review): only valid/ is checked here; the invalid/ fixtures are
          # converted by the split step but nothing asserts they are rejected.
          for item in ./pkg/acquisition/schema/valid/*.json; do
            echo validating "$item"
            "$JV" ./pkg/acquisition/schema/datasource_schema.json "$item"
          done
194 changes: 194 additions & 0 deletions pkg/acquisition/modules/docker/docker_schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
# JSON Schema (draft 2020-12) describing one `source: docker` acquisition entry.
$schema: https://json-schema.org/draft/2020-12/schema
title: CrowdSec Docker datasource
description: >
  Schema for docker acquisition entries consumed by CrowdSec. Every field
  mirrors pkg/acquisition/modules/docker.DockerConfiguration and the embedded
  configuration.DataSourceCommonCfg.
type: object
# Keys that are not declared under `properties` are rejected.
additionalProperties: false
properties:
  source:
    type: string
    const: docker
    description: >
      Must be docker to bind this acquisition entry to the Docker datasource.
  mode:
    type: string
    enum: [tail, cat]
    default: tail
    description: >
      Acquisition mode (tail streams logs, cat performs a finite read).
  labels:
    type: object
    minProperties: 1
    description: >
      Labels attached to emitted events (for example type: nginx).
    # Any label name is allowed as long as its value is a string.
    additionalProperties:
      type: string
    properties:
      type:
        type: string
        description: Parser/collection selector; strongly recommended.
  log_level:
    type: string
    enum: [panic, fatal, error, warn, warning, info, debug, trace]
    description: >
      Overrides the module logger level for this datasource.
  name:
    type: string
    description: Friendly identifier for the datasource entry.
  use_time_machine:
    type: boolean
    default: false
    description: >
      Replays past events when supported by the acquisition module.
  unique_id:
    type: string
    description: >
      Stable identifier injected by cscli/crowdsec autop-run (usually not user set).
  transform:
    type: string
    description: >
      expr program applied to events before they enter the pipeline.
  check_interval:
    type: string
    # One or more <number><unit> segments, so compound and fractional values
    # such as 1m30s or 1.5s are accepted, not just a single integer segment.
    # NOTE(review): assumes the value follows Go duration syntax - confirm
    # against the module; only the ASCII unit spellings are covered.
    pattern: '^([0-9]+(\.[0-9]+)?(ns|us|ms|s|m|h))+$'
    description: >
      Poll interval used by DSN-driven oneshot mode (deprecated in streaming configs).
  follow_stdout:
    type: boolean
    default: true
    description: >
      Stream stdout logs from matching containers/services.
  follow_stderr:
    type: boolean
    default: true
    description: >
      Stream stderr logs from matching containers/services.
  since:
    type: string
    format: date-time
    description: >
      RFC3339 lower-bound timestamp; defaults to the current UTC time if omitted.
  until:
    type: string
    format: date-time
    description: >
      RFC3339 upper-bound timestamp for finite reads.
  docker_host:
    type: string
    description: >
      Optional Docker API endpoint (unix://, tcp:// or npipe://). Defaults to client.FromEnv.
  # Container selectors: exact lists and regular-expression lists.
  container_name:
    $ref: "#/$defs/identifierList"
    description: Exact container names to follow.
  container_id:
    $ref: "#/$defs/identifierList"
    description: Exact container IDs to follow.
  container_name_regexp:
    $ref: "#/$defs/regexpList"
    description: Go regular expressions to match container names.
  container_id_regexp:
    $ref: "#/$defs/regexpList"
    description: Go regular expressions to match container IDs.
  # Service selectors: same shapes as the container selectors.
  service_name:
    $ref: "#/$defs/identifierList"
    description: Exact Swarm service names to follow.
  service_id:
    $ref: "#/$defs/identifierList"
    description: Exact Swarm service IDs to follow.
  service_name_regexp:
    $ref: "#/$defs/regexpList"
    description: Go regular expressions to match service names.
  service_id_regexp:
    $ref: "#/$defs/regexpList"
    description: Go regular expressions to match service IDs.
  use_container_labels:
    type: boolean
    default: false
    description: >
      Populate CrowdSec labels from Docker container labels. Mutually exclusive
      with explicit container selectors.
  use_service_labels:
    type: boolean
    default: false
    description: >
      Populate CrowdSec labels from Docker service labels. Mutually exclusive
      with explicit service selectors.
required:
  - source
# Cross-field rules; all three entries must hold.
allOf:
  # Rule 1: at least one selector, or one of the label-driven flags set to true.
  - description: >
      At least one explicit selector or label-driven selector is required,
      matching hasContainerConfig/hasServiceConfig.
    anyOf:
      - required: [container_name]
      - required: [container_id]
      - required: [container_name_regexp]
      - required: [container_id_regexp]
      - required: [service_name]
      - required: [service_id]
      - required: [service_name_regexp]
      - required: [service_id_regexp]
      - properties:
          use_container_labels:
            const: true
        required: [use_container_labels]
      - properties:
          use_service_labels:
            const: true
        required: [use_service_labels]
  # Rule 2: use_container_labels: true forbids every container selector.
  - if:
      properties:
        use_container_labels:
          const: true
      required: [use_container_labels]
    then:
      not:
        anyOf:
          - required: [container_name]
          - required: [container_id]
          - required: [container_name_regexp]
          - required: [container_id_regexp]
  # Rule 3: use_service_labels: true forbids every service selector.
  - if:
      properties:
        use_service_labels:
          const: true
      required: [use_service_labels]
    then:
      not:
        anyOf:
          - required: [service_name]
          - required: [service_id]
          - required: [service_name_regexp]
          - required: [service_id_regexp]
examples:
  - source: docker
    mode: tail
    labels:
      type: nginx
    container_name:
      - web
    follow_stderr: false
  - source: docker
    mode: cat
    labels:
      type: traefik
    use_service_labels: true
    follow_stdout: true
$defs:
  # Non-empty list of unique, non-empty strings.
  identifierList:
    type: array
    minItems: 1
    uniqueItems: true
    items:
      type: string
      minLength: 1
  # Same shape as identifierList, with items tagged with the regex format.
  regexpList:
    type: array
    minItems: 1
    uniqueItems: true
    items:
      type: string
      minLength: 1
      format: regex
8 changes: 8 additions & 0 deletions pkg/acquisition/schema/datasource_schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Aggregate schema: an acquisition entry is valid when it matches at least one
# of the per-module schemas referenced under anyOf.
$schema: https://json-schema.org/draft/2020-12/schema
# Title names the aggregate schema rather than a single module.
title: CrowdSec acquisition datasource
description: >
  Schema for acquisition entries consumed by CrowdSec. Every field
  mirrors the configuration of at least one acquisition module and the embedded
  configuration DataSourceCommonCfg.
anyOf:
  # Path is relative to this file.
  - $ref: ../modules/docker/docker_schema.yaml
31 changes: 31 additions & 0 deletions pkg/acquisition/schema/valid/docker.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Exact container selectors (name and ID).
source: docker
container_name:
  - 'my_container_name'
container_id:
  - '843ee92d231b'
labels:
  type: log_type
---
# Regular-expression container selectors.
source: docker
container_name_regexp:
  - 'my_containers_*'
container_id_regexp:
  - 'i-*'
labels:
  type: log_type
---
# Exact service selectors (name and ID).
source: docker
service_name:
  - 'my_service_name'
service_id:
  - 'abcdef123456'
labels:
  type: log_type
---
# Regular-expression service selectors.
source: docker
service_name_regexp:
  - 'web_*'
service_id_regexp:
  - 'svc-*'
labels:
  type: log_type