A production-leaning remediation engine for Linux fleets, built for safety, speed, and auditability.
Builds are verified and come with checksums. Air-gapped installers are available for Enterprise.
ghcr.io/ganges/selfheal:0.1.0
# Docker Compose stack: ZooKeeper, Kafka, and the selfheal service.
version: "3.9"
services:
  zookeeper:
    image: bitnami/zookeeper:3.9

  kafka:
    image: bitnami/kafka:3.7

  selfheal:
    image: ghcr.io/ganges/selfheal:0.1.0
    environment:
      # Points at the `kafka` service defined above.
      - BOOTSTRAP_SERVERS=kafka:9092
      # Interpolated by Compose from the host environment (or an .env file).
      - OPENAI_API_KEY=${OPENAI_API_KEY}
    ports:
      # host:container; quoted so the mapping is always read as a string.
      - "8000:8000"
# 1) Install package
sudo dpkg -i selfheal_0.0.9_amd64.deb # or rpm -ivh selfheal-0.0.9.x86_64.rpm
# 2) Configure
sudo mkdir -p /etc/selfheal
# The heredoc delimiter is quoted ('YAML'), so the body is written verbatim
# with no shell expansion. `cmd` is indented under its `- name:` entry so both
# keys belong to the same allowlist item.
sudo tee /etc/selfheal/config.yml <<'YAML'
bootstrap_servers: 172.28.1.20:9092
log_level: INFO
allowlist:
  - name: restart_apache
    cmd: ["systemctl", "restart", "apache2"]
YAML
# 3) Enable service
sudo systemctl enable --now selfheal
# 4) Verify
curl -s http://localhost:8000/healthz
User overrides live in /etc/selfheal/config.yml. Runtime flags are set via environment variables. Dry-run mode and verbosity levels are supported.
# /etc/selfheal/config.yml
bootstrap_servers: 172.28.1.20:9092
consumer_group: selfheal-actions-v1

api:
  host: 0.0.0.0
  port: 8000

llm:
  provider: openai
  model: gpt-5-mini
  max_tokens: 400

safety:
  dry_run: false

# NOTE(review): `allowlist` is kept at the top level, as in the minimal
# quick-start config; confirm against the config schema whether it belongs
# under `safety` instead.
allowlist:
  - name: restart_apache
    cmd: ["systemctl", "restart", "apache2"]

logging:
  level: INFO
  path: /var/log/selfheal/selfheal.log
POST /ingress/webhook
{
  "receiver": "web.hook",
  "status": "firing",
  "alerts": [
    {
      "status": "firing",
      "labels": {
        "alertname": "apacheServiceDown",
        "instance": "172.18.35.51:9100",
        "job": "node_exporter_metrics",
        "name": "apache2.service",
        "state": "inactive"
      },
      "annotations": {
        "summary": "Apache service not running on 172.18.35.51"
      }
    }
  ]
}