diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 4075406..7b05b13 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -21,6 +21,7 @@ make setup-local # Install dependencies locally make test # Run test suite make test-docker # Run tests in Docker container make lint # Check code style with StandardRB +make docs-config # Regenerate docs/configuration.md from docs/config-schema.json make fix # Auto-fix code style issues make console # Start interactive Ruby console make clean # Remove generated files @@ -65,10 +66,17 @@ This uses the repo's `.venv` for pre-commit. If you already have `pre-commit` on bundle exec bin/nerve --help # Validate config -bundle exec bin/nerve -c example/nerve.conf.json --check +bundle exec bin/nerve -c example/nerve.conf.json --check-config # Run with config (requires ZooKeeper) bundle exec bin/nerve -c example/nerve.conf.json + +# Run with config + CLI overrides +bundle exec bin/nerve \ + -c example/nerve.conf.json \ + --instance-id my-host \ + --prometheus-enabled=true \ + --statsd-host statsd.local ``` ## Project Structure diff --git a/Makefile b/Makefile index 0f27ec8..327945c 100644 --- a/Makefile +++ b/Makefile @@ -55,6 +55,10 @@ test-docker: ## Run tests in Docker container lint: ## Check code style with StandardRB $(BUNDLE) exec standardrb +.PHONY: docs-config +docs-config: ## Regenerate configuration reference from JSON schema + $(BUNDLE) exec ruby scripts/generate_config_docs.rb + .PHONY: fix fix: ## Auto-fix code style issues $(BUNDLE) exec standardrb --fix diff --git a/README.md b/README.md index 297b4e0..4399249 100644 --- a/README.md +++ b/README.md @@ -43,81 +43,19 @@ for your infra/apps. ## Configuration ## -Nerve depends on a single configuration file, in json format. -It is usually called `nerve.conf.json`. -An example config file is available in `example/nerve.conf.json`. -The config file is composed of two main sections: +Nerve builds its effective configuration from three sources: -* `instance_id`: the name nerve will submit when registering services; makes debugging easier -* `heartbeat_path`: a path to a file on disk to touch as nerve makes progress. This allows you to work around https://github.com/zk-ruby/zk/issues/50 by restarting a stuck nerve. -* `services`: the hash (from service name to config) of the services nerve will be monitoring -* `service_conf_dir`: path to a directory in which each json file will be interpreted as a service with the basename of the file minus the .json extension +* Base config file (`--config`) +* Optional overlay config (`--config-overlay`, deep-merged) +* Optional CLI overrides (applied last) -### Services Config ### +Start with `example/nerve.conf.json` for a working baseline. -Each service that nerve will be monitoring is specified in the `services` hash. -The key is the name of the service, and the value is a configuration hash telling nerve how to monitor the service. -The configuration contains the following options: +For the full reference, including merge semantics, precedence, CLI flag scope, +and service/check/reporter fields, see: -* `host`: the default host on which to make service checks; you should make this your *public* ip to ensure your service is publicly accessible -* `port`: the default port for service checks; nerve will report the `host`:`port` combo via your chosen reporter -* `reporter_type`: the mechanism used to report up/down information; depending on the reporter you choose, additional parameters may be required. Defaults to `zookeeper` -* `check_interval`: the frequency with which service checks will be initiated; defaults to `500ms` -* `check_mocked`: whether or not health check is mocked, the host check always returns healthy and report up when the value is true -* `checks`: a list of checks that nerve will perform; if all of the pass, the service will be registered; otherwise, it will be un-registered -* `rate_limiting` (optional): a hash containing the configuration for rate limiting (see 'Rate Limiting' below) -* `weight` (optional): a positive integer weight value which can be used to affect the haproxy backend weighting in synapse. -* `haproxy_server_options` (optional): a string containing any special haproxy server options for this service instance. For example if you wanted to set a service instance as a backup. -* `labels` (optional): an object containing user-defined key-value pairs that describe this service instance. For example, you could label service instances with datacenter information. - -#### Rate Limiting #### - -Rate limiting is configured in the `rate_limiting` hash. If enabled, rate limiting is done via the [Token-Bucket algorithm](https://en.wikipedia.org/wiki/Token_bucket). -That hash contains the following values: - -* `shadow_mode` (optional): shadow mode emits metrics/logs for rate limiting, but does not actually throttle requests (defaults to `true`). Set to `false` to throttle requests. -* `average_rate` (optional): enforced average rate limit for reporting (defaults to `infinity`) -* `max_burst` (optional): enforced maximum burst for reporting (defaults to `infinity`) - -#### Zookeeper Reporter #### - -If you set your `reporter_type` to `"zookeeper"` you should also set these parameters: - -* `zk_hosts`: a list of the zookeeper hosts comprising the [ensemble](https://zookeeper.apache.org/doc/r3.1.2/zookeeperAdmin.html#sc_zkMulitServerSetup) that nerve will submit registration to -* `zk_path`: the path (or [znode](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_zkDataModel_znodes)) where the registration will be created; nerve will create the [ephemeral node](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#Ephemeral+Nodes) that is the registration as a child of this path -* `use_path_encoding`: flag to turn on path encoding optimization, the canonical config data at host level (e.g. ip, port, az) is encoded using json base64 and written as zk child name, the zk child data will still be written for backward compatibility - -#### Etcd Reporter #### - -Note: Etcd support is currently experimental! - -If you set your `reporter_type` to `"etcd"` you should also set these parameters: - -* `etcd_host`: etcd host that nerve will submit registration to -* `etcd_port`: port to connect to etcd. -* `etcd_path`: the path where the registration will be created; nerve will create a node with a 30s ttl that is the registration as a child of this path, and then update it every few seconds - -### Checks ### - -The core of nerve is a set of service checks. -Each service can define a number of checks, and all of them must pass for the service to be registered. -Although the exact parameters passed to each check are different, all take a number of common arguments: - -* `type`: (required) the kind of check; you can see available check types in the `lib/nerve/service_watcher` dir of this repo -* `name`: (optional) a descriptive, human-readable name for the check; it will be auto-generated based on the other parameters if not specified -* `host`: (optional) the host on which the check will be performed; defaults to the `host` of the service to which the check belongs -* `port`: (optional) the port on which the check will be performed; like `host`, it defaults to the `port` of the service -* `timeout`: (optional) maximum time the check can take; defaults to `100ms` -* `rise`: (optional) how many consecutive checks must pass before the check is considered passing; defaults to 1 -* `fall`: (optional) how many consecutive checks must fail before the check is considered failing; defaults to 1 - -#### Custom External Checks #### - -If you would like to run a custom check but don't feel like trying to get it merged into this project, there is a mechanism for including external checks thanks to @bakins (airbnb/nerve#36). -Build your custom check as a separate gem and make sure to `bundle install` it on your system. - -Ideally, you should name your gem `"nerve-watcher-#{type}"`, as that is what nerve will `require` on boot. -However, if you have a custom name for your gem, you can specify that in the `module` argument to the check. +* [`docs/configuration.md`](docs/configuration.md) +* [`docs/config-schema.json`](docs/config-schema.json) ## Contributing diff --git a/docs/config-schema.json b/docs/config-schema.json new file mode 100644 index 0000000..7186339 --- /dev/null +++ b/docs/config-schema.json @@ -0,0 +1,426 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/Yelp/nerve/blob/master/docs/config-schema.json", + "title": "Nerve configuration", + "description": "Schema for Nerve base configuration, optional overlay configuration, and CLI-overridable fields.", + "type": "object", + "additionalProperties": true, + "required": [ + "instance_id", + "services" + ], + "x-config-sources": [ + { + "name": "Base config file", + "flag": "--config", + "required": true, + "description": "Primary YAML/JSON configuration file." + }, + { + "name": "Overlay config file", + "flag": "--config-overlay", + "required": false, + "description": "Optional deep-merge overlay loaded after the base config." + }, + { + "name": "CLI overrides", + "required": false, + "description": "Typed flags applied after config files." + } + ], + "x-config-precedence": [ + { + "source": "CLI overrides", + "description": "Highest precedence. Explicit runtime overrides for selected fields." + }, + { + "source": "Overlay config (`--config-overlay`)", + "description": "Deep-merged on top of the base config after service file loading." + }, + { + "source": "Base config (`--config`)", + "description": "Lowest precedence baseline configuration." + } + ], + "x-load-order": [ + "Parse the base config from --config.", + "Resolve service_conf_dir in this order: CLI override, overlay, base config.", + "Load per-service YAML/JSON files from service_conf_dir into services.", + "Deep-merge overlay config on top of the base+service config.", + "Apply CLI overrides (including --instance-id aliases and typed global flags)." + ], + "x-doc-sections": [ + { + "title": "Top-level fields", + "pointer": "#" + }, + { + "title": "StatsD fields", + "pointer": "#/properties/statsd" + }, + { + "title": "Prometheus fields", + "pointer": "#/properties/prometheus" + }, + { + "title": "Service fields", + "pointer": "#/$defs/service" + }, + { + "title": "Rate limiting fields", + "pointer": "#/$defs/rate_limiting" + }, + { + "title": "Check fields", + "pointer": "#/$defs/check" + } + ], + "x-cli-examples": [ + [ + "bundle exec bin/nerve \\", + " --config /etc/nerve/nerve.conf.json \\", + " --config-overlay /etc/nerve/overlay.yaml \\", + " --instance-id host-123 \\", + " --prometheus-enabled=true \\", + " --prometheus-histogram-buckets-zk='[0.01, 0.1, 1.0]'" + ], + [ + "bundle exec bin/nerve \\", + " --config /etc/nerve/nerve.conf.json \\", + " --statsd-host statsd.local \\", + " --statsd-port 8125 \\", + " --max-repeated-report-failures 20" + ] + ], + "properties": { + "instance_id": { + "type": "string", + "description": "Instance identifier reported to service discovery backends.", + "x-cli-flags": [ + "--instance-id", + "--instance_id", + "-i" + ] + }, + "services": { + "type": "object", + "description": "Map of service name to service configuration.", + "default": {}, + "additionalProperties": { + "$ref": "#/$defs/service" + } + }, + "heartbeat_path": { + "type": "string", + "description": "Optional path for a heartbeat file touched on each main loop iteration.", + "x-cli-flags": [ + "--heartbeat-path" + ] + }, + "service_conf_dir": { + "type": "string", + "description": "Optional directory containing per-service YAML/JSON config files.", + "x-cli-flags": [ + "--service-conf-dir" + ] + }, + "max_repeated_report_failures": { + "type": "integer", + "minimum": 1, + "description": "Maximum consecutive reporter failures before stopping a watcher.", + "x-cli-flags": [ + "--max-repeated-report-failures" + ] + }, + "statsd": { + "type": "object", + "description": "Global StatsD client settings.", + "additionalProperties": true, + "properties": { + "host": { + "type": "string", + "description": "StatsD host.", + "x-cli-flags": [ + "--statsd-host" + ] + }, + "port": { + "type": "integer", + "description": "StatsD port.", + "x-cli-flags": [ + "--statsd-port" + ] + } + } + }, + "prometheus": { + "type": "object", + "description": "Prometheus metrics endpoint configuration.", + "additionalProperties": true, + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable Prometheus metrics endpoint.", + "x-cli-flags": [ + "--prometheus-enabled" + ] + }, + "port": { + "type": "integer", + "description": "Prometheus metrics listener port.", + "x-cli-flags": [ + "--prometheus-port" + ] + }, + "bind": { + "type": "string", + "description": "Prometheus metrics bind address.", + "x-cli-flags": [ + "--prometheus-bind" + ] + }, + "histogram_buckets_zk": { + "type": "array", + "description": "Histogram bucket boundaries for ZooKeeper operation metrics.", + "items": { + "type": "number" + }, + "x-cli-flags": [ + "--prometheus-histogram-buckets-zk" + ] + }, + "histogram_buckets_main_loop": { + "type": "array", + "description": "Histogram bucket boundaries for main loop duration metrics.", + "items": { + "type": "number" + }, + "x-cli-flags": [ + "--prometheus-histogram-buckets-main-loop" + ] + } + } + } + }, + "$defs": { + "service": { + "type": "object", + "description": "Configuration for a single service watcher.", + "required": [ + "host", + "port" + ], + "additionalProperties": true, + "properties": { + "host": { + "type": "string", + "description": "Host that checks should target by default." + }, + "port": { + "type": "integer", + "description": "Port that checks should target by default." + }, + "reporter_type": { + "type": "string", + "description": "Reporter backend type. Built-in values are zookeeper and etcd.", + "default": "zookeeper" + }, + "check_interval": { + "type": "number", + "minimum": 0, + "description": "Seconds between health-check cycles.", + "default": 0.5 + }, + "check_mocked": { + "type": "boolean", + "description": "If true, skip checks and report the service as healthy.", + "default": false + }, + "checks": { + "type": "array", + "description": "Health checks for this service. All checks must pass for the service to be considered up.", + "items": { + "$ref": "#/$defs/check" + }, + "default": [] + }, + "rate_limiting": { + "$ref": "#/$defs/rate_limiting" + }, + "max_repeated_report_failures": { + "type": "integer", + "minimum": 1, + "description": "Per-service override for max consecutive report failures before stopping watcher.", + "default": 10 + }, + "load_test_concurrency": { + "type": "integer", + "minimum": 1, + "description": "If set, Nerve launches this many watcher instances for the service." + }, + "weight": { + "type": "integer", + "minimum": 0, + "description": "Optional service weight included in reporter payload." + }, + "haproxy_server_options": { + "type": "string", + "description": "Optional HAProxy server options included in reporter payload." + }, + "labels": { + "type": "object", + "description": "Optional labels included in reporter payload.", + "additionalProperties": true + }, + "zk_hosts": { + "type": "array", + "description": "Required when reporter_type is zookeeper. List of ZooKeeper hosts.", + "items": { + "type": "string" + } + }, + "zk_path": { + "type": "string", + "description": "Required when reporter_type is zookeeper. Parent path for ephemeral registration keys." + }, + "use_path_encoding": { + "type": "boolean", + "description": "When true, encode full payload into the ZooKeeper child key where possible." + }, + "etcd_host": { + "type": "string", + "description": "Required when reporter_type is etcd. Etcd host." + }, + "etcd_port": { + "type": "integer", + "description": "Etcd port.", + "default": 4003 + }, + "etcd_path": { + "type": "string", + "description": "Etcd parent path where service keys are created.", + "default": "/" + } + } + }, + "rate_limiting": { + "type": "object", + "description": "Token-bucket rate limiter for status transition reports.", + "additionalProperties": true, + "properties": { + "shadow_mode": { + "type": "boolean", + "description": "When true, throttling is observed but updates are still sent.", + "default": true + }, + "average_rate": { + "type": "number", + "minimum": 0, + "description": "Average allowed status transitions per second.", + "default": "infinity" + }, + "max_burst": { + "type": "number", + "minimum": 1, + "description": "Maximum burst size when tokens are available.", + "default": "infinity" + } + } + }, + "check": { + "type": "object", + "description": "Health check definition. Built-in check types are noop, tcp, http, redis, and rabbitmq.", + "required": [ + "type" + ], + "additionalProperties": true, + "properties": { + "type": { + "type": "string", + "description": "Check type. Custom external checks are also supported via plugins." + }, + "module": { + "type": "string", + "description": "Optional Ruby module/gem to require for custom check types." + }, + "name": { + "type": "string", + "description": "Optional check name. Defaults to a generated name if omitted." + }, + "host": { + "type": "string", + "description": "Target host. Defaults to the service host if omitted." + }, + "port": { + "type": "integer", + "description": "Target port. Defaults to the service port if omitted." + }, + "timeout": { + "type": "number", + "minimum": 0, + "description": "Check timeout in seconds.", + "default": 0.1 + }, + "rise": { + "type": "integer", + "minimum": 1, + "description": "Consecutive successes required to transition to up.", + "default": 1 + }, + "fall": { + "type": "integer", + "minimum": 1, + "description": "Consecutive failures required to transition to down.", + "default": 1 + }, + "uri": { + "type": "string", + "description": "Required for http checks. Request path, for example /health." + }, + "ssl": { + "type": "boolean", + "description": "HTTP check only. Use TLS when true.", + "default": false + }, + "read_timeout": { + "type": "number", + "minimum": 0, + "description": "HTTP check only. Socket read timeout in seconds." + }, + "open_timeout": { + "type": "number", + "minimum": 0, + "description": "HTTP check only. Connection open timeout in seconds.", + "default": 0.2 + }, + "ssl_timeout": { + "type": "number", + "minimum": 0, + "description": "HTTP check only. TLS handshake timeout in seconds.", + "default": 0.2 + }, + "headers": { + "type": "object", + "description": "HTTP check only. Additional request headers.", + "additionalProperties": { + "type": "string" + } + }, + "expect": { + "type": "string", + "description": "HTTP check only. Substring expected in response body." + }, + "username": { + "type": "string", + "description": "RabbitMQ check only. Username.", + "default": "guest" + }, + "password": { + "type": "string", + "description": "RabbitMQ check only. Password.", + "default": "guest" + } + } + } + } +} diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..474a93f --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,178 @@ + + +# Configuration Reference + +Canonical source: `docs/config-schema.json`. + +Nerve computes effective runtime configuration from layered sources. + +## Configuration Inputs + +- `Base config file` (`--config`): required. Primary YAML/JSON configuration file. +- `Overlay config file` (`--config-overlay`): optional. Optional deep-merge overlay loaded after the base config. +- `CLI overrides`: optional. Typed flags applied after config files. + +## Configuration Precedence + +Highest precedence first: + +1. CLI overrides: Highest precedence. Explicit runtime overrides for selected fields. +2. Overlay config (`--config-overlay`): Deep-merged on top of the base config after service file loading. +3. Base config (`--config`): Lowest precedence baseline configuration. + +## Load And Merge Order + +1. Parse the base config from --config. +2. Resolve service_conf_dir in this order: CLI override, overlay, base config. +3. Load per-service YAML/JSON files from service_conf_dir into services. +4. Deep-merge overlay config on top of the base+service config. +5. Apply CLI overrides (including --instance-id aliases and typed global flags). + +## CLI Override Flags + +Only selected global fields are CLI-overridable today: + +| Config path | CLI flags | Type | Notes | +| --- | --- | --- | --- | +| `instance_id` | `--instance-id`, `--instance_id`, `-i` | `string` | Instance identifier reported to service discovery backends. | +| `heartbeat_path` | `--heartbeat-path` | `string` | Optional path for a heartbeat file touched on each main loop iteration. | +| `service_conf_dir` | `--service-conf-dir` | `string` | Optional directory containing per-service YAML/JSON config files. | +| `max_repeated_report_failures` | `--max-repeated-report-failures` | `integer` | Maximum consecutive reporter failures before stopping a watcher. Minimum: `1`. | +| `statsd.host` | `--statsd-host` | `string` | StatsD host. | +| `statsd.port` | `--statsd-port` | `integer` | StatsD port. | +| `prometheus.enabled` | `--prometheus-enabled` | `boolean` | Enable Prometheus metrics endpoint. | +| `prometheus.port` | `--prometheus-port` | `integer` | Prometheus metrics listener port. | +| `prometheus.bind` | `--prometheus-bind` | `string` | Prometheus metrics bind address. | +| `prometheus.histogram_buckets_zk` | `--prometheus-histogram-buckets-zk` | `array` | Histogram bucket boundaries for ZooKeeper operation metrics. | +| `prometheus.histogram_buckets_main_loop` | `--prometheus-histogram-buckets-main-loop` | `array` | Histogram bucket boundaries for main loop duration metrics. | + +## Top-level fields + +Schema for Nerve base configuration, optional overlay configuration, and CLI-overridable fields. + +| Field | Type | Required | Default | CLI flags | Description | +| --- | --- | --- | --- | --- | --- | +| `instance_id` | `string` | yes | - | `--instance-id`, `--instance_id`, `-i` | Instance identifier reported to service discovery backends. | +| `services` | `object` | yes | `{}` | - | Map of service name to service configuration. | +| `heartbeat_path` | `string` | no | - | `--heartbeat-path` | Optional path for a heartbeat file touched on each main loop iteration. | +| `service_conf_dir` | `string` | no | - | `--service-conf-dir` | Optional directory containing per-service YAML/JSON config files. | +| `max_repeated_report_failures` | `integer` | no | - | `--max-repeated-report-failures` | Maximum consecutive reporter failures before stopping a watcher. Minimum: `1`. | +| `statsd` | `object` | no | - | - | Global StatsD client settings. | +| `prometheus` | `object` | no | - | - | Prometheus metrics endpoint configuration. | + +Additional properties are allowed. + +## StatsD fields + +Global StatsD client settings. + +| Field | Type | Required | Default | CLI flags | Description | +| --- | --- | --- | --- | --- | --- | +| `host` | `string` | no | - | `--statsd-host` | StatsD host. | +| `port` | `integer` | no | - | `--statsd-port` | StatsD port. | + +Additional properties are allowed. + +## Prometheus fields + +Prometheus metrics endpoint configuration. + +| Field | Type | Required | Default | CLI flags | Description | +| --- | --- | --- | --- | --- | --- | +| `enabled` | `boolean` | no | - | `--prometheus-enabled` | Enable Prometheus metrics endpoint. | +| `port` | `integer` | no | - | `--prometheus-port` | Prometheus metrics listener port. | +| `bind` | `string` | no | - | `--prometheus-bind` | Prometheus metrics bind address. | +| `histogram_buckets_zk` | `array` | no | - | `--prometheus-histogram-buckets-zk` | Histogram bucket boundaries for ZooKeeper operation metrics. | +| `histogram_buckets_main_loop` | `array` | no | - | `--prometheus-histogram-buckets-main-loop` | Histogram bucket boundaries for main loop duration metrics. | + +Additional properties are allowed. + +## Service fields + +Configuration for a single service watcher. + +| Field | Type | Required | Default | CLI flags | Description | +| --- | --- | --- | --- | --- | --- | +| `host` | `string` | yes | - | - | Host that checks should target by default. | +| `port` | `integer` | yes | - | - | Port that checks should target by default. | +| `reporter_type` | `string` | no | `zookeeper` | - | Reporter backend type. Built-in values are zookeeper and etcd. | +| `check_interval` | `number` | no | `0.5` | - | Seconds between health-check cycles. Minimum: `0`. | +| `check_mocked` | `boolean` | no | `false` | - | If true, skip checks and report the service as healthy. | +| `checks` | `array` | no | `[]` | - | Health checks for this service. All checks must pass for the service to be considered up. | +| `rate_limiting` | `rate_limiting` | no | - | - | Token-bucket rate limiter for status transition reports. | +| `max_repeated_report_failures` | `integer` | no | `10` | - | Per-service override for max consecutive report failures before stopping watcher. Minimum: `1`. | +| `load_test_concurrency` | `integer` | no | - | - | If set, Nerve launches this many watcher instances for the service. Minimum: `1`. | +| `weight` | `integer` | no | - | - | Optional service weight included in reporter payload. Minimum: `0`. | +| `haproxy_server_options` | `string` | no | - | - | Optional HAProxy server options included in reporter payload. | +| `labels` | `object` | no | - | - | Optional labels included in reporter payload. | +| `zk_hosts` | `array` | no | - | - | Required when reporter_type is zookeeper. List of ZooKeeper hosts. | +| `zk_path` | `string` | no | - | - | Required when reporter_type is zookeeper. Parent path for ephemeral registration keys. | +| `use_path_encoding` | `boolean` | no | - | - | When true, encode full payload into the ZooKeeper child key where possible. | +| `etcd_host` | `string` | no | - | - | Required when reporter_type is etcd. Etcd host. | +| `etcd_port` | `integer` | no | `4003` | - | Etcd port. | +| `etcd_path` | `string` | no | `/` | - | Etcd parent path where service keys are created. | + +Additional properties are allowed. + +## Rate limiting fields + +Token-bucket rate limiter for status transition reports. + +| Field | Type | Required | Default | CLI flags | Description | +| --- | --- | --- | --- | --- | --- | +| `shadow_mode` | `boolean` | no | `true` | - | When true, throttling is observed but updates are still sent. | +| `average_rate` | `number` | no | `infinity` | - | Average allowed status transitions per second. Minimum: `0`. | +| `max_burst` | `number` | no | `infinity` | - | Maximum burst size when tokens are available. Minimum: `1`. | + +Additional properties are allowed. + +## Check fields + +Health check definition. Built-in check types are noop, tcp, http, redis, and rabbitmq. + +| Field | Type | Required | Default | CLI flags | Description | +| --- | --- | --- | --- | --- | --- | +| `type` | `string` | yes | - | - | Check type. Custom external checks are also supported via plugins. | +| `module` | `string` | no | - | - | Optional Ruby module/gem to require for custom check types. | +| `name` | `string` | no | - | - | Optional check name. Defaults to a generated name if omitted. | +| `host` | `string` | no | - | - | Target host. Defaults to the service host if omitted. | +| `port` | `integer` | no | - | - | Target port. Defaults to the service port if omitted. | +| `timeout` | `number` | no | `0.1` | - | Check timeout in seconds. Minimum: `0`. | +| `rise` | `integer` | no | `1` | - | Consecutive successes required to transition to up. Minimum: `1`. | +| `fall` | `integer` | no | `1` | - | Consecutive failures required to transition to down. Minimum: `1`. | +| `uri` | `string` | no | - | - | Required for http checks. Request path, for example /health. | +| `ssl` | `boolean` | no | `false` | - | HTTP check only. Use TLS when true. | +| `read_timeout` | `number` | no | - | - | HTTP check only. Socket read timeout in seconds. Minimum: `0`. | +| `open_timeout` | `number` | no | `0.2` | - | HTTP check only. Connection open timeout in seconds. Minimum: `0`. | +| `ssl_timeout` | `number` | no | `0.2` | - | HTTP check only. TLS handshake timeout in seconds. Minimum: `0`. | +| `headers` | `object` | no | - | - | HTTP check only. Additional request headers. | +| `expect` | `string` | no | - | - | HTTP check only. Substring expected in response body. | +| `username` | `string` | no | `guest` | - | RabbitMQ check only. Username. | +| `password` | `string` | no | `guest` | - | RabbitMQ check only. Password. | + +Additional properties are allowed. + +## Extensibility + +- Top-level config allows additional properties so operators can carry non-core config values. +- Service objects allow additional properties for custom reporters and service-specific extensions. +- Check objects allow additional properties for plugin-defined check parameters. + +## Examples + +```bash +bundle exec bin/nerve \ + --config /etc/nerve/nerve.conf.json \ + --config-overlay /etc/nerve/overlay.yaml \ + --instance-id host-123 \ + --prometheus-enabled=true \ + --prometheus-histogram-buckets-zk='[0.01, 0.1, 1.0]' +``` + +```bash +bundle exec bin/nerve \ + --config /etc/nerve/nerve.conf.json \ + --statsd-host statsd.local \ + --statsd-port 8125 \ + --max-repeated-report-failures 20 +``` diff --git a/lib/nerve/configuration_manager.rb b/lib/nerve/configuration_manager.rb index b972561..2b81a2c 100644 --- a/lib/nerve/configuration_manager.rb +++ b/lib/nerve/configuration_manager.rb @@ -1,4 +1,5 @@ require "yaml" +require "json" require "optparse" require "nerve/log" @@ -6,11 +7,110 @@ module Nerve class ConfigurationManager include Logging + class BooleanFlagValue + end + + class JsonArrayFlagValue + end + + OptionParser.accept(BooleanFlagValue) do |value| + normalized = value.to_s.strip.downcase + case normalized + when "true" + true + when "false" + false + else + raise OptionParser::InvalidArgument, "expected true or false, got #{value.inspect}" + end + end + + OptionParser.accept(JsonArrayFlagValue) do |value| + parsed = JSON.parse(value) + unless parsed.is_a?(Array) + raise OptionParser::InvalidArgument, "expected JSON array, got #{value.inspect}" + end + parsed + rescue JSON::ParserError + raise OptionParser::InvalidArgument, "expected JSON array, got #{value.inspect}" + end + + FLAG_TYPES = { + string: String, + integer: Integer, + boolean: BooleanFlagValue, + json_array: JsonArrayFlagValue + }.freeze + + CONFIG_OVERRIDE_FLAGS = [ + { + switches: ["--heartbeat-path heartbeat_path"], + path: ["heartbeat_path"], + type: :string, + description: "path to heartbeat file touched on each main loop iteration" + }, + { + switches: ["--max-repeated-report-failures count"], + path: ["max_repeated_report_failures"], + type: :integer, + description: "maximum repeated report failures before stopping a watcher" + }, + { + switches: ["--service-conf-dir service_conf_dir"], + path: ["service_conf_dir"], + type: :string, + description: "directory containing per-service config files" + }, + { + switches: ["--statsd-host host"], + path: ["statsd", "host"], + type: :string, + description: "StatsD host" + }, + { + switches: ["--statsd-port port"], + path: ["statsd", "port"], + type: :integer, + description: "StatsD port" + }, + { + switches: ["--prometheus-enabled enabled"], + path: ["prometheus", "enabled"], + type: :boolean, + description: "enable Prometheus metrics endpoint (true/false)" + }, + { + switches: ["--prometheus-port port"], + path: ["prometheus", "port"], + type: :integer, + description: "Prometheus metrics listen port" + }, + { + switches: ["--prometheus-bind bind"], + path: ["prometheus", "bind"], + type: :string, + description: "Prometheus metrics bind address" + }, + { + switches: ["--prometheus-histogram-buckets-zk buckets"], + path: ["prometheus", "histogram_buckets_zk"], + type: :json_array, + description: "Prometheus zk histogram buckets as a JSON array" + }, + { + switches: ["--prometheus-histogram-buckets-main-loop buckets"], + path: ["prometheus", "histogram_buckets_main_loop"], + type: :json_array, + description: "Prometheus main loop histogram buckets as a JSON array" + } + ].freeze + attr_reader :options attr_reader :config def parse_options_from_argv! options = {} + config_overrides = {} # set command line options optparse = OptionParser.new do |opts| opts.banner = <<~EOB @@ -20,20 +120,21 @@ def parse_options_from_argv! EOB options[:config] = ENV["NERVE_CONFIG"] - opts.on("-c config", "--config config", String, "path to nerve config") do |key, value| - options[:config] = key + opts.on("-c config", "--config config", String, "path to nerve config") do |value| + options[:config] = value end options[:config_overlay] = ENV["NERVE_CONFIG_OVERLAY"] opts.on("-o config_overlay", "--config-overlay config_overlay", String, - "path to overlay config (deep-merged on top of main config)") do |key, value| - options[:config_overlay] = key + "path to overlay config (deep-merged on top of main config)") do |value| + options[:config_overlay] = value end options[:instance_id] = ENV["NERVE_INSTANCE_ID"] - opts.on("-i instance_id", "--instance_id instance_id", String, - "reported as `name` to ZK; overrides instance id from config file") do |key, value| - options[:instance_id] = key + opts.on("-i instance_id", "--instance_id instance_id", "--instance-id instance_id", String, + "reported as `name` to ZK; overrides instance id from config file") do |value| + options[:instance_id] = value + set_deep_value!(config_overrides, ["instance_id"], value) end options[:check_config] = ENV["NERVE_CHECK_CONFIG"] @@ -42,12 +143,19 @@ def parse_options_from_argv! options[:check_config] = true end + CONFIG_OVERRIDE_FLAGS.each do |override_flag| + opts.on(*override_flag[:switches], FLAG_TYPES.fetch(override_flag[:type]), override_flag[:description]) do |value| + set_deep_value!(config_overrides, override_flag[:path], value) + end + end + opts.on("-h", "--help", "Display this screen") do puts opts exit end end optparse.parse! + options[:config_overrides] = config_overrides unless config_overrides.empty? options end @@ -59,17 +167,17 @@ def generate_nerve_config(options) config = parse_config_file(options[:config]) config["services"] ||= {} - if config.has_key?("service_conf_dir") - cdir = File.expand_path(config["service_conf_dir"]) - if !Dir.exist?(cdir) - raise "service conf dir does not exist:#{cdir}" - end - cfiles = Dir.glob(File.join(cdir, "*.{yaml,json}")) - cfiles.each { |x| config["services"][File.basename(x[/(.*)\.(yaml|json)$/, 1])] = parse_config_file(x) } - end - + overlay = nil if options[:config_overlay] overlay = parse_overlay_file(options[:config_overlay]) + end + + load_service_configs!( + config, + resolve_service_conf_dir(config, overlay, options[:config_overrides] || {}) + ) + + if overlay config = deep_merge(config, overlay) if overlay end @@ -77,6 +185,10 @@ def generate_nerve_config(options) config["instance_id"] = options[:instance_id] end + if options[:config_overrides] + config = deep_merge(config, options[:config_overrides]) + end + config end @@ -110,6 +222,37 @@ def reload! private + def set_deep_value!(hash, path, value) + target = hash + path[0...-1].each do |part| + target[part] ||= {} + target = target[part] + end + target[path.last] = value + end + + def resolve_service_conf_dir(config, overlay, config_overrides) + if config_overrides.key?("service_conf_dir") + config_overrides["service_conf_dir"] + elsif overlay.is_a?(Hash) && overlay.key?("service_conf_dir") + overlay["service_conf_dir"] + else + config["service_conf_dir"] + end + end + + def load_service_configs!(config, service_conf_dir) + return unless service_conf_dir + + cdir = File.expand_path(service_conf_dir) + if !Dir.exist?(cdir) + raise "service conf dir does not exist:#{cdir}" + end + + cfiles = Dir.glob(File.join(cdir, "*.{yaml,json}")) + cfiles.each { |x| config["services"][File.basename(x[/(.*)\.(yaml|json)$/, 1])] = parse_config_file(x) } + end + def parse_overlay_file(path) if File.read(path).strip.empty? {} diff --git a/scripts/generate_config_docs.rb b/scripts/generate_config_docs.rb new file mode 100755 index 0000000..966abe6 --- /dev/null +++ b/scripts/generate_config_docs.rb @@ -0,0 +1,247 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "json" + +ROOT_DIR = File.expand_path("..", __dir__) +SCHEMA_PATH = File.join(ROOT_DIR, "docs", "config-schema.json") +OUTPUT_PATH = File.join(ROOT_DIR, "docs", "configuration.md") + +def resolve_pointer(document, pointer) + return document if pointer == "#" + + tokens = pointer.sub(%r{\A#/}, "").split("/").map do |token| + token.gsub("~1", "/").gsub("~0", "~") + end + + tokens.reduce(document) do |memo, token| + memo.fetch(token) + end +end + +def dereference(node, schema) + return node unless node.is_a?(Hash) && node["$ref"] + + referenced = resolve_pointer(schema, node["$ref"]) + referenced.merge(node.except("$ref")) +end + +def format_default(value) + case value + when String + value + when Numeric, TrueClass, FalseClass + value.to_s + else + JSON.generate(value) + end +end + +def schema_type(node) + return ref_name(node["$ref"]) if node["$ref"] + + type = node["type"] + return "any" unless type + + if type.is_a?(Array) + return type.join(" | ") + end + + if type == "array" + item = node["items"] || {} + return "array<#{schema_type(item)}>" + end + + if type == "object" + additional = node["additionalProperties"] + if additional.is_a?(Hash) && additional["$ref"] && !node["properties"]&.any? + return "object" + end + end + + type +end + +def ref_name(ref) + ref.split("/").last +end + +def normalized_description(node, schema) + effective = dereference(node, schema) + parts = [] + parts << effective["description"] if effective["description"] + + if effective["enum"] + enum_values = effective["enum"].map { |value| "`#{format_default(value)}`" } + parts << "Allowed values: #{enum_values.join(", ")}." + end + + if effective.key?("minimum") + parts << "Minimum: `#{effective["minimum"]}`." + end + + if effective.key?("maximum") + parts << "Maximum: `#{effective["maximum"]}`." + end + + parts.join(" ") +end + +def render_rows(node, schema) + required = Array(node["required"]) + + (node["properties"] || {}).map do |name, property_schema| + effective = dereference(property_schema, schema) + cli_flags = Array(property_schema["x-cli-flags"]).map { |flag| "`#{flag}`" } + + { + name: name, + type: schema_type(property_schema), + required: required.include?(name) ? "yes" : "no", + default: effective.key?("default") ? "`#{format_default(effective["default"])}`" : "-", + cli: cli_flags.empty? ? "-" : cli_flags.join(", "), + description: normalized_description(property_schema, schema) + } + end +end + +def escape_cell(value) + value.to_s.gsub("|", "\\|") +end + +def additional_properties_note(node) + additional = node["additionalProperties"] + return nil unless node["type"] == "object" || node["properties"] + + case additional + when true + "Additional properties are allowed." + when false + "Additional properties are not allowed." + when Hash + if additional["$ref"] + "Additional properties must match `#{ref_name(additional["$ref"])}`." + else + "Additional properties must match `#{schema_type(additional)}`." + end + end +end + +def collect_cli_flags(node, schema, path = [], output = []) + return output unless node.is_a?(Hash) + + if node["x-cli-flags"] + output << { + path: path.join("."), + flags: node["x-cli-flags"], + type: schema_type(node), + description: normalized_description(node, schema) + } + end + + (node["properties"] || {}).each do |name, child| + collect_cli_flags(child, schema, path + [name], output) + end + + output +end + +schema = JSON.parse(File.read(SCHEMA_PATH)) + +lines = [] +lines << "" +lines << "" +lines << "# Configuration Reference" +lines << "" +lines << "Canonical source: `docs/config-schema.json`." +lines << "" +lines << "Nerve computes effective runtime configuration from layered sources." +lines << "" +lines << "## Configuration Inputs" +lines << "" + +Array(schema["x-config-sources"]).each do |source| + required = source["required"] ? "required" : "optional" + flag = source["flag"] ? " (`#{source["flag"]}`)" : "" + lines << "- `#{source["name"]}`#{flag}: #{required}. #{source["description"]}" +end + +lines << "" +lines << "## Configuration Precedence" +lines << "" +lines << "Highest precedence first:" +lines << "" +Array(schema["x-config-precedence"]).each_with_index do |source, index| + lines << "#{index + 1}. #{source["source"]}: #{source["description"]}" +end + +lines << "" +lines << "## Load And Merge Order" +lines << "" +Array(schema["x-load-order"]).each_with_index do |step, index| + lines << "#{index + 1}. #{step}" +end + +cli_flags = collect_cli_flags(schema, schema) + +lines << "" +lines << "## CLI Override Flags" +lines << "" +lines << "Only selected global fields are CLI-overridable today:" +lines << "" +lines << "| Config path | CLI flags | Type | Notes |" +lines << "| --- | --- | --- | --- |" +cli_flags.each do |entry| + flags = entry[:flags].map { |flag| "`#{flag}`" }.join(", ") + lines << "| `#{entry[:path]}` | #{flags} | `#{entry[:type]}` | #{escape_cell(entry[:description])} |" +end + +Array(schema["x-doc-sections"]).each do |section| + node = resolve_pointer(schema, section["pointer"]) + lines << "" + lines << "## #{section["title"]}" + lines << "" + lines << node["description"] if node["description"] + lines << "" if node["description"] + + rows = render_rows(node, schema) + if rows.empty? + lines << "No fields documented in this section." + else + lines << "| Field | Type | Required | Default | CLI flags | Description |" + lines << "| --- | --- | --- | --- | --- | --- |" + rows.each do |row| + description = row[:description].empty? ? "-" : escape_cell(row[:description]) + lines << "| `#{row[:name]}` | `#{row[:type]}` | #{row[:required]} | #{row[:default]} | #{row[:cli]} | #{description} |" + end + end + + note = additional_properties_note(node) + if note + lines << "" + lines << note + end +end + +lines << "" +lines << "## Extensibility" +lines << "" +lines << "- Top-level config allows additional properties so operators can carry non-core config values." +lines << "- Service objects allow additional properties for custom reporters and service-specific extensions." +lines << "- Check objects allow additional properties for plugin-defined check parameters." + +examples = Array(schema["x-cli-examples"]) +unless examples.empty? + lines << "" + lines << "## Examples" + + examples.each do |example_lines| + lines << "" + lines << "```bash" + example_lines.each { |line| lines << line } + lines << "```" + end +end + +File.write(OUTPUT_PATH, lines.join("\n") + "\n") +puts "Wrote #{OUTPUT_PATH}" diff --git a/spec/configuration_manager_spec.rb b/spec/configuration_manager_spec.rb index 1c5d4cf..f422dfd 100644 --- a/spec/configuration_manager_spec.rb +++ b/spec/configuration_manager_spec.rb @@ -3,6 +3,14 @@ require "nerve/configuration_manager" describe Nerve::ConfigurationManager do + def with_argv(argv) + original_argv = ARGV.dup + ARGV.replace(argv) + yield + ensure + ARGV.replace(original_argv) + end + describe "parsing config" do let(:config_manager) { Nerve::ConfigurationManager.new } let(:nerve_config) { "#{File.dirname(__FILE__)}/../example/nerve.conf.json" } @@ -30,6 +38,66 @@ "etcd_service1", "zookeeper_service1" ) end + + it "parses named CLI config override flags" do + options = with_argv([ + "--instance-id", "cli-instance", + "--heartbeat-path", "/var/run/nerve.heartbeat", + "--max-repeated-report-failures", "22", + "--service-conf-dir", "/etc/nerve/services", + "--statsd-host", "statsd.example", + "--statsd-port", "8126", + "--prometheus-enabled", "true", + "--prometheus-port", "9393", + "--prometheus-bind", "127.0.0.1", + "--prometheus-histogram-buckets-zk", "[0.1, 1.0]", + "--prometheus-histogram-buckets-main-loop", "[0.2, 2.0]" + ]) { config_manager.parse_options_from_argv! } + + expect(options[:instance_id]).to eq("cli-instance") + expect(options[:config_overrides]).to eq({ + "instance_id" => "cli-instance", + "heartbeat_path" => "/var/run/nerve.heartbeat", + "max_repeated_report_failures" => 22, + "service_conf_dir" => "/etc/nerve/services", + "statsd" => { + "host" => "statsd.example", + "port" => 8126 + }, + "prometheus" => { + "enabled" => true, + "port" => 9393, + "bind" => "127.0.0.1", + "histogram_buckets_zk" => [0.1, 1.0], + "histogram_buckets_main_loop" => [0.2, 2.0] + } + }) + end + + it "accepts the legacy --instance_id flag alias" do + options = with_argv(["--instance_id", "legacy-instance"]) { config_manager.parse_options_from_argv! } + + expect(options[:instance_id]).to eq("legacy-instance") + expect(options[:config_overrides]["instance_id"]).to eq("legacy-instance") + end + + it "rejects invalid boolean values for config override flags" do + expect { + with_argv(["--prometheus-enabled", "not-a-bool"]) { config_manager.parse_options_from_argv! } + }.to raise_error(OptionParser::InvalidArgument, /--prometheus-enabled/) + end + + it "rejects non-array JSON values for array config override flags" do + expect { + with_argv(["--prometheus-histogram-buckets-zk", "{\"bad\":true}"]) { config_manager.parse_options_from_argv! } + }.to raise_error(OptionParser::InvalidArgument, /JSON array/) + end + + it "fails on unknown config override flags" do + expect { + with_argv(["--not-a-real-override", "foo"]) { config_manager.parse_options_from_argv! } + }.to raise_error(OptionParser::InvalidOption) + end end describe "config overlay" do @@ -65,6 +133,91 @@ expect(config_manager.config["services"]).to be_a(Hash) end + it "applies CLI overrides after overlay" do + overlay_path = File.join(tmpdir, "overlay.json") + File.write(overlay_path, JSON.generate({ + "heartbeat_path" => "/overlay/heartbeat", + "max_repeated_report_failures" => 4, + "statsd" => { + "host" => "overlay.statsd", + "port" => 8127 + }, + "prometheus" => { + "enabled" => false, + "port" => 9293 + } + })) + + allow(config_manager).to receive(:parse_options_from_argv!) { + { + config: nerve_config, + instance_id: nil, + check_config: false, + config_overlay: overlay_path, + config_overrides: { + "heartbeat_path" => "/cli/heartbeat", + "max_repeated_report_failures" => 11, + "statsd" => { + "host" => "cli.statsd" + }, + "prometheus" => { + "enabled" => true + } + } + } + } + config_manager.parse_options! + config_manager.reload! + + expect(config_manager.config["heartbeat_path"]).to eq("/cli/heartbeat") + expect(config_manager.config["max_repeated_report_failures"]).to eq(11) + expect(config_manager.config["statsd"]).to eq({ + "host" => "cli.statsd", + "port" => 8127 + }) + expect(config_manager.config["prometheus"]["enabled"]).to eq(true) + expect(config_manager.config["prometheus"]["port"]).to eq(9293) + end + + it "uses CLI service_conf_dir over overlay and file config values" do + overlay_services_dir = File.join(tmpdir, "overlay_services") + cli_services_dir = File.join(tmpdir, "cli_services") + FileUtils.mkdir_p(overlay_services_dir) + FileUtils.mkdir_p(cli_services_dir) + + File.write(File.join(overlay_services_dir, "overlay_only.json"), JSON.generate({ + "host" => "10.0.0.8", + "port" => 1111 + })) + File.write(File.join(cli_services_dir, "cli_only.json"), JSON.generate({ + "host" => "10.0.0.9", + "port" => 2222 + })) + + overlay_path = File.join(tmpdir, "overlay.json") + File.write(overlay_path, JSON.generate({ + "service_conf_dir" => overlay_services_dir + })) + + allow(config_manager).to receive(:parse_options_from_argv!) { + { + config: nerve_config, + instance_id: nil, + check_config: false, + config_overlay: overlay_path, + config_overrides: { + "service_conf_dir" => cli_services_dir + } + } + } + config_manager.parse_options! + config_manager.reload! + + expect(config_manager.config["service_conf_dir"]).to eq(cli_services_dir) + expect(config_manager.config["services"]).to have_key("cli_only") + expect(config_manager.config["services"]).not_to have_key("overlay_only") + end + it "deep-merges nested hashes from overlay" do overlay_path = File.join(tmpdir, "overlay.json") File.write(overlay_path, JSON.generate({