# caduceus.yaml

## SPDX-FileCopyrightText: 2023 Comcast Cable Communications Management, LLC
## SPDX-License-Identifier: Apache-2.0
---
# fqdn is the unique fully-qualified domain name of this server.
# (Optional)
fqdn: "caduceus-instance-123.example.com"

# server presumably names this server instance (this sample reuses the
# fqdn value).
# NOTE(review): nothing in this file shows where the value is consumed;
# confirm against the code before relying on it.
# (Optional)
server: "caduceus-instance-123.example.com"

########################################
#   Labeling/Tracing via HTTP Headers Configuration
########################################

# build is the build number reported in the X-Caduceus-Build header to
# show machine version information.  The value SHOULD follow the
# `version-build` scheme, but this is not a strict requirement.
# (Optional)
build: "0.1.3-434"

# region is the value reported in the X-Caduceus-Region header to show
# which region this machine is located in.  The value is arbitrary.
# (Optional)
region: "east"

# flavor is the value reported in the X-Caduceus-Flavor header to show
# which flavor this machine is associated with.  The value is arbitrary.
# (Optional)
flavor: "mint"

########################################
#   primary endpoint Configuration
########################################

# primary defines the details needed for the primary endpoint.  The
# primary endpoint (typically) accepts the events from talaria.
primary:
  # address provides the port number for the endpoint to bind to.
  # ":443" is ideal, but may require some special handling because it is
  # a reserved (by the kernel) port.
  address: ":6000"
  # HTTPS/TLS
  #
  # certificateFile provides the public key and CA chain in PEM format if
  # TLS is used.  Note: the certificate needs to match the fqdn for clients
  # to accept it without issue.
  #
  # keyFile provides the private key that matches the certificateFile.
  #
  # Both settings are commented out in this sample.
  # (Optional)
  # certificateFile: "/etc/caduceus/public.pem"
  # keyFile: "/etc/caduceus/private.pem"

########################################
#   health endpoint Configuration
########################################

# health defines the details needed for the health check endpoint.  The
# health check endpoint is generally used by services (like AWS Route53
# or consul) to determine whether this particular machine is healthy.
health:
  # address provides the port number for the endpoint to bind to.
  # ":80" is ideal, but may require some special handling because it is
  # a reserved (by the kernel) port.
  address: ":6001"

  # logInterval appears to predate the formal metrics support.
  # (Deprecated)
  # logInterval: "60s"
  # options appears to predate the formal metrics support.
  # (Deprecated)
  # options:
  #  - "PayloadsOverZero"
  #  - "PayloadsOverHundred"
  #  - "PayloadsOverThousand"
  #  - "PayloadsOverTenThousand"

########################################
#   Debugging/pprof Configuration
########################################

# pprof defines the details needed for the pprof debug endpoint.
# (Optional)
pprof:
  # address provides the port number for the pprof endpoint to bind to.
  address: ":6002"

########################################
#   Metrics Configuration
########################################

# metric defines the details needed for the prometheus metrics endpoint.
# (Optional)
metric:
  # address provides the port number for the endpoint to bind to.  Port 9389
  # was chosen because it does not conflict with any of the other prometheus
  # metrics or other machines in the xmidt cluster.  You may use any port you
  # wish.
  address: ":9389"

  # metricsOptions provides the details needed to configure the prometheus
  # metric data.  Metric names generally have the form:
  #
  # {namespace}_{subsystem}_{metric}
  #
  # so with the suggested values below, your metrics are prefixed like
  # this:
  #
  # xmidt_caduceus_{metric}
  #
  # (Optional)
  metricsOptions:
    # namespace is the namespace of the metrics provided.
    # (Optional)
    namespace: "xmidt"
    # subsystem is the subsystem of the metrics provided.
    # (Optional)
    subsystem: "caduceus"

# touchstone provides the default prometheus namespace and subsystem that
# are applied to metrics which do not declare their own.
touchstone:
  # defaultNamespace is the prometheus namespace to apply when a metric has
  # no namespace.
  defaultNamespace: "xmidt"
  # defaultSubsystem is the prometheus subsystem to apply when a metric has
  # no subsystem.
  defaultSubsystem: "caduceus"

########################################
#   Service Discovery Configuration
########################################

# service defines the parameters needed to interact with the consul cluster
# for service discovery.  Presently only consul is supported.  This is
# presently only used by Prometheus to discover machines to monitor, but
# in the not-too-distant future talaria will use this interaction to load
# balance across all caduceus machines instead of using DNS.
# (Optional)
service:
  # consul configures the consul library in caduceus to use the local
  # service discovery agent.
  consul:
    # client defines how to connect to the local consul agent (on the same
    # VM/container).
    client:
      # address is the address of the local consul agent.
      address: "127.0.0.1:8500"
      # scheme is how the consul library should interact with the local
      # consul agent.
      scheme: "http"
      # waitTime is not documented yet (TBD).
      # NOTE(review): presumably this maps to the consul API client's
      # WaitTime, which limits how long a blocking query waits - confirm.
      waitTime: "30s"

    # disableGenerateID is not documented yet (TBD).
    # NOTE(review): the name suggests it turns off automatic generation of
    # registration IDs so the `id` values below are used as-is - confirm.
    disableGenerateID: true

    # registrations defines what services caduceus should register with
    # consul.
    #
    #     id      - the VM/container instance name registered with consul
    #     name    - the name of service being registered
    #     tags    - a list of tags to associate with this registration
    #     address - the mechanism to reach the service (generally unique fqdn)
    #     port    - the port to reach the service at
    #     checks  - the list of checks to perform to determine if the service
    #               is available/healthy
    #         checkID                        - TBD
    #         ttl                            - how long the check is valid for
    #         deregisterCriticalServiceAfter - the duration to wait before the
    #                                          service is removed due to check
    #                                          failures
    registrations:
      - id: "caduceus-instance-123.example.com"
        name: "caduceus"
        tags:
          - "prod"
          - "mint"
          - "stage=prod"
          - "flavor=mint"
        address: "caduceus-instance-123.example.com"
        # 6001 is the same port as the health endpoint address in this
        # sample, not the primary endpoint (":6000").
        port: 6001
        checks:
          - checkID: "caduceus-instance-123.example.com:ttl"
            ttl: "30s"
            deregisterCriticalServiceAfter: "70s"

########################################
#   Logging Related Configuration
########################################

# log configures the logging subsystem details.
# NOTE(review): this file also contains a separate `zap` logging section;
# confirm which of the two the running version actually reads.
log:
  # file is the name of the most recent log file.  If set to "stdout" this
  # will log to os.Stdout.
  # (Optional) defaults to os.TempDir()
  file: "stdout"

  # level is the logging level to use - INFO, DEBUG, WARN, ERROR
  # (Optional) defaults to ERROR
  level: "DEBUG"

  # maxsize is the maximum size of a log file, in MB.
  # (Optional) defaults to max 100MB
  maxsize: 50

  # maxage is the maximum number of days to retain old log files.
  # (Optional) defaults to ignore age limit (0)
  maxage: 30

  # maxbackups is the maximum number of old log files to retain.
  # (Optional) defaults to retain all (0)
  maxbackups: 10

  # json is a flag indicating whether JSON logging output should be used.
  # (Optional) defaults to false
  json: true

# zap configures the logger.  The comments below describe the settings in
# terms of zap's Config fields (see zapcore.EncoderConfig for the encoder
# options).
zap:
  # OutputPaths is a list of URLs or file paths to write logging output to.
  outputPaths:
    - stdout
    # - /var/log/caduceus/caduceus.log

  # Level is the minimum enabled logging level. Note that this is a dynamic
  # level, so calling Config.Level.SetLevel will atomically change the log
  # level of all loggers descended from this config.
  level: debug

  # ErrorOutputPaths is a list of URLs to write internal logger errors to.
  # Note that this setting only affects the logger's own internal errors,
  # not error-level log entries.
  errorOutputPaths:
    - stderr
    # - /var/log/caduceus/caduceus.log

  # EncoderConfig sets options for the chosen encoder. See
  # zapcore.EncoderConfig for details.
  encoderConfig:
    messageKey: message
    # NOTE(review): with this value the level is emitted under a field
    # named "key" - confirm that "level" was not intended.
    levelKey: key
    levelEncoder: lowercase

  # Encoding sets the logger's encoding. Valid values are "json" and
  # "console", as well as any third-party encodings registered via
  # RegisterEncoder.
  encoding: json

########################################
#   Authorization Related Configuration
########################################

# Any combination of these configurations may be used for authorization.
# If ANY match, the request goes onwards.

# jwtValidator provides the details about where to get the keys for JWT
# kid values and their associated information (expiration, etc.) for JWTs
# used as authorization.
# (Optional)
jwtValidator:
  Config:
    Resolve:
      # Template is a URI template used to fetch keys.  This template may
      # use a single parameter named keyID, e.g. http://keys.com/{keyID}.
      # This field is required and has no default.
      Template: "http://localhost/{keyID}"
    Refresh:
      Sources:
        # URI is the location where keys are served.  By default, clortho
        # supports file://, http://, and https:// URIs, as well as standard
        # file system paths such as /etc/foo/bar.jwk.
        #
        # This field is required and has no default.
        - URI: "http://localhost"

# authHeader lists the basic auth header values that caduceus will accept
# as authorization.  The entries below are sample placeholders.
# (Optional)
authHeader:
  - "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx="
  - "dXNlcjpwYXNz"

# capabilityCheck provides the details needed for checking an incoming JWT's
# capabilities.  If the type of check isn't provided, no checking is done.  The
# type can be "monitor" or "enforce".  If it is empty or a different value, no
# checking is done.  If "monitor" is provided, the capabilities are checked but
# the request isn't rejected when there isn't a valid capability for the
# request. Instead, a message is logged.  When "enforce" is provided, a request
# that doesn't have the needed capability is rejected.
#
# The capability is expected to have the format:
#
# {prefix}{endpoint}:{method}
#
# The prefix can be a regular expression.  If it's empty, no capability check
# is done.  The endpoint is a regular expression that should match the endpoint
# the request was sent to. The method is usually the method of the request, such as
# GET.  The accept all method is a catchall string that indicates the capability
# is approved for all methods.
# (Optional)
# capabilityCheck:
#   # type provides the mode for capability checking.
#   type: "enforce"
#   # prefix provides the regex to match the capability before the endpoint.
#   prefix: "prefix Here"
#   # acceptAllMethod provides a way to have a capability that allows all
#   # methods for a specific endpoint.
#   acceptAllMethod: "all"
#   # endpointBuckets provides regular expressions to use against the request
#   # endpoint in order to group requests for a metric label.
#   endpointBuckets:
#     - "hook\\b"
#     - "hooks\\b"
#     - "notify\\b"

##############################################################################
# Webhooks Related Configuration
##############################################################################
# webhook provides configuration for storing and obtaining webhook
# information using Argus.
webhook:
  # JWTParserType establishes which parser type will be used by the JWT token
  # acquirer used by Argus. Options include 'simple' and 'raw'.
  # Simple: parser assumes token payloads have the following structure:
  # https://github.com/xmidt-org/bascule/blob/c011b128d6b95fa8358228535c63d1945347adaa/acquire/bearer.go#L77
  # Raw: parser assumes the entire token payload is the JWT token.
  # (Optional). Defaults to 'simple'.
  JWTParserType: "raw"
  BasicClientConfig:
    # listen is the subsection that configures the listening feature of the
    # argus client.
    # (Optional)
    listen:
      # pullInterval provides how often the current webhooks list gets
      # refreshed.
      pullInterval: 5s

    # bucket is the partition name where webhooks will be stored.
    bucket: "webhooks"

    # address is Argus' network location.
    address: "http://localhost:6600"

    # auth is the authentication method for argus.
    auth:
      # basic configures basic authentication for argus.
      # Must be of form: 'Basic xyz=='
      basic: "Basic dXNlcjpwYXNz"
  #
  #    # jwt configures jwt style authentication for argus.
  #    JWT:
  #      # requestHeaders are added to the request for the token.
  #      # (Optional)
  #      # requestHeaders:
  #      #   "": ""
  #
  #      # authURL is the URL to access the token.
  #      authURL: ""
  #
  #      # timeout is how long the request to get the token will take before
  #      # timing out.
  #      timeout: "1m"
  #
  #      # buffer is the length of time before a token expires to get a new token.
  #      buffer: "2m"

########################################
#   Delivery Pipeline Related Configuration
########################################

# (Deprecated)
# numWorkerThreads: 3000
# jobQueueSize: 6000

# sender provides the details for each "sender" that services a unique
# webhook url endpoint.
sender:
  # numWorkersPerSender defines the maximum number of outgoing concurrent
  # HTTP client requests to a particular webhook url.  This number is for
  # THIS server only; to determine the total maximum, multiply this value
  # by the number of caduceus machines.
  numWorkersPerSender: 5000

  # queueSizePerSender is the maximum queue depth (in events) the sender
  # will store before cutting off the webhook because the delivery pipeline
  # has backed up.
  queueSizePerSender: 10000

  # cutOffPeriod is the duration of time the webhook will be cut off if the
  # delivery pipeline gets backed up.  All outstanding events will be
  # dropped, as will all new events otherwise destined for this webhook.
  # This period of time is to allow the webhook server time to recover.
  cutOffPeriod: 10s

  # linger is the duration of time after a webhook has not been registered
  # before the delivery pipeline is torn down.
  linger: 180s

  # (Deprecated)
  # clientTimeout: 60s

  # disableClientHostnameValidation provides a way to bypass TLS validation
  # failures on HTTPS requests when sending events to webhooks.
  # NOTE: Setting this to true allows for a potential man-in-the-middle
  # scenario between caduceus and a webhook.
  # (Optional) defaults to false
  disableClientHostnameValidation: false

  # deliveryRetries is the maximum number of delivery attempts caduceus will
  # make before dropping an event.
  deliveryRetries: 1

  # deliveryInterval is the time to wait after a failed delivery attempt
  # before attempting to deliver again.
  deliveryInterval: 10ms

  # responseHeaderTimeout is the time to wait for a response before giving up
  # and marking the delivery a failure.
  responseHeaderTimeout: 10s

  # customPIDs is a custom list of allowed PartnerIDs that will be used if a
  # message has no partner IDs.  When empty, a message with no partner IDs
  # will not be sent to any listeners when enforcing the partner ID check.
  customPIDs: []

  # disablePartnerIDs dictates whether or not to enforce the partnerID check.
  # Defaults to 'false'.
  disablePartnerIDs: false

# (Deprecated)
# profilerFrequency: 15
# profilerDuration: 15
# profilerQueueSize: 100
# totalIncomingPayloadSizeBuckets:
#   - 100
#   - 1000
#   - 10000
# perSourceIncomingPayloadSizeBuckets:
#   - 100
#   - 1000
#   - 10000

# tracing provides configuration around traces using OpenTelemetry.
# (Optional). By default, a 'noop' tracer provider is used and tracing is
# disabled.
tracing:
  # provider is the name of the trace provider to use. Currently, otlp/grpc,
  # otlp/http, stdout, jaeger and zipkin are supported.
  # 'noop' can also be used as provider to explicitly disable tracing.
  provider: "noop"

  # skipTraceExport only applies when provider is stdout. Set skipTraceExport
  # to true so that trace information is not written to stdout.
  # skipTraceExport: true

  # endpoint is where trace information should be routed. Applies to otlp,
  # zipkin, and jaeger. OTLP/gRPC uses port 4317 by default.
  # OTLP/HTTP uses port 4318 by default.
  # endpoint: "http://localhost:9411/api/v2/spans"


# argusClientTimeout holds the timeouts that apply to the Argus HTTP client.
# (Optional) By default, the values below will be used.
argusClientTimeout:
  # clientTimeout is the timeout for requests made through this
  # HTTP client. This timeout includes connection time, any
  # redirects, and reading the response body.
  clientTimeout: 50s

  # netDialerTimeout is the maximum amount of time the HTTP Client Dialer will
  # wait for a connection to complete.
  netDialerTimeout: 5s

# previousVersionSupport allows two different major versions of the API to
# be served at the same time from the same application.  When true,
# caduceus supports both "/v3" and "/v4" endpoints.  When false, only "/v4"
# endpoints are supported.
previousVersionSupport: true