Skip to content

Commit

Permalink
Improve health client logging syntax p2 (#7676)
Browse files Browse the repository at this point in the history
* add double quotes to event strings

* specific readme for tc-health-client building and testing.

* add example commands for testing to readme

* typo fix
  • Loading branch information
fabikd committed Jul 26, 2023
1 parent 2233d22 commit ae9ef25
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 14 deletions.
38 changes: 38 additions & 0 deletions infrastructure/cdn-in-a-box/health/tc-health-client-ReadMe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# CDN-in-a-Box Health Client Testing

## Building and Running

Build and run cdn-in-a-box with `docker-compose -f docker-compose.yml -f docker-compose.expose-ports.yml up`. Once it is up and running, use Docker Desktop to navigate to the terminal tab of an edge or mid. cd into `/var/log/trafficcontrol` and run `tail -f tc-health-client.log`. Click `Open in external terminal` on the upper right side, cd into `/usr/bin`, and run `./tc-health-client`. Wait for the dispersion time to pass; the logs will then start appearing in the window where the tail command was run. After that you may interact with it via Traffic Portal.

## Config files for Testing Only

For testing only, the settings in `tc-health-client.json` are the ones used to run it locally, and they can be changed. If they are changed, `purge` all containers and run `docker-compose -f docker-compose.yml -f docker-compose.expose-ports.yml up` in the `infrastructure/cdn-in-a-box/` folder. The same applies if the `tc-health-client.service` or `to-creds` files are changed. The `tc-health-client.service` is set for `Debug` mode with `vvv`, which is different from Production, which uses `vv`.

## Rebuilding the tc-health-client only

Delete the `trafficcontrol-health-client-[version].rpm` from the `/dist` folder and from `/trafficcontrol/infrastructure/cdn-in-a-box/health`, then cd into `/trafficcontrol` and run `./pkg -v -8 -b tc-health-client_build` to build the RPM to be used with docker, or `./pkg -v -8 tc-health-client_build` to build x86_64. Then copy the RPM from `/dist` into `/trafficcontrol/infrastructure/cdn-in-a-box/health` and rename it to `trafficcontrol-health-client.rpm` by removing the version. Build and run with `docker-compose -f docker-compose.yml -f docker-compose.expose-ports.yml up`.

## Example Testing Commands

cd into `/opt/trafficserver/bin/` and run `./traffic_ctl host down --reason active mid-01.infra.ciab.test` or `./traffic_ctl host status mid-01.infra.ciab.test`; update the command as needed for other servers or reason codes.

At the `/opt` level of a running container for either an edge or a mid, run `curl -vL http://trafficmonitor.infra.ciab.test:80` to test traffic_monitor.
12 changes: 6 additions & 6 deletions tc-health-client/tmagent/markdownservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ func getParentFQDNs(pi *ParentInfo, tmh *TrafficMonitorHealth, l4h *ParentHealth
//
// This is a safety mechanism: if for any reason most or all parents are marked down, something
// is seriously wrong, possibly with the health code itself, and therefore don't mark any parents down,
const HealthSafetyRatio = 0.3 // TODO make configurable? // 0.3
const HealthSafetyRatio = 0.3 // TODO make configurable?

func doMarkdown(pi *ParentInfo) {
cfg := pi.Cfg.Get()
Expand Down Expand Up @@ -360,7 +360,7 @@ func doMarkdown(pi *ParentInfo) {
if oldAvailable != newAvailable {
// do not mark down if the configuration disables mark downs.
if !cfg.EnableActiveMarkdowns && !newAvailable {
log.Infof("markdown monitored_host=%v host_status=%v event=TM reports host is not available", fqdn, pv.Status())
log.Infof("markdown monitored_host=%v host_status=%v event=\"TM reports host is not available\"", fqdn, pv.Status())
} else {
if newParentStatus, err := markParent(cfg, parentStatus, isAvailable.Status, newAvailable); err != nil {
log.Errorln(err.Error())
Expand Down Expand Up @@ -409,7 +409,7 @@ func markParent(cfg *config.Cfg, pv ParentStatus, cacheStatus string, available
if !available { // unavailable
unavailablePollCount += 1
if unavailablePollCount < cfg.UnavailablePollThreshold {
log.Infof("markdown monitored_host=%v host_status=%v event=TM indicates host is unavailable but the UnavailablePollThreshold has not been reached", hostName, hostStatus)
log.Infof("markdown monitored_host=%v host_status=UNAVAILABLE event=\"TM indicates host is unavailable but the UnavailablePollThreshold has not been reached\"", hostName)
hostAvailable = true
} else {
// marking the host down
Expand All @@ -422,13 +422,13 @@ func markParent(cfg *config.Cfg, pv ParentStatus, cacheStatus string, available
// reset the poll counts
markUpPollCount = 0
unavailablePollCount = 0
log.Infof("marked monitored_host=%v host_status=%v event=%v\n", hostName, hostStatus, cacheStatus)
log.Infof("marked monitored_host=%v host_status=%v event=\"%v\"\n", hostName, hostStatus, cacheStatus)
}
} else { // available
// marking the host up
markUpPollCount += 1
if markUpPollCount < cfg.MarkUpPollThreshold {
log.Infof("TM indicates %s is available but the MarkUpPollThreshold has not been reached", hostName)
log.Infof("monitored_host=%v event=\"TM indicates host is available but the MarkUpPollThreshold has not been reached\"", hostName)
hostAvailable = false
} else {
if err := execTrafficCtl(pv.Fqdn, true, cfg.ReasonCode, cfg.TrafficServerBinDir); err != nil {
Expand All @@ -439,7 +439,7 @@ func markParent(cfg *config.Cfg, pv ParentStatus, cacheStatus string, available
// reset the poll counts
unavailablePollCount = 0
markUpPollCount = 0
log.Infof("markdown monitored_host=%v host_status=%v event=%v\n", hostName, hostStatus, cacheStatus)
log.Infof("markdown monitored_host=%v host_status=%v event=\"%v\"\n", hostName, hostStatus, cacheStatus)
}
}

Expand Down
4 changes: 2 additions & 2 deletions tc-health-client/tmagent/tmagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ func (pi *ParentInfo) UpdateParentInfo(cfg *config.Cfg) error {
} else {
// log.Infof("updated parents from new %s, total parents: %d\n", ParentsFile, len(pi.Parents))
// TODO track map len
log.Infof("tm-agent total_parents=%v event=updated parents from new parent.config\n", len(pi.GetParents()))
log.Infof("tm-agent total_parents=%v event=\"updated parents from new parent.config\"\n", len(pi.GetParents()))
}
}

Expand All @@ -381,7 +381,7 @@ func (pi *ParentInfo) UpdateParentInfo(cfg *config.Cfg) error {
} else {
// log.Infof("updated parents from new %s total parents: %d\n", StrategiesFile, len(pi.Parents))
// TODO track map len
log.Infof("tm-agent total_parents=%v event=updated parents from new strategies.yaml\n", len(pi.GetParents()))
log.Infof("tm-agent total_parents=%v event=\"updated parents from new strategies.yaml\"\n", len(pi.GetParents()))
}
}

Expand Down
10 changes: 5 additions & 5 deletions tc-health-client/tmagent/tmhealthservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func loopPollAndUpdateCacheStatus(pi *ParentInfo, doneChan <-chan struct{}, upda
}
doTrafficOpsReq := toLoginDispersion <= 0

log.Infoln("service-status service=tm-health event=starting")
log.Infoln("service-status service=tm-health event=\"starting\"")
start := time.Now()
doPollAndUpdateCacheStatus(pi, doTrafficOpsReq)
updateHealthSignal()
Expand Down Expand Up @@ -108,9 +108,9 @@ func doPollAndUpdateCacheStatus(pi *ParentInfo, doTrafficOpsReq bool) {
if err != nil {
log.Errorf("poll-status %v\n", err.Error())
if err := pi.GetTOData(cfg); err != nil {
log.Errorln("update event=could not update the list of trafficmonitors, keeping the old config")
log.Errorln("update event=\"could not update the list of trafficmonitors, keeping the old config\"")
} else {
log.Infoln("service-status service=tm-health event=updated TrafficMonitor statuses from TrafficOps")
log.Infoln("service-status service=tm-health event=\"updated TrafficMonitor statuses from TrafficOps\"")
}

// log the poll state data if enabled
Expand All @@ -132,9 +132,9 @@ func doPollAndUpdateCacheStatus(pi *ParentInfo, doTrafficOpsReq bool) {
if doTrafficOpsReq {
// TODO move to its own TO poller
if err = pi.GetTOData(cfg); err != nil {
log.Errorln("update event=could not update the list of trafficmonitors, keeping the old config")
log.Errorln("update event=\"could not update the list of trafficmonitors, keeping the old config\"")
} else {
log.Infoln("service-status service=tm-health event=updated TrafficMonitor statuses from TrafficOps")
log.Infoln("service-status service=tm-health event=\"updated TrafficMonitor statuses from TrafficOps\"")
}
}

Expand Down
2 changes: 1 addition & 1 deletion traffic_monitor/tmclient/tmclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ func (c *TMClient) getBytes(path string) ([]byte, error) {
defer log.Close(resp.Body, "Unable to close http client "+url)

if resp.StatusCode < 200 || resp.StatusCode > 299 {
return nil, fmt.Errorf("monitor='"+url+"' monitor_status=%v event=error in TrafficMonitor polling returned bad status", resp.StatusCode)
return nil, fmt.Errorf("monitor='"+url+"' monitor_status=%v event=\"error in TrafficMonitor polling returned bad status\"", resp.StatusCode)
}

respBts, err := ioutil.ReadAll(resp.Body)
Expand Down

0 comments on commit ae9ef25

Please sign in to comment.