diff --git a/infrastructure/cdn-in-a-box/health/tc-health-client-ReadMe.md b/infrastructure/cdn-in-a-box/health/tc-health-client-ReadMe.md new file mode 100644 index 0000000000..231efbd096 --- /dev/null +++ b/infrastructure/cdn-in-a-box/health/tc-health-client-ReadMe.md @@ -0,0 +1,38 @@ + + +# CDN-in-a-Box Health Client Testing + +## Building and Running + +Build and run cdn-in-a-box with `docker-compose -f docker-compose.yml -f docker-compose.expose-ports.yml up`. Once it is up and running, use Docker Desktop to navigate to the terminal tab of an edge or mid container, cd into `/var/log/trafficcontrol`, and run `tail -f tc-health-client.log`. Click `Open in external terminal` on the upper right side, cd into `/usr/bin`, and run `./tc-health-client`. Wait for the dispersion time to pass; logs will then start appearing in the window where the tail command was run. After that you may interact with it via Traffic Portal. + +## Config files for Testing Only + +For testing only, the settings in `tc-health-client.json` are used to run it locally and can be changed. If they are changed, `purge` all containers and run `docker-compose -f docker-compose.yml -f docker-compose.expose-ports.yml up` in the `infrastructure/cdn-in-a-box/` folder. The same applies if the `tc-health-client.service` or `to-creds` files are changed. The `tc-health-client.service` is set for `Debug` mode with `vvv`, which is different from Production, which uses `vv`. + +## Rebuilding the tc-health-client only + +Delete the `trafficcontrol-health-client-[version].rpm` from the `/dist` folder and from `/trafficcontrol/infrastructure/cdn-in-a-box/health`, then cd into `/trafficcontrol` and run `./pkg -v -8 -b tc-health-client_build` to build the RPM to be used with docker, or `./pkg -v -8 tc-health-client_build` to build x86_64. Then copy the rpm from `/dist` into `/trafficcontrol/infrastructure/cdn-in-a-box/health` and rename it to `trafficcontrol-health-client.rpm` by removing the version. 
Build and run with `docker-compose -f docker-compose.yml -f docker-compose.expose-ports.yml up`. + +## Example Testing Commands + +Cd into `/opt/trafficserver/bin/` and run `./traffic_ctl host down --reason active mid-01.infra.ciab.test` or `./traffic_ctl host status mid-01.infra.ciab.test`; update the command as needed for other servers or reason codes. + +At the `/opt` level of a running container for either edge or mid, run `curl -vL http://trafficmonitor.infra.ciab.test:80` to test traffic_monitor. \ No newline at end of file diff --git a/tc-health-client/tmagent/markdownservice.go b/tc-health-client/tmagent/markdownservice.go index 135d298273..03cce7d6a8 100644 --- a/tc-health-client/tmagent/markdownservice.go +++ b/tc-health-client/tmagent/markdownservice.go @@ -309,7 +309,7 @@ func getParentFQDNs(pi *ParentInfo, tmh *TrafficMonitorHealth, l4h *ParentHealth // // This is a safety mechanism: if for any reason most or all parents are marked down, something // is seriously wrong, possibly with the health code itself, and therefore don't mark any parents down, -const HealthSafetyRatio = 0.3 // TODO make configurable? // 0.3 +const HealthSafetyRatio = 0.3 // TODO make configurable? func doMarkdown(pi *ParentInfo) { cfg := pi.Cfg.Get() @@ -360,7 +360,7 @@ func doMarkdown(pi *ParentInfo) { if oldAvailable != newAvailable { // do not mark down if the configuration disables mark downs. 
if !cfg.EnableActiveMarkdowns && !newAvailable { - log.Infof("markdown monitored_host=%v host_status=%v event=TM reports host is not available", fqdn, pv.Status()) + log.Infof("markdown monitored_host=%v host_status=%v event=\"TM reports host is not available\"", fqdn, pv.Status()) } else { if newParentStatus, err := markParent(cfg, parentStatus, isAvailable.Status, newAvailable); err != nil { log.Errorln(err.Error()) @@ -409,7 +409,7 @@ func markParent(cfg *config.Cfg, pv ParentStatus, cacheStatus string, available if !available { // unavailable unavailablePollCount += 1 if unavailablePollCount < cfg.UnavailablePollThreshold { - log.Infof("markdown monitored_host=%v host_status=%v event=TM indicates host is unavailable but the UnavailablePollThreshold has not been reached", hostName, hostStatus) + log.Infof("markdown monitored_host=%v host_status=UNAVAILABLE event=\"TM indicates host is unavailable but the UnavailablePollThreshold has not been reached\"", hostName) hostAvailable = true } else { // marking the host down @@ -422,13 +422,13 @@ func markParent(cfg *config.Cfg, pv ParentStatus, cacheStatus string, available // reset the poll counts markUpPollCount = 0 unavailablePollCount = 0 - log.Infof("marked monitored_host=%v host_status=%v event=%v\n", hostName, hostStatus, cacheStatus) + log.Infof("marked monitored_host=%v host_status=%v event=\"%v\"\n", hostName, hostStatus, cacheStatus) } } else { // available // marking the host up markUpPollCount += 1 if markUpPollCount < cfg.MarkUpPollThreshold { - log.Infof("TM indicates %s is available but the MarkUpPollThreshold has not been reached", hostName) + log.Infof("monitored_host=%v event=\"TM indicates host is available but the MarkUpPollThreshold has not been reached\"", hostName) hostAvailable = false } else { if err := execTrafficCtl(pv.Fqdn, true, cfg.ReasonCode, cfg.TrafficServerBinDir); err != nil { @@ -439,7 +439,7 @@ func markParent(cfg *config.Cfg, pv ParentStatus, cacheStatus string, available // reset 
the poll counts unavailablePollCount = 0 markUpPollCount = 0 - log.Infof("markdown monitored_host=%v host_status=%v event=%v\n", hostName, hostStatus, cacheStatus) + log.Infof("markdown monitored_host=%v host_status=%v event=\"%v\"\n", hostName, hostStatus, cacheStatus) } } diff --git a/tc-health-client/tmagent/tmagent.go b/tc-health-client/tmagent/tmagent.go index 93eaa791b1..ae24a33a34 100644 --- a/tc-health-client/tmagent/tmagent.go +++ b/tc-health-client/tmagent/tmagent.go @@ -370,7 +370,7 @@ func (pi *ParentInfo) UpdateParentInfo(cfg *config.Cfg) error { } else { // log.Infof("updated parents from new %s, total parents: %d\n", ParentsFile, len(pi.Parents)) // TODO track map len - log.Infof("tm-agent total_parents=%v event=updated parents from new parent.config\n", len(pi.GetParents())) + log.Infof("tm-agent total_parents=%v event=\"updated parents from new parent.config\"\n", len(pi.GetParents())) } } @@ -381,7 +381,7 @@ func (pi *ParentInfo) UpdateParentInfo(cfg *config.Cfg) error { } else { // log.Infof("updated parents from new %s total parents: %d\n", StrategiesFile, len(pi.Parents)) // TODO track map len - log.Infof("tm-agent total_parents=%v event=updated parents from new strategies.yaml\n", len(pi.GetParents())) + log.Infof("tm-agent total_parents=%v event=\"updated parents from new strategies.yaml\"\n", len(pi.GetParents())) } } diff --git a/tc-health-client/tmagent/tmhealthservice.go b/tc-health-client/tmagent/tmhealthservice.go index c3fb5366db..14cd76ae44 100644 --- a/tc-health-client/tmagent/tmhealthservice.go +++ b/tc-health-client/tmagent/tmhealthservice.go @@ -78,7 +78,7 @@ func loopPollAndUpdateCacheStatus(pi *ParentInfo, doneChan <-chan struct{}, upda } doTrafficOpsReq := toLoginDispersion <= 0 - log.Infoln("service-status service=tm-health event=starting") + log.Infoln("service-status service=tm-health event=\"starting\"") start := time.Now() doPollAndUpdateCacheStatus(pi, doTrafficOpsReq) updateHealthSignal() @@ -108,9 +108,9 @@ func 
doPollAndUpdateCacheStatus(pi *ParentInfo, doTrafficOpsReq bool) { if err != nil { log.Errorf("poll-status %v\n", err.Error()) if err := pi.GetTOData(cfg); err != nil { - log.Errorln("update event=could not update the list of trafficmonitors, keeping the old config") + log.Errorln("update event=\"could not update the list of trafficmonitors, keeping the old config\"") } else { - log.Infoln("service-status service=tm-health event=updated TrafficMonitor statuses from TrafficOps") + log.Infoln("service-status service=tm-health event=\"updated TrafficMonitor statuses from TrafficOps\"") } // log the poll state data if enabled @@ -132,9 +132,9 @@ func doPollAndUpdateCacheStatus(pi *ParentInfo, doTrafficOpsReq bool) { if doTrafficOpsReq { // TODO move to its own TO poller if err = pi.GetTOData(cfg); err != nil { - log.Errorln("update event=could not update the list of trafficmonitors, keeping the old config") + log.Errorln("update event=\"could not update the list of trafficmonitors, keeping the old config\"") } else { - log.Infoln("service-status service=tm-health event=updated TrafficMonitor statuses from TrafficOps") + log.Infoln("service-status service=tm-health event=\"updated TrafficMonitor statuses from TrafficOps\"") } } diff --git a/traffic_monitor/tmclient/tmclient.go b/traffic_monitor/tmclient/tmclient.go index 31268193b4..e3ebc6f038 100644 --- a/traffic_monitor/tmclient/tmclient.go +++ b/traffic_monitor/tmclient/tmclient.go @@ -207,7 +207,7 @@ func (c *TMClient) getBytes(path string) ([]byte, error) { defer log.Close(resp.Body, "Unable to close http client "+url) if resp.StatusCode < 200 || resp.StatusCode > 299 { - return nil, fmt.Errorf("monitor='"+url+"' monitor_status=%v event=error in TrafficMonitor polling returned bad status", resp.StatusCode) + return nil, fmt.Errorf("monitor='"+url+"' monitor_status=%v event=\"error in TrafficMonitor polling returned bad status\"", resp.StatusCode) } respBts, err := ioutil.ReadAll(resp.Body)