From 421d126c2d8cfef1fb3a3753a9ac0182aead5592 Mon Sep 17 00:00:00 2001 From: Sam Sloman Date: Wed, 25 Feb 2026 11:54:32 +0000 Subject: [PATCH 1/2] BDOG-3589: adds html suppression to upstream error response body --- .../uk/gov/hmrc/http/HttpErrorFunctions.scala | 31 +++++++++++++++-- .../hmrc/http/HttpErrorFunctionsSpec.scala | 33 ++++++++++++++++--- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala b/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala index 54488219..5e5aece7 100644 --- a/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala +++ b/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala @@ -1,5 +1,5 @@ /* - * Copyright 2023 HM Revenue & Customs + * Copyright 2026 HM Revenue & Customs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,13 +49,40 @@ trait HttpErrorFunctions { def is5xx(status: Int) = status >= 500 && status < 600 + private def sanitiseHtmlErrorBody(response: HttpResponse): String = { + + import scala.util.matching.Regex + + val TitleRegex: Regex = """(?is)]*>(.*?)""".r + + def isHtml(body: String): Boolean = { + response.header("Content-Type") + .exists(_.toLowerCase.contains("text/html")) || + body.trim.toLowerCase.startsWith(" if (t.length > 200) t.take(200) + "...[truncated]" else t) + + Option.when(isHtml(response.body)) { + extractTitle(response.body) + .map(t => s"[HTML error response suppressed] — title: $t") + .getOrElse("[HTML error response suppressed] — no title found") + }.getOrElse(response.body) + } + // Note, no special handling of BadRequest or NotFound // they will be returned as `Left(Upstream4xxResponse(status = 400))` and `Left(Upstream4xxResponse(status = 404))` respectively def handleResponseEither(httpMethod: String, url: String)(response: HttpResponse): Either[UpstreamErrorResponse, HttpResponse] = response.status match { case status if is4xx(status) || is5xx(status) => Left(UpstreamErrorResponse( - message = upstreamResponseMessage(httpMethod, url, status, response.body), + message = upstreamResponseMessage(httpMethod, url, status, sanitiseHtmlErrorBody(response)), statusCode = status, reportAs = if (is4xx(status)) HttpExceptions.INTERNAL_SERVER_ERROR else HttpExceptions.BAD_GATEWAY, headers = response.headers diff --git a/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala b/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala index 426a283d..4fce4b0e 100644 --- a/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala +++ b/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala @@ -33,6 +33,10 @@ class HttpErrorFunctionsSpec // Disable shrinking implicit def noShrink[T]: Shrink[T] = Shrink.shrinkAny + val exampleVerb = "GET" + val exampleUrl = "http://example.com/something" + val exampleBody = "this is the string body" + "HttpErrorFunctions.handleResponseEither" should { "return the response if the status code is between 200 and 299" in { forAll(Gen.choose(200, 299))(expectResponse) @@ -47,11 +51,32 @@ class HttpErrorFunctionsSpec forAll(Gen.choose(0, 399))(expectResponse) forAll(Gen.choose(600, 1000))(expectResponse) } - } - val exampleVerb = "GET" - val exampleUrl = "http://example.com/something" - val exampleBody = "this is the string body" + "suppress HTML in error response body" in { + val htmlBody = + """ + | + | Error + | Something went wrong + |""".stripMargin + + val response = + HttpResponse( + status = 500, + body = htmlBody, + headers = Map("Content-Type" -> Seq("text/html")) + ) + + new HttpErrorFunctions { + val result = handleResponseEither(exampleVerb, exampleUrl)(response) + result match { + case Left(err) => err.message should include ("HTML error response suppressed") + err.message should not include ("Something went wrong") + case Right(_) => fail("Expected Left(UpstreamErrorResponse), got Right") + } + } + } + } def expectError(statusCode: Int, reportAs: Int): Unit = new HttpErrorFunctions { From dae31f61844b380d5f7bd4b8bf33974d12e4e2b7 Mon Sep 17 00:00:00 2001 From: Sam Sloman Date: Fri, 27 Feb 2026 09:04:50 +0000 Subject: [PATCH 2/2] BDOG-3589: adds html suppression to streamed upstream response error body --- .../uk/gov/hmrc/http/HttpErrorFunctions.scala | 17 +++++----- .../hmrc/http/HttpErrorFunctionsSpec.scala | 34 +++++++++++++++++++ 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala b/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala index 5e5aece7..408b04c6 100644 --- a/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala +++ b/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala @@ -49,31 +49,30 @@ trait HttpErrorFunctions { def is5xx(status: Int) = status >= 500 && status < 600 - private def sanitiseHtmlErrorBody(response: HttpResponse): String = { + private def sanitiseHtmlErrorBody(response: HttpResponse, body: String): String = { import scala.util.matching.Regex val TitleRegex: Regex = """(?is)]*>(.*?)""".r - def isHtml(body: String): Boolean = { + def isHtml: Boolean = { response.header("Content-Type") .exists(_.toLowerCase.contains("text/html")) || body.trim.toLowerCase.startsWith(" if (t.length > 200) t.take(200) + "...[truncated]" else t) - Option.when(isHtml(response.body)) { - extractTitle(response.body) + if (isHtml) { + extractTitle .map(t => s"[HTML error response suppressed] — title: $t") .getOrElse("[HTML error response suppressed] — no title found") - }.getOrElse(response.body) + } else body } // Note, no special handling of BadRequest or NotFound @@ -82,7 +81,7 @@ trait HttpErrorFunctions { response.status match { case status if is4xx(status) || is5xx(status) => Left(UpstreamErrorResponse( - message = upstreamResponseMessage(httpMethod, url, status, sanitiseHtmlErrorBody(response)), + message = upstreamResponseMessage(httpMethod, url, status, sanitiseHtmlErrorBody(response, response.body)), statusCode = status, reportAs = if (is4xx(status)) HttpExceptions.INTERNAL_SERVER_ERROR else HttpExceptions.BAD_GATEWAY, headers = response.headers @@ -117,7 +116,7 @@ trait HttpErrorFunctions { case e: TimeoutException => "" } UpstreamErrorResponse( - message = upstreamResponseMessage(httpMethod, url, status, errorMessage), + message = upstreamResponseMessage(httpMethod, url, status, sanitiseHtmlErrorBody(response, errorMessage)), statusCode = status, reportAs = if (is4xx(status)) HttpExceptions.INTERNAL_SERVER_ERROR else HttpExceptions.BAD_GATEWAY, headers = response.headers diff --git a/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala b/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala index 4fce4b0e..b10c3eed 100644 --- a/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala +++ b/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala @@ -16,6 +16,10 @@ package uk.gov.hmrc.http +import org.apache.pekko.actor.ActorSystem +import org.apache.pekko.stream.Materializer +import org.apache.pekko.stream.scaladsl.Source +import org.apache.pekko.util.ByteString import org.scalacheck.{Gen, Shrink} import org.scalatest.EitherValues import org.scalatest.matchers.should.Matchers @@ -76,6 +80,36 @@ class HttpErrorFunctionsSpec } } } + + "suppress HTML in streamed error response body" in { + + implicit val system: ActorSystem = ActorSystem("TestSystem") + implicit val mat : Materializer = Materializer(system) + + val htmlBody = + """ + | + | Error + | Something went wrong + |""".stripMargin + + // Create a HttpResponse with bodyAsSource + val response = HttpResponse( + status = 500, + bodyAsSource = Source.single(ByteString(htmlBody)), + headers = Map("Content-Type" -> Seq("text/html")) + ) + + new HttpErrorFunctions { + val result = handleResponseEitherStream(exampleVerb, exampleUrl)(response) + + result match { + case Left(err) => err.message should include ("HTML error response suppressed") + err.message should not include ("Something went wrong") + case Right(_) => fail("Expected Left(UpstreamErrorResponse), got Right") + } + } + } } def expectError(statusCode: Int, reportAs: Int): Unit =