diff --git a/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala b/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala index 54488219..408b04c6 100644 --- a/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala +++ b/http-verbs-play-30/src/main/scala/uk/gov/hmrc/http/HttpErrorFunctions.scala @@ -1,5 +1,5 @@ /* - * Copyright 2023 HM Revenue & Customs + * Copyright 2026 HM Revenue & Customs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,13 +49,39 @@ trait HttpErrorFunctions { def is5xx(status: Int) = status >= 500 && status < 600 + private def sanitiseHtmlErrorBody(response: HttpResponse, body: String): String = { + + import scala.util.matching.Regex + + val TitleRegex: Regex = """(?is)]*>(.*?)""".r + + def isHtml: Boolean = { + response.header("Content-Type") + .exists(_.toLowerCase.contains("text/html")) || + body.trim.toLowerCase.startsWith(" s"[HTML error response suppressed] — title: $t") + .getOrElse("[HTML error response suppressed] — no title found") + } else body + } + // Note, no special handling of BadRequest or NotFound // they will be returned as `Left(Upstream4xxResponse(status = 400))` and `Left(Upstream4xxResponse(status = 404))` respectively def handleResponseEither(httpMethod: String, url: String)(response: HttpResponse): Either[UpstreamErrorResponse, HttpResponse] = response.status match { case status if is4xx(status) || is5xx(status) => Left(UpstreamErrorResponse( - message = upstreamResponseMessage(httpMethod, url, status, response.body), + message = upstreamResponseMessage(httpMethod, url, status, sanitiseHtmlErrorBody(response, response.body)), statusCode = status, reportAs = if (is4xx(status)) HttpExceptions.INTERNAL_SERVER_ERROR else HttpExceptions.BAD_GATEWAY, headers = response.headers @@ -90,7 +116,7 @@ trait HttpErrorFunctions { case e: TimeoutException => "" } UpstreamErrorResponse( - message = upstreamResponseMessage(httpMethod, url, status, errorMessage), + message = upstreamResponseMessage(httpMethod, url, status, sanitiseHtmlErrorBody(response, errorMessage)), statusCode = status, reportAs = if (is4xx(status)) HttpExceptions.INTERNAL_SERVER_ERROR else HttpExceptions.BAD_GATEWAY, headers = response.headers diff --git a/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala b/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala index 426a283d..b10c3eed 100644 --- a/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala +++ b/http-verbs-play-30/src/test/scala/uk/gov/hmrc/http/HttpErrorFunctionsSpec.scala @@ -16,6 +16,10 @@ package uk.gov.hmrc.http +import org.apache.pekko.actor.ActorSystem +import org.apache.pekko.stream.Materializer +import org.apache.pekko.stream.scaladsl.Source +import org.apache.pekko.util.ByteString import org.scalacheck.{Gen, Shrink} import org.scalatest.EitherValues import org.scalatest.matchers.should.Matchers @@ -33,6 +37,10 @@ class HttpErrorFunctionsSpec // Disable shrinking implicit def noShrink[T]: Shrink[T] = Shrink.shrinkAny + val exampleVerb = "GET" + val exampleUrl = "http://example.com/something" + val exampleBody = "this is the string body" + "HttpErrorFunctions.handleResponseEither" should { "return the response if the status code is between 200 and 299" in { forAll(Gen.choose(200, 299))(expectResponse) @@ -47,11 +55,62 @@ class HttpErrorFunctionsSpec forAll(Gen.choose(0, 399))(expectResponse) forAll(Gen.choose(600, 1000))(expectResponse) } - } - val exampleVerb = "GET" - val exampleUrl = "http://example.com/something" - val exampleBody = "this is the string body" + "suppress HTML in error response body" in { + val htmlBody = + """ + | + | Error + | Something went wrong + |""".stripMargin + + val response = + HttpResponse( + status = 500, + body = htmlBody, + headers = Map("Content-Type" -> Seq("text/html")) + ) + + new HttpErrorFunctions { + val result = handleResponseEither(exampleVerb, exampleUrl)(response) + result match { + case Left(err) => err.message should include ("HTML error response suppressed") + err.message should not include ("Something went wrong") + case Right(_) => fail("Expected Left(UpstreamErrorResponse), got Right") + } + } + } + + "suppress HTML in streamed error response body" in { + + implicit val system: ActorSystem = ActorSystem("TestSystem") + implicit val mat : Materializer = Materializer(system) + + val htmlBody = + """ + | + | Error + | Something went wrong + |""".stripMargin + + // Create a HttpResponse with bodyAsSource + val response = HttpResponse( + status = 500, + bodyAsSource = Source.single(ByteString(htmlBody)), + headers = Map("Content-Type" -> Seq("text/html")) + ) + + new HttpErrorFunctions { + val result = handleResponseEitherStream(exampleVerb, exampleUrl)(response) + + result match { + case Left(err) => err.message should include ("HTML error response suppressed") + err.message should not include ("Something went wrong") + case Right(_) => fail("Expected Left(UpstreamErrorResponse), got Right") + } + } + } + } def expectError(statusCode: Int, reportAs: Int): Unit = new HttpErrorFunctions {