From 53f39693f740d12720032d7526d02be89e09e38e Mon Sep 17 00:00:00 2001 From: Phodal Huang Date: Wed, 7 Aug 2024 21:47:16 +0800 Subject: [PATCH] refactor(DisplayComponent): strip HTML using Jsoup, add Apache Commons Text for XML entity unescaping #218 Update `DisplayComponent` to use Jsoup for HTML stripping and include Apache Commons Text library for unescaping XML entities. This change improves the reliability of text processing by utilizing specialized libraries. --- build.gradle.kts | 2 ++ .../kotlin/cc/unitmesh/devti/gui/component/DisplayComponent.kt | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index f1e72cd1a..1eedb4686 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -434,6 +434,8 @@ project(":") { // token count implementation("com.knuddels:jtokkit:1.0.0") + implementation("org.apache.commons:commons-text:1.12.0") + // junit testImplementation("io.kotest:kotest-assertions-core:5.7.2") testImplementation("junit:junit:4.13.2") diff --git a/src/main/kotlin/cc/unitmesh/devti/gui/component/DisplayComponent.kt b/src/main/kotlin/cc/unitmesh/devti/gui/component/DisplayComponent.kt index b4bde9a25..3d90a44f5 100644 --- a/src/main/kotlin/cc/unitmesh/devti/gui/component/DisplayComponent.kt +++ b/src/main/kotlin/cc/unitmesh/devti/gui/component/DisplayComponent.kt @@ -30,9 +30,7 @@ class DisplayComponent(question: String) : JEditorPane() { } private fun stripHtmlAndUnescapeXmlEntities(input: String): String { - // 使用 Jsoup 去除HTML标签 val text = Jsoup.parse(input).text() - // 使用 Apache Commons Text 解码XML实体 return StringEscapeUtils.unescapeXml(text) } } \ No newline at end of file