Skip to content

Commit 04c8893

Browse files
committed
feat: add new craft to filter out advertorial
1 parent 9f2d996 commit 04c8893

File tree

1 file changed

+96
-0
lines changed

1 file changed

+96
-0
lines changed

internal/recipe/advertorial.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package recipe
2+
3+
import (
4+
"FeedCraft/internal/adapter"
5+
"FeedCraft/internal/util"
6+
"github.com/gin-gonic/gin"
7+
"github.com/gorilla/feeds"
8+
"github.com/samber/lo"
9+
"github.com/sirupsen/logrus"
10+
"net/http"
11+
"strings"
12+
"time"
13+
)
14+
15+
/*
Use an LLM to detect advertorials (paid promotional articles) and exclude them from a feed.
*/
18+
19+
// prompt is the instruction text that CheckIfAdvertorial passes to
// adapter.CallGemini together with the article content. It is written in
// Chinese and asks the model to answer 'true' only when it is very confident
// the article is a marketing/promotional piece, and 'false' when it is not or
// when it cannot tell.
const prompt = "请阅读下面的文章, 并判断是不是广告推销软文. 如果非常确信这篇文章是营销推广文章, 请返回 'true', 如果不是或者没有把握确定,请返回 'false'"
20+
21+
// CheckIfAdvertorial 判断是否为软文, 非常有把握则返回true, 如果不是或者不确定或是发生错误则返回false
22+
func CheckIfAdvertorial(content string) bool {
23+
const MinContentLength = 20
24+
if len(strings.TrimSpace(content)) < MinContentLength {
25+
return false
26+
}
27+
result, err := adapter.CallGemini(prompt, content)
28+
if err != nil {
29+
logrus.Errorf("Error checking advertorial: %v", err)
30+
return false
31+
}
32+
logrus.Info("advertorial check: ", result)
33+
return result == "true"
34+
}
35+
36+
func OptionIgnoreAdvertorial() CraftOption {
37+
return func(feed *feeds.Feed) error {
38+
items := feed.Items
39+
filtered := lo.Filter(items, func(item *feeds.Item, index int) bool {
40+
content := item.Content //TODO handle description and content field correctly
41+
return CheckIfAdvertorial(content)
42+
})
43+
feed.Items = filtered
44+
return nil
45+
}
46+
}
47+
48+
func IgnoreAdvertorialArticle(c *gin.Context) {
49+
feedUrl, ok := c.GetQuery("input_url")
50+
if !ok || len(feedUrl) == 0 {
51+
c.String(400, "empty feed url")
52+
return
53+
}
54+
craftedFeed, err := NewCraftedFeedFromUrl(feedUrl, OptionIgnoreAdvertorial())
55+
if err != nil {
56+
c.String(http.StatusInternalServerError, err.Error())
57+
return
58+
}
59+
rssStr, err := craftedFeed.OutputFeed.ToRss()
60+
if err != nil {
61+
c.String(500, err.Error())
62+
return
63+
}
64+
c.Header("Content-Type", "application/xml")
65+
c.String(200, rssStr)
66+
}
67+
68+
// CheckIfAdvertorialDebugReq is the JSON request body that
// DebugCheckIfAdvertorial binds.
type CheckIfAdvertorialDebugReq struct {
	// Url is the address handed to TrivialExtractor; JSON key "url".
	Url string `json:"url"`
}
71+
// CheckIfAdvertorialDebugResp is the payload DebugCheckIfAdvertorial returns
// in the Data field of its response.
type CheckIfAdvertorialDebugResp struct {
	// Url echoes the URL from the request; JSON key "url".
	Url string `json:"url"`
	// FeedContent is the text that was extracted from the URL and passed to
	// CheckIfAdvertorial; JSON key "feed_content".
	FeedContent string `json:"feed_content"`
	// IsAdvertorial is the result of CheckIfAdvertorial; JSON key
	// "is_advertorial".
	IsAdvertorial bool `json:"is_advertorial"`
}
76+
77+
func DebugCheckIfAdvertorial(c *gin.Context) {
78+
reqBody := &CheckIfAdvertorialDebugReq{}
79+
err := c.ShouldBindJSON(reqBody)
80+
if err != nil {
81+
c.JSON(http.StatusBadRequest, util.APIResponse[any]{Msg: err.Error()})
82+
return
83+
}
84+
webContent, err := TrivialExtractor(reqBody.Url, 1*time.Minute)
85+
if err != nil {
86+
c.JSON(http.StatusBadRequest, util.APIResponse[any]{Msg: err.Error()})
87+
return
88+
}
89+
result := CheckIfAdvertorial(webContent)
90+
ret := CheckIfAdvertorialDebugResp{
91+
Url: reqBody.Url,
92+
IsAdvertorial: result,
93+
FeedContent: webContent,
94+
}
95+
c.JSON(http.StatusOK, util.APIResponse[any]{Data: ret})
96+
}

0 commit comments

Comments
 (0)