|
| 1 | +package recipe |
| 2 | + |
| 3 | +import ( |
| 4 | + "FeedCraft/internal/adapter" |
| 5 | + "FeedCraft/internal/util" |
| 6 | + "github.com/gin-gonic/gin" |
| 7 | + "github.com/gorilla/feeds" |
| 8 | + "github.com/samber/lo" |
| 9 | + "github.com/sirupsen/logrus" |
| 10 | + "net/http" |
| 11 | + "strings" |
| 12 | + "time" |
| 13 | +) |
| 14 | + |
/*
Uses an LLM to detect and exclude advertorials (advertising "soft articles").
*/

// prompt is the instruction text sent to adapter.CallGemini together with the
// article body. It asks the model to read the article and reply 'true' only
// when it is very sure the article is a marketing/promotional piece, and
// 'false' when it is not or when it cannot tell.
const prompt = "请阅读下面的文章, 并判断是不是广告推销软文. 如果非常确信这篇文章是营销推广文章, 请返回 'true', 如果不是或者没有把握确定,请返回 'false'"
| 20 | + |
| 21 | +// CheckIfAdvertorial 判断是否为软文, 非常有把握则返回true, 如果不是或者不确定或是发生错误则返回false |
| 22 | +func CheckIfAdvertorial(content string) bool { |
| 23 | + const MinContentLength = 20 |
| 24 | + if len(strings.TrimSpace(content)) < MinContentLength { |
| 25 | + return false |
| 26 | + } |
| 27 | + result, err := adapter.CallGemini(prompt, content) |
| 28 | + if err != nil { |
| 29 | + logrus.Errorf("Error checking advertorial: %v", err) |
| 30 | + return false |
| 31 | + } |
| 32 | + logrus.Info("advertorial check: ", result) |
| 33 | + return result == "true" |
| 34 | +} |
| 35 | + |
| 36 | +func OptionIgnoreAdvertorial() CraftOption { |
| 37 | + return func(feed *feeds.Feed) error { |
| 38 | + items := feed.Items |
| 39 | + filtered := lo.Filter(items, func(item *feeds.Item, index int) bool { |
| 40 | + content := item.Content //TODO handle description and content field correctly |
| 41 | + return CheckIfAdvertorial(content) |
| 42 | + }) |
| 43 | + feed.Items = filtered |
| 44 | + return nil |
| 45 | + } |
| 46 | +} |
| 47 | + |
| 48 | +func IgnoreAdvertorialArticle(c *gin.Context) { |
| 49 | + feedUrl, ok := c.GetQuery("input_url") |
| 50 | + if !ok || len(feedUrl) == 0 { |
| 51 | + c.String(400, "empty feed url") |
| 52 | + return |
| 53 | + } |
| 54 | + craftedFeed, err := NewCraftedFeedFromUrl(feedUrl, OptionIgnoreAdvertorial()) |
| 55 | + if err != nil { |
| 56 | + c.String(http.StatusInternalServerError, err.Error()) |
| 57 | + return |
| 58 | + } |
| 59 | + rssStr, err := craftedFeed.OutputFeed.ToRss() |
| 60 | + if err != nil { |
| 61 | + c.String(500, err.Error()) |
| 62 | + return |
| 63 | + } |
| 64 | + c.Header("Content-Type", "application/xml") |
| 65 | + c.String(200, rssStr) |
| 66 | +} |
| 67 | + |
type (
	// CheckIfAdvertorialDebugReq is the JSON request body accepted by
	// DebugCheckIfAdvertorial.
	CheckIfAdvertorialDebugReq struct {
		// Url is the address whose content is extracted and checked.
		Url string `json:"url"`
	}

	// CheckIfAdvertorialDebugResp is the payload returned by
	// DebugCheckIfAdvertorial.
	CheckIfAdvertorialDebugResp struct {
		// Url echoes the URL from the request.
		Url string `json:"url"`
		// FeedContent is the extracted content that was passed to the check.
		FeedContent string `json:"feed_content"`
		// IsAdvertorial is the result of CheckIfAdvertorial for FeedContent.
		IsAdvertorial bool `json:"is_advertorial"`
	}
)
| 76 | + |
| 77 | +func DebugCheckIfAdvertorial(c *gin.Context) { |
| 78 | + reqBody := &CheckIfAdvertorialDebugReq{} |
| 79 | + err := c.ShouldBindJSON(reqBody) |
| 80 | + if err != nil { |
| 81 | + c.JSON(http.StatusBadRequest, util.APIResponse[any]{Msg: err.Error()}) |
| 82 | + return |
| 83 | + } |
| 84 | + webContent, err := TrivialExtractor(reqBody.Url, 1*time.Minute) |
| 85 | + if err != nil { |
| 86 | + c.JSON(http.StatusBadRequest, util.APIResponse[any]{Msg: err.Error()}) |
| 87 | + return |
| 88 | + } |
| 89 | + result := CheckIfAdvertorial(webContent) |
| 90 | + ret := CheckIfAdvertorialDebugResp{ |
| 91 | + Url: reqBody.Url, |
| 92 | + IsAdvertorial: result, |
| 93 | + FeedContent: webContent, |
| 94 | + } |
| 95 | + c.JSON(http.StatusOK, util.APIResponse[any]{Data: ret}) |
| 96 | +} |
0 commit comments