From 2a8496d5fab4aa056ac8007ece501c3c4dfbc297 Mon Sep 17 00:00:00 2001 From: YeahOut Date: Fri, 11 Jul 2025 01:59:49 +0900 Subject: [PATCH 1/5] =?UTF-8?q?[Feature]=20#30=20-=20=EB=89=B4=EC=8A=A4=20?= =?UTF-8?q?=EB=82=9C=EC=9D=B4=EB=8F=84=20=EB=B3=80=ED=99=98=20AI=20?= =?UTF-8?q?=EC=84=9C=EB=B9=99=20=EB=B0=8F=20=EB=82=B4=EC=9A=A9/=EB=8B=A8?= =?UTF-8?q?=EC=96=B4=20=EA=B5=AC=EB=B6=84=20=EC=A0=80=EC=9E=A5=20=EA=B5=AC?= =?UTF-8?q?=ED=98=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/CrawledNewsService.java | 4 + .../domain/news/service/NewsService.java | 4 + .../dto/TransformRequestDTO.java | 13 ++++ .../dto/TransformedNewsResponseDTO.java | 27 +++++++ .../entity/CrawledNewsTransformed.java | 35 +++++++++ .../entity/NewsTransformed.java | 35 +++++++++ .../CrawledNewsTransformedRepository.java | 8 ++ .../repository/NewsTransformedRepository.java | 8 ++ .../service/CrawledTransformedService.java | 74 ++++++++++++++++++ .../service/TransformedNewsService.java | 76 +++++++++++++++++++ .../words/repository/WordRepository.java | 2 + src/main/resources/application.yml | 6 +- 12 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 src/main/java/dgu/newsee/domain/transformednews/dto/TransformRequestDTO.java create mode 100644 src/main/java/dgu/newsee/domain/transformednews/dto/TransformedNewsResponseDTO.java create mode 100644 src/main/java/dgu/newsee/domain/transformednews/entity/CrawledNewsTransformed.java create mode 100644 src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java create mode 100644 src/main/java/dgu/newsee/domain/transformednews/repository/CrawledNewsTransformedRepository.java create mode 100644 src/main/java/dgu/newsee/domain/transformednews/repository/NewsTransformedRepository.java create mode 100644 src/main/java/dgu/newsee/domain/transformednews/service/CrawledTransformedService.java create mode 100644 src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java diff --git a/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java b/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java index d579990..b946e39 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java @@ -4,6 +4,7 @@ import dgu.newsee.domain.crawlednews.repository.CrawledNewsRepository; import dgu.newsee.domain.crawlednews.util.CrawledNewsCrawler; import dgu.newsee.domain.crawlednews.util.CrawledNewsResult; +import dgu.newsee.domain.transformednews.service.CrawledTransformedService; import lombok.RequiredArgsConstructor; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -14,6 +15,7 @@ public class CrawledNewsService { private final CrawledNewsCrawler crawler; private final CrawledNewsRepository repository; + private final CrawledTransformedService crawledTransformedService; @Transactional public void crawlAndSave(String url, String category) { @@ -36,6 +38,8 @@ public void crawlAndSave(String url, String category) { .build(); repository.save(news); System.out.println("크롤링 및 저장 완료: " + normalizedUrl); + crawledTransformedService.requestTransformAndSave(news.getId(), null); // 기본 level은 "중" + } catch (Exception e) { System.err.println("크롤링 실패: " + normalizedUrl + " → " + e.getMessage()); } diff --git a/src/main/java/dgu/newsee/domain/news/service/NewsService.java b/src/main/java/dgu/newsee/domain/news/service/NewsService.java index 8d0e324..6113aba 100644 --- a/src/main/java/dgu/newsee/domain/news/service/NewsService.java +++ b/src/main/java/dgu/newsee/domain/news/service/NewsService.java @@ -5,6 +5,7 @@ import dgu.newsee.domain.news.repository.NewsRepository; import dgu.newsee.domain.news.util.NewsCrawlResult; import dgu.newsee.domain.news.util.NewsCrawler; +import dgu.newsee.domain.transformednews.service.TransformedNewsService; import dgu.newsee.domain.user.entity.User; import dgu.newsee.domain.user.repository.UserRepository; import dgu.newsee.domain.news.entity.SavedNews; @@ -21,6 +22,7 @@ public class NewsService { private final NewsRepository newsRepository; private final UserRepository userRepository; private final SavedNewsRepository savedNewsRepository; + private final TransformedNewsService transformedService; @Transactional public News crawlAndSave(NewsCrawlRequestDTO request, Long userId) { @@ -46,6 +48,8 @@ public News crawlAndSave(NewsCrawlRequestDTO request, Long userId) { .build(); newsRepository.save(news); + transformedService.requestTransformAndSave(news.getId(), null); + // 사용자 조회 User user = userRepository.findById(userId) .orElseThrow(() -> new IllegalArgumentException("사용자를 찾을 수 없습니다.")); diff --git a/src/main/java/dgu/newsee/domain/transformednews/dto/TransformRequestDTO.java b/src/main/java/dgu/newsee/domain/transformednews/dto/TransformRequestDTO.java new file mode 100644 index 0000000..07d8b8a --- /dev/null +++ b/src/main/java/dgu/newsee/domain/transformednews/dto/TransformRequestDTO.java @@ -0,0 +1,13 @@ +package dgu.newsee.domain.transformednews.dto; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +@Getter +@AllArgsConstructor +public class TransformRequestDTO { + private Long newsId; + private String title; + private String originalContent; + private String level; // default는 "중" +} diff --git a/src/main/java/dgu/newsee/domain/transformednews/dto/TransformedNewsResponseDTO.java b/src/main/java/dgu/newsee/domain/transformednews/dto/TransformedNewsResponseDTO.java new file mode 100644 index 0000000..25ec3ca --- /dev/null +++ b/src/main/java/dgu/newsee/domain/transformednews/dto/TransformedNewsResponseDTO.java @@ -0,0 +1,27 @@ +package dgu.newsee.domain.transformednews.dto; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; + +import java.util.List; + +@Getter +@Builder +@AllArgsConstructor +public class TransformedNewsResponseDTO { + private Long newsId; + private String level; + private String title; + private String transformedContent; + private String summarized; + private List difficultWords; + + @Getter + @Builder + @AllArgsConstructor + public static class DifficultWordDTO { + private String term; + private String description; + } +} diff --git a/src/main/java/dgu/newsee/domain/transformednews/entity/CrawledNewsTransformed.java b/src/main/java/dgu/newsee/domain/transformednews/entity/CrawledNewsTransformed.java new file mode 100644 index 0000000..7e1aed7 --- /dev/null +++ b/src/main/java/dgu/newsee/domain/transformednews/entity/CrawledNewsTransformed.java @@ -0,0 +1,35 @@ +package dgu.newsee.domain.transformednews.entity; + +import dgu.newsee.domain.crawlednews.entity.CrawledNews; +import dgu.newsee.domain.words.entity.Word; +import jakarta.persistence.*; +import lombok.*; + +import java.util.List; + +@Entity +@Getter +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor +@Builder +public class CrawledNewsTransformed { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + private String level; + + @Column(length = 5000) + private String transformedContent; + + @Column(length = 1000) + private String summarized; + + @OneToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "crawled_news_id") + private CrawledNews crawledNews; + + @OneToMany(mappedBy = "crawledNewsTransformed", cascade = CascadeType.ALL, orphanRemoval = true) + private List difficultWords; +} diff --git a/src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java b/src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java new file mode 100644 index 0000000..56a90ba --- /dev/null +++ b/src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java @@ -0,0 +1,35 @@ +package dgu.newsee.domain.transformednews.entity; + +import dgu.newsee.domain.news.entity.News; +import dgu.newsee.domain.words.entity.Word; +import jakarta.persistence.*; +import lombok.*; + +import java.util.List; + +@Entity +@Getter +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor +@Builder +public class NewsTransformed { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + private String level; + + @Column(length = 5000) + private String transformedContent; + + @Column(length = 1000) + private String summarized; + + @OneToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "news_id") + private News news; + + @OneToMany(mappedBy = "newsTransformed", cascade = CascadeType.ALL, orphanRemoval = true) + private List difficultWords; +} diff --git a/src/main/java/dgu/newsee/domain/transformednews/repository/CrawledNewsTransformedRepository.java b/src/main/java/dgu/newsee/domain/transformednews/repository/CrawledNewsTransformedRepository.java new file mode 100644 index 0000000..48b79b2 --- /dev/null +++ b/src/main/java/dgu/newsee/domain/transformednews/repository/CrawledNewsTransformedRepository.java @@ -0,0 +1,8 @@ +package dgu.newsee.domain.transformednews.repository; + +import dgu.newsee.domain.transformednews.entity.CrawledNewsTransformed; +import org.springframework.data.jpa.repository.JpaRepository; + +public interface CrawledNewsTransformedRepository extends JpaRepository { + boolean existsByCrawledNewsId(Long crawledNewsId); +} diff --git a/src/main/java/dgu/newsee/domain/transformednews/repository/NewsTransformedRepository.java b/src/main/java/dgu/newsee/domain/transformednews/repository/NewsTransformedRepository.java new file mode 100644 index 0000000..b8c9f9a --- /dev/null +++ b/src/main/java/dgu/newsee/domain/transformednews/repository/NewsTransformedRepository.java @@ -0,0 +1,8 @@ +package dgu.newsee.domain.transformednews.repository; + +import dgu.newsee.domain.transformednews.entity.NewsTransformed; +import org.springframework.data.jpa.repository.JpaRepository; + +public interface NewsTransformedRepository extends JpaRepository { + boolean existsByNewsId(Long newsId); +} diff --git a/src/main/java/dgu/newsee/domain/transformednews/service/CrawledTransformedService.java b/src/main/java/dgu/newsee/domain/transformednews/service/CrawledTransformedService.java new file mode 100644 index 0000000..398882e --- /dev/null +++ b/src/main/java/dgu/newsee/domain/transformednews/service/CrawledTransformedService.java @@ -0,0 +1,74 @@ +package dgu.newsee.domain.transformednews.service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import dgu.newsee.domain.crawlednews.entity.CrawledNews; +import dgu.newsee.domain.crawlednews.repository.CrawledNewsRepository; +import dgu.newsee.domain.transformednews.dto.TransformRequestDTO; +import dgu.newsee.domain.transformednews.dto.TransformedNewsResponseDTO; +import dgu.newsee.domain.transformednews.entity.CrawledNewsTransformed; +import dgu.newsee.domain.transformednews.repository.CrawledNewsTransformedRepository; +import dgu.newsee.domain.words.entity.Word; +import dgu.newsee.domain.words.repository.WordRepository; +import lombok.RequiredArgsConstructor; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.http.*; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.web.client.RestTemplate; + +@Service +@RequiredArgsConstructor +public class CrawledTransformedService { + + private final CrawledNewsRepository crawledNewsRepository; + private final CrawledNewsTransformedRepository crawledTransformedRepository; + private final WordRepository wordRepository; + + private final RestTemplate restTemplate = new RestTemplate(); + + @Value("${external.ai.url}") + private String aiServerUrl; + + @Transactional + public void requestTransformAndSave(Long crawledNewsId, String level) { + CrawledNews news = crawledNewsRepository.findById(crawledNewsId) + .orElseThrow(() -> new RuntimeException("크롤링 뉴스 없음")); + + TransformRequestDTO request = new TransformRequestDTO( + news.getId(), + news.getTitle(), + news.getContent(), + level == null ? "중" : level + ); + + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.APPLICATION_JSON); + HttpEntity entity = new HttpEntity<>(request, headers); + + ResponseEntity response = restTemplate.exchange( + aiServerUrl, HttpMethod.POST, entity, TransformedNewsResponseDTO.class + ); + + TransformedNewsResponseDTO result = response.getBody(); + if (result == null) throw new RuntimeException("AI 응답 없음"); + + CrawledNewsTransformed transformed = CrawledNewsTransformed.builder() + .crawledNews(news) + .level(result.getLevel()) + .transformedContent(result.getTransformedContent()) + .summarized(result.getSummarized()) + .build(); + crawledTransformedRepository.save(transformed); + + for (var wordDTO : result.getDifficultWords()) { + if (!wordRepository.existsByTerm(wordDTO.getTerm())) { + Word word = Word.builder() + .term(wordDTO.getTerm()) + .description(wordDTO.getDescription()) + .category("크롤링 뉴스 변환") + .build(); + wordRepository.save(word); + } + } + } +} diff --git a/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java b/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java new file mode 100644 index 0000000..3b134a7 --- /dev/null +++ b/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java @@ -0,0 +1,76 @@ +package dgu.newsee.domain.transformednews.service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import dgu.newsee.domain.news.entity.News; +import dgu.newsee.domain.news.repository.NewsRepository; +import dgu.newsee.domain.transformednews.dto.TransformRequestDTO; +import dgu.newsee.domain.transformednews.dto.TransformedNewsResponseDTO; +import dgu.newsee.domain.transformednews.entity.NewsTransformed; +import dgu.newsee.domain.transformednews.repository.NewsTransformedRepository; +import dgu.newsee.domain.words.entity.Word; +import dgu.newsee.domain.words.repository.WordRepository; +import lombok.RequiredArgsConstructor; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.web.client.RestTemplate; +import org.springframework.http.*; + +import java.util.List; + +@Service +@RequiredArgsConstructor +public class TransformedNewsService { + + private final NewsRepository newsRepository; + private final NewsTransformedRepository transformedRepository; + private final WordRepository wordRepository; + + private final RestTemplate restTemplate = new RestTemplate(); + + @Value("${external.ai.url}") + private String aiServerUrl; + + @Transactional + public void requestTransformAndSave(Long newsId, String level) { + News news = newsRepository.findById(newsId) + .orElseThrow(() -> new RuntimeException("뉴스 없음")); + + TransformRequestDTO request = new TransformRequestDTO( + newsId, + news.getTitle(), + news.getContent(), + level == null ? "중" : level + ); + + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.APPLICATION_JSON); + HttpEntity entity = new HttpEntity<>(request, headers); + + ResponseEntity response = restTemplate.exchange( + aiServerUrl, HttpMethod.POST, entity, TransformedNewsResponseDTO.class + ); + + TransformedNewsResponseDTO result = response.getBody(); + if (result == null) throw new RuntimeException("AI 응답 없음"); + + NewsTransformed transformed = NewsTransformed.builder() + .news(news) + .level(result.getLevel()) + .transformedContent(result.getTransformedContent()) + .summarized(result.getSummarized()) + .build(); + transformedRepository.save(transformed); + + for (var wordDTO : result.getDifficultWords()) { + if (!wordRepository.existsByTerm(wordDTO.getTerm())) { + Word word = Word.builder() + .term(wordDTO.getTerm()) + .description(wordDTO.getDescription()) + .category("뉴스 변환") + .build(); + wordRepository.save(word); + } + } + } +} diff --git a/src/main/java/dgu/newsee/domain/words/repository/WordRepository.java b/src/main/java/dgu/newsee/domain/words/repository/WordRepository.java index 07b60ac..0142783 100644 --- a/src/main/java/dgu/newsee/domain/words/repository/WordRepository.java +++ b/src/main/java/dgu/newsee/domain/words/repository/WordRepository.java @@ -10,4 +10,6 @@ public interface WordRepository extends JpaRepository { List findByTermContainingOrDescriptionContaining(String termKeyword, String descKeyword); + + boolean existsByTerm(String term); } \ No newline at end of file diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 7f428e5..1fe312c 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -23,4 +23,8 @@ spring: pool: size: 3 main: - allow-bean-definition-overriding: true \ No newline at end of file + allow-bean-definition-overriding: true\ + +external: + ai: + url: https://a00e269d857e.ngrok-free.app \ No newline at end of file From 67c8c370c3a8219ba2828120bfb42f3ba636a014 Mon Sep 17 00:00:00 2001 From: YeahOut Date: Sat, 12 Jul 2025 10:33:44 +0900 Subject: [PATCH 2/5] =?UTF-8?q?[Refactor]=20=EC=82=AC=EC=9A=A9=EC=9E=90=20?= =?UTF-8?q?url=20+=20=EC=9E=90=EB=8F=99=ED=99=94=20=ED=86=B5=ED=95=A9=20DB?= =?UTF-8?q?=20=EA=B5=AC=EC=B6=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../crawlednews/entity/CrawledNews.java | 32 ----------------- .../repository/CrawledNewsRepository.java | 8 ----- .../service/CrawledNewsService.java | 14 ++++---- .../news/controller/NewsController.java | 6 ++-- .../domain/news/dto/NewsCrawlResponseDTO.java | 16 ++++----- .../entity/{News.java => NewsOrigin.java} | 5 ++- .../newsee/domain/news/entity/NewsStatus.java | 6 ++++ .../newsee/domain/news/entity/SavedNews.java | 34 ------------------- .../domain/news/entity/SavedNewsId.java | 29 ---------------- .../news/repository/NewsRepository.java | 4 +-- .../news/repository/SavedNewsRepository.java | 11 ------ .../domain/news/service/NewsService.java | 22 ++++-------- 12 files changed, 38 insertions(+), 149 deletions(-) delete mode 100644 src/main/java/dgu/newsee/domain/crawlednews/entity/CrawledNews.java delete mode 100644 src/main/java/dgu/newsee/domain/crawlednews/repository/CrawledNewsRepository.java rename src/main/java/dgu/newsee/domain/news/entity/{News.java => NewsOrigin.java} (77%) create mode 100644 src/main/java/dgu/newsee/domain/news/entity/NewsStatus.java delete mode 100644 src/main/java/dgu/newsee/domain/news/entity/SavedNews.java delete mode 100644 src/main/java/dgu/newsee/domain/news/entity/SavedNewsId.java delete mode 100644 src/main/java/dgu/newsee/domain/news/repository/SavedNewsRepository.java diff --git a/src/main/java/dgu/newsee/domain/crawlednews/entity/CrawledNews.java b/src/main/java/dgu/newsee/domain/crawlednews/entity/CrawledNews.java deleted file mode 100644 index b24c840..0000000 --- a/src/main/java/dgu/newsee/domain/crawlednews/entity/CrawledNews.java +++ /dev/null @@ -1,32 +0,0 @@ -package dgu.newsee.domain.crawlednews.entity; - -import dgu.newsee.global.common.BaseEntity; -import jakarta.persistence.*; -import lombok.*; - -import java.time.LocalDateTime; - -@Entity -@Getter -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor -@Builder -public class CrawledNews extends BaseEntity { - - @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - private Long id; - - private String title; - - @Lob - private String content; - - private String category; - - private String source; - - private LocalDateTime time; - - private String originalUrl; -} diff --git a/src/main/java/dgu/newsee/domain/crawlednews/repository/CrawledNewsRepository.java b/src/main/java/dgu/newsee/domain/crawlednews/repository/CrawledNewsRepository.java deleted file mode 100644 index 98d931a..0000000 --- a/src/main/java/dgu/newsee/domain/crawlednews/repository/CrawledNewsRepository.java +++ /dev/null @@ -1,8 +0,0 @@ -package dgu.newsee.domain.crawlednews.repository; - -import dgu.newsee.domain.crawlednews.entity.CrawledNews; -import org.springframework.data.jpa.repository.JpaRepository; - -public interface CrawledNewsRepository extends JpaRepository { - boolean existsByOriginalUrl(String url); -} diff --git a/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java b/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java index d579990..16a9c8d 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java @@ -1,7 +1,8 @@ package dgu.newsee.domain.crawlednews.service; -import dgu.newsee.domain.crawlednews.entity.CrawledNews; -import dgu.newsee.domain.crawlednews.repository.CrawledNewsRepository; +import dgu.newsee.domain.news.entity.NewsOrigin; +import dgu.newsee.domain.news.entity.NewsStatus; +import dgu.newsee.domain.news.repository.NewsRepository; import dgu.newsee.domain.crawlednews.util.CrawledNewsCrawler; import dgu.newsee.domain.crawlednews.util.CrawledNewsResult; import lombok.RequiredArgsConstructor; @@ -13,28 +14,29 @@ public class CrawledNewsService { private final CrawledNewsCrawler crawler; - private final CrawledNewsRepository repository; + private final NewsRepository newsRepository; @Transactional public void crawlAndSave(String url, String category) { String normalizedUrl = url.replace("/comment", "").split("\\?")[0]; - if (repository.existsByOriginalUrl(normalizedUrl)) { + if (newsRepository.existsByOriginalUrl(normalizedUrl)) { System.out.println("중복된 뉴스 URL → 저장하지 않음: " + normalizedUrl); return; } try { CrawledNewsResult result = crawler.crawl(normalizedUrl, category); - CrawledNews news = CrawledNews.builder() + NewsOrigin news = NewsOrigin.builder() .title(result.getTitle()) .content(result.getContent()) .category(result.getCategory()) .source(result.getSource()) .time(result.getTime()) .originalUrl(normalizedUrl) + .status(NewsStatus.AUTO_CRAWLED) .build(); - repository.save(news); + newsRepository.save(news); System.out.println("크롤링 및 저장 완료: " + normalizedUrl); } catch (Exception e) { System.err.println("크롤링 실패: " + normalizedUrl + " → " + e.getMessage()); diff --git a/src/main/java/dgu/newsee/domain/news/controller/NewsController.java b/src/main/java/dgu/newsee/domain/news/controller/NewsController.java index 9183c52..a7fec1e 100644 --- a/src/main/java/dgu/newsee/domain/news/controller/NewsController.java +++ b/src/main/java/dgu/newsee/domain/news/controller/NewsController.java @@ -2,7 +2,7 @@ import dgu.newsee.domain.news.dto.NewsCrawlRequestDTO; import dgu.newsee.domain.news.dto.NewsCrawlResponseDTO; -import dgu.newsee.domain.news.entity.News; +import dgu.newsee.domain.news.entity.NewsOrigin; import dgu.newsee.domain.news.service.NewsService; import dgu.newsee.global.payload.ApiResponse; import dgu.newsee.global.payload.ResponseCode; @@ -30,10 +30,10 @@ public ApiResponse crawlNews( try { Long userId = userDetails.getUserId(); - News news = newsService.crawlAndSave(request, userId); + NewsOrigin newsOrigin = newsService.crawlAndSave(request, userId); return ApiResponse.success( - new NewsCrawlResponseDTO(news), + new NewsCrawlResponseDTO(newsOrigin), ResponseCode.COMMON_SUCCESS ); } catch (IllegalArgumentException e) { diff --git a/src/main/java/dgu/newsee/domain/news/dto/NewsCrawlResponseDTO.java b/src/main/java/dgu/newsee/domain/news/dto/NewsCrawlResponseDTO.java index de13496..08c8472 100644 --- a/src/main/java/dgu/newsee/domain/news/dto/NewsCrawlResponseDTO.java +++ b/src/main/java/dgu/newsee/domain/news/dto/NewsCrawlResponseDTO.java @@ -1,6 +1,6 @@ package dgu.newsee.domain.news.dto; -import dgu.newsee.domain.news.entity.News; +import dgu.newsee.domain.news.entity.NewsOrigin; import lombok.Getter; import java.time.LocalDateTime; @@ -14,13 +14,13 @@ public class NewsCrawlResponseDTO { private LocalDateTime time; private Long newsId; - public NewsCrawlResponseDTO(News news) { - this.title = news.getTitle(); - this.content = news.getContent(); - this.category = news.getCategory(); - this.source = news.getSource(); - this.time = news.getTime(); - this.newsId = news.getId(); + public NewsCrawlResponseDTO(NewsOrigin newsOrigin) { + this.title = newsOrigin.getTitle(); + this.content = newsOrigin.getContent(); + this.category = newsOrigin.getCategory(); + this.source = newsOrigin.getSource(); + this.time = newsOrigin.getTime(); + this.newsId = newsOrigin.getId(); } } diff --git a/src/main/java/dgu/newsee/domain/news/entity/News.java b/src/main/java/dgu/newsee/domain/news/entity/NewsOrigin.java similarity index 77% rename from src/main/java/dgu/newsee/domain/news/entity/News.java rename to src/main/java/dgu/newsee/domain/news/entity/NewsOrigin.java index ec40550..6530d43 100644 --- a/src/main/java/dgu/newsee/domain/news/entity/News.java +++ b/src/main/java/dgu/newsee/domain/news/entity/NewsOrigin.java @@ -10,7 +10,7 @@ @NoArgsConstructor(access = AccessLevel.PROTECTED) @AllArgsConstructor @Builder -public class News extends BaseEntity { +public class NewsOrigin extends BaseEntity { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @@ -28,4 +28,7 @@ public class News extends BaseEntity { private LocalDateTime time; private String originalUrl; + + @Enumerated(EnumType.STRING) // DB에는 USER_INPUT, AUTO_CRAWLED로 저장됨 + private NewsStatus status; } diff --git a/src/main/java/dgu/newsee/domain/news/entity/NewsStatus.java b/src/main/java/dgu/newsee/domain/news/entity/NewsStatus.java new file mode 100644 index 0000000..5310817 --- /dev/null +++ b/src/main/java/dgu/newsee/domain/news/entity/NewsStatus.java @@ -0,0 +1,6 @@ +package dgu.newsee.domain.news.entity; + +public enum NewsStatus { + USER_INPUT, // 0 + AUTO_CRAWLED // 1 +} diff --git a/src/main/java/dgu/newsee/domain/news/entity/SavedNews.java b/src/main/java/dgu/newsee/domain/news/entity/SavedNews.java deleted file mode 100644 index f9ab0bf..0000000 --- a/src/main/java/dgu/newsee/domain/news/entity/SavedNews.java +++ /dev/null @@ -1,34 +0,0 @@ -package dgu.newsee.domain.news.entity; - -import dgu.newsee.domain.news.entity.News; -import dgu.newsee.domain.user.entity.User; -import jakarta.persistence.*; -import lombok.*; - -import java.time.LocalDateTime; - -@Entity -@IdClass(SavedNewsId.class) -@Getter -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class SavedNews { - - @Id - @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "user_id") - private User user; - - @Id - @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "news_id") - private News news; - - private LocalDateTime savedAt; - - @PrePersist - protected void onCreate() { - this.savedAt = LocalDateTime.now(); - } -} \ No newline at end of file diff --git a/src/main/java/dgu/newsee/domain/news/entity/SavedNewsId.java b/src/main/java/dgu/newsee/domain/news/entity/SavedNewsId.java deleted file mode 100644 index 1036a79..0000000 --- a/src/main/java/dgu/newsee/domain/news/entity/SavedNewsId.java +++ /dev/null @@ -1,29 +0,0 @@ -package dgu.newsee.domain.news.entity; - -import java.io.Serializable; -import java.util.Objects; - -public class SavedNewsId implements Serializable { - private Long user; - private Long news; - - public SavedNewsId() {} - - public SavedNewsId(Long user, Long news) { - this.user = user; - this.news = news; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - SavedNewsId that = (SavedNewsId) o; - return Objects.equals(user, that.user) && Objects.equals(news, that.news); - } - - @Override - public int hashCode() { - return Objects.hash(user, news); - } -} \ No newline at end of file diff --git a/src/main/java/dgu/newsee/domain/news/repository/NewsRepository.java b/src/main/java/dgu/newsee/domain/news/repository/NewsRepository.java index 4b99a91..182652d 100644 --- a/src/main/java/dgu/newsee/domain/news/repository/NewsRepository.java +++ b/src/main/java/dgu/newsee/domain/news/repository/NewsRepository.java @@ -1,8 +1,8 @@ package dgu.newsee.domain.news.repository; -import dgu.newsee.domain.news.entity.News; +import dgu.newsee.domain.news.entity.NewsOrigin; import org.springframework.data.jpa.repository.JpaRepository; -public interface NewsRepository extends JpaRepository { +public interface NewsRepository extends JpaRepository { boolean existsByOriginalUrl(String url); } diff --git a/src/main/java/dgu/newsee/domain/news/repository/SavedNewsRepository.java b/src/main/java/dgu/newsee/domain/news/repository/SavedNewsRepository.java deleted file mode 100644 index 9023c93..0000000 --- a/src/main/java/dgu/newsee/domain/news/repository/SavedNewsRepository.java +++ /dev/null @@ -1,11 +0,0 @@ -package dgu.newsee.domain.news.repository; - -import dgu.newsee.domain.news.entity.SavedNews; -import dgu.newsee.domain.news.entity.SavedNewsId; -import dgu.newsee.domain.user.entity.User; -import dgu.newsee.domain.news.entity.News; -import org.springframework.data.jpa.repository.JpaRepository; - -public interface SavedNewsRepository extends JpaRepository { - boolean existsByUserAndNews(User user, News news); -} diff --git a/src/main/java/dgu/newsee/domain/news/service/NewsService.java b/src/main/java/dgu/newsee/domain/news/service/NewsService.java index 8d0e324..977c13c 100644 --- a/src/main/java/dgu/newsee/domain/news/service/NewsService.java +++ b/src/main/java/dgu/newsee/domain/news/service/NewsService.java @@ -1,14 +1,13 @@ package dgu.newsee.domain.news.service; import dgu.newsee.domain.news.dto.NewsCrawlRequestDTO; -import dgu.newsee.domain.news.entity.News; +import dgu.newsee.domain.news.entity.NewsOrigin; +import dgu.newsee.domain.news.entity.NewsStatus; import dgu.newsee.domain.news.repository.NewsRepository; import dgu.newsee.domain.news.util.NewsCrawlResult; import dgu.newsee.domain.news.util.NewsCrawler; import dgu.newsee.domain.user.entity.User; import dgu.newsee.domain.user.repository.UserRepository; -import dgu.newsee.domain.news.entity.SavedNews; -import dgu.newsee.domain.news.repository.SavedNewsRepository; import lombok.RequiredArgsConstructor; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -20,10 +19,9 @@ public class NewsService { private final NewsCrawler crawler; private final NewsRepository newsRepository; private final UserRepository userRepository; - private final SavedNewsRepository savedNewsRepository; @Transactional - public News crawlAndSave(NewsCrawlRequestDTO request, Long userId) { + public NewsOrigin crawlAndSave(NewsCrawlRequestDTO request, Long userId) { String url = request.getUrl(); // 중복 저장 방지 @@ -36,28 +34,22 @@ public News crawlAndSave(NewsCrawlRequestDTO request, Long userId) { NewsCrawlResult result = crawler.crawl(url); // News 객체 저장 - News news = News.builder() + NewsOrigin newsOrigin = NewsOrigin.builder() .title(result.getTitle()) .content(result.getContent()) .category(result.getCategory()) .source(result.getSource()) .time(result.getTime()) .originalUrl(url) + .status(NewsStatus.USER_INPUT) .build(); - newsRepository.save(news); + newsRepository.save(newsOrigin); // 사용자 조회 User user = userRepository.findById(userId) .orElseThrow(() -> new IllegalArgumentException("사용자를 찾을 수 없습니다.")); - // 사용자와 뉴스 연결 (SavedNews 테이블에 저장) - SavedNews savedNews = SavedNews.builder() - .user(user) - .news(news) - .build(); - savedNewsRepository.save(savedNews); - - return news; + return newsOrigin; } catch (Exception e) { throw new RuntimeException("크롤링 실패: " + e.getMessage()); From b960024963200e0e1838ac6576933269ef84fd27 Mon Sep 17 00:00:00 2001 From: YeahOut Date: Sat, 12 Jul 2025 10:44:37 +0900 Subject: [PATCH 3/5] =?UTF-8?q?[Refactor]=20#41=20-=20URL=20Parser=20?= =?UTF-8?q?=EB=A1=9C=EC=A7=81=20=ED=86=B5=ED=95=A9=20Util=20=EC=83=9D?= =?UTF-8?q?=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../controller/NewsController.java | 10 +-- .../dto/NewsCrawlRequestDTO.java | 2 +- .../dto/NewsCrawlResponseDTO.java | 4 +- .../entity/NewsOrigin.java | 2 +- .../entity/NewsStatus.java | 2 +- .../repository/NewsRepository.java | 4 +- .../service/CrawledNewsService.java | 10 +-- .../service/NewsService.java | 18 +++--- .../crawlednews/util/CrawledNewsCrawler.java | 22 +------ .../domain/crawlednews/util/NewsCrawler.java | 16 +++++ .../crawlednews/util/NewsParserUtil.java | 61 +++++++++++++++++++ ...CrawledNewsResult.java => ParsedNews.java} | 2 +- .../domain/news/util/NewsCrawlResult.java | 17 ------ .../newsee/domain/news/util/NewsCrawler.java | 44 ------------- 14 files changed, 106 insertions(+), 108 deletions(-) rename src/main/java/dgu/newsee/domain/{news => crawlednews}/controller/NewsController.java (82%) rename src/main/java/dgu/newsee/domain/{news => crawlednews}/dto/NewsCrawlRequestDTO.java (68%) rename src/main/java/dgu/newsee/domain/{news => crawlednews}/dto/NewsCrawlResponseDTO.java (85%) rename src/main/java/dgu/newsee/domain/{news => crawlednews}/entity/NewsOrigin.java (93%) rename src/main/java/dgu/newsee/domain/{news => crawlednews}/entity/NewsStatus.java (62%) rename src/main/java/dgu/newsee/domain/{news => crawlednews}/repository/NewsRepository.java (63%) rename src/main/java/dgu/newsee/domain/{news => crawlednews}/service/NewsService.java (79%) create mode 100644 src/main/java/dgu/newsee/domain/crawlednews/util/NewsCrawler.java create mode 100644 src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java rename src/main/java/dgu/newsee/domain/crawlednews/util/{CrawledNewsResult.java => ParsedNews.java} (90%) delete mode 100644 src/main/java/dgu/newsee/domain/news/util/NewsCrawlResult.java delete mode 100644 src/main/java/dgu/newsee/domain/news/util/NewsCrawler.java diff --git a/src/main/java/dgu/newsee/domain/news/controller/NewsController.java b/src/main/java/dgu/newsee/domain/crawlednews/controller/NewsController.java similarity index 82% rename from src/main/java/dgu/newsee/domain/news/controller/NewsController.java rename to src/main/java/dgu/newsee/domain/crawlednews/controller/NewsController.java index a7fec1e..af14b12 100644 --- a/src/main/java/dgu/newsee/domain/news/controller/NewsController.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/controller/NewsController.java @@ -1,9 +1,9 @@ -package dgu.newsee.domain.news.controller; +package dgu.newsee.domain.crawlednews.controller; -import dgu.newsee.domain.news.dto.NewsCrawlRequestDTO; -import dgu.newsee.domain.news.dto.NewsCrawlResponseDTO; -import dgu.newsee.domain.news.entity.NewsOrigin; -import dgu.newsee.domain.news.service.NewsService; +import dgu.newsee.domain.crawlednews.dto.NewsCrawlRequestDTO; +import dgu.newsee.domain.crawlednews.dto.NewsCrawlResponseDTO; +import dgu.newsee.domain.crawlednews.entity.NewsOrigin; +import dgu.newsee.domain.crawlednews.service.NewsService; import dgu.newsee.global.payload.ApiResponse; import dgu.newsee.global.payload.ResponseCode; import dgu.newsee.global.security.CustomUserDetails; diff --git a/src/main/java/dgu/newsee/domain/news/dto/NewsCrawlRequestDTO.java b/src/main/java/dgu/newsee/domain/crawlednews/dto/NewsCrawlRequestDTO.java similarity index 68% rename from src/main/java/dgu/newsee/domain/news/dto/NewsCrawlRequestDTO.java rename to src/main/java/dgu/newsee/domain/crawlednews/dto/NewsCrawlRequestDTO.java index a57a56d..9d15d65 100644 --- a/src/main/java/dgu/newsee/domain/news/dto/NewsCrawlRequestDTO.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/dto/NewsCrawlRequestDTO.java @@ -1,4 +1,4 @@ -package dgu.newsee.domain.news.dto; +package dgu.newsee.domain.crawlednews.dto; import lombok.Getter; diff --git a/src/main/java/dgu/newsee/domain/news/dto/NewsCrawlResponseDTO.java b/src/main/java/dgu/newsee/domain/crawlednews/dto/NewsCrawlResponseDTO.java similarity index 85% rename from src/main/java/dgu/newsee/domain/news/dto/NewsCrawlResponseDTO.java rename to src/main/java/dgu/newsee/domain/crawlednews/dto/NewsCrawlResponseDTO.java index 08c8472..08f29a9 100644 --- a/src/main/java/dgu/newsee/domain/news/dto/NewsCrawlResponseDTO.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/dto/NewsCrawlResponseDTO.java @@ -1,6 +1,6 @@ -package dgu.newsee.domain.news.dto; +package dgu.newsee.domain.crawlednews.dto; -import dgu.newsee.domain.news.entity.NewsOrigin; +import dgu.newsee.domain.crawlednews.entity.NewsOrigin; import lombok.Getter; import java.time.LocalDateTime; diff --git a/src/main/java/dgu/newsee/domain/news/entity/NewsOrigin.java b/src/main/java/dgu/newsee/domain/crawlednews/entity/NewsOrigin.java similarity index 93% rename from src/main/java/dgu/newsee/domain/news/entity/NewsOrigin.java rename to src/main/java/dgu/newsee/domain/crawlednews/entity/NewsOrigin.java index 6530d43..ecd5d7d 100644 --- a/src/main/java/dgu/newsee/domain/news/entity/NewsOrigin.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/entity/NewsOrigin.java @@ -1,4 +1,4 @@ -package dgu.newsee.domain.news.entity; +package dgu.newsee.domain.crawlednews.entity; import dgu.newsee.global.common.BaseEntity; import jakarta.persistence.*; import lombok.*; diff --git a/src/main/java/dgu/newsee/domain/news/entity/NewsStatus.java b/src/main/java/dgu/newsee/domain/crawlednews/entity/NewsStatus.java similarity index 62% rename from src/main/java/dgu/newsee/domain/news/entity/NewsStatus.java rename to src/main/java/dgu/newsee/domain/crawlednews/entity/NewsStatus.java index 5310817..365d708 100644 --- a/src/main/java/dgu/newsee/domain/news/entity/NewsStatus.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/entity/NewsStatus.java @@ -1,4 +1,4 @@ -package dgu.newsee.domain.news.entity; +package dgu.newsee.domain.crawlednews.entity; public enum NewsStatus { USER_INPUT, // 0 diff --git a/src/main/java/dgu/newsee/domain/news/repository/NewsRepository.java b/src/main/java/dgu/newsee/domain/crawlednews/repository/NewsRepository.java similarity index 63% rename from src/main/java/dgu/newsee/domain/news/repository/NewsRepository.java rename to src/main/java/dgu/newsee/domain/crawlednews/repository/NewsRepository.java index 182652d..04ed6ae 100644 --- a/src/main/java/dgu/newsee/domain/news/repository/NewsRepository.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/repository/NewsRepository.java @@ -1,6 +1,6 @@ -package dgu.newsee.domain.news.repository; +package dgu.newsee.domain.crawlednews.repository; -import dgu.newsee.domain.news.entity.NewsOrigin; +import dgu.newsee.domain.crawlednews.entity.NewsOrigin; import org.springframework.data.jpa.repository.JpaRepository; public interface NewsRepository extends JpaRepository { diff --git a/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java b/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java index 16a9c8d..19514f7 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java @@ -1,10 +1,10 @@ package dgu.newsee.domain.crawlednews.service; -import dgu.newsee.domain.news.entity.NewsOrigin; -import dgu.newsee.domain.news.entity.NewsStatus; -import dgu.newsee.domain.news.repository.NewsRepository; +import dgu.newsee.domain.crawlednews.entity.NewsOrigin; +import dgu.newsee.domain.crawlednews.entity.NewsStatus; +import dgu.newsee.domain.crawlednews.repository.NewsRepository; import dgu.newsee.domain.crawlednews.util.CrawledNewsCrawler; -import dgu.newsee.domain.crawlednews.util.CrawledNewsResult; +import dgu.newsee.domain.crawlednews.util.ParsedNews; import lombok.RequiredArgsConstructor; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -26,7 +26,7 @@ public void crawlAndSave(String url, String category) { } try { - CrawledNewsResult result = crawler.crawl(normalizedUrl, category); + ParsedNews result = crawler.crawl(normalizedUrl, category); NewsOrigin news = NewsOrigin.builder() .title(result.getTitle()) .content(result.getContent()) diff --git a/src/main/java/dgu/newsee/domain/news/service/NewsService.java b/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java similarity index 79% rename from src/main/java/dgu/newsee/domain/news/service/NewsService.java rename to src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java index 977c13c..c7ab690 100644 --- a/src/main/java/dgu/newsee/domain/news/service/NewsService.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java @@ -1,11 +1,11 @@ -package dgu.newsee.domain.news.service; - -import dgu.newsee.domain.news.dto.NewsCrawlRequestDTO; -import dgu.newsee.domain.news.entity.NewsOrigin; -import dgu.newsee.domain.news.entity.NewsStatus; -import dgu.newsee.domain.news.repository.NewsRepository; -import dgu.newsee.domain.news.util.NewsCrawlResult; -import dgu.newsee.domain.news.util.NewsCrawler; +package dgu.newsee.domain.crawlednews.service; + +import dgu.newsee.domain.crawlednews.dto.NewsCrawlRequestDTO; +import dgu.newsee.domain.crawlednews.entity.NewsOrigin; +import dgu.newsee.domain.crawlednews.entity.NewsStatus; +import dgu.newsee.domain.crawlednews.repository.NewsRepository; +import dgu.newsee.domain.crawlednews.util.NewsCrawler; +import dgu.newsee.domain.crawlednews.util.ParsedNews; import dgu.newsee.domain.user.entity.User; import dgu.newsee.domain.user.repository.UserRepository; import lombok.RequiredArgsConstructor; @@ -31,7 +31,7 @@ public NewsOrigin crawlAndSave(NewsCrawlRequestDTO request, Long userId) { try { // 뉴스 크롤링 - NewsCrawlResult result = crawler.crawl(url); + ParsedNews result = crawler.crawl(url); // News 객체 저장 NewsOrigin newsOrigin = NewsOrigin.builder() diff --git a/src/main/java/dgu/newsee/domain/crawlednews/util/CrawledNewsCrawler.java b/src/main/java/dgu/newsee/domain/crawlednews/util/CrawledNewsCrawler.java index 7023936..e81f6c1 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/util/CrawledNewsCrawler.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/util/CrawledNewsCrawler.java @@ -5,30 +5,12 @@ import org.springframework.stereotype.Component; import java.io.IOException; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; @Component public class CrawledNewsCrawler { - public CrawledNewsResult crawl(String url, String category) throws IOException { + public ParsedNews crawl(String url, String category) throws IOException { Document doc = Jsoup.connect(url).get(); - - String title = doc.select("meta[property=og:title]").attr("content"); - String content = doc.select("#dic_area").text(); - String source = doc.select("meta[property=og:article:author]").attr("content"); - if (source.isBlank()) { - source = doc.select("meta[property=og:site_name]").attr("content"); - } - - String rawTime = doc.select("meta[property=og:article:published_time]").attr("content"); - LocalDateTime time; - try { - time = LocalDateTime.parse(rawTime, DateTimeFormatter.ISO_OFFSET_DATE_TIME); - } catch (Exception e) { - time = LocalDateTime.now(); - } - - return new CrawledNewsResult(title, content, category, source, time, url); + return NewsParserUtil.parse(doc, category, url); // 카테고리는 호출하는 쪽에서 지정 } } diff --git a/src/main/java/dgu/newsee/domain/crawlednews/util/NewsCrawler.java b/src/main/java/dgu/newsee/domain/crawlednews/util/NewsCrawler.java new file mode 100644 index 0000000..2c429a4 --- /dev/null +++ b/src/main/java/dgu/newsee/domain/crawlednews/util/NewsCrawler.java @@ -0,0 +1,16 @@ +package dgu.newsee.domain.crawlednews.util; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.springframework.stereotype.Component; + +import java.io.IOException; + +@Component +public class NewsCrawler { + + public ParsedNews crawl(String url) throws IOException { + Document doc = Jsoup.connect(url).get(); + return NewsParserUtil.parse(doc, null, url); + } +} diff --git a/src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java b/src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java new file mode 100644 index 0000000..57f965c --- /dev/null +++ b/src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java @@ -0,0 +1,61 @@ +package dgu.newsee.domain.crawlednews.util; + +import org.jsoup.nodes.Document; + +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; + +public class NewsParserUtil { + + public static ParsedNews parse(Document doc, String categoryFromCaller, String url) { + // 제목 + String title = doc.select("meta[property=og:title]").attr("content"); + + // 본문 + String content = doc.select("#dic_area").text(); + + // 출처 + String source = doc.select("meta[property=og:article:author]").attr("content"); + if (source.isBlank()) { + source = doc.select("meta[property=og:site_name]").attr("content"); + } + + // 시간 + String rawTime = doc.select("meta[property=og:article:published_time]").attr("content"); + LocalDateTime time; + try { + time = LocalDateTime.parse(rawTime, DateTimeFormatter.ISO_OFFSET_DATE_TIME); + } catch (Exception e) { + time = LocalDateTime.now(); + } + + // 카테고리 유추 + String category = categoryFromCaller; + if (category == null || category.isBlank()) { + category = extractCategoryFromUrl(url); + } + + return new ParsedNews(title, content, category, source, time, url); + } + + private static String extractCategoryFromUrl(String url) { + try { + int sidIndex = url.indexOf("sid="); + if (sidIndex != -1) { + String sid = url.substring(sidIndex + 4, sidIndex + 7); + return switch (sid) { + case "100" -> "정치"; + case "101" -> "경제"; + case "102" -> "사회"; + case "103" -> "생활/문화"; + case "104" -> "세계"; + case "105" -> "IT/과학"; + default -> "기타"; + }; + } + } catch (Exception e) { + // 무시하고 "기타"로 처리 + } + return "기타"; + } +} diff --git a/src/main/java/dgu/newsee/domain/crawlednews/util/CrawledNewsResult.java b/src/main/java/dgu/newsee/domain/crawlednews/util/ParsedNews.java similarity index 90% rename from src/main/java/dgu/newsee/domain/crawlednews/util/CrawledNewsResult.java rename to src/main/java/dgu/newsee/domain/crawlednews/util/ParsedNews.java index 51fee87..041602f 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/util/CrawledNewsResult.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/util/ParsedNews.java @@ -7,7 +7,7 @@ @Getter @AllArgsConstructor -public class CrawledNewsResult { +public class ParsedNews { private String title; private String content; private String category; diff --git a/src/main/java/dgu/newsee/domain/news/util/NewsCrawlResult.java b/src/main/java/dgu/newsee/domain/news/util/NewsCrawlResult.java deleted file mode 100644 index 166c37f..0000000 --- a/src/main/java/dgu/newsee/domain/news/util/NewsCrawlResult.java +++ /dev/null @@ -1,17 +0,0 @@ -package dgu.newsee.domain.news.util; - -import lombok.AllArgsConstructor; -import lombok.Getter; - -import java.time.LocalDateTime; - -@Getter -@AllArgsConstructor -public class NewsCrawlResult { - private String title; - private String content; - private String category; - private String source; - private LocalDateTime time; -} - diff --git a/src/main/java/dgu/newsee/domain/news/util/NewsCrawler.java b/src/main/java/dgu/newsee/domain/news/util/NewsCrawler.java deleted file mode 100644 index 53d5513..0000000 --- a/src/main/java/dgu/newsee/domain/news/util/NewsCrawler.java +++ /dev/null @@ -1,44 +0,0 @@ -package dgu.newsee.domain.news.util; - -import dgu.newsee.domain.news.util.NewsCrawlResult; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.springframework.stereotype.Component; - -import java.io.IOException; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; - -@Component -public class NewsCrawler { - - public NewsCrawlResult crawl(String url) throws IOException { - Document doc = Jsoup.connect(url).get(); - - // 제목 - String title = doc.select("meta[property=og:title]").attr("content"); - - // 본문 - String content = doc.select("#dic_area").text(); - - // 카테고리 추출 (네이버는 명확하게 드러나진 않음 → default로 지정) - String category = "기타"; - - // 출처 (언론사 이름) - String source = doc.select("meta[property=og:article:author]").attr("content"); - if (source.isBlank()) { - source = doc.select("meta[property=og:site_name]").attr("content"); // fallback - } - - // 작성 시간 (문자열 → LocalDateTime 파싱) - String rawTime = doc.select("meta[property=og:article:published_time]").attr("content"); - LocalDateTime time; - try { - time = LocalDateTime.parse(rawTime, DateTimeFormatter.ISO_OFFSET_DATE_TIME); - } catch (Exception e) { - time = LocalDateTime.now(); // fallback - } - - return new NewsCrawlResult(title, content, category, source, time); - } -} From 74071c20b83229f2b7f3fb830f363ac29af71b19 Mon Sep 17 00:00:00 2001 From: YeahOut Date: Sat, 12 Jul 2025 11:32:29 +0900 Subject: [PATCH 4/5] =?UTF-8?q?[Feature]=20#30=20-=20=EB=89=B4=EC=8A=A4=20?= =?UTF-8?q?=EB=82=9C=EC=9D=B4=EB=8F=84=EB=B3=84=20=EB=B3=80=ED=99=98=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/CrawledNewsService.java | 12 ++- .../crawlednews/service/NewsService.java | 5 +- .../entity/CrawledNewsTransformed.java | 35 -------- .../entity/NewsTransformed.java | 18 ++-- .../entity/TransformLevel.java | 5 ++ .../CrawledNewsTransformedRepository.java | 8 -- .../service/CrawledTransformedService.java | 74 ---------------- .../service/TransformedNewsService.java | 87 ++++++++++--------- 8 files changed, 73 insertions(+), 171 deletions(-) delete mode 100644 src/main/java/dgu/newsee/domain/transformednews/entity/CrawledNewsTransformed.java create mode 100644 src/main/java/dgu/newsee/domain/transformednews/entity/TransformLevel.java delete mode 100644 src/main/java/dgu/newsee/domain/transformednews/repository/CrawledNewsTransformedRepository.java delete mode 100644 src/main/java/dgu/newsee/domain/transformednews/service/CrawledTransformedService.java diff --git a/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java b/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java index 42293c0..1e80613 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java @@ -5,19 +5,21 @@ import dgu.newsee.domain.crawlednews.repository.NewsRepository; import dgu.newsee.domain.crawlednews.util.CrawledNewsCrawler; import dgu.newsee.domain.crawlednews.util.ParsedNews; -import dgu.newsee.domain.transformednews.service.CrawledTransformedService; +import dgu.newsee.domain.transformednews.service.TransformedNewsService; import lombok.RequiredArgsConstructor; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; +import java.util.List; + @Service @RequiredArgsConstructor public class CrawledNewsService { private final CrawledNewsCrawler crawler; private final NewsRepository newsRepository; - private final NewsRepository repository; - private final CrawledTransformedService crawledTransformedService; + private final TransformedNewsService transformedNewsService; + @Transactional public void crawlAndSave(String url, String category) { @@ -30,6 +32,7 @@ public void crawlAndSave(String url, String category) { try { ParsedNews result = crawler.crawl(normalizedUrl, category); + NewsOrigin news = NewsOrigin.builder() .title(result.getTitle()) .content(result.getContent()) @@ -41,7 +44,8 @@ public void crawlAndSave(String url, String category) { .build(); newsRepository.save(news); System.out.println("크롤링 및 저장 완료: " + normalizedUrl); - crawledTransformedService.requestTransformAndSave(news.getId(), null); // 기본 level은 "중" + + transformedNewsService.requestTransformAndSaveAllLevels(news.getId(), NewsStatus.AUTO_CRAWLED); } catch (Exception e) { System.err.println("크롤링 실패: " + normalizedUrl + " → " + e.getMessage()); diff --git a/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java b/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java index 067e525..7252383 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java @@ -47,7 +47,10 @@ public NewsOrigin crawlAndSave(NewsCrawlRequestDTO request, Long userId) { .build(); newsRepository.save(newsOrigin); - transformedService.requestTransformAndSave(newsOrigin.getId(), null); + transformedService.requestTransformAndSaveAllLevels( + newsOrigin.getId(), + NewsStatus.USER_INPUT + ); // 사용자 조회 User user = userRepository.findById(userId) diff --git a/src/main/java/dgu/newsee/domain/transformednews/entity/CrawledNewsTransformed.java b/src/main/java/dgu/newsee/domain/transformednews/entity/CrawledNewsTransformed.java deleted file mode 100644 index 4be5ff6..0000000 --- a/src/main/java/dgu/newsee/domain/transformednews/entity/CrawledNewsTransformed.java +++ /dev/null @@ -1,35 +0,0 @@ -package dgu.newsee.domain.transformednews.entity; - -import dgu.newsee.domain.crawlednews.entity.NewsOrigin; -import dgu.newsee.domain.words.entity.Word; -import jakarta.persistence.*; -import lombok.*; - -import java.util.List; - -@Entity -@Getter -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor -@Builder -public class CrawledNewsTransformed { - - @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - private Long id; - - private String level; - - @Column(length = 5000) - private String transformedContent; - - @Column(length = 1000) - private String summarized; - - @OneToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "crawled_news_id") - private NewsOrigin crawledNews; - - @OneToMany(mappedBy = "crawledNewsTransformed", cascade = CascadeType.ALL, orphanRemoval = true) - private List difficultWords; -} diff --git a/src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java b/src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java index 8d5cc2e..22fb09d 100644 --- a/src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java +++ b/src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java @@ -1,12 +1,9 @@ package dgu.newsee.domain.transformednews.entity; import dgu.newsee.domain.crawlednews.entity.NewsOrigin; -import dgu.newsee.domain.words.entity.Word; import jakarta.persistence.*; import lombok.*; -import java.util.List; - @Entity @Getter @NoArgsConstructor(access = AccessLevel.PROTECTED) @@ -18,18 +15,19 @@ public class NewsTransformed { @GeneratedValue(strategy = GenerationType.IDENTITY) private Long id; - private String level; + @Enumerated(EnumType.STRING) + private dgu.newsee.domain.crawlednews.entity.NewsStatus status; // 사용자/시스템 구분 + + @Enumerated(EnumType.STRING) + private TransformLevel level; - @Column(length = 5000) + + @Lob private String transformedContent; - @Column(length = 1000) private String summarized; @OneToOne(fetch = FetchType.LAZY) @JoinColumn(name = "news_id") - private NewsOrigin news; - - @OneToMany(mappedBy = "newsTransformed", cascade = CascadeType.ALL, orphanRemoval = true) - private List difficultWords; + private NewsOrigin news; // } diff --git a/src/main/java/dgu/newsee/domain/transformednews/entity/TransformLevel.java b/src/main/java/dgu/newsee/domain/transformednews/entity/TransformLevel.java new file mode 100644 index 0000000..899a68f --- /dev/null +++ b/src/main/java/dgu/newsee/domain/transformednews/entity/TransformLevel.java @@ -0,0 +1,5 @@ +package dgu.newsee.domain.transformednews.entity; + +public enum TransformLevel { + 상, 중, 하 +} diff --git a/src/main/java/dgu/newsee/domain/transformednews/repository/CrawledNewsTransformedRepository.java b/src/main/java/dgu/newsee/domain/transformednews/repository/CrawledNewsTransformedRepository.java deleted file mode 100644 index 48b79b2..0000000 --- a/src/main/java/dgu/newsee/domain/transformednews/repository/CrawledNewsTransformedRepository.java +++ /dev/null @@ -1,8 +0,0 @@ -package dgu.newsee.domain.transformednews.repository; - -import dgu.newsee.domain.transformednews.entity.CrawledNewsTransformed; -import org.springframework.data.jpa.repository.JpaRepository; - -public interface CrawledNewsTransformedRepository extends JpaRepository { - boolean existsByCrawledNewsId(Long crawledNewsId); -} diff --git a/src/main/java/dgu/newsee/domain/transformednews/service/CrawledTransformedService.java b/src/main/java/dgu/newsee/domain/transformednews/service/CrawledTransformedService.java deleted file mode 100644 index 398882e..0000000 --- a/src/main/java/dgu/newsee/domain/transformednews/service/CrawledTransformedService.java +++ /dev/null @@ -1,74 +0,0 @@ -package dgu.newsee.domain.transformednews.service; - -import com.fasterxml.jackson.databind.ObjectMapper; -import dgu.newsee.domain.crawlednews.entity.CrawledNews; -import dgu.newsee.domain.crawlednews.repository.CrawledNewsRepository; -import dgu.newsee.domain.transformednews.dto.TransformRequestDTO; -import dgu.newsee.domain.transformednews.dto.TransformedNewsResponseDTO; -import dgu.newsee.domain.transformednews.entity.CrawledNewsTransformed; -import dgu.newsee.domain.transformednews.repository.CrawledNewsTransformedRepository; -import dgu.newsee.domain.words.entity.Word; -import dgu.newsee.domain.words.repository.WordRepository; -import lombok.RequiredArgsConstructor; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.http.*; -import org.springframework.stereotype.Service; -import org.springframework.transaction.annotation.Transactional; -import org.springframework.web.client.RestTemplate; - -@Service -@RequiredArgsConstructor -public class CrawledTransformedService { - - private final CrawledNewsRepository crawledNewsRepository; - private final CrawledNewsTransformedRepository crawledTransformedRepository; - private final WordRepository wordRepository; - - private final RestTemplate restTemplate = new RestTemplate(); - - @Value("${external.ai.url}") - private String aiServerUrl; - - @Transactional - public void requestTransformAndSave(Long crawledNewsId, String level) { - CrawledNews news = crawledNewsRepository.findById(crawledNewsId) - .orElseThrow(() -> new RuntimeException("크롤링 뉴스 없음")); - - TransformRequestDTO request = new TransformRequestDTO( - news.getId(), - news.getTitle(), - news.getContent(), - level == null ? "중" : level - ); - - HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.APPLICATION_JSON); - HttpEntity entity = new HttpEntity<>(request, headers); - - ResponseEntity response = restTemplate.exchange( - aiServerUrl, HttpMethod.POST, entity, TransformedNewsResponseDTO.class - ); - - TransformedNewsResponseDTO result = response.getBody(); - if (result == null) throw new RuntimeException("AI 응답 없음"); - - CrawledNewsTransformed transformed = CrawledNewsTransformed.builder() - .crawledNews(news) - .level(result.getLevel()) - .transformedContent(result.getTransformedContent()) - .summarized(result.getSummarized()) - .build(); - crawledTransformedRepository.save(transformed); - - for (var wordDTO : result.getDifficultWords()) { - if (!wordRepository.existsByTerm(wordDTO.getTerm())) { - Word word = Word.builder() - .term(wordDTO.getTerm()) - .description(wordDTO.getDescription()) - .category("크롤링 뉴스 변환") - .build(); - wordRepository.save(word); - } - } - } -} diff --git a/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java b/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java index 3b134a7..5944b6f 100644 --- a/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java +++ b/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java @@ -1,11 +1,13 @@ package dgu.newsee.domain.transformednews.service; import com.fasterxml.jackson.databind.ObjectMapper; -import dgu.newsee.domain.news.entity.News; -import dgu.newsee.domain.news.repository.NewsRepository; +import dgu.newsee.domain.crawlednews.entity.NewsOrigin; +import dgu.newsee.domain.crawlednews.entity.NewsStatus; +import dgu.newsee.domain.crawlednews.repository.NewsRepository; import dgu.newsee.domain.transformednews.dto.TransformRequestDTO; import dgu.newsee.domain.transformednews.dto.TransformedNewsResponseDTO; import dgu.newsee.domain.transformednews.entity.NewsTransformed; +import dgu.newsee.domain.transformednews.entity.TransformLevel; import dgu.newsee.domain.transformednews.repository.NewsTransformedRepository; import dgu.newsee.domain.words.entity.Word; import dgu.newsee.domain.words.repository.WordRepository; @@ -25,52 +27,59 @@ public class TransformedNewsService { private final NewsRepository newsRepository; private final NewsTransformedRepository transformedRepository; private final WordRepository wordRepository; - private final RestTemplate restTemplate = new RestTemplate(); - @Value("${external.ai.url}") - private String aiServerUrl; + @Value("${external.ai.url}") + private String aiServerUrl; + + @Transactional + public void requestTransformAndSaveAllLevels(Long newsId, NewsStatus status) { + for (String level : List.of("상", "중", "하")) { + requestTransformAndSave(newsId, level, status); + } + } - @Transactional - public void requestTransformAndSave(Long newsId, String level) { - News news = newsRepository.findById(newsId) - .orElseThrow(() -> new RuntimeException("뉴스 없음")); + @Transactional + public void requestTransformAndSave(Long newsId, String level, NewsStatus status) { + NewsOrigin news = newsRepository.findById(newsId) + .orElseThrow(() -> new RuntimeException("뉴스 없음")); - TransformRequestDTO request = new TransformRequestDTO( - newsId, - news.getTitle(), - news.getContent(), - level == null ? "중" : level - ); + TransformRequestDTO request = new TransformRequestDTO( + newsId, + news.getTitle(), + news.getContent(), + level + ); - HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.APPLICATION_JSON); - HttpEntity entity = new HttpEntity<>(request, headers); + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.APPLICATION_JSON); + HttpEntity entity = new HttpEntity<>(request, headers); - ResponseEntity response = restTemplate.exchange( - aiServerUrl, HttpMethod.POST, entity, TransformedNewsResponseDTO.class - ); + ResponseEntity response = restTemplate.exchange( + aiServerUrl, HttpMethod.POST, entity, TransformedNewsResponseDTO.class + ); - TransformedNewsResponseDTO result = response.getBody(); - if (result == null) throw new RuntimeException("AI 응답 없음"); + TransformedNewsResponseDTO result = response.getBody(); + if (result == null) throw new RuntimeException("AI 응답 없음"); - NewsTransformed transformed = NewsTransformed.builder() - .news(news) - .level(result.getLevel()) - .transformedContent(result.getTransformedContent()) - .summarized(result.getSummarized()) - .build(); - transformedRepository.save(transformed); + NewsTransformed transformed = NewsTransformed.builder() + .news(news) + .level(TransformLevel.valueOf(result.getLevel())) + .transformedContent(result.getTransformedContent()) + .summarized(result.getSummarized()) + .status(status) + .build(); + transformedRepository.save(transformed); - for (var wordDTO : result.getDifficultWords()) { - if (!wordRepository.existsByTerm(wordDTO.getTerm())) { - Word word = Word.builder() - .term(wordDTO.getTerm()) - .description(wordDTO.getDescription()) - .category("뉴스 변환") - .build(); - wordRepository.save(word); + for (var wordDTO : result.getDifficultWords()) { + if (!wordRepository.existsByTerm(wordDTO.getTerm())) { + Word word = Word.builder() + .term(wordDTO.getTerm()) + .description(wordDTO.getDescription()) + .category("뉴스 변환") + .build(); + wordRepository.save(word); + } } } } -} From 7f0cb974771a817a2e4915abd01ee7b57ddacf27 Mon Sep 17 00:00:00 2001 From: YeahOut Date: Sat, 12 Jul 2025 13:32:46 +0900 Subject: [PATCH 5/5] =?UTF-8?q?[Feature]=20#30=20-=20=EB=89=B4=EC=8A=A4=20?= =?UTF-8?q?=EB=8D=B0=EC=9D=B4=ED=84=B0=20AI=20=EC=84=9C=EB=B9=99=20?= =?UTF-8?q?=EB=B0=8F=20=EB=8B=A8=EC=96=B4/=EB=82=B4=EC=9A=A9=20=ED=8C=8C?= =?UTF-8?q?=EC=9D=B4=ED=94=84=EB=9D=BC=EC=9D=B8=20=EA=B5=AC=EC=B6=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../crawlednews/dto/NewsCrawlResponseDTO.java | 2 + .../domain/crawlednews/entity/NewsOrigin.java | 4 + .../service/CrawledNewsService.java | 1 + .../crawlednews/service/NewsService.java | 1 + .../crawlednews/util/NewsParserUtil.java | 46 +++++++- .../domain/crawlednews/util/ParsedNews.java | 1 + .../transformednews/dto/ApiResponse.java | 18 +++ .../dto/TransformRequestDTO.java | 4 +- .../dto/TransformedNewsResponseDTO.java | 3 + .../entity/NewsTransformed.java | 4 +- .../entity/TransformLevel.java | 23 +++- .../service/TransformedNewsService.java | 108 +++++++++++++----- src/main/resources/application.yml | 2 +- 13 files changed, 181 insertions(+), 36 deletions(-) create mode 100644 src/main/java/dgu/newsee/domain/transformednews/dto/ApiResponse.java diff --git a/src/main/java/dgu/newsee/domain/crawlednews/dto/NewsCrawlResponseDTO.java b/src/main/java/dgu/newsee/domain/crawlednews/dto/NewsCrawlResponseDTO.java index 08f29a9..a6ae173 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/dto/NewsCrawlResponseDTO.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/dto/NewsCrawlResponseDTO.java @@ -9,6 +9,7 @@ public class NewsCrawlResponseDTO { private String title; private String content; + private String imageUrl; private String category; private String source; private LocalDateTime time; @@ -17,6 +18,7 @@ public class NewsCrawlResponseDTO { public NewsCrawlResponseDTO(NewsOrigin newsOrigin) { this.title = newsOrigin.getTitle(); this.content = newsOrigin.getContent(); + this.imageUrl = newsOrigin.getImageUrl(); this.category = newsOrigin.getCategory(); this.source = newsOrigin.getSource(); this.time = newsOrigin.getTime(); diff --git a/src/main/java/dgu/newsee/domain/crawlednews/entity/NewsOrigin.java b/src/main/java/dgu/newsee/domain/crawlednews/entity/NewsOrigin.java index ecd5d7d..eeb47aa 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/entity/NewsOrigin.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/entity/NewsOrigin.java @@ -18,6 +18,10 @@ public class NewsOrigin extends BaseEntity { private String title; + @Column(length = 1024) + private String imageUrl; + + @Lob private String content; diff --git a/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java b/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java index 1e80613..2bc8f83 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/service/CrawledNewsService.java @@ -36,6 +36,7 @@ public void crawlAndSave(String url, String category) { NewsOrigin news = NewsOrigin.builder() .title(result.getTitle()) .content(result.getContent()) + .imageUrl((result.getImageUrl())) .category(result.getCategory()) .source(result.getSource()) .time(result.getTime()) diff --git a/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java b/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java index 7252383..8d95452 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/service/NewsService.java @@ -39,6 +39,7 @@ public NewsOrigin crawlAndSave(NewsCrawlRequestDTO request, Long userId) { NewsOrigin newsOrigin = NewsOrigin.builder() .title(result.getTitle()) .content(result.getContent()) + .imageUrl(result.getImageUrl()) .category(result.getCategory()) .source(result.getSource()) .time(result.getTime()) diff --git a/src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java b/src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java index 57f965c..1049a22 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/util/NewsParserUtil.java @@ -1,6 +1,7 @@ package dgu.newsee.domain.crawlednews.util; import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; @@ -29,13 +30,50 @@ public static ParsedNews parse(Document doc, String categoryFromCaller, String u time = LocalDateTime.now(); } + // 대표 이미지 + String imageUrl = doc.select("meta[property=og:image]").attr("content"); + + if (imageUrl == null || imageUrl.isBlank()) { + try { + imageUrl = doc.select("img[src]").stream() + .map(e -> e.attr("src")) + .filter(src -> src.contains("imgnews.pstatic.net")) + .findFirst() + .orElse(null); + } catch (Exception e) { + // 무시 + } + } + System.out.println("대표 이미지 URL 최종: " + imageUrl); + + + // 카테고리 유추 - String category = categoryFromCaller; - if (category == null || category.isBlank()) { - category = extractCategoryFromUrl(url); + String category = null; + + try { + // 1. 네이버 뉴스일 경우 카테고리 직접 파싱 시도 + Element selected = doc.selectFirst("a.Nitem_link_menu[aria-selected=true]"); + if (selected != null) { + category = selected.text(); // 예: 생활/문화 + } + + // 2. 그래도 null이면 백업으로 URL에서 유추 시도 + if (category == null || category.isBlank()) { + category = extractCategoryFromUrl(url); // sid 기반 + } + + // 3. 여전히 못찾으면 fallback + if (category == null || category.isBlank()) { + category = "기타"; + } + + } catch (Exception e) { + category = "기타"; } - return new ParsedNews(title, content, category, source, time, url); + + return new ParsedNews(title, content, category, source, time, url, imageUrl); } private static String extractCategoryFromUrl(String url) { diff --git a/src/main/java/dgu/newsee/domain/crawlednews/util/ParsedNews.java b/src/main/java/dgu/newsee/domain/crawlednews/util/ParsedNews.java index 041602f..d3cbb7c 100644 --- a/src/main/java/dgu/newsee/domain/crawlednews/util/ParsedNews.java +++ b/src/main/java/dgu/newsee/domain/crawlednews/util/ParsedNews.java @@ -14,4 +14,5 @@ public class ParsedNews { private String source; private LocalDateTime time; private String url; + private String imageUrl; } diff --git a/src/main/java/dgu/newsee/domain/transformednews/dto/ApiResponse.java b/src/main/java/dgu/newsee/domain/transformednews/dto/ApiResponse.java new file mode 100644 index 0000000..64fa56d --- /dev/null +++ b/src/main/java/dgu/newsee/domain/transformednews/dto/ApiResponse.java @@ -0,0 +1,18 @@ +package dgu.newsee.domain.transformednews.dto; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +@Getter +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class ApiResponse { + private String code; + private String message; + private T result; + private boolean success; +} + diff --git a/src/main/java/dgu/newsee/domain/transformednews/dto/TransformRequestDTO.java b/src/main/java/dgu/newsee/domain/transformednews/dto/TransformRequestDTO.java index 07d8b8a..5954c55 100644 --- a/src/main/java/dgu/newsee/domain/transformednews/dto/TransformRequestDTO.java +++ b/src/main/java/dgu/newsee/domain/transformednews/dto/TransformRequestDTO.java @@ -2,12 +2,12 @@ import lombok.AllArgsConstructor; import lombok.Getter; +import lombok.NoArgsConstructor; @Getter @AllArgsConstructor public class TransformRequestDTO { - private Long newsId; private String title; private String originalContent; - private String level; // default는 "중" + private String level; } diff --git a/src/main/java/dgu/newsee/domain/transformednews/dto/TransformedNewsResponseDTO.java b/src/main/java/dgu/newsee/domain/transformednews/dto/TransformedNewsResponseDTO.java index 25ec3ca..fafaf63 100644 --- a/src/main/java/dgu/newsee/domain/transformednews/dto/TransformedNewsResponseDTO.java +++ b/src/main/java/dgu/newsee/domain/transformednews/dto/TransformedNewsResponseDTO.java @@ -3,12 +3,14 @@ import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Getter; +import lombok.NoArgsConstructor; import java.util.List; @Getter @Builder @AllArgsConstructor +@NoArgsConstructor public class TransformedNewsResponseDTO { private Long newsId; private String level; @@ -17,6 +19,7 @@ public class TransformedNewsResponseDTO { private String summarized; private List difficultWords; + @NoArgsConstructor @Getter @Builder @AllArgsConstructor diff --git a/src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java b/src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java index 22fb09d..79898d0 100644 --- a/src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java +++ b/src/main/java/dgu/newsee/domain/transformednews/entity/NewsTransformed.java @@ -18,16 +18,18 @@ public class NewsTransformed { @Enumerated(EnumType.STRING) private dgu.newsee.domain.crawlednews.entity.NewsStatus status; // 사용자/시스템 구분 + @Column(name = "level", length = 10) @Enumerated(EnumType.STRING) private TransformLevel level; + @Lob private String transformedContent; private String summarized; - @OneToOne(fetch = FetchType.LAZY) + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "news_id") private NewsOrigin news; // } diff --git a/src/main/java/dgu/newsee/domain/transformednews/entity/TransformLevel.java b/src/main/java/dgu/newsee/domain/transformednews/entity/TransformLevel.java index 899a68f..88737eb 100644 --- a/src/main/java/dgu/newsee/domain/transformednews/entity/TransformLevel.java +++ b/src/main/java/dgu/newsee/domain/transformednews/entity/TransformLevel.java @@ -1,5 +1,26 @@ package dgu.newsee.domain.transformednews.entity; public enum TransformLevel { - 상, 중, 하 + EASY("하"), + MEDIUM("중"), + HARD("상"); + + private final String kor; + + TransformLevel(String kor) { + this.kor = kor; + } + + public static TransformLevel fromKorean(String kor) { + for (TransformLevel level : TransformLevel.values()) { + if (level.kor.equals(kor)) { + return level; + } + } + throw new IllegalArgumentException("Unknown level: " + kor); + } + + public String getKorean() { + return kor; + } } diff --git a/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java b/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java index 5944b6f..74b572a 100644 --- a/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java +++ b/src/main/java/dgu/newsee/domain/transformednews/service/TransformedNewsService.java @@ -4,6 +4,7 @@ import dgu.newsee.domain.crawlednews.entity.NewsOrigin; import dgu.newsee.domain.crawlednews.entity.NewsStatus; import dgu.newsee.domain.crawlednews.repository.NewsRepository; +import dgu.newsee.domain.transformednews.dto.ApiResponse; import dgu.newsee.domain.transformednews.dto.TransformRequestDTO; import dgu.newsee.domain.transformednews.dto.TransformedNewsResponseDTO; import dgu.newsee.domain.transformednews.entity.NewsTransformed; @@ -13,10 +14,11 @@ import dgu.newsee.domain.words.repository.WordRepository; import lombok.RequiredArgsConstructor; import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.ParameterizedTypeReference; +import org.springframework.http.*; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.springframework.web.client.RestTemplate; -import org.springframework.http.*; import java.util.List; @@ -29,57 +31,109 @@ public class TransformedNewsService { private final WordRepository wordRepository; private final RestTemplate restTemplate = new RestTemplate(); - @Value("${external.ai.url}") - private String aiServerUrl; + @Value("${external.ai.url}") + private String aiServerUrl; - @Transactional - public void requestTransformAndSaveAllLevels(Long newsId, NewsStatus status) { - for (String level : List.of("상", "중", "하")) { - requestTransformAndSave(newsId, level, status); - } + private final ObjectMapper objectMapper = new ObjectMapper(); + + @Transactional + public void requestTransformAndSaveAllLevels(Long newsId, NewsStatus status) { + for (String level : List.of("상", "중", "하")) { + requestTransformAndSave(newsId, level, status); } + } - @Transactional - public void requestTransformAndSave(Long newsId, String level, NewsStatus status) { - NewsOrigin news = newsRepository.findById(newsId) - .orElseThrow(() -> new RuntimeException("뉴스 없음")); + @Transactional + public void requestTransformAndSave(Long newsId, String level, NewsStatus status) { + NewsOrigin news = newsRepository.findById(newsId) + .orElseThrow(() -> new RuntimeException("뉴스 없음")); - TransformRequestDTO request = new TransformRequestDTO( - newsId, - news.getTitle(), - news.getContent(), - level - ); + TransformRequestDTO request = new TransformRequestDTO( + news.getTitle(), + news.getContent(), + level + ); + + // 요청 로그 출력 + try { + System.out.println("\n==== [AI 서버 요청 전송] ===="); + System.out.println("요청 URL: " + aiServerUrl); + System.out.println("요청 JSON: " + objectMapper.writeValueAsString(request)); + } catch (Exception e) { + System.out.println("요청 JSON 직렬화 실패: " + e.getMessage()); + } - HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.APPLICATION_JSON); - HttpEntity entity = new HttpEntity<>(request, headers); + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.APPLICATION_JSON); + HttpEntity entity = new HttpEntity<>(request, headers); - ResponseEntity response = restTemplate.exchange( - aiServerUrl, HttpMethod.POST, entity, TransformedNewsResponseDTO.class + ResponseEntity> response = null; + + try { + response = restTemplate.exchange( + aiServerUrl, + HttpMethod.POST, + entity, + new ParameterizedTypeReference<>() {} ); + } catch (Exception e) { + //System.out.println("AI 서버 호출 중 예외 발생: " + e.getMessage()); + e.printStackTrace(); + throw new RuntimeException("AI 서버 호출 실패"); + } - TransformedNewsResponseDTO result = response.getBody(); - if (result == null) throw new RuntimeException("AI 응답 없음"); + // 응답 로그 출력 + try { + System.out.println("==== [AI 서버 응답 수신] ===="); + if (response == null) { + //System.out.println("응답 객체가 null입니다."); + throw new RuntimeException("응답이 null"); + } + + //System.out.println("응답 상태 코드: " + response.getStatusCode()); + + ApiResponse apiResponse = response.getBody(); + + if (apiResponse == null) { + //System.out.println("응답 바디가 null입니다."); + throw new RuntimeException("response.getBody()가 null"); + } + + //System.out.println("응답 바디: " + objectMapper.writeValueAsString(apiResponse)); + + if (apiResponse.getResult() == null) { + //System.out.println("result 필드가 null입니다."); + throw new RuntimeException("AI 응답의 result가 null"); + } + + // 정상 응답 처리 + TransformedNewsResponseDTO result = apiResponse.getResult(); NewsTransformed transformed = NewsTransformed.builder() .news(news) - .level(TransformLevel.valueOf(result.getLevel())) + .level(TransformLevel.fromKorean(result.getLevel())) .transformedContent(result.getTransformedContent()) .summarized(result.getSummarized()) .status(status) .build(); transformedRepository.save(transformed); + //System.out.println("변환된 뉴스 저장 완료"); for (var wordDTO : result.getDifficultWords()) { if (!wordRepository.existsByTerm(wordDTO.getTerm())) { Word word = Word.builder() .term(wordDTO.getTerm()) .description(wordDTO.getDescription()) - .category("뉴스 변환") + .category(news.getCategory()) .build(); wordRepository.save(word); } } + + } catch (Exception e) { + //System.out.println("응답 처리 중 예외 발생: " + e.getMessage()); + e.printStackTrace(); + throw new RuntimeException("응답 처리 실패"); } } +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 1fe312c..d003256 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -27,4 +27,4 @@ spring: external: ai: - url: https://a00e269d857e.ngrok-free.app \ No newline at end of file + url: https://4a1efdb53fcb.ngrok-free.app/api/news/transfer \ No newline at end of file