Skip to content

Commit 5e9c030

Browse files
committed
arte: use recent list instead of categories
ard: use experimental topic search
2 parents 8f562be + e7365c3 commit 5e9c030

20 files changed

+1565
-175
lines changed

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ sourceCompatibility = JavaVersion.VERSION_17
2727
targetCompatibility = JavaVersion.VERSION_17
2828
group = 'de.mediathekview'
2929
archivesBaseName = "MServer"
30-
version = '3.1.227'
30+
version = '3.1.228'
3131

3232
def jarName = 'MServer.jar'
3333
def mainClass = 'mServer.Main'

src/main/java/mServer/crawler/sender/ard/ArdConstants.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,13 @@ public class ArdConstants {
77

88
public static final String ITEM_URL = API_URL + "/page-gateway/pages/ard/item/";
99

10-
public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/shows/";
10+
public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/editorial/experiment-a-z?embedded=false";
11+
public static final String TOPICS_COMPILATION_URL = API_URL + "/page-gateway/widgets/%s/editorials/%s?pageNumber=0&pageSize=%s";
1112
public static final String TOPIC_URL = API_URL + "/page-gateway/widgets/ard/asset/%s?pageSize=%d";
1213
public static final String DAY_PAGE_URL = API_URL + "/page-gateway/compilations/%s/pastbroadcasts?startDateTime=%sT00:00:00.000Z&endDateTime=%sT23:59:59.000Z&pageNumber=0&pageSize=%d";
1314

1415
public static final int DAY_PAGE_SIZE = 100;
16+
public static final int TOPICS_COMPILATION_PAGE_SIZE = 200;
1517
public static final int TOPIC_PAGE_SIZE = 50;
1618

1719
public static final String DEFAULT_CLIENT = "ard";

src/main/java/mServer/crawler/sender/ard/ArdCrawler.java

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,7 @@
66
import mServer.crawler.CrawlerTool;
77
import mServer.crawler.FilmeSuchen;
88
import mServer.crawler.sender.MediathekCrawler;
9-
import mServer.crawler.sender.ard.tasks.ArdDayPageTask;
10-
import mServer.crawler.sender.ard.tasks.ArdFilmDetailTask;
11-
import mServer.crawler.sender.ard.tasks.ArdTopicPageTask;
12-
import mServer.crawler.sender.ard.tasks.ArdTopicsOverviewTask;
9+
import mServer.crawler.sender.ard.tasks.*;
1310
import mServer.crawler.sender.base.CrawlerUrlDTO;
1411

1512
import java.time.LocalDateTime;
@@ -22,13 +19,12 @@
2219

2320
public class ArdCrawler extends MediathekCrawler {
2421

22+
public static final String SENDERNAME = Const.ARD;
2523
private static final int MAX_DAYS_PAST = 2;
2624
private static final int MAX_DAYS_PAST_AVAILABLE = 6;
2725
private static final DateTimeFormatter DAY_PAGE_DATE_FORMATTER
2826
= DateTimeFormatter.ofPattern("yyyy-MM-dd");
2927

30-
public static final String SENDERNAME = Const.ARD;
31-
3228
public ArdCrawler(FilmeSuchen ssearch, int startPrio) {
3329
super(ssearch, SENDERNAME, 0, 1, startPrio);
3430
}
@@ -73,13 +69,13 @@ private void addDayUrls(ConcurrentLinkedQueue<CrawlerUrlDTO> dayUrlsToCrawl, Loc
7369
}
7470

7571
private void addSpecialDays(
76-
ConcurrentLinkedQueue<CrawlerUrlDTO> dayUrlsToCrawl) {
77-
final LocalDateTime[] specialDates = new LocalDateTime[] {
72+
ConcurrentLinkedQueue<CrawlerUrlDTO> dayUrlsToCrawl) {
73+
final LocalDateTime[] specialDates = new LocalDateTime[]{
7874
};
7975

8076
final LocalDateTime minDayOnline = LocalDateTime.now().minusDays(MAX_DAYS_PAST_AVAILABLE);
8177

82-
for(LocalDateTime specialDate : specialDates) {
78+
for (LocalDateTime specialDate : specialDates) {
8379
if (specialDate.isAfter(minDayOnline)) {
8480
addDayUrls(dayUrlsToCrawl, specialDate);
8581
}
@@ -95,7 +91,7 @@ protected RecursiveTask<Set<DatenFilm>> createCrawlerTask() {
9591
if (CrawlerTool.loadLongMax()) {
9692
shows.addAll(getTopicsEntries());
9793
}
98-
94+
Log.sysLog("ARD Anzahl topics: " + shows.size());
9995
getDaysEntries().forEach(show -> {
10096
if (!shows.contains(show)) {
10197
shows.add(show);
@@ -125,20 +121,25 @@ private Set<ArdFilmInfoDto> getTopicsEntries() throws ExecutionException, Interr
125121
topics.addAll(getTopicEntriesBySender(client));
126122
}
127123

124+
Log.sysLog("ard mediathek topics: " + topics.size());
128125
ConcurrentLinkedQueue<CrawlerUrlDTO> topicUrls = new ConcurrentLinkedQueue<>(topics);
129126

130127
final ArdTopicPageTask topicTask = new ArdTopicPageTask(this, topicUrls);
131128
final Set<ArdFilmInfoDto> filmInfos = forkJoinPool.submit(topicTask).get();
129+
Log.sysLog("ard shows by topics: " + filmInfos.size());
132130
return filmInfos;
133131
}
134132

135-
private ConcurrentLinkedQueue<CrawlerUrlDTO> getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException {
136-
ArdTopicsOverviewTask topicsTask
137-
= new ArdTopicsOverviewTask(this, createTopicsOverviewUrl(sender));
133+
private Set<CrawlerUrlDTO> getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException {
134+
ArdTopicsTask topicsTask
135+
= new ArdTopicsTask(this, sender, createTopicsOverviewUrl(sender));
138136

139137
ConcurrentLinkedQueue<CrawlerUrlDTO> queue = new ConcurrentLinkedQueue<>(forkJoinPool.submit(topicsTask).get());
140-
Log.sysLog(sender + " topic entries: " + queue.size());
141-
return queue;
138+
Log.sysLog(sender + " topics task entries: " + queue.size());
139+
140+
final Set<CrawlerUrlDTO> topicUrls = forkJoinPool.submit(new ArdTopicsLetterTask(this, sender, queue)).get();
141+
Log.sysLog(sender + " topics: " + topicUrls.size());
142+
return topicUrls;
142143
}
143144

144145
private ConcurrentLinkedQueue<CrawlerUrlDTO> createTopicsOverviewUrl(final String client) {
src/main/java/mServer/crawler/sender/ard/PaginationUrlDto.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package mServer.crawler.sender.ard;
2+
3+
import mServer.crawler.sender.base.CrawlerUrlDTO;
4+
5+
import java.util.HashSet;
6+
import java.util.Set;
7+
8+
public class PaginationUrlDto {
9+
private final Set<CrawlerUrlDTO> urls = new HashSet<>();
10+
private int actualPage;
11+
private int maxPages;
12+
13+
public void addUrl(CrawlerUrlDTO url) {
14+
urls.add(url);
15+
}
16+
17+
public void addAll(Set<CrawlerUrlDTO> urls) {
18+
this.urls.addAll(urls);
19+
}
20+
21+
public Set<CrawlerUrlDTO> getUrls() {
22+
return urls;
23+
}
24+
25+
public int getActualPage() {
26+
return actualPage;
27+
}
28+
29+
public int getMaxPages() {
30+
return maxPages;
31+
}
32+
33+
public void setActualPage(int actualPage) {
34+
this.actualPage = actualPage;
35+
}
36+
37+
public void setMaxPages(int maxPages) {
38+
this.maxPages = maxPages;
39+
}
40+
}
src/main/java/mServer/crawler/sender/ard/json/ArdTopicsDeserializer.java

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
package mServer.crawler.sender.ard.json;
2+
3+
4+
import com.google.gson.JsonArray;
5+
import com.google.gson.JsonDeserializationContext;
6+
import com.google.gson.JsonDeserializer;
7+
import com.google.gson.JsonElement;
8+
import mServer.crawler.sender.ard.ArdConstants;
9+
import mServer.crawler.sender.base.CrawlerUrlDTO;
10+
import mServer.crawler.sender.base.JsonUtils;
11+
12+
import java.lang.reflect.Type;
13+
import java.util.HashSet;
14+
import java.util.Optional;
15+
import java.util.Set;
16+
17+
public class ArdTopicsDeserializer implements JsonDeserializer<Set<CrawlerUrlDTO>> {
18+
private static final String ELEMENT_WIDGETS = "widgets";
19+
private static final String ELEMENT_LINKS = "links";
20+
private static final String ELEMENT_SELF = "self";
21+
22+
private static final String ATTRIBUTE_ID = "id";
23+
24+
private final String sender;
25+
26+
public ArdTopicsDeserializer(String sender) {
27+
this.sender = sender;
28+
}
29+
30+
@Override
31+
public Set<CrawlerUrlDTO> deserialize(
32+
JsonElement jsonElement, Type type, JsonDeserializationContext jsonDeserializationContext) {
33+
final Set<CrawlerUrlDTO> result = new HashSet<>();
34+
35+
if (JsonUtils.hasElements(jsonElement, ELEMENT_WIDGETS)) {
36+
final JsonArray widgets = jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_WIDGETS);
37+
widgets.forEach(widget -> parseWidget(widget.getAsJsonObject()).ifPresent(result::add));
38+
}
39+
40+
return result;
41+
}
42+
43+
private Optional<CrawlerUrlDTO> parseWidget(final JsonElement compilation) {
44+
if (JsonUtils.hasElements(compilation, ELEMENT_LINKS)) {
45+
final JsonElement selfLink =
46+
compilation.getAsJsonObject().get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_SELF);
47+
final Optional<String> id =
48+
JsonUtils.getAttributeAsString(selfLink.getAsJsonObject(), ATTRIBUTE_ID);
49+
if (id.isPresent()) {
50+
return Optional.of(
51+
new CrawlerUrlDTO(
52+
String.format(
53+
ArdConstants.TOPICS_COMPILATION_URL,
54+
sender,
55+
id.get(),
56+
ArdConstants.TOPICS_COMPILATION_PAGE_SIZE)));
57+
}
58+
}
59+
60+
return Optional.empty();
61+
}
62+
}
src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package mServer.crawler.sender.ard.json;
2+
3+
import com.google.gson.JsonDeserializationContext;
4+
import com.google.gson.JsonDeserializer;
5+
import com.google.gson.JsonElement;
6+
import com.google.gson.JsonObject;
7+
import mServer.crawler.sender.ard.ArdConstants;
8+
import mServer.crawler.sender.ard.PaginationUrlDto;
9+
import mServer.crawler.sender.base.CrawlerUrlDTO;
10+
import mServer.crawler.sender.base.JsonUtils;
11+
12+
import java.lang.reflect.Type;
13+
import java.util.HashSet;
14+
import java.util.Optional;
15+
import java.util.Set;
16+
17+
public class ArdTopicsLetterDeserializer implements JsonDeserializer<PaginationUrlDto> {
18+
19+
private static final String ELEMENT_TEASERS = "teasers";
20+
private static final String ELEMENT_LINKS = "links";
21+
private static final String ELEMENT_TARGET = "target";
22+
private static final String ELEMENT_PAGE_NUMBER = "pageNumber";
23+
private static final String ELEMENT_TOTAL_ELEMENTS = "totalElements";
24+
private static final String ELEMENT_PAGE_SIZE = "pageSize";
25+
private static final String ELEMENT_PAGINATION = "pagination";
26+
27+
private static final String ATTRIBUTE_ID = "id";
28+
29+
@Override
30+
public PaginationUrlDto deserialize(
31+
final JsonElement jsonElement, final Type type, final JsonDeserializationContext context) {
32+
final PaginationUrlDto results = new PaginationUrlDto();
33+
34+
if (!jsonElement.getAsJsonObject().has(ELEMENT_TEASERS)
35+
|| !jsonElement.getAsJsonObject().get(ELEMENT_TEASERS).isJsonArray()
36+
|| jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_TEASERS).isEmpty()) {
37+
return results;
38+
}
39+
40+
jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_TEASERS).forEach(teaser -> results.addAll(parseTeaser(teaser.getAsJsonObject())));
41+
42+
final JsonElement paginationElement = jsonElement.getAsJsonObject().get(ELEMENT_PAGINATION);
43+
results.setActualPage(getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_PAGE_NUMBER));
44+
final int totalElements = getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_TOTAL_ELEMENTS);
45+
final int pageSize = getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_PAGE_SIZE);
46+
int maxPageSize = pageSize == 0 ? 0 :
47+
(totalElements+pageSize-1)/pageSize;
48+
results.setMaxPages(maxPageSize);
49+
50+
return results;
51+
}
52+
53+
private int getChildElementAsIntOrNullIfNotExist(
54+
final JsonElement parentElement, final String childElementName) {
55+
if (parentElement == null || parentElement.isJsonNull()) {
56+
return 0;
57+
}
58+
return getJsonElementAsIntOrNullIfNotExist(
59+
parentElement.getAsJsonObject().get(childElementName));
60+
}
61+
62+
private int getJsonElementAsIntOrNullIfNotExist(final JsonElement element) {
63+
if (element.isJsonNull()) {
64+
return 0;
65+
}
66+
return element.getAsInt();
67+
}
68+
69+
private Set<CrawlerUrlDTO> parseTeaser(final JsonObject teaserObject) {
70+
final Set<CrawlerUrlDTO> results = new HashSet<>();
71+
72+
final Optional<String> id;
73+
74+
if (JsonUtils.checkTreePath(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET)) {
75+
final JsonObject targetObject =
76+
teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET).getAsJsonObject();
77+
id = JsonUtils.getAttributeAsString(targetObject, ATTRIBUTE_ID);
78+
} else {
79+
id = JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID);
80+
}
81+
82+
id.ifPresent(
83+
nonNullId ->
84+
results.add(
85+
new CrawlerUrlDTO(
86+
String.format(
87+
ArdConstants.TOPIC_URL, nonNullId, ArdConstants.TOPIC_PAGE_SIZE))));
88+
89+
return results;
90+
}
91+
}

0 commit comments

Comments
 (0)