Skip to content

Commit

Permalink
fix trailing dash in title
Browse files Browse the repository at this point in the history
  • Loading branch information
pidoubleyou committed Nov 14, 2024
1 parent 203a12e commit a3e8fb6
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 38 deletions.
81 changes: 44 additions & 37 deletions src/main/java/mServer/crawler/AddToFilmlist.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,22 @@
import de.mediathekview.mlib.tool.Hash;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MVHttpClient;

import java.util.*;

import mServer.crawler.sender.base.UrlUtils;
import mServer.crawler.sender.orfon.OrfOnConstants;
import mServer.tool.MserverDaten;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Request.Builder;
import okhttp3.Response;
import org.jetbrains.annotations.NotNull;

import java.io.IOException;
import java.net.SocketTimeoutException;
import java.util.*;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.jetbrains.annotations.NotNull;

import static jakarta.ws.rs.core.HttpHeaders.CONTENT_TYPE;

Expand All @@ -55,6 +53,15 @@ public AddToFilmlist(ListeFilme vonListe, ListeFilme listeEinsortieren) {
this.bannedFilmFilter = new BannedFilmFilter();
}

/**
 * Cuts the part of a url that starts at {@code "/hdntl=exp"} and ends right before
 * {@code "/index-f"}.
 * <p>
 * The url is returned unchanged when either marker is missing or when the markers do not
 * appear in that order.
 *
 * @param url the url to clean up; must not be {@code null}
 * @return the url without the cut out part, or the unchanged url if there is nothing to cut
 */
private static String cutOutSrfParameterInUrl(String url) {
  final int startIndex = url.indexOf("/hdntl=exp");
  final int endIndex = url.indexOf("/index-f");
  // The start marker has to be present as well: with startIndex == -1 and an existing end
  // marker, substring(0, -1) would throw a StringIndexOutOfBoundsException.
  if (startIndex > -1 && startIndex < endIndex) {
    return url.substring(0, startIndex) + url.substring(endIndex);
  }
  return url;
}

public synchronized void addLiveStream() {
if (listeEinsortieren.size() <= 0) {
return;
Expand Down Expand Up @@ -114,7 +121,7 @@ private void performUrlSearch(HashSet<Hash> hash, final int size) {
private void performInitialCleanup() {
listeEinsortieren.removeIf(f -> !f.arr[DatenFilm.FILM_URL].toLowerCase().startsWith("http"));
listeEinsortieren.removeIf(f -> f.arr[DatenFilm.FILM_SENDER].equals(Const.ORF) && f.arr[DatenFilm.FILM_URL]
.matches(OrfOnConstants.FILTER_JUGENDSCHUTZ));
.matches(OrfOnConstants.FILTER_JUGENDSCHUTZ));
listeEinsortieren.removeIf(f -> f.arr[DatenFilm.FILM_SENDER].equals(Const.ARD) && isArdUrlToRemove(f.arr[DatenFilm.FILM_URL]));
listeEinsortieren.removeIf(f -> {
String groesse = f.arr[DatenFilm.FILM_GROESSE];
Expand All @@ -129,6 +136,7 @@ private void performInitialCleanup() {
removeTimeFromOrf(listeEinsortieren);
updateAudioDescriptionOrf(listeEinsortieren);
updateAudioDescriptionSrf(listeEinsortieren);
updateTitle(listeEinsortieren);
updateArdWebsite(listeEinsortieren);
updateFunkMissingHost(listeEinsortieren);
removeSrfUrlParameter(listeEinsortieren);
Expand All @@ -142,31 +150,21 @@ private boolean isArdUrlToRemove(final String url) {

// check https://github.com/mediathekview/MServer/issues/904 for examples and more information
/**
 * Cleans up the urls of SRF films whose normal url contains {@code "/hdntl=exp"}.
 * <p>
 * For every matching film the normal, small and HD url are passed through
 * {@code UrlUtils.removeParameters} and {@code cutOutSrfParameterInUrl} and written back to
 * the film. The number of matching films is logged.
 *
 * @param listeEinsortieren the list whose SRF films are updated in place
 */
private void removeSrfUrlParameter(ListeFilme listeEinsortieren) {
  final List<DatenFilm> list = listeEinsortieren.parallelStream()
      .filter(film -> film.arr[DatenFilm.FILM_SENDER].equals(Const.SRF) && film.arr[DatenFilm.FILM_URL].contains("/hdntl=exp"))
      .collect(Collectors.toList());
  Log.sysLog("SRF: remove url parameter für " + list.size() + " Einträge von " + listeEinsortieren.size());

  list.forEach(film -> {
    String url = film.arr[DatenFilm.FILM_URL];
    // A missing small/HD url is represented by an empty string; otherwise the url for that
    // resolution is taken from the film (presumably the fully resolved url - verify against
    // DatenFilm.getUrlFuerAufloesung).
    String urlKlein = film.arr[DatenFilm.FILM_URL_KLEIN] == null || film.arr[DatenFilm.FILM_URL_KLEIN].isEmpty() ? "" : film.getUrlFuerAufloesung(DatenFilm.AUFLOESUNG_KLEIN);
    String urlGross = film.arr[DatenFilm.FILM_URL_HD] == null || film.arr[DatenFilm.FILM_URL_HD].isEmpty() ? "" : film.getUrlFuerAufloesung(DatenFilm.AUFLOESUNG_HD);
    // The normal url has to be rewritten first: the small and HD urls are added to the film
    // afterwards via CrawlerTool.
    film.arr[DatenFilm.FILM_URL] = cutOutSrfParameterInUrl(UrlUtils.removeParameters(url));
    CrawlerTool.addUrlKlein(film, cutOutSrfParameterInUrl(UrlUtils.removeParameters(urlKlein)));
    CrawlerTool.addUrlHd(film, cutOutSrfParameterInUrl(UrlUtils.removeParameters(urlGross)));
  });
}

/**
 * Cuts the part of a url that starts at {@code "/hdntl=exp"} and ends right before
 * {@code "/index-f"}.
 * <p>
 * The url is returned unchanged when either marker is missing or when the markers do not
 * appear in that order.
 *
 * @param url the url to clean up; must not be {@code null}
 * @return the url without the cut out part, or the unchanged url if there is nothing to cut
 */
private static String cutOutSrfParameterInUrl(String url) {
  final int startIndex = url.indexOf("/hdntl=exp");
  final int endIndex = url.indexOf("/index-f");
  // The start marker has to be present as well: with startIndex == -1 and an existing end
  // marker, substring(0, -1) would throw a StringIndexOutOfBoundsException.
  if (startIndex > -1 && startIndex < endIndex) {
    return url.substring(0, startIndex) + url.substring(endIndex);
  }
  return url;
}

private void updateFunkMissingHost(ListeFilme listeEinsortieren) {
Expand All @@ -180,6 +178,15 @@ private void updateFunkMissingHost(ListeFilme listeEinsortieren) {
list.forEach(film -> film.arr[DatenFilm.FILM_URL_HD] = film.arr[DatenFilm.FILM_URL_HD].replace("https://", "https://funk-02.akamaized.net/").trim());
}

/**
 * Removes trailing dashes from the titles of all films in the given list.
 * <p>
 * Only titles that end with a dash (ignoring surrounding whitespace) are rewritten; all other
 * titles are left untouched. The complete trailing run of dashes and whitespace is removed,
 * so {@code "Title - "} and {@code "Title - -"} both become {@code "Title"}.
 *
 * @param listeEinsortieren the list whose film titles are updated in place
 */
private void updateTitle(ListeFilme listeEinsortieren) {
  listeEinsortieren.forEach(film -> {
    final String title = film.arr[DatenFilm.FILM_TITEL].trim();
    if (title.endsWith("-")) {
      // Strip dashes and whitespace together: removing only "-+$" once would leave a
      // trailing dash behind for titles like "Title - -".
      film.arr[DatenFilm.FILM_TITEL] = title.replaceAll("[\\s-]+$", "");
    }
  });
}

private void updateArdWebsite(ListeFilme listeEinsortieren) {
final List<DatenFilm> list = listeEinsortieren.parallelStream()
.filter(film -> film.arr[DatenFilm.FILM_SENDER].equals(Const.ARD) && !film.arr[DatenFilm.FILM_WEBSEITE].startsWith("https://www.ardmediathek.de/video/"))
Expand Down Expand Up @@ -222,10 +229,10 @@ private void updateAudioDescriptionSrf(ListeFilme listeEinsortieren) {

private void removeTimeFromOrf(ListeFilme listeEinsortieren) {
final List<DatenFilm> list = listeEinsortieren.parallelStream()
.filter(
film -> film.arr[DatenFilm.FILM_SENDER].equals(Const.ORF) && film.arr[DatenFilm.FILM_THEMA]
.matches(".*[0-9]{1,2}:[0-9][0-9]$"))
.collect(Collectors.toList());
.filter(
film -> film.arr[DatenFilm.FILM_SENDER].equals(Const.ORF) && film.arr[DatenFilm.FILM_THEMA]
.matches(".*[0-9]{1,2}:[0-9][0-9]$"))
.collect(Collectors.toList());
Log.sysLog("ORF: update Thema für " + list.size() + " Einträge.");
if (!list.isEmpty()) {
list.forEach(film -> film.arr[DatenFilm.FILM_THEMA] = film.arr[DatenFilm.FILM_THEMA].replaceAll("[0-9]{1,2}:[0-9][0-9]$", "").trim());
Expand All @@ -235,8 +242,8 @@ private void removeTimeFromOrf(ListeFilme listeEinsortieren) {
private void removeTimeFromMdrAktuell(ListeFilme listeEinsortieren) {
final String topic = "MDR aktuell";
final List<DatenFilm> list = listeEinsortieren.parallelStream()
.filter(film -> film.arr[DatenFilm.FILM_THEMA].startsWith(topic))
.collect(Collectors.toList());
.filter(film -> film.arr[DatenFilm.FILM_THEMA].startsWith(topic))
.collect(Collectors.toList());
Log.sysLog("MDR aktuell: update Thema für " + list.size() + " Einträge.");
if (!list.isEmpty()) {
list.forEach(film -> film.arr[DatenFilm.FILM_THEMA] = topic);
Expand Down Expand Up @@ -343,7 +350,7 @@ private class ImportOldFilmlistThread extends Thread {

private final Queue<DatenFilm> listeOld;
private final ArrayList<DatenFilm> localAddList = new ArrayList<>(
(vonListe.size() / NUMBER_OF_THREADS) + 500);
(vonListe.size() / NUMBER_OF_THREADS) + 500);
private int treffer = 0;
private OkHttpClient client = null;

Expand Down Expand Up @@ -386,8 +393,8 @@ public void run() {
long respLength = determineContentLength(response);

if (isRelevantContentType(response) && !removedVideo(film, response) &&
// ignore file length of m3u8-files because it is always too small
(isM3u8File(url) || respLength > MIN_SIZE_ADD_OLD)) {
// ignore file length of m3u8-files because it is always too small
(isM3u8File(url) || respLength > MIN_SIZE_ADD_OLD)) {
addOld(film);
} else {
Log.sysLog("film removed: code: " + response.code() + ": " + url);
Expand Down
17 changes: 16 additions & 1 deletion src/test/developTest/java/mServer/crawler/AddToFilmlistTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import static jakarta.ws.rs.core.HttpHeaders.CONTENT_LENGTH;
import static jakarta.ws.rs.core.HttpHeaders.CONTENT_TYPE;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.junit.Assert.*;

import de.mediathekview.mlib.Const;
Expand Down Expand Up @@ -409,6 +408,22 @@ public void testNotAddArteExtraits() {
assertEquals(list.size(), 2);
}

/**
 * Checks that a trailing dash is removed from a film title while a dash in the middle of a
 * title is kept.
 */
@Test
public void testRefreshTitleWithTrailingDash() {
  final DatenFilm testFilmUpdated = createTestFilm(Const.ARD, "My Topic", "Title - ", FILM_NAME_ONLINE);
  final DatenFilm testFilmNotUpdated = createTestFilm(Const.ARD, "My Topic", "Title - Episode", FILM_NAME_ONLINE);

  listToAdd.add(testFilmUpdated);
  listToAdd.add(testFilmNotUpdated);

  AddToFilmlist target = new AddToFilmlist(list, listToAdd);
  target.addOldList();

  // expected value first, so that a failure message reports expected/actual correctly;
  // 4 presumably is the two films added here plus the entries of the shared fixture list
  assertEquals(4, list.size());
  assertEquals("Title", testFilmUpdated.arr[DatenFilm.FILM_TITEL]);
  assertEquals("Title - Episode", testFilmNotUpdated.arr[DatenFilm.FILM_TITEL]);
}

@Test
public void testRefreshArdWebsite() {
final DatenFilm testFilmUpdated = createTestFilm(Const.ARD, "Tatort", "Test Tatort", FILM_NAME_ONLINE);
Expand Down

0 comments on commit a3e8fb6

Please sign in to comment.