Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions src/main/java/mServer/crawler/sender/ard/ArdCrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutionException;
Expand All @@ -27,6 +28,7 @@ public class ArdCrawler extends MediathekCrawler {
= DateTimeFormatter.ofPattern("yyyy-MM-dd");

public static final String[] MISSING_TOPIC_IDS = new String[]{
// "Y3JpZDovL2JyLmRlL2Jyb2FkY2FzdFNlcmllcy9icm9hZGNhc3RTZXJpZXM6L2JyZGUvZmVybnNlaGVuL2JheWVyaXNjaGVzLWZlcm5zZWhlbi9zZW5kdW5nZW4vZGFob2FtLWlzLWRhaG9hbQ"
};

public ArdCrawler(FilmeSuchen ssearch, int startPrio) {
Expand Down Expand Up @@ -131,6 +133,7 @@ private Set<ArdFilmInfoDto> getDaysEntries() throws InterruptedException, Execut
}

private Set<ArdFilmInfoDto> getTopicsEntries() throws ExecutionException, InterruptedException {
final Set<ArdFilmInfoDto> shows = new HashSet<>();
Set<CrawlerUrlDTO> topics = new HashSet<>();
topics.addAll(getTopicEntriesBySender(ArdConstants.DEFAULT_CLIENT));
for (String client : ArdConstants.CLIENTS) {
Expand All @@ -149,9 +152,21 @@ private Set<ArdFilmInfoDto> getTopicsEntries() throws ExecutionException, Interr
ConcurrentLinkedQueue<CrawlerUrlDTO> topicUrls = new ConcurrentLinkedQueue<>(assitUrls);

final ArdTopicPageTask topicTask = new ArdTopicPageTask(this, topicUrls);
final Set<ArdFilmInfoDto> filmInfos = forkJoinPool.submit(topicTask).get();
Log.sysLog("ard shows by topics: " + filmInfos.size());
return filmInfos;
final Set<ArdFilmInfoDto> ardFilmInfosWithCompilations = forkJoinPool.submit(topicTask).get();

// add filmInfos without compilation
shows.addAll(ardFilmInfosWithCompilations.stream().filter(filmInfo -> !filmInfo.isCompilation()).toList());

// search compilations
final List<ArdFilmInfoDto> compilations = ardFilmInfosWithCompilations.stream().filter(ArdFilmInfoDto::isCompilation).toList();
final ArdTopicCompilationTask compilationTask = new ArdTopicCompilationTask(this, new ConcurrentLinkedQueue<>(compilations));
final Set<ArdFilmInfoDto> ardCompilationEntries = forkJoinPool.submit(compilationTask).get();

final int sizeBefore = shows.size();
shows.addAll(ardCompilationEntries.stream().filter(filmInfo -> !filmInfo.isCompilation()).toList());
Log.sysLog("ard shows by topics compilation: " + (shows.size() - sizeBefore));
Log.sysLog("ard shows by topics: " + shows.size());
return shows;
}

// temporary workaround for missing topics
Expand Down
13 changes: 10 additions & 3 deletions src/main/java/mServer/crawler/sender/ard/ArdFilmInfoDto.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,24 @@ public class ArdFilmInfoDto extends CrawlerUrlDTO {

private final String id;
private final int numberOfClips;
private final boolean isCompilation;

public ArdFilmInfoDto(String id, String aUrl, int numberOfClips) {
public ArdFilmInfoDto(String id, String aUrl, int numberOfClips, boolean isCompilation) {
super(aUrl);

this.id = id;
this.numberOfClips = numberOfClips;
this.isCompilation = isCompilation;
}

/**
 * Returns the id this film info was created with.
 *
 * @return the id passed to the constructor
 */
public String getId() {
  return this.id;
}

/**
 * Tells whether this entry was flagged as a compilation when it was created.
 *
 * @return the compilation flag passed to the constructor
 */
public boolean isCompilation() {
  return this.isCompilation;
}

/**
 * Returns the number of clips this film info was created with.
 *
 * @return the clip count passed to the constructor
 */
public int getNumberOfClips() {
  return this.numberOfClips;
}
Expand All @@ -36,11 +42,12 @@ public boolean equals(Object o) {
}
ArdFilmInfoDto that = (ArdFilmInfoDto) o;
return numberOfClips == that.numberOfClips
&& Objects.equals(id, that.id);
&& Objects.equals(id, that.id)
&& isCompilation == that.isCompilation;
}

@Override
public int hashCode() {
return Objects.hash(super.hashCode(), id, numberOfClips);
return Objects.hash(super.hashCode(), id, numberOfClips, isCompilation);
}
}
100 changes: 100 additions & 0 deletions src/main/java/mServer/crawler/sender/ard/WdrM3U8ToMp4Converter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package mServer.crawler.sender.ard;

Check warning on line 1 in src/main/java/mServer/crawler/sender/ard/WdrM3U8ToMp4Converter.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Rename this package name to match the regular expression '^[a-z_]+(\.[a-z_][a-z0-9_]*)*$'.

See more on https://sonarcloud.io/project/issues?id=mediathekview_MServer&issues=AZ4ToKnkAfGjqw6iqfcm&open=AZ4ToKnkAfGjqw6iqfcm&pullRequest=1142

import de.mediathekview.mlib.tool.Log;
import mServer.crawler.sender.base.Qualities;

import java.util.EnumMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* * Converts an m3u8 playlist URL (in the style shown in the example) * into a list of progressive
* mp4 URLs. * * Default progressive base host: https://wdr-progressive.ard-mcdn.de * * Example: *
* input: *
* https://wdrvod-rwrtr.akamaized.net/i/,/media/.../ID_AVC-,270,360,540,720,1080,.mp4.csmil/index-...m3u8
* * * produced outputs: * https://wdr-progressive.ard-mcdn.de/media/.../ID_AVC-270.mp4 * ...
*/
public class WdrM3U8ToMp4Converter {

// Looks for /media/...<codec>-<comma-list>.mp4.csmil
// group(1) = /media/... (path before codec)
// group(2) = _<CODEC>- (codec token + trailing '-')
// group(3) = the comma separated tail that contains bitrate numbers (e.g. ",270,360,540,")
private static final Pattern MEDIA_PATTERN =
Pattern.compile(
"(/media/.+?)(_[A-Za-z0-9]+-)([^/]+?)\\.mp4\\.csmil", Pattern.CASE_INSENSITIVE);

Check warning on line 26 in src/main/java/mServer/crawler/sender/ard/WdrM3U8ToMp4Converter.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Remove duplicates in this character class.

See more on https://sonarcloud.io/project/issues?id=mediathekview_MServer&issues=AZ4ToKnkAfGjqw6iqfcn&open=AZ4ToKnkAfGjqw6iqfcn&pullRequest=1142

private final String progressiveBase; // no trailing slash

public WdrM3U8ToMp4Converter() {
this("https://wdr-progressive.ard-mcdn.de");
}

public WdrM3U8ToMp4Converter(String progressiveBase) {
if (progressiveBase == null)
throw new IllegalArgumentException("progressiveBase must not be null");
// ensure no trailing slash to make concatenation predictable
this.progressiveBase = progressiveBase.replaceAll("/+$", "");
}

/**
* Convert an m3u8 url into a list of mp4 URLs.
*
* @param m3u8Url the source m3u8 url
* @return map of mp4 URLs (in the same order as bitrates found)
* @throws IllegalArgumentException if the url cannot be parsed or no bitrates found
*/
public Map<Qualities, String> convert(String m3u8Url) {
if (m3u8Url == null) throw new IllegalArgumentException("m3u8Url must not be null");

Map<Qualities, String> result = new EnumMap<>(Qualities.class);

Matcher m = MEDIA_PATTERN.matcher(m3u8Url);
if (!m.find()) {
return result;
}

String pathBeforeCodec = m.group(1); // includes leading /media/...
String codecWithDash = m.group(2); // e.g. _AVC-
String bitrateListPart = m.group(3); // e.g. ",270,360,540,720,1080," or "270,360"

// build the base path (starts with /media/...)
String basePrefix = pathBeforeCodec + codecWithDash; // ends with '-'

// extract numeric tokens (bitrate values)
Pattern digits = Pattern.compile("\\d+");
Matcher mDigits = digits.matcher(bitrateListPart);

while (mDigits.find()) {
String bitrate = mDigits.group();
// join progressive base + basePrefix + bitrate + .mp4
String mp4 = progressiveBase + basePrefix + bitrate + ".mp4";
final Qualities resolution = getResolutionFromWidth(bitrate);
result.put(resolution, mp4);
}

if (result.isEmpty()) {
throw new IllegalArgumentException("No numeric bitrate tokens found in m3u8 URL: " + m3u8Url);
}

return result;
}

private Qualities getResolutionFromWidth(String bitrate) {
try {
return switch (Integer.parseInt(bitrate)) {
case 720 -> Qualities.NORMAL;
case 1080 -> Qualities.HD;
case 540, 360, 270 -> Qualities.SMALL;
default -> {
Log.sysLog("Unknown bitrate found in m3u8 URL: " + bitrate + ", defaulting to VERY_SMALL");
yield Qualities.SMALL;
}
};
} catch (NumberFormatException e) {
Log.errorLog(165346373, e);
return Qualities.SMALL;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ private Set<ArdFilmInfoDto> parseChannels(JsonArray channels) {

private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) {
final String url = String.format(ArdConstants.ITEM_URL, id);
return new ArdFilmInfoDto(id, url, numberOfClips);
return new ArdFilmInfoDto(id, url, numberOfClips, false);
}

private Optional<String> toId(final JsonObject teaserObject) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,17 @@
import mServer.crawler.sender.ard.ArdConstants;
import mServer.crawler.sender.ard.ArdFilmDto;
import mServer.crawler.sender.ard.ArdFilmInfoDto;
import mServer.crawler.sender.ard.WdrM3U8ToMp4Converter;
import mServer.crawler.sender.base.JsonUtils;
import mServer.crawler.sender.base.Qualities;
import mServer.crawler.sender.base.UrlUtils;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

public class ArdFilmDeserializer implements JsonDeserializer<List<ArdFilmDto>> {

private static final org.apache.logging.log4j.Logger LOG
private static final Logger LOG
= LogManager.getLogger(ArdFilmDeserializer.class);

private static final String GERMAN_TIME_ZONE = "Europe/Berlin";
Expand Down Expand Up @@ -110,6 +112,12 @@ public class ArdFilmDeserializer implements JsonDeserializer<List<ArdFilmDto>> {
//IGNORED_SENDER "zdf", "kika", "3sat", "arte"
}

private final WdrM3U8ToMp4Converter converter;

/** Creates a deserializer that uses a {@link WdrM3U8ToMp4Converter} with its default settings. */
public ArdFilmDeserializer() {
  this.converter = new WdrM3U8ToMp4Converter();
}

private static Optional<JsonObject> getMediaCollectionObject(final JsonObject itemObject) {
if (itemObject.has(ELEMENT_MEDIA_COLLECTION)
&& !itemObject.get(ELEMENT_MEDIA_COLLECTION).isJsonNull()
Expand Down Expand Up @@ -234,8 +242,11 @@ public List<ArdFilmDto> deserialize(
Optional<Map<Qualities, String>> videoInfoStandard = parseVideoUrls(itemObject, MARKER_VIDEO_CATEGORY_MAIN, MARKER_VIDEO_STANDARD, MARKER_VIDEO_MP4, MARKER_VIDEO_DE);
Optional<Map<Qualities, String>> videoInfoAdaptive = parseVideoUrls(itemObject, MARKER_VIDEO_CATEGORY_MAIN, MARKER_VIDEO_STANDARD, MARKER_VIDEO_CATEGORY_MPEG, MARKER_VIDEO_DE);
Optional<Map<Qualities, String>> videoInfoAD = parseVideoUrls(itemObject, MARKER_VIDEO_CATEGORY_MAIN, MARKER_VIDEO_AD, MARKER_VIDEO_MP4, MARKER_VIDEO_DE);
Optional<Map<Qualities, String>> videoInfoADAdaptive = parseVideoUrls(itemObject, MARKER_VIDEO_CATEGORY_MAIN, MARKER_VIDEO_AD, MARKER_VIDEO_CATEGORY_MPEG, MARKER_VIDEO_DE);
Optional<Map<Qualities, String>> videoInfoDGS = parseVideoUrls(itemObject, MARKER_VIDEO_DGS, MARKER_VIDEO_STANDARD, MARKER_VIDEO_MP4, MARKER_VIDEO_DE);
Optional<Map<Qualities, String>> videoInfoDGSAdaptive = parseVideoUrls(itemObject, MARKER_VIDEO_DGS, MARKER_VIDEO_STANDARD, MARKER_VIDEO_CATEGORY_MPEG, MARKER_VIDEO_DE);
Optional<Map<Qualities, String>> videoInfoOV = parseVideoUrls(itemObject, MARKER_VIDEO_CATEGORY_MAIN, MARKER_VIDEO_STANDARD, MARKER_VIDEO_MP4, MARKER_VIDEO_OV);
Optional<Map<Qualities, String>> videoInfoOVAdaptive = parseVideoUrls(itemObject, MARKER_VIDEO_CATEGORY_MAIN, MARKER_VIDEO_STANDARD, MARKER_VIDEO_CATEGORY_MPEG, MARKER_VIDEO_OV);
Optional<String> subtitles = prepareSubtitleUrl(itemObject);

if (topic.isEmpty() || title.isEmpty() || partner.isEmpty() || ADDITIONAL_SENDER.get(partner.get()) == null) {
Expand All @@ -249,7 +260,16 @@ public List<ArdFilmDto> deserialize(
|| titleoriginal.get().contains("- Hörfassung") || titleoriginal.get().contains("(mit Audiodeskription)");
// mainly funk
if (videoInfoStandard.isEmpty() && videoInfoAD.isEmpty() && videoInfoDGS.isEmpty() && videoInfoOV.isEmpty() && videoInfoAdaptive.isPresent()) {
videoInfoStandard = resolveFallbackFromPlaylist(videoInfoAdaptive);
videoInfoStandard = getResolutionsFromAdaptiveUrl(videoInfoAdaptive);
}
if (videoInfoAD.isEmpty() && videoInfoADAdaptive.isPresent()) {
videoInfoAD = getResolutionsFromAdaptiveUrl(videoInfoADAdaptive);
}
if (videoInfoDGS.isEmpty() && videoInfoDGSAdaptive.isPresent()) {
videoInfoDGS = getResolutionsFromAdaptiveUrl(videoInfoDGSAdaptive);
}
if (videoInfoOV.isEmpty() && videoInfoOVAdaptive.isPresent()) {
videoInfoOV = getResolutionsFromAdaptiveUrl(videoInfoOVAdaptive);
}
// incorrect language code for OV
if ((titleoriginal.get().contains(" - (Originalversion)") || titleoriginal.get().contains(" (OV)")) && videoInfoOV.isEmpty()) {
Expand Down Expand Up @@ -383,7 +403,7 @@ private void parseRelatedFilms(final ArdFilmDto filmDto, final JsonObject player
= JsonUtils.getAttributeAsString(teasersItemObject, ATTRIBUTE_ID);
if (id.isPresent()) {
final String url = String.format(ArdConstants.ITEM_URL, id.get());
filmDto.addRelatedFilm(new ArdFilmInfoDto(id.get(), url, 0));
filmDto.addRelatedFilm(new ArdFilmInfoDto(id.get(), url, 0, false));
}
}
}
Expand Down Expand Up @@ -488,29 +508,41 @@ private Optional<Map<Integer, String>> parseVideoUrlMap(final JsonObject playerP
return Optional.of(videoInfo);
}

private Optional<Map<Qualities, String>> resolveFallbackFromPlaylist(Optional<Map<Qualities, String>> videoInfoAdaptive) {
Map<Qualities, URL> qualitiesUrls = videoInfoAdaptive.get().entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getKey, entry -> {
try {
return new URL(entry.getValue());
} catch (MalformedURLException e) {
LOG.error("failed converting string {} to url", entry.getValue(), e);
return null;
}
}));
if (!qualitiesUrls.containsKey(Qualities.NORMAL)) {
qualitiesUrls.put(Qualities.NORMAL, qualitiesUrls.entrySet().stream().findFirst().get().getValue());
}
//
ArdVideoInfoJsonDeserializer.loadM3U8(qualitiesUrls);
//
Map<Qualities, String> fallback = qualitiesUrls.entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().toString()));
//
if (!fallback.containsKey(Qualities.NORMAL) && !fallback.isEmpty()) {
fallback.put(Qualities.NORMAL, fallback.entrySet().stream().findFirst().get().getValue());
}

return Optional.of(fallback);
}
/**
 * Derives video URLs per quality from adaptive (m3u8) URLs.
 *
 * <p>The {@link Qualities#NORMAL} URL is first handed to the converter, which rewrites it without
 * any network access. If that yields nothing, the URLs are passed to
 * {@code ArdVideoInfoJsonDeserializer.loadM3U8} (presumably resolving the playlist itself; confirm
 * against that method) and the resulting map is returned.
 *
 * @param videoInfoAdaptive adaptive URLs keyed by quality, may be empty
 * @return URLs keyed by quality, or an empty Optional if no input or no usable URL is available
 */
private Optional<Map<Qualities, String>> getResolutionsFromAdaptiveUrl(Optional<Map<Qualities, String>> videoInfoAdaptive) {
  if (videoInfoAdaptive.isEmpty()) {
    return Optional.empty();
  }
  final Map<Qualities, String> adaptiveUrls = videoInfoAdaptive.get();

  final String normalUrl = adaptiveUrls.get(Qualities.NORMAL);
  if (normalUrl != null) {
    try {
      final Map<Qualities, String> mp4Urls = converter.convert(normalUrl);
      if (!mp4Urls.isEmpty()) {
        return Optional.of(mp4Urls);
      }
    } catch (IllegalArgumentException e) {
      // the converter rejects URLs without numeric tokens; use the playlist fallback instead
      LOG.error("failed converting adaptive url {} to mp4 urls", normalUrl, e);
    }
  }

  // Malformed URLs are dropped here: Collectors.toMap throws a NullPointerException on null values.
  final Map<Qualities, URL> qualitiesUrls = adaptiveUrls.entrySet().stream()
      .map(entry -> {
        try {
          return Optional.of(Map.entry(entry.getKey(), new URL(entry.getValue())));
        } catch (MalformedURLException e) {
          LOG.error("failed converting string {} to url", entry.getValue(), e);
          return Optional.<Map.Entry<Qualities, URL>>empty();
        }
      })
      .flatMap(Optional::stream)
      .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  if (qualitiesUrls.isEmpty()) {
    // nothing usable left, avoid failing on the "first entry" lookup below
    return Optional.empty();
  }
  if (!qualitiesUrls.containsKey(Qualities.NORMAL)) {
    qualitiesUrls.put(Qualities.NORMAL, qualitiesUrls.values().iterator().next());
  }

  ArdVideoInfoJsonDeserializer.loadM3U8(qualitiesUrls);

  final Map<Qualities, String> fallback = qualitiesUrls.entrySet().stream()
      .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().toString()));
  if (!fallback.containsKey(Qualities.NORMAL) && !fallback.isEmpty()) {
    fallback.put(Qualities.NORMAL, fallback.values().iterator().next());
  }

  return Optional.of(fallback);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ abstract class ArdTeasersDeserializer {
private static final String ELEMENT_LINKS = "links";
private static final String ELEMENT_TARGET = "target";

private static final String ATTRIBUTE_HREF = "href";
private static final String ATTRIBUTE_ID = "id";
private static final String ATTRIBUTE_NUMBER_OF_CLIPS = "numberOfClips";
private static final String ATTRIBUTE_TYPE = "type";

Set<ArdFilmInfoDto> parseTeasers(final JsonArray teasers) {
return StreamSupport.stream(teasers.spliterator(), true)
Expand All @@ -30,9 +32,23 @@ Set<ArdFilmInfoDto> parseTeasers(final JsonArray teasers) {
}

private ArdFilmInfoDto toFilmInfo(final JsonObject teaserObject) {
return toId(teaserObject)
.map(id -> createFilmInfo(id, getNumberOfClips(teaserObject)))
.orElse(null);
final boolean compilation = isCompilation(teaserObject);
if (compilation) {
final Optional<String> url = JsonUtils.getElementValueAsString(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET, ATTRIBUTE_HREF);
final Optional<String> id = toId(teaserObject);
return url.map(s -> new ArdFilmInfoDto(id.orElse(""), s, getNumberOfClips(teaserObject), compilation)).orElse(null);
} else {
return toId(teaserObject)
.map(id -> createFilmInfo(id, getNumberOfClips(teaserObject), compilation))
.orElse(null);
}
}

/**
 * Checks whether the teaser is marked as a compilation.
 *
 * @param teaserObject the teaser JSON object
 * @return {@code true} only if the teaser has a primitive {@code type} attribute whose string
 *     value is {@code "compilation"}
 */
private boolean isCompilation(final JsonObject teaserObject) {
  // getAsString() throws for JSON null, objects and arrays, so only primitives are read
  if (!teaserObject.has(ATTRIBUTE_TYPE) || !teaserObject.get(ATTRIBUTE_TYPE).isJsonPrimitive()) {
    return false;
  }
  return "compilation".equals(teaserObject.get(ATTRIBUTE_TYPE).getAsString());
}

private int getNumberOfClips(final JsonObject teaserObject) {
Expand All @@ -51,12 +67,12 @@ private Optional<String> toId(final JsonObject teaserObject) {
return JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID);
}

private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) {
private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips, final boolean isCompilation) {
String refId = id;
if(id.contains(":")) {
refId = id.replace(":", "%3A");
}
final String url = String.format(ArdConstants.ITEM_URL, refId);
return new ArdFilmInfoDto(id, url, numberOfClips);
return new ArdFilmInfoDto(id, url, numberOfClips, isCompilation);
}
}
Loading
Loading