Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
Expand Down Expand Up @@ -61,9 +60,9 @@ protected RecursiveTask<Set<Film>> createCrawlerTask() {
ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size());

if (Boolean.TRUE.equals(crawlerConfig.getTopicsSearchEnabled())) {
final Set<CrawlerUrlDTO> senderTopicUrls = new HashSet<>();
final Set<ForkJoinTask<Set<CrawlerUrlDTO>>> senderTopicTasks = createSenderTopicTasks();

final Set<CrawlerUrlDTO> senderTopicUrls = new HashSet<>();
for (final ForkJoinTask<Set<CrawlerUrlDTO>> senderTopicTask : senderTopicTasks) {
senderTopicUrls.addAll(senderTopicTask.get());
}
Expand All @@ -75,11 +74,28 @@ protected RecursiveTask<Set<Film>> createCrawlerTask() {

final ArdTopicPageTask topicTask =
new ArdTopicPageTask(this, new ConcurrentLinkedQueue<>(assitUrls));


final Set<ArdFilmInfoDto> ardFilmInfosWithCompilations = forkJoinPool.submit(topicTask).get();

// add filmInfos without compilation
final int showsCountBefore = shows.size();
shows.addAll(forkJoinPool.submit(topicTask).get());
shows.addAll(ardFilmInfosWithCompilations.stream().filter(filmInfo -> !filmInfo.isCompilation()).toList());
LOG.debug(
"ARD crawler found {} topics for all sub-sender.", shows.size() - showsCountBefore);
"ARD crawler found {} topics excluding compilations for all sub-sender.", shows.size() - showsCountBefore);

// search compilations
final List<ArdFilmInfoDto> compilations = ardFilmInfosWithCompilations.stream().filter(ArdFilmInfoDto::isCompilation).toList();
LOG.debug(
"ARD crawler found {} compilations for all sub-sender.", compilations.size());
final ArdTopicCompilationTask compilationTask =
new ArdTopicCompilationTask(this, new ConcurrentLinkedQueue<>(compilations));
final Set<ArdFilmInfoDto> ardCompilationEntries = forkJoinPool.submit(compilationTask).get();
LOG.debug(
"ARD crawler found {} entries in compilations for all sub-sender.", ardCompilationEntries.size());

final int sizeBefore = shows.size();
shows.addAll(ardCompilationEntries.stream().filter(filmInfo -> !filmInfo.isCompilation()).toList());
LOG.debug("ARD crawler added {} entries from compilations to shows.", shows.size() - sizeBefore);
}
//
final Queue<ArdFilmInfoDto> showsFiltered = this.filterExistingFilms(shows, ArdFilmInfoDto::getId);
Expand Down Expand Up @@ -116,13 +132,12 @@ private Set<ForkJoinTask<Set<CrawlerUrlDTO>>> createSenderTopicTasks() {

private ForkJoinTask<Set<CrawlerUrlDTO>> getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException {
Set<CrawlerUrlDTO> senderSingleLetterUrls = forkJoinPool.submit(
new ArdTopicsTask(this, sender, CreateLetterUrlQuery(sender))).get();
new ArdTopicsTask(this, sender, createLetterUrlQuery(sender))).get();

//LOG.debug("topics task result {}", senderSingleLetterUrls.size());
return forkJoinPool.submit(new ArdTopicsLetterTask(this, sender, new ConcurrentLinkedQueue<>(senderSingleLetterUrls)));
}

private Queue<CrawlerUrlDTO> CreateLetterUrlQuery(final String client) {
private Queue<CrawlerUrlDTO> createLetterUrlQuery(final String client) {
final Queue<CrawlerUrlDTO> urls = new ConcurrentLinkedQueue<>();

final String url = String.format(ArdConstants.TOPICS_URL, client);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@ public class ArdFilmInfoDto extends CrawlerUrlDTO {

private final String id;
private final int numberOfClips;
private final boolean isCompilation;

public ArdFilmInfoDto(final String id, final String aUrl, final int numberOfClips) {
public ArdFilmInfoDto(final String id, final String aUrl, final int numberOfClips, final boolean isCompilation) {
super(aUrl);

this.id = id;
this.numberOfClips = numberOfClips;
this.isCompilation = isCompilation;
}

public String getId() {
Expand All @@ -24,6 +26,8 @@ public int getNumberOfClips() {
return numberOfClips;
}

/**
 * Returns the compilation flag that was passed to the constructor.
 *
 * @return {@code true} if this film info was created as a compilation entry
 */
public boolean isCompilation() {
  return isCompilation;
}

@Override
public boolean equals(final Object o) {
if (this == o) {
Expand All @@ -35,11 +39,11 @@ public boolean equals(final Object o) {
if (!super.equals(o)) {
return false;
}
return numberOfClips == that.numberOfClips && Objects.equals(id, that.id);
return numberOfClips == that.numberOfClips && Objects.equals(id, that.id) && isCompilation == that.isCompilation;
}

@Override
public int hashCode() {
return Objects.hash(super.hashCode(), id, numberOfClips);
return Objects.hash(super.hashCode(), id, numberOfClips, isCompilation);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ private Set<ArdFilmInfoDto> parseChannels(JsonArray channels) {

private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) {
final String url = String.format(ArdConstants.ITEM_URL, id);
return new ArdFilmInfoDto(id, url, numberOfClips);
return new ArdFilmInfoDto(id, url, numberOfClips, false);
}

private Optional<String> toId(final JsonObject teaserObject) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ abstract class ArdTeasersDeserializer {

private static final String ATTRIBUTE_ID = "id";
private static final String ATTRIBUTE_NUMBER_OF_CLIPS = "numberOfClips";
private static final String ATTRIBUTE_TYPE = "type";

private static final String ELEMENT_PUBLICATION_SERVICE = "publicationService";
private static final String ATTRIBUTE_PARTNER = "partner";

private static final String ATTRIBUTE_HREF = "href";

Set<ArdFilmInfoDto> parseTeasers(final JsonArray teasers) {
return StreamSupport.stream(teasers.spliterator(), true)
.map(JsonElement::getAsJsonObject)
Expand All @@ -34,9 +36,23 @@ Set<ArdFilmInfoDto> parseTeasers(final JsonArray teasers) {
}

private ArdFilmInfoDto toFilmInfo(final JsonObject teaserObject) {
return toId(teaserObject)
.map(id -> createFilmInfo(id, getNumberOfClips(teaserObject)))
.orElse(null);
final boolean compilation = isCompilation(teaserObject);
if (compilation) {
final Optional<String> url = JsonUtils.getElementValueAsString(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET, ATTRIBUTE_HREF);
final Optional<String> id = toId(teaserObject);
return url.map(s -> new ArdFilmInfoDto(id.orElse(""), s, getNumberOfClips(teaserObject), compilation)).orElse(null);
} else {
return toId(teaserObject)
.map(id -> createFilmInfo(id, getNumberOfClips(teaserObject), compilation))
.orElse(null);
}
}

/**
 * Checks whether the teaser's "type" attribute equals {@code "compilation"}.
 *
 * <p>A missing attribute, a JSON {@code null} or any other non-primitive value is treated as
 * "not a compilation" instead of letting {@code getAsString()} throw.
 *
 * @param teaserObject the teaser JSON object to inspect
 * @return {@code true} only if the type attribute is a primitive with the value "compilation"
 */
private boolean isCompilation(final JsonObject teaserObject) {
  // JsonObject.get returns null for an absent member, so no separate has() check is needed.
  final JsonElement typeElement = teaserObject.get(ATTRIBUTE_TYPE);
  return typeElement != null
      && typeElement.isJsonPrimitive()
      && "compilation".equals(typeElement.getAsString());
}

private int getNumberOfClips(final JsonObject teaserObject) {
Expand All @@ -55,13 +71,13 @@ private Optional<String> toId(final JsonObject teaserObject) {
return JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID);
}

private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) {
private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips, final boolean isCompilation) {
String refId = id;
if(id.contains(":")) {
refId = id.replace(":", "%3A");
}
final String url = String.format(ArdConstants.ITEM_URL, refId);
return new ArdFilmInfoDto(id, url, numberOfClips);
return new ArdFilmInfoDto(id, url, numberOfClips, isCompilation);
}

private boolean isRelevant(final JsonObject teaserObject) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package de.mediathekview.mserver.crawler.ard.json;

import com.google.gson.*;
import de.mediathekview.mserver.crawler.ard.ArdFilmInfoDto;
import de.mediathekview.mserver.crawler.ard.ArdTopicInfoDto;
import java.lang.reflect.Type;
import java.util.HashSet;
import java.util.Set;

/**
 * Gson deserializer for an ARD compilation page.
 *
 * <p>Collects the film infos of every teaser found in the page's widgets and copies the
 * pagination values (page number, page size, total elements) into the resulting {@link
 * ArdTopicInfoDto}. Missing or JSON-null pagination values are reported as {@code 0}.
 */
public class ArdTopicCompilationDeserializer extends ArdTeasersDeserializer
    implements JsonDeserializer<ArdTopicInfoDto> {

  private static final String ELEMENT_WIDGETS = "widgets";
  private static final String ELEMENT_TEASERS = "teasers";
  private static final String ELEMENT_PAGE_NUMBER = "pageNumber";
  private static final String ELEMENT_TOTAL_ELEMENTS = "totalElements";
  private static final String ELEMENT_PAGE_SIZE = "pageSize";
  private static final String ELEMENT_PAGINATION = "pagination";

  /**
   * Converts a compilation page into an {@link ArdTopicInfoDto}.
   *
   * @param showPageElement the root element of the page; must be a JSON object
   * @param type unused
   * @param context unused
   * @return the DTO holding all parsed film infos and the pagination values
   */
  @Override
  public ArdTopicInfoDto deserialize(
      final JsonElement showPageElement, final Type type, final JsonDeserializationContext context) {
    final JsonObject showPageObject = showPageElement.getAsJsonObject();

    final Set<ArdFilmInfoDto> results = new HashSet<>();
    collectTeasers(showPageObject.get(ELEMENT_WIDGETS), results);

    final ArdTopicInfoDto ardTopicInfoDto = new ArdTopicInfoDto(results);
    final JsonElement paginationElement = showPageObject.get(ELEMENT_PAGINATION);
    ardTopicInfoDto.setPageNumber(getChildAsIntOrZero(paginationElement, ELEMENT_PAGE_NUMBER));
    ardTopicInfoDto.setPageSize(getChildAsIntOrZero(paginationElement, ELEMENT_PAGE_SIZE));
    ardTopicInfoDto.setTotalElements(
        getChildAsIntOrZero(paginationElement, ELEMENT_TOTAL_ELEMENTS));
    return ardTopicInfoDto;
  }

  /**
   * Adds the film infos of all teasers of all widgets to {@code results}.
   *
   * <p>Widgets that are not JSON objects, or whose "teasers" member is absent or not an array,
   * are skipped.
   */
  private void collectTeasers(final JsonElement widgetsElement, final Set<ArdFilmInfoDto> results) {
    if (widgetsElement == null || !widgetsElement.isJsonArray()) {
      return;
    }
    for (final JsonElement widget : widgetsElement.getAsJsonArray()) {
      if (!widget.isJsonObject()) {
        continue;
      }
      final JsonElement teasers = widget.getAsJsonObject().get(ELEMENT_TEASERS);
      if (teasers != null && teasers.isJsonArray()) {
        results.addAll(parseTeasers(teasers.getAsJsonArray()));
      }
    }
  }

  /**
   * Reads an int member of {@code parentElement}.
   *
   * @return the member's int value, or {@code 0} if the parent is absent or not a JSON object, or
   *     the member is absent or JSON null
   */
  private static int getChildAsIntOrZero(
      final JsonElement parentElement, final String childElementName) {
    if (parentElement == null || !parentElement.isJsonObject()) {
      return 0;
    }
    // JsonObject.get yields Java null (not JsonNull) for an absent member; check both.
    final JsonElement child = parentElement.getAsJsonObject().get(childElementName);
    if (child == null || child.isJsonNull()) {
      return 0;
    }
    return child.getAsInt();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package de.mediathekview.mserver.crawler.ard.tasks;

import com.google.gson.reflect.TypeToken;
import de.mediathekview.mserver.crawler.ard.ArdFilmInfoDto;
import de.mediathekview.mserver.crawler.ard.ArdTopicInfoDto;
import de.mediathekview.mserver.crawler.ard.json.ArdTopicCompilationDeserializer;
import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask;
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
import jakarta.ws.rs.client.WebTarget;
import java.lang.reflect.Type;
import java.util.Queue;

public class ArdTopicCompilationTask extends ArdTaskBase<ArdFilmInfoDto, CrawlerUrlDTO> {

Check warning on line 14 in src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicCompilationTask.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

This class has 7 parents which is greater than 5 authorized.

See more on https://sonarcloud.io/project/issues?id=mediathekview_MServer&issues=AZ4Ss07YbgnSuG7_hk08&open=AZ4Ss07YbgnSuG7_hk08&pullRequest=1143

// Type handle for ArdTopicInfoDto; used both to register the compilation deserializer and as the
// target type when deserializing a response.
private static final Type ARDTOPICINFODTO_TYPE_TOKEN =
new TypeToken<ArdTopicInfoDto>() {}.getType();

public ArdTopicCompilationTask(
final AbstractCrawler aCrawler, final Queue<CrawlerUrlDTO> aUrlToCrawlDtos) {
super(aCrawler, aUrlToCrawlDtos);

registerJsonDeserializer(ARDTOPICINFODTO_TYPE_TOKEN, new ArdTopicCompilationDeserializer());
}

/**
 * Deserializes the target into an {@code ArdTopicInfoDto} and adds its film infos to the task
 * results. Nothing is added when deserialization yields {@code null} or no film infos.
 *
 * @param aDTO the URL DTO being processed
 * @param aTarget the web target to deserialize
 */
@Override
protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarget) {
  final ArdTopicInfoDto topicInfo = deserialize(aTarget, ARDTOPICINFODTO_TYPE_TOKEN, aDTO);
  if (topicInfo == null) {
    return;
  }
  if (topicInfo.getFilmInfos() == null || topicInfo.getFilmInfos().isEmpty()) {
    return;
  }
  taskResults.addAll(topicInfo.getFilmInfos());
}

/**
 * Creates another {@code ArdTopicCompilationTask} for the same crawler that works on the given
 * elements.
 *
 * @param aElementsToProcess the URLs the new task should process
 * @return the new task instance
 */
@Override
protected AbstractRecursiveConverterTask<ArdFilmInfoDto, CrawlerUrlDTO> createNewOwnInstance(
    final Queue<CrawlerUrlDTO> aElementsToProcess) {
  return new ArdTopicCompilationTask(this.crawler, aElementsToProcess);
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package de.mediathekview.mserver.crawler.ard.json;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import de.mediathekview.mserver.crawler.ard.ArdConstants;
import de.mediathekview.mserver.crawler.ard.ArdFilmInfoDto;
Expand All @@ -26,55 +25,55 @@ public void testDeserialize() {
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL3JiYl8xY2RjODJjMy01ZTIyLTQ0MDctODEwZi0yMWMwYTBhY2NjMmNfcHVibGljYXRpb24"),
1),
1, false),
new ArdFilmInfoDto(
"Y3JpZDovL3JiYl9hN2RkMDNjMC0yMmU5LTRmYzEtYmNiOC1kYTg0Y2RjOWMxMWZfcHVibGljYXRpb24",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL3JiYl9hN2RkMDNjMC0yMmU5LTRmYzEtYmNiOC1kYTg0Y2RjOWMxMWZfcHVibGljYXRpb24"),
1),
1, false),
new ArdFilmInfoDto(
"Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MDQ4MzMtMzg1Mjgw",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MDQ4MzMtMzg1Mjgw"),
1),
1, false),
new ArdFilmInfoDto(
"Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MDQ4MzQtMzg1Mjgx",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MDQ4MzQtMzg1Mjgx"),
1),
1, false),
new ArdFilmInfoDto(
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2Zlcm5zZWhmaWxtZSBpbSBlcnN0ZW4vMjAyNC0wOS0yOF8xNC0wMC1NRVNa",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2Zlcm5zZWhmaWxtZSBpbSBlcnN0ZW4vMjAyNC0wOS0yOF8xNC0wMC1NRVNa"),
1),
1, false),
new ArdFilmInfoDto(
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2Zlcm5zZWhmaWxtZSBpbSBlcnN0ZW4vMjAyNC0wOS0yOF8xNS0zMC1NRVNa",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2Zlcm5zZWhmaWxtZSBpbSBlcnN0ZW4vMjAyNC0wOS0yOF8xNS0zMC1NRVNa"),
1),
1, false),
new ArdFilmInfoDto(
"Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtMmIwZDg4NDMtMzQ0YS00OTZmLTlhNDYtNGY3ODk5MjE2MmFi",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtMmIwZDg4NDMtMzQ0YS00OTZmLTlhNDYtNGY3ODk5MjE2MmFi"),
1),
1, false),
new ArdFilmInfoDto(
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2RpZS1zdGlsbGVuLW1vZXJkZXIvMjAyNC0wOS0yOF8yMC0xNS1NRVNa",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2RpZS1zdGlsbGVuLW1vZXJkZXIvMjAyNC0wOS0yOF8yMC0xNS1NRVNa"),
1),
1, false),
new ArdFilmInfoDto(
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2hhcnR3aWctc2VlbGVyLzIwMjQtMDktMjhfMjEtNDUtTUVTWg",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2hhcnR3aWctc2VlbGVyLzIwMjQtMDktMjhfMjEtNDUtTUVTWg"),
1)
1, false)
};

final ArdDayPageDeserializer instance = new ArdDayPageDeserializer();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package de.mediathekview.mserver.crawler.ard.json;

import com.google.gson.JsonElement;
import de.mediathekview.mserver.crawler.ard.ArdTopicInfoDto;
import de.mediathekview.mserver.testhelper.JsonFileReader;
import org.junit.jupiter.api.Test;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.MatcherAssert.assertThat;

/** Tests for {@link ArdTopicCompilationDeserializer}. */
class ArdTopicCompilationDeserializerTest {

  /** Deserializes the compilation page fixture and checks the number of film infos found. */
  @Test
  void deserialize() {
    final JsonElement compilationPage = JsonFileReader.readJson("/ard/ard_compilation_page.json");

    final ArdTopicInfoDto topicInfo =
        new ArdTopicCompilationDeserializer().deserialize(compilationPage, null, null);

    final int filmInfoCount = topicInfo.getFilmInfos().size();
    assertThat(filmInfoCount, equalTo(24));
  }
}
Loading
Loading