Skip to content

Commit f09ff69

Browse files
committed
improve wfs download size estimation
1 parent 6e0224f commit f09ff69

2 files changed

Lines changed: 155 additions & 39 deletions

File tree

server/src/main/java/au/org/aodn/ogcapi/server/core/service/geoserver/wfs/DownloadWfsDataService.java

Lines changed: 64 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
import com.fasterxml.jackson.databind.JsonNode;
77
import com.fasterxml.jackson.databind.ObjectMapper;
88
import lombok.extern.slf4j.Slf4j;
9+
import org.springframework.beans.factory.annotation.Autowired;
910
import org.springframework.cache.annotation.Cacheable;
11+
import org.springframework.context.annotation.Lazy;
1012
import org.springframework.http.*;
1113
import org.springframework.web.client.RestTemplate;
1214
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
@@ -27,6 +29,10 @@ public class DownloadWfsDataService {
2729
protected final ObjectMapper objectMapper;
2830
protected static final int SAMPLES_SIZE = 500; // A not too small sample for download size estimation
2931

32+
@Autowired
33+
@Lazy
34+
protected DownloadWfsDataService self;
35+
3036
public DownloadWfsDataService(
3137
WfsServer wfsServer,
3238
RestTemplate restTemplate,
@@ -88,13 +94,61 @@ public String prepareWfsRequestUrl(
8894
}
8995

9096
/**
91-
* We just need to estimate the download size, the way we do it is issue two query:
92-
* a. Issue a query and get the number or record hit
93-
* b. Issue a query with data download but then limit the records size, and do a liner interpolation
97+
* Unfiltered total feature count for a layer
98+
* Cached per (uuid, layerName)
99+
*/
100+
@Cacheable(CacheConfig.DOWNLOADABLE_SIZE)
101+
public BigInteger getUnfilteredRecordCount(String uuid, String layerName) {
102+
String countUrl = prepareWfsRequestUrl(
103+
uuid, null, null, null, null, layerName, "application/json", 1L, false
104+
);
105+
106+
ResponseEntity<String> response = restTemplate.exchange(countUrl, HttpMethod.GET, pretendUserEntity, String.class);
107+
if (response.getStatusCode().is2xxSuccessful() && response.getBody() != null) {
108+
try {
109+
JsonNode root = objectMapper.readTree(response.getBody());
110+
if (!root.has("totalFeatures")) {
111+
throw new RuntimeException("GeoServer GeoJSON response missing totalFeatures field");
112+
}
113+
return BigInteger.valueOf(root.get("totalFeatures").asLong());
114+
} catch (IOException e) {
115+
log.error("Failed to parse unfiltered count response for {}/{}", uuid, layerName, e);
116+
}
117+
}
118+
return null;
119+
}
120+
121+
/**
122+
* Average bytes per record for the layer in the requested output format.
123+
* Issues an unfiltered sample download so the result can be reused across calls
124+
* Cached per (uuid, layerName, outputFormat).
125+
*/
126+
@Cacheable(CacheConfig.DOWNLOADABLE_SIZE)
127+
public BigInteger getBytesPerRecord(String uuid, String layerName, String outputFormat) {
128+
BigInteger totalCount = self.getUnfilteredRecordCount(uuid, layerName);
129+
if (totalCount == null || totalCount.equals(BigInteger.ZERO)) {
130+
return BigInteger.ZERO;
131+
}
132+
133+
long sampleSize = totalCount.longValue() < SAMPLES_SIZE ? totalCount.longValue() : SAMPLES_SIZE;
134+
135+
String sampleUrl = prepareWfsRequestUrl(
136+
uuid, null, null, null, null, layerName, outputFormat, sampleSize, false
137+
);
138+
139+
ResponseEntity<byte[]> bytes = restTemplate.exchange(sampleUrl, HttpMethod.GET, pretendUserEntity, byte[].class);
140+
if (bytes.getStatusCode().is2xxSuccessful() && bytes.getBody() != null) {
141+
return BigInteger.valueOf(bytes.getBody().length).divide(BigInteger.valueOf(sampleSize));
142+
}
143+
return null;
144+
}
145+
146+
/**
147+
* Estimate download size for the user's subset. Runs the inherently subset-dependent
148+
* count query, then multiplies by the cached bytes-per-record sample.
94149
*
95150
* @return The estimated file size
96151
*/
97-
@Cacheable(CacheConfig.DOWNLOADABLE_SIZE)
98152
public BigInteger estimateDownloadSize(
99153
String uuid,
100154
String layerName,
@@ -104,7 +158,7 @@ public BigInteger estimateDownloadSize(
104158
List<String> fields,
105159
String outputFormat) throws IllegalArgumentException {
106160

107-
// Get total feature count via GeoJSON response
161+
// Subset-filtered count — not cacheable here because the subset would explode the key space.
108162
String countUrl = prepareWfsRequestUrl(
109163
uuid, startDate, endDate, multiPolygon, fields, layerName, "application/json", 1L, false
110164
);
@@ -118,26 +172,17 @@ public BigInteger estimateDownloadSize(
118172
throw new RuntimeException("GeoServer GeoJSON response missing totalFeatures field");
119173
}
120174
BigInteger featureCount = BigInteger.valueOf(root.get("totalFeatures").asLong());
121-
log.debug("Total record hits {}", featureCount);
175+
log.debug("Subset record hits {}", featureCount);
122176

123177
if (featureCount.equals(BigInteger.ZERO)) {
124178
return BigInteger.ZERO;
125179
}
126180

127-
// In case the records we have is smaller than our predefined SAMPLES_SIZE, we use smaller one.
128-
long sampleSize = featureCount.longValue() < SAMPLES_SIZE ? featureCount.longValue() : SAMPLES_SIZE;
129-
130-
// Download a small sample to measure bytes per record in the requested output format
131-
String sampleUrl = prepareWfsRequestUrl(
132-
uuid, startDate, endDate, multiPolygon, fields, layerName, outputFormat, sampleSize, false
133-
);
134-
135-
ResponseEntity<byte[]> bytes = restTemplate.exchange(sampleUrl, HttpMethod.GET, pretendUserEntity, byte[].class);
136-
if (bytes.getStatusCode().is2xxSuccessful() && bytes.getBody() != null) {
137-
return featureCount
138-
.multiply(BigInteger.valueOf(bytes.getBody().length))
139-
.divide(BigInteger.valueOf(sampleSize));
181+
BigInteger bytesPerRecord = self.getBytesPerRecord(uuid, layerName, outputFormat);
182+
if (bytesPerRecord == null) {
183+
return null;
140184
}
185+
return featureCount.multiply(bytesPerRecord);
141186
} catch (IOException e) {
142187
log.error("Fail to get feature count for estimate", e);
143188
}

server/src/test/java/au/org/aodn/ogcapi/server/core/service/geoserver/wfs/DownloadWfsDataServiceTest.java

Lines changed: 91 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ public void setUp() {
7272
downloadWfsDataService = new DownloadWfsDataService(
7373
wfsServer, restTemplate, pretendUserEntity, 16384, new ObjectMapper()
7474
);
75+
76+
downloadWfsDataService.self = downloadWfsDataService;
7577
}
7678

7779
/**
@@ -268,6 +270,7 @@ public void verifyRequestUrlGenerateCorrect() {
268270
);
269271
assertEquals("https://test.com/geoserver/wfs?VERSION=1.0.0&typeName=test:layer&SERVICE=WFS&REQUEST=GetFeature&outputFormat=shape-zip&cql_filter=((timestamp DURING 2024-01-01T00:00:00Z/2024-12-31T23:59:59Z))", result, "Correct url 1");
270272
}
273+
271274
/**
272275
* Make sure the url generated contains the correct polygon
273276
*
@@ -302,6 +305,7 @@ public void verifyRequestUrlGenerateCorrectWithPolygon() throws JsonProcessingEx
302305
result,
303306
"Correct url 1");
304307
}
308+
305309
/**
306310
* Verify estimate size on success request
307311
*/
@@ -315,25 +319,28 @@ void shouldReturnEstimatedSizeWhenBothRequestsSucceed() {
315319
List<String> fields = List.of("name", "area");
316320
String format = "application/json";
317321

318-
// 1. Count response: GeoJSON with totalFeatures (1 record requested, but totalFeatures = full count)
322+
// 1. Count response: GeoJSON with totalFeatures (1 record requested, but totalFeatures = full count).
323+
// Returned for BOTH the subset-filtered count and the unfiltered count probe inside
324+
// getBytesPerRecord — both URLs use maxFeatures=1.
319325
String countJson = "{\"totalFeatures\": 227193, \"features\": []}";
320326
ResponseEntity<String> countResponse = new ResponseEntity<>(countJson, HttpStatus.OK);
321327

322-
// 2. Sample response (small payload in requested format)
323-
byte[] sampleBytes = "fake data".getBytes();
328+
// 2. Sample response. Use a payload >= SAMPLES_SIZE so bytesPerRecord = sampleBytes / 500
329+
// yields a non-zero integer (10 bytes/record here).
330+
byte[] sampleBytes = new byte[DownloadWfsDataService.SAMPLES_SIZE * 10];
324331
ResponseEntity<byte[]> sampleResponse = new ResponseEntity<>(sampleBytes, HttpStatus.OK);
325332

326333
doReturn(countResponse)
327334
.when(restTemplate).exchange(
328-
argThat((String url) -> url != null && url.contains("maxFeatures=1")),
329-
eq(HttpMethod.GET),
330-
any(HttpEntity.class),
331-
eq(String.class));
335+
argThat((String url) -> url != null && url.contains("maxFeatures=1")),
336+
eq(HttpMethod.GET),
337+
any(HttpEntity.class),
338+
eq(String.class));
332339

333340
doReturn(sampleResponse)
334341
.when(restTemplate).exchange(
335-
argThat((String url) -> url != null && url.contains("maxFeatures=" + DownloadWfsDataService.SAMPLES_SIZE)),
336-
eq(HttpMethod.GET), any(), eq(byte[].class));
342+
argThat((String url) -> url != null && url.contains("maxFeatures=" + DownloadWfsDataService.SAMPLES_SIZE)),
343+
eq(HttpMethod.GET), any(), eq(byte[].class));
337344

338345
doReturn(Optional.of("http://dummy.com/wfs"))
339346
.when(wfsServer).getFeatureServerUrl(eq(uuid), anyString());
@@ -350,26 +357,45 @@ void shouldReturnEstimatedSizeWhenBothRequestsSucceed() {
350357
BigInteger size = downloadWfsDataService.estimateDownloadSize(
351358
uuid, layer, start, end, multiPolygon, fields, format);
352359

353-
// Should call with maxFeatures=1 to get totalFeatures count via GeoJSON
360+
// Subset-filtered count (carries the cql_filter built from start/end dates)
354361
verify(restTemplate).exchange(
355-
argThat((String url) -> url != null && url.contains("maxFeatures=1") && url.contains("outputFormat=application")),
362+
argThat((String url) -> url != null
363+
&& url.contains("maxFeatures=1")
364+
&& url.contains("outputFormat=application")
365+
&& url.contains("cql_filter")),
356366
eq(HttpMethod.GET),
357367
any(),
358368
eq(String.class)
359369
);
360370

361-
// Should also call with maxFeatures=500 to sample bytes for size interpolation
371+
// Unfiltered count probe issued inside getBytesPerRecord — same maxFeatures=1
372+
// pattern but without cql_filter. Acceptance criterion: sample/count path ignores subsetting.
362373
verify(restTemplate).exchange(
363-
argThat((String url) -> url != null && url.contains("maxFeatures=" + DownloadWfsDataService.SAMPLES_SIZE)),
374+
argThat((String url) -> url != null
375+
&& url.contains("maxFeatures=1")
376+
&& url.contains("outputFormat=application")
377+
&& !url.contains("cql_filter")),
378+
eq(HttpMethod.GET),
379+
any(),
380+
eq(String.class)
381+
);
382+
383+
// Sample download with maxFeatures=500, also without subset params.
384+
verify(restTemplate).exchange(
385+
argThat((String url) -> url != null
386+
&& url.contains("maxFeatures=" + DownloadWfsDataService.SAMPLES_SIZE)
387+
&& !url.contains("cql_filter")),
364388
eq(HttpMethod.GET),
365389
any(),
366390
eq(byte[].class)
367391
);
368392

369-
// totalFeatures=227193, sampleBytes=9 bytes, SAMPLES_SIZE=500
370-
long expected = 227193L * sampleBytes.length / DownloadWfsDataService.SAMPLES_SIZE;
393+
// bytesPerRecord = sampleBytes.length / SAMPLES_SIZE; total = featureCount * bytesPerRecord
394+
long bytesPerRecord = sampleBytes.length / DownloadWfsDataService.SAMPLES_SIZE;
395+
long expected = 227193L * bytesPerRecord;
371396
assertEquals(BigInteger.valueOf(expected), size, "Size match");
372397
}
398+
373399
@Test
374400
void shouldReturnZeroWhenTotalFeaturesIsZero() {
375401
String uuid = "lyr-123";
@@ -385,10 +411,10 @@ void shouldReturnZeroWhenTotalFeaturesIsZero() {
385411

386412
doReturn(countResponse)
387413
.when(restTemplate).exchange(
388-
argThat((String url) -> url != null && url.contains("maxFeatures=1")),
389-
eq(HttpMethod.GET),
390-
any(HttpEntity.class),
391-
eq(String.class));
414+
argThat((String url) -> url != null && url.contains("maxFeatures=1")),
415+
eq(HttpMethod.GET),
416+
any(HttpEntity.class),
417+
eq(String.class));
392418

393419
doReturn(Optional.of("http://dummy.com/wfs"))
394420
.when(wfsServer).getFeatureServerUrl(eq(uuid), anyString());
@@ -490,8 +516,53 @@ void returnsNullWhenParserThrowsException() {
490516
.when(wfsServer).getDownloadableFields(eq(uuid), any(WfsServer.WfsFeatureRequest.class));
491517

492518
BigInteger size = downloadWfsDataService.estimateDownloadSize(
493-
uuid, layer, start, end, multiPolygon, fields, format);
519+
uuid, layer, start, end, multiPolygon, fields, format);
494520

495521
assertNull(size, "Size should be null when JSON parsing fails");
496522
}
523+
524+
@Test
525+
void sampleRequestIgnoresSubsetFilter() throws JsonProcessingException {
526+
String uuid = "lyr-123";
527+
String layer = "test:layer";
528+
String start = "2024-01-01";
529+
String end = "2024-12-31";
530+
Object multiPolygon = new ObjectMapper().readValue(
531+
"{ \"type\": \"MultiPolygon\", \"coordinates\": [[[[0,0],[1,0],[1,1],[0,1],[0,0]]]] }",
532+
HashMap.class
533+
);
534+
List<String> fields = List.of("name", "area");
535+
String format = "text/csv";
536+
537+
String countJson = "{\"totalFeatures\": 1000, \"features\": []}";
538+
ResponseEntity<String> countResponse = new ResponseEntity<>(countJson, HttpStatus.OK);
539+
byte[] sampleBytes = new byte[DownloadWfsDataService.SAMPLES_SIZE * 4];
540+
ResponseEntity<byte[]> sampleResponse = new ResponseEntity<>(sampleBytes, HttpStatus.OK);
541+
542+
doReturn(countResponse)
543+
.when(restTemplate).exchange(
544+
argThat((String url) -> url != null && url.contains("maxFeatures=1")),
545+
eq(HttpMethod.GET), any(HttpEntity.class), eq(String.class));
546+
doReturn(sampleResponse)
547+
.when(restTemplate).exchange(
548+
argThat((String url) -> url != null
549+
&& url.contains("maxFeatures=" + DownloadWfsDataService.SAMPLES_SIZE)),
550+
eq(HttpMethod.GET), any(), eq(byte[].class));
551+
552+
doReturn(Optional.of("http://dummy.com/wfs"))
553+
.when(wfsServer).getFeatureServerUrl(eq(uuid), anyString());
554+
doReturn(createTestWFSFieldModel())
555+
.when(wfsServer).getDownloadableFields(eq(uuid), any(WfsServer.WfsFeatureRequest.class));
556+
557+
downloadWfsDataService.estimateDownloadSize(uuid, layer, start, end, multiPolygon, fields, format);
558+
559+
// Sample URL must NOT carry the subset filter (no cql_filter, no DURING, no INTERSECTS).
560+
verify(restTemplate).exchange(
561+
argThat((String url) -> url != null
562+
&& url.contains("maxFeatures=" + DownloadWfsDataService.SAMPLES_SIZE)
563+
&& !url.contains("cql_filter")
564+
&& !url.contains("DURING")
565+
&& !url.contains("INTERSECTS")),
566+
eq(HttpMethod.GET), any(), eq(byte[].class));
567+
}
497568
}

0 commit comments

Comments
 (0)