Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions src/main/java/au/csiro/fhir/export/BulkExportClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,12 @@ public class BulkExportClient {
String outputDir;

/**
* The extension to use for the output files.
* The extension to use for the output files. When set, this overrides the extension that would
* otherwise be inferred from the output format. When null, the extension is inferred from the
* output format.
*/
@Nonnull
@Builder.Default
String outputExtension = "ndjson";
@Nullable
String outputExtension;

/**
* The maximum time to wait for the export to complete. If zero or negative (default), the export
Expand Down Expand Up @@ -390,11 +391,12 @@ List<UrlDownloadEntry> getUrlDownloadEntries(@Nonnull final BulkExportResponse r
Collectors.groupingBy(BulkExportResponse.FileItem::getType, LinkedHashMap::new,
mapping(BulkExportResponse.FileItem::getUrl, toList())));

final String extension = extensionFromFormat(outputFormat, outputExtension);
return urlsByType.entrySet().stream()
.flatMap(entry -> IntStream.range(0, entry.getValue().size())
.mapToObj(index -> new UrlDownloadEntry(
URI.create(entry.getValue().get(index)),
destinationDir.child(toFileName(entry.getKey(), index, outputExtension))
destinationDir.child(toFileName(entry.getKey(), index, extension))
)
)
).collect(Collectors.toUnmodifiableList());
Expand All @@ -406,6 +408,31 @@ static String toFileName(@Nonnull final String resource, final int chunkNo,
return String.format("%s.%04d.%s", resource, chunkNo, extension);
}

/**
 * Derives the appropriate file extension from a MIME type or format string.
 * <p>
 * Matching ignores case and surrounding whitespace. Case folding is done with
 * {@link java.util.Locale#ROOT} so that the result does not depend on the default locale of the
 * JVM (for example, under a Turkish locale an upper-case "I" would otherwise lower-case to a
 * dotless "ı" and the Parquet MIME type would no longer be recognised).
 * <p>
 * Formats that are not recognised fall back to "ndjson".
 *
 * @param format the MIME type or format string (e.g., "application/fhir+ndjson", "parquet")
 * @param override if non-null, this extension is returned as-is instead of inferring one from
 * the format
 * @return the file extension without a leading dot (e.g., "ndjson", "parquet")
 */
@Nonnull
static String extensionFromFormat(@Nonnull final String format, @Nullable final String override) {
  // An explicit override always wins; the format is not inspected at all in that case.
  if (override != null) {
    return override;
  }
  final String normalised = format.toLowerCase(java.util.Locale.ROOT).trim();
  switch (normalised) {
    case "application/vnd.apache.parquet":
    case "parquet":
      return "parquet";
    // The known NDJSON spellings are listed for documentation; they share the default result.
    case "application/fhir+ndjson":
    case "application/x-ndjson":
    case "ndjson":
    default:
      return "ndjson";
  }
}

@Nonnull
private FileStore createFileStore() throws IOException {
log.debug("Creating FileStore of: {} for outputDir: {}", fileStoreFactory, outputDir);
Expand Down
129 changes: 128 additions & 1 deletion src/test/java/au/csiro/fhir/export/BulkExportClientTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,11 @@ void testMapsMultiPartResourceToSeparateFiles() {

@Test
void testMapsDifferentResourceToSeparateFiles() {

// When outputExtension is explicitly set, it should override the inferred extension.
final BulkExportClient client = BulkExportClient.builder()
.withFhirEndpointUrl("http://example.com")
.withOutputDir("output-dir")
.withOutputFormat("application/x-custom")
.withOutputExtension("xjson")
.build();

Expand Down Expand Up @@ -127,4 +128,130 @@ void testBuildsRequestWithRequestedAssociatedData() {
.build(),
client.buildBulkExportRequest());
}

@Test
void testExtensionFromFormatReturnsNdjsonForStandardMimeType() {
  // With no override, the standard FHIR Bulk Data MIME type must resolve to "ndjson".
  final String extension =
      BulkExportClient.extensionFromFormat("application/fhir+ndjson", null);

  assertEquals("ndjson", extension);
}

@Test
void testExtensionFromFormatReturnsNdjsonForAlternativeMimeType() {
  // With no override, the alternative NDJSON MIME type must also resolve to "ndjson".
  final String extension =
      BulkExportClient.extensionFromFormat("application/x-ndjson", null);

  assertEquals("ndjson", extension);
}

@Test
void testExtensionFromFormatReturnsNdjsonForShortForm() {
  // The bare format name "ndjson" (no MIME prefix) must resolve to "ndjson".
  final String extension = BulkExportClient.extensionFromFormat("ndjson", null);

  assertEquals("ndjson", extension);
}

@Test
void testExtensionFromFormatReturnsParquetForMimeType() {
  // With no override, the Parquet MIME type must resolve to "parquet".
  final String extension =
      BulkExportClient.extensionFromFormat("application/vnd.apache.parquet", null);

  assertEquals("parquet", extension);
}

@Test
void testExtensionFromFormatReturnsParquetForShortForm() {
  // The bare format name "parquet" (no MIME prefix) must resolve to "parquet".
  final String extension = BulkExportClient.extensionFromFormat("parquet", null);

  assertEquals("parquet", extension);
}

@Test
void testExtensionFromFormatReturnsNdjsonForUnknownFormat() {
  // A format that is not recognised, with no override, falls back to the default "ndjson".
  final String extension =
      BulkExportClient.extensionFromFormat("application/unknown", null);

  assertEquals("ndjson", extension);
}

@Test
void testExtensionFromFormatIsCaseInsensitive() {
  // Upper-case spellings of the known MIME types must match just like the lower-case ones.
  final String fromUpperCaseNdjson =
      BulkExportClient.extensionFromFormat("APPLICATION/FHIR+NDJSON", null);
  assertEquals("ndjson", fromUpperCaseNdjson);

  final String fromUpperCaseParquet =
      BulkExportClient.extensionFromFormat("APPLICATION/VND.APACHE.PARQUET", null);
  assertEquals("parquet", fromUpperCaseParquet);
}

@Test
void testExtensionFromFormatReturnsOverrideWhenSet() {
  // An explicit override wins over the format, whether the format is NDJSON, Parquet or
  // something unrecognised.
  final List<String> formats = List.of(
      "application/fhir+ndjson",
      "application/vnd.apache.parquet",
      "application/unknown");

  for (final String format : formats) {
    assertEquals("custom", BulkExportClient.extensionFromFormat(format, "custom"));
  }
}

@Test
void testUnknownFormatDefaultsToNdjsonExtension() {
  // End-to-end check through getUrlDownloadEntries: an unrecognised output format combined with
  // no explicit outputExtension must yield file names ending in ".ndjson".
  final BulkExportResponse response = BulkExportResponse.builder()
      .transactionTime(Instant.now())
      .request("fake-request")
      .output(List.of(new FileItem("Patient", "http:/foo.bar/1", 10)))
      .deleted(Collections.emptyList())
      .error(Collections.emptyList())
      .build();

  final BulkExportClient unknownFormatClient = BulkExportClient.builder()
      .withFhirEndpointUrl("http://example.com")
      .withOutputDir("output-dir")
      .withOutputFormat("application/x-custom")
      .build();

  final List<UrlDownloadEntry> expected = List.of(
      new UrlDownloadEntry(
          URI.create("http:/foo.bar/1"),
          FileHandle.ofLocal("output-dir/Patient.0000.ndjson")));

  final List<UrlDownloadEntry> actual = unknownFormatClient.getUrlDownloadEntries(
      response, FileHandle.ofLocal("output-dir"));

  assertEquals(expected, actual);
}

@Test
void testParquetFormatProducesParquetFileExtensions() {
  // End-to-end check through getUrlDownloadEntries: with the Parquet output format and no
  // explicit outputExtension, every resource type gets a ".parquet" file name.
  final BulkExportResponse response = BulkExportResponse.builder()
      .transactionTime(Instant.now())
      .request("fake-request")
      .output(List.of(
          new FileItem("Patient", "http:/foo.bar/1", 10),
          new FileItem("Observation", "http:/foo.bar/2", 10)))
      .deleted(Collections.emptyList())
      .error(Collections.emptyList())
      .build();

  final BulkExportClient parquetClient = BulkExportClient.builder()
      .withFhirEndpointUrl("http://example.com")
      .withOutputDir("output-dir")
      .withOutputFormat("application/vnd.apache.parquet")
      .build();

  final UrlDownloadEntry expectedPatient = new UrlDownloadEntry(
      URI.create("http:/foo.bar/1"),
      FileHandle.ofLocal("output-dir/Patient.0000.parquet"));
  final UrlDownloadEntry expectedObservation = new UrlDownloadEntry(
      URI.create("http:/foo.bar/2"),
      FileHandle.ofLocal("output-dir/Observation.0000.parquet"));

  final List<UrlDownloadEntry> actual = parquetClient.getUrlDownloadEntries(
      response, FileHandle.ofLocal("output-dir"));

  assertEquals(List.of(expectedPatient, expectedObservation), actual);
}
}