Skip to content

Commit 940e444

Browse files
committed
MLE-27077 Added fix for invalid header for empty doc
1 parent 541b63e commit 940e444

3 files changed

Lines changed: 67 additions & 10 deletions

File tree

marklogic-client-api/src/main/java/com/marklogic/client/impl/OkHttpServices.java

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@
3434
import jakarta.mail.BodyPart;
3535
import jakarta.mail.Header;
3636
import jakarta.mail.MessagingException;
37+
import jakarta.mail.internet.ContentDisposition;
3738
import jakarta.mail.internet.MimeMultipart;
39+
import jakarta.mail.internet.ParseException;
3840
import jakarta.mail.util.ByteArrayDataSource;
3941
import jakarta.xml.bind.DatatypeConverter;
4042
import okhttp3.*;
@@ -1808,16 +1810,50 @@ static private long getHeaderLength(String length) {
18081810

18091811
static private String getHeaderUri(BodyPart part) {
18101812
try {
1811-
if (part != null) {
1812-
return part.getFileName();
1813+
if (part == null) {
1814+
return null;
18131815
}
1814-
// if it's not found, just return null
1816+
1817+
try {
1818+
String filename = part.getFileName();
1819+
if (filename != null) {
1820+
return filename;
1821+
}
1822+
} catch (ParseException e) {
1823+
// Jakarta Mail's parser failed due to malformed Content-Disposition header.
1824+
// Check whether MarkLogic sent a trailing "format=" parameter with no value, which violates RFC 2183.
1825+
String contentDisposition = getHeader(part, "Content-Disposition");
1826+
if (contentDisposition != null && contentDisposition.matches(".*;\\s*format\\s*=\\s*$")) {
1827+
// Remove the trailing "; format=" to fix the malformed header
1828+
String cleaned = contentDisposition.replaceFirst(";\\s*format\\s*=\\s*$", "").trim();
1829+
logger.debug("Removed trailing 'format=' from malformed Content-Disposition header: {} -> {}", contentDisposition, cleaned);
1830+
return extractFilenameFromContentDisposition(cleaned);
1831+
}
1832+
throw e;
1833+
}
1834+
18151835
return null;
18161836
} catch (MessagingException e) {
18171837
throw new MarkLogicIOException(e);
18181838
}
18191839
}
18201840

1841+
static private String extractFilenameFromContentDisposition(String contentDisposition) {
1842+
if (contentDisposition == null) {
1843+
return null;
1844+
}
1845+
try {
1846+
// Use Jakarta Mail's ContentDisposition parser to extract the filename parameter. This is the same class
1846+
// that throws an error when a trailing "format=" is present in the value; the caller has already removed it.
1848+
ContentDisposition cd = new ContentDisposition(contentDisposition);
1849+
return cd.getParameter("filename");
1850+
} catch (ParseException e) {
1851+
logger.warn("Failed to parse cleaned Content-Disposition header: {}; cause: {}",
1852+
contentDisposition, e.getMessage());
1853+
return null;
1854+
}
1855+
}
1856+
18211857
static private void updateVersion(DocumentDescriptor descriptor, Headers headers) {
18221858
updateVersion(descriptor, extractVersion(headers.get(HEADER_ETAG)));
18231859
}

marklogic-client-api/src/test/java/com/marklogic/client/test/document/ReadDocumentPageTest.java

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,30 @@
44
package com.marklogic.client.test.document;
55

66
import com.marklogic.client.DatabaseClient;
7-
import com.marklogic.client.document.*;
7+
import com.marklogic.client.document.DocumentPage;
8+
import com.marklogic.client.document.DocumentRecord;
9+
import com.marklogic.client.document.DocumentWriteSet;
10+
import com.marklogic.client.document.JSONDocumentManager;
811
import com.marklogic.client.io.BytesHandle;
912
import com.marklogic.client.io.DocumentMetadataHandle;
1013
import com.marklogic.client.io.StringHandle;
1114
import com.marklogic.client.query.StructuredQueryBuilder;
1215
import com.marklogic.client.query.StructuredQueryDefinition;
16+
import com.marklogic.client.test.AbstractClientTest;
1317
import com.marklogic.client.test.Common;
14-
import org.junit.jupiter.api.Disabled;
1518
import org.junit.jupiter.api.Test;
1619

1720
import static org.junit.jupiter.api.Assertions.assertEquals;
1821
import static org.junit.jupiter.api.Assertions.assertTrue;
1922

20-
class ReadDocumentPageTest {
23+
class ReadDocumentPageTest extends AbstractClientTest {
2124

25+
/**
26+
* Verifies that the jakarta.mail library, instead of javax.mail, can properly read the URI.
27+
* See MLE-15748, which pertains to issues with javax.mail only allowing US-ASCII characters.
28+
*/
2229
@Test
2330
void test() {
24-
Common.deleteUrisWithPattern("/aaa-page/*");
25-
2631
final String uri = "/aaa-page/太田佳伸のXMLファイル.xml";
2732
DocumentRecord documentRecord;
2833
try (DatabaseClient client = Common.newClient()) {
@@ -38,8 +43,24 @@ void test() {
3843
}
3944

4045
@Test
41-
@Disabled("Disabling for now because this seems to be a server bug.")
42-
void testEmptyDocWithNoExtension() {
46+
void emptyTextDocument() {
47+
final String uri = "/sample/empty-file.txt";
48+
49+
try (DatabaseClient client = Common.newClient()) {
50+
JSONDocumentManager documentManager = client.newJSONDocumentManager();
51+
StructuredQueryDefinition query = new StructuredQueryBuilder().document(uri);
52+
DocumentRecord documentRecord;
53+
try (DocumentPage documentPage = documentManager.search(query, 1)) {
54+
assertTrue(documentPage.hasNext(), "Expected a document in the page, but none was found.");
55+
documentRecord = documentPage.next();
56+
}
57+
String actualUri = documentRecord.getUri();
58+
assertEquals(uri, actualUri, "The URI of the empty document should match the one written.");
59+
}
60+
}
61+
62+
@Test
63+
void emptyDocWithNoExtension() {
4364
final String collection = "empty-binary-test";
4465

4566
try (DatabaseClient client = Common.newClient()) {

test-app/src/main/ml-data/sample/empty-file.txt

Whitespace-only changes.

0 commit comments

Comments
 (0)