forked from mindee/mindee-api-java
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPdfCompressor.java
More file actions
95 lines (81 loc) · 3.24 KB
/
PdfCompressor.java
File metadata and controls
95 lines (81 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package com.mindee.pdf;
import static com.mindee.input.InputSourceUtils.hasSourceText;
import static com.mindee.input.InputSourceUtils.isPdf;
import java.awt.image.BufferedImage;
import java.io.IOException;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
/**
 * PDF compression class.
 *
 * <p>Compression works by rendering every page of the input PDF to a bitmap, re-encoding that
 * bitmap as a JPEG and placing it on a fresh page of the same size in a new document.
 */
public class PdfCompressor {
  /** Image quality (in percent) applied when the caller does not provide one. */
  private static final int DEFAULT_IMAGE_QUALITY = 85;

  /** Rendering resolution, in DPI, corresponding to an image quality of 100. */
  private static final float BASE_DPI = 72f;

  /**
   * Compresses a PDF by rasterizing each of its pages into a JPEG image.
   *
   * <p>The input is returned untouched when it is not recognized as a PDF, or when it contains
   * source text and {@code forceSourceTextCompression} is not enabled (a warning is printed to
   * standard output in that second case).
   *
   * @param pdfData Raw bytes of the file to compress.
   * @param imageQuality Quality in percent. It drives both the rendering resolution
   *     ({@code 72 * quality / 100} DPI) and the JPEG encoding quality ({@code quality / 100}).
   *     Defaults to 85 when {@code null}.
   * @param forceSourceTextCompression Whether to compress even if the PDF contains source text.
   *     Defaults to {@code false} when {@code null}.
   * @param disableSourceText Flag forwarded to {@code PDFUtils.extractAndAddText} for every page;
   *     presumably prevents the original text from being written back onto the compressed pages
   *     (confirm against {@code PDFUtils}). Defaults to {@code true} when {@code null}.
   * @return The bytes of the compressed PDF, or {@code pdfData} itself when nothing was done.
   * @throws IOException If the document cannot be loaded, rendered or written.
   */
  public static byte[] compressPdf(
      byte[] pdfData, Integer imageQuality,
      Boolean forceSourceTextCompression, Boolean disableSourceText
  ) throws IOException {
    if (!isPdf(pdfData)) {
      return pdfData;
    }

    boolean forceCompression = forceSourceTextCompression != null && forceSourceTextCompression;
    boolean skipSourceText = disableSourceText == null || disableSourceText;

    if (!forceCompression && hasSourceText(pdfData)) {
      System.out.println(
          "MINDEE WARNING: Found text inside of the provided PDF file. Compression operation aborted.");
      return pdfData;
    }

    // A null quality used to fail with a NullPointerException on unboxing; fall back to the
    // same default as the single-argument overload instead.
    int quality = imageQuality != null ? imageQuality : DEFAULT_IMAGE_QUALITY;
    float qualityRatio = quality / 100f;
    float renderDpi = BASE_DPI * qualityRatio;

    // Both documents are closed by the try-with-resources block; no explicit close() is needed.
    try (PDDocument inputDoc = Loader.loadPDF(pdfData);
        PDDocument outputDoc = new PDDocument()) {
      PDFRenderer pdfRenderer = new PDFRenderer(inputDoc);
      int pageCount = inputDoc.getNumberOfPages();
      for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
        PDRectangle originalPageSize = inputDoc.getPage(pageIndex).getMediaBox();
        BufferedImage renderedPage =
            pdfRenderer.renderImageWithDPI(pageIndex, renderDpi, ImageType.ARGB);
        processPage(
            inputDoc, pageIndex, outputDoc, renderedPage,
            qualityRatio, originalPageSize, skipSourceText
        );
      }
      return PDFUtils.documentToBytes(outputDoc);
    }
  }

  /**
   * Compresses a PDF, passing {@code true} as the {@code disableSourceText} flag.
   *
   * @param pdfData Raw bytes of the file to compress.
   * @param imageQuality Quality in percent; defaults to 85 when {@code null}.
   * @param forceSourceTextCompression Whether to compress even if the PDF contains source text.
   * @return The bytes of the compressed PDF, or {@code pdfData} itself when nothing was done.
   * @throws IOException If the document cannot be loaded, rendered or written.
   */
  public static byte[] compressPdf(
      byte[] pdfData, Integer imageQuality,
      Boolean forceSourceTextCompression
  ) throws IOException {
    return compressPdf(pdfData, imageQuality, forceSourceTextCompression, true);
  }

  /**
   * Compresses a PDF without forcing compression of documents that contain source text.
   *
   * @param pdfData Raw bytes of the file to compress.
   * @param imageQuality Quality in percent; defaults to 85 when {@code null}.
   * @return The bytes of the compressed PDF, or {@code pdfData} itself when nothing was done.
   * @throws IOException If the document cannot be loaded, rendered or written.
   */
  public static byte[] compressPdf(byte[] pdfData, Integer imageQuality) throws IOException {
    return compressPdf(pdfData, imageQuality, false, true);
  }

  /**
   * Compresses a PDF with the default image quality (85) and without forcing compression of
   * documents that contain source text.
   *
   * @param pdfData Raw bytes of the file to compress.
   * @return The bytes of the compressed PDF, or {@code pdfData} itself when nothing was done.
   * @throws IOException If the document cannot be loaded, rendered or written.
   */
  public static byte[] compressPdf(byte[] pdfData) throws IOException {
    return compressPdf(pdfData, DEFAULT_IMAGE_QUALITY, false, true);
  }

  /**
   * Appends one compressed page to the output document.
   *
   * <p>A new page with the original page size is added to {@code outputDoc}, the rendered image
   * is JPEG-encoded and placed on it, then the text step is delegated to
   * {@code PDFUtils.extractAndAddText}.
   *
   * @param originalDocument Document the page was rendered from.
   * @param pageIndex Zero-based index of the page in {@code originalDocument}.
   * @param outputDoc Document receiving the compressed page.
   * @param image Rendered bitmap of the page.
   * @param imageQuality JPEG quality handed to {@code JPEGFactory.createFromImage}.
   * @param originalPageSize Media box of the original page, reused for the new page.
   * @param disableSourceText Flag forwarded as-is to {@code PDFUtils.extractAndAddText}.
   * @throws IOException If the image cannot be encoded or the page content cannot be written.
   */
  private static void processPage(
      PDDocument originalDocument, int pageIndex,
      PDDocument outputDoc, BufferedImage image,
      float imageQuality,
      PDRectangle originalPageSize, boolean disableSourceText
  ) throws IOException {
    PDPage newPage = new PDPage(originalPageSize);
    outputDoc.addPage(newPage);

    PDImageXObject pdImage = JPEGFactory.createFromImage(outputDoc, image, imageQuality);

    try (PDPageContentStream contentStream = new PDPageContentStream(outputDoc, newPage)) {
      PDFUtils.addImageToPage(contentStream, pdImage, originalPageSize);
      PDFUtils.extractAndAddText(originalDocument, contentStream, pageIndex, disableSourceText);
    }
  }
}