@@ -55,17 +55,15 @@ public BasePDFExtractor(LocalInputSource source) throws IOException {
5555 }
5656 }
5757
58- /**
59- * Converts an array to a buffered image.
60- *
61- * @param byteArray Raw byte array.
62- * @return a valid ImageIO buffer.
63- * @throws IOException Throws if the file can't be accessed.
64- */
65- private static BufferedImage byteArrayToBufferedImage (byte [] byteArray ) throws IOException {
66- try (ByteArrayInputStream stream = new ByteArrayInputStream (byteArray )) {
67- return ImageIO .read (stream );
58+ public ExtractedPDF extractSinglePage (
59+ List <Integer > pageNumbers ,
60+ boolean closeOriginal
61+ ) throws IOException {
62+ if (pageNumbers .isEmpty ()) {
63+ throw new MindeeException ("Empty indexes not allowed for extraction." );
6864 }
65+ var pdfBytes = createPdfFromExistingPdf (this .sourcePdf , pageNumbers , closeOriginal );
66+ return new ExtractedPDF (pdfBytes , makeFilename (pageNumbers ));
6967 }
7068
7169 /**
@@ -79,23 +77,37 @@ public ExtractedPDFs extractSubDocuments(List<List<Integer>> pageIndexes) throws
7977 var extractedPDFs = new ExtractedPDFs ();
8078
8179 for (List <Integer > pageIndexElement : pageIndexes ) {
82- if (pageIndexElement .isEmpty ()) {
83- throw new MindeeException ("Empty indexes not allowed for extraction." );
84- }
85- String [] splitName = InputSourceUtils .splitNameStrict (filename );
86- String fieldFilename = splitName [0 ]
87- + String .format ("_%3s" , pageIndexElement .get (0 ) + 1 ).replace (" " , "0" )
88- + "-"
89- + String
90- .format ("%3s" , pageIndexElement .get (pageIndexElement .size () - 1 ) + 1 )
91- .replace (" " , "0" )
92- + "."
93- + splitName [1 ];
94- extractedPDFs .add (extractSinglePage (pageIndexElement , fieldFilename , false ));
80+ extractedPDFs .add (extractSinglePage (pageIndexElement , false ));
9581 }
9682 return extractedPDFs ;
9783 }
9884
85+ /**
86+ * Converts an array to a buffered image.
87+ *
88+ * @param byteArray Raw byte array.
89+ * @return a valid ImageIO buffer.
90+ * @throws IOException Throws if the file can't be accessed.
91+ */
92+ private static BufferedImage byteArrayToBufferedImage (byte [] byteArray ) throws IOException {
93+ try (ByteArrayInputStream stream = new ByteArrayInputStream (byteArray )) {
94+ return ImageIO .read (stream );
95+ }
96+ }
97+
98+ /**
99+ * Make a nice filename for the split.
100+ */
101+ private String makeFilename (List <Integer > pageNumbers ) {
102+ String [] splitName = InputSourceUtils .splitNameStrict (filename );
103+ return splitName [0 ]
104+ + String .format ("_%3s" , pageNumbers .get (0 )).replace (" " , "0" )
105+ + "-"
106+ + String .format ("%3s" , pageNumbers .get (pageNumbers .size () - 1 )).replace (" " , "0" )
107+ + "."
108+ + splitName [1 ];
109+ }
110+
99111 private static PDPage clonePage (PDPage page ) {
100112
101113 COSDictionary pageDict = page .getCOSObject ();
@@ -129,28 +141,4 @@ private static byte[] createPdfFromExistingPdf(
129141 outputStream .close ();
130142 return output ;
131143 }
132-
133- public ExtractedPDF extractSinglePage (
134- List <Integer > pageNumbers ,
135- String fieldFilename ,
136- boolean closeOriginal
137- ) throws IOException {
138- var pdfBytes = createPdfFromExistingPdf (this .sourcePdf , pageNumbers , closeOriginal );
139- return new ExtractedPDF (pdfBytes , fieldFilename );
140- }
141-
142- public ExtractedPDF extractSinglePage (
143- List <Integer > pageNumbers ,
144- boolean closeOriginal
145- ) throws IOException {
146- var pdfBytes = createPdfFromExistingPdf (this .sourcePdf , pageNumbers , closeOriginal );
147- String [] splitName = InputSourceUtils .splitNameStrict (filename );
148- String fieldFilename = splitName [0 ]
149- + String .format ("_%3s" , pageNumbers .get (0 ) + 1 ).replace (" " , "0" )
150- + "-"
151- + String .format ("%3s" , pageNumbers .get (pageNumbers .size () - 1 ) + 1 ).replace (" " , "0" )
152- + "."
153- + splitName [1 ];
154- return new ExtractedPDF (pdfBytes , fieldFilename );
155- }
156144}
0 commit comments