11from typing import List , Union
22
33from mindee .error import MindeeError
4- from mindee .extraction import PdfExtractor
4+ from mindee .extraction .pdf_extractor .extracted_pdf import ExtractedPdf
5+ from mindee .extraction .pdf_extractor .pdf_extractor import PdfExtractor
56from mindee .input .sources .local_input_source import LocalInputSource
67from mindee .v2 .file_operations .split_files import SplitFiles
7- from mindee .v2 .product .split .split_range import SplitRange
8+
9+
def extract_single_split(
    input_source: LocalInputSource, split: List[int]
) -> ExtractedPdf:
    """
    Extract one split of the document as a standalone PDF.

    Delegates to ``extract_splits`` with ``split`` as its only entry and
    hands back the first element of the result.

    :param input_source: Input source to split.
    :param split: Page range to extract; ``extract_splits`` reads it as
        ``[first, last]`` with both bounds included.
    :return: Extracted PDF for the requested split.
    """
    split_files = extract_splits(input_source, [split])
    return split_files[0]
821
922
def extract_splits(
    input_source: LocalInputSource,
    splits: Union[List[List[int]]],
) -> SplitFiles:
    """
    Extracts splits as complete PDFs from the document.

    :param input_source: Input source to split.
    :param splits: Page ranges to extract. Each entry is read as
        ``[first, last]`` and expanded to every page index in between, both
        bounds included.
    :return: The extracted sub-documents, wrapped in a ``SplitFiles``.
    :raises MindeeError: If ``splits`` is empty.
    """
    # Reject an empty request up front, before any work is done on the input.
    if not splits:
        raise MindeeError("No indexes provided.")
    pdf_extractor = PdfExtractor(input_source)
    # Expand every [first, last] pair into the explicit list of page indexes.
    page_groups = [list(range(split[0], split[1] + 1)) for split in splits]
    return SplitFiles(pdf_extractor.extract_sub_documents(page_groups))
0 commit comments