Skip to content

Commit 18a9bfe

Browse files
committed
Enhance PDF generation steps to support multiple reference engines
- Modified `step_generate_reference_pdfs` function to accept an `engine` parameter, allowing selection between LibreOffice and MS Office for PDF conversion.
- Updated command execution logic to use the appropriate script based on the selected engine.
- Adjusted argument parsing in the main function to include an `--engine` option for specifying the reference engine.
- Updated documentation and print statements to reflect the changes in the reference engine selection process.
1 parent cf47292 commit 18a9bfe

1,042 files changed

Lines changed: 11196 additions & 9807 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.vscode/settings.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"github.copilot.chat.backgroundCompaction": true,
3+
"github.copilot.chat.summarizeAgentConversationHistoryThreshold": 170000
4+
}

scripts/Run-Benchmark.ps1

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ param(
2020
[switch]$SkipInstall,
2121
[switch]$WithOffice,
2222
[switch]$SkipOffice,
23+
[ValidateSet("libre", "office")]
24+
[string]$Engine = "office",
2325
[string]$Filter
2426
)
2527

@@ -50,6 +52,7 @@ if ($SkipMiniPdf) { $pyArgs += "--skip-minipdf" }
5052
if ($SkipReference) { $pyArgs += "--skip-reference" }
5153
if ($WithOffice) { $pyArgs += "--with-office" }
5254
if ($SkipOffice) { $pyArgs += "--skip-office" }
55+
if ($Engine -ne "office") { $pyArgs += "--engine"; $pyArgs += $Engine }
5356
if ($Filter) { $pyArgs += "--filter"; $pyArgs += $Filter }
5457

5558
# Run the benchmark pipeline

scripts/Run-Benchmark_docx.ps1

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ param(
2020
[switch]$SkipInstall,
2121
[switch]$WithOffice,
2222
[switch]$SkipOffice,
23+
[ValidateSet("libre", "office")]
24+
[string]$Engine = "office",
2325
[string]$Filter
2426
)
2527

@@ -50,6 +52,7 @@ if ($SkipMiniPdf) { $pyArgs += "--skip-minipdf" }
5052
if ($SkipReference) { $pyArgs += "--skip-reference" }
5153
if ($WithOffice) { $pyArgs += "--with-office" }
5254
if ($SkipOffice) { $pyArgs += "--skip-office" }
55+
if ($Engine -ne "office") { $pyArgs += "--engine"; $pyArgs += $Engine }
5356
if ($Filter) { $pyArgs += "--filter"; $pyArgs += $Filter }
5457

5558
# Run the benchmark pipeline

scripts/Run-Benchmark_issues.ps1

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ param(
2222
[switch]$SkipReference,
2323
[switch]$SkipInstall,
2424
[switch]$WithOffice,
25-
[switch]$SkipOffice
25+
[switch]$SkipOffice,
26+
[ValidateSet("libre", "office")]
27+
[string]$Engine = "office"
2628
)
2729

2830
$ErrorActionPreference = "Continue"
@@ -93,14 +95,26 @@ if ($xlsxFiles -and $xlsxFiles.Count -gt 0) {
9395
}
9496

9597
if (-not $CompareOnly -and -not $SkipReference) {
96-
Write-Host '[Step 2] Converting XLSX -> PDF (LibreOffice)...' -ForegroundColor Yellow
97-
Push-Location $BenchmarkDir
98-
try {
99-
$refArgs = @("generate_reference_pdfs.py", "--xlsx-dir", $XlsxIssueDir, "--pdf-dir", $RefXlsx)
100-
if ($Filter) { $refArgs += @("--filter", $Filter) }
101-
python @refArgs
102-
} finally {
103-
Pop-Location
98+
if ($Engine -eq 'office') {
99+
Write-Host '[Step 2] Converting XLSX -> PDF (Office / Excel COM)...' -ForegroundColor Yellow
100+
Push-Location $BenchmarkDir
101+
try {
102+
$refArgs = @("generate_office_pdfs.py", "--xlsx-dir", $XlsxIssueDir, "--pdf-dir", $RefXlsx)
103+
if ($Filter) { $refArgs += @("--filter", $Filter) }
104+
python @refArgs
105+
} finally {
106+
Pop-Location
107+
}
108+
} else {
109+
Write-Host '[Step 2] Converting XLSX -> PDF (LibreOffice)...' -ForegroundColor Yellow
110+
Push-Location $BenchmarkDir
111+
try {
112+
$refArgs = @("generate_reference_pdfs.py", "--xlsx-dir", $XlsxIssueDir, "--pdf-dir", $RefXlsx)
113+
if ($Filter) { $refArgs += @("--filter", $Filter) }
114+
python @refArgs
115+
} finally {
116+
Pop-Location
117+
}
104118
}
105119
}
106120

@@ -153,14 +167,26 @@ if ($docxFiles -and $docxFiles.Count -gt 0) {
153167
}
154168

155169
if (-not $CompareOnly -and -not $SkipReference) {
156-
Write-Host '[Step 2] Converting DOCX -> PDF (LibreOffice)...' -ForegroundColor Yellow
157-
Push-Location $BenchmarkDir
158-
try {
159-
$refArgs = @("generate_reference_pdfs_docx.py", "--docx-dir", $DocxIssueDir, "--pdf-dir", $RefDocx)
160-
if ($Filter) { $refArgs += @("--filter", $Filter) }
161-
python @refArgs
162-
} finally {
163-
Pop-Location
170+
if ($Engine -eq 'office') {
171+
Write-Host '[Step 2] Converting DOCX -> PDF (Office / Word COM)...' -ForegroundColor Yellow
172+
Push-Location $BenchmarkDir
173+
try {
174+
$refArgs = @("generate_office_pdfs_docx.py", "--docx-dir", $DocxIssueDir, "--pdf-dir", $RefDocx)
175+
if ($Filter) { $refArgs += @("--filter", $Filter) }
176+
python @refArgs
177+
} finally {
178+
Pop-Location
179+
}
180+
} else {
181+
Write-Host '[Step 2] Converting DOCX -> PDF (LibreOffice)...' -ForegroundColor Yellow
182+
Push-Location $BenchmarkDir
183+
try {
184+
$refArgs = @("generate_reference_pdfs_docx.py", "--docx-dir", $DocxIssueDir, "--pdf-dir", $RefDocx)
185+
if ($Filter) { $refArgs += @("--filter", $Filter) }
186+
python @refArgs
187+
} finally {
188+
Pop-Location
189+
}
164190
}
165191
}
166192

tests/MiniPdf.Benchmark/generate_office_pdfs.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ def main():
5252
help="Directory containing .xlsx files")
5353
parser.add_argument("--pdf-dir", default="office_pdfs",
5454
help="Output directory for Office-generated PDFs")
55+
parser.add_argument("--filter", default=None, metavar="PATTERN",
56+
help="Only convert files whose name contains this substring")
5557
args = parser.parse_args()
5658

5759
xlsx_dir = os.path.abspath(args.xlsx_dir)
@@ -69,6 +71,8 @@ def main():
6971
print()
7072

7173
xlsx_files = sorted(Path(xlsx_dir).glob("*.xlsx"))
74+
if args.filter:
75+
xlsx_files = [f for f in xlsx_files if args.filter.lower() in f.stem.lower()]
7276
if not xlsx_files:
7377
print("No .xlsx files found.")
7478
sys.exit(1)

tests/MiniPdf.Benchmark/generate_office_pdfs_docx.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ def main():
4949
help="Directory containing .docx files")
5050
parser.add_argument("--pdf-dir", default="office_pdfs_docx",
5151
help="Output directory for Office-generated PDFs")
52+
parser.add_argument("--filter", default=None, metavar="PATTERN",
53+
help="Only convert files whose name contains this substring")
5254
args = parser.parse_args()
5355

5456
docx_dir = os.path.abspath(args.docx_dir)
@@ -66,6 +68,8 @@ def main():
6668
print()
6769

6870
docx_files = sorted(Path(docx_dir).glob("*.docx"))
71+
if args.filter:
72+
docx_files = [f for f in docx_files if args.filter.lower() in f.stem.lower()]
6973
if not docx_files:
7074
print("No .docx files found.")
7175
sys.exit(1)

0 commit comments

Comments
 (0)