Skip to content

Commit 18729d6

Browse files
authored
Merge pull request #2 from ANcpLua/chore/modernize-temp-paths
chore: modernize temp-path handling, harden ProcessRunner + Tesseract output
2 parents 4dc63e9 + 06845ae commit 18729d6

6 files changed

Lines changed: 58 additions & 38 deletions

File tree

CreatePdf.NET.Tests/DocumentTests.cs

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -143,14 +143,20 @@ public async Task SaveAsync_WithFilename_CreatesFile()
143143
{
144144
var doc = Pdf.Create();
145145
doc.AddText("Test content");
146-
var filename = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}.pdf");
146+
var tempDir = Directory.CreateTempSubdirectory("createpdf-doc-test-");
147+
var filename = Path.Combine(tempDir.FullName, "document.pdf");
147148

148-
var path = await doc.SaveAsync(filename).ConfigureAwait(true);
149-
150-
path.Should().EndWith(".pdf");
151-
File.Exists(path).Should().BeTrue();
149+
try
150+
{
151+
var path = await doc.SaveAsync(filename).ConfigureAwait(true);
152152

153-
File.Delete(path);
153+
path.Should().EndWith(".pdf");
154+
File.Exists(path).Should().BeTrue();
155+
}
156+
finally
157+
{
158+
tempDir.Delete(recursive: true);
159+
}
154160
}
155161

156162
[Fact]

CreatePdf.NET.Tests/OcrServiceTests.cs

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -66,22 +66,23 @@ public async Task ProcessPdfStreamAsync_HandlesCleanupErrorsGracefully()
6666
}
6767

6868
[Fact]
69-
public void TryDeleteFile_ExistingFile_DeletesSuccessfully()
69+
public void TryDeleteDirectory_ExistingDirectoryWithContents_DeletesRecursively()
7070
{
71-
var tempFile = Path.GetTempFileName();
72-
File.Exists(tempFile).Should().BeTrue();
71+
var tempDir = Directory.CreateTempSubdirectory("createpdf-ocr-test-");
72+
File.WriteAllText(Path.Combine(tempDir.FullName, "leftover.txt"), "x");
7373

74-
OcrService.TryDeleteFile(tempFile);
74+
OcrService.TryDeleteDirectory(tempDir.FullName);
7575

76-
File.Exists(tempFile).Should().BeFalse();
76+
Directory.Exists(tempDir.FullName).Should().BeFalse();
7777
}
7878

7979
[Fact]
80-
public void TryDeleteFile_NonExistentFile_DoesNotThrow()
80+
public void TryDeleteDirectory_NonExistentDirectory_DoesNotThrow()
8181
{
82-
var nonExistentFile = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
82+
var nonExistent = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N"));
83+
84+
var act = () => OcrService.TryDeleteDirectory(nonExistent);
8385

84-
var act = () => OcrService.TryDeleteFile(nonExistentFile);
8586
act.Should().NotThrow();
8687
}
8788
}

CreatePdf.NET.Tests/RuntimeSystemEnvironmentTests.cs

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,14 +17,16 @@ public void Properties_MirrorUnderlyingEnvironment()
1717
[Fact]
1818
public void FileExists_DelegatesToFileSystem()
1919
{
20-
var temp = Path.GetTempFileName();
20+
var tempDir = Directory.CreateTempSubdirectory("createpdf-runtime-test-");
21+
var temp = Path.Combine(tempDir.FullName, "probe.txt");
22+
File.WriteAllText(temp, string.Empty);
2123
try
2224
{
2325
RuntimeSystemEnvironment.Instance.FileExists(temp).Should().BeTrue();
2426
}
2527
finally
2628
{
27-
File.Delete(temp);
29+
tempDir.Delete(recursive: true);
2830
}
2931

3032
RuntimeSystemEnvironment.Instance.FileExists(temp).Should().BeFalse();

CreatePdf.NET/Internal/OcrService.cs

Lines changed: 26 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@ namespace CreatePdf.NET.Internal;
22

33
internal sealed class OcrService
44
{
5+
private const string TempDirPrefix = "createpdf-ocr-";
6+
57
private readonly IOcrProvider _provider;
68

79
public OcrService() : this(new TesseractOcrProvider())
@@ -16,49 +18,53 @@ internal OcrService(IOcrProvider provider)
1618
public async Task<string> ProcessPdfAsync(string pdfPath, OcrOptions options,
1719
CancellationToken cancellationToken = default)
1820
{
19-
var tempDir = Path.GetTempPath();
20-
var pdfName = Path.GetFileNameWithoutExtension(pdfPath);
21-
var pngPath = Path.Combine(tempDir, $"{pdfName}_{Guid.NewGuid():N}.png");
22-
var txtPath = Path.Combine(tempDir, $"{pdfName}_{Guid.NewGuid():N}.txt");
23-
21+
var workDir = Directory.CreateTempSubdirectory(TempDirPrefix);
2422
try
2523
{
26-
await _provider.RasterizePdfToPngAsync(pdfPath, pngPath, options, cancellationToken).ConfigureAwait(false);
27-
return await _provider.ExtractTextFromImageAsync(pngPath, txtPath, options, cancellationToken)
28-
.ConfigureAwait(false);
24+
return await OcrAsync(pdfPath, workDir.FullName, options, cancellationToken).ConfigureAwait(false);
2925
}
3026
finally
3127
{
32-
TryDeleteFile(pngPath);
33-
TryDeleteFile(txtPath);
28+
TryDeleteDirectory(workDir.FullName);
3429
}
3530
}
3631

3732
public async Task<string> ProcessPdfStreamAsync(Stream pdfStream, OcrOptions options,
3833
CancellationToken cancellationToken = default)
3934
{
40-
var tempDir = Path.GetTempPath();
41-
var pdfFileName = Path.ChangeExtension(Path.GetRandomFileName(), ".pdf");
42-
var pdfPath = Path.Combine(tempDir, pdfFileName);
43-
35+
var workDir = Directory.CreateTempSubdirectory(TempDirPrefix);
4436
try
4537
{
46-
await using (var fileStream = new FileStream(pdfPath, FileMode.CreateNew, FileAccess.Write, FileShare.None))
38+
var pdfPath = Path.Combine(workDir.FullName, "input.pdf");
39+
await using (var fileStream =
40+
new FileStream(pdfPath, FileMode.CreateNew, FileAccess.Write, FileShare.None))
4741
{
4842
await pdfStream.CopyToAsync(fileStream, cancellationToken).ConfigureAwait(false);
4943
}
5044

51-
return await ProcessPdfAsync(pdfPath, options, cancellationToken).ConfigureAwait(false);
45+
return await OcrAsync(pdfPath, workDir.FullName, options, cancellationToken).ConfigureAwait(false);
5246
}
5347
finally
5448
{
55-
TryDeleteFile(pdfPath);
49+
TryDeleteDirectory(workDir.FullName);
5650
}
5751
}
5852

59-
internal static void TryDeleteFile(string path)
53+
private async Task<string> OcrAsync(string pdfPath, string workDir, OcrOptions options,
54+
CancellationToken cancellationToken)
55+
{
56+
var pdfName = Path.GetFileNameWithoutExtension(pdfPath);
57+
var pngPath = Path.Combine(workDir, $"{pdfName}.png");
58+
var txtPath = Path.Combine(workDir, $"{pdfName}.txt");
59+
60+
await _provider.RasterizePdfToPngAsync(pdfPath, pngPath, options, cancellationToken).ConfigureAwait(false);
61+
return await _provider.ExtractTextFromImageAsync(pngPath, txtPath, options, cancellationToken)
62+
.ConfigureAwait(false);
63+
}
64+
65+
internal static void TryDeleteDirectory(string path)
6066
{
61-
if (File.Exists(path))
62-
File.Delete(path);
67+
if (Directory.Exists(path))
68+
Directory.Delete(path, recursive: true);
6369
}
6470
}

CreatePdf.NET/Internal/ProcessRunner.cs

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,9 @@ public async Task RunAsync(ProcessStartInfo startInfo, CancellationToken cancell
1010
{
1111
ArgumentNullException.ThrowIfNull(startInfo);
1212

13-
using var process = Process.Start(startInfo)!;
13+
using var process = Process.Start(startInfo)
14+
?? throw new InvalidOperationException(
15+
$"Process.Start returned null for '{startInfo.FileName}' — no new process was created.");
1416
await process.WaitForExitAsync(cancellationToken).ConfigureAwait(false);
1517
}
1618
}

CreatePdf.NET/Internal/TesseractOcrProvider.cs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,10 @@ await _processRunner.RunAsync(
5050
public async Task<string> ExtractTextFromImageAsync(string pngPath, string txtPath, OcrOptions options,
5151
CancellationToken cancellationToken = default)
5252
{
53-
var outputBase = txtPath[..^4];
53+
// Tesseract appends ".txt" itself, so strip the extension to give it the base path.
54+
var outputBase = Path.Combine(
55+
Path.GetDirectoryName(txtPath) ?? string.Empty,
56+
Path.GetFileNameWithoutExtension(txtPath));
5457

5558
await _processRunner.RunAsync(
5659
CreateProcessInfo(

0 commit comments

Comments
 (0)