Skip to content

Commit 2173626

Browse files
authored
Merge pull request #3 from ANcpLua/chore/copilot-followup-pr2
Address Copilot review comments from PR #2
2 parents 18729d6 + 609f61a commit 2173626

4 files changed

Lines changed: 110 additions & 6 deletions

File tree

CreatePdf.NET.Tests/OcrServiceTests.cs

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,4 +85,40 @@ public void TryDeleteDirectory_NonExistentDirectory_DoesNotThrow()
8585

8686
act.Should().NotThrow();
8787
}
88+
89+
// Verifies that OcrService.TryDeleteDirectory does not let an IOException thrown by the
// injected delete delegate escape to the caller.
[Fact]
90+
public void TryDeleteDirectory_WhenDeleteThrowsIOException_DoesNotPropagate()
91+
{
92+
// A real directory is created so the path exists when TryDeleteDirectory is called.
var tempDir = Directory.CreateTempSubdirectory("createpdf-ocr-test-");
93+
try
94+
{
95+
// The second argument replaces the real delete operation with one that always throws.
var act = () => OcrService.TryDeleteDirectory(
96+
tempDir.FullName,
97+
(_, _) => throw new IOException("simulated lock"));
98+
99+
act.Should().NotThrow();
100+
}
101+
finally
102+
{
103+
// The throwing delegate never removes anything, so the test cleans up the directory itself.
Directory.Delete(tempDir.FullName, recursive: true);
104+
}
105+
}
106+
107+
// Verifies that OcrService.TryDeleteDirectory does not let an UnauthorizedAccessException
// thrown by the injected delete delegate escape to the caller.
[Fact]
108+
public void TryDeleteDirectory_WhenDeleteThrowsUnauthorizedAccess_DoesNotPropagate()
109+
{
110+
// A real directory is created so the path exists when TryDeleteDirectory is called.
var tempDir = Directory.CreateTempSubdirectory("createpdf-ocr-test-");
111+
try
112+
{
113+
// The second argument replaces the real delete operation with one that always throws.
var act = () => OcrService.TryDeleteDirectory(
114+
tempDir.FullName,
115+
(_, _) => throw new UnauthorizedAccessException("simulated permission denied"));
116+
117+
act.Should().NotThrow();
118+
}
119+
finally
120+
{
121+
// The throwing delegate never removes anything, so the test cleans up the directory itself.
Directory.Delete(tempDir.FullName, recursive: true);
122+
}
123+
}
88124
}

CreatePdf.NET.Tests/TesseractOcrProviderTests.cs

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,66 @@ await act.Should().ThrowAsync<FileNotFoundException>()
2929
processRunner.StartInfos[0].Arguments.Should().Contain("input.png");
3030
}
3131

32+
// Verifies which output path TesseractOcrProvider looks for after a run: for each input file
// name, the only path probed through FileExists, and the FileName reported on the resulting
// FileNotFoundException, must be expectedName inside the same directory.
// Cases cover a non-.txt extension, no extension, a multi-dot name, an upper-case .TXT
// extension, and a trailing dot.
[Theory]
33+
[InlineData("out.log", "out.txt")]
34+
[InlineData("out", "out.txt")]
35+
[InlineData("foo.bar.log", "foo.bar.txt")]
36+
[InlineData("out.TXT", "out.txt")]
37+
[InlineData("out.", "out.txt")]
38+
public async Task ExtractTextFromImageAsync_DerivesActualTxtPathFromBase(string inputName, string expectedName)
39+
{
40+
// Only path strings are built here; this test itself creates no files.
var tempDir = Path.GetTempPath();
41+
var inputPath = Path.Combine(tempDir, inputName);
42+
var expectedPath = Path.Combine(tempDir, expectedName);
43+
var checkedPaths = new List<string>();
44+
// NOTE(review): FakeProcessRunner is defined elsewhere; presumably it records the start info
// instead of launching a process, so "/bin/echo" below would never be executed — confirm.
var processRunner = new FakeProcessRunner();
45+
var environment = new FakeSystemEnvironment
46+
{
47+
// Records every probed path and always reports "missing", which forces the
// output-file-not-found failure asserted below.
FileExistsImpl = path =>
48+
{
49+
checkedPaths.Add(path);
50+
return false;
51+
}
52+
};
53+
var engine = new TesseractOcrProvider(environment, processRunner);
54+
55+
var act = () => engine.ExtractTextFromImageAsync(
56+
"input.png",
57+
inputPath,
58+
new OcrOptions { TesseractPath = "/bin/echo" });
59+
60+
// The exception must name the derived .txt path, not the path that was passed in.
await act.Should().ThrowAsync<FileNotFoundException>()
61+
.Where(e => e.FileName == expectedPath)
62+
.ConfigureAwait(true);
63+
64+
// Exactly one existence check is expected, and it must target the derived path.
checkedPaths.Should().ContainSingle().Which.Should().Be(expectedPath);
65+
}
66+
67+
// Verifies the success path when the caller passes a non-.txt output path: the text is read
// from the sibling file that has the same base name and a .txt extension.
[Fact]
68+
public async Task ExtractTextFromImageAsync_WithNonTxtInputPath_ReadsFromDerivedTxtPath()
69+
{
70+
// A GUID-based name keeps the temp file unique to this test run.
var actualTxtPath = Path.Combine(Path.GetTempPath(), $"createpdf-test-{Guid.NewGuid():N}.txt");
71+
// Same directory and base name with a .log extension; this file is never created.
var nonTxtInputPath = Path.ChangeExtension(actualTxtPath, ".log");
72+
// The file is written up front so a real existence check and read can succeed.
await File.WriteAllTextAsync(actualTxtPath, "Hello\nWorld").ConfigureAwait(true);
73+
try
74+
{
75+
var processRunner = new FakeProcessRunner();
76+
// Existence checks go to the real file system, so only the file written above is found.
var environment = new FakeSystemEnvironment { FileExistsImpl = File.Exists };
77+
var engine = new TesseractOcrProvider(environment, processRunner);
78+
79+
var result = await engine.ExtractTextFromImageAsync(
80+
"input.png",
81+
nonTxtInputPath,
82+
new OcrOptions { TesseractPath = "/bin/echo" }).ConfigureAwait(true);
83+
84+
// The provider replaces newline characters with spaces, hence the single space here.
result.Should().Be("Hello World");
85+
}
86+
finally
87+
{
88+
// Remove the temp file whether or not the assertions passed.
if (File.Exists(actualTxtPath)) File.Delete(actualTxtPath);
89+
}
90+
}
91+
3292
[Fact]
3393
public void GetPdfRasterizerExecutable_UsesExplicitConverterPath()
3494
{

CreatePdf.NET/Internal/OcrService.cs

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,16 @@ private async Task<string> OcrAsync(string pdfPath, string workDir, OcrOptions o
6262
.ConfigureAwait(false);
6363
}
6464

65-
internal static void TryDeleteDirectory(string path)
65+
/// <summary>
/// Best-effort recursive removal of a directory. Does nothing when the directory does not
/// exist, and swallows <see cref="IOException"/> and <see cref="UnauthorizedAccessException"/>;
/// any other exception type still propagates.
/// </summary>
/// <param name="path">Directory to remove.</param>
/// <param name="deleteImpl">
/// Optional replacement for <c>Directory.Delete</c>, invoked as <c>(path, true)</c>. When
/// <c>null</c>, <c>Directory.Delete</c> is used. The tests pass a throwing delegate here to
/// simulate delete failures.
/// </param>
internal static void TryDeleteDirectory(string path, Action<string, bool>? deleteImpl = null)
6666
{
67-
if (Directory.Exists(path))
68-
Directory.Delete(path, recursive: true);
67+
try
68+
{
69+
// A missing directory is a no-op; the delete delegate is not invoked in that case.
if (!Directory.Exists(path)) return;
70+
(deleteImpl ?? Directory.Delete)(path, true);
71+
}
72+
// The filter limits the swallow to these two exception types.
catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
73+
{
74+
// Best-effort cleanup. Called from finally — must not mask the original exception.
75+
}
6976
}
7077
}

CreatePdf.NET/Internal/TesseractOcrProvider.cs

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@ public async Task<string> ExtractTextFromImageAsync(string pngPath, string txtPa
5454
var outputBase = Path.Combine(
5555
Path.GetDirectoryName(txtPath) ?? string.Empty,
5656
Path.GetFileNameWithoutExtension(txtPath));
57+
var actualTxtPath = outputBase + ".txt";
5758

5859
await _processRunner.RunAsync(
5960
CreateProcessInfo(
@@ -62,10 +63,10 @@ await _processRunner.RunAsync(
6263
cancellationToken)
6364
.ConfigureAwait(false);
6465

65-
if (!_systemEnvironment.FileExists(txtPath))
66-
throw new FileNotFoundException("OCR output file not found. Tesseract execution failed.", txtPath);
66+
if (!_systemEnvironment.FileExists(actualTxtPath))
67+
throw new FileNotFoundException("OCR output file not found. Tesseract execution failed.", actualTxtPath);
6768

68-
var text = await File.ReadAllTextAsync(txtPath, cancellationToken).ConfigureAwait(false);
69+
var text = await File.ReadAllTextAsync(actualTxtPath, cancellationToken).ConfigureAwait(false);
6970
return text.Trim().Replace("\n", " ").Replace("\r", " ");
7071
}
7172

0 commit comments

Comments
 (0)