Skip to content

Commit 609f61a

Browse files
ANcpLua and claude committed
Apply PR review feedback
- Trim redundant 2nd comment line in TesseractOcrProvider (the +".txt" re-attach is self-evident from the code).
- Drop duplicate WHAT comment in TesseractOcrProviderTests; the test name and variable names already state the same fact.
- Convert ExtractTextFromImageAsync_WhenOutputPathHasNonTxtExtension to a Theory with 5 InlineData rows: out.log, out (no ext), foo.bar.log (multi-dot), out.TXT (uppercase), out. (trailing dot). Closes the edge-case gaps in path derivation.
- Add ExtractTextFromImageAsync_WithNonTxtInputPath_ReadsFromDerivedTxtPath to cover the happy-path File.ReadAllTextAsync(actualTxtPath, ...) line that was previously only reachable via the throw branch.
- Refactor OcrService.TryDeleteDirectory with an optional Action<string,bool> deleteImpl parameter (defaults to Directory.Delete) so the catch-when filter can be tested deterministically and cross-platform without chmod / FileShare tricks.
- Add two tests that inject throwing delegates to cover IOException and UnauthorizedAccessException paths of the catch-when filter.

Coverage gaps flagged by review are now closed; 253 tests pass on net8.0, net9.0, net10.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 7d1327f commit 609f61a

4 files changed

Lines changed: 78 additions & 16 deletions

File tree

CreatePdf.NET.Tests/OcrServiceTests.cs

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,4 +85,40 @@ public void TryDeleteDirectory_NonExistentDirectory_DoesNotThrow()
8585

8686
act.Should().NotThrow();
8787
}
88+
89+
[Fact]
90+
public void TryDeleteDirectory_WhenDeleteThrowsIOException_DoesNotPropagate()
91+
{
92+
var tempDir = Directory.CreateTempSubdirectory("createpdf-ocr-test-");
93+
try
94+
{
95+
var act = () => OcrService.TryDeleteDirectory(
96+
tempDir.FullName,
97+
(_, _) => throw new IOException("simulated lock"));
98+
99+
act.Should().NotThrow();
100+
}
101+
finally
102+
{
103+
Directory.Delete(tempDir.FullName, recursive: true);
104+
}
105+
}
106+
107+
[Fact]
108+
public void TryDeleteDirectory_WhenDeleteThrowsUnauthorizedAccess_DoesNotPropagate()
109+
{
110+
var tempDir = Directory.CreateTempSubdirectory("createpdf-ocr-test-");
111+
try
112+
{
113+
var act = () => OcrService.TryDeleteDirectory(
114+
tempDir.FullName,
115+
(_, _) => throw new UnauthorizedAccessException("simulated permission denied"));
116+
117+
act.Should().NotThrow();
118+
}
119+
finally
120+
{
121+
Directory.Delete(tempDir.FullName, recursive: true);
122+
}
123+
}
88124
}

CreatePdf.NET.Tests/TesseractOcrProviderTests.cs

Lines changed: 38 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -29,11 +29,17 @@ await act.Should().ThrowAsync<FileNotFoundException>()
2929
processRunner.StartInfos[0].Arguments.Should().Contain("input.png");
3030
}
3131

32-
[Fact]
33-
public async Task ExtractTextFromImageAsync_WhenOutputPathHasNonTxtExtension_ChecksDerivedTxtPath()
32+
[Theory]
33+
[InlineData("out.log", "out.txt")]
34+
[InlineData("out", "out.txt")]
35+
[InlineData("foo.bar.log", "foo.bar.txt")]
36+
[InlineData("out.TXT", "out.txt")]
37+
[InlineData("out.", "out.txt")]
38+
public async Task ExtractTextFromImageAsync_DerivesActualTxtPathFromBase(string inputName, string expectedName)
3439
{
35-
// Tesseract always appends ".txt" to the base it is given. If the caller hands us
36-
// a path like "out.log", the provider must read back "out.txt", not "out.log".
40+
var tempDir = Path.GetTempPath();
41+
var inputPath = Path.Combine(tempDir, inputName);
42+
var expectedPath = Path.Combine(tempDir, expectedName);
3743
var checkedPaths = new List<string>();
3844
var processRunner = new FakeProcessRunner();
3945
var environment = new FakeSystemEnvironment
@@ -46,20 +52,41 @@ public async Task ExtractTextFromImageAsync_WhenOutputPathHasNonTxtExtension_Che
4652
};
4753
var engine = new TesseractOcrProvider(environment, processRunner);
4854

49-
var tempDir = Path.GetTempPath();
50-
var nonTxtPath = Path.Combine(tempDir, "out.log");
51-
var expectedDerivedPath = Path.Combine(tempDir, "out.txt");
52-
5355
var act = () => engine.ExtractTextFromImageAsync(
5456
"input.png",
55-
nonTxtPath,
57+
inputPath,
5658
new OcrOptions { TesseractPath = "/bin/echo" });
5759

5860
await act.Should().ThrowAsync<FileNotFoundException>()
59-
.Where(e => e.FileName == expectedDerivedPath)
61+
.Where(e => e.FileName == expectedPath)
6062
.ConfigureAwait(true);
6163

62-
checkedPaths.Should().ContainSingle().Which.Should().Be(expectedDerivedPath);
64+
checkedPaths.Should().ContainSingle().Which.Should().Be(expectedPath);
65+
}
66+
67+
[Fact]
68+
public async Task ExtractTextFromImageAsync_WithNonTxtInputPath_ReadsFromDerivedTxtPath()
69+
{
70+
var actualTxtPath = Path.Combine(Path.GetTempPath(), $"createpdf-test-{Guid.NewGuid():N}.txt");
71+
var nonTxtInputPath = Path.ChangeExtension(actualTxtPath, ".log");
72+
await File.WriteAllTextAsync(actualTxtPath, "Hello\nWorld").ConfigureAwait(true);
73+
try
74+
{
75+
var processRunner = new FakeProcessRunner();
76+
var environment = new FakeSystemEnvironment { FileExistsImpl = File.Exists };
77+
var engine = new TesseractOcrProvider(environment, processRunner);
78+
79+
var result = await engine.ExtractTextFromImageAsync(
80+
"input.png",
81+
nonTxtInputPath,
82+
new OcrOptions { TesseractPath = "/bin/echo" }).ConfigureAwait(true);
83+
84+
result.Should().Be("Hello World");
85+
}
86+
finally
87+
{
88+
if (File.Exists(actualTxtPath)) File.Delete(actualTxtPath);
89+
}
6390
}
6491

6592
[Fact]

CreatePdf.NET/Internal/OcrService.cs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -62,12 +62,12 @@ private async Task<string> OcrAsync(string pdfPath, string workDir, OcrOptions o
6262
.ConfigureAwait(false);
6363
}
6464

65-
internal static void TryDeleteDirectory(string path)
65+
internal static void TryDeleteDirectory(string path, Action<string, bool>? deleteImpl = null)
6666
{
6767
try
6868
{
69-
if (Directory.Exists(path))
70-
Directory.Delete(path, recursive: true);
69+
if (!Directory.Exists(path)) return;
70+
(deleteImpl ?? Directory.Delete)(path, true);
7171
}
7272
catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
7373
{

CreatePdf.NET/Internal/TesseractOcrProvider.cs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,7 @@ await _processRunner.RunAsync(
5050
public async Task<string> ExtractTextFromImageAsync(string pngPath, string txtPath, OcrOptions options,
5151
CancellationToken cancellationToken = default)
5252
{
53-
// Tesseract appends ".txt" itself, so strip the extension to give it the base path
54-
// and then re-attach .txt for the file we actually read back.
53+
// Tesseract appends ".txt" itself, so strip the extension to give it the base path.
5554
var outputBase = Path.Combine(
5655
Path.GetDirectoryName(txtPath) ?? string.Empty,
5756
Path.GetFileNameWithoutExtension(txtPath));

0 commit comments

Comments
 (0)