Skip to content

Commit 63d069b

Browse files
authored
Merge pull request #6 from ANcpLua/chore/finish-system-env-abstraction
chore(ocr): finish ISystemEnvironment abstraction — read via the interface
2 parents edfb0dc + 8d25af2 commit 63d069b

5 files changed

Lines changed: 74 additions & 17 deletions

File tree

CreatePdf.NET.Tests/RuntimeSystemEnvironmentTests.cs

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,4 +31,25 @@ public void FileExists_DelegatesToFileSystem()
3131

3232
RuntimeSystemEnvironment.Instance.FileExists(temp).Should().BeFalse();
3333
}
34+
35+
[Fact]
36+
public async Task ReadAllTextAsync_DelegatesToFileSystem()
37+
{
38+
var tempDir = Directory.CreateTempSubdirectory("createpdf-runtime-read-");
39+
try
40+
{
41+
var path = Path.Combine(tempDir.FullName, "payload.txt");
42+
await File.WriteAllTextAsync(path, "hello world").ConfigureAwait(true);
43+
44+
var text = await RuntimeSystemEnvironment.Instance
45+
.ReadAllTextAsync(path, CancellationToken.None)
46+
.ConfigureAwait(true);
47+
48+
text.Should().Be("hello world");
49+
}
50+
finally
51+
{
52+
tempDir.Delete(recursive: true);
53+
}
54+
}
3455
}

CreatePdf.NET.Tests/TesseractOcrProviderTests.cs

Lines changed: 47 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -89,26 +89,52 @@ await act.Should().ThrowAsync<FileNotFoundException>()
8989
[Fact]
9090
public async Task ExtractTextFromImageAsync_WithNonTxtInputPath_ReadsFromDerivedTxtPath()
9191
{
92-
var actualTxtPath = Path.Combine(Path.GetTempPath(), $"createpdf-test-{Guid.NewGuid():N}.txt");
93-
var nonTxtInputPath = Path.ChangeExtension(actualTxtPath, ".log");
94-
await File.WriteAllTextAsync(actualTxtPath, "Hello\nWorld").ConfigureAwait(true);
95-
try
92+
string? readPath = null;
93+
var processRunner = new FakeProcessRunner();
94+
var environment = new FakeSystemEnvironment
9695
{
97-
var processRunner = new FakeProcessRunner();
98-
var environment = new FakeSystemEnvironment { FileExistsImpl = File.Exists };
99-
var engine = new TesseractOcrProvider(environment, processRunner);
96+
FileExistsImpl = _ => true,
97+
ReadAllTextImpl = p =>
98+
{
99+
readPath = p;
100+
return "Hello\nWorld";
101+
}
102+
};
103+
var engine = new TesseractOcrProvider(environment, processRunner);
100104

101-
var result = await engine.ExtractTextFromImageAsync(
102-
"input.png",
103-
nonTxtInputPath,
104-
new OcrOptions { TesseractPath = "/bin/echo" }).ConfigureAwait(true);
105+
var inputPath = Path.Combine(Path.GetTempPath(), "out.log");
106+
var expectedReadPath = Path.Combine(Path.GetTempPath(), "out.txt");
105107

106-
result.Should().Be("Hello World");
107-
}
108-
finally
108+
var result = await engine.ExtractTextFromImageAsync(
109+
"input.png",
110+
inputPath,
111+
new OcrOptions { TesseractPath = "/bin/echo" }).ConfigureAwait(true);
112+
113+
readPath.Should().Be(expectedReadPath);
114+
result.Should().Be("Hello World");
115+
}
116+
117+
[Fact]
118+
public async Task ExtractTextFromImageAsync_ReadsOutputViaSystemEnvironment_AndNormalisesWhitespace()
119+
{
120+
string? readPath = null;
121+
var environment = new FakeSystemEnvironment
109122
{
110-
if (File.Exists(actualTxtPath)) File.Delete(actualTxtPath);
111-
}
123+
FileExistsImpl = _ => true,
124+
ReadAllTextImpl = p =>
125+
{
126+
readPath = p;
127+
return " Line 1\nLine 2\rLine 3 ";
128+
}
129+
};
130+
var engine = new TesseractOcrProvider(environment, new FakeProcessRunner());
131+
132+
var result = await engine.ExtractTextFromImageAsync(
133+
"input.png", "output.txt", new OcrOptions { TesseractPath = "/bin/echo" })
134+
.ConfigureAwait(true);
135+
136+
readPath.Should().Be("output.txt", "the provider must read through the abstraction, not File directly");
137+
result.Should().Be("Line 1 Line 2 Line 3");
112138
}
113139

114140
[Fact]
@@ -222,7 +248,12 @@ private sealed class FakeSystemEnvironment : ISystemEnvironment
222248

223249
public Func<string, bool>? FileExistsImpl { get; set; }
224250

251+
public Func<string, string>? ReadAllTextImpl { get; set; }
252+
225253
public bool FileExists(string path) => (FileExistsImpl ?? (_ => false)).Invoke(path);
254+
255+
public Task<string> ReadAllTextAsync(string path, CancellationToken cancellationToken) =>
256+
Task.FromResult((ReadAllTextImpl ?? (_ => string.Empty)).Invoke(path));
226257
}
227258

228259
private sealed class FakeProcessRunner : IProcessRunner

CreatePdf.NET/Internal/ISystemEnvironment.cs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,4 +9,6 @@ internal interface ISystemEnvironment
99
bool Is64BitOperatingSystem { get; }
1010

1111
bool FileExists(string path);
12+
13+
Task<string> ReadAllTextAsync(string path, CancellationToken cancellationToken);
1214
}

CreatePdf.NET/Internal/RuntimeSystemEnvironment.cs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,4 +11,7 @@ internal sealed class RuntimeSystemEnvironment : ISystemEnvironment
1111
public bool Is64BitOperatingSystem => Environment.Is64BitOperatingSystem;
1212

1313
public bool FileExists(string path) => File.Exists(path);
14+
15+
public Task<string> ReadAllTextAsync(string path, CancellationToken cancellationToken) =>
16+
File.ReadAllTextAsync(path, cancellationToken);
1417
}

CreatePdf.NET/Internal/TesseractOcrProvider.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ public async Task<string> ExtractTextFromImageAsync(string pngPath, string txtPa
7272
throw new FileNotFoundException(message, actualTxtPath);
7373
}
7474

75-
var text = await File.ReadAllTextAsync(actualTxtPath, cancellationToken).ConfigureAwait(false);
75+
var text = await _systemEnvironment.ReadAllTextAsync(actualTxtPath, cancellationToken).ConfigureAwait(false);
7676
return text.Trim().Replace("\n", " ").Replace("\r", " ");
7777
}
7878

0 commit comments

Comments (0)