@@ -89,26 +89,52 @@ await act.Should().ThrowAsync<FileNotFoundException>()
/// <summary>
/// Passes an output path ending in <c>.log</c> and checks that the path handed to the
/// environment's read stub is the sibling <c>out.txt</c> path, and that the text returned
/// for the stubbed content <c>"Hello\n World"</c> is <c>"Hello World"</c>.
/// </summary>
[Fact]
public async Task ExtractTextFromImageAsync_WithNonTxtInputPath_ReadsFromDerivedTxtPath()
{
    // Arrange: both paths live in the same temp directory and differ only by extension.
    var tempDirectory = Path.GetTempPath();
    var requestedOutputPath = Path.Combine(tempDirectory, "out.log");
    var derivedTxtPath = Path.Combine(tempDirectory, "out.txt");

    // The fake records whichever path it is asked to read; no real file is touched.
    string? capturedPath = null;
    var fakeEnvironment = new FakeSystemEnvironment();
    fakeEnvironment.FileExistsImpl = _ => true;
    fakeEnvironment.ReadAllTextImpl = path =>
    {
        capturedPath = path;
        return "Hello\n World";
    };

    var provider = new TesseractOcrProvider(fakeEnvironment, new FakeProcessRunner());
    var options = new OcrOptions { TesseractPath = "/bin/echo" };

    // Act
    var text = await provider
        .ExtractTextFromImageAsync("input.png", requestedOutputPath, options)
        .ConfigureAwait(true);

    // Assert
    capturedPath.Should().Be(derivedTxtPath);
    text.Should().Be("Hello World");
}
116+
/// <summary>
/// Checks that the output text is obtained through the environment's read stub (the stub
/// must observe <c>"output.txt"</c>) and that the stubbed multi-line content comes back
/// as <c>"Line 1 Line 2 Line 3"</c>.
/// </summary>
[Fact]
public async Task ExtractTextFromImageAsync_ReadsOutputViaSystemEnvironment_AndNormalisesWhitespace()
{
    // Arrange: raw content mixes \n, \r and surrounding spaces.
    const string rawOcrOutput = " Line 1\n Line 2\r Line 3 ";
    string? observedPath = null;

    // Records the path it is asked for, then serves the canned content.
    string CaptureAndReturn(string path)
    {
        observedPath = path;
        return rawOcrOutput;
    }

    var fakeEnvironment = new FakeSystemEnvironment
    {
        FileExistsImpl = _ => true,
        ReadAllTextImpl = CaptureAndReturn,
    };
    var provider = new TesseractOcrProvider(fakeEnvironment, new FakeProcessRunner());
    var options = new OcrOptions { TesseractPath = "/bin/echo" };

    // Act
    var text = await provider
        .ExtractTextFromImageAsync("input.png", "output.txt", options)
        .ConfigureAwait(true);

    // Assert
    observedPath.Should().Be("output.txt", "the provider must read through the abstraction, not File directly");
    text.Should().Be("Line 1 Line 2 Line 3");
}
113139
114140 [ Fact ]
@@ -222,7 +248,12 @@ private sealed class FakeSystemEnvironment : ISystemEnvironment
222248
/// <summary>
/// Optional stub used by <see cref="FileExists"/>. When left unset, every path is
/// reported as not existing.
/// </summary>
public Func<string, bool>? FileExistsImpl { get; set; }

/// <summary>
/// Optional stub used by <see cref="ReadAllTextAsync"/>. When left unset, reads yield
/// <see cref="string.Empty"/>.
/// </summary>
public Func<string, string>? ReadAllTextImpl { get; set; }

/// <summary>Answers an existence query via <see cref="FileExistsImpl"/>, defaulting to <c>false</c>.</summary>
/// <param name="path">Path forwarded unchanged to the stub.</param>
/// <returns>The stub's answer, or <c>false</c> when no stub is configured.</returns>
public bool FileExists(string path) => (FileExistsImpl ?? (_ => false)).Invoke(path);

/// <summary>
/// Serves file content from <see cref="ReadAllTextImpl"/> as an already-completed task.
/// </summary>
/// <param name="path">Path forwarded unchanged to the stub.</param>
/// <param name="cancellationToken">
/// If cancellation has already been requested, the stub is not invoked and a cancelled
/// task is returned.
/// </param>
/// <returns>
/// A completed task holding the stub's result, or <see cref="string.Empty"/> when no
/// stub is configured.
/// </returns>
public Task<string> ReadAllTextAsync(string path, CancellationToken cancellationToken)
{
    // Honour the token instead of ignoring it, so cancellation tests that go through
    // this fake see a cancelled task.
    if (cancellationToken.IsCancellationRequested)
    {
        return Task.FromCanceled<string>(cancellationToken);
    }

    return Task.FromResult((ReadAllTextImpl ?? (_ => string.Empty)).Invoke(path));
}
226257 }
227258
228259 private sealed class FakeProcessRunner : IProcessRunner
0 commit comments