@@ -29,20 +29,18 @@ TStat = record
2929 sum: int64;
3030 cnt: int64;
3131 public
32- constructor Create(const newMin: int64; const newMax: int64;
33- const newSum: int64; const newCount: int64);
3432 function ToString : string;
3533 end ;
3634 { Using a pointer to TStat saves approx. 30-60 seconds when processing 1 billion rows }
3735 PStat = ^TStat;
3836
3937type
4038 // Using this dictionary is now approx. 4 mins faster than Generics.Collections.TDictionary
41- TWeatherDictionaryLG = specialize TGHashMapQP <string, PStat>;
39+ TWeatherDictionaryLG = specialize TGHashMapLP <string, PStat>;
4240
4341type
4442 // A type for storing valid temperature lookup values
45- TValidTemperatureDictionary = specialize TGHashMapQP <string, int64>;
43+ TValidTemperatureDictionary = specialize TGHashMapLP <string, int64>;
4644
4745type
4846 // Create a class to encapsulate the temperature observations of each weather station.
@@ -106,15 +104,6 @@ function RemoveDots(const line: string): string;
106104 end ;
107105end ;
108106
109- constructor TStat.Create(const newMin: int64; const newMax: int64;
110- const newSum: int64; const newCount: int64);
111- begin
112- self.min := newMin;
113- self.max := newMax;
114- self.sum := newSum;
115- self.cnt := newCount;
116- end ;
117-
118107function TStat.ToString : string;
119108var
120109 minR, meanR, maxR: double; // Store the rounded values prior saving to TStringList.
@@ -314,7 +303,9 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string);
314303 // Get the weather station name
315304 // Using Copy and POS - as suggested by Gemini AI.
316305 // This part is 3 mins faster when processing 1 billion rows.
317- // parsedStation := Copy(line, 1, delimiterPos - 1);
306+
307+ // No need to create a string
308+ // parsedStation := Copy(line, 1, delimiterPos - 1);
318309 strFloatTemp := Copy(line, delimiterPos + 1 , Length(line));
319310
320311 // Using a lookup value saves 30-45 seconds
@@ -324,49 +315,19 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string);
324315 self.AddCityTemperatureLG(Copy(line, 1 , delimiterPos - 1 ),
325316 parsedTemp);
326317 end ;
327-
328318 end ;
329-
330- { // Get position of the delimiter
331- delimiterPos := Pos(';', line);
332- if delimiterPos > 0 then
333- begin
334- // Get the weather station name
335- // Using Copy and POS - as suggested by Gemini AI.
336- // This part saves 3 mins faster when processing 1 billion rows.
337- parsedStation := Copy(line, 1, delimiterPos - 1);
338-
339- // Get the temperature recorded, as string, remove '.' from string float
340- // because we want to save it as int64.
341- strFloatTemp := Copy(line, delimiterPos + 1, Length(line));
342-
343- // strFloatTemp := StringReplace(strFloatTemp, '.', '', [rfReplaceAll]);
344- // The above operation is a bit expensive.
345- // Rewrote a simple function which prevents creation of new string
346- // in each iteration. Saved approx 20-30 seconds for 1 billion row.
347- // Remove dots turns a float into an int.
348- strFloatTemp := RemoveDots(strFloatTemp);
349-
350- // Add the weather station and the recorded temp (as int64) in the TDictionary
351- Val(strFloatTemp,
352- parsedTemp,
353- valCode);
354- if valCode <> 0 then Exit;
355-
356- // Add a record in TWeatherDictionary
357- self.AddCityTemperatureLG(parsedStation, parsedTemp);
358- end;}
359319end ;
360320
361321procedure TWeatherStation.ReadMeasurements ;
362322var
363323 fileStream: TFileStream;
324+ bufStream: TReadBufStream;
364325 streamReader: TStreamReader;
365326 line: string;
366327begin
367328
368329 // Open the file for reading
369- fileStream := TFileStream.Create(self.fname, fmOpenRead or fmShareDenyNone );
330+ fileStream := TFileStream.Create(self.fname, fmOpenRead);
370331 try
371332 streamReader := TStreamReader.Create(fileStream, 65536 * 2 , False);
372333 try
@@ -390,11 +351,6 @@ procedure TWeatherStation.ProcessMeasurements;
390351begin
391352 self.CreateLookupTemp;
392353 self.ReadMeasurements;
393- // self.ReadMeasurementsBuf;
394- // self.ReadMeasurementsClassic;
395- { This chunking method cuts ~ 30 - 40 seconds of processing time from ~6.45 to 6.00
396- But the SHA256 at the end is incorrect}
397- // self.ReadMeasurementsInChunks(self.fname);
398354 self.SortWeatherStationAndStats;
399355 self.PrintSortedWeatherStationAndStats;
400356end ;
0 commit comments