@@ -11,7 +11,6 @@ interface
1111 , streamex
1212 , bufstream
1313 , lgHashMap
14- , StrUtils
1514 { $IFDEF DEBUG}
1615 , Stopwatch
1716 { $ENDIF}
@@ -29,20 +28,18 @@ TStat = record
2928 sum: int64;
3029 cnt: int64;
3130 public
32- constructor Create(const newMin: int64; const newMax: int64;
33- const newSum: int64; const newCount: int64);
3431 function ToString : string;
3532 end ;
3633 { Using pointer to TStat saves approx. 30-60 seconds for processing 1 billion rows}
3734 PStat = ^TStat;
3835
3936type
4037 // Using this dictionary, now approx 4 mins faster than Generics.Collections.TDictionary
41- TWeatherDictionaryLG = specialize TGHashMapQP <string, PStat>;
38+ TWeatherDictionaryLG = specialize TGHashMapLP <string, PStat>;
4239
4340type
4441 // a type for storing valid lookup temperature
45- TValidTemperatureDictionary = specialize TGHashMapQP <string, int64>;
42+ TValidTemperatureDictionary = specialize TGHashMapLP <string, int64>;
4643
4744type
4845 // Create a class to encapsulate the temperature observations of each weather station.
@@ -106,15 +103,6 @@ function RemoveDots(const line: string): string;
106103 end ;
107104end ;
108105
109- constructor TStat.Create(const newMin: int64; const newMax: int64;
110- const newSum: int64; const newCount: int64);
111- begin
112- self.min := newMin;
113- self.max := newMax;
114- self.sum := newSum;
115- self.cnt := newCount;
116- end ;
117-
118106function TStat.ToString : string;
119107var
120108 minR, meanR, maxR: double; // Store the rounded values prior saving to TStringList.
@@ -250,11 +238,8 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: string;
250238 stat: PStat;
251239begin
252240 // If city name exists, modify temp as needed
253- if self.weatherDictionary.Contains (cityName) then
241+ if self.weatherDictionary.TryGetValue (cityName, stat ) then
254242 begin
255- // Get the temp record
256- stat := self.weatherDictionary[cityName];
257-
258243 // Update min and max temps if needed
259244 // Re-arranged the if statement, to achieve minimal if checks.
260245 // This saves approx 15 seconds when processing 1 billion rows.
@@ -312,63 +297,31 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string);
312297 if delimiterPos > 0 then
313298 begin
314299 // Get the weather station name
315- // Using Copy and POS - as suggested by Gemini AI.
300+ // Using Copy and POS instead of SplitString - as suggested by Gemini AI.
316301 // This part saves 3 mins when processing 1 billion rows.
317- // parsedStation := Copy(line, 1, delimiterPos - 1);
302+
303+ // No need to create a string
304+ // parsedStation := Copy(line, 1, delimiterPos - 1);
318305 strFloatTemp := Copy(line, delimiterPos + 1 , Length(line));
319306
320307 // Using a lookup value saves 30-45 seconds
321- if self.lookupStrFloatToIntList.Contains (strFloatTemp) then
308+ if self.lookupStrFloatToIntList.TryGetValue (strFloatTemp, parsedTemp ) then
322309 begin
323- parsedTemp := self.lookupStrFloatToIntList[strFloatTemp];
324- self.AddCityTemperatureLG(Copy(line, 1 , delimiterPos - 1 ),
325- parsedTemp);
310+ self.AddCityTemperatureLG(Copy(line, 1 , delimiterPos - 1 ), parsedTemp);
326311 end ;
327-
328312 end ;
329-
330- { // Get position of the delimiter
331- delimiterPos := Pos(';', line);
332- if delimiterPos > 0 then
333- begin
334- // Get the weather station name
335- // Using Copy and POS - as suggested by Gemini AI.
336- // This part saves 3 mins faster when processing 1 billion rows.
337- parsedStation := Copy(line, 1, delimiterPos - 1);
338-
339- // Get the temperature recorded, as string, remove '.' from string float
340- // because we want to save it as int64.
341- strFloatTemp := Copy(line, delimiterPos + 1, Length(line));
342-
343- // strFloatTemp := StringReplace(strFloatTemp, '.', '', [rfReplaceAll]);
344- // The above operation is a bit expensive.
345- // Rewrote a simple function which prevents creation of new string
346- // in each iteration. Saved approx 20-30 seconds for 1 billion row.
347- // Remove dots turns a float into an int.
348- strFloatTemp := RemoveDots(strFloatTemp);
349-
350- // Add the weather station and the recorded temp (as int64) in the TDictionary
351- Val(strFloatTemp,
352- parsedTemp,
353- valCode);
354- if valCode <> 0 then Exit;
355-
356- // Add a record in TWeatherDictionary
357- self.AddCityTemperatureLG(parsedStation, parsedTemp);
358- end;}
359313end ;
360314
361315procedure TWeatherStation.ReadMeasurements ;
362316var
363317 fileStream: TFileStream;
364318 streamReader: TStreamReader;
365- line: string;
366319begin
367320
368321 // Open the file for reading
369- fileStream := TFileStream.Create(self.fname, fmOpenRead or fmShareDenyNone );
322+ fileStream := TFileStream.Create(self.fname, fmOpenRead);
370323 try
371- streamReader := TStreamReader.Create(fileStream, 65536 * 2 , False);
324+ streamReader := TStreamReader.Create(fileStream, 65536 * 32 , False);
372325 try
373326 // Read and parse chunks of data until EOF -------------------------------
374327 while not streamReader.EOF do
@@ -390,11 +343,6 @@ procedure TWeatherStation.ProcessMeasurements;
390343begin
391344 self.CreateLookupTemp;
392345 self.ReadMeasurements;
393- // self.ReadMeasurementsBuf;
394- // self.ReadMeasurementsClassic;
395- { This chunking method cuts ~ 30 - 40 seconds of processing time from ~6.45 to 6.00
396- But the SHA256 at the end is incorrect}
397- // self.ReadMeasurementsInChunks(self.fname);
398346 self.SortWeatherStationAndStats;
399347 self.PrintSortedWeatherStationAndStats;
400348end ;
0 commit comments