Skip to content

Commit 3b0f544

Browse files
authored
Merge pull request #105 from ikelaiah/ikelaiah-rev08
Ikelaiah rev08
2 parents ca2bf27 + 001f712 commit 3b0f544

File tree

2 files changed

+17
-65
lines changed

2 files changed

+17
-65
lines changed

entries/ikelaiah/README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,13 @@ Iwan Kelaiah
123123
* Introduced a pointer to the weather record, `PStat = ^TStat`. This saves approx. 30 - 60 seconds.
124124

125125
* 1.7
126-
* Revision release - Sequential approach. 4-6 mins on my Inspiron 15 7510 laptop (a little improvement on speed).
126+
* Revision release - Sequential approach. 4-6 mins on my Inspiron 15 7510 laptop, around 4m50s (a little improvement on speed).
127127
* Converting Float as String to Int was a bit slow, so resorted to a lookup instead. This saves 30-45 seconds.
128-
* Re-arranged `if` statements in two places. This saves 10-15 seconds x 2 = ~ 30 seconds saving.
128+
* Re-arranged `if` statements in two places. This saves 10-15 seconds x 2 = ~ 30 seconds.
129+
130+
* 1.8
131+
* Revision release - Sequential approach. 3-5 mins on my Inspiron 15 7510 laptop, around 3m50s (a little improvement on speed).
132+
* Removed double lookup on dictionaries; removed `.Contains` and used `TryGetValue` instead. This saves approx 60 seconds.
129133

130134
## License
131135

entries/ikelaiah/src/weatherstation.pas

Lines changed: 11 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ interface
1111
, streamex
1212
, bufstream
1313
, lgHashMap
14-
, StrUtils
1514
{$IFDEF DEBUG}
1615
, Stopwatch
1716
{$ENDIF}
@@ -29,20 +28,18 @@ TStat = record
2928
sum: int64;
3029
cnt: int64;
3130
public
32-
constructor Create(const newMin: int64; const newMax: int64;
33-
const newSum: int64; const newCount: int64);
3431
function ToString: string;
3532
end;
3633
{Using pointer to TStat saves approx. 30-60 seconds for processing 1 billion rows}
3734
PStat = ^TStat;
3835

3936
type
4037
// Using this dictionary is approx. 4 mins faster than Generics.Collections.TDictionary
41-
TWeatherDictionaryLG = specialize TGHashMapQP<string, PStat>;
38+
TWeatherDictionaryLG = specialize TGHashMapLP<string, PStat>;
4239

4340
type
4441
// a type for storing valid lookup temperature
45-
TValidTemperatureDictionary = specialize TGHashMapQP<string, int64>;
42+
TValidTemperatureDictionary = specialize TGHashMapLP<string, int64>;
4643

4744
type
4845
// Create a class to encapsulate the temperature observations of each weather station.
@@ -106,15 +103,6 @@ function RemoveDots(const line: string): string;
106103
end;
107104
end;
108105

109-
constructor TStat.Create(const newMin: int64; const newMax: int64;
110-
const newSum: int64; const newCount: int64);
111-
begin
112-
self.min := newMin;
113-
self.max := newMax;
114-
self.sum := newSum;
115-
self.cnt := newCount;
116-
end;
117-
118106
function TStat.ToString: string;
119107
var
120108
minR, meanR, maxR: double; // Store the rounded values prior to saving to TStringList.
@@ -250,11 +238,8 @@ procedure TWeatherStation.AddCityTemperatureLG(const cityName: string;
250238
stat: PStat;
251239
begin
252240
// If city name exists, modify temp as needed
253-
if self.weatherDictionary.Contains(cityName) then
241+
if self.weatherDictionary.TryGetValue(cityName, stat) then
254242
begin
255-
// Get the temp record
256-
stat := self.weatherDictionary[cityName];
257-
258243
// Update min and max temps if needed
259244
// Re-arranged the if statement, to achieve minimal if checks.
260245
// This saves approx 15 seconds when processing 1 billion rows.
@@ -312,63 +297,31 @@ procedure TWeatherStation.ParseStationAndTemp(const line: string);
312297
if delimiterPos > 0 then
313298
begin
314299
// Get the weather station name
315-
// Using Copy and POS - as suggested by Gemini AI.
300+
// Using Copy and POS instead of SplitString - as suggested by Gemini AI.
316301
// This part saves 3 mins when processing 1 billion rows.
317-
//parsedStation := Copy(line, 1, delimiterPos - 1);
302+
303+
// No need to create a string
304+
// parsedStation := Copy(line, 1, delimiterPos - 1);
318305
strFloatTemp := Copy(line, delimiterPos + 1, Length(line));
319306

320307
// Using a lookup value speeds up 30-45 seconds
321-
if self.lookupStrFloatToIntList.Contains(strFloatTemp) then
308+
if self.lookupStrFloatToIntList.TryGetValue(strFloatTemp, parsedTemp) then
322309
begin
323-
parsedTemp := self.lookupStrFloatToIntList[strFloatTemp];
324-
self.AddCityTemperatureLG(Copy(line, 1, delimiterPos - 1),
325-
parsedTemp);
310+
self.AddCityTemperatureLG(Copy(line, 1, delimiterPos - 1), parsedTemp);
326311
end;
327-
328312
end;
329-
330-
{// Get position of the delimiter
331-
delimiterPos := Pos(';', line);
332-
if delimiterPos > 0 then
333-
begin
334-
// Get the weather station name
335-
// Using Copy and POS - as suggested by Gemini AI.
336-
// This part saves 3 mins faster when processing 1 billion rows.
337-
parsedStation := Copy(line, 1, delimiterPos - 1);
338-
339-
// Get the temperature recorded, as string, remove '.' from string float
340-
// because we want to save it as int64.
341-
strFloatTemp := Copy(line, delimiterPos + 1, Length(line));
342-
343-
// strFloatTemp := StringReplace(strFloatTemp, '.', '', [rfReplaceAll]);
344-
// The above operation is a bit expensive.
345-
// Rewrote a simple function which prevents creation of new string
346-
// in each iteration. Saved approx 20-30 seconds for 1 billion row.
347-
// Remove dots turns a float into an int.
348-
strFloatTemp := RemoveDots(strFloatTemp);
349-
350-
// Add the weather station and the recorded temp (as int64) in the TDictionary
351-
Val(strFloatTemp,
352-
parsedTemp,
353-
valCode);
354-
if valCode <> 0 then Exit;
355-
356-
// Add a record in TWeatherDictionary
357-
self.AddCityTemperatureLG(parsedStation, parsedTemp);
358-
end;}
359313
end;
360314

361315
procedure TWeatherStation.ReadMeasurements;
362316
var
363317
fileStream: TFileStream;
364318
streamReader: TStreamReader;
365-
line: string;
366319
begin
367320

368321
// Open the file for reading
369-
fileStream := TFileStream.Create(self.fname, fmOpenRead or fmShareDenyNone);
322+
fileStream := TFileStream.Create(self.fname, fmOpenRead);
370323
try
371-
streamReader := TStreamReader.Create(fileStream, 65536 * 2, False);
324+
streamReader := TStreamReader.Create(fileStream, 65536 * 32, False);
372325
try
373326
// Read and parse chunks of data until EOF -------------------------------
374327
while not streamReader.EOF do
@@ -390,11 +343,6 @@ procedure TWeatherStation.ProcessMeasurements;
390343
begin
391344
self.CreateLookupTemp;
392345
self.ReadMeasurements;
393-
// self.ReadMeasurementsBuf;
394-
// self.ReadMeasurementsClassic;
395-
{This chunking method cuts ~ 30 - 40 seconds of processing time from ~6.45 to 6.00
396-
But the SHA256 at the end is incorrect}
397-
// self.ReadMeasurementsInChunks(self.fname);
398346
self.SortWeatherStationAndStats;
399347
self.PrintSortedWeatherStationAndStats;
400348
end;

0 commit comments

Comments
 (0)