Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions Client/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ def pgnout_settings(config, timestamp, cutechess_idx):
return '-pgnout %s' % (Cutechess.pgn_name(config, timestamp, cutechess_idx))

@staticmethod
def update_results(results, line):
def update_results(config, results, line, base_name, base_network):

# Given any game #, find the other in the pair
def game_to_pair(g):
Expand All @@ -514,21 +514,38 @@ def parse_finished_game(line):
tokens = line.split()
return int(tokens[2]), tokens[6]

def is_gpu_crashed(config, engine, network):
print('[WARNING] Checking if crash was caused by a GPU problem...')
try:
safe_run_benchmarks(config, 'base', engine, network)
return False
except utils.OpenBenchBadBenchException:
print('[ERROR] GPU crash detected!')
Comment on lines +517 to +523
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think crash-handling performance matters: either the client is going to stop working or the test is about to fail.

return True
Comment on lines +517 to +524
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want the server error report (and blacklisting) after the benchmark fails. It was only about not reporting game results.


# Parse for errors resulting in adjudication
reason = line.split(':')[1]
crashed = 'disconnect' in reason or 'stalls' in reason
hw_crashed = crashed and is_gpu_crashed(config, base_name, base_network)
results['crashes' ] += 'disconnect' in reason or 'stalls' in reason
results['timelosses'] += 'on time' in reason
results['illegals' ] += 'illegal' in reason

Comment on lines 530 to 533
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I intentionally want to increment the crash count for HW errors too. It makes sure we will notice and check the Errors page.

# Parse Game # and result, and save
game, result = parse_finished_game(line)
results['games'][game] = result
results['games'][game] = result if not hw_crashed else 'hw_crash'

# Check to see if the Pair has finished
first, second = game_to_pair(game)
if first not in results['games'] or second not in results['games']:
return

# Don't report results when we detect a GPU issue.
if results['games'][first] == 'hw_crash' or results['games'][second] == 'hw_crash':
del results['games'][first]
del results['games'][second]
return

# Get the indices for the Pentanomial, and the two for Trinomial
p = pair_to_penta(results['games'][first], results['games'][second])
t1, t2 = pair_to_trinomial(results['games'][first], results['games'][second])
Expand Down Expand Up @@ -1024,7 +1041,7 @@ def complete_workload(config):
tasks = [] # Create each of the Cutechess workers
for x in range(cutechess_cnt):
cmd = build_cutechess_command(config, dev_name, base_name, scale_factor, timestamp, x)
tasks.append(executor.submit(run_and_parse_cutechess, config, cmd, x, results, abort_flag))
tasks.append(executor.submit(run_and_parse_cutechess, config, cmd, x, results, abort_flag, base_name, base_network))

# Process the Queue until we exit, finish, or are told to stop by the server
try:
Expand Down Expand Up @@ -1148,7 +1165,7 @@ def build_cutechess_command(config, dev_cmd, base_cmd, scale_factor, timestamp,

return ['cutechess-ob.exe', './cutechess-ob'][IS_LINUX] + flags

def run_and_parse_cutechess(config, command, cutechess_idx, results_queue, abort_flag):
def run_and_parse_cutechess(config, command, cutechess_idx, results_queue, abort_flag, base_name, base_network):

print('\n[#%d] Launching Cutechess...\n%s\n' % (cutechess_idx, command))
cutechess = Popen(command.split(), stdout=PIPE)
Expand Down Expand Up @@ -1178,7 +1195,7 @@ def run_and_parse_cutechess(config, command, cutechess_idx, results_queue, abort
print('[#%d] %s' % (cutechess_idx, line))

if 'Finished game' in line:
Cutechess.update_results(results, line)
Cutechess.update_results(config, results, line, base_name, base_network)

# Add to the results queue every time we have a game-pair finished
if any(results['pentanomial']):
Expand Down
Loading