Skip to content

Commit

Permalink
Address PR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
vkarak committed Sep 22, 2023
1 parent b778a76 commit 722bca5
Showing 1 changed file with 21 additions and 21 deletions.
42 changes: 21 additions & 21 deletions reframe/core/schedulers/pbs.py
Expand Up @@ -178,9 +178,21 @@ def _update_nodelist(self, job, nodespec):
job._nodelist = [x.split('/')[0] for x in nodespec.split('+')]
job._nodelist.sort()

# The second argument is to specialise some code paths to PBS Pro only, but
# not Torque.
def _poll(self, is_pbs_pro, *jobs):
def _query_exit_code(self, job):
    '''Try to retrieve the exit code of a past job.

    The extended job information is requested with ``qstat -xf <jobid>``
    and its standard output is searched for the ``Exit_status`` attribute.

    :arg job: The job to query; only its ``jobid`` attribute is used here.
    :returns: The exit status as an :class:`int`, or :obj:`None` if the
        ``qstat`` output contains no ``Exit_status`` line.
    '''

    # With PBS Pro we can obtain the exit status of a past job
    extended_info = osext.run_command(f'qstat -xf {job.jobid}')

    # The exit status may be negative (PBS Pro uses negative values when
    # the job could not be executed), so an optional minus sign must be
    # accepted; otherwise such failures would be reported as "unknown".
    exit_status_match = re.search(
        r'^ *Exit_status *= *(?P<exit_status>-?\d+)', extended_info.stdout,
        flags=re.MULTILINE,
    )
    if exit_status_match:
        return int(exit_status_match.group('exit_status'))

    return None

def poll(self, *jobs):
def output_ready(job):
# We report a job as finished only when its stdout/stderr are
# written back to the working directory
Expand Down Expand Up @@ -211,19 +223,7 @@ def output_ready(job):
if job.cancelled or output_ready(job):
self.log(f'Assuming job {job.jobid} completed')
job._completed = True
if is_pbs_pro:
# With PBS Pro we can obtain the exit status of the job,
# in case it actually failed.
extended_info = osext.run_command(
f'qstat -xf {job.jobid}'
)
exit_status_match = re.search(
r'^ *Exit_status *= *(?P<exit_status>\d+)',
extended_info.stdout,
flags=re.MULTILINE,
)
if exit_status_match:
job._exitcode = int(exit_status_match.group('exit_status'))
job._exitcode = self._query_exit_code(job)

return

Expand Down Expand Up @@ -292,13 +292,13 @@ def output_ready(job):
job._exception = JobError('maximum pending time exceeded',
job.jobid)

def poll(self, *job):
self._poll(True, *job)


@register_scheduler('torque')
class TorqueJobScheduler(PbsJobScheduler):
TASKS_OPT = '-l nodes={num_nodes}:ppn={num_cpus_per_node}'

def poll(self, *job):
self._poll(False, *job)
def _query_exit_code(self, job):
'''Try to retrieve the exit code of a past job.'''

# Torque does not provide a way to retrieve the history of jobs
return None

0 comments on commit 722bca5

Please sign in to comment.