Skip to content

Commit

Permalink
Check exit status of PBS Pro jobs
Browse files Browse the repository at this point in the history
  • Loading branch information
giordano committed Sep 12, 2023
1 parent fc54c42 commit ba30cd6
Showing 1 changed file with 22 additions and 1 deletion.
23 changes: 22 additions & 1 deletion reframe/core/schedulers/pbs.py
Expand Up @@ -178,7 +178,9 @@ def _update_nodelist(self, job, nodespec):
job._nodelist = [x.split('/')[0] for x in nodespec.split('+')]
job._nodelist.sort()

def poll(self, *jobs):
# `is_pbs_pro` enables the code paths that are specific to PBS Pro and
# must not be taken for Torque.
def _poll(self, is_pbs_pro, *jobs):
def output_ready(job):
# We report a job as finished only when its stdout/stderr are
# written back to the working directory
Expand Down Expand Up @@ -209,6 +211,19 @@ def output_ready(job):
if job.cancelled or output_ready(job):
self.log(f'Assuming job {job.jobid} completed')
job._completed = True
if is_pbs_pro:
# With PBS Pro we can obtain the exit status of the job,
# in case it actually failed.
extended_info = osext.run_command(
f'qstat -xf {job.jobid}'
)
exit_status_match = re.search(
r'^ *Exit_status *= *(?P<exit_status>\d+)',
extended_info.stdout,
flags=re.MULTILINE,
)
if exit_status_match:
job._exitcode = exit_status_match.group('exit_status')

return

Expand Down Expand Up @@ -277,7 +292,13 @@ def output_ready(job):
job._exception = JobError('maximum pending time exceeded',
job.jobid)

def poll(self, *jobs):
    """Poll the given jobs with the PBS Pro-specific code paths enabled.

    This is a thin wrapper that forwards every job to ``_poll`` with its
    ``is_pbs_pro`` flag set to ``True``.

    :arg jobs: The jobs to poll; they are passed through unchanged.
    """
    # The flag is passed positionally because ``_poll`` takes it right
    # before the variadic job list.
    self._poll(True, *jobs)


@register_scheduler('torque')
class TorqueJobScheduler(PbsJobScheduler):
    """Scheduler backend registered as ``'torque'``.

    It derives from :class:`PbsJobScheduler` and overrides only the task
    option template and the polling entry point.
    """

    # Option template with `num_nodes` and `num_cpus_per_node`
    # placeholders; where it is formatted is not visible from here.
    TASKS_OPT = '-l nodes={num_nodes}:ppn={num_cpus_per_node}'

    def poll(self, *jobs):
        """Poll the given jobs with the PBS Pro-only code paths disabled.

        Forwards every job to the inherited ``_poll`` with its
        ``is_pbs_pro`` flag set to ``False``.

        :arg jobs: The jobs to poll; they are passed through unchanged.
        """
        self._poll(False, *jobs)

0 comments on commit ba30cd6

Please sign in to comment.