Skip to content

Commit

Permalink
Check exit status of PBS Pro jobs
Browse files Browse the repository at this point in the history
  • Loading branch information
giordano committed Sep 11, 2023
1 parent b117f2e commit 10fedf5
Showing 1 changed file with 21 additions and 1 deletion.
22 changes: 21 additions & 1 deletion reframe/core/schedulers/pbs.py
Expand Up @@ -178,7 +178,9 @@ def _update_nodelist(self, job, nodespec):
job._nodelist = [x.split('/')[0] for x in nodespec.split('+')]
job._nodelist.sort()

def poll(self, *jobs):
# The `is_pbs_pro` argument enables the code paths that apply to PBS Pro
# only and must be skipped for Torque.
def _poll(self, is_pbs_pro, *jobs):
def output_ready(job):
# We report a job as finished only when its stdout/stderr are
# written back to the working directory
Expand Down Expand Up @@ -209,6 +211,18 @@ def output_ready(job):
if job.cancelled or output_ready(job):
self.log(f'Assuming job {job.jobid} completed')
job._completed = True
if is_pbs_pro:
    # With PBS Pro we can obtain the exit status of the job,
    # in case it actually failed. The `-x` flag makes qstat report
    # finished jobs as well.
    extended_info = osext.run_command(f'qstat -xf {job.jobid}')
    # The full listing prints one `key = value` attribute per line, so
    # `^` has to anchor at every line start (re.MULTILINE); otherwise
    # it only matches at the very beginning of the whole output and
    # the exit status is never found.
    exit_status_match = re.search(
        r'^ *Exit_status *= *(?P<exit_status>\d+)',
        extended_info.stdout,
        flags=re.MULTILINE
    )
    if exit_status_match:
        # Read the group from the match object created just above and
        # store the exit code as an integer, not as the matched text.
        job._exitcode = int(exit_status_match.group('exit_status'))

return

Expand Down Expand Up @@ -277,7 +291,13 @@ def output_ready(job):
job._exception = JobError('maximum pending time exceeded',
job.jobid)

def poll(self, *job):
    '''Poll the given jobs with the PBS Pro-only code paths enabled.'''
    pbs_pro_only = True
    self._poll(pbs_pro_only, *job)


@register_scheduler('torque')
class TorqueJobScheduler(PbsJobScheduler):
    '''Scheduler registered under the name 'torque'.

    It derives from PbsJobScheduler, overriding the task resource option
    and polling with the PBS Pro-only code paths disabled.
    '''

    # Resource request template with node count and CPUs-per-node fields
    TASKS_OPT = '-l nodes={num_nodes}:ppn={num_cpus_per_node}'

    def poll(self, *job):
        '''Poll the given jobs without the PBS Pro-only code paths.'''
        pbs_pro_only = False
        self._poll(pbs_pro_only, *job)

0 comments on commit 10fedf5

Please sign in to comment.