Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check for TaskState.Failed if deployment has failed #420

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
36 changes: 18 additions & 18 deletions levant/job_status_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func (l *levantDeployment) jobAllocationChecker(evalID *string) bool {
q := nomadHelper.GenerateBlockingQueryOptions(l.config.Template.Job.Namespace)

// Build our small internal checking struct.
levantTasks := make(map[TaskCoordinate]string)
levantTasks := make(map[TaskCoordinate]*nomad.TaskState)

for {

Expand All @@ -103,15 +103,15 @@ func (l *levantDeployment) jobAllocationChecker(evalID *string) bool {
// If we get here, set the wi to the latest Index.
q.WaitIndex = meta.LastIndex

complete, deadTasks := allocationStatusChecker(levantTasks, allocs)
complete, failedTasks := allocationStatusChecker(levantTasks, allocs)

// depending on how we finished up we report our status
// If we have no allocations left to track then we can exit and log
// information depending on the success.
if complete && deadTasks == 0 {
log.Info().Msg("levant/job_status_checker: all allocations in deployment of job are running")
if complete && failedTasks == 0 {
log.Info().Msg("levant/job_status_checker: all allocations in deployment of job are running or finished successfully")
return true
} else if complete && deadTasks > 0 {
} else if complete && failedTasks > 0 {
return false
}
}
Expand All @@ -121,31 +121,31 @@ func (l *levantDeployment) jobAllocationChecker(evalID *string) bool {
// job deployment, an update Levants internal tracking on task status based on
// this. This functionality exists as Nomad does not currently support
// deployments across all job types.
func allocationStatusChecker(levantTasks map[TaskCoordinate]string, allocs []*nomad.AllocationListStub) (bool, int) {
func allocationStatusChecker(levantTasks map[TaskCoordinate]*nomad.TaskState, allocs []*nomad.AllocationListStub) (bool, int) {

complete := true
deadTasks := 0
failedTasks := 0

for _, alloc := range allocs {
for taskName, task := range alloc.TaskStates {
for taskName, taskState := range alloc.TaskStates {
// if the state is one we haven't seen yet then we print a message
if levantTasks[TaskCoordinate{alloc.ID, taskName}] != task.State {
if levantTasks[TaskCoordinate{alloc.ID, taskName}] != taskState {
log.Info().Msgf("levant/job_status_checker: task %s in allocation %s now in %s state",
taskName, alloc.ID, task.State)
taskName, alloc.ID, taskState.State)
// then we record the new state
levantTasks[TaskCoordinate{alloc.ID, taskName}] = task.State
levantTasks[TaskCoordinate{alloc.ID, taskName}] = taskState
}

// then we have some case specific actions
switch levantTasks[TaskCoordinate{alloc.ID, taskName}] {
// if a task is still pendign we are not yet done
case "pending":
if levantTasks[TaskCoordinate{alloc.ID, taskName}].State == "pending" {
complete = false
// if the task is dead we record that
case "dead":
deadTasks++
}

// if a task failed we record that
if levantTasks[TaskCoordinate{alloc.ID, taskName}].Failed {
failedTasks++
}
}
}
return complete, deadTasks
return complete, failedTasks
}
43 changes: 28 additions & 15 deletions levant/job_status_checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,74 +12,87 @@ import (
func TestJobStatusChecker_allocationStatusChecker(t *testing.T) {

// Build our task status maps
levantTasks1 := make(map[TaskCoordinate]string)
levantTasks2 := make(map[TaskCoordinate]string)
levantTasks3 := make(map[TaskCoordinate]string)
levantTasks1 := make(map[TaskCoordinate]*nomad.TaskState)
levantTasks2 := make(map[TaskCoordinate]*nomad.TaskState)
levantTasks3 := make(map[TaskCoordinate]*nomad.TaskState)
levantTasks4 := make(map[TaskCoordinate]*nomad.TaskState)

// Build a small AllocationListStubs with required information.
var allocs1 []*nomad.AllocationListStub
taskStates1 := make(map[string]*nomad.TaskState)
taskStates1["task1"] = &nomad.TaskState{State: "running"}
taskStates1["task1"] = &nomad.TaskState{State: "running", Failed: false}
allocs1 = append(allocs1, &nomad.AllocationListStub{
ID: "10246d87-ecd7-21ad-13b2-f0c564647d64",
TaskStates: taskStates1,
})

var allocs2 []*nomad.AllocationListStub
taskStates2 := make(map[string]*nomad.TaskState)
taskStates2["task1"] = &nomad.TaskState{State: "running"}
taskStates2["task2"] = &nomad.TaskState{State: "pending"}
taskStates2["task1"] = &nomad.TaskState{State: "running", Failed: false}
taskStates2["task2"] = &nomad.TaskState{State: "pending", Failed: false}
taskStates2["task3"] = &nomad.TaskState{State: "dead", Failed: false}
allocs2 = append(allocs2, &nomad.AllocationListStub{
ID: "20246d87-ecd7-21ad-13b2-f0c564647d64",
TaskStates: taskStates2,
})

var allocs3 []*nomad.AllocationListStub
taskStates3 := make(map[string]*nomad.TaskState)
taskStates3["task1"] = &nomad.TaskState{State: "dead"}
taskStates3["task1"] = &nomad.TaskState{State: "dead", Failed: false}
allocs3 = append(allocs3, &nomad.AllocationListStub{
ID: "30246d87-ecd7-21ad-13b2-f0c564647d64",
TaskStates: taskStates3,
})

var allocs4 []*nomad.AllocationListStub
taskStates4 := make(map[string]*nomad.TaskState)
taskStates4["task1"] = &nomad.TaskState{State: "dead", Failed: false}
taskStates4["task2"] = &nomad.TaskState{State: "dead", Failed: true}
allocs4 = append(allocs4, &nomad.AllocationListStub{
ID: "40246d87-ecd7-21ad-13b2-f0c564647d64",
TaskStates: taskStates4,
})

cases := []struct {
levantTasks map[TaskCoordinate]string
levantTasks map[TaskCoordinate]*nomad.TaskState
allocs []*nomad.AllocationListStub
dead int
expectedDead int
expectedFailed int
expectedComplete bool
}{
{
levantTasks1,
allocs1,
0,
0,
true,
},
{
levantTasks2,
allocs2,
0,
0,
false,
},
{
levantTasks3,
allocs3,
0,
true,
},
{
levantTasks4,
allocs4,
1,
true,
},
}

for _, tc := range cases {
complete, dead := allocationStatusChecker(tc.levantTasks, tc.allocs)
complete, failed := allocationStatusChecker(tc.levantTasks, tc.allocs)

if complete != tc.expectedComplete {
t.Fatalf("expected complete to be %v but got %v", tc.expectedComplete, complete)
}
if dead != tc.expectedDead {
t.Fatalf("expected %v dead task(s) but got %v", tc.expectedDead, dead)
if failed != tc.expectedFailed {
t.Fatalf("expected %v failed task(s) but got %v", tc.expectedFailed, failed)
}
}
}