Skip to content

Commit

Permalink
[ML] autoscaling context current capacity could be null, this commit …
Browse files Browse the repository at this point in the history
…handles that (#74822)

The autoscaling context's current capacity may be null. This should only really happen early in a cluster's life cycle or when a node was just recently brought online, mainly because the current node sizes have not yet been discovered and cached.

This change should really have been part of #74691
  • Loading branch information
benwtrent committed Jul 1, 2021
1 parent d70b090 commit c8c4200
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 6 deletions.
Expand Up @@ -46,7 +46,7 @@ public class AutoscalingIT extends MlNativeAutodetectIntegTestCase {

// This test assumes that xpack.ml.max_machine_memory_percent is 30
// and that xpack.ml.use_auto_machine_memory_percent is false
public void testMLAutoscalingCapacity() {
public void testMLAutoscalingCapacity() throws Exception {
SortedMap<String, Settings> deciders = new TreeMap<>();
deciders.put(MlAutoscalingDeciderService.NAME,
Settings.builder().put(MlAutoscalingDeciderService.DOWN_SCALE_DELAY.getKey(), TimeValue.ZERO).build());
Expand All @@ -57,14 +57,15 @@ public void testMLAutoscalingCapacity() {
);
assertAcked(client().execute(PutAutoscalingPolicyAction.INSTANCE, request).actionGet());

assertMlCapacity(
assertBusy(() -> assertMlCapacity(
client().execute(
GetAutoscalingCapacityAction.INSTANCE,
new GetAutoscalingCapacityAction.Request()
).actionGet(),
"Requesting scale down as tier and/or node size could be smaller",
0L,
0L);
0L)
);

putJob("job1", 100);
putJob("job2", 200);
Expand Down
Expand Up @@ -534,9 +534,13 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider
// Due to weird rounding errors, it may be that a scale down result COULD cause a scale up
// Ensuring the scaleDown here forces the scale down result to always be lower than the current capacity.
// This is safe as we know that ALL jobs are assigned at the current capacity
.map(result -> new AutoscalingDeciderResult(
ensureScaleDown(result.requiredCapacity(), context.currentCapacity()), result.reason()
));
.map(result -> {
AutoscalingCapacity capacity = ensureScaleDown(result.requiredCapacity(), context.currentCapacity());
if (capacity == null) {
return null;
}
return new AutoscalingDeciderResult(capacity, result.reason());
});

if (maybeScaleDown.isPresent()) {
final AutoscalingDeciderResult scaleDownDecisionResult = maybeScaleDown.get();
Expand Down Expand Up @@ -599,6 +603,9 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider
}

static AutoscalingCapacity ensureScaleDown(AutoscalingCapacity scaleDownResult, AutoscalingCapacity currentCapacity) {
if (scaleDownResult == null || currentCapacity == null) {
return null;
}
AutoscalingCapacity newCapacity = new AutoscalingCapacity(
new AutoscalingCapacity.AutoscalingResources(
currentCapacity.total().storage(),
Expand Down

0 comments on commit c8c4200

Please sign in to comment.