Skip to content

Commit

Permalink
Changed disk provisioning threshold for adjusting cassandra
Browse files Browse the repository at this point in the history
  • Loading branch information
Ram Srivatsa Kannan committed Mar 21, 2024
1 parent 6dcf066 commit 38ace23
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 5 deletions.
1 change: 1 addition & 0 deletions service_capacity_modeling/capacity_planner.py
Expand Up @@ -565,6 +565,7 @@ def _plan_certain(
)
if plan is not None:
plans.append(plan)

# lowest cost first
plans.sort(key=lambda p: (p.rank, p.candidate_clusters.total_annual_cost))
Expand Down
7 changes: 6 additions & 1 deletion service_capacity_modeling/models/org/netflix/cassandra.py
Expand Up @@ -314,7 +314,12 @@ def _estimate_cassandra_cluster_zonal(
),
# C* needs headroom on ephemeral disks for compaction and for
# replacement time if we're underscaled. NOTE(review): the multiplier
# below is now 2x (disks ~50% full) with a per-node cap for large
# clusters — the old "25% full" wording matched the previous 4x
# multiplier; confirm the intended fill target and keep this in sync.
required_disk_space=lambda x: x * 4,
required_disk_space=lambda x: x * 2
if instance.drive is None
or "local" in instance.drive.drive_type.name
or required_cluster_size is None
or (x / required_cluster_size) < 1000
else x + (1000 * required_cluster_size),
# C* clusters cannot recover data from neighbors quickly so we
# want to avoid clusters with more than 1 TiB of local state
max_local_disk_gib=max_local_disk_gib,
Expand Down
50 changes: 49 additions & 1 deletion tests/netflix/test_cassandra.py
Expand Up @@ -199,7 +199,7 @@ def test_high_write_throughput():
assert (
300_000
> high_writes_result.count * high_writes_result.attached_drives[0].size_gib
>= 100_000
>= 70_000
)

cluster_cost = cap_plan.candidate_clusters.annual_costs["cassandra.zonal-clusters"]
Expand Down Expand Up @@ -353,3 +353,51 @@ def test_plan_certain():
lr_clusters = cap_plan[0].candidate_clusters.zonal[0]
assert lr_clusters.count == 8
assert lr_clusters.instance.cpu == 16


def test_plan_certain_disk_space():
    """Verify disk-space adjustment lowers node density and thus vCPU count.

    With the EBS/ephemeral disk-space adjustment in place, planning a
    certain-capacity Cassandra cluster against a fixed 32-node i4i.8xlarge
    fleet should recommend an instance smaller than 32 vCPUs.
    """
    # Observed utilization of the currently running zonal cluster.
    current_capacity = CurrentClusterCapacity(
        cluster_instance_name="i4i.8xlarge",
        cluster_instance_count=Interval(low=32, mid=32, high=32, confidence=1),
        cpu_utilization=Interval(low=2.96, mid=4.36, high=7.47, confidence=1),
    )

    # Tier-1 latency workload with a large (~70 TiB) uncompressed state.
    desires = CapacityDesires(
        service_tier=1,
        current_clusters=CurrentClusters(zonal=[current_capacity]),
        query_pattern=QueryPattern(
            access_pattern=AccessPattern(AccessPattern.latency),
            estimated_read_per_second=Interval(
                low=583, mid=933, high=2790, confidence=0.98
            ),
            estimated_write_per_second=Interval(
                low=11310, mid=16491, high=20501, confidence=0.98
            ),
        ),
        data_shape=DataShape(
            estimated_state_size_gib=Interval(
                low=71734.83, mid=72232.16, high=72385.33, confidence=0.98
            ),
            estimated_compression_ratio=Interval(low=1, mid=1, high=1, confidence=1),
        ),
    )

    plans = planner.plan_certain(
        model_name="org.netflix.cassandra",
        region="us-east-1",
        num_results=3,
        num_regions=4,
        desires=desires,
        extra_model_arguments={
            "current_asg_size": 3,
            "max_disk_used_gib": 2520.8339739371695,
            "max_local_disk_gib": 16384,
            "required_cluster_size": 32,
        },
    )

    # The least-regret plan should pick a smaller instance than the
    # current 32-vCPU fleet.
    chosen = plans[0].candidate_clusters.zonal[0]
    assert chosen.instance.cpu < 32
3 changes: 2 additions & 1 deletion tests/netflix/test_cassandra_uncertain.py
Expand Up @@ -101,7 +101,7 @@ def test_increasing_qps_simple():
if lr.instance.drive is None:
assert sum(dr.size_gib for dr in lr.attached_drives) >= 100
else:
assert lr.instance.drive.size_gib >= 100
assert lr.instance.drive.size_gib >= 60

result.append(
(lr_family, lr_cpu, lr_cost, cap_plan.least_regret[0].requirements.zonal[0])
Expand Down Expand Up @@ -147,6 +147,7 @@ def test_worn_dataset():
extra_model_arguments={
"max_regional_size": 200,
"copies_per_region": 2,
"require_attached_disks": True,
},
)

Expand Down
2 changes: 1 addition & 1 deletion tests/netflix/test_key_value.py
Expand Up @@ -53,7 +53,7 @@ def test_kv_increasing_qps_simple():
if zlr.instance.drive is None:
assert sum(dr.size_gib for dr in zlr.attached_drives) >= 100
else:
assert zlr.instance.drive.size_gib >= 100
assert zlr.instance.drive.size_gib >= 60

zonal_result.append(
(
Expand Down
2 changes: 1 addition & 1 deletion tests/netflix/test_time_series.py
Expand Up @@ -108,7 +108,7 @@ def test_timeseries_increasing_qps_simple():
if zlr.instance.drive is None:
assert sum(dr.size_gib for dr in zlr.attached_drives) >= 100
else:
assert zlr.instance.drive.size_gib >= 100
assert zlr.instance.drive.size_gib >= 60

zonal_result.append(
(
Expand Down

0 comments on commit 38ace23

Please sign in to comment.