Skip to content

Commit

Permalink
Add parallelism and fix builds
Browse files Browse the repository at this point in the history
  • Loading branch information
jolynch committed Apr 17, 2024
1 parent dc852a2 commit 3617f7e
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 13 deletions.
13 changes: 7 additions & 6 deletions .github/workflows/python-build.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
name: Build

on: [push, pull_request]

on:
push:
branches: ["main"]
tags: ["v*"]
pull_request:
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]

runs-on: ubuntu-latest

python-version: ["3.8", "3.9", "3.10"]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand Down
18 changes: 15 additions & 3 deletions service_capacity_modeling/capacity_planner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import functools
import logging
import math
from hashlib import blake2b
from typing import Any
from typing import Callable
Expand Down Expand Up @@ -58,13 +59,13 @@ def simulate_interval(
signed=False,
)

def sim_uncertan(count: int) -> Sequence[Interval]:
def sim_uncertain(count: int) -> Sequence[Interval]:
return [
certain_float(s)
for s in dist_for_interval(interval, seed=seed).rvs(count)
]

return sim_uncertan
return sim_uncertain

else:

Expand Down Expand Up @@ -593,6 +594,13 @@ def generate_scenarios(
desires.data_shape.reserved_instance_app_mem_gib
+ desires.data_shape.reserved_instance_system_mem_gib
)
# Applications often require a minimum amount of true parallelism
per_instance_cores = int(
math.ceil(
desires.query_pattern.estimated_read_parallelism.mid
+ desires.query_pattern.estimated_write_parallelism.mid
)
)
allowed_platforms: Set[Platform] = set(model.allowed_platforms())
allowed_drives: Set[str] = set(drives or [])
for drive_name in model.allowed_cloud_drives():
Expand All @@ -613,7 +621,11 @@ def generate_scenarios(
):
continue

if per_instance_mem > instance.ram_gib:
# If the instance doesn't have enough vertical resources (RAM or cores), skip it
if (
per_instance_mem > instance.ram_gib
or per_instance_cores > instance.cpu
):
continue

for drive in hardware.drives.values():
Expand Down
6 changes: 3 additions & 3 deletions service_capacity_modeling/hardware/profiles/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
try:
import importlib.resources as pkg_resources
from importlib import resources
from importlib import import_module
except ImportError:
import importlib_resources as pkg_resources # type: ignore[no-redef]
import importlib_resources as resources # type: ignore[no-redef]

import_module = __import__ # type: ignore[assignment]

Expand All @@ -12,7 +12,7 @@
current_module = import_module(__name__)
common_profiles = {}

with pkg_resources.path( # pylint: disable=deprecated-method
with resources.path( # pylint: disable=deprecated-method
current_module, "profiles.txt"
) as shape_file:
shapes = Path(shape_file.parent, "shapes")
Expand Down
22 changes: 22 additions & 0 deletions service_capacity_modeling/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,28 @@ class QueryPattern(ExcludeUnsetModel):
estimated_mean_read_size_bytes: Interval = certain_int(AVG_ITEM_SIZE_BYTES)
estimated_mean_write_size_bytes: Interval = certain_int(AVG_ITEM_SIZE_BYTES // 2)

# For workloads which have bursts of async work, the expected
# parallelism of those workloads. Note that the sum of read and
# write parallelism lower-bounds the number of cores per instance.
estimated_read_parallelism: Interval = Field(
certain_int(1),
title="Estimated per instance parallelism on read operations",
description=(
"The estimated amount of parallel work streams on a single "
"host. For example a read triggers async callbacks that need "
"to be executed truly in parallel (not just concurrent)."
),
)
estimated_write_parallelism: Interval = Field(
certain_int(1),
title="Estimated per instance parallelism on write operations",
description=(
"The estimated amount of parallel work streams on a single "
"host. For example a write triggers async fanouts that need "
"to be executed truly in parallel (not just concurrent)."
),
)

# The latencies at which oncall engineers get involved. We want
# to provision such that we don't involve oncall.
# Note that these summary statistics will be used to create reasonable
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ commands =

[testenv:notebook]
envdir = .tox/notebook
basepython=python3.8
basepython=python3.10
passenv = *
deps =
matplotlib
Expand Down

0 comments on commit 3617f7e

Please sign in to comment.