Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sonobuoy: reset the assume-role credentials if they expire #892

Merged
merged 1 commit into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 9 additions & 1 deletion bottlerocket/agents/src/bin/k8s-workload-agent/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ where
&self.config,
&self.results_dir,
info_client,
&self.aws_secret_name.as_ref(),
)
.await
}
Expand All @@ -115,7 +116,14 @@ where
.await?;
info!("Stored kubeconfig in {}", TEST_CLUSTER_KUBECONFIG_PATH);

rerun_failed_workload(TEST_CLUSTER_KUBECONFIG_PATH, &self.results_dir, info_client).await
rerun_failed_workload(
TEST_CLUSTER_KUBECONFIG_PATH,
&self.results_dir,
info_client,
&self.config,
&self.aws_secret_name.as_ref(),
)
.await
}

async fn terminate(&mut self) -> Result<(), Self::E> {
Expand Down
6 changes: 4 additions & 2 deletions bottlerocket/agents/src/bin/sonobuoy-test-agent/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ use test_agent::{
};
use testsys_model::{SecretName, TestResults};

// Default assume-role session duration for Sonobuoy agents: 4 hours (in seconds).
const DEFAULT_ASSUME_ROLE_SESSION_DURATION: i32 = 14400;
// Default assume-role session duration for Sonobuoy agents: 1 hour (in seconds).
const DEFAULT_ASSUME_ROLE_SESSION_DURATION: i32 = 3600;

struct SonobuoyTestRunner {
config: SonobuoyConfig,
Expand Down Expand Up @@ -111,6 +111,7 @@ where
&self.config,
&self.results_dir,
info_client,
&self.aws_secret_name.as_ref(),
)
.await
}
Expand Down Expand Up @@ -153,6 +154,7 @@ where
&self.config,
&self.results_dir,
info_client,
&self.aws_secret_name.as_ref(),
)
.await
}
Expand Down
36 changes: 33 additions & 3 deletions bottlerocket/agents/src/sonobuoy.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::error;
use agent_utils::aws::aws_config;
use bottlerocket_types::agent_config::{SonobuoyConfig, SONOBUOY_RESULTS_FILENAME};
use log::{error, info, trace};
use serde_json::Value;
Expand All @@ -9,7 +10,7 @@ use std::path::Path;
use std::process::Command;
use std::time::Duration;
use test_agent::InfoClient;
use testsys_model::{Outcome, TestResults};
use testsys_model::{Outcome, SecretName, TestResults};

/// Timeout for sonobuoy status to become available (seconds)
const SONOBUOY_STATUS_TIMEOUT: u64 = 900;
Expand All @@ -22,6 +23,7 @@ pub async fn run_sonobuoy<I>(
sonobuoy_config: &SonobuoyConfig,
results_dir: &Path,
info_client: &I,
aws_secret_name: &Option<&SecretName>,
) -> Result<TestResults, error::Error>
where
I: InfoClient,
Expand Down Expand Up @@ -102,7 +104,14 @@ where
.await
.context(error::SonobuoyTimeoutSnafu)??;
info!("Sonobuoy status is available, waiting for test to complete");
wait_for_sonobuoy_results(kubeconfig_path, None, info_client).await?;
wait_for_sonobuoy_results(
kubeconfig_path,
None,
info_client,
&sonobuoy_config.assume_role,
aws_secret_name,
)
.await?;
info!("Sonobuoy testing has completed, checking results");

results_sonobuoy(kubeconfig_path, results_dir)
Expand All @@ -115,6 +124,7 @@ pub async fn rerun_failed_sonobuoy<I>(
sonobuoy_config: &SonobuoyConfig,
results_dir: &Path,
info_client: &I,
aws_secret_name: &Option<&SecretName>,
) -> Result<TestResults, error::Error>
where
I: InfoClient,
Expand Down Expand Up @@ -175,7 +185,14 @@ where
.await
.context(error::SonobuoyTimeoutSnafu)??;
info!("Sonobuoy status is available, waiting for test to complete");
wait_for_sonobuoy_results(kubeconfig_path, None, info_client).await?;
wait_for_sonobuoy_results(
kubeconfig_path,
None,
info_client,
&sonobuoy_config.assume_role,
aws_secret_name,
)
.await?;
info!("Sonobuoy testing has completed, checking results");

results_sonobuoy(kubeconfig_path, results_dir)
Expand Down Expand Up @@ -220,6 +237,8 @@ pub async fn wait_for_sonobuoy_results<I>(
kubeconfig_path: &str,
namespace: Option<&str>,
info_client: &I,
assume_role: &Option<String>,
aws_secret_name: &Option<&SecretName>,
) -> Result<(), error::Error>
where
I: InfoClient,
Expand All @@ -230,10 +249,20 @@ where
..Default::default()
};
let mut retries = 0;
// Assume-role credentials last at most 3600 seconds, so we refresh them every
// 50 loop iterations * 30 seconds (the loop sleep time) = 1500 seconds, before they expire.
let mut credential_refresh_countdown = 50;

loop {
if retries > 5 {
return Err(error::Error::SonobuoyStatus { retries });
}

// Refresh the credentials if the countdown is 0
if credential_refresh_countdown == 0 {
aws_config(aws_secret_name, assume_role, &None, &None, &None, true).await?;
credential_refresh_countdown = 50;
}

let kubeconfig_arg = vec!["--kubeconfig", kubeconfig_path];
let namespace_arg = namespace
.map(|namespace| vec!["--namespace", namespace])
Expand Down Expand Up @@ -300,6 +329,7 @@ where
.for_each(|e| error!("Unable to send test update: {}", e));

tokio::time::sleep(Duration::from_secs(30)).await;
credential_refresh_countdown -= 1;
}
}

Expand Down
23 changes: 20 additions & 3 deletions bottlerocket/agents/src/workload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::Duration;
use test_agent::InfoClient;
use testsys_model::TestResults;
use testsys_model::{SecretName, TestResults};

/// Timeout for sonobuoy status to become available (seconds)
const SONOBUOY_STATUS_TIMEOUT: u64 = 900;
Expand All @@ -24,6 +24,7 @@ pub async fn run_workload<I>(
workload_config: &WorkloadConfig,
results_dir: &Path,
info_client: &I,
aws_secret_name: &Option<&SecretName>,
) -> Result<TestResults, error::Error>
where
I: InfoClient,
Expand Down Expand Up @@ -92,7 +93,14 @@ where
.await
.context(error::SonobuoyTimeoutSnafu)??;
info!("Workload status is available, waiting for test to complete");
wait_for_sonobuoy_results(kubeconfig_path, Some("testsys-workload"), info_client).await?;
wait_for_sonobuoy_results(
kubeconfig_path,
Some("testsys-workload"),
info_client,
&workload_config.assume_role,
aws_secret_name,
)
.await?;
info!("Workload testing has completed, checking results");

results_workload(kubeconfig_path, results_dir)
Expand All @@ -103,6 +111,8 @@ pub async fn rerun_failed_workload<I>(
kubeconfig_path: &str,
results_dir: &Path,
info_client: &I,
workload_config: &WorkloadConfig,
aws_secret_name: &Option<&SecretName>,
) -> Result<TestResults, error::Error>
where
I: InfoClient,
Expand Down Expand Up @@ -138,7 +148,14 @@ where
.await
.context(error::SonobuoyTimeoutSnafu)??;
info!("Workload status is available, waiting for test to complete");
wait_for_sonobuoy_results(kubeconfig_path, Some("testsys-workload"), info_client).await?;
wait_for_sonobuoy_results(
kubeconfig_path,
Some("testsys-workload"),
info_client,
&workload_config.assume_role,
aws_secret_name,
)
.await?;
info!("Workload testing has completed, checking results");

results_workload(kubeconfig_path, results_dir)
Expand Down