scrubber: add scan-metadata and hook into integration tests (#5176)
## Problem

- Scrubber's `tidy` command requires the presence of a control plane
- Scrubber has no tests at all

## Summary of changes

- Add re-usable async streams for reading metadata from a bucket
- Add a `scan-metadata` command that reads from those streams and calls
the existing `checks.rs` code to validate metadata, then returns a summary
struct for the bucket. The command returns a nonzero status if errors are
found (see the sketch after this list).
- Add an `enable_scrub_on_exit()` function to NeonEnvBuilder so that
tests using remote storage can request to have the scrubber run after
they finish
- Enable remote storage and scrub_on_exit in test_pageserver_restart
and test_pageserver_chaos
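
To make the shape of this concrete, here is a minimal sketch of the stream-plus-scan flow, assuming only the `async-stream`, `futures-util`, and `tokio` dependencies this PR adds to `s3_scrubber/Cargo.toml`. `TimelineMetadata`, `BucketSummary`, and `list_timelines` are hypothetical stand-ins, not the scrubber's real types:

```rust
use async_stream::stream;
use futures_util::{pin_mut, StreamExt};

// Hypothetical stand-in for the per-timeline metadata the scrubber reads.
#[derive(Debug)]
struct TimelineMetadata {
    tenant_timeline: String,
}

// Hypothetical stand-in for the summary struct returned for the bucket.
#[derive(Default, Debug)]
struct BucketSummary {
    timelines_scanned: usize,
    errors: Vec<String>,
}

// Placeholder for the bucket listing; the real code pages through S3.
async fn list_timelines() -> Vec<TimelineMetadata> {
    vec![TimelineMetadata {
        tenant_timeline: "tenant-a/timeline-b".into(),
    }]
}

#[tokio::main]
async fn main() {
    // Re-usable async stream: yields metadata items as they are listed.
    let metadata = stream! {
        for item in list_timelines().await {
            yield item;
        }
    };
    pin_mut!(metadata);

    // scan-metadata: validate each item (the real command calls checks.rs)
    // and fold the results into a bucket-wide summary.
    let mut summary = BucketSummary::default();
    while let Some(timeline) = metadata.next().await {
        summary.timelines_scanned += 1;
        if timeline.tenant_timeline.is_empty() {
            summary.errors.push(format!("invalid metadata: {timeline:?}"));
        }
    }

    println!("{summary:?}");
    // Nonzero exit status when any errors were found.
    if !summary.errors.is_empty() {
        std::process::exit(1);
    }
}
```

The point of streaming rather than batch-reading is that validation can start while listing is still in flight, which is what makes the approach usable on large buckets.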

This is a "toe in the water" of the overall space of validating the
scrubber. Later, we should:
- Enable scrubbing at end of tests using remote storage by default
- Make the success condition stricter than "no errors": tests should
declare what tenants+timelines they expect to see in the bucket (or
sniff these from the functions tests use to create them) and we should
require that the scrubber reports on these particular tenants/timelines.

The `tidy` command is untouched in this PR, but it should be refactored
later to use a similar async streaming interface instead of the current
batch-reading approach (the streams are faster on large buckets), and
to also be covered by tests.


---------

Co-authored-by: Joonas Koivunen <joonas@neon.tech>
Co-authored-by: Alexander Bayandin <alexander@neon.tech>
Co-authored-by: Christian Schwarz <christian@neon.tech>
Co-authored-by: Conrad Ludgate <conrad@neon.tech>
5 people committed Sep 6, 2023
1 parent 8e25d3e commit 7439331
Showing 18 changed files with 815 additions and 239 deletions.
36 changes: 15 additions & 21 deletions Cargo.lock

Some generated files are not rendered by default.

5 changes: 3 additions & 2 deletions pageserver/ctl/src/layer_map_analyzer.rs
@@ -3,6 +3,7 @@
 //! Currently it only analyzes holes, which are regions within the layer range that the layer contains no updates for. In the future it might do more analysis (maybe key quantiles?) but it should never return sensitive data.
 
 use anyhow::Result;
+use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
 use std::cmp::Ordering;
 use std::collections::BinaryHeap;
 use std::ops::Range;
@@ -142,12 +143,12 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
     let mut total_delta_layers = 0usize;
     let mut total_image_layers = 0usize;
     let mut total_excess_layers = 0usize;
-    for tenant in fs::read_dir(storage_path.join("tenants"))? {
+    for tenant in fs::read_dir(storage_path.join(TENANTS_SEGMENT_NAME))? {
         let tenant = tenant?;
         if !tenant.file_type()?.is_dir() {
             continue;
         }
-        for timeline in fs::read_dir(tenant.path().join("timelines"))? {
+        for timeline in fs::read_dir(tenant.path().join(TIMELINES_SEGMENT_NAME))? {
             let timeline = timeline?;
             if !timeline.file_type()?.is_dir() {
                 continue;
9 changes: 5 additions & 4 deletions pageserver/ctl/src/layers.rs
@@ -5,6 +5,7 @@ use clap::Subcommand;
 use pageserver::tenant::block_io::BlockCursor;
 use pageserver::tenant::disk_btree::DiskBtreeReader;
 use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary};
+use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
 use pageserver::{page_cache, virtual_file};
 use pageserver::{
     repository::{Key, KEY_SIZE},
@@ -80,13 +81,13 @@ async fn read_delta_file(path: impl AsRef<Path>) -> Result<()> {
 pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
     match cmd {
         LayerCmd::List { path } => {
-            for tenant in fs::read_dir(path.join("tenants"))? {
+            for tenant in fs::read_dir(path.join(TENANTS_SEGMENT_NAME))? {
                 let tenant = tenant?;
                 if !tenant.file_type()?.is_dir() {
                     continue;
                 }
                 println!("tenant {}", tenant.file_name().to_string_lossy());
-                for timeline in fs::read_dir(tenant.path().join("timelines"))? {
+                for timeline in fs::read_dir(tenant.path().join(TIMELINES_SEGMENT_NAME))? {
                     let timeline = timeline?;
                     if !timeline.file_type()?.is_dir() {
                         continue;
@@ -101,9 +102,9 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
             timeline,
         } => {
             let timeline_path = path
-                .join("tenants")
+                .join(TENANTS_SEGMENT_NAME)
                .join(tenant)
-                .join("timelines")
+                .join(TIMELINES_SEGMENT_NAME)
                .join(timeline);
             let mut idx = 0;
             for layer in fs::read_dir(timeline_path)? {
5 changes: 3 additions & 2 deletions pageserver/src/config.rs
@@ -32,7 +32,8 @@ use crate::disk_usage_eviction_task::DiskUsageEvictionTaskConfig;
 use crate::tenant::config::TenantConf;
 use crate::tenant::config::TenantConfOpt;
 use crate::tenant::{
-    TENANT_ATTACHING_MARKER_FILENAME, TENANT_DELETED_MARKER_FILE_NAME, TIMELINES_SEGMENT_NAME,
+    TENANTS_SEGMENT_NAME, TENANT_ATTACHING_MARKER_FILENAME, TENANT_DELETED_MARKER_FILE_NAME,
+    TIMELINES_SEGMENT_NAME,
 };
 use crate::{
     IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_DELETE_MARK_SUFFIX,
@@ -563,7 +564,7 @@ impl PageServerConf {
     //
 
     pub fn tenants_path(&self) -> PathBuf {
-        self.workdir.join("tenants")
+        self.workdir.join(TENANTS_SEGMENT_NAME)
     }
 
     pub fn tenant_path(&self, tenant_id: &TenantId) -> PathBuf {
3 changes: 3 additions & 0 deletions pageserver/src/tenant.rs
@@ -141,6 +141,9 @@ pub use crate::tenant::metadata::save_metadata
 // re-export for use in walreceiver
 pub use crate::tenant::timeline::WalReceiverInfo;
 
+/// The "tenants" part of `tenants/<tenant>/timelines...`
+pub const TENANTS_SEGMENT_NAME: &str = "tenants";
+
 /// Parts of the `.neon/tenants/<tenant_id>/timelines/<timeline_id>` directory prefix.
 pub const TIMELINES_SEGMENT_NAME: &str = "timelines";
 
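For illustration, a standalone sketch (constants inlined, IDs hypothetical) of how these two segment constants compose the paths that `tenants_path()` and the ctl commands above now build:

```rust
use std::path::{Path, PathBuf};

const TENANTS_SEGMENT_NAME: &str = "tenants";
const TIMELINES_SEGMENT_NAME: &str = "timelines";

// Compose `<workdir>/tenants/<tenant_id>/timelines/<timeline_id>`.
fn timeline_path(workdir: &Path, tenant_id: &str, timeline_id: &str) -> PathBuf {
    workdir
        .join(TENANTS_SEGMENT_NAME)
        .join(tenant_id)
        .join(TIMELINES_SEGMENT_NAME)
        .join(timeline_id)
}

fn main() {
    let p = timeline_path(Path::new(".neon"), "tenant-a", "timeline-b");
    assert_eq!(p, PathBuf::from(".neon/tenants/tenant-a/timelines/timeline-b"));
}
```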
3 changes: 2 additions & 1 deletion pageserver/src/virtual_file.rs
@@ -11,6 +11,7 @@
 //! src/backend/storage/file/fd.c
 //!
 use crate::metrics::{STORAGE_IO_SIZE, STORAGE_IO_TIME};
+use crate::tenant::TENANTS_SEGMENT_NAME;
 use once_cell::sync::OnceCell;
 use std::fs::{self, File, OpenOptions};
 use std::io::{Error, ErrorKind, Seek, SeekFrom, Write};
@@ -235,7 +236,7 @@ impl VirtualFile {
         let parts = path_str.split('/').collect::<Vec<&str>>();
         let tenant_id;
         let timeline_id;
-        if parts.len() > 5 && parts[parts.len() - 5] == "tenants" {
+        if parts.len() > 5 && parts[parts.len() - 5] == TENANTS_SEGMENT_NAME {
             tenant_id = parts[parts.len() - 4].to_string();
             timeline_id = parts[parts.len() - 2].to_string();
         } else {
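To see why `parts[parts.len() - 5]` picks out the tenants segment, here is a worked example with hypothetical IDs:

```rust
// Worked example of the index arithmetic above (hypothetical path and IDs).
fn main() {
    let path = ".neon/tenants/3aa8f/timelines/7c0de/layer-file";
    let parts: Vec<&str> = path.split('/').collect();
    // parts == [".neon", "tenants", "3aa8f", "timelines", "7c0de", "layer-file"]
    assert!(parts.len() > 5);
    assert_eq!(parts[parts.len() - 5], "tenants"); // TENANTS_SEGMENT_NAME
    assert_eq!(parts[parts.len() - 4], "3aa8f");   // tenant_id
    assert_eq!(parts[parts.len() - 2], "7c0de");   // timeline_id
}
```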
10 changes: 6 additions & 4 deletions s3_scrubber/Cargo.toml
@@ -22,6 +22,10 @@ serde_json.workspace = true
 serde_with.workspace = true
 workspace_hack.workspace = true
 utils.workspace = true
+async-stream.workspace = true
+tokio-stream.workspace = true
+futures-util.workspace = true
+itertools.workspace = true
 
 tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
 chrono = { workspace = true, default-features = false, features = ["clock", "serde"] }
@@ -30,10 +34,8 @@ aws-config = { workspace = true, default-features = false, features = ["rustls",
 
 pageserver = {path="../pageserver"}
 
-
 tracing.workspace = true
 tracing-subscriber.workspace = true
 clap.workspace = true
 
 atty = "0.2"
-tracing-appender = "0.2"
+tracing-appender = "0.2"
+histogram = "0.7"
1 comment on commit 7439331

@github-actions

1692 tests run: 1611 passed, 1 failed, 80 skipped (full report)


Failures on Postgres 14

  • test_heavy_write_workload[neon_off-10-5-5]: release
# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_heavy_write_workload[neon_off-10-5-5]"

Code coverage full report

  • functions: 53.3% (7461 of 13986 functions)
  • lines: 81.6% (44093 of 54050 lines)

The comment gets automatically updated with the latest test results.
Last update: 7439331 at 2023-09-06T11:50:21.117Z
