Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scrubber: add scan-metadata and hook into integration tests #5176

Merged
merged 25 commits into from Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
8951c57
s3_scrubber: remove atty dependency
jcsp Sep 1, 2023
87421aa
scrubber: add scan-metadata command
jcsp Aug 18, 2023
f544cdc
tests: enable scrubbing at end of test
jcsp Sep 1, 2023
0f0cd9c
scrubber: make SSO_ACCOUNT_ID optional
jcsp Sep 1, 2023
5cdecb5
scrubber: respect AWS_ENDPOINT_URL
jcsp Sep 1, 2023
f9836ad
tests: enable remote storage & scrub in pageserver restart & chaos tests
jcsp Sep 1, 2023
bc68568
Update s3_scrubber/src/scan_metadata.rs
jcsp Sep 1, 2023
f51d888
Wait for custom extensions build before deploy (#5170)
bayandin Sep 1, 2023
06053dd
remote_timeline_client: tests: run upload ops on the tokio::test runt…
problame Sep 1, 2023
94ad504
rfc: Crash-Consistent Layer Map Updates By Leveraging index_part.json…
problame Sep 1, 2023
6455f0d
FileBlockReader<File> is never used (#5181)
problame Sep 1, 2023
9b91c07
pageserver: run all Rust tests with remote storage enabled (#5164)
problame Sep 1, 2023
3112aa9
proxy: error typo (#5187)
conradludgate Sep 1, 2023
b7f3ca6
tests: get test name automatically for remote storage (#5184)
jcsp Sep 1, 2023
80c942f
drop vestigial test_name arguments
jcsp Sep 1, 2023
826aafb
pageserver: define "/tenants/" in a constant
jcsp Sep 4, 2023
da440bf
Make all references to "tenants" and "timelines" use constants
jcsp Sep 4, 2023
40a809f
test: stash test_output_dir on NeonEnvBuilder
jcsp Sep 4, 2023
25ceb87
tests: improve subprocess_capture
jcsp Sep 4, 2023
177645c
s3_scrubber: remove main.rs span
jcsp Sep 4, 2023
f8622e5
Merge remote-tracking branch 'upstream/main' into jcsp/scrubber-scan-…
jcsp Sep 4, 2023
21c489d
clippy
jcsp Sep 4, 2023
36c57b5
Fix a doc string with angle brackets outside ``
jcsp Sep 4, 2023
a6fef13
typos
jcsp Sep 6, 2023
8ac049b
Tolerate non-unicode postgres log output
jcsp Sep 6, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
36 changes: 15 additions & 21 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions pageserver/ctl/src/layer_map_analyzer.rs
Expand Up @@ -3,6 +3,7 @@
//! Currently it only analyzes holes, which are regions within the layer range that the layer contains no updates for. In the future it might do more analysis (maybe key quantiles?) but it should never return sensitive data.

use anyhow::Result;
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::ops::Range;
Expand Down Expand Up @@ -142,12 +143,12 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
let mut total_delta_layers = 0usize;
let mut total_image_layers = 0usize;
let mut total_excess_layers = 0usize;
for tenant in fs::read_dir(storage_path.join("tenants"))? {
for tenant in fs::read_dir(storage_path.join(TENANTS_SEGMENT_NAME))? {
let tenant = tenant?;
if !tenant.file_type()?.is_dir() {
continue;
}
for timeline in fs::read_dir(tenant.path().join("timelines"))? {
for timeline in fs::read_dir(tenant.path().join(TIMELINES_SEGMENT_NAME))? {
let timeline = timeline?;
if !timeline.file_type()?.is_dir() {
continue;
Expand Down
9 changes: 5 additions & 4 deletions pageserver/ctl/src/layers.rs
Expand Up @@ -5,6 +5,7 @@ use clap::Subcommand;
use pageserver::tenant::block_io::BlockCursor;
use pageserver::tenant::disk_btree::DiskBtreeReader;
use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary};
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
use pageserver::{page_cache, virtual_file};
use pageserver::{
repository::{Key, KEY_SIZE},
Expand Down Expand Up @@ -80,13 +81,13 @@ async fn read_delta_file(path: impl AsRef<Path>) -> Result<()> {
pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
match cmd {
LayerCmd::List { path } => {
for tenant in fs::read_dir(path.join("tenants"))? {
for tenant in fs::read_dir(path.join(TENANTS_SEGMENT_NAME))? {
let tenant = tenant?;
if !tenant.file_type()?.is_dir() {
continue;
}
println!("tenant {}", tenant.file_name().to_string_lossy());
for timeline in fs::read_dir(tenant.path().join("timelines"))? {
for timeline in fs::read_dir(tenant.path().join(TIMELINES_SEGMENT_NAME))? {
let timeline = timeline?;
if !timeline.file_type()?.is_dir() {
continue;
Expand All @@ -101,9 +102,9 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
timeline,
} => {
let timeline_path = path
.join("tenants")
.join(TENANTS_SEGMENT_NAME)
.join(tenant)
.join("timelines")
.join(TIMELINES_SEGMENT_NAME)
.join(timeline);
let mut idx = 0;
for layer in fs::read_dir(timeline_path)? {
Expand Down
5 changes: 3 additions & 2 deletions pageserver/src/config.rs
Expand Up @@ -32,7 +32,8 @@ use crate::disk_usage_eviction_task::DiskUsageEvictionTaskConfig;
use crate::tenant::config::TenantConf;
use crate::tenant::config::TenantConfOpt;
use crate::tenant::{
TENANT_ATTACHING_MARKER_FILENAME, TENANT_DELETED_MARKER_FILE_NAME, TIMELINES_SEGMENT_NAME,
TENANTS_SEGMENT_NAME, TENANT_ATTACHING_MARKER_FILENAME, TENANT_DELETED_MARKER_FILE_NAME,
TIMELINES_SEGMENT_NAME,
};
use crate::{
IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_DELETE_MARK_SUFFIX,
Expand Down Expand Up @@ -563,7 +564,7 @@ impl PageServerConf {
//

pub fn tenants_path(&self) -> PathBuf {
self.workdir.join("tenants")
self.workdir.join(TENANTS_SEGMENT_NAME)
}

pub fn tenant_path(&self, tenant_id: &TenantId) -> PathBuf {
Expand Down
3 changes: 3 additions & 0 deletions pageserver/src/tenant.rs
Expand Up @@ -141,6 +141,9 @@ pub use crate::tenant::metadata::save_metadata;
// re-export for use in walreceiver
pub use crate::tenant::timeline::WalReceiverInfo;

/// The "tenants" part of `tenants/<tenant>/timelines...`: the path segment
/// that is joined onto a base directory to reach the per-tenant directories.
pub const TENANTS_SEGMENT_NAME: &str = "tenants";

/// The "timelines" part of `.neon/tenants/<tenant_id>/timelines/<timeline_id>`:
/// the path segment that is joined onto a tenant directory to reach its
/// per-timeline directories.
pub const TIMELINES_SEGMENT_NAME: &str = "timelines";

Expand Down
3 changes: 2 additions & 1 deletion pageserver/src/virtual_file.rs
Expand Up @@ -11,6 +11,7 @@
//! src/backend/storage/file/fd.c
//!
use crate::metrics::{STORAGE_IO_SIZE, STORAGE_IO_TIME};
use crate::tenant::TENANTS_SEGMENT_NAME;
use once_cell::sync::OnceCell;
use std::fs::{self, File, OpenOptions};
use std::io::{Error, ErrorKind, Read, Seek, SeekFrom, Write};
Expand Down Expand Up @@ -235,7 +236,7 @@ impl VirtualFile {
let parts = path_str.split('/').collect::<Vec<&str>>();
let tenant_id;
let timeline_id;
if parts.len() > 5 && parts[parts.len() - 5] == "tenants" {
if parts.len() > 5 && parts[parts.len() - 5] == TENANTS_SEGMENT_NAME {
tenant_id = parts[parts.len() - 4].to_string();
timeline_id = parts[parts.len() - 2].to_string();
} else {
Expand Down
10 changes: 6 additions & 4 deletions s3_scrubber/Cargo.toml
Expand Up @@ -22,6 +22,10 @@ serde_json.workspace = true
serde_with.workspace = true
workspace_hack.workspace = true
utils.workspace = true
async-stream.workspace = true
tokio-stream.workspace = true
futures-util.workspace = true
itertools.workspace = true

tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
chrono = { workspace = true, default-features = false, features = ["clock", "serde"] }
Expand All @@ -30,10 +34,8 @@ aws-config = { workspace = true, default-features = false, features = ["rustls",

pageserver = {path="../pageserver"}


tracing.workspace = true
tracing-subscriber.workspace = true
clap.workspace = true

atty = "0.2"
tracing-appender = "0.2"
tracing-appender = "0.2"
jcsp marked this conversation as resolved.
Show resolved Hide resolved
histogram = "0.7"