Skip to content

Commit

Permalink
[feat] Add pub api for checking column index is sorted. (#2849)
Browse files Browse the repository at this point in the history
* [feat]Add pub api for checking column index is sorted.

* export boundary_order

* simplify the code
  • Loading branch information
Ted-Jiang committed Oct 10, 2022
1 parent 2ae2309 commit c3aac93
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 0 deletions.
27 changes: 27 additions & 0 deletions parquet/src/file/page_index/index.rs
Expand Up @@ -63,6 +63,33 @@ pub enum Index {
FIXED_LEN_BYTE_ARRAY(ByteArrayIndex),
}

impl Index {
    /// Reports whether the page-level min/max values of this column index
    /// carry a boundary order other than unordered.
    ///
    /// Returns `false` when no boundary order is available (see
    /// [`Self::get_boundary_order`]).
    pub fn is_sorted(&self) -> bool {
        // Boundary order codes: 0 = UNORDERED, 1 = ASCENDING, 2 = DESCENDING,
        // so any code above UNORDERED counts as sorted.
        self.get_boundary_order()
            .map_or(false, |order| order.0 > BoundaryOrder::UNORDERED.0)
    }

    /// Returns the boundary order stored in this page index, or `None` for
    /// [`Index::NONE`], which has no typed index to read it from.
    pub fn get_boundary_order(&self) -> Option<BoundaryOrder> {
        // Each typed variant wraps a different index type, so every arm reads
        // the field separately; only `NONE` exits early.
        let order = match self {
            Index::NONE => return None,
            Index::BOOLEAN(typed) => typed.boundary_order,
            Index::INT32(typed) => typed.boundary_order,
            Index::INT64(typed) => typed.boundary_order,
            Index::INT96(typed) => typed.boundary_order,
            Index::FLOAT(typed) => typed.boundary_order,
            Index::DOUBLE(typed) => typed.boundary_order,
            Index::BYTE_ARRAY(typed) => typed.boundary_order,
            Index::FIXED_LEN_BYTE_ARRAY(typed) => typed.boundary_order,
        };
        Some(order)
    }
}

/// An index of a column of [`Type`] physical representation
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct NativeIndex<T: ParquetValueType> {
Expand Down
16 changes: 16 additions & 0 deletions parquet/src/file/serialized_reader.rs
Expand Up @@ -1325,6 +1325,10 @@ mod tests {
let row_group_metadata = metadata.row_group(0);

//col0->id: INT32 UNCOMPRESSED DO:0 FPO:4 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 7299, num_nulls: 0]
// The `id` column is expected to report an explicit UNORDERED boundary order.
assert!(!page_indexes[0][0].is_sorted());
let boundary_order = page_indexes[0][0].get_boundary_order();
// `matches!` only yields a bool; it must be wrapped in `assert!`, otherwise the
// result is discarded and the check never fails. Matching on `Some(..)` also
// covers the "order is present" check without a separate `unwrap()`.
assert!(matches!(boundary_order, Some(BoundaryOrder::UNORDERED)));
if let Index::INT32(index) = &page_indexes[0][0] {
check_native_page_index(
index,
Expand All @@ -1337,13 +1341,15 @@ mod tests {
unreachable!()
};
//col1->bool_col:BOOLEAN UNCOMPRESSED DO:0 FPO:37329 SZ:3022/3022/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: false, max: true, num_nulls: 0]
assert!(&page_indexes[0][1].is_sorted());
if let Index::BOOLEAN(index) = &page_indexes[0][1] {
assert_eq!(index.indexes.len(), 82);
assert_eq!(row_group_offset_indexes[1].len(), 82);
} else {
unreachable!()
};
//col2->tinyint_col: INT32 UNCOMPRESSED DO:0 FPO:40351 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
assert!(&page_indexes[0][2].is_sorted());
if let Index::INT32(index) = &page_indexes[0][2] {
check_native_page_index(
index,
Expand All @@ -1356,6 +1362,7 @@ mod tests {
unreachable!()
};
//col4->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
assert!(&page_indexes[0][3].is_sorted());
if let Index::INT32(index) = &page_indexes[0][3] {
check_native_page_index(
index,
Expand All @@ -1368,6 +1375,7 @@ mod tests {
unreachable!()
};
//col5->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
assert!(&page_indexes[0][4].is_sorted());
if let Index::INT32(index) = &page_indexes[0][4] {
check_native_page_index(
index,
Expand All @@ -1380,6 +1388,7 @@ mod tests {
unreachable!()
};
//col6->bigint_col: INT64 UNCOMPRESSED DO:0 FPO:152326 SZ:71598/71598/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 90, num_nulls: 0]
assert!(!&page_indexes[0][5].is_sorted());
if let Index::INT64(index) = &page_indexes[0][5] {
check_native_page_index(
index,
Expand All @@ -1392,6 +1401,7 @@ mod tests {
unreachable!()
};
//col7->float_col: FLOAT UNCOMPRESSED DO:0 FPO:223924 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: -0.0, max: 9.9, num_nulls: 0]
assert!(&page_indexes[0][6].is_sorted());
if let Index::FLOAT(index) = &page_indexes[0][6] {
check_native_page_index(
index,
Expand All @@ -1404,6 +1414,7 @@ mod tests {
unreachable!()
};
//col8->double_col: DOUBLE UNCOMPRESSED DO:0 FPO:261249 SZ:71598/71598/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: -0.0, max: 90.89999999999999, num_nulls: 0]
assert!(!&page_indexes[0][7].is_sorted());
if let Index::DOUBLE(index) = &page_indexes[0][7] {
check_native_page_index(
index,
Expand All @@ -1416,6 +1427,7 @@ mod tests {
unreachable!()
};
//col9->date_string_col: BINARY UNCOMPRESSED DO:0 FPO:332847 SZ:111948/111948/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 01/01/09, max: 12/31/10, num_nulls: 0]
assert!(!&page_indexes[0][8].is_sorted());
if let Index::BYTE_ARRAY(index) = &page_indexes[0][8] {
check_bytes_page_index(
index,
Expand All @@ -1428,6 +1440,7 @@ mod tests {
unreachable!()
};
//col10->string_col: BINARY UNCOMPRESSED DO:0 FPO:444795 SZ:45298/45298/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
assert!(&page_indexes[0][9].is_sorted());
if let Index::BYTE_ARRAY(index) = &page_indexes[0][9] {
check_bytes_page_index(
index,
Expand All @@ -1441,12 +1454,14 @@ mod tests {
};
//col11->timestamp_col: INT96 UNCOMPRESSED DO:0 FPO:490093 SZ:111948/111948/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[num_nulls: 0, min/max not defined]
//Notice: min/max values for each page of this column do not exist.
assert!(!&page_indexes[0][10].is_sorted());
if let Index::NONE = &page_indexes[0][10] {
assert_eq!(row_group_offset_indexes[10].len(), 974);
} else {
unreachable!()
};
//col12->year: INT32 UNCOMPRESSED DO:0 FPO:602041 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 2009, max: 2010, num_nulls: 0]
assert!(&page_indexes[0][11].is_sorted());
if let Index::INT32(index) = &page_indexes[0][11] {
check_native_page_index(
index,
Expand All @@ -1459,6 +1474,7 @@ mod tests {
unreachable!()
};
//col13->month: INT32 UNCOMPRESSED DO:0 FPO:639366 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 1, max: 12, num_nulls: 0]
assert!(!&page_indexes[0][12].is_sorted());
if let Index::INT32(index) = &page_indexes[0][12] {
check_native_page_index(
index,
Expand Down

0 comments on commit c3aac93

Please sign in to comment.