diff --git a/datafusion/optimizer/src/filter_push_down.rs b/datafusion/optimizer/src/filter_push_down.rs index 6383cdcf855..12976601253 100644 --- a/datafusion/optimizer/src/filter_push_down.rs +++ b/datafusion/optimizer/src/filter_push_down.rs @@ -193,11 +193,10 @@ fn on_lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, bool)> { JoinType::Left => Ok((false, true)), JoinType::Right => Ok((true, false)), JoinType::Full => Ok((false, false)), - // Semi/Anti joins can not have join filter. - JoinType::Semi | JoinType::Anti => Err(DataFusionError::Internal( - "on_lr_is_preserved cannot be appplied to SEMI/ANTI-JOIN nodes" - .to_string(), - )), + JoinType::Semi | JoinType::Anti => { + // filter_push_down does not yet support SEMI/ANTI joins with join conditions + Ok((false, false)) + } }, LogicalPlan::CrossJoin(_) => Err(DataFusionError::Internal( "on_lr_is_preserved cannot be applied to CROSSJOIN nodes".to_string(), diff --git a/datafusion/optimizer/tests/integration-test.rs b/datafusion/optimizer/tests/integration-test.rs index 6dea1a243ff..2d9546f13e5 100644 --- a/datafusion/optimizer/tests/integration-test.rs +++ b/datafusion/optimizer/tests/integration-test.rs @@ -69,6 +69,38 @@ fn distribute_by() -> Result<()> { Ok(()) } +#[test] +fn semi_join_with_join_filter() -> Result<()> { + // regression test for https://github.com/apache/arrow-datafusion/issues/2888 + let sql = "SELECT * FROM test WHERE EXISTS (\ + SELECT * FROM test t2 WHERE test.col_int32 = t2.col_int32 \ + AND test.col_uint32 != t2.col_uint32)"; + let plan = test_sql(sql)?; + let expected = r#"Projection: test.col_int32, test.col_uint32, test.col_utf8, test.col_date32, test.col_date64 + Semi Join: test.col_int32 = t2.col_int32 Filter: test.col_uint32 != t2.col_uint32 + TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64] + SubqueryAlias: t2 + TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]"#; + assert_eq!(expected, format!("{:?}", plan)); + Ok(()) +} + +#[test] +fn anti_join_with_join_filter() -> Result<()> { + // regression test for https://github.com/apache/arrow-datafusion/issues/2888 + let sql = "SELECT * FROM test WHERE NOT EXISTS (\ + SELECT * FROM test t2 WHERE test.col_int32 = t2.col_int32 \ + AND test.col_uint32 != t2.col_uint32)"; + let plan = test_sql(sql)?; + let expected = r#"Projection: test.col_int32, test.col_uint32, test.col_utf8, test.col_date32, test.col_date64 + Anti Join: test.col_int32 = t2.col_int32 Filter: test.col_uint32 != t2.col_uint32 + TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64] + SubqueryAlias: t2 + TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]"#; + assert_eq!(expected, format!("{:?}", plan)); + Ok(()) +} + #[test] fn intersect() -> Result<()> { let sql = "SELECT col_int32, col_utf8 FROM test \