From c262e5dd137fe40db73a31eb24c15085e7aaaadd Mon Sep 17 00:00:00 2001 From: Din Date: Thu, 18 Jun 2026 15:36:42 +0100 Subject: [PATCH 1/3] ClickHouse: Support unparenthesized IN right-hand side --- src/dialect/clickhouse.rs | 4 ++++ src/dialect/mod.rs | 13 ++++++++++++ src/parser/mod.rs | 11 ++++++++++ tests/sqlparser_clickhouse.rs | 38 +++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 11 +++++++--- 5 files changed, 74 insertions(+), 3 deletions(-) diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 6ee60cc993..8da5def075 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -80,6 +80,10 @@ impl Dialect for ClickHouseDialect { true } + fn supports_in_unparenthesized_expr(&self) -> bool { + true + } + /// See fn supports_lambda_functions(&self) -> bool { true diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 8a963cd42c..40dba457c1 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -435,6 +435,15 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports a bare expression as the right-hand + /// side of `IN`, without a parenthesized list — as in `x IN 'a'` or the + /// ClickHouse `{name:Type}` query-parameter placeholder `x IN {ids:Array(UInt64)}`. + /// The expression is wrapped into a single-element list, matching ClickHouse, + /// which reformats `x IN 'a'` to `x IN ('a')`. + fn supports_in_unparenthesized_expr(&self) -> bool { + false + } + /// Returns true if the dialect supports `BEGIN {DEFERRED | IMMEDIATE | EXCLUSIVE | TRY | CATCH} [TRANSACTION]` statements fn supports_start_transaction_modifier(&self) -> bool { false @@ -2051,6 +2060,10 @@ mod tests { self.0.supports_in_empty_list() } + fn supports_in_unparenthesized_expr(&self) -> bool { + self.0.supports_in_unparenthesized_expr() + } + fn convert_type_before_value(&self) -> bool { self.0.convert_type_before_value() } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6540cdc0d9..293526a464 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4392,6 +4392,17 @@ impl<'a> Parser<'a> { negated, }); } + // ClickHouse accepts a bare expression as the IN RHS (e.g. `x IN 'a'` or + // a `{name:Type}` placeholder), wrapping it into a single-element list. + if self.dialect.supports_in_unparenthesized_expr() + && self.peek_token_ref().token != Token::LParen + { + return Ok(Expr::InList { + expr: Box::new(expr), + list: vec![self.parse_expr()?], + negated, + }); + } self.expect_token(&Token::LParen)?; let in_op = match self.maybe_parse(|p| p.parse_query())? { Some(subquery) => Expr::InSubquery { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index cb2df1ff6f..6c97407f81 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1846,6 +1846,44 @@ fn parse_inner_array_join() { } } +#[test] +fn parse_in_unparenthesized_placeholder() { + // ClickHouse `{name:Type}` query-parameter placeholder as the IN RHS, without parens. + match clickhouse().expr_parses_to("x IN {ids:Array(UInt64)}", "x IN ({ids: Array(UInt64)})") { + Expr::InList { list, negated, .. } => { + assert!(!negated); + assert_eq!(list.len(), 1); + assert!(matches!(list[0], Expr::Dictionary(_))); + } + other => panic!("expected InList, got {other:?}"), + } + + // NOT IN sets negated. + match clickhouse().expr_parses_to( + "x NOT IN {ids:Array(UInt64)}", + "x NOT IN ({ids: Array(UInt64)})", + ) { + Expr::InList { negated, .. } => assert!(negated), + other => panic!("expected InList, got {other:?}"), + } + + // A bare scalar is also wrapped, matching ClickHouse (`x IN 'a'` -> `x IN ('a')`). + clickhouse().expr_parses_to("x IN 'a'", "x IN ('a')"); + + // The new branch must not fire when the next token is `(` (regressions). + clickhouse().verified_expr("x IN ({ids: Array(UInt64)})"); + clickhouse().verified_expr("x IN (1, 2, 3)"); + clickhouse().verified_stmt("SELECT * FROM t WHERE x IN (SELECT y FROM u)"); + + // Precedence: the trailing `AND` is not swallowed into the placeholder. + clickhouse().verified_expr("x IN ({p: Array(UInt64)}) AND y = 1"); + + // Dialect-scoped: GenericDialect (capability defaults false) still errors. + assert!(TestedDialects::new(vec![Box::new(GenericDialect {})]) + .parse_sql_statements("SELECT * FROM t WHERE x IN {ids:Array(UInt64)}") + .is_err()); +} + fn clickhouse() -> TestedDialects { TestedDialects::new(vec![Box::new(ClickHouseDialect {})]) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index be3026f637..ef7d183dec 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2375,9 +2375,11 @@ fn parse_in_unnest() { #[test] fn parse_in_error() { - // IN is no valid + // IN is no valid, except in dialects that accept an + // unparenthesized expression as the IN right-hand side (e.g. ClickHouse). let sql = "SELECT * FROM customers WHERE segment in segment"; - let res = parse_sql_statements(sql); + let res = + all_dialects_except(|d| d.supports_in_unparenthesized_expr()).parse_sql_statements(sql); assert_eq!( ParserError::ParserError("Expected: (, found: segment".to_string()), res.unwrap_err() @@ -10834,8 +10836,11 @@ fn parse_position() { #[test] fn parse_position_negative() { + // Dialects that accept an unparenthesized IN right-hand side (e.g. ClickHouse) + // report a different error here, so exclude them. let sql = "SELECT POSITION(foo IN) from bar"; - let res = parse_sql_statements(sql); + let res = + all_dialects_except(|d| d.supports_in_unparenthesized_expr()).parse_sql_statements(sql); assert_eq!( ParserError::ParserError("Expected: (, found: )".to_string()), res.unwrap_err() From 497f0a38790ee2fdd2be55689b0ad86f995aac44 Mon Sep 17 00:00:00 2001 From: Din Date: Sun, 21 Jun 2026 10:30:41 +0100 Subject: [PATCH 2/3] pr comments: remove redundant coments, simplify tests --- src/dialect/clickhouse.rs | 1 + src/dialect/mod.rs | 5 +--- src/parser/mod.rs | 2 -- tests/sqlparser_clickhouse.rs | 53 ++++++++++++++--------------------- 4 files changed, 23 insertions(+), 38 deletions(-) diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 8da5def075..9061036f76 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -80,6 +80,7 @@ impl Dialect for ClickHouseDialect { true } + // See fn supports_in_unparenthesized_expr(&self) -> bool { true } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 40dba457c1..432b99e681 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -436,10 +436,7 @@ pub trait Dialect: Debug + Any { } /// Returns true if the dialect supports a bare expression as the right-hand - /// side of `IN`, without a parenthesized list — as in `x IN 'a'` or the - /// ClickHouse `{name:Type}` query-parameter placeholder `x IN {ids:Array(UInt64)}`. - /// The expression is wrapped into a single-element list, matching ClickHouse, - /// which reformats `x IN 'a'` to `x IN ('a')`. + /// side of `IN`, without a parenthesized list — as in `x IN 'a'`. fn supports_in_unparenthesized_expr(&self) -> bool { false } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 293526a464..fe205c0fc8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4392,8 +4392,6 @@ impl<'a> Parser<'a> { negated, }); } - // ClickHouse accepts a bare expression as the IN RHS (e.g. `x IN 'a'` or - // a `{name:Type}` placeholder), wrapping it into a single-element list. if self.dialect.supports_in_unparenthesized_expr() && self.peek_token_ref().token != Token::LParen { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 6c97407f81..06954f0f31 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1847,41 +1847,30 @@ fn parse_inner_array_join() { } #[test] -fn parse_in_unparenthesized_placeholder() { - // ClickHouse `{name:Type}` query-parameter placeholder as the IN RHS, without parens. - match clickhouse().expr_parses_to("x IN {ids:Array(UInt64)}", "x IN ({ids: Array(UInt64)})") { - Expr::InList { list, negated, .. } => { - assert!(!negated); - assert_eq!(list.len(), 1); - assert!(matches!(list[0], Expr::Dictionary(_))); - } - other => panic!("expected InList, got {other:?}"), - } +fn parse_in_unparenthesized_expr() { + // IN [expr] parses to IN ([expr]) and does not cause regressions + let dialects = all_dialects_where(|d| d.supports_in_unparenthesized_expr()); + dialects.expr_parses_to("x IN 'a'", "x IN ('a')"); + + // The branch must not fire when the next token is `(` (regressions). + dialects.verified_expr("x IN (1, 2, 3)"); + dialects.verified_stmt("SELECT * FROM t WHERE x IN (SELECT y FROM u)"); +} - // NOT IN sets negated. - match clickhouse().expr_parses_to( +#[test] +fn parse_in_unparenthesized_dictionary_placeholder() { + // IN [{placeholder:Type}] parses to IN ({placholder:Type}) + let dialects = all_dialects_where(|d| { + d.supports_in_unparenthesized_expr() && d.supports_dictionary_syntax() + }); + dialects.expr_parses_to("x IN {ids:Array(UInt64)}", "x IN ({ids: Array(UInt64)})"); + dialects.expr_parses_to( "x NOT IN {ids:Array(UInt64)}", "x NOT IN ({ids: Array(UInt64)})", - ) { - Expr::InList { negated, .. } => assert!(negated), - other => panic!("expected InList, got {other:?}"), - } - - // A bare scalar is also wrapped, matching ClickHouse (`x IN 'a'` -> `x IN ('a')`). - clickhouse().expr_parses_to("x IN 'a'", "x IN ('a')"); - - // The new branch must not fire when the next token is `(` (regressions). - clickhouse().verified_expr("x IN ({ids: Array(UInt64)})"); - clickhouse().verified_expr("x IN (1, 2, 3)"); - clickhouse().verified_stmt("SELECT * FROM t WHERE x IN (SELECT y FROM u)"); - - // Precedence: the trailing `AND` is not swallowed into the placeholder. - clickhouse().verified_expr("x IN ({p: Array(UInt64)}) AND y = 1"); - - // Dialect-scoped: GenericDialect (capability defaults false) still errors. - assert!(TestedDialects::new(vec![Box::new(GenericDialect {})]) - .parse_sql_statements("SELECT * FROM t WHERE x IN {ids:Array(UInt64)}") - .is_err()); + ); + dialects.verified_expr("x IN ({ids: Array(UInt64)})"); + // Precedence: the trailing `AND` is not swallowed. + dialects.verified_expr("x IN ({p: Array(UInt64)}) AND y = 1"); } fn clickhouse() -> TestedDialects { From 4c76f2fe323547d8dbbd08b1925d0a8f5d20818d Mon Sep 17 00:00:00 2001 From: Din Date: Sun, 21 Jun 2026 10:38:18 +0100 Subject: [PATCH 3/3] reorganize tests --- tests/sqlparser_clickhouse.rs | 18 +++++++----------- tests/sqlparser_common.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 06954f0f31..258f443679 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1849,28 +1849,24 @@ fn parse_inner_array_join() { #[test] fn parse_in_unparenthesized_expr() { // IN [expr] parses to IN ([expr]) and does not cause regressions - let dialects = all_dialects_where(|d| d.supports_in_unparenthesized_expr()); - dialects.expr_parses_to("x IN 'a'", "x IN ('a')"); + clickhouse().expr_parses_to("x IN 'a'", "x IN ('a')"); // The branch must not fire when the next token is `(` (regressions). - dialects.verified_expr("x IN (1, 2, 3)"); - dialects.verified_stmt("SELECT * FROM t WHERE x IN (SELECT y FROM u)"); + clickhouse().verified_expr("x IN (1, 2, 3)"); + clickhouse().verified_stmt("SELECT * FROM t WHERE x IN (SELECT y FROM u)"); } #[test] fn parse_in_unparenthesized_dictionary_placeholder() { // IN [{placeholder:Type}] parses to IN ({placholder:Type}) - let dialects = all_dialects_where(|d| { - d.supports_in_unparenthesized_expr() && d.supports_dictionary_syntax() - }); - dialects.expr_parses_to("x IN {ids:Array(UInt64)}", "x IN ({ids: Array(UInt64)})"); - dialects.expr_parses_to( + clickhouse().expr_parses_to("x IN {ids:Array(UInt64)}", "x IN ({ids: Array(UInt64)})"); + clickhouse().expr_parses_to( "x NOT IN {ids:Array(UInt64)}", "x NOT IN ({ids: Array(UInt64)})", ); - dialects.verified_expr("x IN ({ids: Array(UInt64)})"); + clickhouse().verified_expr("x IN ({ids: Array(UInt64)})"); // Precedence: the trailing `AND` is not swallowed. - dialects.verified_expr("x IN ({p: Array(UInt64)}) AND y = 1"); + clickhouse().verified_expr("x IN ({p: Array(UInt64)}) AND y = 1"); } fn clickhouse() -> TestedDialects { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ef7d183dec..85c81bc91f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2386,6 +2386,34 @@ fn parse_in_error() { ); } +#[test] +fn parse_in_unparenthesized_expr() { + // Dialects supporting an unparenthesized IN right-hand side wrap a bare expression + // into a single-element list (e.g. `x IN 'a'` -> `x IN ('a')`). + let dialects = all_dialects_where(|d| d.supports_in_unparenthesized_expr()); + dialects.expr_parses_to("x IN 'a'", "x IN ('a')"); + + // The branch must not fire when the next token is `(` (regressions). + dialects.verified_expr("x IN (1, 2, 3)"); + dialects.verified_stmt("SELECT * FROM t WHERE x IN (SELECT y FROM u)"); +} + +#[test] +fn parse_in_unparenthesized_dictionary_placeholder() { + // The `{name:Type}` placeholder form additionally requires dictionary syntax. + let dialects = all_dialects_where(|d| { + d.supports_in_unparenthesized_expr() && d.supports_dictionary_syntax() + }); + dialects.expr_parses_to("x IN {ids:Array(UInt64)}", "x IN ({ids: Array(UInt64)})"); + dialects.expr_parses_to( + "x NOT IN {ids:Array(UInt64)}", + "x NOT IN ({ids: Array(UInt64)})", + ); + dialects.verified_expr("x IN ({ids: Array(UInt64)})"); + // Precedence: the trailing `AND` is not swallowed. + dialects.verified_expr("x IN ({p: Array(UInt64)}) AND y = 1"); +} + #[test] fn parse_string_agg() { let sql = "SELECT a || b";