From f0a61471ae8ee1ad36b6eee73cfb7c783e3f072e Mon Sep 17 00:00:00 2001 From: Sergey Zhukov Date: Wed, 18 Feb 2026 16:27:59 +0400 Subject: [PATCH 1/2] Disallow order by within ordered-set aggregate functions argument lists(#18281) --- datafusion/sql/src/expr/function.rs | 6 ++++++ datafusion/sqllogictest/test_files/aggregate.slt | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 641f3bb8dcad1..2f41b88e8435b 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -542,6 +542,12 @@ impl SqlToRel<'_, S> { // accept a WITHIN GROUP clause. let supports_within_group = fm.supports_within_group_clause(); + if supports_within_group && !order_by.is_empty() { + return plan_err!( + "ORDER BY must be specified using WITHIN GROUP for ordered-set aggregate functions" + ); + } + if !within_group.is_empty() && !supports_within_group { return plan_err!( "WITHIN GROUP is only supported for ordered-set aggregate functions" diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 517467110fe6d..96ce4b7b77322 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -129,6 +129,14 @@ CREATE TABLE group_median_table_nullable ( # Error tests ####### +statement error ORDER BY must be specified using WITHIN GROUP +select quantile_cont(col0, 0.75 order by col0) +from values (1, 3), (2, 2), (3, 1) t(col0, col1); + +statement error ORDER BY must be specified using WITHIN GROUP +select quantile_cont(0.75 order by col0) +from values (1, 3), (2, 2), (3, 1) t(col0, col1); + statement error DataFusion error: Error during planning: WITHIN GROUP is only supported for ordered-set aggregate functions SELECT SUM(c2) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 From 908db8684f7fdc7dceda324f00785f4356f67946 Mon Sep 17 00:00:00 2001 From: Sergey Zhukov Date: Thu, 5 Mar 2026 10:35:56 +0400 Subject: [PATCH 2/2] supporting both syntaxes --- datafusion/sql/src/expr/function.rs | 73 ++++++++++++++----- .../sqllogictest/test_files/aggregate.slt | 34 +++++++-- 2 files changed, 81 insertions(+), 26 deletions(-) diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 2f41b88e8435b..8f6924380d7c9 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -542,38 +542,70 @@ impl SqlToRel<'_, S> { // accept a WITHIN GROUP clause. let supports_within_group = fm.supports_within_group_clause(); - if supports_within_group && !order_by.is_empty() { - return plan_err!( - "ORDER BY must be specified using WITHIN GROUP for ordered-set aggregate functions" - ); + // Built-in ordered-set aggregates must also support WITHIN GROUP + let is_builtin_ordered_set = matches!( + name.as_str(), + "percentile_cont" + | "quantile_cont" + | "approx_percentile_cont" + | "approx_percentile_cont_with_weight" + ); + + let supports_within_group = + supports_within_group || is_builtin_ordered_set; + + let mut within_group = within_group; + let mut order_by = order_by; + + if supports_within_group + && within_group.is_empty() + && !order_by.is_empty() + { + // Inline ORDER BY syntax: + // quantile_cont(value, percentile ORDER BY value) + if args.len() >= 2 { + args.remove(0); + arg_names.remove(0); + } + + within_group = order_by; + order_by = vec![]; } - if !within_group.is_empty() && !supports_within_group { + if !supports_within_group && !within_group.is_empty() { return plan_err!( "WITHIN GROUP is only supported for ordered-set aggregate functions" ); } - // If the UDAF supports WITHIN GROUP, convert the ordering into - // sort expressions and prepend them as unnamed function args. - let order_by = if supports_within_group { - let (within_group_sorts, new_args, new_arg_names) = self - .extract_and_prepend_within_group_args( + let order_by: Vec = if supports_within_group { + if !within_group.is_empty() { + // WITHIN GROUP syntax + let sorts = self.order_by_to_sort_expr( within_group, - args, - arg_names, schema, planner_context, + false, + None, )?; - args = new_args; - arg_names = new_arg_names; - within_group_sorts - } else { - let order_by = if !order_by.is_empty() { - order_by + + if sorts.len() != 1 { + return plan_err!( + "Only a single ordering expression is permitted in WITHIN GROUP clause" + ); + } + + // Prepend ordered value expression to args + let value_expr = sorts[0].expr.clone(); + arg_names = std::iter::once(None).chain(arg_names).collect(); + args = std::iter::once(value_expr).chain(args).collect(); + + sorts } else { - within_group - }; + vec![] + } + } else { + // Normal aggregate behavior self.order_by_to_sort_expr( order_by, schema, @@ -873,6 +905,7 @@ impl SqlToRel<'_, S> { Ok((exprs, names)) } + #[expect(dead_code)] fn extract_and_prepend_within_group_args( &self, within_group: Vec, diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 96ce4b7b77322..51cc3b14dbe50 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -125,17 +125,39 @@ CREATE TABLE group_median_table_nullable ( ( 'group1', 125, 32766, 2147483646, arrow_cast(9223372036854775806,'Int64'), 100, 101, 4294967294, arrow_cast(100,'UInt64'), 3.2, 5.5, arrow_cast('NAN','Float64'), 0.0004, 0.0004 ), ( 'group1', 127, 32767, 2147483647, arrow_cast(9223372036854775807,'Int64'), 255, 65535, 4294967295, 18446744073709551615, 2.2, 2.2, arrow_cast('NAN','Float64'), 0.0005, 0.0005 ) -####### -# Error tests -####### - -statement error ORDER BY must be specified using WITHIN GROUP +query R select quantile_cont(col0, 0.75 order by col0) from values (1, 3), (2, 2), (3, 1) t(col0, col1); +---- +2.5 + +query R +select quantile_cont(col0, 0.75 order by col0 desc) +from values (1, 3), (2, 2), (3, 1) t(col0, col1); +---- +1.5 -statement error ORDER BY must be specified using WITHIN GROUP +query R select quantile_cont(0.75 order by col0) from values (1, 3), (2, 2), (3, 1) t(col0, col1); +---- +2.5 + +query R +select quantile_cont(0.75 order by col0 desc) +from values (1, 3), (2, 2), (3, 1) t(col0, col1); +---- +1.5 + +query R +select quantile_cont(0.75) within group (order by col0) +from values (1), (2), (3) t(col0); +---- +2.5 + +####### +# Error tests +####### statement error DataFusion error: Error during planning: WITHIN GROUP is only supported for ordered-set aggregate functions SELECT SUM(c2) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100