Basic Group By |
GROUP BY is used in conjunction with aggregating functions to group the results by the unaggregated columns |
Aggregating Functions |
AVG |
RANK |
STATS_MW_TEST |
CORR |
REGR_AVGX |
STATS_ONE_WAY_ANOVA |
CORR_K |
REGR_AVGY |
STATS_T_TEST_INDEP |
CORR_S |
REGR_COUNT |
STATS_T_TEST_INDEPU |
COUNT |
REGR_INTERCEPT |
STATS_T_TEST_ONE |
COVAR_POP |
REGR_R2 |
STATS_T_TEST_PAIRED |
COVAR_SAMP |
REGR_SLOPE |
STATS_WSR_TEST |
CUME_DIST |
REGR_SXX |
STDDEV |
DENSE_RANK |
REGR_SXY |
STDDEV_POP |
LISTAGG |
REGR_SYY |
STDDEV_SAMP |
MAX |
STATS_BINOMIAL_TEST |
SUM |
MEDIAN |
STATS_CROSSTAB |
SYS_OP_COUNTCHG |
MIN |
STATS_F_TEST |
VARIANCE |
PERCENTILE_CONT |
STATS_KS_TEST |
VAR_POP |
PERCENTILE_DISC |
STATS_MODE |
VAR_SAMP |
PERCENT_RANK |
|
|
|
|
SQL Statement Not Requiring GROUP BY |
SELECT COUNT(*)
FROM all_tables; |
SQL Statement With A Single Ungrouped Column Requiring GROUP BY |
SELECT table_name, COUNT(*)
FROM all_tables;
SELECT table_name, COUNT(*)
FROM all_indexes
GROUP BY table_name; |
SQL Statement With Multiple Ungrouped Columns And Multiple Grouped Columns |
set linesize 121
col index_type format a20
SELECT table_name, index_type, COUNT(leaf_blocks), COUNT(distinct_keys)
FROM all_indexes
GROUP BY table_name, index_type; |
|
Group By Result Merging |
The following demo is based on a demo posted by Michel Cadot in comp.databases.oracle.server |
conn uwclass/uwclass@pdbdev
CREATE TABLE grp_test (
year VARCHAR2(4),
result NUMBER(7));
INSERT INTO grp_test VALUES ('2021', 1000);
INSERT INTO grp_test VALUES ('2018', 1250);
INSERT INTO grp_test VALUES ('2019', 3786);
INSERT INTO grp_test VALUES ('2020', 977);
INSERT INTO grp_test VALUES ('2017', 5000);
INSERT INTO grp_test VALUES ('2011', 123);
INSERT INTO grp_test VALUES ('2018', 125);
INSERT INTO grp_test VALUES ('2013', 2000);
INSERT INTO grp_test VALUES ('2014', 2200);
INSERT INTO grp_test VALUES ('2013', 150);
COMMIT;
SELECT * FROM grp_test;
SELECT DECODE(SIGN(year-2018),1,year,'2018 or before'), SUM(result)
FROM grp_test
GROUP BY DECODE(SIGN(year-2018),1,year,'2018 or before')
ORDER BY 1; |
|
Group By With Having |
HAVING is used to perform an action on groups created by GROUP BY similar to that of the WHERE clause on rows in a basic SQL statement.
The WHERE clause limits the rows evaluated. The HAVING clause limits the grouped rows returned. |
GROUP BY With HAVING Clause |
SELECT table_name, COUNT(*)
FROM all_indexes
GROUP BY table_name
HAVING COUNT(*) > 1;
SELECT table_name, COUNT(*)
FROM all_indexes
GROUP BY table_name
HAVING COUNT(*) = 2;
SELECT table_name, COUNT(*)
FROM all_indexes
GROUP BY table_name
HAVING COUNT(*) BETWEEN 2 AND 3; |
|
Group By Substitute |
GROUP BY Substitution Demo |
conn uwclass/uwclass@pdbdev
CREATE TABLE t1 AS
SELECT rownum objid, 'name'||rownum objname
FROM all_objects;
CREATE TABLE t2 AS
SELECT objid, dbms_random.value(1,10) PRICE
FROM t1;
INSERT INTO t2
SELECT objid, dbms_random.value(1,10) PRICE
FROM t1;
CREATE INDEX t1_idx
ON t1 (objid);
CREATE INDEX t2_idx
ON t2(objid);
exec dbms_stats.gather_schema_stats(USER, CASCADE=>TRUE); |
-- observe the output
SELECT t1.objid, MAX(t2.price) PRICE
FROM t1, t2
WHERE t1.objid = t2.objid
AND t1.objname = 'name120'
GROUP BY t1.objid;
SELECT t1.objid, (
SELECT MAX(price)
FROM t2
WHERE t2.objid = t1.objid) PRICE
FROM t1
WHERE objname = 'name120'; |
-- compare the plans
set autotrace traceonly exp statistics
SELECT t1.objid, MAX(t2.price) PRICE
FROM t1, t2
WHERE t1.objid = t2.objid
AND t1.objname = 'name120'
GROUP BY t1.objid;
SELECT t1.objid, (
SELECT MAX(price)
FROM t2
WHERE t2.objid = t1.objid) PRICE
FROM t1
WHERE objname = 'name120';
set autotrace off |
More Than One Way To Approach A Problem |
conn sys@pdbdev as sysdba
set autotrace traceonly
-- conventional group by
SELECT table_name, COUNT(*) TAB_CNT
FROM all_indexes
GROUP BY table_name;
-- sub-query solution
SELECT DISTINCT table_name, (
SELECT COUNT(*)
FROM all_indexes ai2
WHERE ai2.table_name = ai1.table_name) TAB_CNT
FROM all_indexes ai1;
-- analytic counterpart
SELECT DISTINCT table_name, COUNT(*)
OVER (PARTITION BY table_name) TAB_CNT
FROM all_indexes;
set autotrace off
-- Note: this demo does not take into account the possibility that indexes with the same name occur in more than one schema |
Find the most prevalent value in a column |
SELECT cnt1.initial_extent
FROM (
SELECT initial_extent, COUNT(*) TOTAL
FROM all_tables
GROUP BY initial_extent) cnt1,
(
SELECT MAX(total) MAXTOTAL
FROM (
SELECT initial_extent, COUNT(*) TOTAL
FROM all_tables
GROUP BY initial_extent)) cnt2
WHERE cnt1.total = cnt2.maxtotal; |
|
Duplicate Removal |
Remove Duplicate Data |
conn uwclass/uwclass@pdbdev
CREATE TABLE t (
col1 VARCHAR2(3),
col2 VARCHAR2(3));
INSERT INTO t VALUES ('AAA', '123');
INSERT INTO t VALUES ('BBB', '123');
INSERT INTO t VALUES ('CCC', '789');
INSERT INTO t VALUES ('AAA', '123');
INSERT INTO t VALUES ('DDD', '123');
INSERT INTO t VALUES ('CCC', '789');
INSERT INTO t VALUES ('CCC', '987');
COMMIT;
SELECT * FROM t;
SELECT MIN(rowid), col1, col2
FROM t
GROUP BY col1, col2;
DELETE FROM t
WHERE rowid NOT IN (
SELECT MIN(rowid)
FROM t
GROUP BY col1, col2);
COMMIT;
SELECT * FROM t; |