SQL Aggregates & GROUP BY
SQL Aggregates & GROUP BY
Aggregate Functions
SELECT
COUNT(*) AS total_rows, -- count all rows including NULLs
COUNT(column) AS non_null_count, -- count non-NULL values only
COUNT(DISTINCT col) AS unique_count, -- count distinct non-NULL values
SUM(salary) AS total_salary,
AVG(salary) AS avg_salary,
MIN(salary) AS min_salary,
MAX(salary) AS max_salary
FROM employees;
NULL behavior: COUNT(col) skips NULLs. SUM/AVG/MIN/MAX all skip NULLs.
-- AVG skips NULLs — may not equal SUM/COUNT
SELECT AVG(score) FROM results; -- only counts rows where score IS NOT NULL
-- If you want NULL as 0:
SELECT AVG(COALESCE(score, 0)) FROM results;
GROUP BY — Aggregate Per Group
-- Revenue per department
SELECT department, COUNT(*) AS headcount, AVG(salary) AS avg_salary
FROM employees
GROUP BY department;
-- Multiple GROUP BY columns
SELECT department, job_title, COUNT(*) AS count
FROM employees
GROUP BY department, job_title
ORDER BY department, count DESC;
Rule: Every column in SELECT must either be in GROUP BY or be an aggregate function.
-- ❌ WRONG — name not in GROUP BY and not aggregated
SELECT name, department, COUNT(*) FROM employees GROUP BY department;
-- ✅ CORRECT
SELECT department, COUNT(*) FROM employees GROUP BY department;
HAVING — Filter After Aggregation
-- Departments with more than 5 employees
SELECT department, COUNT(*) AS headcount
FROM employees
GROUP BY department
HAVING COUNT(*) > 5;
-- Departments with avg salary above 80k, sorted
SELECT department, AVG(salary) AS avg_sal
FROM employees
GROUP BY department
HAVING AVG(salary) > 80000
ORDER BY avg_sal DESC;
WHERE vs HAVING:
| WHERE | HAVING | |
|---|---|---|
| Runs | Before GROUP BY | After GROUP BY |
| Filters | Individual rows | Aggregated groups |
| Can use aggregate | No | Yes |
-- Both together
SELECT department, COUNT(*) AS active_count
FROM employees
WHERE is_active = TRUE -- filter rows FIRST
GROUP BY department
HAVING COUNT(*) > 3; -- filter groups AFTER aggregation
GROUP BY with CASE — Conditional Buckets
-- Count by salary band
SELECT
CASE
WHEN salary >= 100000 THEN 'Senior'
WHEN salary >= 60000 THEN 'Mid'
ELSE 'Junior'
END AS level,
COUNT(*) AS count
FROM employees
GROUP BY
CASE
WHEN salary >= 100000 THEN 'Senior'
WHEN salary >= 60000 THEN 'Mid'
ELSE 'Junior'
END;
ROLLUP / CUBE / GROUPING SETS (advanced)
-- ROLLUP: subtotals + grand total
SELECT department, job_title, SUM(salary)
FROM employees
GROUP BY ROLLUP(department, job_title);
-- Produces: (dept, job), (dept, NULL=subtotal), (NULL, NULL=grand total)
-- GROUPING SETS: manual control over grouping combinations
SELECT department, job_title, SUM(salary)
FROM employees
GROUP BY GROUPING SETS (
(department, job_title),
(department),
()
);
String Aggregation
-- PostgreSQL: string_agg
SELECT department, STRING_AGG(name, ', ' ORDER BY name) AS employees
FROM employees
GROUP BY department;
-- MySQL: GROUP_CONCAT
SELECT department, GROUP_CONCAT(name ORDER BY name SEPARATOR ', ') AS employees
FROM employees
GROUP BY department;
Common Patterns
-- Percentage of total
SELECT department,
COUNT(*) AS dept_count,
ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 2) AS pct
FROM employees
GROUP BY department;
-- Users with exactly 1 order (not repeated buyers)
SELECT user_id
FROM orders
GROUP BY user_id
HAVING COUNT(*) = 1;
-- Most common value in a column
SELECT status, COUNT(*) AS cnt
FROM orders
GROUP BY status
ORDER BY cnt DESC
LIMIT 1;
-- Conditional count (pivot-style)
SELECT
SUM(CASE WHEN status = 'delivered' THEN 1 ELSE 0 END) AS delivered,
SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) AS cancelled,
SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) AS pending
FROM orders;
Related
- [[Basics]] — WHERE, ORDER BY, CASE WHEN
- [[Window Functions]] — per-row aggregates without GROUP BY collapsing rows
- [[SQL for Interviews]] — classic GROUP BY problems