Back to Notes

SQL Aggregates & GROUP BY

SQL Aggregates & GROUP BY

Aggregate Functions

SELECT
  COUNT(*)              AS total_rows,        -- count all rows including NULLs
  COUNT(column)         AS non_null_count,    -- count non-NULL values only
  COUNT(DISTINCT col)   AS unique_count,      -- count distinct non-NULL values
  SUM(salary)           AS total_salary,
  AVG(salary)           AS avg_salary,
  MIN(salary)           AS min_salary,
  MAX(salary)           AS max_salary
FROM employees;

NULL behavior: COUNT(col) skips NULLs. SUM/AVG/MIN/MAX all skip NULLs.

-- AVG skips NULLs — may not equal SUM/COUNT
SELECT AVG(score) FROM results;  -- only counts rows where score IS NOT NULL
-- If you want NULL as 0:
SELECT AVG(COALESCE(score, 0)) FROM results;

GROUP BY — Aggregate Per Group

-- Revenue per department
SELECT department, COUNT(*) AS headcount, AVG(salary) AS avg_salary
FROM employees
GROUP BY department;

-- Multiple GROUP BY columns
SELECT department, job_title, COUNT(*) AS count
FROM employees
GROUP BY department, job_title
ORDER BY department, count DESC;

Rule: Every column in SELECT must either be in GROUP BY or be an aggregate function.

-- ❌ WRONG — name not in GROUP BY and not aggregated
SELECT name, department, COUNT(*) FROM employees GROUP BY department;

-- ✅ CORRECT
SELECT department, COUNT(*) FROM employees GROUP BY department;

HAVING — Filter After Aggregation

-- Departments with more than 5 employees
SELECT department, COUNT(*) AS headcount
FROM employees
GROUP BY department
HAVING COUNT(*) > 5;

-- Departments with avg salary above 80k, sorted
SELECT department, AVG(salary) AS avg_sal
FROM employees
GROUP BY department
HAVING AVG(salary) > 80000
ORDER BY avg_sal DESC;

WHERE vs HAVING:

WHEREHAVING
RunsBefore GROUP BYAfter GROUP BY
FiltersIndividual rowsAggregated groups
Can use aggregateNoYes
-- Both together
SELECT department, COUNT(*) AS active_count
FROM employees
WHERE is_active = TRUE          -- filter rows FIRST
GROUP BY department
HAVING COUNT(*) > 3;            -- filter groups AFTER aggregation

GROUP BY with CASE — Conditional Buckets

-- Count by salary band
SELECT
  CASE
    WHEN salary >= 100000 THEN 'Senior'
    WHEN salary >= 60000  THEN 'Mid'
    ELSE 'Junior'
  END AS level,
  COUNT(*) AS count
FROM employees
GROUP BY
  CASE
    WHEN salary >= 100000 THEN 'Senior'
    WHEN salary >= 60000  THEN 'Mid'
    ELSE 'Junior'
  END;

ROLLUP / CUBE / GROUPING SETS (advanced)

-- ROLLUP: subtotals + grand total
SELECT department, job_title, SUM(salary)
FROM employees
GROUP BY ROLLUP(department, job_title);
-- Produces: (dept, job), (dept, NULL=subtotal), (NULL, NULL=grand total)

-- GROUPING SETS: manual control over grouping combinations
SELECT department, job_title, SUM(salary)
FROM employees
GROUP BY GROUPING SETS (
  (department, job_title),
  (department),
  ()
);

String Aggregation

-- PostgreSQL: string_agg
SELECT department, STRING_AGG(name, ', ' ORDER BY name) AS employees
FROM employees
GROUP BY department;

-- MySQL: GROUP_CONCAT
SELECT department, GROUP_CONCAT(name ORDER BY name SEPARATOR ', ') AS employees
FROM employees
GROUP BY department;

Common Patterns

-- Percentage of total
SELECT department,
  COUNT(*) AS dept_count,
  ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 2) AS pct
FROM employees
GROUP BY department;

-- Users with exactly 1 order (not repeated buyers)
SELECT user_id
FROM orders
GROUP BY user_id
HAVING COUNT(*) = 1;

-- Most common value in a column
SELECT status, COUNT(*) AS cnt
FROM orders
GROUP BY status
ORDER BY cnt DESC
LIMIT 1;

-- Conditional count (pivot-style)
SELECT
  SUM(CASE WHEN status = 'delivered' THEN 1 ELSE 0 END) AS delivered,
  SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) AS cancelled,
  SUM(CASE WHEN status = 'pending'   THEN 1 ELSE 0 END) AS pending
FROM orders;

Related

  • [[Basics]] — WHERE, ORDER BY, CASE WHEN
  • [[Window Functions]] — per-row aggregates without GROUP BY collapsing rows
  • [[SQL for Interviews]] — classic GROUP BY problems