Back to Notes

Writing Clean SQL

Writing Clean SQL

Formatting Rules

-- ✅ Good: uppercase keywords, lowercase identifiers, aligned columns
SELECT
  u.name,
  u.email,
  COUNT(o.id)  AS order_count,
  SUM(o.total) AS total_spent
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2026-01-01'
GROUP BY u.id, u.name, u.email
HAVING COUNT(o.id) > 0
ORDER BY total_spent DESC;

-- ❌ Bad: no formatting, hard to read
select u.name,u.email,count(o.id) as order_count,sum(o.total) as total_spent from users u join orders o on u.id=o.user_id where u.created_at>='2026-01-01' group by u.id,u.name,u.email having count(o.id)>0 order by total_spent desc;

Naming Conventions

-- ✅ Descriptive aliases
SELECT u.name AS customer_name, COUNT(o.id) AS order_count

-- ❌ Cryptic aliases
SELECT u.name AS n, COUNT(o.id) AS c

-- ✅ Alias tables clearly
FROM users u
JOIN orders o ON u.id = o.user_id

-- ❌ t1, t2 aliases (meaningless)
FROM users t1
JOIN orders t2 ON t1.id = t2.user_id

Avoid SELECT *

-- ❌ SELECT * — fragile, transfers excess data, breaks if schema changes
SELECT * FROM orders JOIN users ON orders.user_id = users.id;

-- ✅ Explicit columns
SELECT
  orders.id AS order_id,
  orders.total,
  users.name AS customer_name
FROM orders
JOIN users ON orders.user_id = users.id;

Use CTEs for Readability

-- ❌ Nested subqueries — hard to trace
SELECT *
FROM (
  SELECT user_id, COUNT(*) AS order_count
  FROM (SELECT * FROM orders WHERE status = 'delivered') d
  GROUP BY user_id
) o
WHERE order_count > 5;

-- ✅ CTEs — named, testable, readable
WITH delivered_orders AS (
  SELECT user_id
  FROM orders
  WHERE status = 'delivered'
),
order_counts AS (
  SELECT user_id, COUNT(*) AS order_count
  FROM delivered_orders
  GROUP BY user_id
)
SELECT *
FROM order_counts
WHERE order_count > 5;

Filter Early

-- ❌ Join everything, then filter — process more rows than needed
SELECT u.name, o.total
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE u.country = 'IN' AND o.status = 'delivered';

-- ✅ Same thing, but CTE pre-filters (makes intent explicit, sometimes faster)
WITH indian_users AS (
  SELECT id, name FROM users WHERE country = 'IN'
),
delivered AS (
  SELECT user_id, total FROM orders WHERE status = 'delivered'
)
SELECT u.name, d.total
FROM indian_users u
JOIN delivered d ON u.id = d.user_id;

NULL Handling — Be Explicit

-- ❌ Silent NULL bugs
SELECT AVG(score) FROM results;  -- ignores NULLs silently

-- ✅ Explicit intent
SELECT AVG(COALESCE(score, 0)) FROM results;  -- treat NULL as 0
-- OR document the behavior:
-- Note: NULL scores excluded from average (absent != zero)
SELECT AVG(score) FROM results WHERE score IS NOT NULL;

Prefer Explicit JOINs

-- ❌ Implicit join (old syntax)
SELECT * FROM users u, orders o WHERE u.id = o.user_id;

-- ✅ Explicit JOIN
SELECT * FROM users u JOIN orders o ON u.id = o.user_id;

Aggregate Filter Placement

-- ❌ WRONG — can't use aggregate in WHERE
SELECT department, COUNT(*)
FROM employees
WHERE COUNT(*) > 5   -- syntax error
GROUP BY department;

-- ✅ CORRECT — HAVING for post-aggregation filter
SELECT department, COUNT(*) AS headcount
FROM employees
GROUP BY department
HAVING COUNT(*) > 5;

Window Functions Over Correlated Subqueries

-- ❌ Correlated subquery — runs N times (slow)
SELECT name, salary,
  (SELECT AVG(salary) FROM employees e2 WHERE e2.dept = e1.dept) AS dept_avg
FROM employees e1;

-- ✅ Window function — one pass
SELECT name, salary,
  AVG(salary) OVER (PARTITION BY dept) AS dept_avg
FROM employees;

Use Indexes Effectively

-- ❌ Function on indexed column — defeats index
WHERE YEAR(created_at) = 2026

-- ✅ Range query — uses index
WHERE created_at >= '2026-01-01' AND created_at < '2027-01-01'

-- ❌ Leading wildcard — can't use B-tree index
WHERE name LIKE '%alice'

-- ✅ Prefix match — uses index
WHERE name LIKE 'alice%'

Checklist Before Submitting Query

[ ] Uppercase keywords, lowercase identifiers
[ ] Meaningful table aliases (not t1/t2)
[ ] Explicit column names (no SELECT *)
[ ] WHERE filters placed correctly (not HAVING for row filters)
[ ] No function on indexed columns in WHERE
[ ] UNION ALL instead of UNION (unless dedup needed)
[ ] NULL handling explicit (COALESCE, IS NULL)
[ ] CTE for any logic used more than once

Related

  • [[Basics]] — execution order, CASE WHEN
  • [[Subqueries & CTEs]] — when to use CTEs vs subqueries
  • [[Indexes & Performance]] — index-friendly query patterns