PostgreSQL Mastery: Advanced Database Guide
PostgreSQL is the world's most advanced open-source relational database. This guide covers advanced features and best practices for building high-performance database applications.
Why PostgreSQL?
- 🚀 ACID Compliant - Reliable transactions
- 📊 Advanced Features - JSON, full-text search, geospatial
- 🔒 Robust - Data integrity and concurrency
- 🌍 Open Source - Free and community-driven
- 📈 Scalable - From small apps to enterprise systems
Installation
# macOS
brew install postgresql@15
brew services start postgresql@15
# Ubuntu
sudo apt-get update
sudo apt-get install postgresql postgresql-contrib
# Start PostgreSQL
sudo systemctl start postgresql
# Connect to PostgreSQL
psql -U postgres
Basic Operations
Database Management
-- Create database
CREATE DATABASE myapp;
-- List databases
\l
-- Connect to database
\c myapp
-- Drop database (you cannot drop the database you are currently connected to — connect to another one, e.g. \c postgres, first)
DROP DATABASE myapp;
-- Create user
CREATE USER myuser WITH PASSWORD 'password';
-- Grant privileges
GRANT ALL PRIVILEGES ON DATABASE myapp TO myuser;
-- Revoke privileges
REVOKE ALL PRIVILEGES ON DATABASE myapp FROM myuser;
Table Operations
-- Create table
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50) UNIQUE NOT NULL,
email VARCHAR(100) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Add column
ALTER TABLE users ADD COLUMN last_login TIMESTAMP;
-- Modify column
ALTER TABLE users ALTER COLUMN email TYPE VARCHAR(150);
-- Drop column
ALTER TABLE users DROP COLUMN last_login;
-- Rename table
ALTER TABLE users RENAME TO app_users;
-- Drop table
DROP TABLE users;
-- List tables
\dt
-- Describe table
\d users
Advanced Data Types
JSON and JSONB
-- Create table with JSONB
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
attributes JSONB
);
-- Insert JSON data
INSERT INTO products (name, attributes) VALUES
('Laptop', '{"brand": "Dell", "ram": "16GB", "storage": "512GB SSD"}'),
('Phone', '{"brand": "Apple", "model": "iPhone 14", "color": "black"}');
-- Query JSON fields
SELECT name, attributes->>'brand' AS brand
FROM products;
-- Query nested JSON (assumes attributes contains a nested 'specs' object — not present in the sample rows above)
SELECT name, attributes->'specs'->>'cpu' AS cpu
FROM products;
-- JSON array operations
SELECT name
FROM products
WHERE attributes->'features' @> '["bluetooth"]';
-- Update JSON field
UPDATE products
SET attributes = jsonb_set(attributes, '{ram}', '"32GB"')
WHERE id = 1;
-- Add JSON field
UPDATE products
SET attributes = attributes || '{"warranty": "2 years"}'
WHERE id = 1;
-- Remove JSON field
UPDATE products
SET attributes = attributes - 'warranty'
WHERE id = 1;
Arrays
-- Create table with array
CREATE TABLE posts (
id SERIAL PRIMARY KEY,
title VARCHAR(200),
tags TEXT[]
);
-- Insert array data
INSERT INTO posts (title, tags) VALUES
('PostgreSQL Guide', ARRAY['database', 'sql', 'postgres']),
('Docker Tutorial', ARRAY['docker', 'containers', 'devops']);
-- Query arrays
SELECT * FROM posts WHERE 'database' = ANY(tags);
-- Array functions
SELECT title, array_length(tags, 1) AS tag_count
FROM posts;
-- Append to array
UPDATE posts
SET tags = array_append(tags, 'tutorial')
WHERE id = 1;
-- Remove from array
UPDATE posts
SET tags = array_remove(tags, 'sql')
WHERE id = 1;
UUID
-- Enable UUID extension
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- Create table with UUID
CREATE TABLE sessions (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
user_id INTEGER,
token VARCHAR(255),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Insert with auto-generated UUID
INSERT INTO sessions (user_id, token)
VALUES (1, 'abc123');
Full-Text Search
-- Create table for full-text search
CREATE TABLE articles (
id SERIAL PRIMARY KEY,
title TEXT,
content TEXT,
search_vector tsvector
);
-- Create GIN index for full-text search
CREATE INDEX articles_search_idx ON articles USING GIN(search_vector);
-- Insert data with search vector
INSERT INTO articles (title, content, search_vector) VALUES
('PostgreSQL Tutorial', 'Learn PostgreSQL database...',
to_tsvector('english', 'PostgreSQL Tutorial Learn PostgreSQL database'));
-- Update search vector automatically
CREATE TRIGGER articles_search_update
BEFORE INSERT OR UPDATE ON articles
FOR EACH ROW EXECUTE FUNCTION
tsvector_update_trigger(search_vector, 'pg_catalog.english', title, content);
-- Search query
SELECT title, content
FROM articles
WHERE search_vector @@ to_tsquery('english', 'postgresql & database');
-- Ranked search
SELECT title, ts_rank(search_vector, query) AS rank
FROM articles, to_tsquery('english', 'postgresql & database') query
WHERE search_vector @@ query
ORDER BY rank DESC;
Advanced Queries
Window Functions
-- Row number
SELECT
name,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) as row_num
FROM employees;
-- Rank with gaps
SELECT
name,
department,
salary,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) as rank
FROM employees;
-- Running total
SELECT
date,
amount,
SUM(amount) OVER (ORDER BY date) as running_total
FROM sales;
-- Moving average
SELECT
date,
amount,
AVG(amount) OVER (
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as moving_avg_7_days
FROM sales;
-- Lag and Lead
SELECT
date,
amount,
LAG(amount, 1) OVER (ORDER BY date) as previous_day,
LEAD(amount, 1) OVER (ORDER BY date) as next_day
FROM sales;
Common Table Expressions (CTEs)
-- Basic CTE
WITH high_earners AS (
SELECT * FROM employees WHERE salary > 100000
)
SELECT department, COUNT(*) as count
FROM high_earners
GROUP BY department;
-- Recursive CTE (Organization hierarchy)
WITH RECURSIVE employee_hierarchy AS (
-- Base case: top-level managers
SELECT id, name, manager_id, 1 as level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case
SELECT e.id, e.name, e.manager_id, eh.level + 1
FROM employees e
JOIN employee_hierarchy eh ON e.manager_id = eh.id
)
SELECT * FROM employee_hierarchy
ORDER BY level, name;
Lateral Joins
-- Get top 3 products per category
SELECT c.name as category, p.name as product, p.price
FROM categories c
CROSS JOIN LATERAL (
SELECT name, price
FROM products
WHERE category_id = c.id
ORDER BY price DESC
LIMIT 3
) p;
Indexing Strategies
Index Types
-- B-tree index (default)
CREATE INDEX idx_users_email ON users(email);
-- Unique index
CREATE UNIQUE INDEX idx_users_username ON users(username);
-- Partial index
CREATE INDEX idx_active_users ON users(email)
WHERE status = 'active';
-- Composite index
CREATE INDEX idx_users_name_email ON users(last_name, first_name, email);
-- GIN index for JSON
CREATE INDEX idx_products_attributes ON products USING GIN(attributes);
-- GiST index (alternative to the GIN index shown earlier for full-text search:
-- faster to build and update, but slower to query and lossy)
CREATE INDEX idx_articles_content ON articles USING GIST(search_vector);
-- Hash index
CREATE INDEX idx_users_id_hash ON users USING HASH(id);
-- Expression index
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
Index Management
-- List indexes
SELECT * FROM pg_indexes WHERE tablename = 'users';
-- Analyze index usage
SELECT
schemaname,
tablename,
indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
WHERE tablename = 'users';
-- Drop index
DROP INDEX idx_users_email;
-- Reindex
REINDEX TABLE users;
-- Concurrent index creation (no table lock)
CREATE INDEX CONCURRENTLY idx_users_created_at ON users(created_at);
Performance Optimization
EXPLAIN and ANALYZE
-- Explain query plan
EXPLAIN SELECT * FROM users WHERE email = 'test@example.com';
-- Analyze actual execution
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'test@example.com';
-- Detailed analysis
EXPLAIN (ANALYZE, BUFFERS, VERBOSE)
SELECT u.*, p.title
FROM users u
JOIN posts p ON u.id = p.user_id
WHERE u.created_at > '2024-01-01';
Query Optimization
-- Use EXISTS instead of IN for large datasets
-- Slow
SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders WHERE total > 1000);
-- Fast
SELECT * FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.id AND o.total > 1000
);
-- Use LIMIT for pagination
SELECT * FROM posts
ORDER BY created_at DESC
LIMIT 20 OFFSET 40;
-- Better pagination with keyset
SELECT * FROM posts
WHERE created_at < '2024-01-01'
ORDER BY created_at DESC
LIMIT 20;
Vacuum and Analyze
-- Manual vacuum
VACUUM users;
-- Vacuum with analyze
VACUUM ANALYZE users;
-- Full vacuum (locks table)
VACUUM FULL users;
-- Auto-vacuum settings (postgresql.conf)
autovacuum = on
autovacuum_max_workers = 3
autovacuum_naptime = 1min
Transactions and Concurrency
Transaction Isolation Levels
-- Read Committed (default)
BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;
-- Your queries
COMMIT;
-- Repeatable Read
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
-- Your queries
COMMIT;
-- Serializable
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;
-- Your queries
COMMIT;
Locks
-- Explicit row lock
BEGIN;
SELECT * FROM accounts WHERE id = 1 FOR UPDATE;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
COMMIT;
-- Advisory locks
SELECT pg_advisory_lock(123);
-- Critical section
SELECT pg_advisory_unlock(123);
Replication and High Availability
Streaming Replication Setup
-- On primary server (postgresql.conf)
wal_level = replica
max_wal_senders = 3
wal_keep_size = 64
-- Create replication user
CREATE USER replicator WITH REPLICATION ENCRYPTED PASSWORD 'password';
-- On standby server (PostgreSQL 12+: recovery.conf and standby_mode are obsolete —
-- create an empty standby.signal file in the data directory, then set the
-- connection string in postgresql.conf)
primary_conninfo = 'host=primary_host port=5432 user=replicator password=password'
Logical Replication
-- On publisher
CREATE PUBLICATION my_publication FOR TABLE users, posts;
-- On subscriber
CREATE SUBSCRIPTION my_subscription
CONNECTION 'host=publisher_host dbname=mydb user=replicator password=password'
PUBLICATION my_publication;
Backup and Restore
# Backup database
pg_dump mydb > mydb_backup.sql
# Backup with compression
pg_dump mydb | gzip > mydb_backup.sql.gz
# Backup specific tables
pg_dump -t users -t posts mydb > tables_backup.sql
# Restore database
psql mydb < mydb_backup.sql
# Restore compressed backup
gunzip -c mydb_backup.sql.gz | psql mydb
# Continuous archiving (PITR)
# postgresql.conf
archive_mode = on
archive_command = 'cp %p /archive/%f'
Using PostgreSQL with Node.js
const { Pool } = require('pg');
const pool = new Pool({
host: 'localhost',
port: 5432,
database: 'myapp',
user: 'postgres',
password: 'password',
max: 20,
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 2000,
});
// Query
async function getUsers() {
const result = await pool.query('SELECT * FROM users LIMIT 10');
return result.rows;
}
// Parameterized query
async function getUserById(id) {
const result = await pool.query(
'SELECT * FROM users WHERE id = $1',
[id]
);
return result.rows[0];
}
// Transaction
async function transferMoney(fromId, toId, amount) {
const client = await pool.connect();
try {
await client.query('BEGIN');
await client.query(
'UPDATE accounts SET balance = balance - $1 WHERE id = $2',
[amount, fromId]
);
await client.query(
'UPDATE accounts SET balance = balance + $1 WHERE id = $2',
[amount, toId]
);
await client.query('COMMIT');
} catch (error) {
await client.query('ROLLBACK');
throw error;
} finally {
client.release();
}
}
// Prepared statements
async function getUsersPrepared() {
const queryName = 'get-users';
const queryText = 'SELECT * FROM users WHERE status = $1';
const result = await pool.query({
name: queryName,
text: queryText,
values: ['active']
});
return result.rows;
}
Best Practices
- Use Connection Pooling - Reuse database connections
- Parameterize Queries - Prevent SQL injection
- Create Appropriate Indexes - Speed up queries
- Regular VACUUM - Maintain database health
- Monitor Performance - Use pg_stat views
- Backup Regularly - Protect your data
- Use Constraints - Ensure data integrity
- Optimize Queries - Use EXPLAIN ANALYZE
Conclusion
PostgreSQL is a powerful, feature-rich database system. Master these advanced concepts to build high-performance, scalable database applications.
Resources
Enjoyed this article?
Explore more deep dives into database architecture, performance tuning, and modern backend development.