sqlglot parses, formats, and transpiles SQL across 20+ dialects. pip install sqlglot. Parse: import sqlglot; ast = sqlglot.parse_one("SELECT a, b FROM t WHERE a > 1"). Transpile: sqlglot.transpile("SELECT EPOCH_MS(1000)", read="duckdb", write="bigquery") → ["SELECT TIMESTAMP_MILLIS(1000)"]. Dialects: spark, bigquery, snowflake, duckdb, postgres, mysql, sqlite, tsql, hive, presto, trino, clickhouse, redshift, oracle. Pretty print: sqlglot.parse_one(sql).sql(dialect="postgres", pretty=True). Generate: sqlglot.generate(ast, dialect="mysql"). Tables: [t.name for t in ast.find_all(sqlglot.exp.Table)]. Columns: [c.name for c in ast.find_all(sqlglot.exp.Column)]. Transform: ast.transform(lambda n: n.replace(n.alias_or_name.upper()) if isinstance(n, exp.Column) else n). Select: sqlglot.select("a", "b").from_("t").where("a > 1").sql() — builder. JOIN: sqlglot.select("*").from_("t1").join("t2", on="t1.id = t2.id"). Diff: sqlglot.diff(ast1, ast2) → list of changes. Optimizer: from sqlglot.optimizer import optimize; optimize(sql, schema={"t": {"a": "INT"}}). qualify_columns pushdown_predicates eliminate_subqueries. Errors: sqlglot.errors.SqlglotError. sqlglot.errors.ParseError. Claude Code generates sqlglot transpilers, SQL validators, and multi-dialect query builders.
CLAUDE.md for sqlglot
## sqlglot Stack
- Version: sqlglot >= 23 | pip install sqlglot
- Parse: sqlglot.parse_one(sql, dialect="bigquery") → Expression AST
- Transpile: sqlglot.transpile(sql, read="snowflake", write="postgres") → [sql_str]
- Generate: ast.sql(dialect="duckdb", pretty=True) | sqlglot.generate(ast)
- Extract: ast.find_all(exp.Table) | ast.find_all(exp.Column)
- Build: sqlglot.select("*").from_("t").where("x > 1").sql()
- Dialects: bigquery, snowflake, spark, duckdb, postgres, mysql, sqlite, tsql
sqlglot SQL Transpilation Pipeline
# app/sql_tools.py — sqlglot parsing, transpilation, extraction, and builder
from __future__ import annotations
from typing import Any
import sqlglot
import sqlglot.expressions as exp
from sqlglot.errors import SqlglotError
# ─────────────────────────────────────────────────────────────────────────────
# 1. Parsing and transpilation
# ─────────────────────────────────────────────────────────────────────────────
def parse(sql: str, dialect: str | None = None) -> exp.Expression:
    """
    Parse a SQL string into a sqlglot expression tree.

    sql: the SQL text to parse.
    dialect: source dialect name (e.g. "bigquery", "snowflake", "duckdb");
        None falls back to sqlglot's default dialect.
    Raises sqlglot.errors.ParseError when the SQL cannot be parsed.
    """
    ast = sqlglot.parse_one(sql, dialect=dialect)
    return ast
def transpile(
    sql: str,
    from_dialect: str,
    to_dialect: str,
    pretty: bool = False,
) -> str:
    """
    Convert a SQL statement from one dialect to another.

    Example: "SELECT EPOCH_MS(1000)" read as duckdb, written as bigquery,
    becomes "SELECT TIMESTAMP_MILLIS(1000)".
    Returns the transpiled SQL, or "" when sqlglot produces no output.
    """
    converted = sqlglot.transpile(sql, read=from_dialect, write=to_dialect, pretty=pretty)
    if not converted:
        return ""
    return converted[0]
def to_postgres(sql: str, from_dialect: str = "bigquery", pretty: bool = True) -> str:
    """Shorthand: transpile *sql* from any source dialect into PostgreSQL."""
    return transpile(sql, from_dialect, "postgres", pretty=pretty)
def to_duckdb(sql: str, from_dialect: str = "bigquery", pretty: bool = False) -> str:
    """Shorthand: transpile to DuckDB — handy for locally testing BigQuery/Snowflake SQL."""
    return transpile(sql, from_dialect, "duckdb", pretty=pretty)
def to_sqlite(sql: str, from_dialect: str = "postgres", pretty: bool = False) -> str:
    """Shorthand: transpile to SQLite, typically for lightweight test environments."""
    return transpile(sql, from_dialect, "sqlite", pretty=pretty)
def normalize(sql: str, dialect: str | None = None, pretty: bool = True) -> str:
    """
    Round-trip SQL through the parser to produce a canonical rendering.

    Useful for comparing two queries for equivalence of text form, and for
    reducing whitespace/case noise in version-control diffs.
    """
    return parse(sql, dialect=dialect).sql(dialect=dialect, pretty=pretty)
def is_valid_sql(sql: str, dialect: str | None = None) -> bool:
    """Check whether *sql* parses cleanly under the given dialect."""
    try:
        sqlglot.parse_one(sql, dialect=dialect)
    except SqlglotError:
        return False
    return True
# ─────────────────────────────────────────────────────────────────────────────
# 2. AST extraction
# ─────────────────────────────────────────────────────────────────────────────
def extract_tables(sql: str, dialect: str | None = None) -> list[str]:
    """
    Return the sorted, de-duplicated names of all tables a query references.

    The whole AST is walked, so tables inside FROM, JOINs, and subqueries
    are all found.  NOTE(review): references to CTE names likely also parse
    as Table nodes and would appear here — confirm if that matters to callers.
    """
    ast = parse(sql, dialect=dialect)
    names = {node.name for node in ast.find_all(exp.Table) if node.name}
    return sorted(names)
def extract_columns(sql: str, dialect: str | None = None) -> list[str]:
    """
    Return the sorted, de-duplicated column names referenced anywhere in a query.

    Names are returned unqualified (no table prefix).
    """
    ast = parse(sql, dialect=dialect)
    names = {node.name for node in ast.find_all(exp.Column) if node.name}
    return sorted(names)
def extract_select_columns(sql: str, dialect: str | None = None) -> list[dict[str, str | None]]:
    """
    Extract projected expressions from the SELECT clause.

    Returns a list of {"expression": <rendered SQL>, "alias": <alias or None>}.

    Fix: sqlglot's ``Expression.alias`` yields an empty string — not None —
    when no alias is present, which broke the declared ``str | None`` contract;
    empty aliases are now normalized to None.
    Non-SELECT statements (e.g. INSERT) yield an empty list, as before.
    """
    ast = parse(sql, dialect=dialect)
    result: list[dict[str, str | None]] = []
    if isinstance(ast, exp.Select):
        for sel in ast.expressions:
            # getattr keeps the original hasattr() defensiveness; `or None`
            # maps the "" no-alias sentinel to None.
            alias = getattr(sel, "alias", None) or None
            result.append({"expression": sel.sql(dialect=dialect), "alias": alias})
    return result
def extract_where_columns(sql: str, dialect: str | None = None) -> list[str]:
    """
    Extract column names referenced in WHERE clauses.

    Fix: the original inspected only the first Where node found; a query with
    subqueries or CTEs can contain several WHERE clauses, so every Where node
    in the AST is now scanned.  Result is a superset of the old behavior.
    """
    ast = parse(sql, dialect=dialect)
    names: set[str] = set()
    for where in ast.find_all(exp.Where):
        names.update(c.name for c in where.find_all(exp.Column) if c.name)
    return sorted(names)
def extract_ctes(sql: str, dialect: str | None = None) -> list[str]:
    """Return the CTE names defined in a query's WITH clause, in definition order."""
    ast = parse(sql, dialect=dialect)
    with_clause = ast.find(exp.With)
    if with_clause is None:
        return []
    names: list[str] = []
    for cte in with_clause.find_all(exp.CTE):
        if cte.alias:
            names.append(cte.alias)
    return names
# ─────────────────────────────────────────────────────────────────────────────
# 3. SQL builder
# ─────────────────────────────────────────────────────────────────────────────
def build_select(
    table: str,
    columns: list[str] | None = None,
    where: str | None = None,
    order_by: list[str] | None = None,
    limit: int | None = None,
    dialect: str = "postgres",
) -> str:
    """
    Compose a SELECT statement with sqlglot's fluent builder API.

    Optional clauses (WHERE, ORDER BY, LIMIT) are attached only when their
    arguments are provided; no manual string concatenation is involved, and
    the output is rendered pretty-printed for the requested dialect.
    """
    projection = columns if columns else ["*"]
    query = sqlglot.select(*projection).from_(table)
    if where:
        query = query.where(where)
    if order_by:
        query = query.order_by(*order_by)
    if limit is not None:
        query = query.limit(limit)
    return query.sql(dialect=dialect, pretty=True)
def build_join(
    left: str,
    right: str,
    on: str,
    columns: list[str] | None = None,
    join_type: str = "inner",
    dialect: str = "postgres",
) -> str:
    """Compose a two-table SELECT ... JOIN query via the builder API."""
    projection = columns if columns else ["*"]
    query = (
        sqlglot.select(*projection)
        .from_(left)
        .join(right, on=on, join_type=join_type)
    )
    return query.sql(dialect=dialect, pretty=True)
def add_row_number(
    sql: str,
    partition_by: list[str],
    order_by: list[str],
    alias: str = "rn",
    dialect: str = "postgres",
) -> str:
    """
    Append a ROW_NUMBER() window-function column to a SELECT query.

    Useful for deduplication or pagination queries.

    Fixes:
    - An empty ``partition_by`` previously produced invalid SQL such as
      "OVER (PARTITION BY  ORDER BY x)"; the PARTITION BY clause is now
      omitted when no partition columns are given.
    - An empty ``order_by`` previously failed with an opaque parse error
      deep inside sqlglot; it now raises a clear ValueError up front.

    Non-SELECT statements are returned unchanged apart from formatting,
    matching the original behavior.
    """
    if not order_by:
        raise ValueError("add_row_number requires at least one ORDER BY column")
    over_parts: list[str] = []
    if partition_by:
        over_parts.append("PARTITION BY " + ", ".join(partition_by))
    over_parts.append("ORDER BY " + ", ".join(order_by))
    window = f"ROW_NUMBER() OVER ({' '.join(over_parts)}) AS {alias}"
    ast = parse(sql, dialect=dialect)
    if isinstance(ast, exp.Select):
        # Select.select() appends the window expression to the projection list.
        ast = ast.select(window)
    return ast.sql(dialect=dialect, pretty=True)
# ─────────────────────────────────────────────────────────────────────────────
# 4. Multi-dialect batch transpilation
# ─────────────────────────────────────────────────────────────────────────────
def transpile_batch(
    queries: list[str],
    from_dialect: str,
    to_dialect: str,
    skip_errors: bool = True,
) -> list[dict[str, Any]]:
    """
    Transpile a list of SQL queries from one dialect to another.

    Each input yields {"original", "transpiled", "error"}; on a parse or
    generation failure, "transpiled" is None and "error" holds the message.
    With skip_errors=False the first failure is re-raised instead.
    """
    results: list[dict[str, Any]] = []
    for query in queries:
        entry: dict[str, Any] = {"original": query, "transpiled": None, "error": None}
        try:
            entry["transpiled"] = transpile(query, from_dialect=from_dialect, to_dialect=to_dialect)
        except SqlglotError as exc:
            if not skip_errors:
                raise
            entry["error"] = str(exc)
        results.append(entry)
    return results
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Demo 1: pairwise dialect transpilation of common dialect-specific idioms.
    print("=== Transpilation ===")
    migration_queries = [
        ("SELECT TIMESTAMP_DIFF(a, b, DAY)", "bigquery", "postgres"),
        ("SELECT DATE_TRUNC('month', created_at)", "postgres", "bigquery"),
        ("SELECT TO_TIMESTAMP(epoch_col)", "postgres", "duckdb"),
        ("SELECT IFNULL(a, 0)", "mysql", "postgres"),
        ("SELECT NVL(a, 0)", "snowflake", "postgres"),
        ("SELECT TOP 10 * FROM t", "tsql", "postgres"),
        ("SELECT * FROM t LIMIT 10 OFFSET 20", "postgres", "mysql"),
        ("SELECT EPOCH_MS(ts)", "duckdb", "bigquery"),
    ]
    for sql, src, dst in migration_queries:
        try:
            result = transpile(sql, from_dialect=src, to_dialect=dst)
            print(f" [{src:10} → {dst:10}] {sql!r:45} → {result!r}")
        except SqlglotError as e:
            # Parse failures are reported rather than aborting the demo run.
            print(f" [{src:10} → {dst:10}] PARSE ERROR: {e}")
    # Demo 2: AST extraction helpers against a CTE + JOIN query.
    print("\n=== Extraction ===")
    complex_sql = """
    WITH cte AS (
    SELECT u.id, u.name, o.total
    FROM users u
    JOIN orders o ON u.id = o.user_id
    WHERE u.active = TRUE AND o.total > 100
    )
    SELECT id, name, total FROM cte ORDER BY total DESC LIMIT 5
    """
    print(f" tables: {extract_tables(complex_sql)}")
    print(f" columns: {extract_columns(complex_sql)}")
    print(f" ctes: {extract_ctes(complex_sql)}")
    print(f" where cols: {extract_where_columns(complex_sql)}")
    # Demo 3: fluent builder API producing a pretty-printed aggregate query.
    print("\n=== Builder ===")
    query = build_select(
        table="orders",
        columns=["user_id", "SUM(total) AS total_spend"],
        where="created_at > '2024-01-01'",
        order_by=["total_spend DESC"],
        limit=10,
    )
    print(query)
    # Demo 4: canonical re-rendering of messy, inconsistently-cased input.
    print("\n=== Normalize (format + deduplicate whitespace) ===")
    messy = "select u.ID,u.NAME from USERS u where u.ID>5"
    print(f" Input: {messy!r}")
    print(f" Normal: {normalize(messy)!r}")
    # Demo 5: parse-based validation; the second statement is intentionally broken.
    print("\n=== Validation ===")
    sqls = [
        "SELECT a, b FROM t WHERE a > 1",
        "SELECT FROM WHERE",
        "INSERT INTO t (a,b) VALUES (1,2)",
    ]
    for sql in sqls:
        print(f" valid={is_valid_sql(sql)} {sql!r}")
For the sqlparse alternative — sqlparse can tokenize and format SQL but doesn’t parse into a full AST or understand SQL semantics; it cannot transpile dialects, extract table/column references reliably from nested queries, or validate SQL; sqlglot builds a proper expression tree so ast.find_all(exp.Table) correctly traverses subqueries, CTEs, and JOINs that sqlparse’s token-level approach would miss. For the sqlalchemy.text() alternative — SQLAlchemy’s text() construct sends raw SQL to the database, relying on the DB driver for dialect-specific execution; sqlglot operates purely in Python before touching any database — useful for SQL migration scripts, query validation in CI, extracting dependencies for access control, and building query builders that emit dialect-correct SQL. The Claude Skills 360 bundle includes sqlglot skill sets covering sqlglot.parse_one() and transpile(), to_postgres()/to_duckdb()/to_sqlite() shorthand converters, normalize() for canonical formatting, is_valid_sql() validation, extract_tables()/extract_columns()/extract_ctes()/extract_where_columns(), build_select() and build_join() query builders, add_row_number() window function wrapper, transpile_batch() multi-query converter, and dialect coverage for BigQuery/Snowflake/DuckDB/Spark/PostgreSQL/MySQL/SQLite/tsql. Start with the free tier to try SQL parsing and transpilation code generation.