Python’s ast module parses source code into an Abstract Syntax Tree for analysis and transformation. import ast. parse: tree = ast.parse(source, filename="<string>", mode="exec") — modes: "exec" (module), "eval" (single expression), "single" (interactive). dump: ast.dump(tree, indent=2) — human-readable tree string. literal_eval: ast.literal_eval("{'a': 1}") — safely evaluate literals (no arbitrary code execution). unparse: ast.unparse(node) (Python 3.9+) — tree back to source string. walk: ast.walk(tree) → iterator over all nodes. get_docstring: ast.get_docstring(node) — extract docstring from FunctionDef/ClassDef/Module. fix_missing_locations: ast.fix_missing_locations(tree) — fill in lineno/col_offset after manual node construction. NodeVisitor: class V(ast.NodeVisitor): def visit_FunctionDef(self, node): ... — override visit_X for node type X; call self.generic_visit(node) to recurse. NodeTransformer: same but can return new/modified nodes or None to delete. Key node types: Module, FunctionDef, AsyncFunctionDef, ClassDef, Assign, AnnAssign, Return, Import, ImportFrom, Call, Name, Attribute, Constant, arg. compile: code = compile(tree, "<string>", "exec") → code object. exec(code). Claude Code generates linters, security scanners, import analyzers, dead code detectors, and source-to-source transformers.
CLAUDE.md for ast
## ast Stack
- Stdlib: import ast
- Parse: tree = ast.parse(source_str)
- Safe: ast.literal_eval(s) # no code execution — literals only
- Walk: for node in ast.walk(tree):
- Visitor: class V(ast.NodeVisitor): def visit_FunctionDef(self, n): ...
- Transform: class T(ast.NodeTransformer): def visit_Call(self, n): return n
- Unparse: ast.unparse(tree) # Python 3.9+
ast Static Analysis Pipeline
# app/astutil.py — parse helpers, import analysis, function/class summaries, linter, transformer
from __future__ import annotations
import ast
import textwrap
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
# ─────────────────────────────────────────────────────────────────────────────
# 1. Parse and dump helpers
# ─────────────────────────────────────────────────────────────────────────────
def parse_source(source: str, filename: str = "<string>") -> ast.Module:
    """
    Turn a string of Python source into an AST Module.

    Args:
        source: Python source text to parse.
        filename: Name passed through to ast.parse (used in syntax-error reports).

    Returns:
        The parsed module node.

    Example:
        tree = parse_source(Path("app.py").read_text(), "app.py")
    """
    module = ast.parse(source, filename=filename)
    return module
def parse_file(path: str | Path) -> ast.Module:
    """
    Parse a Python source file and return its AST.

    The file is read as raw bytes and handed to ast.parse, so the parser
    itself detects the encoding: a UTF-8 BOM is skipped and a PEP 263
    ``# -*- coding: ... -*-`` declaration is honoured. Decoding the file as
    UTF-8 text up front would fail on (or mis-read) such files.

    Args:
        path: Location of the source file.

    Returns:
        The parsed module node.

    Raises:
        OSError: If the file cannot be read.
        SyntaxError: If the file does not contain valid Python.
    """
    p = Path(path)
    return ast.parse(p.read_bytes(), filename=str(p))
def dump_ast(tree: ast.AST, indent: int = 2) -> str:
    """
    Render an AST node as an indented, human-readable string.

    Args:
        tree: Node to render.
        indent: Indent width forwarded to ast.dump.

    Returns:
        The text produced by ast.dump.
    """
    rendered = ast.dump(tree, indent=indent)
    return rendered
def safe_eval(expr: str) -> Any:
    """
    Evaluate a string holding a Python literal without running arbitrary code.

    Delegates to ast.literal_eval, so only literal structures (strings,
    numbers, booleans, None, dicts, lists, tuples, sets) are accepted.
    Anything else, such as a function call, is rejected with ValueError.

    Example:
        safe_eval("[1, 2, 3]")         # [1, 2, 3]
        safe_eval("{'key': 'value'}")  # {"key": "value"}
        safe_eval("open('f')")         # ValueError — not a literal
    """
    value = ast.literal_eval(expr)
    return value
def back_to_source(tree: ast.AST) -> str:
    """
    Regenerate Python source text from an AST node (requires Python 3.9+).

    The text comes from ast.unparse, so it is equivalent code but not
    necessarily formatted like the original source.

    Example:
        tree = parse_source("x = 1 + 2")
        code = back_to_source(tree)  # "x = 1 + 2"
    """
    regenerated = ast.unparse(tree)
    return regenerated
# ─────────────────────────────────────────────────────────────────────────────
# 2. Import analysis
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class ImportInfo:
    """Record describing one import found in a module."""

    # Module path as written in the source, e.g. "os.path" or "json".
    module: str
    # Names bound by the import: ["path"] for "import os.path as path",
    # ["loads"] for "from json import loads".
    names: list[str]
    # {alias: real_name} for every name imported with "as".
    aliases: dict[str, str]
    # Line number of the import statement.
    lineno: int
    # True for "from X import Y", False for plain "import X".
    is_from: bool
def extract_imports(tree: ast.Module) -> list[ImportInfo]:
    """
    Extract all import statements from an AST, in source-line order.

    A plain ``import a, b`` yields one ImportInfo per imported module; a
    ``from X import a, b`` yields a single ImportInfo listing every name.
    Relative imports keep their leading dots in ``module`` (``from . import x``
    gives ".", ``from ..pkg import y`` gives "..pkg") so they cannot be
    mistaken for absolute imports of a same-named package.

    Args:
        tree: Parsed module (imports nested in functions/classes are included).

    Returns:
        ImportInfo records sorted by line number.

    Example:
        for imp in extract_imports(parse_source(source)):
            print(imp.module, imp.names)
    """
    found: list[ImportInfo] = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            found.extend(
                ImportInfo(
                    module=alias.name,
                    names=[alias.asname or alias.name],
                    aliases={alias.asname: alias.name} if alias.asname else {},
                    lineno=node.lineno,
                    is_from=False,
                )
                for alias in node.names
            )
        elif isinstance(node, ast.ImportFrom):
            # node.level is the number of leading dots (0 = absolute import);
            # node.module is None for "from . import x".
            module = "." * (node.level or 0) + (node.module or "")
            found.append(ImportInfo(
                module=module,
                names=[a.asname or a.name for a in node.names],
                aliases={a.asname: a.name for a in node.names if a.asname},
                lineno=node.lineno,
                is_from=True,
            ))
    # ast.walk yields nodes in no specified order; the stable sort gives
    # callers a deterministic, source-ordered result.
    found.sort(key=lambda info: info.lineno)
    return found
def third_party_imports(tree: ast.Module, stdlib_modules: set[str] | None = None) -> list[str]:
    """
    Return top-level module names that are likely third-party (not stdlib).

    Relative imports (``from . import x``, ``from .pkg import y``) refer to
    the current package and are never reported.

    Args:
        tree: Parsed module to scan (nested imports are included).
        stdlib_modules: Top-level names to treat as standard library. When
            None (the default), a built-in list is used, extended with
            ``sys.stdlib_module_names`` on Python 3.10+. Pass an empty set
            to skip filtering and get every absolute top-level import.

    Returns:
        Sorted, de-duplicated list of top-level module names.

    Example:
        third = third_party_imports(parse_file("app.py"))
    """
    import sys  # local import: only needed here for stdlib_module_names

    if stdlib_modules is None:
        # Hard-coded fallback for interpreters without sys.stdlib_module_names.
        known = {
            "__future__",
            "os", "sys", "re", "json", "math", "datetime", "pathlib", "typing",
            "collections", "itertools", "functools", "io", "abc", "dataclasses",
            "enum", "copy", "warnings", "logging", "hashlib", "hmac", "struct",
            "queue", "threading", "asyncio", "urllib", "http", "email", "csv",
            "inspect", "ast", "traceback", "gc", "platform", "random", "secrets",
            "fractions", "decimal", "statistics", "operator", "string", "textwrap",
            "time", "calendar", "zlib", "gzip", "zipfile", "tarfile", "array",
            "weakref", "contextlib", "pprint", "pickle", "shelve", "uuid",
            "socket", "ssl", "subprocess", "shutil", "tempfile", "glob", "fnmatch",
        }
        known |= set(getattr(sys, "stdlib_module_names", ()))
    else:
        # Explicit "is None" test above: an empty set is a valid argument
        # meaning "filter nothing", not "use the default".
        known = set(stdlib_modules)

    third: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            dotted_names = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom):
            if node.level or not node.module:
                continue  # relative import: local package, not third-party
            dotted_names = [node.module]
        else:
            continue
        for dotted in dotted_names:
            top = dotted.split(".")[0]
            if top and top not in known:
                third.add(top)
    return sorted(third)
# ─────────────────────────────────────────────────────────────────────────────
# 3. Function / class summary
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class FunctionSummary:
    """Summary record for one function definition."""

    # Function name as written after "def".
    name: str
    # Line number of the definition.
    lineno: int
    # Parameter names.
    args: list[str]
    # True for "async def".
    is_async: bool
    # Docstring text, or None when the function has none.
    docstring: str | None
    # Source text of each decorator expression.
    decorators: list[str]
    # Source text of the return annotation, or None when absent.
    returns: str | None
def list_functions(tree: ast.Module) -> list[FunctionSummary]:
    """
    Return summaries of all top-level and nested (but not method) functions.

    Functions defined directly in a class body are methods and are skipped;
    functions nested inside a method body are still reported. Results are in
    source order.

    ``args`` lists every parameter: positional-only, regular, ``*name``
    (star-prefixed), keyword-only, and ``**name`` (double-star-prefixed).

    Example:
        for fn in list_functions(parse_source(source)):
            print(f" {fn.name}({', '.join(fn.args)})")
    """
    results: list[FunctionSummary] = []

    def _param_names(spec: ast.arguments) -> list[str]:
        # Collect every parameter kind, in signature order.
        names = [a.arg for a in spec.posonlyargs]
        names.extend(a.arg for a in spec.args)
        if spec.vararg is not None:
            names.append("*" + spec.vararg.arg)
        names.extend(a.arg for a in spec.kwonlyargs)
        if spec.kwarg is not None:
            names.append("**" + spec.kwarg.arg)
        return names

    def _collect(node: ast.FunctionDef | ast.AsyncFunctionDef, is_async: bool) -> None:
        results.append(FunctionSummary(
            name=node.name,
            lineno=node.lineno,
            args=_param_names(node.args),
            is_async=is_async,
            docstring=ast.get_docstring(node),
            decorators=[ast.unparse(dec) for dec in node.decorator_list],
            returns=ast.unparse(node.returns) if node.returns is not None else None,
        ))

    class Visitor(ast.NodeVisitor):
        def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
            _collect(node, is_async=False)
            self.generic_visit(node)

        def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
            _collect(node, is_async=True)
            self.generic_visit(node)

        def visit_ClassDef(self, node: ast.ClassDef) -> None:
            for stmt in node.body:
                if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    # A method: do not record it, but still scan its body
                    # for nested functions.
                    self.generic_visit(stmt)
                else:
                    self.visit(stmt)

    Visitor().visit(tree)
    return results
@dataclass
class ClassSummary:
    """Summary record for one class definition."""

    # Class name as written after "class".
    name: str
    # Line number of the definition.
    lineno: int
    # Source text of each base-class expression.
    bases: list[str]
    # Names of the functions found for the class.
    methods: list[str]
    # Docstring text, or None when the class has none.
    docstring: str | None
def list_classes(tree: ast.Module) -> list[ClassSummary]:
    """
    Return summaries of all class definitions (including nested classes).

    ``methods`` contains only functions defined directly in the class body,
    in definition order. Helper functions nested inside a method and methods
    of nested classes are not attributed to the outer class. Results are
    sorted by line number.

    Example:
        for cls in list_classes(parse_file("models.py")):
            print(f" class {cls.name}({', '.join(cls.bases)})")
    """
    results: list[ClassSummary] = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.ClassDef):
            continue
        results.append(ClassSummary(
            name=node.name,
            lineno=node.lineno,
            bases=[ast.unparse(b) for b in node.bases],
            # Only direct children of the class body are its methods; walking
            # the whole subtree would also pick up nested helpers.
            methods=[
                stmt.name for stmt in node.body
                if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef))
            ],
            docstring=ast.get_docstring(node),
        ))
    # ast.walk yields nodes in no specified order; sort for source order.
    results.sort(key=lambda summary: summary.lineno)
    return results
# ─────────────────────────────────────────────────────────────────────────────
# 4. Security / linting checks
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class Issue:
    """A single finding reported by the linter."""

    # Rule identifier, e.g. "SECURITY001".
    rule: str
    # Human-readable description of the finding.
    message: str
    # Line number of the offending node.
    lineno: int
    # Column offset of the offending node.
    col: int
class BasicLinter(ast.NodeVisitor):
    """
    Small NodeVisitor-based linter that records findings in ``self.issues``.

    Rules:
    - SECURITY001: a call to a bare name ``exec`` or ``eval``
    - SECURITY002: a call named call/run/Popen/check_call/check_output
      with a truthy constant ``shell=`` keyword
    - STYLE001: ``except:`` with no exception type
    - STYLE002: any ``assert`` statement (removed in optimized bytecode)
    """

    def __init__(self) -> None:
        self.issues: list[Issue] = []

    def _add(self, rule: str, msg: str, node: ast.AST) -> None:
        # Nodes without position info are reported at line 0, column 0.
        line = getattr(node, "lineno", 0)
        column = getattr(node, "col_offset", 0)
        self.issues.append(Issue(rule, msg, line, column))

    def visit_Call(self, node: ast.Call) -> None:
        callee = node.func

        # exec / eval invoked by bare name
        if isinstance(callee, ast.Name) and callee.id in ("exec", "eval"):
            self._add("SECURITY001", f"Use of {callee.id}() is potentially dangerous", node)

        # subprocess-style call with shell=True; matched on the final name
        # only, so both run(...) and subprocess.run(...) are covered.
        if isinstance(callee, ast.Name):
            callee_name = callee.id
        elif isinstance(callee, ast.Attribute):
            callee_name = callee.attr
        else:
            callee_name = None

        if callee_name in ("call", "run", "Popen", "check_call", "check_output"):
            for keyword in node.keywords:
                if keyword.arg != "shell":
                    continue
                if isinstance(keyword.value, ast.Constant) and keyword.value.value:
                    self._add("SECURITY002", "subprocess called with shell=True (injection risk)", node)

        self.generic_visit(node)

    def visit_ExceptHandler(self, node: ast.ExceptHandler) -> None:
        bare = node.type is None
        if bare:
            self._add("STYLE001", "Bare except clause — catches all exceptions including SystemExit", node)
        self.generic_visit(node)

    def visit_Assert(self, node: ast.Assert) -> None:
        self._add("STYLE002", "Assert removed by -O flag — use explicit if/raise for runtime checks", node)
        self.generic_visit(node)
def lint_source(source: str, filename: str = "<string>") -> list[Issue]:
    """
    Parse ``source``, run BasicLinter over it, and return the findings
    ordered by line number.

    Example:
        issues = lint_source(Path("script.py").read_text())
        for issue in issues:
            print(f" [{issue.rule}] line {issue.lineno}: {issue.message}")
    """
    linter = BasicLinter()
    linter.visit(ast.parse(source, filename=filename))
    # Copy before sorting so the linter's own list is left untouched.
    ordered = list(linter.issues)
    ordered.sort(key=lambda issue: issue.lineno)
    return ordered
# ─────────────────────────────────────────────────────────────────────────────
# 5. Source transformer
# ─────────────────────────────────────────────────────────────────────────────
class PrintToLogTransformer(ast.NodeTransformer):
    """
    NodeTransformer that replaces bare print() calls with logger.info() calls.

    Only calls to the plain name ``print`` are rewritten, and only when the
    result is a valid logging call:

    - ``print(x)``            → ``logger.info(x)``
    - ``print(a, b)``         → ``logger.info('%s %s', a, b)`` (a constant
      string ``sep=`` is used as the separator in the format)
    - ``print()``             → ``logger.info('')``
    - ``end=`` / ``flush=``   are dropped (logger.info does not accept them)
    - calls using ``file=``, ``*args``, ``**kwargs`` or a non-constant
      ``sep=`` are left unchanged, since they have no safe logging equivalent
    """

    def visit_Call(self, node: ast.Call) -> ast.AST:
        # Rewrite nested calls first so print() inside arguments is handled.
        self.generic_visit(node)
        if not (isinstance(node.func, ast.Name) and node.func.id == "print"):
            return node
        if any(isinstance(arg, ast.Starred) for arg in node.args):
            return node  # argument count unknown: cannot build a format string

        sep = " "  # print()'s default separator
        for kw in node.keywords:
            if kw.arg in ("end", "flush"):
                continue
            if kw.arg == "sep" and isinstance(kw.value, ast.Constant):
                if kw.value.value is None:
                    continue  # sep=None means "use the default"
                if isinstance(kw.value.value, str):
                    sep = kw.value.value
                    continue
            # file=, **kwargs, or a dynamic sep: keep the original print().
            return node

        if len(node.args) == 1:
            log_args = list(node.args)
        else:
            # logging applies "msg % args", so extra positional values need
            # explicit %s placeholders; literal % in sep must be escaped.
            fmt = sep.replace("%", "%%").join(["%s"] * len(node.args))
            log_args = [ast.Constant(value=fmt), *node.args]

        new_func = ast.Attribute(
            value=ast.Name(id="logger", ctx=ast.Load()),
            attr="info",
            ctx=ast.Load(),
        )
        new_call = ast.Call(func=new_func, args=log_args, keywords=[])
        # Give the replacement (and its new children) the original position
        # so the tree can be compiled without a separate fix-up pass.
        return ast.fix_missing_locations(ast.copy_location(new_call, node))
def replace_print_with_logger(source: str) -> str:
    """
    Rewrite ``source`` so bare print() calls become logger.info() calls.

    The code is parsed, run through PrintToLogTransformer, and regenerated
    with ast.unparse, so comments and original formatting are not kept.

    Example:
        new_source = replace_print_with_logger('print("hello")')
        # → "logger.info('hello')"
    """
    rewritten = PrintToLogTransformer().visit(ast.parse(source))
    # Nodes created by the transformer may lack line/column info.
    ast.fix_missing_locations(rewritten)
    return ast.unparse(rewritten)
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Sample module exercising every helper: stdlib and third-party imports,
    # a class with a method, sync/async functions, and constructs that the
    # linter flags (eval/exec, bare except, assert).
    source = textwrap.dedent("""\
        import os
        import json
        from datetime import datetime
        import requests # third-party
        class Config:
            \"\"\"Application configuration.\"\"\"
            host: str = "localhost"
            port: int = 8080
            def load(self, path: str) -> dict:
                \"\"\"Load config from a JSON file.\"\"\"
                with open(path) as f:
                    return json.load(f)
        async def fetch_data(url: str, timeout: int = 30) -> dict:
            \"\"\"Fetch JSON from a URL.\"\"\"
            result = await client.get(url, timeout=timeout)
            return result.json()
        def risky(code):
            eval(code)
            exec(code)
        def bad_except():
            try:
                pass
            except:
                pass
        assert True, "sanity"
        print("Starting up")
        """)
    print("=== ast demo ===")
    print("\n--- parse + dump (first 10 lines) ---")
    tree = parse_source(source, "demo.py")
    dump = dump_ast(tree, indent=2)
    print("\n".join(dump.splitlines()[:10]) + "\n ...")
    print("\n--- extract_imports ---")
    for imp in extract_imports(tree):
        print(f" line {imp.lineno}: {'from ' if imp.is_from else ''}{imp.module} → {imp.names}")
    print("\n--- third_party_imports ---")
    print(f" {third_party_imports(tree)}")
    print("\n--- list_functions ---")
    for fn in list_functions(tree):
        async_label = "async " if fn.is_async else ""
        print(f" {async_label}def {fn.name}({', '.join(fn.args)}) → {fn.returns} line={fn.lineno}")
    print("\n--- list_classes ---")
    for cls in list_classes(tree):
        print(f" class {cls.name}({', '.join(cls.bases)}) methods={cls.methods}")
    print("\n--- lint_source ---")
    issues = lint_source(source)
    for issue in issues:
        print(f" [{issue.rule}] line {issue.lineno}: {issue.message}")
    print("\n--- replace_print_with_logger ---")
    transformed = replace_print_with_logger('print("Starting up")\nprint("Done")')
    print(f" {transformed!r}")
    print("\n--- safe_eval ---")
    print(f" safe_eval('[1, 2, 3]') = {safe_eval('[1, 2, 3]')}")
    # Keep the dict literal in a variable: written inline, its braces would be
    # parsed as an f-string replacement field, and backslash-escaped quotes
    # inside an f-string expression are a SyntaxError before Python 3.12.
    dict_literal = "{'a': 1}"
    print(f" safe_eval(\"{dict_literal}\") = {safe_eval(dict_literal)}")
    try:
        safe_eval("__import__('os').system('echo hi')")
    except ValueError as e:
        print(f" malicious eval blocked: {type(e).__name__}")
    print("\n--- back_to_source (round-trip) ---")
    simple = "x = 1 + 2 * 3"
    rt = back_to_source(parse_source(simple))
    print(f" '{simple}' → '{rt}'")
    print("\n=== done ===")
For the inspect alternative — inspect.getsource() retrieves source code of live, already-imported objects and provides signature(), getmembers(), and call-stack introspection; ast parses source text without importing or executing it — use inspect when you have a live object and need its runtime interface; use ast for static analysis, linters, code scanners, and transformation tools that must work on arbitrary source files without running them. For the libcst / rope alternative — libcst (PyPI) provides a Concrete Syntax Tree that preserves all whitespace, comments, and formatting, enabling precise source-faithful refactoring; rope (PyPI) provides a full Python refactoring toolkit (rename, extract, inline, move); the stdlib ast loses formatting on round-trip through unparse — use libcst when transformations must preserve the existing code style (comments, blank lines, trailing commas); use rope for editor-integrated rename and extract refactoring; use ast for lightweight analysis, security scanning, and transformations where reformatting is acceptable. The Claude Skills 360 bundle includes ast skill sets covering parse_source()/parse_file()/dump_ast()/safe_eval()/back_to_source() core helpers, extract_imports()/third_party_imports() import analysis, list_functions()/list_classes() structure extraction, BasicLinter NodeVisitor for SECURITY001/SECURITY002/STYLE001/STYLE002 checks, and PrintToLogTransformer NodeTransformer/replace_print_with_logger() source transformation. Start with the free tier to try Python source analysis patterns and ast pipeline code generation.