Python’s glob module finds filesystem paths matching shell-style wildcard patterns. import glob. glob: glob.glob(pattern) → list of matching paths (strings); pattern supports * (any chars in one dir level), ? (one char), [seq] (char set). iglob: glob.iglob(pattern) → iterator (lazy); same patterns; use for large trees. Recursive: glob.glob("**/*.py", recursive=True) — ** matches zero or more directories at any level; must pass recursive=True. escape: glob.escape(path) — escape *, ?, [ in a literal path component so they are treated as literals. root_dir / dir_fd (Python 3.10+): glob.glob("*.py", root_dir="/some/path") — search relative to root_dir without changing cwd. pathlib integration: Path(".").glob("**/*.py") → iterator of Path objects; Path(".").rglob("*.py") ≡ glob("**/*.py", recursive=True) relative to that path. Performance: for large trees, prefer iglob + early break over glob which collects all results first. Claude Code generates build scripts, asset collectors, test file finders, log rotators, and batch file processors.
CLAUDE.md for glob
## glob Stack
- Stdlib: import glob; from pathlib import Path
- List: glob.glob("src/**/*.py", recursive=True)
- Lazy: for p in glob.iglob("logs/*.log"): ...
- Root: glob.glob("*.py", root_dir="/project/src") # 3.10+
- Path: list(Path("src").rglob("*.py")) # Path objects
- Escape: glob.glob(glob.escape(bracket_dir) + "/*.py")
glob File Discovery Pipeline
# app/globutil.py — discovery, collection, incremental, batch, tree summary
from __future__ import annotations
import glob as _glob
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, Generator, Iterable
# ─────────────────────────────────────────────────────────────────────────────
# 1. Pattern discovery helpers
# ─────────────────────────────────────────────────────────────────────────────
def find(pattern: str, recursive: bool = True, root: str | Path | None = None) -> list[Path]:
"""
Return sorted list of Path objects matching pattern.
Example:
find("src/**/*.py") # all Python files under src/
find("*.csv", root="/data") # CSVs in /data (non-recursive)
"""
kw: dict = {"recursive": recursive}
base = str(root) if root else None
if base:
kw["root_dir"] = base
raw = _glob.glob(pattern, **kw)
if base:
return sorted(Path(base) / p for p in raw)
return sorted(Path(p) for p in raw)
def find_iter(pattern: str, recursive: bool = True, root: str | Path | None = None) -> Generator[Path, None, None]:
"""
Yield Path objects matching pattern (lazy — suitable for large trees).
Example:
for p in find_iter("**/*.log", recursive=True):
process(p)
"""
kw: dict = {"recursive": recursive}
base = str(root) if root else None
if base:
kw["root_dir"] = base
for raw in _glob.iglob(pattern, **kw):
yield Path(base) / raw if base else Path(raw)
def find_multi(
    patterns: Iterable[str],
    root: str | Path | None = None,
    recursive: bool = True,
) -> list[Path]:
    """
    Return the union of matches across *patterns*, deduplicated and
    sorted (a file matching several patterns appears once).

    Example:
        find_multi(["src/**/*.py", "src/**/*.pyi"])
    """
    unique: set[Path] = set()
    for pat in patterns:
        unique.update(find(pat, recursive=recursive, root=root))
    return sorted(unique)
def find_exclude(
    include_patterns: Iterable[str],
    exclude_patterns: Iterable[str],
    root: str | Path | None = None,
    recursive: bool = True,
) -> list[Path]:
    """
    Return paths matching any include pattern while matching no exclude
    pattern. Each exclusion is tested against both the full path string
    and the bare filename, so "**/junk/**" and "*.pyc" styles both work.

    Example:
        find_exclude(["**/*.py"], ["**/*_pb2.py", "**/*.pyc", "**/migrations/**"])
    """
    import fnmatch

    blockers = list(exclude_patterns)

    def _excluded(p: Path) -> bool:
        # Dropped if any exclude pattern hits the full path or the name.
        for ex in blockers:
            if fnmatch.fnmatch(str(p), ex) or fnmatch.fnmatch(p.name, ex):
                return True
        return False

    candidates = find_multi(include_patterns, root=root, recursive=recursive)
    return [p for p in candidates if not _excluded(p)]
# ─────────────────────────────────────────────────────────────────────────────
# 2. Source collection presets
# ─────────────────────────────────────────────────────────────────────────────
def python_sources(root: str | Path = ".") -> list[Path]:
    """
    Collect every .py and .pyi file under *root*, skipping __pycache__,
    *.egg-info, and build directories.

    Example:
        srcs = python_sources("src")
    """
    includes = ["**/*.py", "**/*.pyi"]
    excludes = ["**/__pycache__/**", "**/*.egg-info/**", "**/build/**"]
    return find_exclude(
        include_patterns=includes,
        exclude_patterns=excludes,
        root=root,
    )
def test_files(root: str | Path = ".") -> list[Path]:
    """
    Locate pytest-style test modules (test_*.py and *_test.py) under *root*.

    Example:
        tests = test_files("tests")
    """
    patterns = ["**/test_*.py", "**/*_test.py"]
    return find_multi(patterns, root=root)
def log_files(root: str | Path = ".", extension: str = "*.log") -> list[Path]:
    """
    Recursively collect log files under *root* whose names match
    *extension* (a glob pattern, not a bare suffix).

    Example:
        logs = log_files("/var/log", "*.log")
    """
    pattern = "**/" + extension
    return find(pattern, recursive=True, root=root)
def asset_files(root: str | Path = ".", extensions: list[str] | None = None) -> list[Path]:
    """
    Collect static web assets (images, CSS, JS, fonts) under *root*.
    A falsy *extensions* argument falls back to the built-in default set.

    Example:
        assets = asset_files("static", ["*.png", "*.css", "*.js"])
    """
    default_exts = [
        "*.png", "*.jpg", "*.jpeg", "*.gif", "*.svg",
        "*.css", "*.js", "*.woff", "*.woff2",
    ]
    chosen = extensions or default_exts
    return find_multi(["**/" + ext for ext in chosen], root=root)
# ─────────────────────────────────────────────────────────────────────────────
# 3. Directory tree summary
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class ExtSummary:
    """Aggregate file count and total size for a single file extension."""

    extension: str    # lower-cased suffix including the dot, "" for none
    count: int        # number of files seen with this extension
    total_bytes: int  # cumulative size in bytes

    @property
    def total_kb(self) -> float:
        """Total size expressed in kibibytes."""
        return self.total_bytes / 1024

    def __str__(self) -> str:
        label = self.extension if self.extension else "(no ext)"
        return f"{label:12s}: {self.count:5d} files {self.total_kb:8.1f} KB"
def summarize_by_extension(root: str | Path = ".") -> list[ExtSummary]:
    """
    Count files and total size grouped by extension under root.

    Walks the tree once with Path.rglob. Each entry is stat'ed exactly
    once; entries whose metadata cannot be read (permission errors,
    files deleted mid-walk) are skipped entirely, so count and size
    stay consistent for every extension.

    Returns list sorted by total bytes descending.
    Example:
        for s in summarize_by_extension("src"):
            print(s)
    """
    from collections import defaultdict
    from stat import S_ISREG

    counts: dict[str, int] = defaultdict(int)
    sizes: dict[str, int] = defaultdict(int)
    for p in Path(root).rglob("*"):
        # Single stat per entry: the previous is_file()+stat() pair hit
        # the filesystem twice, and a failure between the two calls
        # counted a file while adding 0 bytes to its extension.
        try:
            st = p.stat()
        except OSError:
            continue
        if not S_ISREG(st.st_mode):
            continue
        ext = p.suffix.lower()
        counts[ext] += 1
        sizes[ext] += st.st_size
    return sorted(
        [ExtSummary(ext, counts[ext], sizes[ext]) for ext in counts],
        key=lambda s: -s.total_bytes,
    )
# ─────────────────────────────────────────────────────────────────────────────
# 4. Batch operations
# ─────────────────────────────────────────────────────────────────────────────
def batch_process(
    pattern: str,
    fn: Callable[[Path], None],
    recursive: bool = True,
    root: str | Path | None = None,
    dry_run: bool = False,
) -> list[Path]:
    """
    Run *fn* on every file matching *pattern*; with dry_run=True only
    report which paths would be touched. Returns the matched paths
    either way.

    Example:
        def compress(p: Path) -> None:
            import gzip, shutil
            shutil.copy(p, str(p) + ".bak")
            # compress in place ...
        batch_process("logs/**/*.log", compress)
    """
    matched = find(pattern, recursive=recursive, root=root)
    # Hoisted dry_run check — one branch for the whole batch.
    if not dry_run:
        for target in matched:
            fn(target)
    return matched
def collect_content(
    pattern: str,
    encoding: str = "utf-8",
    recursive: bool = True,
    root: str | Path | None = None,
) -> dict[Path, str]:
    """
    Map each matching Path to its decoded text content. Files that
    cannot be read or decoded are silently skipped (best-effort).

    Example:
        sources = collect_content("src/**/*.py")
        total_lines = sum(c.count("\\n") for c in sources.values())
    """
    contents: dict[Path, str] = {}
    for path in find(pattern, recursive=recursive, root=root):
        try:
            text = path.read_text(encoding=encoding)
        except (OSError, UnicodeDecodeError):
            continue
        contents[path] = text
    return contents
def newest_files(pattern: str, n: int = 10, root: str | Path | None = None) -> list[Path]:
    """
    Return up to n most-recently-modified files matching pattern.

    Paths whose mtime cannot be read (deleted between glob and stat,
    permission denied) sort last instead of raising.

    Example:
        recent = newest_files("logs/**/*.log", n=5)
    """
    def _mtime(p: Path) -> float:
        # EAFP: the previous exists()+stat() pair was racy — the file
        # could vanish between the two calls and stat() would raise.
        try:
            return p.stat().st_mtime
        except OSError:
            return 0.0

    paths = find(pattern, recursive=True, root=root)
    paths.sort(key=_mtime, reverse=True)
    return paths[:n]
def largest_files(pattern: str, n: int = 10, root: str | Path | None = None) -> list[tuple[Path, int]]:
    """
    Return up to *n* largest files matching *pattern* as
    (path, size_in_bytes) tuples, biggest first. Entries whose size
    cannot be read are skipped.

    Example:
        for path, size in largest_files("**/*.log", n=5):
            print(f"  {size/1024:.1f} KB  {path}")
    """
    sized: list[tuple[Path, int]] = []
    for path in find(pattern, recursive=True, root=root):
        try:
            sized.append((path, path.stat().st_size))
        except OSError:
            continue
    sized.sort(key=lambda item: item[1], reverse=True)
    return sized[:n]
# ─────────────────────────────────────────────────────────────────────────────
# 5. Escape and literal helpers
# ─────────────────────────────────────────────────────────────────────────────
def literal_glob(literal_path: str | Path, pattern: str) -> list[Path]:
    """
    Glob inside a directory whose name may itself contain glob-special
    characters (``*``, ``?``, ``[``).

    Example:
        # Directory named "data[2025]" contains CSV files
        literal_glob("data[2025]", "*.csv")
    """
    escaped_dir = _glob.escape(str(literal_path))
    full_pattern = f"{escaped_dir}/{pattern}"
    return find(full_pattern, recursive=False)
def safe_pattern(path_prefix: str | Path, glob_suffix: str) -> str:
    """
    Build a safe glob pattern by escaping a literal prefix and appending
    a glob suffix.

    Note: glob.escape() escapes only '*', '?' and '[' (each wrapped in
    brackets); ']' is not a glob metacharacter and is left untouched.

    Example:
        safe_pattern("/data/[project]/src", "**/*.py")
        # '/data/[[]project]/src/**/*.py'
    """
    return f"{_glob.escape(str(path_prefix))}/{glob_suffix}"
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Demo entry point: builds a throwaway project tree in a temp dir,
    # exercises the discovery helpers, and prints the results. The
    # redundant `import os` was removed — os is already imported at
    # module level and is unused here.
    import tempfile
    print("=== glob demo ===")
    # TemporaryDirectory cleans itself up on exit, so the demo leaves
    # nothing behind on disk.
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        (root / "src").mkdir()
        (root / "src" / "app.py").write_text("# app")
        (root / "src" / "models.py").write_text("# models")
        (root / "src" / "app.pyi").write_text("# stub")
        (root / "tests").mkdir()
        (root / "tests" / "test_app.py").write_text("# tests")
        (root / "tests" / "conftest.py").write_text("# conftest")
        (root / "data").mkdir()
        (root / "data" / "records.csv").write_text("a,b,c")
        (root / "data" / "archive.csv").write_text("x,y,z")
        (root / "static").mkdir()
        (root / "static" / "style.css").write_text("body{}")
        (root / "static" / "logo.png").write_bytes(b"\x89PNG")
        print("\n--- find all Python files ---")
        py = find("**/*.py", root=root)
        for p in py:
            print(f"  {p.relative_to(root)}")
        print("\n--- test_files ---")
        for p in test_files(root):
            print(f"  {p.relative_to(root)}")
        print("\n--- find_exclude ---")
        srcs = find_exclude(["**/*.py"], ["**/test_*", "**/conftest*"], root=root)
        for p in srcs:
            print(f"  {p.relative_to(root)}")
        print("\n--- find_multi (py + csv) ---")
        mixed = find_multi(["**/*.py", "**/*.csv"], root=root)
        for p in mixed:
            print(f"  {p.relative_to(root)}")
        print("\n--- summarize_by_extension ---")
        for s in summarize_by_extension(root):
            print(f"  {s}")
        print("\n--- newest_files ---")
        for p in newest_files("**/*", n=3, root=root):
            print(f"  {p.relative_to(root)}")
        print("\n--- collect_content ---")
        texts = collect_content("**/*.py", root=root)
        for p, content in list(texts.items())[:3]:
            print(f"  {p.relative_to(root)}: {content.strip()!r}")
        print("\n--- glob.escape ---")
        special = "data[2025]/report (v1).csv"
        print(f"  escape({special!r}) = {_glob.escape(special)!r}")
    print("\n=== done ===")
For the pathlib alternative — Path.glob() and Path.rglob() return Path objects directly and integrate naturally with the rest of the pathlib API (.stem, .suffix, .read_text(), .stat()); stdlib glob.glob() returns plain strings — use Path.glob() / Path.rglob() in modern Python (3.4+) code where you’re already working with Path objects; use glob.glob() when you need root_dir support (3.10+) or when the pattern comes as a plain string from configuration. For the os.walk alternative — os.walk() traverses a directory tree depth-first yielding (dirpath, dirnames, filenames) tuples, giving you full control over traversal order, directory pruning, and symlink handling; glob is faster to write for simple patterns but gives no control over traversal — use os.walk() when you need to prune subdirectories (e.g., skip .git, skip large archive dirs), count files per directory, or build custom traversal logic; use glob for concise one-line file discovery by pattern. The Claude Skills 360 bundle includes glob skill sets covering find()/find_iter()/find_multi()/find_exclude() discovery helpers, python_sources()/test_files()/log_files()/asset_files() presets, summarize_by_extension() tree summary, batch_process()/collect_content()/newest_files()/largest_files() batch utilities, and literal_glob()/safe_pattern() escape helpers. Start with the free tier to try file discovery patterns and glob pipeline code generation.