Python’s glob module finds filesystem paths matching shell-style wildcard patterns. import glob. glob: glob.glob(pattern) → list of matching paths (strings); pattern supports * (any chars in one dir level), ? (one char), [seq] (char set). iglob: glob.iglob(pattern) → iterator (lazy); same patterns; use for large trees. Recursive: glob.glob("**/*.py", recursive=True) — ** matches zero or more directories at any level; must pass recursive=True. escape: glob.escape(path) — escape *, ?, [ in a literal path component so they are treated as literals. root_dir / dir_fd (Python 3.10+): glob.glob("*.py", root_dir="/some/path") — search relative to root_dir without changing cwd. pathlib integration: Path(".").glob("**/*.py") → iterator of Path objects; Path(".").rglob("*.py") ≡ glob("**/*.py", recursive=True) relative to that path. Performance: for large trees, prefer iglob + early break over glob which collects all results first. Claude Code generates build scripts, asset collectors, test file finders, log rotators, and batch file processors.
CLAUDE.md for glob
## glob Stack
- Stdlib: import glob; from pathlib import Path
- List: glob.glob("src/**/*.py", recursive=True)
- Lazy: for p in glob.iglob("logs/*.log"): ...
- Root: glob.glob("*.py", root_dir="/project/src") # 3.10+
- Path: list(Path("src").rglob("*.py")) # Path objects
- Escape: glob.glob(glob.escape(bracket_dir) + "/*.py")
glob File Discovery Pipeline
# app/globutil.py — discovery, collection, incremental, batch, tree summary
from __future__ import annotations
import glob as _glob
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, Generator, Iterable
# ─────────────────────────────────────────────────────────────────────────────
# 1. Pattern discovery helpers
# ─────────────────────────────────────────────────────────────────────────────
def find(pattern: str, recursive: bool = True, root: str | Path | None = None) -> list[Path]:
"""
Return sorted list of Path objects matching pattern.
Example:
find("src/**/*.py") # all Python files under src/
find("*.csv", root="/data") # CSVs in /data (non-recursive)
"""
kw: dict = {"recursive": recursive}
base = str(root) if root else None
if base:
kw["root_dir"] = base
raw = _glob.glob(pattern, **kw)
if base:
return sorted(Path(base) / p for p in raw)
return sorted(Path(p) for p in raw)
def find_iter(pattern: str, recursive: bool = True, root: str | Path | None = None) -> Generator[Path, None, None]:
"""
Yield Path objects matching pattern (lazy — suitable for large trees).
Example:
for p in find_iter("**/*.log", recursive=True):
process(p)
"""
kw: dict = {"recursive": recursive}
base = str(root) if root else None
if base:
kw["root_dir"] = base
for raw in _glob.iglob(pattern, **kw):
yield Path(base) / raw if base else Path(raw)
def find_multi(
    patterns: Iterable[str],
    root: str | Path | None = None,
    recursive: bool = True,
) -> list[Path]:
    """
    Return the union of matches across *patterns*, deduplicated and
    sorted (a file matching several patterns appears once).

    Example:
        find_multi(["src/**/*.py", "src/**/*.pyi"])
    """
    unique: set[Path] = set()
    for pat in patterns:
        unique.update(find(pat, recursive=recursive, root=root))
    return sorted(unique)
def find_exclude(
    include_patterns: Iterable[str],
    exclude_patterns: Iterable[str],
    root: str | Path | None = None,
    recursive: bool = True,
) -> list[Path]:
    """
    Return paths matching any include pattern while matching no exclude
    pattern. Each exclusion is tested against both the full path string
    and the bare filename, so "**/junk/**" and "*.pyc" styles both work.

    Example:
        find_exclude(["**/*.py"], ["**/*_pb2.py", "**/*.pyc", "**/migrations/**"])
    """
    import fnmatch

    blockers = list(exclude_patterns)

    def _excluded(p: Path) -> bool:
        # Dropped if any exclude pattern hits the full path or the name.
        for ex in blockers:
            if fnmatch.fnmatch(str(p), ex) or fnmatch.fnmatch(p.name, ex):
                return True
        return False

    candidates = find_multi(include_patterns, root=root, recursive=recursive)
    return [p for p in candidates if not _excluded(p)]
# ─────────────────────────────────────────────────────────────────────────────
# 2. Source collection presets
# ─────────────────────────────────────────────────────────────────────────────
def python_sources(root: str | Path = ".") -> list[Path]:
    """
    Collect every .py and .pyi file under *root*, skipping __pycache__,
    *.egg-info, and build directories.

    Example:
        srcs = python_sources("src")
    """
    includes = ["**/*.py", "**/*.pyi"]
    excludes = ["**/__pycache__/**", "**/*.egg-info/**", "**/build/**"]
    return find_exclude(
        include_patterns=includes,
        exclude_patterns=excludes,
        root=root,
    )
def test_files(root: str | Path = ".") -> list[Path]:
    """
    Locate pytest-style test modules (test_*.py and *_test.py) under *root*.

    Example:
        tests = test_files("tests")
    """
    patterns = ["**/test_*.py", "**/*_test.py"]
    return find_multi(patterns, root=root)
def log_files(root: str | Path = ".", extension: str = "*.log") -> list[Path]:
    """
    Recursively collect log files under *root* whose names match
    *extension* (a glob pattern, not a bare suffix).

    Example:
        logs = log_files("/var/log", "*.log")
    """
    pattern = "**/" + extension
    return find(pattern, recursive=True, root=root)
def asset_files(root: str | Path = ".", extensions: list[str] | None = None) -> list[Path]:
    """
    Collect static web assets (images, CSS, JS, fonts) under *root*.
    A falsy *extensions* argument falls back to the built-in default set.

    Example:
        assets = asset_files("static", ["*.png", "*.css", "*.js"])
    """
    default_exts = [
        "*.png", "*.jpg", "*.jpeg", "*.gif", "*.svg",
        "*.css", "*.js", "*.woff", "*.woff2",
    ]
    chosen = extensions or default_exts
    return find_multi(["**/" + ext for ext in chosen], root=root)
# ─────────────────────────────────────────────────────────────────────────────
# 3. Directory tree summary
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class ExtSummary:
    """Aggregate file count and total size for a single file extension."""

    extension: str    # lower-cased suffix including the dot, "" for none
    count: int        # number of files seen with this extension
    total_bytes: int  # cumulative size in bytes

    @property
    def total_kb(self) -> float:
        """Total size expressed in kibibytes."""
        return self.total_bytes / 1024

    def __str__(self) -> str:
        label = self.extension if self.extension else "(no ext)"
        return f"{label:12s}: {self.count:5d} files {self.total_kb:8.1f} KB"
def summarize_by_extension(root: str | Path = ".") -> list[ExtSummary]:
    """
    Count files and total size grouped by extension under root.

    Walks the tree once with Path.rglob. Each entry is stat'ed exactly
    once; entries whose metadata cannot be read (permission errors,
    files deleted mid-walk) are skipped entirely, so count and size
    stay consistent for every extension.

    Returns list sorted by total bytes descending.
    Example:
        for s in summarize_by_extension("src"):
            print(s)
    """
    from collections import defaultdict
    from stat import S_ISREG

    counts: dict[str, int] = defaultdict(int)
    sizes: dict[str, int] = defaultdict(int)
    for p in Path(root).rglob("*"):
        # Single stat per entry: the previous is_file()+stat() pair hit
        # the filesystem twice, and a failure between the two calls
        # counted a file while adding 0 bytes to its extension.
        try:
            st = p.stat()
        except OSError:
            continue
        if not S_ISREG(st.st_mode):
            continue
        ext = p.suffix.lower()
        counts[ext] += 1
        sizes[ext] += st.st_size
    return sorted(
        [ExtSummary(ext, counts[ext], sizes[ext]) for ext in counts],
        key=lambda s: -s.total_bytes,
    )
# ─────────────────────────────────────────────────────────────────────────────
# 4. Batch operations
# ─────────────────────────────────────────────────────────────────────────────
def batch_process(
    pattern: str,
    fn: Callable[[Path], None],
    recursive: bool = True,
    root: str | Path | None = None,
    dry_run: bool = False,
) -> list[Path]:
    """
    Run *fn* on every file matching *pattern*; with dry_run=True only
    report which paths would be touched. Returns the matched paths
    either way.

    Example:
        def compress(p: Path) -> None:
            import gzip, shutil
            shutil.copy(p, str(p) + ".bak")
            # compress in place ...
        batch_process("logs/**/*.log", compress)
    """
    matched = find(pattern, recursive=recursive, root=root)
    # Hoisted dry_run check — one branch for the whole batch.
    if not dry_run:
        for target in matched:
            fn(target)
    return matched
def collect_content(
    pattern: str,
    encoding: str = "utf-8",
    recursive: bool = True,
    root: str | Path | None = None,
) -> dict[Path, str]:
    """
    Map each matching Path to its decoded text content. Files that
    cannot be read or decoded are silently skipped (best-effort).

    Example:
        sources = collect_content("src/**/*.py")
        total_lines = sum(c.count("\\n") for c in sources.values())
    """
    contents: dict[Path, str] = {}
    for path in find(pattern, recursive=recursive, root=root):
        try:
            text = path.read_text(encoding=encoding)
        except (OSError, UnicodeDecodeError):
            continue
        contents[path] = text
    return contents
def newest_files(pattern: str, n: int = 10, root: str | Path | None = None) -> list[Path]:
    """
    Return up to n most-recently-modified files matching pattern.

    Paths whose mtime cannot be read (deleted between glob and stat,
    permission denied) sort last instead of raising.

    Example:
        recent = newest_files("logs/**/*.log", n=5)
    """
    def _mtime(p: Path) -> float:
        # EAFP: the previous exists()+stat() pair was racy — the file
        # could vanish between the two calls and stat() would raise.
        try:
            return p.stat().st_mtime
        except OSError:
            return 0.0

    paths = find(pattern, recursive=True, root=root)
    paths.sort(key=_mtime, reverse=True)
    return paths[:n]
def largest_files(pattern: str, n: int = 10, root: str | Path | None = None) -> list[tuple[Path, int]]:
    """
    Return up to *n* largest files matching *pattern* as
    (path, size_in_bytes) tuples, biggest first. Entries whose size
    cannot be read are skipped.

    Example:
        for path, size in largest_files("**/*.log", n=5):
            print(f"  {size/1024:.1f} KB  {path}")
    """
    sized: list[tuple[Path, int]] = []
    for path in find(pattern, recursive=True, root=root):
        try:
            sized.append((path, path.stat().st_size))
        except OSError:
            continue
    sized.sort(key=lambda item: item[1], reverse=True)
    return sized[:n]
# ─────────────────────────────────────────────────────────────────────────────
# 5. Escape and literal helpers
# ─────────────────────────────────────────────────────────────────────────────
def literal_glob(literal_path: str | Path, pattern: str) -> list[Path]:
    """
    Glob inside a directory whose name may itself contain glob-special
    characters (``*``, ``?``, ``[``).

    Example:
        # Directory named "data[2025]" contains CSV files
        literal_glob("data[2025]", "*.csv")
    """
    escaped_dir = _glob.escape(str(literal_path))
    full_pattern = f"{escaped_dir}/{pattern}"
    return find(full_pattern, recursive=False)
def safe_pattern(path_prefix: str | Path, glob_suffix: str) -> str:
    """
    Build a safe glob pattern by escaping a literal prefix and appending
    a glob suffix.

    Note: glob.escape() escapes only '*', '?' and '[' (each wrapped in
    brackets); ']' is not a glob metacharacter and is left untouched.

    Example:
        safe_pattern("/data/[project]/src", "**/*.py")
        # '/data/[[]project]/src/**/*.py'
    """
    return f"{_glob.escape(str(path_prefix))}/{glob_suffix}"
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Demo entry point: builds a throwaway project tree in a temp dir,
    # exercises the discovery helpers, and prints the results. The
    # redundant `import os` was removed — os is already imported at
    # module level and is unused here.
    import tempfile
    print("=== glob demo ===")
    # TemporaryDirectory cleans itself up on exit, so the demo leaves
    # nothing behind on disk.
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        (root / "src").mkdir()
        (root / "src" / "app.py").write_text("# app")
        (root / "src" / "models.py").write_text("# models")
        (root / "src" / "app.pyi").write_text("# stub")
        (root / "tests").mkdir()
        (root / "tests" / "test_app.py").write_text("# tests")
        (root / "tests" / "conftest.py").write_text("# conftest")
        (root / "data").mkdir()
        (root / "data" / "records.csv").write_text("a,b,c")
        (root / "data" / "archive.csv").write_text("x,y,z")
        (root / "static").mkdir()
        (root / "static" / "style.css").write_text("body{}")
        (root / "static" / "logo.png").write_bytes(b"\x89PNG")
        print("\n--- find all Python files ---")
        py = find("**/*.py", root=root)
        for p in py:
            print(f"  {p.relative_to(root)}")
        print("\n--- test_files ---")
        for p in test_files(root):
            print(f"  {p.relative_to(root)}")
        print("\n--- find_exclude ---")
        srcs = find_exclude(["**/*.py"], ["**/test_*", "**/conftest*"], root=root)
        for p in srcs:
            print(f"  {p.relative_to(root)}")
        print("\n--- find_multi (py + csv) ---")
        mixed = find_multi(["**/*.py", "**/*.csv"], root=root)
        for p in mixed:
            print(f"  {p.relative_to(root)}")
        print("\n--- summarize_by_extension ---")
        for s in summarize_by_extension(root):
            print(f"  {s}")
        print("\n--- newest_files ---")
        for p in newest_files("**/*", n=3, root=root):
            print(f"  {p.relative_to(root)}")
        print("\n--- collect_content ---")
        texts = collect_content("**/*.py", root=root)
        for p, content in list(texts.items())[:3]:
            print(f"  {p.relative_to(root)}: {content.strip()!r}")
        print("\n--- glob.escape ---")
        special = "data[2025]/report (v1).csv"
        print(f"  escape({special!r}) = {_glob.escape(special)!r}")
    print("\n=== done ===")
For the pathlib alternative — Path.glob() and Path.rglob() return Path objects directly and integrate naturally with the rest of the pathlib API (.stem, .suffix, .read_text(), .stat()); stdlib glob.glob() returns plain strings — use Path.glob() / Path.rglob() in modern Python (3.4+) code where you’re already working with Path objects; use glob.glob() when you need root_dir support (3.10+) or when the pattern comes as a plain string from configuration. For the os.walk alternative — os.walk() traverses a directory tree depth-first yielding (dirpath, dirnames, filenames) tuples, giving you full control over traversal order, directory pruning, and symlink handling; glob is faster to write for simple patterns but gives no control over traversal — use os.walk() when you need to prune subdirectories (e.g., skip .git, skip large archive dirs), count files per directory, or build custom traversal logic; use glob for concise one-line file discovery by pattern. The Claude Skills 360 bundle includes glob skill sets covering find()/find_iter()/find_multi()/find_exclude() discovery helpers, python_sources()/test_files()/log_files()/asset_files() presets, summarize_by_extension() tree summary, batch_process()/collect_content()/newest_files()/largest_files() batch utilities, and literal_glob()/safe_pattern() escape helpers. Start with the free tier to try file discovery patterns and glob pipeline code generation.