Python’s pkgutil module provides utilities for working with packages: enumerating submodules, reading bundled data, and extending namespace packages. It is part of the standard library; bring it in with import pkgutil. iter_modules: pkgutil.iter_modules(path=None) → iterator of ModuleInfo(module_finder, name, ispkg); path=None iterates sys.path for top-level modules; path=pkg.__path__ yields a package’s direct children. walk_packages: pkgutil.walk_packages(path, prefix="") — recursively yields all submodules including nested packages; note that it has to import each subpackage it descends into in order to read that subpackage’s __path__. get_loader: pkgutil.get_loader("json") → loader object or None (deprecated in 3.12, use importlib.util.find_spec). find_loader: pkgutil.find_loader("json") → loader or None (deprecated in 3.12, use importlib.util.find_spec). get_data: pkgutil.get_data("mypackage", "data/file.txt") → bytes, or None when the package cannot be located or its loader does not support get_data — reads a resource from the package; uses the package’s __loader__; works in zip imports; a resource that is missing from an on-disk package raises FileNotFoundError rather than returning None. extend_path: in __init__.py: from pkgutil import extend_path; __path__ = extend_path(__path__, __name__) — merges namespace package directories from multiple entries on sys.path. ModuleInfo: namedtuple(module_finder, name, ispkg). iter_modules is useful for auto-discovery plugins, listing all installed top-level packages, and building importable plugin inventories. walk_packages can be slow on a large sys.path — limit it by passing a specific package’s __path__. Claude Code generates plugin auto-loaders, package auditors, namespace package mergers, and installed-module inventories.
CLAUDE.md for pkgutil
## pkgutil Stack
- Stdlib: import pkgutil
- List children: list(pkgutil.iter_modules(mypkg.__path__))
- Walk all: list(pkgutil.walk_packages(mypkg.__path__, prefix="mypkg."))
- Read resource: data = pkgutil.get_data("mypackage", "data/config.json")
- Top-level: [(m.name, m.ispkg) for m in pkgutil.iter_modules()]
- Namespace: __path__ = pkgutil.extend_path(__path__, __name__)
pkgutil Module Discovery Pipeline
# app/pkgutilutil.py — iter, walk, discover, load, audit, namespace
from __future__ import annotations
import importlib
import pkgutil
import sys
from dataclasses import dataclass
from types import ModuleType
from typing import Any, Generator
# ─────────────────────────────────────────────────────────────────────────────
# 1. Module iteration helpers
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class ModuleEntry:
    """One discovered module or package.

    Attributes are documented inline; ``str()`` renders the entry as a
    ``[pkg]``/``[mod]`` tag followed by the most specific name available.
    """

    name: str  # short (last-component) module name
    is_pkg: bool  # True for packages, False for plain modules
    full_name: str = ""  # dotted name; empty until a helper fills it in

    def __str__(self) -> str:
        """Return ``"[pkg] <name>"`` or ``"[mod] <name>"``."""
        # Prefer the dotted name; fall back to the short one when it is empty.
        label = self.full_name if self.full_name else self.name
        if self.is_pkg:
            return f"[pkg] {label}"
        return f"[mod] {label}"
def list_submodules(package: ModuleType) -> list[ModuleEntry]:
    """
    Return the immediate children of ``package`` without descending further.

    Each child found by ``pkgutil.iter_modules(package.__path__)`` becomes a
    ModuleEntry whose ``full_name`` is ``"<package name>.<child name>"``.

    Example:
        import mypackage
        for m in list_submodules(mypackage):
            print(m)
    """
    return [
        ModuleEntry(
            name=child.name,
            is_pkg=child.ispkg,
            full_name=f"{package.__name__}.{child.name}",
        )
        for child in pkgutil.iter_modules(package.__path__)
    ]
def walk_submodules(package: ModuleType) -> list[ModuleEntry]:
    """
    Return every submodule of ``package``, including nested ones.

    Delegates to ``pkgutil.walk_packages`` with the package's dotted name as
    prefix, so each reported name is already fully qualified. The short
    ``name`` is the last dotted component of that qualified name.

    Example:
        import email
        for m in walk_submodules(email):
            print(m)
    """
    dotted_prefix = package.__name__ + "."
    discovered = pkgutil.walk_packages(package.__path__, prefix=dotted_prefix)
    return [
        ModuleEntry(
            # Text after the final dot (whole string when there is no dot).
            name=item.name.rpartition(".")[2],
            is_pkg=item.ispkg,
            full_name=item.name,
        )
        for item in discovered
    ]
def list_top_level_packages(paths: list[str] | None = None) -> list[ModuleEntry]:
    """
    Return top-level modules/packages found on ``paths``, sorted by name.

    ``paths`` is handed straight to ``pkgutil.iter_modules``; leaving it as
    ``None`` lets pkgutil use its default search locations.

    Example:
        pkgs = list_top_level_packages()
        print([p.name for p in pkgs if p.is_pkg][:10])
    """
    found = [
        ModuleEntry(name=item.name, is_pkg=item.ispkg, full_name=item.name)
        for item in pkgutil.iter_modules(paths)
    ]
    found.sort(key=lambda entry: entry.name)
    return found
# ─────────────────────────────────────────────────────────────────────────────
# 2. Plugin auto-discovery
# ─────────────────────────────────────────────────────────────────────────────
def auto_import_submodules(
    package: ModuleType,
    recursive: bool = True,
    ignore_errors: bool = True,
) -> dict[str, ModuleType]:
    """
    Import the submodules of ``package`` and map dotted name -> module.

    Handy when importing a module is what triggers its registration
    (e.g. self-registering plugins).

    Args:
        package: Imported package whose children should be loaded.
        recursive: Walk nested packages when True; direct children only
            when False.
        ignore_errors: When True, a submodule whose import raises any
            ``Exception`` is skipped; when False the exception propagates.

    Returns:
        Dict of the submodules that imported successfully.

    Example:
        import myapp.plugins
        mods = auto_import_submodules(myapp.plugins)
        # Each plugin registers itself on import
    """
    if recursive:
        entries = walk_submodules(package)
    else:
        entries = list_submodules(package)

    loaded: dict[str, ModuleType] = {}
    for entry in entries:
        dotted = entry.full_name
        try:
            loaded[dotted] = importlib.import_module(dotted)
        except Exception:
            if ignore_errors:
                continue
            raise
    return loaded
def collect_plugins(
    package: ModuleType,
    attr: str,
    recursive: bool = True,
) -> dict[str, Any]:
    """
    Import the submodules of ``package`` and gather one attribute from each.

    Modules that lack ``attr``, or whose ``attr`` is ``None``, are left out.

    Returns:
        ``{full_module_name: attr_value}`` for every module that qualifies.

    Example:
        # Each plugin.py defines plugin = Plugin(...)
        plugins = collect_plugins(myapp.plugins, "plugin")
    """
    modules = auto_import_submodules(package, recursive=recursive)
    candidates = (
        (dotted, getattr(module, attr, None)) for dotted, module in modules.items()
    )
    return {dotted: value for dotted, value in candidates if value is not None}
# ─────────────────────────────────────────────────────────────────────────────
# 3. Resource reading
# ─────────────────────────────────────────────────────────────────────────────
def read_resource(package: str, resource_path: str) -> bytes | None:
    """
    Read a bundled resource file as bytes using pkgutil.get_data.

    Args:
        package: Dotted name of the package that owns the resource.
        resource_path: Path of the resource relative to the package, using
            ``/`` as the separator.

    Returns:
        The resource contents, or ``None`` when the resource is not found.
        That covers both ``pkgutil.get_data`` itself returning ``None`` and
        the resource file being missing from the package.

    Raises:
        Other errors from ``pkgutil.get_data`` still propagate; only
        ``FileNotFoundError`` is translated into ``None``.

    Example:
        data = read_resource("mypackage", "data/config.json")
        if data is not None:
            config = json.loads(data)
    """
    try:
        return pkgutil.get_data(package, resource_path)
    except FileNotFoundError:
        # A missing file raises instead of returning None; honour the
        # documented "None if not found" contract.
        return None
def read_resource_text(
    package: str,
    resource_path: str,
    encoding: str = "utf-8",
) -> str | None:
    """
    Read a bundled text resource file and decode it.

    Args:
        package: Dotted name of the package that owns the resource.
        resource_path: Path of the resource relative to the package, using
            ``/`` as the separator.
        encoding: Codec used to decode the raw bytes.

    Returns:
        The decoded text, or ``None`` when the resource is not found
        (``pkgutil.get_data`` returned ``None`` or the file is missing).

    Raises:
        UnicodeDecodeError: If the bytes are not valid for ``encoding``.

    Example:
        text = read_resource_text("mypackage", "templates/email.txt")
    """
    try:
        raw = pkgutil.get_data(package, resource_path)
    except FileNotFoundError:
        # A missing file raises rather than yielding None; report it as
        # "no resource" so the Optional return type is actually usable.
        return None
    if raw is None:
        return None
    return raw.decode(encoding)
# ─────────────────────────────────────────────────────────────────────────────
# 4. Package audit helpers
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class PackageAudit:
    """Summary of a package's contents: counts plus the list of names."""

    name: str  # name of the audited package
    submodule_count: int  # entries that are plain modules
    subpackage_count: int  # entries that are packages
    all_names: list[str]  # names of every entry counted

    def __str__(self) -> str:
        """Return ``"<name> submodules=<n> subpackages=<m>"``."""
        fields = (
            f"{self.name}",
            f"submodules={self.submodule_count}",
            f"subpackages={self.subpackage_count}",
        )
        return " ".join(fields)
def audit_package(package: ModuleType, recursive: bool = True) -> PackageAudit:
    """
    Summarise the internal structure of ``package`` as a PackageAudit.

    Entries come from ``walk_submodules`` when ``recursive`` is True and
    from ``list_submodules`` otherwise; each one is counted as either a
    subpackage or a plain submodule.

    Example:
        import email
        print(audit_package(email))
    """
    if recursive:
        entries = walk_submodules(package)
    else:
        entries = list_submodules(package)

    package_total = sum(1 for entry in entries if entry.is_pkg)
    return PackageAudit(
        name=package.__name__,
        # Everything that is not a package is a plain module.
        submodule_count=len(entries) - package_total,
        subpackage_count=package_total,
        all_names=[entry.full_name for entry in entries],
    )
def find_modules_with_attr(
    package: ModuleType,
    attr: str,
    recursive: bool = True,
    import_errors: bool = False,
) -> list[tuple[str, Any]]:
    """
    Locate the submodules of ``package`` that define ``attr``.

    Args:
        package: Imported package to search.
        attr: Attribute name to look for on each imported submodule.
        recursive: Search nested packages when True; direct children only
            when False.
        import_errors: When True an ``ImportError`` is re-raised; when
            False the offending submodule is skipped.

    Returns:
        ``[(full_name, attr_value), ...]`` for modules where the attribute
        exists and is not ``None``.

    Example:
        handlers = find_modules_with_attr(myapp.handlers, "handler")
    """
    if recursive:
        entries = walk_submodules(package)
    else:
        entries = list_submodules(package)

    matches: list[tuple[str, Any]] = []
    for entry in entries:
        dotted = entry.full_name
        try:
            module = importlib.import_module(dotted)
            value = getattr(module, attr, None)
            if value is not None:
                matches.append((dotted, value))
        except ImportError:
            if not import_errors:
                continue
            raise
    return matches
# ─────────────────────────────────────────────────────────────────────────────
# 5. Namespace package utilities
# ─────────────────────────────────────────────────────────────────────────────
def is_namespace_package(package: ModuleType) -> bool:
    """
    Return True if ``package`` looks like a namespace package.

    A module is reported as a namespace package only when all of these hold:

    * it has a ``__path__`` (it is a package at all),
    * its ``__file__`` is missing or ``None`` (no ``__init__`` file), and
    * it has no ``__spec__``, or its spec's ``origin`` is ``None``.

    The original expression mixed ``and``/``or`` without parentheses, so the
    ``origin is None`` test alone decided the result. Plain modules and
    regular packages without a spec were therefore reported as namespace
    packages; the checks are now applied as explicit, ordered guards.

    Example:
        import google  # namespace package if google-cloud-* is installed
        print(is_namespace_package(google))  # True
    """
    if not hasattr(package, "__path__"):
        return False  # not a package
    if getattr(package, "__file__", None) is not None:
        return False  # backed by a real file, so a regular package/module
    spec = getattr(package, "__spec__", None)
    return spec is None or getattr(spec, "origin", None) is None
def merge_namespace_path(package_name: str) -> list[str]:
    """
    Import ``package_name`` and return a copy of its ``__path__`` as a list.

    The result is empty when the import raises ``ImportError`` or when the
    imported module has no ``__path__`` attribute. Useful for seeing which
    directories make up a namespace package.

    Example:
        paths = merge_namespace_path("google")
    """
    directories: list[str] = []
    try:
        module = importlib.import_module(package_name)
        directories = list(getattr(module, "__path__", []))
    except ImportError:
        pass  # package is not importable: report no directories
    return directories
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Demo-only imports, kept local so importing this module stays cheap.
    import email
    import importlib.util
    import urllib

    print("=== pkgutil demo ===")

    # ── list_submodules ────────────────────────────────────────────────────
    print("\n--- list_submodules(email) ---")
    for m in list_submodules(email)[:6]:
        print(f" {m}")

    # ── walk_submodules ────────────────────────────────────────────────────
    print("\n--- walk_submodules(urllib) ---")
    entries = walk_submodules(urllib)
    print(f" total entries: {len(entries)}")
    for m in entries[:5]:
        print(f" {m}")

    # ── audit_package ──────────────────────────────────────────────────────
    print("\n--- audit_package ---")
    for pkg in [email, urllib]:
        print(f" {audit_package(pkg)}")

    # ── list_top_level_packages (first 8, sorted) ──────────────────────────
    print("\n--- list_top_level_packages (sample) ---")
    all_pkgs = list_top_level_packages()
    pkgs_only = [p for p in all_pkgs if p.is_pkg][:8]
    for p in pkgs_only:
        print(f" {p}")
    print(f" ... ({len(all_pkgs)} total top-level entries)")

    # ── read_resource ──────────────────────────────────────────────────────
    print("\n--- read_resource ---")
    # Read a file shipped inside the stdlib 'email' package as a stand-in
    # for a bundled resource.
    try:
        data = pkgutil.get_data("email", "mime/__init__.py")
        print(f" email/mime/__init__.py: {len(data or b'')} bytes")
    except Exception as e:
        print(f" get_data test: {e}")

    # ── get_loader ─────────────────────────────────────────────────────────
    print("\n--- get_loader ---")
    # pkgutil.get_loader is deprecated since Python 3.12 and scheduled for
    # removal; importlib.util.find_spec is the documented replacement and
    # exposes the same loader via spec.loader.
    for name in ["json", "os.path", "__nonexistent__"]:
        spec = importlib.util.find_spec(name)
        loader = spec.loader if spec is not None else None
        print(f" {name:20s}: {type(loader).__name__ if loader else None}")

    # ── namespace check ────────────────────────────────────────────────────
    print("\n--- namespace packages ---")
    for pkg_name in ["email", "urllib", "xml"]:
        mod = importlib.import_module(pkg_name)
        print(f" {pkg_name:10s} is_namespace={is_namespace_package(mod)}")

    print("\n=== done ===")
For the importlib alternative — importlib.import_module(), importlib.util.find_spec(), and importlib.resources.files() cover most of what pkgutil provides with a more modern and actively maintained API; importlib.util.find_spec() replaces the deprecated pkgutil.find_loader(); importlib.resources.files() handles resources in wheels and zip-imported packages more reliably than pkgutil.get_data() — use importlib for new code; use pkgutil.iter_modules() and pkgutil.walk_packages() when you specifically need to enumerate submodules of a package, as there is no direct importlib equivalent for that pattern. For the sys.path / os.walk alternative — scanning sys.path with os.walk and filtering for .py files is another way to discover modules, but it misses zip-imported packages, namespace packages, and extension modules (.so/.pyd) — pkgutil.iter_modules() is strictly better because it delegates to the actual import machinery’s finders (including zip finders), so it returns exactly what the import system would find rather than a filesystem approximation. The Claude Skills 360 bundle includes pkgutil skill sets covering ModuleEntry with list_submodules()/walk_submodules()/list_top_level_packages(), auto_import_submodules()/collect_plugins() plugin discovery, read_resource()/read_resource_text() bundled data readers, PackageAudit with audit_package()/find_modules_with_attr(), and is_namespace_package()/merge_namespace_path() namespace utilities. Start with the free tier to try module discovery patterns and pkgutil pipeline code generation.