Python’s zipfile module creates, reads, and modifies ZIP archives. import zipfile. ZipFile: with zipfile.ZipFile(path, mode="r", compression=ZIP_STORED, allowZip64=True) as zf:. Modes: "r" read, "w" write (new), "a" append, "x" create (fail if exists). Compression: ZIP_STORED = no compression, ZIP_DEFLATED = zlib deflate, ZIP_BZIP2 = bzip2, ZIP_LZMA = lzma. namelist: zf.namelist() → list of archive member names. infolist: zf.infolist() → list of ZipInfo objects (filename, file_size, compress_size, date_time, CRC). read: zf.read("file.txt") → bytes. open: with zf.open("file.txt") as f: → file-like. extract: zf.extract("file.txt", path="."). extractall: zf.extractall(path=".", members=None). write: zf.write(filename, arcname=None). writestr: zf.writestr("a.txt", "content") or zf.writestr("a.txt", bytes_data). mkdir: zf.mkdir("subdir/") (Python 3.11+). testzip: zf.testzip() → None if ok, or first bad filename. Path: zipfile.Path(zf, "subdir/") — pathlib-like navigator. is_zipfile: zipfile.is_zipfile(path_or_file). BadZipFile: raised on corrupt/invalid archives. setpassword: zf.setpassword(b"secret"). compresslevel: ZipFile(compresslevel=6). Claude Code generates build artifact bundlers, multi-file exporters, dynamic zip creators, and archive validators.
CLAUDE.md for zipfile
## zipfile Stack
- Stdlib: import zipfile
- Create: with zipfile.ZipFile("out.zip", "w", zipfile.ZIP_DEFLATED) as zf:
- Add: zf.write("file.txt") / zf.writestr("name.txt", content)
- Read: with zipfile.ZipFile("in.zip") as zf: data = zf.read("file.txt")
- List: zf.namelist() / zf.infolist()
- Extract: zf.extractall("/target/")
- In-mem: ZipFile(io.BytesIO(), "w") for in-memory zip buffers
zipfile Archive Pipeline
# app/ziputil.py — create, read, extract, in-memory, validate, pathlib-style
from __future__ import annotations
import io
import json
import os
import zipfile
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterator
# ─────────────────────────────────────────────────────────────────────────────
# 1. Create archives
# ─────────────────────────────────────────────────────────────────────────────
def create_zip(
output: str | Path | io.BytesIO,
files: dict[str, bytes | str],
compression: int = zipfile.ZIP_DEFLATED,
level: int = 6,
) -> None:
"""
Create a zip archive from a dict of {archive_name: content}.
Content may be bytes or str (auto-encoded as UTF-8).
Example:
create_zip("bundle.zip", {
"README.txt": "Hello",
"data.json": b'{"key": "value"}',
"src/main.py": open("main.py","rb").read(),
})
"""
with zipfile.ZipFile(output, "w", compression=compression, compresslevel=level) as zf:
for name, content in files.items():
if isinstance(content, str):
content = content.encode("utf-8")
zf.writestr(name, content)
def zip_directory(
src_dir: str | Path,
output: str | Path,
compression: int = zipfile.ZIP_DEFLATED,
level: int = 6,
base: str = "",
exclude: set[str] | None = None,
) -> int:
"""
Recursively zip a directory. Returns file count.
Example:
n = zip_directory("dist/", "release.zip", base="release/")
"""
src_dir = Path(src_dir)
exclude = exclude or set()
count = 0
with zipfile.ZipFile(output, "w", compression=compression, compresslevel=level) as zf:
for file_path in sorted(src_dir.rglob("*")):
if file_path.is_file() and file_path.name not in exclude:
rel = base + str(file_path.relative_to(src_dir))
zf.write(file_path, arcname=rel)
count += 1
return count
def add_to_zip(
archive: str | Path,
files: dict[str, bytes | str],
compression: int = zipfile.ZIP_DEFLATED,
) -> None:
"""
Append files to an existing zip archive.
Example:
add_to_zip("bundle.zip", {"CHANGELOG.txt": changelog_text})
"""
with zipfile.ZipFile(archive, "a", compression=compression) as zf:
for name, content in files.items():
if isinstance(content, str):
content = content.encode("utf-8")
zf.writestr(name, content)
# ─────────────────────────────────────────────────────────────────────────────
# 2. Read archives
# ─────────────────────────────────────────────────────────────────────────────
def zip_names(archive: str | Path) -> list[str]:
"""
List all file names inside a zip archive.
Example:
files = zip_names("release.zip")
"""
with zipfile.ZipFile(archive, "r") as zf:
return zf.namelist()
@dataclass
class ArchiveEntry:
    """Per-member metadata extracted from a ZipInfo record (see zip_info)."""

    name: str            # archive member name (ZipInfo.filename)
    size: int            # uncompressed size in bytes (ZipInfo.file_size)
    compress_size: int   # stored/compressed size in bytes
    ratio: float         # compress_size / size; 0.0 when size is 0
    date: tuple          # ZipInfo.date_time: (year, month, day, hour, minute, second)
def zip_info(archive: str | Path) -> list[ArchiveEntry]:
    """
    Collect metadata for every file entry; directory entries are skipped.

    Example:
        for entry in zip_info("release.zip"):
            print(f"  {entry.name}: {entry.size:,} -> {entry.compress_size:,}")
    """
    entries: list[ArchiveEntry] = []
    with zipfile.ZipFile(archive, "r") as zf:
        for info in zf.infolist():
            if info.is_dir():
                continue
            # Guard against zero-byte members when computing the ratio.
            ratio = info.compress_size / info.file_size if info.file_size else 0.0
            entries.append(
                ArchiveEntry(
                    name=info.filename,
                    size=info.file_size,
                    compress_size=info.compress_size,
                    ratio=ratio,
                    date=info.date_time,
                )
            )
    return entries
def read_file(archive: str | Path, name: str) -> bytes:
"""
Read a single file from the archive by name.
Example:
config = read_file("bundle.zip", "config.json")
"""
with zipfile.ZipFile(archive, "r") as zf:
return zf.read(name)
def read_text(archive: str | Path, name: str, encoding: str = "utf-8") -> str:
"""Read a text file from a zip archive."""
return read_file(archive, name).decode(encoding)
def iter_files(archive: str | Path, pattern: str = "*") -> Iterator[tuple[str, bytes]]:
"""
Yield (name, bytes) for each file matching a glob pattern in the archive.
Example:
for name, data in iter_files("bundle.zip", "*.json"):
process(json.loads(data))
"""
import fnmatch
with zipfile.ZipFile(archive, "r") as zf:
for name in zf.namelist():
if not name.endswith("/") and fnmatch.fnmatch(name, pattern):
yield name, zf.read(name)
# ─────────────────────────────────────────────────────────────────────────────
# 3. Extract
# ─────────────────────────────────────────────────────────────────────────────
def extract_all(
archive: str | Path,
dest: str | Path = ".",
members: list[str] | None = None,
) -> list[Path]:
"""
Extract all (or selected) members to dest. Returns extracted paths.
Example:
paths = extract_all("bundle.zip", "/tmp/extracted")
"""
dest = Path(dest)
with zipfile.ZipFile(archive, "r") as zf:
zf.extractall(dest, members=members)
names = members or zip_names(archive)
return [dest / name for name in names if not name.endswith("/")]
def safe_extract(archive: str | Path, dest: str | Path) -> list[Path]:
"""
Extract archive with path traversal protection (blocks "../" entries).
Raises ValueError on any member that would escape dest.
Example:
paths = safe_extract("user_upload.zip", "/srv/uploads/output/")
"""
dest = Path(dest).resolve()
extracted: list[Path] = []
with zipfile.ZipFile(archive, "r") as zf:
for info in zf.infolist():
target = (dest / info.filename).resolve()
if not str(target).startswith(str(dest)):
raise ValueError(f"Path traversal blocked: {info.filename!r}")
zf.extract(info, dest)
if not info.is_dir():
extracted.append(target)
return extracted
# ─────────────────────────────────────────────────────────────────────────────
# 4. In-memory zip
# ─────────────────────────────────────────────────────────────────────────────
def build_zip_bytes(
files: dict[str, bytes | str],
compression: int = zipfile.ZIP_DEFLATED,
level: int = 6,
) -> bytes:
"""
Build a zip archive entirely in memory and return ready-to-send bytes.
Example:
zipped = build_zip_bytes({"report.pdf": pdf_bytes, "data.csv": csv_text})
response.body = zipped
response.headers["Content-Type"] = "application/zip"
"""
buf = io.BytesIO()
create_zip(buf, files, compression=compression, level=level)
return buf.getvalue()
def zip_json_records(
    records: list[Any],
    filename: str = "data.json",
    level: int = 6,
) -> bytes:
    """
    Serialize *records* as pretty-printed JSON and return zip bytes
    containing that single member named *filename*.

    Example:
        zip_bytes = zip_json_records(results, "results.json")
        send_download(zip_bytes, "results.zip")
    """
    payload = json.dumps(records, ensure_ascii=False, indent=2).encode("utf-8")
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=level) as zf:
        zf.writestr(filename, payload)
    return buffer.getvalue()
# ─────────────────────────────────────────────────────────────────────────────
# 5. Validation
# ─────────────────────────────────────────────────────────────────────────────
def is_valid_zip(path: str | Path) -> bool:
"""Return True if path is a well-formed zip file."""
return zipfile.is_zipfile(path)
def test_zip(path: str | Path) -> tuple[bool, str | None]:
"""
Test archive integrity. Returns (ok, first_bad_file_or_None).
Example:
ok, bad = test_zip("release.zip")
if not ok:
print(f"Corrupt file: {bad}")
"""
if not zipfile.is_zipfile(path):
return False, "<not a zip file>"
try:
with zipfile.ZipFile(path, "r") as zf:
bad = zf.testzip()
return (bad is None), bad
except zipfile.BadZipFile as e:
return False, str(e)
def zip_stats(archive: str | Path) -> dict[str, Any]:
    """
    Summarize an archive: file count, original/stored sizes, compression
    ratio, percentage saved, and the archive's on-disk size.

    Example:
        stats = zip_stats("release.zip")
        print(f"{stats['file_count']} files, {stats['total_size']:,} bytes original")
    """
    entries = zip_info(archive)
    original_total = sum(entry.size for entry in entries)
    compressed_total = sum(entry.compress_size for entry in entries)
    if original_total:
        ratio = round(compressed_total / original_total, 4)
        saved_pct = round((1 - compressed_total / original_total) * 100, 1)
    else:
        ratio = 0.0
        saved_pct = 0.0
    on_disk = Path(archive).stat().st_size if isinstance(archive, (str, Path)) else None
    return {
        "file_count": len(entries),
        "total_size": original_total,
        "stored_size": compressed_total,
        "ratio": ratio,
        "space_saved_pct": saved_pct,
        "archive_size": on_disk,
    }
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import tempfile
    print("=== zipfile demo ===")
    # Sample payloads: repeated text compresses well and makes the
    # ratio/space-saved numbers visibly non-trivial.
    files = {
        "README.txt": "Hello, zip world!\n" * 50,
        "data/records.json": json.dumps([{"id": i, "name": f"User{i}"} for i in range(100)]),
        "src/main.py": "def main():\n print('hello')\n\n" * 30,
        "config.ini": "[app]\nhost=localhost\nport=8080\n" * 5,
    }
    # Everything runs inside a throwaway directory so the demo is side-effect free.
    with tempfile.TemporaryDirectory() as td:
        zip_path = os.path.join(td, "bundle.zip")
        print("\n--- create_zip ---")
        create_zip(zip_path, files)
        stats = zip_stats(zip_path)
        print(f" {stats['file_count']} files, {stats['total_size']:,} original → {stats['stored_size']:,} stored")
        print(f" ratio={stats['ratio']:.3f} saved={stats['space_saved_pct']:.1f}%")
        print("\n--- zip_names ---")
        names = zip_names(zip_path)
        print(f" names: {names}")
        print("\n--- zip_info ---")
        for entry in zip_info(zip_path):
            print(f" {entry.name:35s} {entry.size:>8,} → {entry.compress_size:>8,} ({entry.ratio:.2f})")
        print("\n--- read_file / read_text ---")
        readme = read_text(zip_path, "README.txt")
        print(f" README.txt first line: {readme.splitlines()[0]!r}")
        print("\n--- iter_files ---")
        for name, data in iter_files(zip_path, "*.json"):
            records = json.loads(data)
            print(f" {name}: {len(records)} records")
        print("\n--- add_to_zip ---")
        add_to_zip(zip_path, {"CHANGELOG.txt": "v1.0 — initial release\n"})
        print(f" after append: {len(zip_names(zip_path))} files")
        print("\n--- extract_all ---")
        extract_dir = os.path.join(td, "extracted")
        paths = extract_all(zip_path, extract_dir)
        print(f" extracted {len(paths)} files to {extract_dir}")
        print("\n--- safe_extract ---")
        try:
            # Build a zip with a path-traversal member (simulate malicious zip)
            evil_zip = os.path.join(td, "evil.zip")
            buf = io.BytesIO()
            with zipfile.ZipFile(buf, "w") as zf:
                zf.writestr("../../../etc/passwd", "root:x:0:0:root:/root:/bin/bash")
            Path(evil_zip).write_bytes(buf.getvalue())
            # Expected to raise: the member escapes the destination directory.
            safe_extract(evil_zip, os.path.join(td, "safe"))
        except ValueError as e:
            print(f" traversal blocked: {e}")
        print("\n--- in-memory zip ---")
        zip_bytes = build_zip_bytes({"hello.txt": "Hello!", "data.json": '{"x": 1}'})
        print(f" in-memory zip: {len(zip_bytes):,} bytes")
        recs = [{"id": i, "val": "x" * 40} for i in range(50)]
        json_zip = zip_json_records(recs, "records.json")
        print(f" json_records zip: {len(json_zip):,} bytes for {len(recs)} records")
        print("\n--- validation ---")
        ok, bad = test_zip(zip_path)
        print(f" test_zip: ok={ok} bad={bad}")
        print(f" is_valid: {is_valid_zip(zip_path)}")
        print("\n--- zip_directory ---")
        src_dir = os.path.join(td, "src")
        os.makedirs(src_dir)
        for i in range(5):
            Path(os.path.join(src_dir, f"file{i}.txt")).write_text(f"content {i}\n" * 100)
        dir_zip = os.path.join(td, "src.zip")
        n = zip_directory(src_dir, dir_zip, base="src/")
        print(f" zipped {n} files from directory")
        print(f" {zip_stats(dir_zip)}")
    print("\n=== done ===")
For the shutil.make_archive alternative — shutil.make_archive(base_name, "zip", root_dir, base_dir) creates zip archives with a single function call but offers no control over individual file metadata, compression levels, or incremental building; zipfile.ZipFile gives full control over each member’s name, content, compression, timestamp, and comment — use shutil.make_archive for quick one-liner directory bundling in build scripts; use zipfile when you need custom arcnames, in-memory zip creation, mixed file/string content, or fine-grained compression settings. For the tarfile alternative — tarfile handles .tar, .tar.gz, .tar.bz2, and .tar.xz archives, preserves Unix permissions (mode, uid, gid), symlinks, and hardlinks; ZIP archives do not preserve POSIX metadata but are natively supported on Windows and by all browsers for downloads — use tarfile for Linux/macOS deployment artifacts, Docker builds, and cases where file permissions must round-trip; use zipfile for downloads, cross-platform distribution, Python wheel files (.whl), and Office Open XML formats (.docx, .xlsx). The Claude Skills 360 bundle includes zipfile skill sets covering create_zip()/zip_directory()/add_to_zip() archive creation, zip_names()/zip_info()/read_file()/read_text()/iter_files() reading, extract_all()/safe_extract() path-traversal-safe extraction, build_zip_bytes()/zip_json_records() in-memory zip, and is_valid_zip()/test_zip()/zip_stats() validation. Start with the free tier to try archive manipulation patterns and zipfile pipeline code generation.