Claude Code for zipfile: ZIP Archives in Python — Claude Skills 360 Blog
Blog / AI / Claude Code for zipfile: ZIP Archives in Python
AI

Claude Code for zipfile: ZIP Archives in Python

Published: August 10, 2028
Read time: 5 min read
By: Claude Skills 360

Python’s zipfile module creates, reads, and modifies ZIP archives. import zipfile. ZipFile: with zipfile.ZipFile(path, mode="r", compression=ZIP_STORED, allowZip64=True) as zf:. Modes: "r" read, "w" write (new), "a" append, "x" create (fail if exists). Compression: ZIP_STORED = no compression (the default), ZIP_DEFLATED = zlib deflate, ZIP_BZIP2 = bzip2, ZIP_LZMA = lzma. namelist: zf.namelist() → list of archive member names. infolist: zf.infolist() → list of ZipInfo objects (filename, file_size, compress_size, date_time, CRC). read: zf.read("file.txt") → bytes. open: with zf.open("file.txt") as f: → file-like. extract: zf.extract("file.txt", path="."). extractall: zf.extractall(path=".", members=None). write: zf.write(filename, arcname=None). writestr: zf.writestr("a.txt", "content") or zf.writestr("a.txt", bytes_data). mkdir: zf.mkdir("subdir/") (Python 3.11+). testzip: zf.testzip() → None if ok, or first bad filename. Path: zipfile.Path(zf, "subdir/") — pathlib-like navigator. is_zipfile: zipfile.is_zipfile(path_or_file). BadZipFile: raised on corrupt/invalid archives. setpassword: zf.setpassword(b"secret"). compresslevel: ZipFile(compresslevel=6). Claude Code generates build artifact bundlers, multi-file exporters, dynamic zip creators, and archive validators.

CLAUDE.md for zipfile

## zipfile Stack
- Stdlib: import zipfile
- Create:   with zipfile.ZipFile("out.zip", "w", zipfile.ZIP_DEFLATED) as zf:
- Add:      zf.write("file.txt") / zf.writestr("name.txt", content)
- Read:     with zipfile.ZipFile("in.zip") as zf: data = zf.read("file.txt")
- List:     zf.namelist()  /  zf.infolist()
- Extract:  zf.extractall("/target/")
- In-mem:   ZipFile(io.BytesIO(), "w") for in-memory zip buffers

zipfile Archive Pipeline

# app/ziputil.py — create, read, extract, in-memory, validate, pathlib-style
from __future__ import annotations

import io
import json
import os
import zipfile
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterator


# ─────────────────────────────────────────────────────────────────────────────
# 1. Create archives
# ─────────────────────────────────────────────────────────────────────────────

def create_zip(
    output: str | Path | io.BytesIO,
    files: dict[str, bytes | str],
    compression: int = zipfile.ZIP_DEFLATED,
    level: int = 6,
) -> None:
    """
    Write a new zip archive populated from a {archive_name: content} mapping.

    String values are encoded as UTF-8 before being stored; bytes values
    are stored verbatim. *output* may be a path or a writable binary
    file-like object (e.g. io.BytesIO).

    Example:
        create_zip("bundle.zip", {
            "README.txt":  "Hello",
            "data.json":   b'{"key": "value"}',
            "src/main.py": Path("main.py").read_bytes(),
        })
    """
    with zipfile.ZipFile(output, "w", compression=compression, compresslevel=level) as zf:
        for arcname, payload in files.items():
            data = payload.encode("utf-8") if isinstance(payload, str) else payload
            zf.writestr(arcname, data)


def zip_directory(
    src_dir: str | Path,
    output: str | Path,
    compression: int = zipfile.ZIP_DEFLATED,
    level: int = 6,
    base: str = "",
    exclude: set[str] | None = None,
) -> int:
    """
    Recursively archive every file under *src_dir* into a new zip.

    Files whose basename appears in *exclude* are skipped, and *base*
    is prefixed onto every member's archive name. Returns the number
    of files written.

    Example:
        n = zip_directory("dist/", "release.zip", base="release/")
    """
    root = Path(src_dir)
    skip = exclude if exclude is not None else set()
    written = 0
    with zipfile.ZipFile(output, "w", compression=compression, compresslevel=level) as zf:
        for candidate in sorted(root.rglob("*")):
            if not candidate.is_file() or candidate.name in skip:
                continue
            zf.write(candidate, arcname=base + str(candidate.relative_to(root)))
            written += 1
    return written


def add_to_zip(
    archive: str | Path,
    files: dict[str, bytes | str],
    compression: int = zipfile.ZIP_DEFLATED,
) -> None:
    """
    Open *archive* in append mode and add each {name: content} entry.

    Str content is encoded as UTF-8. Note that zipfile never replaces
    existing members, so appending an already-present name stores a
    duplicate entry.

    Example:
        add_to_zip("bundle.zip", {"CHANGELOG.txt": changelog_text})
    """
    with zipfile.ZipFile(archive, "a", compression=compression) as zf:
        for arcname, payload in files.items():
            data = payload.encode("utf-8") if isinstance(payload, str) else payload
            zf.writestr(arcname, data)


# ─────────────────────────────────────────────────────────────────────────────
# 2. Read archives
# ─────────────────────────────────────────────────────────────────────────────

def zip_names(archive: str | Path) -> list[str]:
    """
    Return the names of every member stored in *archive*.

    Example:
        files = zip_names("release.zip")
    """
    with zipfile.ZipFile(archive, "r") as zf:
        names = zf.namelist()
    return names


@dataclass
class ArchiveEntry:
    """Metadata for one file entry in a zip archive (built by zip_info())."""
    name:          str    # archive member name (ZipInfo.filename)
    size:          int    # uncompressed size in bytes (ZipInfo.file_size)
    compress_size: int    # compressed size in bytes (ZipInfo.compress_size)
    ratio:         float  # compress_size / size; 0.0 when size is 0
    date:          tuple  # 6-tuple (year, month, day, hour, min, sec) from ZipInfo.date_time


def zip_info(archive: str | Path) -> list[ArchiveEntry]:
    """
    Collect per-file metadata (name, sizes, compression ratio, timestamp)
    for every non-directory member of the archive.

    Example:
        for entry in zip_info("release.zip"):
            print(f"  {entry.name}: {entry.size:,} -> {entry.compress_size:,}")
    """
    entries: list[ArchiveEntry] = []
    with zipfile.ZipFile(archive, "r") as zf:
        for info in zf.infolist():
            if info.is_dir():
                continue
            # Guard against division by zero for zero-byte members.
            ratio = info.compress_size / info.file_size if info.file_size else 0.0
            entries.append(
                ArchiveEntry(
                    name=info.filename,
                    size=info.file_size,
                    compress_size=info.compress_size,
                    ratio=ratio,
                    date=info.date_time,
                )
            )
    return entries


def read_file(archive: str | Path, name: str) -> bytes:
    """
    Return the raw bytes of one archive member, looked up by name.

    Example:
        config = read_file("bundle.zip", "config.json")
    """
    with zipfile.ZipFile(archive, "r") as zf:
        with zf.open(name) as member:
            return member.read()


def read_text(archive: str | Path, name: str, encoding: str = "utf-8") -> str:
    """Read one archive member and decode it to text (UTF-8 by default)."""
    raw = read_file(archive, name)
    return raw.decode(encoding)


def iter_files(archive: str | Path, pattern: str = "*") -> Iterator[tuple[str, bytes]]:
    """
    Lazily yield (name, content_bytes) for every archive member whose
    name matches *pattern* (fnmatch-style glob). Directory entries
    (names ending in "/") are skipped.

    Example:
        for name, data in iter_files("bundle.zip", "*.json"):
            process(json.loads(data))
    """
    from fnmatch import fnmatch
    with zipfile.ZipFile(archive, "r") as zf:
        matching = [n for n in zf.namelist() if not n.endswith("/") and fnmatch(n, pattern)]
        for member in matching:
            yield member, zf.read(member)


# ─────────────────────────────────────────────────────────────────────────────
# 3. Extract
# ─────────────────────────────────────────────────────────────────────────────

def extract_all(
    archive: str | Path,
    dest: str | Path = ".",
    members: list[str] | None = None,
) -> list[Path]:
    """
    Extract all (or selected) members to *dest*.

    Returns the destination paths of the extracted files; directory
    entries (names ending in "/") are omitted from the result.

    Fixes over the naive version: the member list is taken from the
    already-open handle instead of re-opening the archive, and an
    explicit empty *members* list correctly yields an empty result
    (nothing is extracted) instead of reporting all names as extracted.

    Example:
        paths = extract_all("bundle.zip", "/tmp/extracted")
    """
    dest = Path(dest)
    with zipfile.ZipFile(archive, "r") as zf:
        zf.extractall(dest, members=members)
        # Distinguish "no filter" (None -> everything) from an empty filter.
        names = members if members is not None else zf.namelist()
    return [dest / name for name in names if not name.endswith("/")]


def safe_extract(archive: str | Path, dest: str | Path) -> list[Path]:
    """
    Extract *archive* into *dest*, rejecting members that would land
    outside *dest* (zip-slip / path traversal protection).

    Returns the resolved paths of the extracted files (directories
    omitted).

    Raises:
        ValueError: on the first member whose resolved target escapes dest.

    Note: containment is checked with Path.is_relative_to() rather than a
    string-prefix test — a prefix test wrongly accepts a sibling directory
    such as "/srv/out2" when dest is "/srv/out".

    Example:
        paths = safe_extract("user_upload.zip", "/srv/uploads/output/")
    """
    dest = Path(dest).resolve()
    extracted: list[Path] = []
    with zipfile.ZipFile(archive, "r") as zf:
        for info in zf.infolist():
            target = (dest / info.filename).resolve()
            # Component-wise containment check (Python 3.9+), immune to
            # the "/x/out" vs "/x/out2" prefix-collision bug.
            if not target.is_relative_to(dest):
                raise ValueError(f"Path traversal blocked: {info.filename!r}")
            zf.extract(info, dest)
            if not info.is_dir():
                extracted.append(target)
    return extracted


# ─────────────────────────────────────────────────────────────────────────────
# 4. In-memory zip
# ─────────────────────────────────────────────────────────────────────────────

def build_zip_bytes(
    files: dict[str, bytes | str],
    compression: int = zipfile.ZIP_DEFLATED,
    level: int = 6,
) -> bytes:
    """
    Assemble a zip archive entirely in memory and return its raw bytes,
    ready to be sent as an HTTP response body or written to disk.

    Example:
        zipped = build_zip_bytes({"report.pdf": pdf_bytes, "data.csv": csv_text})
        response.body = zipped
        response.headers["Content-Type"] = "application/zip"
    """
    buffer = io.BytesIO()
    create_zip(buffer, files, compression=compression, level=level)
    return buffer.getvalue()


def zip_json_records(
    records: list[Any],
    filename: str = "data.json",
    level: int = 6,
) -> bytes:
    """
    Serialize *records* to pretty-printed UTF-8 JSON and return it zipped
    as a single-file archive whose member is named *filename*.

    Example:
        zip_bytes = zip_json_records(results, "results.json")
        send_download(zip_bytes, "results.zip")
    """
    payload = json.dumps(records, ensure_ascii=False, indent=2).encode("utf-8")
    return build_zip_bytes({filename: payload}, level=level)


# ─────────────────────────────────────────────────────────────────────────────
# 5. Validation
# ─────────────────────────────────────────────────────────────────────────────

def is_valid_zip(path: str | Path) -> bool:
    """Return True when *path* points at a well-formed zip file (checks the magic/end record, not member CRCs)."""
    return zipfile.is_zipfile(path)


def test_zip(path: str | Path) -> tuple[bool, str | None]:
    """
    Test archive integrity. Returns (ok, first_bad_file_or_None).

    Example:
        ok, bad = test_zip("release.zip")
        if not ok:
            print(f"Corrupt file: {bad}")
    """
    if not zipfile.is_zipfile(path):
        return False, "<not a zip file>"
    try:
        with zipfile.ZipFile(path, "r") as zf:
            bad = zf.testzip()
            return (bad is None), bad
    except zipfile.BadZipFile as e:
        return False, str(e)


def zip_stats(archive: str | Path) -> dict[str, Any]:
    """
    Summarize an archive: file count, original vs stored byte totals,
    compression ratio, percent saved, and the archive's on-disk size
    (None when *archive* is a file-like object rather than a path).

    Example:
        stats = zip_stats("release.zip")
        print(f"{stats['file_count']} files, {stats['total_size']:,} bytes original")
    """
    entries = zip_info(archive)
    original_bytes = sum(e.size for e in entries)
    stored_bytes = sum(e.compress_size for e in entries)
    if isinstance(archive, (str, Path)):
        on_disk = Path(archive).stat().st_size
    else:
        on_disk = None
    ratio = round(stored_bytes / original_bytes, 4) if original_bytes else 0.0
    saved = round((1 - stored_bytes / original_bytes) * 100, 1) if original_bytes else 0.0
    return {
        "file_count":      len(entries),
        "total_size":      original_bytes,
        "stored_size":     stored_bytes,
        "ratio":           ratio,
        "space_saved_pct": saved,
        "archive_size":    on_disk,
    }


# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    # Demo: exercise every helper in this module inside a throwaway
    # temp directory so no files are left behind.
    import tempfile

    print("=== zipfile demo ===")

    # Sample payload: mixed text content, including nested member paths.
    files = {
        "README.txt":       "Hello, zip world!\n" * 50,
        "data/records.json": json.dumps([{"id": i, "name": f"User{i}"} for i in range(100)]),
        "src/main.py":      "def main():\n    print('hello')\n\n" * 30,
        "config.ini":       "[app]\nhost=localhost\nport=8080\n" * 5,
    }

    with tempfile.TemporaryDirectory() as td:
        zip_path = os.path.join(td, "bundle.zip")

        print("\n--- create_zip ---")
        create_zip(zip_path, files)
        stats = zip_stats(zip_path)
        print(f"  {stats['file_count']} files, {stats['total_size']:,} original → {stats['stored_size']:,} stored")
        print(f"  ratio={stats['ratio']:.3f}  saved={stats['space_saved_pct']:.1f}%")

        print("\n--- zip_names ---")
        names = zip_names(zip_path)
        print(f"  names: {names}")

        print("\n--- zip_info ---")
        for entry in zip_info(zip_path):
            print(f"  {entry.name:35s} {entry.size:>8,}{entry.compress_size:>8,} ({entry.ratio:.2f})")

        print("\n--- read_file / read_text ---")
        readme = read_text(zip_path, "README.txt")
        print(f"  README.txt first line: {readme.splitlines()[0]!r}")

        print("\n--- iter_files ---")
        # Glob-filtered iteration over members; only *.json is visited.
        for name, data in iter_files(zip_path, "*.json"):
            records = json.loads(data)
            print(f"  {name}: {len(records)} records")

        print("\n--- add_to_zip ---")
        add_to_zip(zip_path, {"CHANGELOG.txt": "v1.0 — initial release\n"})
        print(f"  after append: {len(zip_names(zip_path))} files")

        print("\n--- extract_all ---")
        extract_dir = os.path.join(td, "extracted")
        paths = extract_all(zip_path, extract_dir)
        print(f"  extracted {len(paths)} files to {extract_dir}")

        print("\n--- safe_extract ---")
        # Expected outcome: ValueError from safe_extract, caught below.
        try:
            # Build a zip with a path-traversal member (simulate malicious zip)
            evil_zip = os.path.join(td, "evil.zip")
            buf = io.BytesIO()
            with zipfile.ZipFile(buf, "w") as zf:
                zf.writestr("../../../etc/passwd", "root:x:0:0:root:/root:/bin/bash")
            Path(evil_zip).write_bytes(buf.getvalue())
            safe_extract(evil_zip, os.path.join(td, "safe"))
        except ValueError as e:
            print(f"  traversal blocked: {e}")

        print("\n--- in-memory zip ---")
        zip_bytes = build_zip_bytes({"hello.txt": "Hello!", "data.json": '{"x": 1}'})
        print(f"  in-memory zip: {len(zip_bytes):,} bytes")
        recs = [{"id": i, "val": "x" * 40} for i in range(50)]
        json_zip = zip_json_records(recs, "records.json")
        print(f"  json_records zip: {len(json_zip):,} bytes for {len(recs)} records")

        print("\n--- validation ---")
        ok, bad = test_zip(zip_path)
        print(f"  test_zip: ok={ok}  bad={bad}")
        print(f"  is_valid: {is_valid_zip(zip_path)}")

        print("\n--- zip_directory ---")
        # Build a small source tree on the fly, then zip it with a prefix.
        src_dir = os.path.join(td, "src")
        os.makedirs(src_dir)
        for i in range(5):
            Path(os.path.join(src_dir, f"file{i}.txt")).write_text(f"content {i}\n" * 100)
        dir_zip = os.path.join(td, "src.zip")
        n = zip_directory(src_dir, dir_zip, base="src/")
        print(f"  zipped {n} files from directory")
        print(f"  {zip_stats(dir_zip)}")

    print("\n=== done ===")

For the shutil.make_archive alternative — shutil.make_archive(base_name, "zip", root_dir, base_dir) creates zip archives with a single function call but offers no control over individual file metadata, compression levels, or incremental building; zipfile.ZipFile gives full control over each member’s name, content, compression, timestamp, and comment — use shutil.make_archive for quick one-liner directory bundling in build scripts; use zipfile when you need custom arcnames, in-memory zip creation, mixed file/string content, or fine-grained compression settings. For the tarfile alternative — tarfile handles .tar, .tar.gz, .tar.bz2, and .tar.xz archives, preserves Unix permissions (mode, uid, gid), symlinks, and hardlinks; ZIP archives do not preserve POSIX metadata but are natively supported on Windows and by all browsers for downloads — use tarfile for Linux/macOS deployment artifacts, Docker builds, and cases where file permissions must round-trip; use zipfile for downloads, cross-platform distribution, Python wheel files (.whl), and Office Open XML formats (.docx, .xlsx). The Claude Skills 360 bundle includes zipfile skill sets covering create_zip()/zip_directory()/add_to_zip() archive creation, zip_names()/zip_info()/read_file()/read_text()/iter_files() reading, extract_all()/safe_extract() path-traversal-safe extraction, build_zip_bytes()/zip_json_records() in-memory zip, and is_valid_zip()/test_zip()/zip_stats() validation. Start with the free tier to try archive manipulation patterns and zipfile pipeline code generation.

Keep Reading

AI

Claude Code for email.contentmanager: Python Email Content Accessors

Read and write EmailMessage body content with Python's email.contentmanager module and Claude Code — email contentmanager ContentManager for the class that maps content types to get and set handler functions allowing EmailMessage to support get_content and set_content with type-specific behaviour, email contentmanager raw_data_manager for the ContentManager instance that handles raw bytes and str payloads without any conversion, email contentmanager content_manager for the standard ContentManager instance used by email.policy.default that intelligently handles text plain text html multipart and binary content types, email contentmanager get_content_text for the handler that returns the decoded text payload of a text-star message part as a str, email contentmanager get_content_binary for the handler that returns the raw decoded bytes payload of a non-text message part, email contentmanager get_data_manager for the get-handler lookup used by EmailMessage get_content to find the right reader function for the content type, email contentmanager set_content text for the handler that creates and sets a text part correctly choosing charset and transfer encoding, email contentmanager set_content bytes for the handler that creates and sets a binary part with base64 encoding and optional filename Content-Disposition, email contentmanager EmailMessage get_content for the method that reads the message body using the registered content manager handlers, email contentmanager EmailMessage set_content for the method that sets the message body and MIME headers in one call, email contentmanager EmailMessage make_alternative make_mixed make_related for the methods that convert a simple message into a multipart container, email contentmanager EmailMessage add_attachment for the method that attaches a file or bytes to a multipart message, and email contentmanager integration with email.message and email.policy and email.mime and io for building high-level email readers attachment 
extractors text body accessors HTML readers and policy-aware MIME construction pipelines.

5 min read Feb 12, 2029
AI

Claude Code for email.charset: Python Email Charset Encoding

Control header and body encoding for international email with Python's email.charset module and Claude Code — email charset Charset for the class that wraps a character set name with the encoding rules for header encoding and body encoding describing how to encode text for that charset in email messages, email charset Charset header_encoding for the attribute specifying whether headers using this charset should use QP quoted-printable encoding BASE64 encoding or no encoding, email charset Charset body_encoding for the attribute specifying the Content-Transfer-Encoding to use for message bodies in this charset such as QP or BASE64, email charset Charset output_codec for the attribute giving the Python codec name used to encode the string to bytes for the wire format, email charset Charset input_codec for the attribute giving the Python codec name used to decode incoming bytes to str, email charset Charset get_output_charset for returning the output charset name, email charset Charset header_encode for encoding a header string using the charset's header_encoding method, email charset Charset body_encode for encoding body content using the charset's body_encoding, email charset Charset convert for converting a string from the input_codec to the output_codec, email charset add_charset for registering a new charset with custom encoding rules in the global charset registry, email charset add_alias for adding an alias name that maps to an existing registered charset, email charset add_codec for registering a codec name mapping for use by the charset machinery, and email charset integration with email.message and email.mime and email.policy and email.encoders for building international email senders non-ASCII header encoders Content-Transfer-Encoding selectors charset-aware message constructors and MIME encoding pipelines.

5 min read Feb 11, 2029
AI

Claude Code for email.utils: Python Email Address and Header Utilities

Parse and format RFC 2822 email addresses and dates with Python's email.utils module and Claude Code — email utils parseaddr for splitting a display-name plus angle-bracket address string into a realname and email address tuple, email utils formataddr for combining a realname and address string into a properly quoted RFC 2822 address with angle brackets, email utils getaddresses for parsing a list of raw address header strings each potentially containing multiple comma-separated addresses into a list of realname address tuples, email utils parsedate for parsing an RFC 2822 date string into a nine-tuple compatible with time.mktime, email utils parsedate_tz for parsing an RFC 2822 date string into a ten-tuple that includes the UTC offset timezone in seconds, email utils parsedate_to_datetime for parsing an RFC 2822 date string into an aware datetime object with timezone, email utils formatdate for formatting a POSIX timestamp or the current time as an RFC 2822 date string with optional usegmt and localtime flags, email utils format_datetime for formatting a datetime object as an RFC 2822 date string, email utils make_msgid for generating a globally unique Message-ID string with optional idstring and domain components, email utils decode_rfc2231 for decoding an RFC 2231 encoded parameter value into a tuple of charset language and value, email utils encode_rfc2231 for encoding a string as an RFC 2231 encoded parameter value, email utils collapse_rfc2231_value for collapsing a decoded RFC 2231 tuple to a Unicode string, and email utils integration with email.message and email.headerregistry and datetime and time for building address parsers date formatters message-id generators header extractors and RFC-compliant email construction utilities.

5 min read Feb 10, 2029

Put these ideas into practice

Claude Skills 360 gives you production-ready skills for everything in this article — and 2,350+ more. Start free or go all-in.

Back to Blog

Get 360 skills free