pikepdf reads and writes PDF files via libqpdf. pip install pikepdf. Open: import pikepdf; pdf = pikepdf.open("in.pdf"). Save: pdf.save("out.pdf"). Bytes: import io; buf = io.BytesIO(); pdf.save(buf); buf.getvalue(). Page count: len(pdf.pages). Get page: page = pdf.pages[0]. Delete page: del pdf.pages[2]. Reorder: pdf.pages[0], pdf.pages[1] = pdf.pages[1], pdf.pages[0]. Merge: new = pikepdf.Pdf.new(); for src in sources: new.pages.extend(src.pages); new.save("merged.pdf"). Split: for i, page in enumerate(pdf.pages): out = pikepdf.Pdf.new(); out.pages.append(page); out.save(f"{i}.pdf"). Rotate: page.rotate(90, relative=True). Metadata: pdf.docinfo["/Title"] = "My Doc". meta = pdf.open_metadata(). XMP: with pdf.open_metadata() as meta: meta["dc:title"] = "...". Encrypt: pdf.save("enc.pdf", encryption=pikepdf.Encryption(owner="ownerpass", user="userpass", R=6)). Decrypt: pikepdf.open("enc.pdf", password="userpass"). Extract images: from pikepdf import PdfImage; img = PdfImage(page.images["/Im0"]); img.as_pil_image().save("out.png"). Compress: pdf.save("out.pdf", compress_streams=True, recompress_flate=True). Copy pages: dst.pages.append(src.pages[0]). Remove annots: del page["/Annots"]. Claude Code generates pikepdf merge/split utilities, watermark pipelines, and PDF metadata editors.
CLAUDE.md for pikepdf
## pikepdf Stack
- Version: pikepdf >= 8 | pip install pikepdf
- Open: pdf = pikepdf.open("in.pdf") | with pikepdf.open("in.pdf") as pdf:
- Save: pdf.save("out.pdf") | buf = BytesIO(); pdf.save(buf); buf.getvalue() for in-memory
- Pages: pdf.pages[i] | del pdf.pages[i] | pages.append/extend
- Merge: new = Pdf.new(); for p in pdfs: new.pages.extend(p.pages)
- Encrypt: pdf.save(path, encryption=pikepdf.Encryption(owner=pw, R=6))
pikepdf PDF Manipulation Pipeline
# app/pdf_edit.py — pikepdf merge, split, rotate, metadata, encrypt, and image-extraction helpers
from __future__ import annotations
import io
from pathlib import Path
from typing import Any
import pikepdf
from pikepdf import Pdf, PdfImage, Encryption
# ─────────────────────────────────────────────────────────────────────────────
# 1. Open / save helpers
# ─────────────────────────────────────────────────────────────────────────────
def open_pdf(source: str | Path | bytes | bytearray | memoryview) -> Pdf:
    """
    Open a PDF from a file path or from in-memory PDF data.

    source: a filesystem path (str or Path), or a bytes-like object
        (bytes, bytearray, memoryview) holding the PDF itself.
    Returns a pikepdf.Pdf; caller should close() or use as context manager.
    """
    # Any bytes-like object is in-memory data. Checking only for ``bytes``
    # would send a bytearray down the path branch, where str() turns it into
    # a nonsensical file name.
    if isinstance(source, (bytes, bytearray, memoryview)):
        return pikepdf.open(io.BytesIO(source))
    return pikepdf.open(str(source))
def save_bytes(pdf: Pdf, compress: bool = True) -> bytes:
    """
    Write ``pdf`` to an in-memory buffer and return the buffer's contents.

    compress: passed through as both ``compress_streams`` and
        ``recompress_flate``; it also selects the object stream mode
        (``generate`` when True, ``disable`` when False).
    """
    if compress:
        stream_mode = pikepdf.ObjectStreamMode.generate
    else:
        stream_mode = pikepdf.ObjectStreamMode.disable

    sink = io.BytesIO()
    pdf.save(
        sink,
        compress_streams=compress,
        recompress_flate=compress,
        object_stream_mode=stream_mode,
    )
    return sink.getvalue()
def page_count(source: str | Path | bytes) -> int:
    """Open the PDF given by ``source`` and return how many pages it has."""
    with open_pdf(source) as document:
        total = len(document.pages)
    return total
# ─────────────────────────────────────────────────────────────────────────────
# 2. Merge and split
# ─────────────────────────────────────────────────────────────────────────────
def merge_pdfs(
    sources: list[str | Path | bytes],
    output_path: str | Path | None = None,
) -> bytes:
    """
    Concatenate the pages of several PDFs, in the order given, into one PDF.

    The merged document is always returned as bytes. When output_path is
    given (and non-empty) the same bytes are also written to that file.

    Example:
        pdf_bytes = merge_pdfs(["cover.pdf", "content.pdf", "appendix.pdf"])
    """
    combined = Pdf.new()
    for item in sources:
        with open_pdf(item) as part:
            combined.pages.extend(part.pages)

    payload = save_bytes(combined)
    if not output_path:
        return payload
    Path(output_path).write_bytes(payload)
    return payload
def split_pdf(
    source: str | Path | bytes,
    output_dir: str | Path,
    prefix: str = "page",
) -> list[Path]:
    """
    Write every page of a PDF to its own single-page file.

    Files are named ``{prefix}_NNNN.pdf`` (1-based, zero-padded to four
    digits) inside output_dir, which is created if it does not exist.
    Returns the created paths in page order.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    written: list[Path] = []
    with open_pdf(source) as document:
        for number, page in enumerate(document.pages, start=1):
            destination = target_dir / f"{prefix}_{number:04d}.pdf"
            one_page = Pdf.new()
            one_page.pages.append(page)
            one_page.save(str(destination))
            written.append(destination)
    return written
def extract_pages(
    source: str | Path | bytes,
    page_numbers: list[int],
    output_path: str | Path | None = None,
) -> bytes:
    """
    Build a new PDF containing only the requested pages.

    page_numbers: 0-indexed page positions, copied in the order listed.
    output_path: when given (and non-empty), the result is also written there.
    Returns the new PDF as bytes.

    Example: extract_pages("doc.pdf", [0, 2, 4]) → pages 1, 3, 5
    """
    with open_pdf(source) as document:
        subset = Pdf.new()
        for index in page_numbers:
            subset.pages.append(document.pages[index])
        payload = save_bytes(subset)

    if not output_path:
        return payload
    Path(output_path).write_bytes(payload)
    return payload
def rotate_pages(
    source: str | Path | bytes,
    degrees: int,
    page_numbers: list[int] | None = None,
) -> bytes:
    """
    Rotate pages by degrees (90, 180, 270), relative to their current rotation.

    page_numbers: 0-indexed pages to rotate; None rotates every page.
    Returns the modified PDF as bytes.
    """
    with open_pdf(source) as document:
        if page_numbers is None:
            indices = range(len(document.pages))
        else:
            indices = page_numbers
        for index in indices:
            document.pages[index].rotate(degrees, relative=True)
        return save_bytes(document)
# ─────────────────────────────────────────────────────────────────────────────
# 3. Metadata
# ─────────────────────────────────────────────────────────────────────────────
def get_metadata(source: str | Path | bytes) -> dict[str, Any]:
    """
    Return the DocumentInfo text fields of a PDF together with its page count.

    Returned keys:
        title, author, subject, creator, producer: str, "" when the field
            is absent from the document's docinfo.
        page_count: int, number of pages in the document.
    """
    # Result key -> DocumentInfo key it is read from.
    docinfo_keys = {
        "title": "/Title",
        "author": "/Author",
        "subject": "/Subject",
        "creator": "/Creator",
        "producer": "/Producer",
    }
    with open_pdf(source) as pdf:
        docinfo = pdf.docinfo
        info: dict[str, Any] = {
            field: str(docinfo.get(pdf_key, ""))
            for field, pdf_key in docinfo_keys.items()
        }
        # page_count is an int, hence the dict[str, Any] return annotation.
        info["page_count"] = len(pdf.pages)
    return info
def set_metadata(
    source: str | Path | bytes,
    title: str = "",
    author: str = "",
    subject: str = "",
    keywords: str = "",
) -> bytes:
    """
    Set PDF metadata fields and return the updated PDF bytes.

    Each non-empty argument is written both to the XMP metadata and to the
    legacy DocumentInfo dictionary; empty arguments leave the existing value
    untouched.

        title    -> dc:title        and /Title
        author   -> dc:creator      and /Author
        subject  -> dc:description  and /Subject
        keywords -> pdf:Keywords    and /Keywords
    """
    with open_pdf(source) as pdf:
        with pdf.open_metadata() as meta:
            if title:
                meta["dc:title"] = title
            if author:
                # dc:creator is written as a list of names.
                meta["dc:creator"] = [author]
            if subject:
                meta["dc:description"] = subject
            if keywords:
                meta["pdf:Keywords"] = keywords

        # Also update legacy docinfo. /Keywords is included so that all four
        # fields are handled the same way.
        legacy_fields = {
            "/Title": title,
            "/Author": author,
            "/Subject": subject,
            "/Keywords": keywords,
        }
        for key, value in legacy_fields.items():
            if value:
                pdf.docinfo[key] = value
        return save_bytes(pdf)
# ─────────────────────────────────────────────────────────────────────────────
# 4. Encryption
# ─────────────────────────────────────────────────────────────────────────────
def encrypt_pdf(
    source: str | Path | bytes,
    user_password: str,
    owner_password: str | None = None,
    allow_printing: bool = True,
    allow_copying: bool = False,
) -> bytes:
    """
    Save an encrypted copy of a PDF (Encryption R=6) and return it as bytes.

    user_password: password for opening the file.
    owner_password: password for changing permissions; falls back to
        user_password when None or empty.
    allow_printing: sets both the low- and high-resolution print permissions.
    allow_copying: sets the extract permission.
    All modify_* permissions are denied and accessibility is always allowed.
    """
    permissions = pikepdf.Permissions(
        accessibility=True,
        extract=allow_copying,
        modify_annotation=False,
        modify_assembly=False,
        modify_form=False,
        modify_other=False,
        print_highres=allow_printing,
        print_lowres=allow_printing,
    )
    settings = Encryption(
        owner=owner_password or user_password,
        user=user_password,
        R=6,
        allow=permissions,
    )
    with open_pdf(source) as document:
        sink = io.BytesIO()
        document.save(sink, encryption=settings)
        return sink.getvalue()
def decrypt_pdf(source: str | Path | bytes, password: str) -> bytes:
    """
    Open an encrypted PDF and return an unencrypted copy as bytes.

    source: file path, or bytes-like object holding the encrypted PDF.
    password: password used to open the document.
    """
    if isinstance(source, (bytes, bytearray, memoryview)):
        target = io.BytesIO(source)
    else:
        target = str(source)
    # Use a context manager so the Pdf (and any file handle pikepdf opened
    # for a path) is released once the copy has been serialized.
    with pikepdf.open(target, password=password) as pdf:
        return save_bytes(pdf)
# ─────────────────────────────────────────────────────────────────────────────
# 5. Image extraction
# ─────────────────────────────────────────────────────────────────────────────
def extract_images(
    source: str | Path | bytes,
    output_dir: str | Path,
    page_numbers: list[int] | None = None,
) -> list[Path]:
    """
    Extract images from the PDF and save them to output_dir.

    page_numbers: 0-indexed pages to scan; None scans every page. An empty
        list scans nothing and returns an empty list.
    Files are named ``page{P}_{NNNN}.{ext}`` where P is the 1-based page
    number within the source document and NNNN counts saved images across
    the whole run. Images that cannot be converted or written are skipped
    and reported through the ``logging`` module.
    Returns list of saved image paths.
    Requires: pip install pillow
    """
    import logging  # local import keeps this block self-contained

    log = logging.getLogger(__name__)
    out = Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)
    saved: list[Path] = []

    with open_pdf(source) as pdf:
        total_pages = len(pdf.pages)
        # ``is None`` (not truthiness) so that an explicit empty selection is
        # not silently widened to "all pages".
        indices = range(total_pages) if page_numbers is None else page_numbers
        img_idx = 0
        for page_idx in indices:
            page = pdf.pages[page_idx]
            # Label files with the page's real position in the document, not
            # its position in the selection; the modulo maps a negative index
            # onto the page it refers to.
            page_no = page_idx % total_pages + 1
            for raw in page.images.values():
                try:
                    pil = PdfImage(raw).as_pil_image()
                    ext = pil.format.lower() if pil.format else "png"
                    path = out / f"page{page_no}_{img_idx:04d}.{ext}"
                    pil.save(str(path))
                except Exception:
                    # Best effort: one unextractable image must not abort the
                    # run, but leave a trace instead of failing silently.
                    log.warning(
                        "Skipping unextractable image on page %d",
                        page_no,
                        exc_info=True,
                    )
                    continue
                saved.append(path)
                img_idx += 1
    return saved
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import tempfile

    # fpdf2 is needed only to fabricate a small input PDF for the demo, so
    # only its import is guarded; errors raised by the demo itself propagate.
    try:
        from fpdf import FPDF
    except ImportError:
        print("fpdf2 not installed — skipping demo. pip install fpdf2 pikepdf")
    else:

        def _make_test_pdf(pages: int = 3) -> bytes:
            """Build a PDF with ``pages`` pages, each showing its page number."""
            pdf = FPDF()
            for i in range(1, pages + 1):
                pdf.add_page()
                pdf.set_font("Helvetica", "B", 24)
                pdf.cell(0, 20, f"Page {i}", align="C")
            # fpdf2's output() returns a bytearray; convert so the value
            # matches the declared ``bytes`` return type.
            return bytes(pdf.output())

        src = _make_test_pdf(4)
        # Write a copy to the platform temp directory for manual inspection.
        (Path(tempfile.gettempdir()) / "test_src.pdf").write_bytes(src)
        print(f"Test PDF: {len(src):,} bytes, {page_count(src)} pages")

        print("\n=== Metadata ===")
        info = get_metadata(src)
        print(info)

        print("\n=== Set metadata ===")
        updated = set_metadata(src, title="My Document", author="Alice", subject="Test")
        print(f"Updated: {len(updated):,} bytes")

        print("\n=== Extract pages [0, 2] ===")
        extracted = extract_pages(src, [0, 2])
        print(f"Extracted: {page_count(extracted)} pages")

        print("\n=== Rotate page 0 by 90° ===")
        rotated = rotate_pages(src, 90, [0])
        print(f"Rotated: {page_count(rotated)} pages")

        print("\n=== Encrypt + decrypt ===")
        enc_pdf = encrypt_pdf(src, user_password="secret", allow_printing=True)
        print(f"Encrypted: {len(enc_pdf):,} bytes")
        dec_pdf = decrypt_pdf(enc_pdf, password="secret")
        print(f"Decrypted: {len(dec_pdf):,} bytes, {page_count(dec_pdf)} pages")
For the PyPDF2 / pypdf alternative — pypdf (formerly PyPDF2) is a pure-Python PDF library with a similar merge/split API and no native dependencies; pikepdf uses QPDF under the hood giving it superior handling of malformed PDFs, reliable encryption/decryption with AES-256, and image extraction via PdfImage.as_pil_image() — pikepdf is the right choice when correctness and encryption matter. For the pdfplumber / pdfminer alternative — pdfplumber and pdfminer are specialized for text and table extraction from existing PDFs (they parse text layers and detect columns); pikepdf operates at the PDF object/page level for structural operations like merge, split, rotate, watermark, and metadata — use pdfplumber when you need the text content, pikepdf when you need to manipulate the document structure. The Claude Skills 360 bundle includes pikepdf skill sets covering open_pdf()/save_bytes() helpers, merge_pdfs() multi-document combiner, split_pdf() page splitter, extract_pages() page range extractor, rotate_pages(), get_metadata()/set_metadata() with XMP and docinfo, encrypt_pdf() AES-256 with permissions, decrypt_pdf(), and extract_images() Pillow pipeline. Start with the free tier to try PDF manipulation code generation.