Python’s gzip module reads and writes gzip-format compressed files. import gzip. open: gzip.open(filename, mode="rb", compresslevel=9, encoding=None, errors=None, newline=None) — accepts paths or file-like objects; modes: "rb", "wb", "ab", "xb", "rt", "wt", "at", "xt". compress: gzip.compress(data, compresslevel=9, mtime=None) → compressed bytes; mtime=0 for deterministic output. decompress: gzip.decompress(data) → original bytes. GzipFile: gzip.GzipFile(filename, mode, compresslevel, fileobj, mtime) — low-level access; .write(b), .read(n), .peek(n), .close(). BadGzipFile: raised on invalid magic or corrupt gzip header (gzip.BadGzipFile). compresslevel: 0–9; gzip.BEST_COMPRESSION = 9, gzip.BEST_SPEED = 1. mtime: timestamp embedded in header; set to 0 for reproducible builds. Concatenated streams: gzip supports multiple members in one file — decompress reads all. Reading line-by-line: with gzip.open("f.gz", "rt") as f: for line in f: — streams decompression. Writing incrementally: open in "wb" mode, call .write() in chunks. Claude Code generates log compressors, gzip file readers, streaming ETL pipelines, and deterministic artifact builders.
# CLAUDE.md for gzip
## gzip Stack
- Stdlib: import gzip
- One-shot: gzip.compress(data, compresslevel=6, mtime=0) # deterministic
- File write: with gzip.open("f.gz", "wb") as f: f.write(data)
- File read: with gzip.open("f.gz", "rb") as f: data = f.read()
- Text: with gzip.open("f.gz", "rt", encoding="utf-8") as f:
- Stream: pass fileobj=io.BytesIO() for in-memory gzip
## gzip File Compression Pipeline
# app/gzutil.py — one-shot, file IO, streaming, text, concat, deterministic
from __future__ import annotations
import gzip
import io
import json
import os
from pathlib import Path
from typing import Any, Iterator
# ─────────────────────────────────────────────────────────────────────────────
# 1. One-shot helpers
# ─────────────────────────────────────────────────────────────────────────────
def gz_compress(data: bytes, level: int = 6) -> bytes:
    """Gzip-compress *data* at compression level *level* (0-9).

    Example:
        payload = gz_compress(json_bytes, level=6)
        headers["Content-Encoding"] = "gzip"
    """
    compressed: bytes = gzip.compress(data, compresslevel=level)
    return compressed
def gz_decompress(data: bytes) -> bytes:
    """Inflate gzip-compressed *data* back into the original bytes.

    Example:
        body = gz_decompress(response_bytes)
    """
    original = gzip.decompress(data)
    return original
def gz_compress_deterministic(data: bytes, level: int = 6) -> bytes:
    """Gzip-compress *data* reproducibly.

    Writing mtime=0 zeroes the timestamp field in the gzip header, so
    identical input always yields bit-for-bit identical output, no
    matter when the call runs.

    Example:
        artifact = gz_compress_deterministic(wheels_bytes)
        sha256(artifact)  # stable across builds
    """
    return gzip.compress(data, compresslevel=level, mtime=0)
def gz_text(text: str, encoding: str = "utf-8", level: int = 6) -> bytes:
    """Encode *text* with *encoding* and gzip it (mtime=0, reproducible).

    Example:
        gz_bytes = gz_text(html_content)
    """
    raw = text.encode(encoding)
    return gzip.compress(raw, compresslevel=level, mtime=0)
def gz_decode_text(data: bytes, encoding: str = "utf-8") -> str:
    """Decompress gzip *data* and decode it to a string using *encoding*.

    Example:
        html = gz_decode_text(cached_gz)
    """
    raw = gzip.decompress(data)
    return raw.decode(encoding)
def gz_ratio(original: bytes, compressed: bytes) -> float:
    """Return the compressed/original size ratio (lower = better).

    Returns 1.0 for empty *original* to avoid division by zero.
    """
    if not original:
        return 1.0
    return len(compressed) / len(original)
# ─────────────────────────────────────────────────────────────────────────────
# 2. File I/O
# ─────────────────────────────────────────────────────────────────────────────
def write_gz(path: str | Path, data: bytes, level: int = 6) -> int:
    """Compress *data* (mtime=0, reproducible) and write it to *path*.

    Returns the compressed size in bytes.

    Example:
        n = write_gz("output.json.gz", json_bytes)
        print(f"wrote {n:,} bytes compressed")
    """
    blob = gzip.compress(data, compresslevel=level, mtime=0)
    Path(path).write_bytes(blob)
    return len(blob)
def read_gz(path: str | Path) -> bytes:
    """Read a .gz file at *path* and return the decompressed bytes.

    gzip.decompress consumes every member, so concatenated multi-member
    gzip files are fully decoded as well.

    Example:
        data = read_gz("records.json.gz")
    """
    raw = Path(path).read_bytes()
    return gzip.decompress(raw)
def write_gz_text(path: str | Path, text: str, encoding: str = "utf-8", level: int = 6) -> None:
    """Write *text* to a .gz file at *path* using gzip text mode.

    Example:
        write_gz_text("report.txt.gz", report_content)
    """
    with gzip.open(path, mode="wt", compresslevel=level, encoding=encoding) as out:
        out.write(text)
def read_gz_text(path: str | Path, encoding: str = "utf-8") -> str:
    """Read a gzip text file at *path* and return the full decoded string.

    Example:
        report = read_gz_text("report.txt.gz")
    """
    with gzip.open(path, mode="rt", encoding=encoding) as fh:
        content = fh.read()
    return content
def gz_lines(path: str | Path, encoding: str = "utf-8") -> Iterator[str]:
    """Stream the lines of a gzip text file, newline stripped.

    Decompression happens incrementally, so the file never has to fit
    in memory as a whole.

    Example:
        for line in gz_lines("events.log.gz"):
            process(json.loads(line))
    """
    with gzip.open(path, mode="rt", encoding=encoding) as fh:
        for raw in fh:
            yield raw.rstrip("\n")
def compress_file(src: str | Path, dst: str | Path | None = None, level: int = 6) -> Path:
    """
    Compress src to dst (defaults to src + ".gz"). Returns destination path.

    Streams the source in 64 KiB chunks, so arbitrarily large files can
    be compressed without loading them fully into memory (the previous
    implementation read the whole file at once). Writes mtime=0 so the
    output is deterministic for identical input bytes.

    Example:
        out = compress_file("data.json")
        print(out)  # "data.json.gz"
    """
    src = Path(src)
    # with_suffix(src.suffix + ".gz") keeps the original extension:
    # "data.json" -> "data.json.gz", "data" -> "data.gz"
    dst = Path(dst) if dst else src.with_suffix(src.suffix + ".gz")
    with src.open("rb") as fin, dst.open("wb") as fout:
        # GzipFile over the raw destination lets us set mtime=0 while streaming.
        with gzip.GzipFile(fileobj=fout, mode="wb", compresslevel=level, mtime=0) as gz:
            while chunk := fin.read(65536):
                gz.write(chunk)
    return dst
def decompress_file(src: str | Path, dst: str | Path | None = None) -> Path:
    """
    Decompress src.gz to dst (defaults to src without .gz suffix).

    Streams in 64 KiB chunks, so large archives are never held fully in
    memory (the previous implementation buffered the whole payload).

    Raises:
        ValueError: if the destination resolves to src itself (e.g. src
            has no ".gz" suffix and dst was not given) — streaming onto
            the input would destroy it.

    Example:
        out = decompress_file("data.json.gz")
        print(out)  # "data.json"
    """
    src = Path(src)
    if dst is None:
        # .stem drops only the final extension: "data.json.gz" -> "data.json"
        dst = src.parent / src.stem
    dst = Path(dst)
    if dst == src:
        raise ValueError(f"destination {str(dst)!r} would overwrite source; pass dst explicitly")
    with gzip.open(src, "rb") as gz, dst.open("wb") as fout:
        while chunk := gz.read(65536):
            fout.write(chunk)
    return dst
# ─────────────────────────────────────────────────────────────────────────────
# 3. Streaming and in-memory
# ─────────────────────────────────────────────────────────────────────────────
def compress_stream_to_bytes(chunks: Iterator[bytes], level: int = 6) -> bytes:
    """Gzip a stream of byte chunks into one bytes object (mtime=0).

    Example:
        gz = compress_stream_to_bytes(read_chunks("big.log"))
    """
    sink = io.BytesIO()
    writer = gzip.GzipFile(fileobj=sink, mode="wb", compresslevel=level, mtime=0)
    with writer:
        for piece in chunks:
            writer.write(piece)
    return sink.getvalue()
def decompress_stream(data: bytes, chunk_size: int = 65536) -> Iterator[bytes]:
    """Yield decompressed data from gzip *data* in *chunk_size* pieces.

    Example:
        for chunk in decompress_stream(response_body):
            write_to_disk(chunk)
    """
    with gzip.GzipFile(fileobj=io.BytesIO(data), mode="rb") as reader:
        while block := reader.read(chunk_size):
            yield block
def gz_to_buffer(data: bytes, level: int = 6) -> io.BytesIO:
    """Compress *data* (mtime=0) into an in-memory buffer, rewound to 0.

    Example:
        buf = gz_to_buffer(html_bytes)
        upload(buf.read())
    """
    payload = gzip.compress(data, compresslevel=level, mtime=0)
    # BytesIO(initial_bytes) starts positioned at offset 0, ready to read.
    return io.BytesIO(payload)
# ─────────────────────────────────────────────────────────────────────────────
# 4. JSON and JSONL helpers
# ─────────────────────────────────────────────────────────────────────────────
def write_json_gz(path: str | Path, obj: Any, level: int = 6, indent: int | None = None) -> None:
    """Serialize *obj* as JSON (UTF-8, non-ASCII kept literal) into a .json.gz file.

    Output is deterministic (mtime=0) for identical *obj* and options.

    Example:
        write_json_gz("results.json.gz", results_list)
    """
    text = json.dumps(obj, indent=indent, ensure_ascii=False)
    blob = gzip.compress(text.encode("utf-8"), compresslevel=level, mtime=0)
    Path(path).write_bytes(blob)
def read_json_gz(path: str | Path) -> Any:
    """Read a .json.gz file and return the parsed JSON value.

    Example:
        results = read_json_gz("results.json.gz")
    """
    text = gzip.decompress(Path(path).read_bytes()).decode("utf-8")
    return json.loads(text)
def write_jsonl_gz(path: str | Path, records: Iterator[Any], level: int = 6) -> int:
    """Stream *records* to *path* as gzip-compressed JSON Lines.

    Each record becomes one JSON document on its own line. Returns the
    number of records written.

    Example:
        n = write_jsonl_gz("events.jsonl.gz", (r.to_dict() for r in events))
    """
    written = 0
    with gzip.open(path, mode="wt", compresslevel=level, encoding="utf-8") as out:
        for rec in records:
            out.write(json.dumps(rec, ensure_ascii=False))
            out.write("\n")
            written += 1
    return written
def read_jsonl_gz(path: str | Path) -> Iterator[Any]:
    """Lazily parse a .jsonl.gz file, yielding one JSON value per line.

    Blank lines are skipped; decompression is streamed, so large files
    never live in memory at once.

    Example:
        for event in read_jsonl_gz("events.jsonl.gz"):
            process(event)
    """
    with gzip.open(path, mode="rt", encoding="utf-8") as fh:
        for raw in fh:
            stripped = raw.strip()
            if not stripped:
                continue
            yield json.loads(stripped)
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
# Smoke-test / demo driver: exercises every helper in this module end to end
# and prints human-readable results. Run with `python gzutil.py`.
if __name__ == "__main__":
    import tempfile
    print("=== gzip demo ===")
    # ~9 KB of highly repetitive text — compresses very well at any level.
    sample_text = ("Hello gzip world! " * 500).encode("utf-8")
    print(f"\nOriginal: {len(sample_text):,} bytes")
    print("\n--- one-shot ---")
    # Compare size/ratio at fast (1), default-ish (6), and best (9) levels,
    # verifying the round trip at each.
    for level in [1, 6, 9]:
        c = gz_compress(sample_text, level=level)
        rt = gz_decompress(c)
        print(f" level={level}: {len(c):,} bytes ratio={gz_ratio(sample_text, c):.3f} ok={rt==sample_text}")
    print("\n--- deterministic ---")
    # Two calls with mtime=0 must produce byte-identical output.
    a = gz_compress_deterministic(sample_text)
    b = gz_compress_deterministic(sample_text)
    print(f" two calls identical: {a == b}")
    print("\n--- text helpers ---")
    # Non-ASCII text exercises the UTF-8 encode/decode path.
    msg = "Héllo wörld! " * 100
    gz_bytes = gz_text(msg)
    decoded = gz_decode_text(gz_bytes)
    print(f" text roundtrip ok: {decoded == msg}")
    # All file-based helpers run inside a throwaway directory that is
    # removed automatically when the `with` block exits.
    with tempfile.TemporaryDirectory() as td:
        print("\n--- file IO ---")
        json_path = os.path.join(td, "data.json")
        gz_path = os.path.join(td, "data.json.gz")
        Path(json_path).write_text("Hello world " * 200)
        n = write_gz(gz_path, Path(json_path).read_bytes())
        print(f" wrote {n:,} bytes to {os.path.basename(gz_path)}")
        restored = read_gz(gz_path)
        print(f" roundtrip ok: {restored == Path(json_path).read_bytes()}")
        # Text-mode write plus line iteration.
        text_gz = os.path.join(td, "report.txt.gz")
        write_gz_text(text_gz, "Line 1\nLine 2\nLine 3\n")
        lines = list(gz_lines(text_gz))
        print(f" gz_lines: {lines}")
        print("\n--- compress/decompress file ---")
        # Default naming: data.json -> data.json.gz; explicit dst for decompress.
        out_gz = compress_file(json_path)
        print(f" compressed to {out_gz.name}")
        out_json = decompress_file(out_gz, os.path.join(td, "data2.json"))
        print(f" decompressed to {out_json.name}, size={out_json.stat().st_size:,}")
        print("\n--- JSON and JSONL ---")
        records = [{"id": i, "value": f"Record {i}", "data": "x" * 50} for i in range(100)]
        jsonl_gz = os.path.join(td, "records.jsonl.gz")
        n = write_jsonl_gz(jsonl_gz, iter(records))
        print(f" wrote {n} JSONL records to {os.path.basename(jsonl_gz)}")
        size = os.path.getsize(jsonl_gz)
        print(f" compressed size: {size:,} bytes")
        loaded = list(read_jsonl_gz(jsonl_gz))
        print(f" read back {len(loaded)} records, match: {loaded == records}")
        # NOTE: this path coincides with gz_path above and overwrites it —
        # harmless here since the earlier file is no longer needed.
        json_gz = os.path.join(td, "data.json.gz")
        write_json_gz(json_gz, {"items": records[:5]})
        result = read_json_gz(json_gz)
        print(f" json.gz roundtrip: {len(result['items'])} items ok")
        print("\n--- streaming ---")
        # Slice the sample into 1 KiB chunks and compress them as a stream,
        # then reassemble via the chunked decompressor.
        chunk_size = 1024
        chunks = [sample_text[i:i+chunk_size] for i in range(0, len(sample_text), chunk_size)]
        gz_bytes2 = compress_stream_to_bytes(iter(chunks))
        decompressed = b"".join(decompress_stream(gz_bytes2))
        print(f" stream compress: {len(gz_bytes2):,} bytes roundtrip: {decompressed == sample_text}")
    print("\n=== done ===")
For the zlib alternative — zlib.compress/zlib.decompress operate at the deflate protocol level and support raw deflate (no headers), zlib format, and gzip format via the wbits parameter; gzip wraps zlib to produce proper gzip files with the standard two-byte magic, OS field, and optional filename/mtime metadata — use gzip when producing files that standard tools (gunzip, tar -z, browsers via Content-Encoding: gzip) will read; use zlib directly for custom binary framing, HTTP Transfer-Encoding: deflate, or when you need raw deflate without any file-format overhead. For the lzma / py7zr alternative — lzma.compress(data, preset=6) achieves 20–40% better compression than gzip on typical text/JSON at 10–30× the CPU cost; xz files use LZMA2 and are the standard high-ratio Linux archive format; py7zr (PyPI) handles 7z archives with LZMA support — use LZMA/XZ for distributing large datasets and build artifacts where download bandwidth matters more than compression time; use gzip for network transfer, logging pipelines, and any workload where real-time or near-real-time (de)compression throughput matters. The Claude Skills 360 bundle includes gzip skill sets covering gz_compress()/gz_decompress()/gz_compress_deterministic()/gz_text()/gz_ratio() one-shot helpers, write_gz()/read_gz()/write_gz_text()/read_gz_text()/gz_lines()/compress_file()/decompress_file() file I/O, compress_stream_to_bytes()/decompress_stream()/gz_to_buffer() streaming helpers, and write_json_gz()/read_json_gz()/write_jsonl_gz()/read_jsonl_gz() JSON/JSONL integration. Start with the free tier to try gzip file compression patterns and gzip pipeline code generation.