Python’s cgi module parses HTML form submissions, URL-encoded query strings, and multipart file uploads as delivered to CGI scripts. Import it with import cgi. Main entry point: form = cgi.FieldStorage() — reads from stdin (POST body) and os.environ["QUERY_STRING"] (GET params) automatically when running as a CGI script. Access fields: form.getvalue("name") → str or None (a list of str when the field is submitted more than once); form.getvalue("name", default) → str; form.getlist("name") → list of str (multi-value fields); form["name"].value → str; form["name"].filename → uploaded filename or None; form["name"].file → file-like object of upload data. Test mode: pass fp and environ explicitly to FieldStorage(fp=..., environ={...}, keep_blank_values=True). Headers: cgi.parse_header("text/html; charset=utf-8") → ("text/html", {"charset": "utf-8"}). Low-level: cgi.parse_multipart(fp, pdict). HTML escape: cgi.escape(s, quote=False) — deprecated since Python 3.2 and removed in Python 3.8; use html.escape() instead. Debug: cgi.print_environ(), cgi.print_form(form). Note: the cgi module itself is deprecated since Python 3.11 and removed in Python 3.13 — for new code use urllib.parse.parse_qs for query strings and the email package (email.message, email.contentmanager) for multipart bodies, and include a compatibility guard around import cgi. Claude Code generates form parsers, file upload handlers, CGI test harnesses, and query string decoders.
CLAUDE.md for cgi
## cgi Stack
- Stdlib: import cgi (deprecated 3.11, removed 3.13 — guard with try/except)
- Parse: form = cgi.FieldStorage() # reads environ + stdin
- val = form.getvalue("username") # str or None
- files = form.getlist("attachment") # list for multi-value
- upload = form["file"]; name = upload.filename; data = upload.file.read()
- Header: cgi.parse_header("multipart/form-data; boundary=X")
- Modern: use urllib.parse.parse_qs(query_string) for query strings
cgi Form Data Pipeline
# app/cgiutil.py — parse form, handle uploads, test harness, header parsing
from __future__ import annotations
import html
import io
import os
import urllib.parse
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
# The cgi module no longer exists on Python 3.13+. Record whether the import
# worked so the rest of the module can choose between cgi and pure-Python code.
try:
    import cgi as _cgi
except ImportError:
    _CGI_AVAILABLE = False
else:
    _CGI_AVAILABLE = True
# ─────────────────────────────────────────────────────────────────────────────
# 1. Header parsing (works without cgi module)
# ─────────────────────────────────────────────────────────────────────────────
def _split_header_params(text: str) -> list[str]:
    """Split a header value on ';' separators that are not inside double quotes."""
    pieces: list[str] = []
    remaining = ";" + text
    while remaining[:1] == ";":
        remaining = remaining[1:]
        end = remaining.find(";")
        # An odd number of unescaped quotes before the ';' means the ';' sits
        # inside a quoted string, so keep looking for the next candidate.
        while end > 0 and (
            remaining.count('"', 0, end) - remaining.count('\\"', 0, end)
        ) % 2:
            end = remaining.find(";", end + 1)
        if end < 0:
            end = len(remaining)
        pieces.append(remaining[:end].strip())
        remaining = remaining[end:]
    return pieces


def parse_header(header_value: str) -> tuple[str, dict[str, str]]:
    """
    Parse a Content-Type or Content-Disposition header into (value, params).

    Delegates to cgi.parse_header when the cgi module is importable. Otherwise
    a pure-Python implementation with the same rules is used: parameter names
    are lower-cased, surrounding double quotes are removed, backslash escapes
    inside quoted values are decoded, and ';' characters inside a quoted value
    do not split the parameter.

    Args:
        header_value: The raw header value (without the "Name:" prefix).

    Returns:
        A (value, params) tuple: the main value and a dict of its parameters.
        Segments without an '=' are ignored.

    Example:
        ct, params = parse_header("multipart/form-data; boundary=---X")
        ct, params = parse_header("attachment; filename=report.pdf")
    """
    if _CGI_AVAILABLE:
        return _cgi.parse_header(header_value)

    # Pure-Python fallback; the splitter always yields at least one piece.
    value, *raw_params = _split_header_params(header_value)
    params: dict[str, str] = {}
    for raw in raw_params:
        name, sep, text = raw.partition("=")
        if not sep:
            continue
        text = text.strip()
        if len(text) >= 2 and text[0] == text[-1] == '"':
            # Drop exactly one pair of quotes, then undo quoted-pair escapes.
            text = text[1:-1].replace("\\\\", "\\").replace('\\"', '"')
        params[name.strip().lower()] = text
    return value, params
# ─────────────────────────────────────────────────────────────────────────────
# 2. FieldStorage wrapper
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class FormField:
    """One submitted form value: either a text value or an uploaded file."""

    name: str
    value: str | None = None
    filename: str | None = None
    data: bytes | None = None  # raw bytes of the uploaded file, for uploads
    content_type: str = "text/plain"

    @property
    def is_upload(self) -> bool:
        """Report whether a filename is set, which marks the field as an upload."""
        return self.filename is not None

    def __str__(self) -> str:
        """Render as name=value, or name=<upload filename sizeB> for uploads."""
        if not self.is_upload:
            return f"{self.name!r}={self.value!r}"
        byte_count = len(self.data) if self.data else 0
        return f"{self.name!r}=<upload {self.filename!r} {byte_count}B>"
@dataclass
class ParsedForm:
    """Parsed form data: every submitted field, grouped by field name."""

    fields: dict[str, list[FormField]]  # field name → fields submitted under it

    def get(self, name: str, default: str | None = None) -> str | None:
        """Return the first value for name; default if absent or if it is an upload."""
        entries = self.fields.get(name)
        if entries:
            first = entries[0]
            if not first.is_upload:
                return first.value
        return default

    def getlist(self, name: str) -> list[str]:
        """Return every non-upload, non-None value submitted under name."""
        texts: list[str] = []
        for entry in self.fields.get(name, ()):
            if entry.is_upload or entry.value is None:
                continue
            texts.append(entry.value)
        return texts

    def get_upload(self, name: str) -> FormField | None:
        """Return the first upload submitted under name, or None if there is none."""
        candidates = (entry for entry in self.fields.get(name, ()) if entry.is_upload)
        return next(candidates, None)

    def all_uploads(self) -> list[FormField]:
        """Return every upload field in the form, in field order."""
        uploads: list[FormField] = []
        for entries in self.fields.values():
            uploads.extend(entry for entry in entries if entry.is_upload)
        return uploads

    def __str__(self) -> str:
        """Render as ParsedForm(...) with each field's own string form inside."""
        rendered = [
            str(entry) for entries in self.fields.values() for entry in entries
        ]
        return f"ParsedForm({', '.join(rendered)})"
def _read_body(fp: io.IOBase | None) -> bytes:
    """Read the whole request body from fp as bytes (empty when fp is None)."""
    if fp is None:
        return b""
    raw = fp.read()
    if isinstance(raw, str):
        raw = raw.encode()
    return raw or b""


def _fields_from_query(query: str, keep_blank: bool) -> dict[str, list[FormField]]:
    """Convert a URL-encoded string into text FormFields grouped by name."""
    parsed = urllib.parse.parse_qs(query, keep_blank_values=keep_blank)
    return {
        key: [FormField(name=key, value=text) for text in values]
        for key, values in parsed.items()
    }


def _fields_from_fieldstorage(
    fp: io.IOBase, env: Any, keep_blank: bool
) -> dict[str, list[FormField]]:
    """Parse the request with cgi.FieldStorage and convert it into FormFields."""
    storage = _cgi.FieldStorage(fp=fp, environ=env, keep_blank_values=keep_blank)
    collected: dict[str, list[FormField]] = {}
    for key in storage.keys():
        entries = storage[key]
        # FieldStorage returns a bare item for single values, a list otherwise.
        if not isinstance(entries, list):
            entries = [entries]
        converted: list[FormField] = []
        for entry in entries:
            if entry.filename:
                payload = entry.file.read() if entry.file else b""
                converted.append(FormField(
                    name=entry.name,
                    filename=entry.filename,
                    data=payload,
                    content_type=entry.type or "application/octet-stream",
                ))
            else:
                converted.append(FormField(name=entry.name, value=entry.value))
        collected[key] = converted
    return collected


def _fields_from_multipart(
    body: bytes, content_type: str
) -> dict[str, list[FormField]]:
    """Parse a multipart/form-data body with the stdlib email parser."""
    # Imported here because only the cgi-less fallback needs them.
    from email.parser import BytesParser
    from email.utils import collapse_rfc2231_value

    # The email parser expects a full MIME message, so prepend the headers
    # that carry the boundary parameter.
    mime_headers = (
        b"MIME-Version: 1.0\r\nContent-Type: "
        + content_type.encode("latin-1", errors="replace")
        + b"\r\n\r\n"
    )
    message = BytesParser().parsebytes(mime_headers + body)
    collected: dict[str, list[FormField]] = {}
    if not message.is_multipart():
        # No usable boundary or parts: nothing can be extracted.
        return collected
    for part in message.get_payload():
        raw_name = part.get_param("name", header="content-disposition")
        if raw_name is None:
            continue  # a part without a field name cannot be addressed
        name = collapse_rfc2231_value(raw_name)
        filename = part.get_filename()
        payload = part.get_payload(decode=True) or b""
        if filename:
            item = FormField(
                name=name,
                filename=filename,
                data=payload,
                content_type=part.get_content_type() or "application/octet-stream",
            )
        else:
            item = FormField(
                name=name,
                value=payload.decode("utf-8", errors="replace"),
            )
        collected.setdefault(name, []).append(item)
    return collected


def parse_form(
    fp: io.IOBase | None = None,
    environ: dict[str, str] | None = None,
    keep_blank: bool = False,
) -> ParsedForm:
    """
    Parse HTML form data from fp (POST body) and/or environ (QUERY_STRING).

    When the cgi module is importable and fp is given, cgi.FieldStorage does
    the parsing. Otherwise a pure-Python path is used:
      * a body whose CONTENT_TYPE starts with "multipart/" is parsed with the
        stdlib email parser, and QUERY_STRING fields are merged in first;
      * any other body is treated as URL-encoded and parsed together with
        QUERY_STRING.

    Args:
        fp: File-like object holding the request body, or None for no body.
        environ: CGI-style environment mapping. None means os.environ; an
            explicitly passed mapping is used as-is, even when empty.
        keep_blank: Keep fields with empty values in URL-encoded data.

    Returns:
        A ParsedForm holding every submitted field grouped by name.

    Example:
        # Simulate a POST request
        body = b"username=alice&age=30"
        env = {
            "REQUEST_METHOD": "POST",
            "CONTENT_TYPE": "application/x-www-form-urlencoded",
            "CONTENT_LENGTH": str(len(body)),
        }
        form = parse_form(io.BytesIO(body), env)
        print(form.get("username"))
    """
    # "is None" rather than "or": an empty mapping passed on purpose must not
    # be silently replaced by the real process environment.
    env = os.environ if environ is None else environ

    if _CGI_AVAILABLE and fp is not None:
        return ParsedForm(fields=_fields_from_fieldstorage(fp, env, keep_blank))

    body = _read_body(fp)
    query = env.get("QUERY_STRING", "")
    content_type = env.get("CONTENT_TYPE", "")

    if body and content_type.strip().lower().startswith("multipart/"):
        collected = _fields_from_query(query, keep_blank)
        for name, parts in _fields_from_multipart(body, content_type).items():
            collected.setdefault(name, []).extend(parts)
        return ParsedForm(fields=collected)

    if body:
        query = (query + "&" + body.decode("utf-8", errors="replace")).lstrip("&")
    return ParsedForm(fields=_fields_from_query(query, keep_blank))
# ─────────────────────────────────────────────────────────────────────────────
# 3. CGI test harness
# ─────────────────────────────────────────────────────────────────────────────
def make_get_environ(query_string: str) -> dict[str, str]:
    """
    Build a CGI-style environ dict describing a simulated GET request.

    The query string is stored under QUERY_STRING; the request carries no
    body, so CONTENT_TYPE is empty and CONTENT_LENGTH is "0".

    Example:
        env = make_get_environ("name=alice&role=admin")
    """
    request_vars = {
        "REQUEST_METHOD": "GET",
        "QUERY_STRING": query_string,
        "CONTENT_TYPE": "",
        "CONTENT_LENGTH": "0",
    }
    server_vars = {
        "SERVER_NAME": "localhost",
        "SERVER_PORT": "8080",
        "HTTP_HOST": "localhost:8080",
    }
    return {**request_vars, **server_vars}
def make_post_environ(
    body: bytes,
    content_type: str = "application/x-www-form-urlencoded",
) -> dict[str, str]:
    """
    Build a CGI-style environ dict describing a simulated POST request.

    CONTENT_LENGTH is the length of body as a string, CONTENT_TYPE is the
    given content type, and QUERY_STRING is empty.

    Example:
        env = make_post_environ(b"name=alice&age=30")
    """
    body_length = str(len(body))
    request_vars = {
        "REQUEST_METHOD": "POST",
        "QUERY_STRING": "",
        "CONTENT_TYPE": content_type,
        "CONTENT_LENGTH": body_length,
    }
    server_vars = {
        "SERVER_NAME": "localhost",
        "SERVER_PORT": "8080",
        "HTTP_HOST": "localhost:8080",
    }
    return {**request_vars, **server_vars}
def build_multipart_body(
    fields: dict[str, str | tuple[str, bytes, str]],
    boundary: str = "----FormBoundary7MA4YWxkTrZu0gW",
) -> tuple[bytes, str]:
    """
    Build a multipart/form-data body.

    Args:
        fields: Maps field name to its content: a str for a text field, or a
            (filename, data, mime_type) tuple for a file upload.
        boundary: Boundary string placed between parts. It is not checked
            against the payloads, so it must not occur inside them.

    Returns:
        (body_bytes, content_type_header_value).

    Field names and filenames are written as quoted strings; any backslash
    or double quote in them is backslash-escaped so the Content-Disposition
    header stays well-formed and parses back to the original text.

    Example:
        body, ct = build_multipart_body({
            "username": "alice",
            "avatar": ("photo.png", png_bytes, "image/png"),
        })
    """
    def quoted(text: str) -> str:
        # Escape the backslash first so the escapes added for '"' survive.
        return text.replace("\\", "\\\\").replace('"', '\\"')

    delimiter = f"--{boundary}".encode()
    chunks: list[bytes] = []
    for name, value in fields.items():
        disposition = f'Content-Disposition: form-data; name="{quoted(name)}"'
        chunks.append(delimiter)
        if isinstance(value, str):
            chunks += [disposition.encode(), b"", value.encode()]
        else:
            filename, data, mime = value
            disposition += f'; filename="{quoted(filename)}"'
            chunks += [
                disposition.encode(),
                f"Content-Type: {mime}".encode(),
                b"",
                data,
            ]
    # The closing delimiter carries a trailing "--".
    chunks.append(delimiter + b"--")
    body = b"\r\n".join(chunks) + b"\r\n"
    return body, f"multipart/form-data; boundary={boundary}"
# ─────────────────────────────────────────────────────────────────────────────
# 4. Upload handler
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class UploadResult:
    """Record describing one uploaded file after it was written to disk."""

    field_name: str
    original_name: str
    saved_path: Path
    size_bytes: int
    content_type: str

    def __str__(self) -> str:
        """Render as: field: 'original' → saved_path (sizeB content_type)."""
        source = f"{self.field_name}: {self.original_name!r}"
        details = f"({self.size_bytes}B {self.content_type})"
        return f"{source} → {self.saved_path} {details}"
def _safe_upload_name(raw_name: str | None) -> str:
    """Reduce a client-supplied filename to a single safe path component."""
    # Clients may send Windows-style paths (C:\dir\file); normalise the
    # separators so the final component is isolated on every platform.
    candidate = Path((raw_name or "").replace("\\", "/")).name
    # "", "." and ".." name a directory, not a file, so fall back to a
    # neutral name instead of writing to (or outside) the upload directory.
    if candidate in ("", ".", ".."):
        return "upload"
    return candidate


def save_uploads(
    form: ParsedForm,
    upload_dir: str | Path,
    allowed_types: list[str] | None = None,
    max_size: int = 10 * 1024 * 1024,
) -> list[UploadResult]:
    """
    Save all uploaded files from a parsed form to upload_dir.

    Args:
        form: Parsed form whose uploads are written out.
        upload_dir: Target directory; created (with parents) if missing.
        allowed_types: Allowed MIME type strings, compared exactly;
            None or an empty list disables the check.
        max_size: Maximum upload size in bytes; 0 disables the check.

    Returns:
        One UploadResult per file written. Uploads with no data are skipped.

    Raises:
        ValueError: If an upload exceeds max_size or its content type is not
            in allowed_types. Files saved before the failing upload remain
            on disk.

    Only the final path component of the client-supplied filename is used.
    Two uploads that share a filename are written to the same path, so the
    later one overwrites the earlier one.

    Example:
        form = parse_form(fp, env)
        results = save_uploads(form, "/tmp/uploads", allowed_types=["image/png"])
        for r in results:
            print(r)
    """
    upload_dir = Path(upload_dir)
    upload_dir.mkdir(parents=True, exist_ok=True)
    results = []
    for upload in form.all_uploads():
        if not upload.data:
            continue
        if max_size and len(upload.data) > max_size:
            raise ValueError(
                f"Upload {upload.filename!r} exceeds max size {max_size} bytes"
            )
        if allowed_types and upload.content_type not in allowed_types:
            raise ValueError(
                f"Upload content type {upload.content_type!r} not allowed"
            )
        save_path = upload_dir / _safe_upload_name(upload.filename)
        save_path.write_bytes(upload.data)
        results.append(UploadResult(
            field_name=upload.name,
            original_name=upload.filename or "",
            saved_path=save_path,
            size_bytes=len(upload.data),
            content_type=upload.content_type,
        ))
    return results
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import tempfile

    print("=== cgi demo ===")
    if not _CGI_AVAILABLE:
        print(" cgi not available (Python 3.13+); using pure-Python fallbacks")

    # ── GET: fields arrive through QUERY_STRING ──────────────────────────────
    print("\n--- GET query string ---")
    get_form = parse_form(
        io.BytesIO(b""),
        make_get_environ("username=alice&role=admin&tag=python&tag=stdlib"),
    )
    for label, shown in (
        ("username", get_form.get("username")),
        ("role", get_form.get("role")),
        ("tags", get_form.getlist("tag")),
    ):
        print(f" {label}: {shown!r}")

    # ── POST: URL-encoded body ───────────────────────────────────────────────
    print("\n--- POST application/x-www-form-urlencoded ---")
    urlencoded_body = b"name=Bob&age=35&lang=Python&lang=Go"
    post_form = parse_form(
        io.BytesIO(urlencoded_body),
        make_post_environ(urlencoded_body),
    )
    for label, shown in (
        ("name", post_form.get("name")),
        ("age", post_form.get("age")),
        ("lang", post_form.getlist("lang")),
    ):
        print(f" {label}: {shown!r}")

    # ── POST: multipart body carrying a file upload ──────────────────────────
    print("\n--- POST multipart/form-data with upload ---")
    fake_png = b"\x89PNG\r\n\x1a\n" + bytes(20)  # PNG signature plus padding
    multipart_body, multipart_type = build_multipart_body({
        "username": "alice",
        "avatar": ("avatar.png", fake_png, "image/png"),
    })
    upload_form = parse_form(
        io.BytesIO(multipart_body),
        make_post_environ(multipart_body, content_type=multipart_type),
    )
    print(f" username: {upload_form.get('username')!r}")
    avatar = upload_form.get_upload("avatar")
    if avatar:
        payload_size = len(avatar.data or b"")
        print(f" upload: {avatar.filename!r} {payload_size}B {avatar.content_type}")

    # ── save_uploads: write the upload into a throwaway directory ────────────
    print("\n--- save_uploads ---")
    with tempfile.TemporaryDirectory() as scratch_dir:
        for saved in save_uploads(upload_form, scratch_dir, allowed_types=["image/png"]):
            print(f" {saved}")
            leading_bytes = saved.saved_path.read_bytes()[:8]
            print(f" saved content: {leading_bytes.hex()}")

    # ── parse_header on a few representative header values ───────────────────
    print("\n--- parse_header ---")
    sample_headers = (
        "multipart/form-data; boundary=----FormBoundary7MA4YWxkTrZu0gW",
        "attachment; filename=report.pdf",
        "text/html; charset=utf-8",
    )
    for hdr in sample_headers:
        main_value, header_params = parse_header(hdr)
        print(f" {hdr!r}")
        print(f" value={main_value!r} params={header_params}")

    # ── html.escape is the replacement for the removed cgi.escape ────────────
    print("\n--- html.escape (cgi.escape replacement) ---")
    user_input = '<script>alert("xss")</script>'
    escaped = html.escape(user_input, quote=True)
    print(f" input: {user_input!r}")
    print(f" safe: {escaped!r}")

    print("\n=== done ===")
For the Flask / FastAPI (PyPI) alternatives — modern Python web frameworks handle form parsing, file uploads, and query string decoding automatically via request.form["name"], request.files["avatar"].read(), and request.args.get("q"), with WSGI / ASGI integration, middleware, type validation, and production-ready MIME handling. Use Flask or FastAPI for any new web application; use cgi only when maintaining a legacy CGI script deployed under Apache mod_cgi, nginx with a CGI wrapper such as fcgiwrap, or the http.server CGI handler, keeping in mind that cgi is removed in Python 3.13 and that the parse_form() / build_multipart_body() fallbacks above provide a migration path. For the urllib.parse alternative — urllib.parse.parse_qs(query_string) and urllib.parse.parse_qsl(query_string) parse URL-encoded data into a dict or a list of tuples, respectively, and remain fully supported in Python 3.13+. Use urllib.parse.parse_qs for all new query-string parsing; use cgi.FieldStorage (or the fallback parse_form()) only when you also need multipart form data from a raw HTTP body without a framework. The Claude Skills 360 bundle includes cgi skill sets covering parse_header() with pure-Python fallback, FormField/ParsedForm with parse_form()/get()/getlist()/get_upload()/all_uploads(), make_get_environ()/make_post_environ()/build_multipart_body() test harness helpers, and UploadResult with save_uploads() file persister. Start with the free tier to try form parsing patterns and cgi pipeline code generation.