Python’s urllib.error module defines the exceptions raised by urllib.request for URL and HTTP failures. from urllib.error import URLError, HTTPError, ContentTooShortError. URLError — base class for all urllib.request errors; e.reason holds an OSError or string (e.g. "[Errno -2] Name or service not known"). HTTPError — raised when the server returns a 4xx or 5xx status; it is a subclass of URLError that also behaves like an http.client.HTTPResponse; e.code (int), e.reason (str), e.headers (mapping), e.read() (response body bytes). ContentTooShortError — a URLError subclass raised by urlretrieve when fewer bytes arrive than Content-Length declares; e.content holds the partial download. Exception hierarchy: HTTPError → URLError → OSError. Catch order: always catch HTTPError before URLError since HTTPError is a subclass. JSON errors: json.loads(e.read().decode()) extracts the API error body from an HTTPError response. Claude Code generates robust HTTP clients, retry logic, error classifiers, JSON error parsers, and HTTP status code handlers.
CLAUDE.md for urllib.error
## urllib.error Stack
- Stdlib: from urllib.error import URLError, HTTPError, ContentTooShortError
- Order: catch HTTPError before URLError (HTTPError is a URLError subclass)
- HTTP: e.code (int), e.reason (str), e.headers, e.read() → body bytes
- Net: e.reason → OSError or string for non-HTTP network failures
- Body: json.loads(e.read().decode()) # parse JSON error response
- Note: HTTPError also behaves like an http.client.HTTPResponse
urllib.error Error Handling Pipeline
# app/urlerrorutil.py — safe fetch, retry, classifier, response parser, error report
from __future__ import annotations
import json
import time
import urllib.request
from dataclasses import dataclass, field
from typing import Any
from urllib.error import ContentTooShortError, HTTPError, URLError
# ─────────────────────────────────────────────────────────────────────────────
# 1. Error classification helpers
# ─────────────────────────────────────────────────────────────────────────────
def is_client_error(code: int) -> bool:
    """Return True when *code* is a 4xx (client error) HTTP status."""
    return code in range(400, 500)
def is_server_error(code: int) -> bool:
    """Return True when *code* is a 5xx (server error) HTTP status."""
    return code in range(500, 600)
def is_retryable(exc: Exception) -> bool:
    """
    Decide whether *exc* warrants another attempt.

    Retryable cases: HTTP 429 Too Many Requests, every 5xx server
    response, and any other URLError (a network/DNS-level failure).

    Example:
        retryable = is_retryable(exc)
        if retryable:
            time.sleep(backoff)
    """
    if isinstance(exc, HTTPError):
        # Rate limiting or a server-side failure: worth trying again later.
        return exc.code == 429 or 500 <= exc.code < 600
    # Non-HTTP URLErrors are transport-level (DNS, refused, timeout): retry.
    return isinstance(exc, URLError)
def status_category(code: int) -> str:
    """
    Return a human-readable category for an HTTP status code.

    Known codes get their standard reason phrase; unknown codes fall
    back to "HTTP <code>". A class suffix is appended for 2xx/3xx/4xx/5xx.

    Example:
        print(status_category(404)) # "Not Found (client error)"
        print(status_category(500)) # "Internal Server Error (server error)"
    """
    labels = {
        200: "OK", 201: "Created", 204: "No Content",
        301: "Moved Permanently", 302: "Found", 304: "Not Modified",
        400: "Bad Request", 401: "Unauthorized", 403: "Forbidden",
        404: "Not Found", 405: "Method Not Allowed", 409: "Conflict",
        422: "Unprocessable Entity", 429: "Too Many Requests",
        500: "Internal Server Error", 502: "Bad Gateway",
        503: "Service Unavailable", 504: "Gateway Timeout",
    }
    label = labels.get(code, f"HTTP {code}")
    if 400 <= code < 500:
        suffix = " (client error)"
    elif 500 <= code < 600:
        suffix = " (server error)"
    elif 200 <= code < 300:
        suffix = " (success)"
    elif 300 <= code < 400:
        suffix = " (redirect)"
    else:
        suffix = ""
    return label + suffix
# ─────────────────────────────────────────────────────────────────────────────
# 2. HTTPError body parsing
# ─────────────────────────────────────────────────────────────────────────────
def read_error_body(exc: HTTPError, max_bytes: int = 4096) -> bytes:
    """
    Return up to *max_bytes* of the response body attached to an HTTPError.

    Falls back to empty bytes when the body is missing or unreadable
    (deliberate best-effort: an unreadable body is treated as absent).

    Example:
        body = read_error_body(exc)
        print(body.decode(errors="replace"))
    """
    try:
        body = exc.read(max_bytes)
    except Exception:
        body = b""
    return body
def parse_json_error(exc: HTTPError) -> "dict[str, Any] | None":
    """
    Try to read and parse a JSON error body from an HTTPError.

    The Content-Type header is intentionally ignored: many APIs return
    JSON error bodies without declaring application/json. (The original
    computed a Content-Type check and then discarded it — dead code.)

    Returns the parsed payload only when it is a JSON object (dict);
    returns None for an empty body, invalid JSON, or a non-object JSON
    payload, so the result always matches the annotated ``dict | None``.

    Example:
        payload = parse_json_error(exc)
        if payload:
            print(payload.get("message"))
    """
    body = read_error_body(exc)
    if not body:
        return None
    try:
        # errors="replace" means decoding itself cannot raise; only
        # malformed JSON can fail here.
        parsed = json.loads(body.decode("utf-8", errors="replace"))
    except json.JSONDecodeError:
        return None
    # Guard the contract: JSON scalars/arrays are not error objects.
    return parsed if isinstance(parsed, dict) else None
# ─────────────────────────────────────────────────────────────────────────────
# 3. Structured error result
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class FetchResult:
    """
    Outcome of a single HTTP fetch, populated on success and on failure.

    Example:
        result = safe_fetch("https://httpbin.org/get")
        if result.ok:
            print(result.data)
        else:
            print(result.error_summary)
    """
    url: str
    ok: bool
    data: bytes = b""
    http_code: "int | None" = None
    headers: "dict[str, str]" = field(default_factory=dict)
    error_type: str = ""  # one of: "http", "network", "content_short", "unknown"
    error_summary: str = ""
    json_error: "dict[str, Any] | None" = None
    retryable: bool = False

    def json(self) -> Any:
        """Decode the body as UTF-8 and parse it as JSON."""
        text = self.data.decode("utf-8")
        return json.loads(text)
def safe_fetch(
    url: str,
    headers: "dict[str, str] | None" = None,
    timeout: float = 10.0,
) -> FetchResult:
    """
    Fetch a URL and return a FetchResult whether or not an error occurs.

    Never raises: every failure mode is folded into the result, with
    error_type set to "http", "content_short", "network", or "unknown".

    Example:
        r = safe_fetch("https://api.example.com/data")
        if r.ok:
            obj = r.json()
        else:
            print(r.http_code, r.error_summary)
    """
    req = urllib.request.Request(url, headers=headers or {})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = resp.read()
            hdrs = dict(resp.headers)
            return FetchResult(url=url, ok=True, data=data, http_code=resp.status, headers=hdrs)
    except HTTPError as e:
        body = read_error_body(e)
        # BUG FIX: the body stream can only be read once, so parse the JSON
        # from the bytes already captured — calling parse_json_error(e) here
        # would re-read an exhausted stream and always yield None.
        try:
            parsed = json.loads(body.decode("utf-8", errors="replace"))
        except ValueError:  # includes json.JSONDecodeError
            parsed = None
        json_err = parsed if isinstance(parsed, dict) else None
        return FetchResult(
            url=url, ok=False,
            http_code=e.code,
            error_type="http",
            error_summary=f"HTTP {e.code} {status_category(e.code)}",
            json_error=json_err,
            retryable=is_retryable(e),
            data=body,
        )
    except ContentTooShortError as e:
        # BUG FIX: ContentTooShortError subclasses URLError, so it must be
        # caught BEFORE URLError — in the original order this branch was
        # unreachable.
        return FetchResult(
            url=url, ok=False,
            error_type="content_short",
            error_summary="Download truncated (ContentTooShortError)",
            data=e.content,
        )
    except URLError as e:
        # Non-HTTP transport failure (DNS, refused connection, timeout).
        reason = str(e.reason)
        return FetchResult(
            url=url, ok=False,
            error_type="network",
            error_summary=f"Network error: {reason}",
            retryable=True,
        )
    except Exception as e:
        # Last-resort boundary: report rather than propagate.
        return FetchResult(
            url=url, ok=False,
            error_type="unknown",
            error_summary=str(e),
        )
# ─────────────────────────────────────────────────────────────────────────────
# 4. Retry wrapper
# ─────────────────────────────────────────────────────────────────────────────
def fetch_with_retry(
    url: str,
    headers: "dict[str, str] | None" = None,
    max_retries: int = 3,
    backoff: float = 1.0,
    timeout: float = 10.0,
) -> FetchResult:
    """
    Fetch a URL with exponential backoff retry for retryable errors.

    Makes at most ``max_retries + 1`` attempts; between attempts sleeps
    ``backoff * 2**attempt`` seconds. Non-retryable failures stop early.

    Example:
        r = fetch_with_retry("https://api.example.com/data", max_retries=3)
        if r.ok:
            print(r.json())
    """
    outcome: "FetchResult | None" = None
    attempt = 0
    while attempt <= max_retries:
        outcome = safe_fetch(url, headers=headers, timeout=timeout)
        if outcome.ok:
            return outcome
        if not outcome.retryable or attempt == max_retries:
            break
        time.sleep(backoff * (2 ** attempt))
        attempt += 1
    return outcome or FetchResult(url=url, ok=False, error_summary="No attempts made")
# ─────────────────────────────────────────────────────────────────────────────
# 5. Context-manager HTTP caller
# ─────────────────────────────────────────────────────────────────────────────
def http_get(url: str, headers: "dict[str, str] | None" = None, timeout: float = 10.0) -> bytes:
    """
    Perform an HTTP GET and return the body bytes, raising on error.

    On HTTPError the response body is captured onto ``e._body`` before
    re-raising, so callers can still inspect it after the connection
    is closed.

    Example:
        try:
            body = http_get("https://api.example.com/v1/users")
        except HTTPError as e:
            print(e.code, e.reason)
    """
    request = urllib.request.Request(url, headers=headers or {})
    try:
        response = urllib.request.urlopen(request, timeout=timeout)
    except HTTPError as e:
        # Capture the body now; it becomes unreadable once the error closes.
        e._body = read_error_body(e)  # type: ignore[attr-defined]
        raise
    with response:
        return response.read()
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    print("=== urllib.error demo ===")

    # ── error classification helpers ──────────────────────────────────────────
    print("\n--- status_category ---")
    for code in (200, 301, 400, 401, 403, 404, 422, 429, 500, 503):
        print(f" {code}: {status_category(code)}")

    print("\n--- is_retryable ---")
    for code in (400, 404, 429, 500, 503):
        # A synthetic HTTPError (no body) is enough for classification.
        err = HTTPError("http://test", code, "test", {}, None)
        print(f" HTTP {code}: retryable={is_retryable(err)}")

    # ── safe_fetch against httpbin ────────────────────────────────────────────
    print("\n--- safe_fetch ---")
    demo_urls = (
        "https://httpbin.org/status/200",
        "https://httpbin.org/status/404",
        "https://httpbin.org/status/500",
        "https://this-domain-does-not-exist-xyz.invalid/",
    )
    for url in demo_urls:
        r = safe_fetch(url, timeout=5.0)
        status = "OK" if r.ok else f"ERR({r.error_type})"
        print(f" {url[-40:]:42s} {status:20s} code={r.http_code} retry={r.retryable}")

    # ── fetch_with_retry ──────────────────────────────────────────────────────
    print("\n--- fetch_with_retry (200 url) ---")
    r = fetch_with_retry("https://httpbin.org/get", max_retries=2, timeout=8.0)
    if r.ok:
        data = r.json()
        print(f" ok=True url={data.get('url')!r}")
    else:
        print(f" ok=False {r.error_summary}")

    print("\n=== done ===")
For the requests (PyPI) alternative — requests.get(url) raises requests.exceptions.HTTPError (via resp.raise_for_status()), requests.exceptions.ConnectionError, and requests.exceptions.Timeout; all inherit from requests.exceptions.RequestException — use requests for production HTTP work where you need sessions, auth, cookies, form encoding, streaming, and retries; use urllib.request + urllib.error when you need zero dependencies or must avoid third-party packages. For the httpx (PyPI) alternative — httpx mirrors the requests API but also supports async (httpx.AsyncClient) and HTTP/2; httpx.HTTPStatusError holds response.text directly — use httpx when you need async/await HTTP in asyncio services; use urllib.error for lightweight stdlib-only scripts. The Claude Skills 360 bundle includes urllib.error skill sets covering is_client_error()/is_server_error()/is_retryable()/status_category() classifiers, read_error_body()/parse_json_error() body parsers, FetchResult dataclass + safe_fetch() no-throw wrapper, fetch_with_retry() exponential backoff, and http_get() raising caller. Start with the free tier to try error handling patterns and urllib.error pipeline code generation.