Python's urllib.parse module parses, builds, and encodes URLs. from urllib.parse import urlparse, urljoin, urlencode, parse_qs, parse_qsl, quote, unquote, urlsplit, urlunsplit, urldefrag. urlparse: r = urlparse("https://host/path?k=v#frag") → ParseResult (scheme, netloc, path, params, query, fragment). urlsplit: 5-part result without params. urljoin: urljoin("https://host/a/b", "../c") → "https://host/a/c". urlencode: urlencode({"q": "hello world", "page": 2}) → "q=hello+world&page=2"; doseq=True for list values. parse_qs: parse_qs("a=1&a=2&b=3") → {"a": ["1","2"], "b": ["3"]}. parse_qsl: ordered list of (key, value) pairs. quote: quote("/path/to file") → "/path/to%20file"; safe="/" controls unescaped chars. quote_plus: spaces → "+". unquote: unquote("%2Fpath%2F") → "/path/". unquote_plus: "+" → space. urlunsplit: urlunsplit((scheme, netloc, path, query, fragment)) → URL string. urldefrag: urldefrag(url) → (url_without_fragment, fragment). netloc split: ParseResult.hostname, .port, .username, .password. Claude Code generates URL builders, query parsers, redirect resolvers, and API endpoint constructors.
CLAUDE.md for urllib.parse
## urllib.parse Stack
- Stdlib: from urllib.parse import urlparse, urljoin, urlencode, parse_qs, quote, unquote, urlsplit, urlunsplit
- Parse: urlparse(url) → .scheme .netloc .path .query .fragment
- Build: urlunsplit((scheme, netloc, path, urlencode(params), ""))
- Encode: urlencode(dict, doseq=True) — list values
- Decode: parse_qs(query_string) — values are lists
- Escape: quote(s, safe="") — percent-encode; unquote() reverses
## urllib.parse URL Pipeline
# app/urlutil.py — parse, build, normalize, sign, route matching
from __future__ import annotations
import hashlib
import hmac
import re
import time
from dataclasses import dataclass, field
from typing import Any
from urllib.parse import (
ParseResult,
parse_qs,
parse_qsl,
quote,
quote_plus,
unquote,
unquote_plus,
urlencode,
urljoin,
urlparse,
urlsplit,
urlunsplit,
urldefrag,
)
# ─────────────────────────────────────────────────────────────────────────────
# 1. Parsing helpers
# ─────────────────────────────────────────────────────────────────────────────
def parse_url(url: str) -> ParseResult:
    """Split *url* into its six named components.

    Thin wrapper over urlparse() so the rest of this module has a single
    parsing entry point.

    Example:
        r = parse_url("https://api.example.com:8080/v1/users?active=true#top")
        r.scheme    # "https"
        r.hostname  # "api.example.com"
        r.port      # 8080
        r.path      # "/v1/users"
        r.query     # "active=true"
    """
    result: ParseResult = urlparse(url)
    return result
def parse_query(query_string: str) -> dict[str, list[str]]:
    """Decode *query_string* into a mapping of key -> list of values.

    Blank values (e.g. "sort=") are retained as empty strings.

    Example:
        parse_query("tag=python&tag=stdlib&page=2")
        # {"tag": ["python", "stdlib"], "page": ["2"]}
    """
    decoded = parse_qs(query_string, keep_blank_values=True)
    return decoded
def parse_query_flat(query_string: str) -> dict[str, str]:
    """Decode a query string to a flat dict; the last value wins for repeats.

    Example:
        parse_query_flat("x=1&x=2&y=3") # {"x": "2", "y": "3"}
    """
    flat: dict[str, str] = {}
    for name, value in parse_qsl(query_string, keep_blank_values=True):
        flat[name] = value
    return flat
def get_query_param(url: str, key: str, default: str | None = None) -> str | None:
    """Return the last value of query parameter *key* in *url*, or *default*.

    Example:
        get_query_param("https://example.com?page=2&q=hello", "page") # "2"
        get_query_param("https://example.com", "page", default="1") # "1"
    """
    # Walk the pairs from the end so the last occurrence wins, matching
    # parse_qs(...)[key][-1] semantics.
    for name, value in reversed(parse_qsl(urlparse(url).query, keep_blank_values=True)):
        if name == key:
            return value
    return default
def url_components(url: str) -> dict[str, Any]:
    """Split *url* into a plain dict for inspection or serialization.

    "params" holds the parsed query (key -> list of values), or {} when the
    URL has no query string.

    Example:
        url_components("https://user:pw@host:8080/path?k=v#frag")
    """
    parsed = urlparse(url)
    components: dict[str, Any] = {
        "scheme": parsed.scheme,
        "username": parsed.username,
        "password": parsed.password,
        "hostname": parsed.hostname,
        "port": parsed.port,
        "path": parsed.path,
        "query": parsed.query,
        "fragment": parsed.fragment,
    }
    components["params"] = parse_qs(parsed.query) if parsed.query else {}
    return components
# ─────────────────────────────────────────────────────────────────────────────
# 2. Building helpers
# ─────────────────────────────────────────────────────────────────────────────
def build_url(
    base: str,
    path: str = "",
    params: dict[str, Any] | None = None,
    fragment: str = "",
) -> str:
    """Build a URL from a base, optional path extension, query params, and fragment.

    When *params* is given it replaces any query already on *base*; when
    *path* is given it is appended to the base path with a single "/".

    Example:
        build_url("https://api.example.com", "/v1/search", {"q": "py", "page": 2})
        # "https://api.example.com/v1/search?q=py&page=2"
    """
    parsed = urlparse(base)
    if path:
        joined_path = f"{parsed.path.rstrip('/')}/{path.lstrip('/')}"
    else:
        joined_path = parsed.path
    if params:
        query = urlencode(params, doseq=True)
    else:
        query = parsed.query
    return urlunsplit((parsed.scheme, parsed.netloc, joined_path, query, fragment))
def set_query_params(url: str, **params: Any) -> str:
    """Replace or add query parameters, preserving existing ones not specified.

    Unlike a naive dict round-trip, repeated keys that are not being replaced
    survive intact (e.g. "?tag=a&tag=b"), and blank values ("?sort=") are
    kept. Keys being replaced are re-appended at the end of the query.

    Example:
        set_query_params("https://example.com/search?q=py&page=1", page=2, sort="new")
        # "https://example.com/search?q=py&page=2&sort=new"
    """
    r = urlparse(url)
    # BUG FIX: dict(parse_qsl(...)) collapsed repeated keys and dropped blank
    # values; keep the full pair list and only drop keys being overridden.
    pairs = [
        (k, v)
        for k, v in parse_qsl(r.query, keep_blank_values=True)
        if k not in params
    ]
    pairs.extend((k, str(v)) for k, v in params.items())
    return urlunsplit((r.scheme, r.netloc, r.path, urlencode(pairs), r.fragment))
def remove_query_params(url: str, *keys: str) -> str:
    """Remove specific query parameters from a URL.

    Blank-valued parameters that are NOT being removed are preserved
    (parse_qsl would otherwise silently drop them).

    Example:
        remove_query_params("https://example.com?a=1&b=2&c=3", "b", "c")
        # "https://example.com?a=1"
    """
    r = urlparse(url)
    # BUG FIX: keep_blank_values=True so unrelated "?flag=" params survive.
    kept = [
        (k, v)
        for k, v in parse_qsl(r.query, keep_blank_values=True)
        if k not in keys
    ]
    return urlunsplit((r.scheme, r.netloc, r.path, urlencode(kept), r.fragment))
def strip_fragment(url: str) -> str:
    """Return *url* with any trailing "#fragment" removed."""
    without_fragment, _fragment = urldefrag(url)
    return without_fragment
def resolve_url(base: str, relative: str) -> str:
    """Resolve a relative URL reference against a base URL (RFC 3986 join).

    Example:
        resolve_url("https://example.com/a/b/c", "../d")
        # "https://example.com/a/d"
        resolve_url("https://example.com/page", "/api/v1")
        # "https://example.com/api/v1"
    """
    resolved = urljoin(base, relative)
    return resolved
# ─────────────────────────────────────────────────────────────────────────────
# 3. Encoding and decoding
# ─────────────────────────────────────────────────────────────────────────────
def url_encode(s: str, safe: str = "") -> str:
    """Percent-encode *s* for use in URL path segments or query values.

    Characters listed in *safe* are left unescaped.

    Example:
        url_encode("hello world/path") # "hello%20world%2Fpath"
        url_encode("/api/v1/users", safe="/") # "/api/v1/users"
    """
    encoded = quote(s, safe=safe)
    return encoded
def url_decode(s: str) -> str:
    """Reverse url_encode(): decode %XX escapes back to characters."""
    decoded = unquote(s)
    return decoded
def form_encode(data: dict[str, Any]) -> str:
    """Encode *data* as application/x-www-form-urlencoded (spaces become "+").

    Values are stringified with str() before encoding.

    Example:
        form_encode({"user": "Alice Smith", "age": 30})
        # "user=Alice+Smith&age=30"
    """
    stringified = {key: str(value) for key, value in data.items()}
    return urlencode(stringified)
def form_decode(data: str) -> dict[str, str]:
    """Decode application/x-www-form-urlencoded data into a flat dict.

    parse_qsl() already percent-decodes and turns "+" into spaces, so no
    second unquote pass is applied — BUG FIX: the previous unquote_plus()
    call double-decoded, corrupting values that legitimately contain "%" or
    "+" after one decode (e.g. "a%2Bb" became "a b" instead of "a+b").
    Blank values are kept; for repeated keys the last value wins.
    """
    return dict(parse_qsl(data, keep_blank_values=True))
# ─────────────────────────────────────────────────────────────────────────────
# 4. URL normalization and validation
# ─────────────────────────────────────────────────────────────────────────────
_VALID_SCHEMES = frozenset({"http", "https", "ftp", "ftps"})
def normalize_url(url: str) -> str:
"""
Normalize a URL: lowercase scheme+host, remove default ports, strip trailing slash.
Example:
normalize_url("HTTPS://Example.COM:443/Path/")
# "https://example.com/Path"
"""
r = urlparse(url.strip())
scheme = r.scheme.lower()
host = r.hostname.lower() if r.hostname else ""
port = r.port
# Remove default ports
if (scheme == "https" and port == 443) or (scheme == "http" and port == 80):
port = None
netloc = f"{host}:{port}" if port else host
if r.username:
creds = f"{r.username}:{r.password}@" if r.password else f"{r.username}@"
netloc = creds + netloc
path = r.path.rstrip("/") or "/"
return urlunsplit((scheme, netloc, path, r.query, r.fragment))
def is_valid_url(url: str, schemes: frozenset[str] = _VALID_SCHEMES) -> bool:
    """Return True if *url* has an allowed scheme and a non-empty netloc.

    Example:
        is_valid_url("https://example.com/path") # True
        is_valid_url("ftp://files.example.com") # True
        is_valid_url("javascript:alert(1)") # False
    """
    try:
        parts = urlparse(url)
    except Exception:
        # urlparse can raise ValueError on malformed input (e.g. bad IPv6).
        return False
    if parts.scheme not in schemes:
        return False
    return bool(parts.netloc)
def is_same_origin(url1: str, url2: str) -> bool:
    """Return True when both URLs have the same scheme, host, and effective port.

    Default ports are normalized per RFC 6454, so an explicit default port
    ("https://example.com:443") matches the implicit form
    ("https://example.com") — the previous tuple comparison treated those as
    different origins because .port is None when omitted.

    Example:
        is_same_origin("https://example.com/a", "https://example.com/b") # True
        is_same_origin("https://example.com", "http://example.com") # False
    """
    defaults = {"http": 80, "https": 443, "ftp": 21, "ftps": 990}

    def origin(u: str) -> tuple[str, str | None, int | None]:
        r = urlparse(u)
        scheme = r.scheme.lower()
        # Fall back to the scheme's default port so explicit and implicit
        # default ports compare equal.
        port = r.port if r.port is not None else defaults.get(scheme)
        return (scheme, r.hostname, port)

    return origin(url1) == origin(url2)
def extract_domain(url: str) -> str:
    """Return the origin (scheme + netloc) of *url*, dropping path/query/fragment.

    Example:
        extract_domain("https://api.example.com/v1/users?page=2")
        # "https://api.example.com"
    """
    parts = urlparse(url)
    return urlunsplit((parts.scheme, parts.netloc, "", "", ""))
# ─────────────────────────────────────────────────────────────────────────────
# 5. Signed URL generation (HMAC)
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class URLSigner:
    """
    Generate and verify HMAC-SHA256-signed URLs with optional expiry.

    Example:
        signer = URLSigner(secret=b"my-secret-key")
        signed = signer.sign("https://cdn.example.com/file.pdf", expires_in=3600)
        assert signer.verify(signed)
    """
    secret: bytes            # HMAC key; keep confidential
    param_sig: str = "_sig"  # query param that carries the signature
    param_exp: str = "_exp"  # query param that carries the unix expiry time

    def sign(self, url: str, expires_in: int | None = None) -> str:
        """Return *url* with a signature (and optional expiry) appended."""
        params: dict[str, Any] = {}
        if expires_in is not None:
            params[self.param_exp] = int(time.time()) + expires_in
        url_with_exp = set_query_params(url, **params) if params else url
        sig = self._compute_sig(url_with_exp)
        return set_query_params(url_with_exp, **{self.param_sig: sig})

    def verify(self, url: str, now: int | None = None) -> bool:
        """Return True if the signature is valid and the URL has not expired.

        *now* overrides the clock (useful in tests). Malformed signature or
        expiry values fail verification rather than raising.
        """
        r = urlparse(url)
        qs = parse_qs(r.query)
        sigs = qs.get(self.param_sig, [])
        if not sigs:
            return False
        exp_vals = qs.get(self.param_exp, [])
        if exp_vals:
            try:
                exp = int(exp_vals[0])
            except ValueError:
                # BUG FIX: a tampered/non-numeric expiry used to raise
                # ValueError out of verify(); treat it as invalid instead.
                return False
            # BUG FIX: `now or time.time()` treated now=0 (the epoch) as
            # "use the current time"; compare explicitly against None.
            current = now if now is not None else int(time.time())
            if current > exp:
                return False
        # Reconstruct URL without the _sig param to re-derive the signature.
        unsigned = remove_query_params(url, self.param_sig)
        expected = self._compute_sig(unsigned)
        # Constant-time compare to avoid timing side channels.
        return hmac.compare_digest(expected, sigs[0])

    def _compute_sig(self, url: str) -> str:
        # Truncated to 32 hex chars (128 bits) to keep signed URLs short.
        return hmac.new(self.secret, url.encode(), hashlib.sha256).hexdigest()[:32]
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Demo / smoke test: exercises every helper in this module and prints the
    # results. No network I/O — urllib.parse is pure string manipulation.
    print("=== urllib.parse demo ===")
    base = "https://api.example.com:8080/v1/search?q=python&page=1#results"
    print("\n--- parse_url ---")
    r = parse_url(base)
    print(f" scheme={r.scheme!r} host={r.hostname!r} port={r.port} path={r.path!r}")
    print(f" query={r.query!r} fragment={r.fragment!r}")
    print("\n--- parse_query ---")
    qs = "tag=python&tag=stdlib&page=2&sort="
    print(f" parse_query: {parse_query(qs)}")
    print(f" parse_query_flat: {parse_query_flat(qs)}")
    print("\n--- url_components ---")
    comps = url_components("https://user:[email protected]:9000/path?x=1")
    for k, v in comps.items():
        # Only print components that are actually populated.
        if v not in (None, "", {}):
            print(f" {k}: {v!r}")
    print("\n--- build_url ---")
    url = build_url("https://api.example.com", "/v1/users", {"active": True, "tag": ["py", "web"]})
    print(f" {url}")
    print("\n--- set_query_params ---")
    updated = set_query_params("https://example.com/search?q=py&page=1", page=2, sort="new")
    print(f" {updated}")
    print("\n--- remove_query_params ---")
    cleaned = remove_query_params("https://example.com?a=1&utm_source=email&utm_campaign=abc", "utm_source", "utm_campaign")
    print(f" {cleaned}")
    print("\n--- resolve_url ---")
    # Covers absolute path, dot-segment, query-only, and protocol-relative refs.
    for rel in ["/v2/users", "../other", "?new=param", "//cdn.example.com/img.png"]:
        print(f" {rel!r:30s} → {resolve_url('https://example.com/api/v1/', rel)!r}")
    print("\n--- url_encode / form_encode ---")
    raw = "hello world / path & more"
    print(f" url_encode: {url_encode(raw)!r}")
    print(f" url_decode: {url_decode(url_encode(raw))!r}")
    print(f" form_encode: {form_encode({'user': 'Alice Smith', 'score': 42})!r}")
    print(f" form_decode: {form_decode('user=Alice+Smith&score=42')!r}")
    print("\n--- normalize_url ---")
    for u in ["HTTPS://Example.COM:443/Path/", "http://localhost:80/", "https://api.example.com/v1"]:
        print(f" {u!r:45s} → {normalize_url(u)!r}")
    print("\n--- is_valid_url ---")
    for u in ["https://example.com", "javascript:alert(1)", "ftp://files.example.com", "not-a-url"]:
        print(f" {u!r:35s}: {is_valid_url(u)}")
    print("\n--- is_same_origin ---")
    pairs = [
        ("https://example.com/a", "https://example.com/b"),
        ("https://example.com", "http://example.com"),
        ("https://api.example.com", "https://cdn.example.com"),
    ]
    for u1, u2 in pairs:
        print(f" same_origin: {is_same_origin(u1, u2)} ({u1!r} vs {u2!r})")
    print("\n--- URLSigner ---")
    signer = URLSigner(secret=b"my-secret-key")
    url = "https://cdn.example.com/files/report.pdf"
    signed = signer.sign(url, expires_in=3600)
    print(f" signed: {signed[:80]}...")
    print(f" verify ok: {signer.verify(signed)}")
    print(f" verify tampered:{signer.verify(signed.replace('report', 'other'))}")
    print("\n--- strip_fragment ---")
    print(f" {strip_fragment('https://example.com/page#section-3')!r}")
    print("\n=== done ===")
For the httpx / requests alternative — both httpx and requests bundle URL handling into their PreparedRequest and URL objects (e.g., httpx.URL("https://host/path").params, requests.compat.urlparse), providing integration with HTTP client context like auth, redirects, and session state; urllib.parse is pure string manipulation with no network I/O or HTTP semantics — use httpx.URL or requests.PreparedRequest inside an HTTP client pipeline where the URL is about to be fetched, urllib.parse for standalone URL construction, webhook signature verification, redirect resolution, and any context where you do not want to import an HTTP client. For the yarl alternative — yarl (PyPI, used by aiohttp) provides an immutable URL class with attribute access (url.host, url.query), percent-decoding on access, and fluent query manipulation (url.update_query({"page": 2})); urllib.parse returns immutable named tuples and plain strings — use yarl in async aiohttp applications needing a clean URL object API, urllib.parse in synchronous scripts, CLIs, and stdlib-only environments. The Claude Skills 360 bundle includes urllib.parse skill sets covering parse_url()/parse_query()/parse_query_flat()/get_query_param()/url_components() parsing, build_url()/set_query_params()/remove_query_params()/strip_fragment()/resolve_url() URL building, url_encode()/url_decode()/form_encode()/form_decode() encoding utilities, normalize_url()/is_valid_url()/is_same_origin()/extract_domain() normalization, and URLSigner HMAC signed-URL generator. Start with the free tier to try URL parsing and urllib.parse pipeline code generation.