Python’s stringprep module exposes the character tables of RFC 3454 — the “Stringprep” framework for preparing Unicode strings for use in internet protocols (SASL, LDAP, XMPP, SIP, etc.). Import it with import stringprep. The module provides lookup functions for the RFC 3454 character tables rather than a single high-level normalize function. Prohibited character check: stringprep.in_table_c11(char) → bool (True = the character is in that table and therefore prohibited by profiles that use it). Tables: C.1.1 ASCII space, C.1.2 non-ASCII space, C.2.* control chars, C.3 private use, C.4 non-characters, C.5 surrogates, C.6–C.9 various. Mapping: stringprep.in_table_b1(char) → True for characters “commonly mapped to nothing” (delete them); stringprep.map_table_b2(char) → the case-folded string for use with NFKC (the character itself when there is no mapping, never an empty string); stringprep.map_table_b3(char) → the case-folded string for use without normalization. Bidi: stringprep.in_table_d1(char) → True for the RandAL class; stringprep.in_table_d2(char) → True for the L class. Key use: build a Nodeprep/Resourceprep/SASLprep profile by applying that profile’s mapping step (B.1 deletion for all three; B.2 case folding for Nodeprep only; SASLprep additionally maps non-ASCII spaces to U+0020 and does not case-fold), then unicodedata.normalize("NFKC", s) (Unicode normalization), then filter each char through the profile’s C.* tables and apply the bidi rules. Claude Code generates SASL authentication preparers, LDAP validators, hostname normalizers, and internet identifier validators.
CLAUDE.md for stringprep
## stringprep Stack
- Stdlib: import stringprep, unicodedata
- Check: stringprep.in_table_c12(ch) # non-ASCII space → prohibited
- stringprep.in_table_c21_c22(ch) # control chars
- stringprep.in_table_d1(ch) # RandAL bidi class
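- Fold: stringprep.map_table_b2(ch) # case fold for use with NFKC; returns ch itself when there is no mapping
- stringprep.map_table_b3(ch) # case fold for use without normalization (never deletes)
- Delete: stringprep.in_table_b1(ch) # True → char is "commonly mapped to nothing"; drop it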
- Normalize: unicodedata.normalize("NFKC", mapped)
- Note: Apply: map → NFKC normalize → check prohibited → bidi check
stringprep Profile Pipeline
# app/stringpreputil.py — saslprep, nodeprep, resourceprep, validator, normalizer
from __future__ import annotations
import stringprep
import unicodedata
from dataclasses import dataclass, field
# ─────────────────────────────────────────────────────────────────────────────
# 1. Low-level character classifiers
# ─────────────────────────────────────────────────────────────────────────────
def is_prohibited(char: str) -> bool:
    r"""
    Report whether ``char`` appears in any RFC 3454 prohibition table.

    Every C.* lookup offered by ``stringprep`` is consulted in RFC order
    (C.1.1 through C.9); evaluation stops at the first table that matches.

    Example:
        is_prohibited("\x00")        # True  (C.2.1 control)
        is_prohibited(" ")           # True  (C.1.1 ASCII space)
        is_prohibited("\U000e0001")  # True  (C.9 tagging)
    """
    table_lookups = (
        stringprep.in_table_c11,      # ASCII space
        stringprep.in_table_c12,      # non-ASCII space
        stringprep.in_table_c21_c22,  # control chars
        stringprep.in_table_c3,       # private use
        stringprep.in_table_c4,       # non-characters
        stringprep.in_table_c5,       # surrogates
        stringprep.in_table_c6,       # inappropriate for plain text
        stringprep.in_table_c7,       # inappropriate for canonical rep
        stringprep.in_table_c8,       # change display / deprecated
        stringprep.in_table_c9,       # tagging characters
    )
    return any(lookup(char) for lookup in table_lookups)
def is_randal(char: str) -> bool:
    """Tell whether ``char`` is listed in RFC 3454 table D.1 (RandAL bidi class)."""
    in_d1 = stringprep.in_table_d1(char)
    return in_d1
def is_l_class(char: str) -> bool:
    """Tell whether ``char`` is listed in RFC 3454 table D.2 (L, left-to-right, bidi class)."""
    in_d2 = stringprep.in_table_d2(char)
    return in_d2
def apply_map_b2(s: str) -> str:
    """
    Apply RFC 3454 mapping table B.2 (case folding for use with NFKC) to ``s``.

    ``stringprep.map_table_b2`` always returns a string: either the folded
    replacement (which may be longer than one character, e.g. "ß" -> "ss")
    or the character itself when table B.2 leaves it alone.  It never
    returns ``None`` or a "no mapping" sentinel, so no fallback branch is
    needed; the per-character results are simply concatenated.

    Args:
        s: The text to fold.

    Returns:
        The case-folded text; may differ in length from ``s``.

    Example:
        apply_map_b2("Hello")   # "hello"
        apply_map_b2("Straße")  # "strasse"
    """
    return "".join(map(stringprep.map_table_b2, s))
def apply_map_b3(s: str) -> str:
    r"""
    Delete every character that RFC 3454 says is "commonly mapped to nothing".

    Those characters (soft hyphen, zero-width space/joiners, variation
    selectors, ...) are listed in RFC 3454 table B.1, which the standard
    library exposes as ``stringprep.in_table_b1``.  Despite this function's
    historical name, ``stringprep.map_table_b3`` must NOT be used for this:
    it is the case-folding table for use without normalization and never
    returns an empty string, so it cannot delete anything.

    All characters outside table B.1 are passed through unchanged (in
    particular, no case folding is performed here).

    Args:
        s: The text to filter.

    Returns:
        ``s`` with every table-B.1 character removed.

    Example:
        apply_map_b3("te\u00adst")  # "test" (soft hyphen deleted)
    """
    return "".join(ch for ch in s if not stringprep.in_table_b1(ch))
# ─────────────────────────────────────────────────────────────────────────────
# 2. SASLprep (RFC 4013)
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class PrepResult:
    """Outcome of running one stringprep profile over a single input string."""

    # The input exactly as the caller supplied it.
    original: str
    # The string handed back by the profile function.
    output: str
    # Success flag; the profile functions in this module set it to
    # ``not errors``.
    ok: bool
    # Human-readable problem descriptions; each instance gets its own list.
    errors: list[str] = field(default_factory=list)
# RFC 4013 §2.3 prohibited-output tables, each paired with the wording used in
# the error report.  Order matters: only the first matching table is reported
# for a given character.
_SASLPREP_PROHIBITED = (
    (stringprep.in_table_c12, "prohibited non-ASCII space"),
    (stringprep.in_table_c21_c22, "prohibited control character"),
    (stringprep.in_table_c3, "prohibited private-use character"),
    (stringprep.in_table_c4, "non-character code point"),
    (stringprep.in_table_c5, "surrogate code point"),
    (stringprep.in_table_c6, "inappropriate character"),
    (stringprep.in_table_c7, "inappropriate character"),
    (stringprep.in_table_c8, "deprecated/tagging character"),
    (stringprep.in_table_c9, "deprecated/tagging character"),
)


def saslprep(s: str, allow_unassigned: bool = False) -> PrepResult:
    r"""
    Apply the SASLprep profile (RFC 4013) to a string.

    Used for SASL PLAIN / SCRAM authentication usernames and passwords.

    Steps:
        1. Map (RFC 4013 §2.1): delete the table-B.1 "commonly mapped to
           nothing" characters and replace every table-C.1.2 non-ASCII
           space with U+0020.  SASLprep does NOT case-fold, so "Admin" and
           "admin" stay distinct.
        2. Normalize with NFKC, using the Unicode 3.2 database that
           Stringprep is defined against.
        3. Check for prohibited output (C.1.2, C.2.1, C.2.2, C.3 - C.9) and,
           unless ``allow_unassigned`` is true, unassigned code points
           (table A.1).
        4. Check the bidirectional rules of RFC 3454 §6.

    Args:
        s: The raw user name or password.
        allow_unassigned: When False (the default, appropriate for strings
            that will be stored), code points unassigned in Unicode 3.2 are
            reported as errors.  Pass True for "query" strings.

    Returns:
        A ``PrepResult``.  All problems found are collected in ``errors``
        (positions refer to the mapped and normalized string).  ``output``
        is the prepared string on success and the untouched input on
        failure.

    Example:
        saslprep("p\u00adssword").output   # "pssword" (soft hyphen removed)
        saslprep("a\u00a0b").output        # "a b"     (NBSP mapped to space)
        saslprep("Admin").output           # "Admin"   (case preserved)
        saslprep("secret\x00null").ok      # False     (control character)
    """
    errors: list[str] = []

    # Step 1: mapping -- drop B.1 characters, turn non-ASCII spaces into " ".
    mapped = "".join(
        " " if stringprep.in_table_c12(ch) else ch
        for ch in s
        if not stringprep.in_table_b1(ch)
    )

    # Step 2: NFKC against Unicode 3.2, so that code points unassigned in
    # that version are left untouched and can be caught by the A.1 check.
    normalized = unicodedata.ucd_3_2_0.normalize("NFKC", mapped)

    # Step 3: prohibited output (+ unassigned code points for stored strings).
    checks = _SASLPREP_PROHIBITED
    if not allow_unassigned:
        checks = checks + ((stringprep.in_table_a1, "unassigned code point"),)
    for i, ch in enumerate(normalized):
        for in_table, label in checks:
            if in_table(ch):
                errors.append(f"{label} at position {i} (U+{ord(ch):04X})")
                break  # one report per character

    # Step 4: bidi.  The rules only apply once a RandAL character is present,
    # which also guarantees the string is non-empty.
    if any(stringprep.in_table_d1(ch) for ch in normalized):
        if any(stringprep.in_table_d2(ch) for ch in normalized):
            errors.append("string mixes RandAL and L bidirectional characters (RFC 3454 §6)")
        if not stringprep.in_table_d1(normalized[0]):
            errors.append("RandAL string must begin with a RandAL character")
        if not stringprep.in_table_d1(normalized[-1]):
            errors.append("RandAL string must end with a RandAL character")

    return PrepResult(
        original=s,
        output=normalized if not errors else s,
        ok=not errors,
        errors=errors,
    )
# ─────────────────────────────────────────────────────────────────────────────
# 3. Simplified Nodeprep (Jabber/XMPP)
# ─────────────────────────────────────────────────────────────────────────────
# Additional characters prohibited in XMPP node (localpart) identifiers
_XMPP_NODE_PROHIBITED = set('"&\'/:<>@')


def nodeprep(s: str) -> PrepResult:
    r"""
    Apply a simplified Nodeprep profile for XMPP node identifiers.

    Steps:
        1. Delete RFC 3454 table-B.1 characters ("commonly mapped to
           nothing", e.g. soft hyphen and zero-width joiners) so that
           visually identical identifiers prepare to the same string.
        2. Case-fold with table B.2 (via ``apply_map_b2``).
        3. NFKC-normalize.
        4. Reject characters found in any C.* table (``is_prohibited``) or
           in the XMPP-specific set ``"&'/:<>@``.

    This simplified profile performs no bidirectional check and no
    unassigned-code-point check.

    Args:
        s: The raw node (localpart) identifier.

    Returns:
        A ``PrepResult``.  An input that is empty after mapping yields
        ``ok=False`` with ``output=""``.  Otherwise ``output`` is the
        prepared string on success and the untouched input on failure.

    Example:
        r = nodeprep("Romeo")
        print(r.output)          # "romeo"
        r = nodeprep("ro\u00admeo")
        print(r.output)          # "romeo" (soft hyphen deleted)
        r = nodeprep("bad@name")
        print(r.ok, r.errors)    # False, [...]
    """
    errors: list[str] = []

    # Table B.1 is the "map to nothing" table; stringprep.map_table_b3 is a
    # case-folding table and cannot be used to delete characters.
    without_b1 = "".join(ch for ch in s if not stringprep.in_table_b1(ch))
    folded = apply_map_b2(without_b1)
    normalized = unicodedata.normalize("NFKC", folded)

    if not normalized:
        errors.append("empty string after mapping")
        return PrepResult(original=s, output="", ok=False, errors=errors)

    for i, ch in enumerate(normalized):
        if is_prohibited(ch):
            errors.append(f"prohibited character U+{ord(ch):04X} at position {i}")
        elif ch in _XMPP_NODE_PROHIBITED:
            errors.append(f"XMPP-prohibited character {ch!r} at position {i}")

    return PrepResult(
        original=s,
        output=normalized if not errors else s,
        ok=not errors,
        errors=errors,
    )
# ─────────────────────────────────────────────────────────────────────────────
# 4. Bulk validator
# ─────────────────────────────────────────────────────────────────────────────
def validate_many(
    strings: list[str],
    profile: str = "saslprep",
) -> list[PrepResult]:
    """
    Apply the specified profile to a list of strings and return all results.

    Args:
        strings: The inputs to prepare; results are returned in the same
            order.
        profile: ``'saslprep'`` or ``'nodeprep'``.

    Returns:
        One ``PrepResult`` per input string.

    Raises:
        ValueError: If ``profile`` is not one of the supported names.  A
            misspelled profile must not silently fall back to a different
            set of preparation rules.

    Example:
        results = validate_many(["alice", "bob@bad", "charlie"], profile="nodeprep")
        for r in results:
            print(r.original, r.ok, r.errors)
    """
    # Validate the name first so a typo is reported instead of being
    # treated as "nodeprep".
    if profile not in ("saslprep", "nodeprep"):
        raise ValueError(
            f"unknown stringprep profile {profile!r}; expected 'saslprep' or 'nodeprep'"
        )
    fn = saslprep if profile == "saslprep" else nodeprep
    return [fn(s) for s in strings]
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    print("=== stringprep demo ===")

    # Character tables: show the three classifiers for a few code points.
    print("\n--- character classifiers ---")
    sample_chars = [
        ("\x00", "NULL control"),
        (" ", "ASCII space"),
        ("\u00a0", "non-breaking space"),
        ("\u202c", "pop directional formatting"),
        ("a", "lowercase a"),
        ("\u0041", "uppercase A"),
        ("\u0600", "Arabic number sign"),
    ]
    for code_point, description in sample_chars:
        print(f" U+{ord(code_point):04X} {description:30s} prohibited={is_prohibited(code_point)} "
              f"RandAL={is_randal(code_point)} L={is_l_class(code_point)}")

    # Table B.2 case folding.
    print("\n--- apply_map_b2 (case-fold) ---")
    for word in ["Hello", "WORLD", "Straße", "Σ"]:
        print(f" {word!r:12s} → {apply_map_b2(word)!r}")

    # SASLprep over a mix of acceptable and unacceptable inputs.
    print("\n--- saslprep ---")
    sasl_inputs = [
        "alice",
        "Admin",
        "p\u00adssword",   # soft hyphen (deleted)
        "secret\x00null",  # null byte (prohibited)
        "café",
    ]
    for raw in sasl_inputs:
        outcome = saslprep(raw)
        if outcome.ok:
            status = "OK"
        else:
            status = f"ERR: {outcome.errors[0][:50]}"
        print(f" {raw!r:22s} → {outcome.output!r:22s} {status}")

    # Nodeprep over XMPP node identifiers.
    print("\n--- nodeprep ---")
    for raw in ["Romeo", "romeo", "bad@domain", "user name", "café"]:
        outcome = nodeprep(raw)
        if outcome.ok:
            status = "OK"
        else:
            status = f"ERR: {outcome.errors[0][:50]}"
        print(f" {raw!r:15s} → {outcome.output!r:15s} {status}")

    # Bulk validation through the profile dispatcher.
    print("\n--- validate_many ---")
    batch = validate_many(["alice", "Bob", "café\x00"], profile="saslprep")
    for outcome in batch:
        print(f" {outcome.original!r:15s} ok={outcome.ok} output={outcome.output!r}")

    print("\n=== done ===")
For the unicodedata stdlib alternative — unicodedata.normalize("NFKC", s) applies compatibility decomposition followed by canonical composition, which is the normalization step inside Stringprep profiles, but it does not perform case folding or prohibited-character checks — combine unicodedata.normalize with stringprep table lookups to build a complete profile; don’t use unicodedata.normalize alone for protocol-level identifier validation. For the idna (PyPI) alternative — idna.encode("münchen.de") and idna.decode(b"xn--mnchen-3ya.de") implement IDNA 2008 (RFC 5891) internationalized domain name encoding; IDNA 2008 does not use Stringprep or Nameprep (that was IDNA 2003, RFC 3490/3491) but defines its own per-code-point validity rules — use idna for hostname/domain name preparation and IDNA encoding; use stringprep tables directly when implementing SASL or XMPP protocols that specify custom Stringprep profiles. The Claude Skills 360 bundle includes stringprep skill sets covering is_prohibited()/is_randal()/is_l_class() classifiers, apply_map_b2()/apply_map_b3() mappers, PrepResult/saslprep() RFC-4013 SASLprep implementation, nodeprep() XMPP node identifier preparer, and validate_many() bulk validator. Start with the free tier to try Unicode string preparation patterns and stringprep pipeline code generation.