Python’s re module provides Perl-compatible regular expressions. import re. compile: pat = re.compile(r"\d+") — reuse for performance. match: m = re.match(r"\d+", "42abc") — anchored at start. search: m = re.search(r"\d+", "abc42") — first match anywhere. fullmatch: re.fullmatch(r"\d+", "42") — entire string must match. findall: re.findall(r"\d+", "a1b2c3") → ["1","2","3"]. finditer: for m in re.finditer(r"\d+", text): m.group(); m.span(). sub: re.sub(r"\s+", " ", text). subn: new_text, count = re.subn(r"\s+", " ", text). split: re.split(r"\s+", text). groups: m = re.search(r"(\w+)@(\w+)", email); m.group(1); m.group(2); m.groups(). named groups: m = re.search(r"(?P<user>\w+)@(?P<domain>\w+)", email); m.group("user"). groupdict: m.groupdict(). Flags: re.IGNORECASE, re.MULTILINE (^/$ per line), re.DOTALL (. matches \n), re.VERBOSE (whitespace+comments allowed), re.ASCII. Combine: re.compile(r"...", re.I | re.M). Lookahead: (?=...) positive, (?!...) negative. Lookbehind: (?<=...) positive, (?<!...) negative. Non-greedy: .*?. re.escape("a.b+c") — escape literal. backreference: \1 or \g<name>. Substitution fn: re.sub(pat, lambda m: m.group().upper(), text). Claude Code generates email/URL/log parsers, data extractors, and text normalizers.
CLAUDE.md for re
## re Stack
- Stdlib: import re
- Compile: pat = re.compile(r"...", re.I | re.M) — reuse across calls
- Extract: pat.findall(text) | [(m.group("k"),m.start()) for m in pat.finditer(text)]
- Named: r"(?P<name>...)" | m.groupdict() | re.sub(r"(?P<n>...)", lambda m: fn(m["n"]), text)
- Flags: re.I (ignore case) | re.M (^ and $ also match at line boundaries) | re.S (. matches newline) | re.X (verbose, whitespace/comments in pattern)
- Safety: always use r"..." raw strings | re.escape() for user-supplied literal strings
re Text Processing Pipeline
# app/patterns.py — compile, search, findall, sub, named groups, extractors, validators
from __future__ import annotations
import re
from dataclasses import dataclass
from typing import Any, Iterator
# ─────────────────────────────────────────────────────────────────────────────
# 1. Pre-compiled common patterns
# ─────────────────────────────────────────────────────────────────────────────
EMAIL = re.compile(
r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}",
re.ASCII,
)
URL = re.compile(
r"https?://[^\s\"'<>]+",
re.IGNORECASE,
)
IPV4 = re.compile(
r"\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}"
r"(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b",
)
ISO_DATE = re.compile(
r"\b(\d{4})[-/](\d{1,2})[-/](\d{1,2})\b",
)
UUID = re.compile(
r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
re.IGNORECASE,
)
SLUG = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*$")
LOG_LINE = re.compile(
r"""
(?P<ts>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}) # timestamp
\s+
(?P<level>DEBUG|INFO|WARNING|ERROR|CRITICAL) # log level
\s+
(?P<logger>\S+) # logger name
:\s+
(?P<msg>.+) # message
""",
re.VERBOSE,
)
# ─────────────────────────────────────────────────────────────────────────────
# 2. Extraction helpers
# ─────────────────────────────────────────────────────────────────────────────
def find_emails(text: str) -> list[str]:
    """Return every email address found in *text*, in order of appearance.

    Example:
        find_emails("contact [email protected] or [email protected]")
        # ["[email protected]", "[email protected]"]
    """
    return [match.group(0) for match in EMAIL.finditer(text)]
def find_urls(text: str) -> list[str]:
    """Collect every HTTP/HTTPS URL that appears in *text*.

    Example:
        find_urls("Visit https://example.com or http://docs.site/guide")
    """
    return [match.group(0) for match in URL.finditer(text)]
def find_ipv4(text: str) -> list[str]:
    """Collect every IPv4 address that appears in *text*."""
    return [match.group(0) for match in IPV4.finditer(text)]
def extract_named(pattern: re.Pattern, text: str) -> Iterator[dict[str, str]]:
    """Lazily yield the groupdict of each match of *pattern* in *text*.

    Intended for patterns built with named groups ((?P<name>...)).

    Example:
        for d in extract_named(LOG_LINE, log_content):
            print(d["ts"], d["level"], d["msg"])
    """
    yield from (match.groupdict() for match in pattern.finditer(text))
# ─────────────────────────────────────────────────────────────────────────────
# 3. Validation helpers
# ─────────────────────────────────────────────────────────────────────────────
def is_valid_email(value: str) -> bool:
    """Quick email format validation (entire string must be one address).

    Example:
        is_valid_email("[email protected]")  # True
        is_valid_email("not-an-email")  # False
    """
    return EMAIL.fullmatch(value) is not None
def is_valid_slug(value: str) -> bool:
    r"""Validate a URL slug: lowercase letters, digits, single hyphens.

    Uses fullmatch() for consistency with the other validators here.
    The previous match() call relied on the pattern's ``$`` anchor, which
    also matches just before a trailing newline — so "my-post\n" validated.

    Example:
        is_valid_slug("my-post-2024")  # True
        is_valid_slug("My Post")       # False
    """
    return bool(SLUG.fullmatch(value))
def is_valid_ipv4(value: str) -> bool:
    """Return True when *value* is exactly one dotted-quad IPv4 address."""
    return IPV4.fullmatch(value) is not None
def is_valid_uuid(value: str) -> bool:
    """Check the canonical 8-4-4-4-12 hex UUID layout (case-insensitive).

    Note: the pattern does not inspect the version nibble, so any hex
    string in that shape passes, not only v1-v5 UUIDs.
    """
    return UUID.fullmatch(value) is not None
# ─────────────────────────────────────────────────────────────────────────────
# 4. Transformation helpers
# ─────────────────────────────────────────────────────────────────────────────
def normalize_whitespace(text: str) -> str:
    """Collapse runs of whitespace to single spaces and trim the ends.

    Example:
        normalize_whitespace("  hello   world  ")  # "hello world"
    """
    # str.split() with no argument splits on any whitespace run and
    # discards leading/trailing whitespace, so join gives the same result
    # as a regex substitution followed by strip().
    return " ".join(text.split())
def slugify(text: str) -> str:
    """Convert arbitrary text to a URL-safe slug.

    Example:
        slugify("Hello, World! 2024")  # "hello-world-2024"
    """
    lowered = text.lower()
    # Drop anything that is not a word character, whitespace, or hyphen.
    no_punct = re.sub(r"[^\w\s-]", "", lowered)
    # Fold whitespace/underscore/hyphen runs into a single hyphen.
    hyphenated = re.sub(r"[\s_-]+", "-", no_punct)
    return hyphenated.strip("-")
def mask_emails(text: str, replacement: str = "[EMAIL]") -> str:
    """Redact every email address in *text*, substituting *replacement*.

    Example:
        mask_emails("Contact [email protected] for details")
        # "Contact [EMAIL] for details"
    """
    masked = EMAIL.sub(replacement, text)
    return masked
def mask_ips(text: str, replacement: str = "[IP]") -> str:
    """Redact every IPv4 address in *text*, substituting *replacement*."""
    masked = IPV4.sub(replacement, text)
    return masked
def camel_to_snake(name: str) -> str:
    """Convert CamelCase (including acronym runs) to snake_case.

    Example:
        camel_to_snake("HttpResponseCode")  # "http_response_code"
        camel_to_snake("XMLParser")         # "xml_parser"
    """
    # First split an uppercase run from a following capitalised word
    # ("XMLParser" -> "XML_Parser"), then split lower/digit-to-upper
    # boundaries ("httpCode" -> "http_Code").
    step = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", name)
    step = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", step)
    return step.lower()
def snake_to_camel(name: str) -> str:
    """Convert snake_case to CamelCase.

    Example:
        snake_to_camel("http_response_code")  # "HttpResponseCode"
    """
    def lift(match: re.Match) -> str:
        # "_x" -> "X": drop the underscore, uppercase the letter after it.
        return match.group(1)[1].upper()

    # capitalize() uppercases the first letter and lowercases the rest,
    # then each "_<char>" pair is replaced by its uppercased character.
    return re.sub(r"(_\w)", lift, name.capitalize())
# ─────────────────────────────────────────────────────────────────────────────
# 5. Log parsing
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class LogEntry:
    """One parsed log record; field names mirror LOG_LINE's named groups."""
    ts: str      # timestamp string, e.g. "2024-01-15T10:30:00"
    level: str   # one of DEBUG/INFO/WARNING/ERROR/CRITICAL
    logger: str  # logger name, e.g. "app.server"
    msg: str     # remainder of the line after ": "
def parse_log_lines(log_text: str) -> list[LogEntry]:
    """Parse structured log text into LogEntry objects.

    Text that does not match LOG_LINE is silently ignored.

    Example:
        entries = parse_log_lines(log_file.read())
        errors = [e for e in entries if e.level == "ERROR"]
    """
    # LOG_LINE's group names (ts/level/logger/msg) match LogEntry's fields,
    # so each groupdict can be splatted straight into the constructor.
    return [LogEntry(**m.groupdict()) for m in LOG_LINE.finditer(log_text)]
# ─────────────────────────────────────────────────────────────────────────────
# 6. Template substitution
# ─────────────────────────────────────────────────────────────────────────────
def render_template(template: str, variables: dict[str, Any]) -> str:
    """Replace {{variable}} placeholders with values from *variables*.

    A placeholder whose (stripped) key is absent from *variables* is left
    unchanged. Previously the lookup used ``variables.get(key)`` and tested
    the result against None, which also left placeholders untouched when a
    key was present but explicitly mapped to None; an EAFP lookup keeps the
    two cases distinct.

    Example:
        render_template("Hello {{name}}, your code is {{code}}",
                        {"name": "Alice", "code": "A1B2"})
        # "Hello Alice, your code is A1B2"
    """
    def replacer(m: re.Match) -> str:
        key = m.group(1).strip()
        try:
            return str(variables[key])
        except KeyError:
            return m.group(0)  # key not supplied: leave placeholder as-is

    return re.sub(r"\{\{(.+?)\}\}", replacer, template)
def extract_placeholders(template: str) -> list[str]:
    """List the stripped names of all {{variables}} in a template string.

    Example:
        extract_placeholders("Hello {{name}}, your order {{id}} is ready")
        # ["name", "id"]
    """
    return [m.group(1).strip() for m in re.finditer(r"\{\{(.+?)\}\}", template)]
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Exercise each helper with a small sample; output is purely illustrative.
    def section(title: str) -> None:
        print(f"\n--- {title} ---")

    print("=== re demo ===")
    sample = (
        "Contact [email protected] or [email protected]. "
        "Server at 192.168.1.100 or https://api.example.com/v2/data. "
        "Request ID: 550e8400-e29b-41d4-a716-446655440000"
    )

    section("find_emails")
    print(f" {find_emails(sample)}")
    section("find_urls")
    print(f" {find_urls(sample)}")
    section("find_ipv4")
    print(f" {find_ipv4(sample)}")
    section("UUID")
    print(f" UUIDs: {UUID.findall(sample)}")

    section("is_valid_email")
    for addr in ["[email protected]", "bad@", "no-at-sign", "[email protected]"]:
        print(f" {addr!r:30s} → {is_valid_email(addr)}")

    section("normalize_whitespace")
    messy = " multiple spaces\t\there "
    print(f" {normalize_whitespace(messy)!r}")

    section("slugify")
    for title in ["Hello World!", "Python 3.12 Release", "Café & Restaurant"]:
        print(f" {title!r:30s} → {slugify(title)!r}")

    section("camel_to_snake / snake_to_camel")
    for name in ["HttpResponseCode", "XMLParser", "getUserByID"]:
        snake = camel_to_snake(name)
        camel = snake_to_camel(snake)
        print(f" {name!r:25s} → {snake!r:30s} → {camel!r}")

    section("log parsing")
    # The string content stays unindented so the timestamps lead each line.
    logs = """\
2024-01-15T10:30:00 INFO app.server: Server started on port 8080
2024-01-15T10:30:01 DEBUG app.db: Connected to database
2024-01-15T10:30:05 ERROR app.auth: Invalid token received
"""
    for entry in parse_log_lines(logs):
        print(f" [{entry.level:8s}] {entry.logger}: {entry.msg}")

    section("render_template")
    tpl = "Dear {{name}}, your order {{order_id}} ships on {{date}}."
    result = render_template(tpl, {"name": "Alice", "order_id": "ORD-42", "date": "2024-02-01"})
    print(f" {result}")
    print(f" placeholders: {extract_placeholders(tpl)}")

    print("\n=== done ===")
For the regex alternative — the third-party regex module (PyPI) is a drop-in superset of stdlib re with Unicode property escapes (\p{Letter}), fuzzy matching ((?:word){e<=1}), variable-length lookbehinds, possessive quantifiers, atomic groups, and overlapping matches; stdlib re covers the Perl-compatible subset that handles 95% of real-world pattern matching without external dependencies — use regex when you need Unicode property matching (scripts, categories), fuzzy approximate matching, or variable-length lookbehinds, stdlib re for everything else. For the parse alternative — parse (PyPI) provides the inverse of str.format(): parse.parse("Hello, {}!", "Hello, World!") → Result — much simpler than writing a regex for structured text extraction; stdlib re is more powerful but requires learning pattern syntax — use parse for quick structured extraction from format-like strings, re when you need full control over capturing groups, lookaheads, substitutions, and compiled performance. The Claude Skills 360 bundle includes re skill sets covering EMAIL/URL/IPV4/ISO_DATE/UUID/SLUG/LOG_LINE pre-compiled patterns, find_emails()/find_urls()/find_ipv4()/extract_named() extractors, is_valid_email()/is_valid_slug()/is_valid_uuid() validators, normalize_whitespace()/slugify()/mask_emails()/camel_to_snake()/snake_to_camel() transformers, parse_log_lines() structured extraction, and render_template()/extract_placeholders() template utilities. Start with the free tier to try text pattern matching and re extraction pipeline code generation.