Python’s shlex module provides shell-like lexing and safe shell quoting. import shlex. split: shlex.split("ls -la /tmp") → ['ls', '-la', '/tmp']; it handles single/double quotes and backslash escapes, and posix=True is the default. shlex.split('echo "hello world"') → ['echo', 'hello world'] (quotes consumed). quote: shlex.quote(s) → shell-safe string, single-quoted only when quoting is needed; shlex.quote("hello world") → "'hello world'"; shlex.quote("it's") → 'it'"'"'s' (the embedded single quote is closed, emitted inside double quotes, then reopened). CRITICAL: always use shlex.quote() to escape values before inserting them into shell strings to prevent command injection. join: shlex.join(["ls", "-la", "/path with spaces"]) → "ls -la '/path with spaces'" (Python 3.8+). shlex class: lex = shlex.shlex(s, posix=True) — an iterator of token strings; lex.wordchars = chars allowed in unquoted words; lex.whitespace_split = True — split words only on whitespace (quotes are still honored); lex.commenters = chars that begin comments (default #); lex.token → the token buffer (useful to inspect when catching exceptions); lex.get_token() → next token; lex.push_token(t) → push a token back. Claude Code generates safe subprocess command builders, config file parsers, CLI tokenizers, and shell injection validators.
CLAUDE.md for shlex
## shlex Stack
- Stdlib: import shlex
- Split: shlex.split("cmd --flag 'value with spaces'")
- Quote: shlex.quote(user_value) # REQUIRED before shell interpolation
- Join: shlex.join(["cmd", "--flag", value]) # safe reassembly
- Lex: lex = shlex.shlex(source, posix=True) # custom tokenizer
- NEVER: f"cmd {user_input}" — always shlex.quote() or avoid shell=True
shlex Shell Safety and Tokenization Pipeline
# app/shlexutil.py — safe commands, parser, injection checker, config lexer
from __future__ import annotations
import shlex
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import Any
# ─────────────────────────────────────────────────────────────────────────────
# 1. Safe command construction
# ─────────────────────────────────────────────────────────────────────────────
def safe_args(*args: str | Path) -> list[str]:
    """
    Shell-quote every argument and return the quoted strings as a list.

    Each argument is converted with ``str()`` and then passed through
    ``shlex.quote``. The result is intended for display/logging; when
    actually executing a command, pass the unquoted list to subprocess.

    Example:
        safe_args("rsync", "-av", Path("/my dir/file"), "user@host:/backup/")
        # ['rsync', '-av', "'/my dir/file'", 'user@host:/backup/']
    """
    return list(map(shlex.quote, map(str, args)))
def build_command(base: str | list[str], **flags: Any) -> list[str]:
    """
    Assemble a subprocess argument list from a base command plus keyword flags.

    ``base`` may be a command string (split with ``shlex.split``) or an
    existing list (copied). Each keyword becomes ``--name`` with underscores
    turned into dashes:

    * ``True``  -> bare ``--name``
    * ``False`` -> omitted
    * ``None``  -> omitted
    * anything else -> ``--name=<value>``

    Example:
        build_command("ffmpeg", input="video.mp4", output="out.mp4", verbose=True)
        # ['ffmpeg', '--input=video.mp4', '--output=out.mp4', '--verbose']
    """
    argv = list(base) if not isinstance(base, str) else shlex.split(base)
    for name, value in flags.items():
        if value is None or value is False:
            continue
        option = "--" + name.replace("_", "-")
        # Only the bool singleton True yields a bare switch; e.g. 1 is a value.
        argv.append(option if value is True else f"{option}={value}")
    return argv
def command_string(args: list[str | Path]) -> str:
    """
    Render an argument list as one shell-quoted command line.

    Every item is stringified and the result is joined with ``shlex.join``.
    Meant for logging/display only: execute the list form with subprocess
    rather than feeding this string to ``shell=True``.

    Example:
        args = ["cp", "-r", "/path with spaces/file.txt", "/dest/"]
        print(command_string(args))  # cp -r '/path with spaces/file.txt' /dest/
    """
    return shlex.join(map(str, args))
def parse_command(cmd: str) -> list[str]:
    """
    Split a shell command string into its argument list.

    Uses POSIX-mode ``shlex.split`` without comment handling, so quotes and
    backslash escapes are interpreted and ``#`` is an ordinary character.
    Raises ``ValueError`` on malformed input such as an unclosed quote.

    Example:
        parse_command("git commit -m 'My commit message'")
        # ['git', 'commit', '-m', 'My commit message']
    """
    return shlex.split(cmd, comments=False, posix=True)
def parse_command_safe(cmd: str) -> list[str] | None:
    """
    Split a shell command string, reporting syntax errors as ``None``.

    Returns the token list on success. If ``shlex.split`` raises
    ``ValueError`` (for example on an unclosed quote), ``None`` is returned
    instead of propagating the error.

    Example:
        parse_command_safe("echo 'bad")    # None (unclosed quote)
        parse_command_safe("echo 'good'")  # ['echo', 'good']
    """
    try:
        tokens = shlex.split(cmd)
    except ValueError:
        return None
    return tokens
# ─────────────────────────────────────────────────────────────────────────────
# 2. Injection detection and prevention
# ─────────────────────────────────────────────────────────────────────────────
# Characters treated as shell metacharacters by the helpers below
# (operators, quotes, whitespace, glob and expansion characters).
_SHELL_METACHARACTERS = set('|&;<>()$`\\"\' \t\n*?[]#~=%{}')


def contains_shell_metacharacters(s: str) -> bool:
    """
    Report whether ``s`` contains any character from ``_SHELL_METACHARACTERS``.

    Returns ``False`` for the empty string.

    Example:
        contains_shell_metacharacters("hello")          # False
        contains_shell_metacharacters("hello; rm -rf")  # True
    """
    # The two share a character exactly when they are not disjoint.
    return not _SHELL_METACHARACTERS.isdisjoint(s)
def quote_if_needed(s: str) -> str:
    """
    Return ``s`` unchanged when it is shell-safe, otherwise shell-quoted.

    Delegates to ``shlex.quote``, which already leaves strings made only of
    safe characters untouched and single-quotes everything else. This covers
    cases a hand-maintained metacharacter set misses: the empty string
    (which must become ``''`` or the argument disappears from the command
    line), ``!``, ``^`` and non-ASCII characters.

    Example:
        quote_if_needed("simple")            # "simple"
        quote_if_needed("path with spaces")  # "'path with spaces'"
        quote_if_needed("")                  # "''"
    """
    return shlex.quote(s)
def validate_no_injection(value: str, context: str = "") -> str:
    """
    Reject values containing shell control sequences; return safe values as-is.

    The check is a denylist of command separators, pipes, redirections,
    backgrounding and command/variable substitution markers. A newline is
    included because the shell treats it as a command separator just like
    ``;``. This is a defensive boundary check for externally supplied
    filenames/arguments, not a substitute for ``shlex.quote`` or for passing
    argument lists to subprocess.

    Args:
        value: The externally supplied string to check.
        context: Optional label (e.g. ``"filename"``) included in the error.

    Returns:
        ``value`` unchanged when no dangerous pattern is found.

    Raises:
        ValueError: If ``value`` contains any dangerous pattern; the message
            names the first pattern matched.

    Example:
        filename = validate_no_injection(user_input, "filename")
    """
    # Multi-character operators come before their single-character prefixes
    # so the error message reports the most specific match.
    dangerous = (";", "&&", "||", "|", "`", "$(", "${", ">", "<", "&", "\n")
    for pattern in dangerous:
        if pattern in value:
            ctx = f" in {context!r}" if context else ""
            raise ValueError(f"Potential shell injection{ctx}: {value!r} contains {pattern!r}")
    return value
# ─────────────────────────────────────────────────────────────────────────────
# 3. Custom shlex tokenizer
# ─────────────────────────────────────────────────────────────────────────────
def tokenize(source: str, *, wordchars: str | None = None, commenters: str = "#") -> list[str]:
    """
    Lex ``source`` into tokens with a POSIX-mode ``shlex.shlex`` lexer.

    Whitespace splitting is disabled, so any character outside ``wordchars``
    (and outside quotes) becomes its own single-character token.

    Args:
        source: Text to tokenize.
        wordchars: Characters allowed inside an unquoted word; ``None`` keeps
            the lexer's default set.
        commenters: Characters that start a comment running to end of line.

    Raises:
        ValueError: Propagated from shlex on malformed input (e.g. an
            unclosed quote).

    Example:
        tokenize("key=value # comment")  # ['key', '=', 'value']
        tokenize("DNS.1=localhost", wordchars="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-")
    """
    lexer = shlex.shlex(source, posix=True)
    lexer.whitespace_split = False
    lexer.commenters = commenters
    if wordchars is not None:
        lexer.wordchars = wordchars
    # Iterating the lexer yields tokens until its EOF marker is reached.
    return list(lexer)
def tokenize_whitespace(source: str) -> list[str]:
    """
    Split ``source`` into words on whitespace only, honoring shell quotes.

    A POSIX-mode lexer with ``whitespace_split`` enabled is used, so
    punctuation stays inside words while quoted sections are kept together
    with their quotes removed. The lexer's default comment character (``#``)
    remains active. Useful for simple INI-style or DSV parsing.

    Example:
        tokenize_whitespace(' hello "world foo" bar ')
        # ['hello', 'world foo', 'bar']
    """
    lexer = shlex.shlex(source, posix=True)
    lexer.whitespace_split = True
    # Pull tokens until the lexer returns its EOF marker.
    return list(iter(lexer.get_token, lexer.eof))
# ─────────────────────────────────────────────────────────────────────────────
# 4. Simple config/DSL parser
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class ConfigEntry:
    """A single ``key = value`` setting produced by the config parser."""

    # First token of the config line.
    key: str
    # Token following the key; empty string when the line has no value.
    value: str
    # Optional comment text; defaults to empty.
    comment: str = ""
def parse_key_value_config(text: str) -> list[ConfigEntry]:
    """
    Parse simple ``key = value`` configuration text into ``ConfigEntry`` items.

    Blank lines and lines starting with ``#`` are skipped, and inline
    ``#`` comments are dropped by the tokenizer. Lines that fail to tokenize
    (e.g. an unclosed quote) are skipped silently. After discarding ``=``
    and empty tokens, the first token is the key and the second is the
    value; any further tokens are ignored, so values containing spaces or
    punctuation should be quoted. A line with only a key gets an empty
    value. The ``comment`` field of each entry is left at its default.

    Example:
        cfg = parse_key_value_config('''
        host = "db.example.com" # production DB
        port = 5432
        name = my_database
        ''')
        {e.key: e.value for e in cfg}
        # {'host': 'db.example.com', 'port': '5432', 'name': 'my_database'}
    """
    parsed: list[ConfigEntry] = []
    for raw in text.splitlines():
        stripped = raw.strip()
        if stripped == "" or stripped.startswith("#"):
            continue
        try:
            pieces = tokenize(stripped)
        except ValueError:
            # Best effort: a malformed line is ignored rather than fatal.
            continue
        fields = [piece for piece in pieces if piece not in ("", "=")]
        if not fields:
            continue
        setting_value = fields[1] if len(fields) > 1 else ""
        parsed.append(ConfigEntry(key=fields[0], value=setting_value))
    return parsed
def parse_flags(args_str: str) -> dict[str, str | bool]:
    """
    Turn a string of long options into a dictionary.

    Recognized forms:
        --flag       -> {flag: True}
        --key=value  -> {key: value}
        --key value  -> {key: value}  (the following token is the value
                                       unless it starts with "-")

    Tokens that do not start with ``--`` and are not consumed as a value are
    ignored. The input is split with ``shlex.split``, so quoting works.

    Example:
        parse_flags("--verbose --output=/tmp/out.txt --jobs 4")
        # {'verbose': True, 'output': '/tmp/out.txt', 'jobs': '4'}
    """
    words = shlex.split(args_str)
    total = len(words)
    parsed: dict[str, str | bool] = {}
    pos = 0
    while pos < total:
        current = words[pos]
        pos += 1
        if not current.startswith("--"):
            continue
        name, equals, inline_value = current[2:].partition("=")
        if equals:
            parsed[name] = inline_value
        elif pos < total and not words[pos].startswith("-"):
            # The next word is this option's value; consume it.
            parsed[name] = words[pos]
            pos += 1
        else:
            parsed[name] = True
    return parsed
# ─────────────────────────────────────────────────────────────────────────────
# 5. Subprocess wrappers
# ─────────────────────────────────────────────────────────────────────────────
def run(cmd: str | list[str], **kwargs: Any) -> subprocess.CompletedProcess:
    """
    Run a command without a shell and return the ``CompletedProcess``.

    A string command is split into an argument list with ``shlex.split``;
    a list is copied and used as-is. Remaining keyword arguments are passed
    to ``subprocess.run``.

    Defaults applied when the caller does not override them:
        * ``text=True``
        * ``capture_output=True`` — only when neither ``stdout`` nor
          ``stderr`` is supplied, because ``subprocess.run`` raises
          ``ValueError`` if ``capture_output`` is combined with either.

    Example:
        result = run("ls -la /tmp")
        result = run(["git", "log", "--oneline", "-5"])
        result = run(["make"], stdout=subprocess.DEVNULL)
    """
    args = shlex.split(cmd) if isinstance(cmd, str) else list(cmd)
    # Respect explicit stream redirection instead of conflicting with it.
    if "stdout" not in kwargs and "stderr" not in kwargs:
        kwargs.setdefault("capture_output", True)
    kwargs.setdefault("text", True)
    return subprocess.run(args, **kwargs)
def run_output(cmd: str | list[str], **kwargs: Any) -> str:
    """
    Execute ``cmd`` via ``run`` and return its stdout with surrounding
    whitespace stripped.

    Raises ``subprocess.CalledProcessError`` when the command exits with a
    non-zero status.

    Example:
        branch = run_output("git rev-parse --abbrev-ref HEAD")
    """
    completed = run(cmd, **kwargs)
    completed.check_returncode()
    captured = completed.stdout
    return captured.strip()
# ─────────────────────────────────────────────────────────────────────────────
# Demo
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Walk through each helper group and print sample results.
    print("=== shlex demo ===")

    # Parsing command strings into argument lists.
    print("\n--- parse_command ---")
    sample_commands = [
        "git commit -m 'fix: handle edge case'",
        'echo "hello world"',
        "rsync -av '/path with spaces/' user@host:/backup/",
    ]
    for raw_cmd in sample_commands:
        print(f" {raw_cmd!r}\n → {parse_command(raw_cmd)}")

    # How shlex.quote treats safe and unsafe values.
    print("\n--- shlex.quote ---")
    for sample in ["simple", "with spaces", "it's quoted", "$(dangerous)", "pipe|char"]:
        print(f" quote({sample!r:30s}) = {shlex.quote(sample)}")

    # Rendering an argument list as a display string.
    print("\n--- command_string ---")
    copy_args = ["cp", "-r", "/path with spaces/file.txt", "/dest dir/"]
    print(f" {command_string(copy_args)}")

    # Building an argument list from keyword flags.
    print("\n--- build_command ---")
    ffmpeg_cmd = build_command(
        "ffmpeg -y",
        input="video.mp4",
        output="out.mp4",
        vcodec="libx264",
        verbose=True,
        audio=False,
    )
    print(f" {ffmpeg_cmd}")
    print(f" as string: {command_string(ffmpeg_cmd)}")

    # Metacharacter detection and conditional quoting.
    print("\n--- injection detection ---")
    for candidate in ["hello", "world; rm -rf /", "$(id)", "file.txt", "name && echo pwned"]:
        has_meta = contains_shell_metacharacters(candidate)
        print(f" {candidate!r:35s}: meta={has_meta} quoted={quote_if_needed(candidate)}")

    # key = value config parsing.
    print("\n--- parse_key_value_config ---")
    config_text = """
host = "db.example.com" # production
port = 5432
ssl = true
name = my_database
"""
    for entry in parse_key_value_config(config_text):
        print(f" {entry.key} = {entry.value!r}")

    # Long-option parsing.
    print("\n--- parse_flags ---")
    parsed_flags = parse_flags("--verbose --output=/tmp/out.txt --jobs 4 --format json")
    for flag_name, flag_value in parsed_flags.items():
        print(f" {flag_name}: {flag_value!r}")

    # Raw tokenization with comment stripping.
    print("\n--- tokenize ---")
    line = 'key = "value with spaces" # this is a comment'
    print(f" tokenize({line!r})\n → {tokenize(line)}")

    print("\n=== done ===")
shlex versus subprocess: subprocess.run() and its relatives execute the command, while shlex handles the safe construction and parsing of the argument list that subprocess receives, so the two are complementary. Always build argument lists with shlex.split() or construct them directly as Python lists, then pass them to subprocess.run() with shell=False (the default); never use shell=True with unsanitized input. shlex versus argparse: argparse is a full CLI argument parser with --flags, subcommands, type coercion, help text, and error handling for programs that define their own command-line interface, whereas shlex parses or constructs shell command strings at runtime. Use argparse when building the CLI for your own tool; use shlex.split() when you receive a command string from a config file, user input, or an external system and need to decompose it safely into tokens. The Claude Skills 360 bundle includes shlex skill sets covering safe_args()/build_command()/command_string()/parse_command()/parse_command_safe() for command construction, contains_shell_metacharacters()/quote_if_needed()/validate_no_injection() for injection prevention, tokenize()/tokenize_whitespace() for custom lexers, parse_key_value_config()/parse_flags() for config/DSL parsing, and run()/run_output() as safe subprocess wrappers. Start with the free tier to try shell-safe command patterns and shlex pipeline code generation.