Diffusers runs state-of-the-art image generation models. pip install diffusers transformers accelerate. Text-to-image: from diffusers import StableDiffusionPipeline, pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda"), image = pipe("a photo of an astronaut on mars").images[0]. SDXL: from diffusers import StableDiffusionXLPipeline, pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16). Negative prompt: pipe(prompt, negative_prompt="blurry, low quality", num_inference_steps=30, guidance_scale=7.5, height=1024, width=1024). Img2Img: from diffusers import StableDiffusionImg2ImgPipeline. pipe(prompt=prompt, image=init_image, strength=0.75). Inpainting: StableDiffusionInpaintPipeline with image and mask_image. ControlNet: controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny"), pipe = StableDiffusionControlNetPipeline.from_pretrained(..., controlnet=controlnet), pipe(prompt, image=canny_image, controlnet_conditioning_scale=0.5). Scheduler swap: pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config). FLUX: from diffusers import FluxPipeline, pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16). Memory: pipe.enable_attention_slicing(), pipe.enable_sequential_cpu_offload() (CPU+GPU). LoRA: pipe.load_lora_weights("path/to/lora.safetensors", adapter_name="style"), pipe.set_adapters(["style"], adapter_weights=[0.8]). Compile: pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True). Claude Code generates Diffusers inference pipelines, ControlNet workflows, LoRA loading, memory optimization, and custom diffusion loops.
CLAUDE.md for Diffusers
## Diffusers Stack
- Version: diffusers >= 0.30, transformers >= 4.40, accelerate >= 0.30
- SD 1.5: StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=float16)
- SDXL: StableDiffusionXLPipeline — 1024x1024 default, supports refiner pipeline
- FLUX: FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev") — best quality
- ControlNet: ControlNetModel + StableDiffusionControlNetPipeline or SD3ControlNetPipeline
- Memory: enable_attention_slicing() → enable_sequential_cpu_offload() → enable_xformers_memory_efficient_attention()
- LoRA: pipe.load_lora_weights(path) → pipe.set_adapters(names, weights)
- Scheduler: pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config) (or DPMSolverMultistepScheduler)
Image Generation Pipeline
# diffusion/generate.py — Diffusers image generation with all major pipelines
from __future__ import annotations
import io
from pathlib import Path
from typing import Optional
import torch
from PIL import Image
# ── 1. Text-to-image with SDXL ────────────────────────────────────────────────
def load_sdxl_pipeline(
    model_id: str = "stabilityai/stable-diffusion-xl-base-1.0",
    device: str = "cuda",
    optimize: bool = True,
):
    """Build an SDXL text-to-image pipeline.

    Loads fp16 safetensors weights, swaps in the Euler Ancestral sampler,
    and (when *optimize* is true) enables memory optimizations before
    moving the pipeline onto *device*.
    """
    from diffusers import EulerAncestralDiscreteScheduler, StableDiffusionXLPipeline

    sdxl = StableDiffusionXLPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    )
    # Euler Ancestral converges faster at a small cost in sharpness.
    sdxl.scheduler = EulerAncestralDiscreteScheduler.from_config(sdxl.scheduler.config)
    if optimize:
        # Attention slicing trades some speed for roughly 30% less VRAM.
        sdxl.enable_attention_slicing()
        try:
            # xFormers kernels are an optional extra; skip when unavailable.
            sdxl.enable_xformers_memory_efficient_attention()
        except Exception:
            pass
    return sdxl.to(device)
def generate_sdxl(
    pipe,
    prompt: str,
    negative_prompt: str = "blurry, bad quality, low resolution, ugly, deformed",
    num_images: int = 1,
    steps: int = 30,
    guidance_scale: float = 7.5,
    width: int = 1024,
    height: int = 1024,
    seed: Optional[int] = None,
) -> list[Image.Image]:
    """Run SDXL text-to-image generation.

    Args:
        pipe: A loaded StableDiffusionXLPipeline (see load_sdxl_pipeline).
        prompt: Text description of the desired image.
        negative_prompt: Concepts to steer generation away from.
        num_images: Images to generate for the prompt.
        steps: Denoising steps (more is slower, usually sharper).
        guidance_scale: Classifier-free guidance strength.
        width: Output width in pixels (SDXL native is 1024).
        height: Output height in pixels (SDXL native is 1024).
        seed: Optional seed for reproducible output; None means random.

    Returns:
        List of PIL images, one per requested image.
    """
    # Seed the RNG on the pipeline's own device rather than hard-coding
    # "cuda", so CPU/MPS-hosted pipelines work too.
    generator = (
        torch.Generator(pipe.device).manual_seed(seed) if seed is not None else None
    )
    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_images_per_prompt=num_images,
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
        width=width,
        height=height,
        generator=generator,
    )
    return output.images
# ── 2. FLUX pipeline (state-of-the-art) ──────────────────────────────────────
def load_flux_pipeline(
    model_id: str = "black-forest-labs/FLUX.1-schnell",  # schnell=fast, dev=quality
    device: str = "cuda",
):
    """Load a FLUX.1 pipeline with sequential CPU offload.

    Args:
        model_id: Hub ID of the FLUX checkpoint (schnell or dev variant).
        device: Accelerator that offloaded submodules are streamed onto
            during inference. Previously ignored; now passed through.

    Returns:
        A FluxPipeline ready to use with generate_flux().
    """
    from diffusers import FluxPipeline

    pipe = FluxPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
    )
    # Sequential offload keeps all submodules on CPU and moves each onto the
    # accelerator only while it executes — lowest VRAM, slowest inference.
    # Forward `device` instead of silently ignoring the parameter.
    pipe.enable_sequential_cpu_offload(device=device)
    return pipe
def generate_flux(
    pipe,
    prompt: str,
    steps: int = 4,  # Schnell is distilled for ~4 steps
    guidance: float = 0.0,  # Schnell is guidance-distilled; keep at 0.0
    width: int = 1024,
    height: int = 1024,
    seed: Optional[int] = None,
) -> Image.Image:
    """Generate a single image with a FLUX pipeline.

    Args:
        pipe: A loaded FluxPipeline (see load_flux_pipeline).
        prompt: Text description of the desired image.
        steps: Denoising steps; FLUX.1-schnell targets 4.
        guidance: Guidance scale; 0.0 for the distilled schnell model.
        width: Output width in pixels.
        height: Output height in pixels.
        seed: Optional seed for reproducibility; None means random.

    Returns:
        The generated PIL image.
    """
    # `if seed` would silently drop seed=0; compare against None explicitly
    # so a zero seed still produces a seeded, reproducible generator.
    generator = torch.Generator("cpu").manual_seed(seed) if seed is not None else None
    image = pipe(
        prompt,
        num_inference_steps=steps,
        guidance_scale=guidance,
        width=width,
        height=height,
        generator=generator,
    ).images[0]
    return image
# ── 3. ControlNet for conditioned generation ──────────────────────────────────
def load_controlnet_canny_pipeline(
    base_model: str = "runwayml/stable-diffusion-v1-5",
    device: str = "cuda",
):
    """Build an SD 1.5 pipeline conditioned on Canny edge maps.

    The ControlNet steers generation to follow a supplied edge image;
    the UniPC multistep sampler keeps step counts low.
    """
    from diffusers import (
        ControlNetModel,
        StableDiffusionControlNetPipeline,
        UniPCMultistepScheduler,
    )

    edge_net = ControlNetModel.from_pretrained(
        "lllyasviel/control_v11p_sd15_canny",
        torch_dtype=torch.float16,
    )
    cn_pipe = StableDiffusionControlNetPipeline.from_pretrained(
        base_model,
        controlnet=edge_net,
        torch_dtype=torch.float16,
        safety_checker=None,
    )
    cn_pipe.scheduler = UniPCMultistepScheduler.from_config(cn_pipe.scheduler.config)
    cn_pipe.enable_attention_slicing()
    return cn_pipe.to(device)
def canny_to_control_image(image: Image.Image, low: int = 100, high: int = 200) -> Image.Image:
    """Turn *image* into a 3-channel Canny edge map for ControlNet input."""
    import cv2
    import numpy as np

    rgb = np.asarray(image)
    edge_map = cv2.Canny(cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY), low, high)
    # ControlNet expects a 3-channel image; replicate the edge plane.
    return Image.fromarray(np.stack((edge_map,) * 3, axis=-1))
def controlnet_generate(
    pipe,
    prompt: str,
    control_image: Image.Image,  # Edge map / depth map / pose image
    negative_prompt: str = "blurry, low quality",
    controlnet_scale: float = 0.5,
    steps: int = 20,
    guidance_scale: float = 7.5,
) -> Image.Image:
    """Generate one image steered by *control_image* via ControlNet."""
    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=control_image,
        controlnet_conditioning_scale=controlnet_scale,
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
    )
    return result.images[0]
# ── 4. Img2Img and Inpainting ─────────────────────────────────────────────────
def img2img(
    prompt: str,
    init_image: Image.Image,
    strength: float = 0.7,  # 0=keep original, 1=fully transform
    model_id: str = "runwayml/stable-diffusion-v1-5",
    device: str = "cuda",
    size: tuple[int, int] = (512, 512),
) -> Image.Image:
    """Transform *init_image* toward *prompt* with SD 1.5 img2img.

    Args:
        prompt: Text describing the target image.
        init_image: Starting image to transform.
        strength: Denoising strength; 0 keeps the original, 1 ignores it.
        model_id: Hub ID of the SD checkpoint to load.
        device: Device to run inference on.
        size: Resolution the input is resized to before generation
            (512x512 is SD 1.5's native training resolution).

    Returns:
        The transformed PIL image.
    """
    from diffusers import StableDiffusionImg2ImgPipeline

    pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
        model_id, torch_dtype=torch.float16, safety_checker=None
    ).to(device)
    init_image = init_image.resize(size)
    return pipe(prompt=prompt, image=init_image, strength=strength).images[0]
def inpaint(
    prompt: str,
    image: Image.Image,
    mask: Image.Image,  # White = inpaint, Black = keep
    model_id: str = "runwayml/stable-diffusion-inpainting",
    device: str = "cuda",
    size: tuple[int, int] = (512, 512),
) -> Image.Image:
    """Repaint the white region of *mask* inside *image* to match *prompt*.

    Args:
        prompt: Text describing what to paint into the masked region.
        image: Source image.
        mask: Mask image; white pixels are regenerated, black kept.
        model_id: Hub ID of the inpainting checkpoint.
        device: Device to run inference on.
        size: Resolution image and mask are resized to (they must match;
            512x512 is SD 1.5's native resolution).

    Returns:
        The inpainted PIL image.
    """
    from diffusers import StableDiffusionInpaintPipeline

    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        model_id, torch_dtype=torch.float16, safety_checker=None
    ).to(device)
    image = image.resize(size)
    mask = mask.resize(size)
    return pipe(prompt=prompt, image=image, mask_image=mask).images[0]
# ── 5. LoRA loading ───────────────────────────────────────────────────────────
def load_with_lora(
    base_model_id: str = "stabilityai/stable-diffusion-xl-base-1.0",
    lora_paths: Optional[list[str]] = None,  # safetensors files or Hub IDs
    lora_weights: Optional[list[float]] = None,  # blending weights (0-1)
    device: str = "cuda",
):
    """Load an SDXL pipeline and stack multiple LoRA adapters.

    Args:
        base_model_id: Hub ID of the base SDXL checkpoint.
        lora_paths: LoRA weight files or Hub IDs to load (None = none).
        lora_weights: Blending weight per adapter, parallel to lora_paths.
        device: Device to place the pipeline on.

    Returns:
        The pipeline with all adapters loaded and activated.
    """
    from diffusers import StableDiffusionXLPipeline

    # None defaults instead of mutable `[]` defaults, which are shared
    # across calls and a classic Python footgun.
    lora_paths = lora_paths or []
    lora_weights = lora_weights or []
    pipe = StableDiffusionXLPipeline.from_pretrained(
        base_model_id, torch_dtype=torch.float16
    ).to(device)
    adapter_names: list[str] = []
    active_weights: list[float] = []
    for i, (lora_path, weight) in enumerate(zip(lora_paths, lora_weights)):
        name = f"adapter_{i}"
        pipe.load_lora_weights(lora_path, adapter_name=name)
        adapter_names.append(name)
        # Track only the weights actually loaded: zip() truncates silently
        # when the two lists have different lengths, and set_adapters
        # requires names and weights to be parallel.
        active_weights.append(weight)
    if adapter_names:
        pipe.set_adapters(adapter_names, adapter_weights=active_weights)
    return pipe
# ── 6. Batch generation utility ──────────────────────────────────────────────
def generate_batch(
    pipe,
    prompts: list[str],
    output_dir: str = "outputs",
    **kwargs,
) -> list[Path]:
    """Run *pipe* over every prompt and save each image under *output_dir*."""
    target = Path(output_dir)
    target.mkdir(parents=True, exist_ok=True)
    written: list[Path] = []
    for idx, text in enumerate(prompts):
        batch = pipe(text, **kwargs).images
        for img_no, img in enumerate(batch):
            dest = target / f"img_{idx:04d}_{img_no}.png"
            img.save(dest)
            written.append(dest)
        # Progress line: prompt index, truncated prompt, image count.
        print(f"[{idx+1}/{len(prompts)}] {text[:50]}... → {len(batch)} image(s)")
    return written
if __name__ == "__main__":
    # SDXL smoke test: generate one seeded image and save it locally.
    sdxl = load_sdxl_pipeline()
    results = generate_sdxl(
        sdxl,
        prompt="A photorealistic red fox sitting in a snowy forest at dawn",
        num_images=1,
        seed=42,
    )
    results[0].save("fox_sdxl.png")
    print("Saved: fox_sdxl.png")
For the Stable Diffusion WebUI (AUTOMATIC1111) alternative when wanting a browser-based GUI with extensions, ControlNet, and LoRA loading without writing Python — A1111/ComfyUI provide no-code interfaces while Diffusers gives programmatic API access for building production pipelines, batch generation, custom schedulers, and composable diffusion workflows that can’t be automated through a GUI. For the OpenAI DALL-E API alternative when prioritizing simplicity, safety filtering, and not owning a GPU — DALL-E charges per image and sends data to OpenAI while Diffusers runs entirely locally at zero per-image cost with open-weight models that support fine-tuning for custom styles and domain-specific subjects via DreamBooth and LoRA. The Claude Skills 360 bundle includes Diffusers skill sets covering SDXL and FLUX pipelines, ControlNet conditioning, LoRA adapter loading, inpainting, DreamBooth fine-tuning, and batch generation utilities. Start with the free tier to try image generation code generation.