OpenCV provides the building blocks for production computer vision: image I/O and color space conversions, geometric transformations, feature detection, and contour analysis. YOLO models via OpenCV’s DNN module run real-time object detection with cv2.dnn.readNetFromONNX. Camera calibration removes lens distortion for measurement accuracy. Video stream processing with cv2.VideoCapture handles both file and webcam input. Morphological operations clean binary masks. Perspective transforms extract document regions. Claude Code generates OpenCV preprocessing pipelines, object detection integrations, contour analysis routines, and the video processing loops for production vision systems.
CLAUDE.md for OpenCV Projects
## OpenCV Stack
- Version: opencv-python >= 4.10, opencv-contrib-python for SIFT/ORB/ArUco
- Color: always convert BGR→RGB before display/ML models
- Deep learning: cv2.dnn for ONNX/TF models, or defer to PyTorch/ONNX Runtime
- Threading: use queue.Queue for video capture + processing pipeline
- Performance: cv2.UMat for OpenCL (transparent API) acceleration, resize before processing loops
- Testing: use fixed test images from fixtures — don't depend on camera in tests
Image Preprocessing Pipeline
# vision/preprocessing.py — standard image preprocessing operations
import cv2
import numpy as np
from dataclasses import dataclass
from typing import Optional
@dataclass
class PreprocessConfig:
    """Preprocessing options for neural-network inference inputs.

    Defaults target a 640x640 model input with ImageNet normalization
    statistics (used by `preprocess_for_inference`).
    """

    # (width, height) passed to cv2.resize — note OpenCV's dsize order.
    target_size: tuple[int, int] = (640, 640)
    normalize: bool = True  # 0-255 → 0.0-1.0
    bgr_to_rgb: bool = True  # OpenCV reads BGR, models expect RGB
    mean: tuple[float, float, float] = (0.485, 0.456, 0.406)  # ImageNet mean
    std: tuple[float, float, float] = (0.229, 0.224, 0.225)  # ImageNet std
def preprocess_for_inference(
    image: np.ndarray,
    config: Optional[PreprocessConfig] = None,
) -> np.ndarray:
    """Preprocess an OpenCV BGR image for neural network inference.

    Args:
        image: HWC uint8 BGR image (as returned by cv2.imread).
        config: Preprocessing options; when omitted, a fresh
            PreprocessConfig is created per call. (The previous default of
            ``PreprocessConfig()`` in the signature was a mutable default
            argument — one instance shared by every call.)

    Returns:
        float32 array of shape (1, C, H, W) — NCHW with a batch axis.
    """
    if config is None:
        config = PreprocessConfig()
    # Resize to the model's expected input resolution.
    resized = cv2.resize(image, config.target_size, interpolation=cv2.INTER_LINEAR)
    # OpenCV decodes to BGR; most trained models expect RGB.
    if config.bgr_to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    # Scale to [0, 1], then standardize with the configured mean/std.
    if config.normalize:
        img_float = resized.astype(np.float32) / 255.0
        img_normalized = (img_float - config.mean) / config.std
    else:
        img_normalized = resized.astype(np.float32)
    # HWC → NCHW (batch, channels, height, width)
    img_transposed = np.transpose(img_normalized, (2, 0, 1))
    return np.expand_dims(img_transposed, axis=0)
def enhance_contrast(image: np.ndarray) -> np.ndarray:
    """Boost local contrast via CLAHE on the LAB lightness channel.

    Working in LAB keeps chroma untouched, so colors are preserved while
    low-contrast regions are equalized.
    """
    lab_image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    lightness, a_channel, b_channel = cv2.split(lab_image)
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    lightness = equalizer.apply(lightness)
    recombined = cv2.merge([lightness, a_channel, b_channel])
    return cv2.cvtColor(recombined, cv2.COLOR_LAB2BGR)
def remove_noise(image: np.ndarray, strength: int = 10) -> np.ndarray:
    """Reduce color image noise with OpenCV's non-local means filter.

    `strength` is applied to both luminance and color components; template
    window 7 and search window 21 are the OpenCV-recommended defaults.
    """
    denoised = cv2.fastNlMeansDenoisingColored(
        image, None, strength, strength, 7, 21
    )
    return denoised
def correct_perspective(
    image: np.ndarray,
    corners: np.ndarray,  # 4 points: top-left, top-right, bottom-right, bottom-left
    output_size: tuple[int, int] = (800, 1000),
) -> np.ndarray:
    """Warp a quadrilateral region (e.g. a document) to a flat rectangle.

    `corners` must be ordered TL, TR, BR, BL to match the destination
    rectangle built below.
    """
    width, height = output_size
    # Destination rectangle in the same TL, TR, BR, BL order as `corners`.
    destination = np.float32([
        (0, 0),
        (width - 1, 0),
        (width - 1, height - 1),
        (0, height - 1),
    ])
    matrix = cv2.getPerspectiveTransform(np.float32(corners), destination)
    return cv2.warpPerspective(image, matrix, (width, height))
Object Detection with YOLO + ONNX
# vision/detector.py — YOLO object detection via ONNX Runtime
import cv2
import numpy as np
import onnxruntime as ort
from dataclasses import dataclass
@dataclass
class Detection:
    """A single detected object in original-image pixel coordinates."""

    class_id: int  # index into COCO_CLASSES
    class_name: str  # label text; "unknown" when class_id is out of range
    confidence: float  # objectness score multiplied by the best class score
    bbox: tuple[int, int, int, int]  # x, y, w, h
# COCO class names in the model's output index order. This listing is
# truncated; the deployed list must contain all 80 entries, otherwise
# out-of-range ids are reported as "unknown" by YOLODetector.detect.
COCO_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
    "truck", "boat", "traffic light", "fire hydrant", "stop sign",
    # ... (80 COCO classes)
]
class YOLODetector:
    """YOLO object detector running through ONNX Runtime.

    Assumes a YOLOv5-style export whose output rows are
    (cx, cy, w, h, objectness, per-class scores) — see detect() for the
    exact parsing. TODO(review): confirm the convention for the specific
    ONNX export in use; YOLOv8-style exports have no objectness column.
    """

    def __init__(self, model_path: str, conf_threshold: float = 0.5, iou_threshold: float = 0.45):
        # Minimum confidence for a detection to survive filtering.
        self.conf_threshold = conf_threshold
        # IoU threshold used by non-maximum suppression.
        self.iou_threshold = iou_threshold
        # Model input resolution (width, height).
        self.input_size = (640, 640)
        # Provider list is ordered: ONNX Runtime uses CUDA when available
        # and silently falls back to CPU otherwise.
        self.session = ort.InferenceSession(
            model_path,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        self.input_name = self.session.get_inputs()[0].name

    def detect(self, image: np.ndarray) -> list[Detection]:
        """Run YOLO detection on an OpenCV BGR image.

        Returns NMS-filtered detections with bboxes (x, y, w, h) scaled
        back to the original image's pixel space.
        """
        original_h, original_w = image.shape[:2]
        # Preprocess: scale to [0, 1], resize (stretch) to input_size, and
        # swap BGR→RGB. blobFromImage returns NCHW float32.
        blob = cv2.dnn.blobFromImage(
            image,
            scalefactor=1/255.0,
            size=self.input_size,
            swapRB=True,
        )
        # Inference
        outputs = self.session.run(None, {self.input_name: blob})
        predictions = outputs[0][0]  # Shape: (num_detections, 85) for YOLO
        # Parse detections
        detections = []
        for pred in predictions:
            confidence = pred[4]  # objectness score
            if confidence < self.conf_threshold:
                continue
            class_scores = pred[5:]
            class_id = int(np.argmax(class_scores))
            # Combined score: objectness * best class probability.
            class_conf = class_scores[class_id] * confidence
            if class_conf < self.conf_threshold:
                continue
            # Convert center-format box to top-left x/y plus w/h, rescaling
            # from model-input pixels to original-image pixels.
            # NOTE(review): assumes cx/cy/w/h are in input-pixel units
            # (YOLOv5 ONNX convention) — confirm for this export.
            cx, cy, w, h = pred[:4]
            x = int((cx - w / 2) * original_w / self.input_size[0])
            y = int((cy - h / 2) * original_h / self.input_size[1])
            w = int(w * original_w / self.input_size[0])
            h = int(h * original_h / self.input_size[1])
            detections.append(Detection(
                class_id=class_id,
                class_name=COCO_CLASSES[class_id] if class_id < len(COCO_CLASSES) else "unknown",
                confidence=float(class_conf),
                bbox=(x, y, w, h),
            ))
        # Non-maximum suppression over all surviving boxes; NMSBoxes
        # returns the indices of the boxes to keep.
        bboxes = [d.bbox for d in detections]
        scores = [d.confidence for d in detections]
        indices = cv2.dnn.NMSBoxes(bboxes, scores, self.conf_threshold, self.iou_threshold)
        return [detections[i] for i in indices.flatten()] if len(indices) > 0 else []

    def draw_detections(self, image: np.ndarray, detections: list[Detection]) -> np.ndarray:
        """Annotate image with bounding boxes and labels.

        Works on a copy — the input image is not modified.
        """
        result = image.copy()
        for det in detections:
            x, y, w, h = det.bbox
            color = (0, 255, 0)
            cv2.rectangle(result, (x, y), (x + w, y + h), color, 2)
            label = f"{det.class_name}: {det.confidence:.2f}"
            # Filled background behind the text so the label stays readable.
            (text_w, text_h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            cv2.rectangle(result, (x, y - text_h - 8), (x + text_w, y), color, -1)
            cv2.putText(result, label, (x, y - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
        return result
Contour Analysis
# vision/contours.py — contour detection for document and object segmentation
import cv2
import numpy as np
def find_document_corners(image: np.ndarray) -> np.ndarray | None:
    """Find the four corners of a document in an image.

    Fix: the annotation previously used ``Optional``, which this module
    never imports, so importing the module raised NameError; the PEP 604
    union (already used elsewhere in this codebase) needs no import.

    Args:
        image: BGR image containing the document.

    Returns:
        A (4, 2) array of corner points, or None if no quadrilateral is
        found among the five largest contours.
    """
    # Preprocess: grayscale → blur → edges
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 75, 200)
    # Dilate edges to close gaps in the document outline
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    dilated = cv2.dilate(edges, kernel, iterations=1)
    # Find contours, sorted by area descending
    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)
    for contour in contours[:5]:  # Check top 5 largest
        # Approximate contour with a polygon (2% of perimeter tolerance)
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        # Document should be a quadrilateral (4 corners)
        if len(approx) == 4:
            return approx.reshape(4, 2)
    return None
def analyze_defects(image: np.ndarray) -> dict:
    """Detect surface defects using morphological operations.

    Returns a dict with the defect count, total defect area, and a list
    of per-defect records (area, bbox, circularity, type).
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Adaptive threshold copes with uneven illumination across the surface.
    binary = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        blockSize=21,
        C=5,
    )
    # Opening removes speckle noise; closing fills small holes.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    mask = cv2.morphologyEx(binary, cv2.MORPH_OPEN, ellipse)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, ellipse)
    # Each remaining external contour is a candidate defect region.
    regions, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    min_area = 50  # Filter noise
    defects = []
    for region in regions:
        region_area = cv2.contourArea(region)
        if region_area < min_area:
            continue
        x, y, w, h = cv2.boundingRect(region)
        arc = cv2.arcLength(region, True)
        # Circularity: 1.0 for a perfect circle, → 0 for elongated shapes.
        roundness = 4 * np.pi * region_area / (arc ** 2) if arc > 0 else 0
        defects.append({
            "area": region_area,
            "bbox": (x, y, w, h),
            "circularity": roundness,
            "type": "scratch" if roundness < 0.3 else "spot",
        })
    return {
        "defect_count": len(defects),
        "total_defect_area": sum(entry["area"] for entry in defects),
        "defects": defects,
    }
Video Stream Processing
# vision/video_processor.py — threaded video pipeline
import queue
import threading
from typing import Callable

import cv2
import numpy as np
class VideoProcessor:
"""Thread-safe video capture and processing pipeline."""
def __init__(
self,
source: int | str, # Camera index or file path
process_fn: Callable[[np.ndarray], np.ndarray],
frame_skip: int = 1, # Process every Nth frame
):
self.source = source
self.process_fn = process_fn
self.frame_skip = frame_skip
self.frame_queue: queue.Queue = queue.Queue(maxsize=2)
self.result_queue: queue.Queue = queue.Queue(maxsize=2)
self._stop = threading.Event()
def _capture_thread(self):
"""Capture frames in background thread."""
cap = cv2.VideoCapture(self.source)
frame_count = 0
try:
while not self._stop.is_set():
ret, frame = cap.read()
if not ret:
break
frame_count += 1
if frame_count % self.frame_skip != 0:
continue
# Non-blocking put — drop frames if queue full
try:
self.frame_queue.put_nowait(frame)
except queue.Full:
pass # Drop frame
finally:
cap.release()
self.frame_queue.put(None) # Signal end
def _process_thread(self):
"""Process frames in background thread."""
while True:
frame = self.frame_queue.get()
if frame is None:
break
try:
processed = self.process_fn(frame)
try:
self.result_queue.put_nowait(processed)
except queue.Full:
pass
except Exception as e:
print(f"Processing error: {e}")
def run_display(self, window_name: str = "Video") -> None:
"""Run pipeline with live display."""
capture = threading.Thread(target=self._capture_thread, daemon=True)
processor = threading.Thread(target=self._process_thread, daemon=True)
capture.start()
processor.start()
try:
while True:
try:
frame = self.result_queue.get(timeout=0.1)
cv2.imshow(window_name, frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
except queue.Empty:
if not capture.is_alive():
break
finally:
self._stop.set()
cv2.destroyAllWindows()
For the ONNX Runtime integration that runs exported vision models at production speed, see the ONNX guide for INT8 quantization and optimized inference sessions. For the PyTorch training pipeline that produces the models you deploy with OpenCV’s DNN module, the PyTorch guide covers training loops and model export. The Claude Skills 360 bundle includes OpenCV skill sets covering preprocessing pipelines, detection integration, and video processing. Start with the free tier to try OpenCV vision system generation.