NetworkX provides graph data structures and algorithms for network analysis. pip install networkx. import networkx as nx. Graph types: G = nx.Graph() undirected, nx.DiGraph() directed, nx.MultiGraph() multi-edge. Add nodes: G.add_node(1, label="Alice"), G.add_nodes_from([2,3,4]). Add edges: G.add_edge(1, 2, weight=0.5), G.add_edges_from([(1,2),(2,3)]). Properties: G.nodes(), G.edges(), G.number_of_nodes(), G.degree(1). Shortest path: nx.shortest_path(G, 1, 5) — list of nodes. Weighted: nx.shortest_path(G, source, target, weight="weight", method="dijkstra"). Centrality: nx.degree_centrality(G), nx.betweenness_centrality(G, normalized=True), nx.closeness_centrality(G), nx.eigenvector_centrality(G, max_iter=1000). PageRank: nx.pagerank(G, alpha=0.85, weight="weight"). Community: from networkx.algorithms.community import greedy_modularity_communities, louvain_communities; then communities = louvain_communities(G, seed=42). Connected: nx.is_connected(G), list(nx.connected_components(G)). MST: nx.minimum_spanning_tree(G, weight="weight"). Flow: nx.maximum_flow(G, source, sink). Generators: nx.erdos_renyi_graph(n=100, p=0.1), nx.barabasi_albert_graph(n=100, m=2), nx.watts_strogatz_graph(n=100, k=6, p=0.1). Read/Write: nx.read_edgelist("edges.csv"), nx.write_gml(G, "graph.gml"). Draw: nx.draw_networkx(G, pos=nx.spring_layout(G)). Claude Code generates NetworkX graph builders, centrality analyzers, community detectors, pathway finders, and social network analysis pipelines.
CLAUDE.md for NetworkX
## NetworkX Stack
- Version: networkx >= 3.2
- Types: Graph (undirected) | DiGraph (directed) | MultiGraph/MultiDiGraph
- Build: G.add_node(id, **attrs) | G.add_edge(u, v, weight=w, **attrs)
- Paths: nx.shortest_path(G, s, t, weight="weight") | nx.all_pairs_shortest_path
- Centrality: degree_centrality | betweenness_centrality | eigenvector_centrality
- Community: louvain_communities(G, seed=42) | greedy_modularity_communities
- IO: read_edgelist | read_gml | read_graphml | from_pandas_edgelist
- Draw: nx.draw_networkx(G, pos=nx.spring_layout(G), with_labels=True)
NetworkX Graph Analysis Pipeline
# graphs/networkx_pipeline.py — graph analysis and network science
from __future__ import annotations
import warnings
import numpy as np
import pandas as pd
from collections import defaultdict
from pathlib import Path
import networkx as nx
from networkx.algorithms.community import (
greedy_modularity_communities,
louvain_communities,
modularity,
)
# ── 1. Graph construction ─────────────────────────────────────────────────────
def build_graph(
    edges: list[tuple],
    directed: bool = False,
    node_attrs: dict | None = None,
    edge_attr: str = "weight",
) -> nx.Graph:
    """
    Build a NetworkX graph from an edge list.

    Parameters
    ----------
    edges : list of (u, v), (u, v, weight), or (u, v, attr_dict) tuples.
    directed : build a DiGraph instead of an undirected Graph.
    node_attrs : optional {node_id: {attr: value}} mapping applied after edges;
        nodes not created by the edge list are added rather than raising.
    edge_attr : attribute name under which a scalar third element is stored.

    Returns the constructed graph.
    Raises ValueError for edge tuples that are not length 2 or 3 (the previous
    implementation silently discarded everything past the third element).
    """
    G = nx.DiGraph() if directed else nx.Graph()
    for edge in edges:
        if len(edge) == 2:
            u, v = edge
            G.add_edge(u, v)
        elif len(edge) == 3:
            u, v, w = edge
            # A dict third element is an attribute mapping; any other value
            # is treated as a scalar weight stored under `edge_attr`.
            if isinstance(w, dict):
                G.add_edge(u, v, **w)
            else:
                G.add_edge(u, v, **{edge_attr: w})
        else:
            raise ValueError(
                f"Edge tuple must have 2 or 3 elements, got {len(edge)}: {edge!r}"
            )
    if node_attrs:
        for node, attrs in node_attrs.items():
            if node in G:
                G.nodes[node].update(attrs)
            else:
                # Isolated node with attributes; previously a KeyError.
                G.add_node(node, **attrs)
    print(f"Graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges, directed={directed}")
    return G
def from_dataframe(
    df: pd.DataFrame,
    source: str = "source",
    target: str = "target",
    edge_attrs: list[str] = None,
    node_attrs: pd.DataFrame = None,  # indexed by node id
    directed: bool = False,
) -> nx.Graph:
    """Construct a graph from an edge-list DataFrame, optionally merging
    per-node attributes from a second DataFrame keyed by node id."""
    graph_cls = nx.DiGraph if directed else nx.Graph
    G = nx.from_pandas_edgelist(
        df,
        source=source,
        target=target,
        edge_attr=edge_attrs,
        create_using=graph_cls(),
    )
    if node_attrs is not None:
        for node_id, row in node_attrs.iterrows():
            # Rows for ids absent from the edge list are ignored.
            if not G.has_node(node_id):
                continue
            G.nodes[node_id].update(row.to_dict())
    return G
# ── 2. Graph statistics ───────────────────────────────────────────────────────
def graph_stats(G: nx.Graph) -> dict:
    """
    Compute basic graph statistics.

    Returns a dict with node/edge counts, density, degree summary, and — for
    graphs small enough to afford it — clustering, transitivity and diameter.
    Safe on empty graphs (the original raised on `max([])` and on
    `nx.is_connected` for the null graph).
    """
    is_dir = G.is_directed()
    n = G.number_of_nodes()
    if n == 0:
        # Degenerate case: nothing to aggregate, nothing is connected.
        return {
            "n_nodes": 0,
            "n_edges": 0,
            "directed": is_dir,
            "connected": False,
            "density": 0.0,
            "avg_degree": 0.0,
            "max_degree": 0,
            "self_loops": 0,
        }
    is_conn = nx.is_weakly_connected(G) if is_dir else nx.is_connected(G)
    degrees = [d for _, d in G.degree()]
    stats = {
        "n_nodes": n,
        "n_edges": G.number_of_edges(),
        "directed": is_dir,
        "connected": is_conn,
        "density": round(nx.density(G), 6),
        "avg_degree": round(float(np.mean(degrees)), 2),
        "max_degree": max(degrees),
        "self_loops": nx.number_of_selfloops(G),
    }
    # Clustering and diameter are expensive; only compute on small graphs.
    if not is_dir and is_conn and n < 5000:
        stats["avg_clustering"] = round(nx.average_clustering(G), 4)
        stats["transitivity"] = round(nx.transitivity(G), 4)
    if is_conn and n < 2000:
        # Weak connectivity guarantees the undirected view is connected.
        stats["diameter"] = nx.diameter(G.to_undirected() if is_dir else G)
    return stats
# ── 3. Shortest paths ─────────────────────────────────────────────────────────
def shortest_path(
    G: nx.Graph,
    source: object,
    target: object,
    weight: str | None = None,
    method: str = "dijkstra",  # "dijkstra" | "bellman-ford"
) -> tuple[list | None, float]:
    """
    Find the shortest path between two nodes.

    weight=None counts hops; otherwise the named edge attribute is summed.
    Returns (path, length), or (None, inf) when target is unreachable.
    Raises ValueError (chained to the original NodeNotFound) when either
    endpoint is missing from the graph.
    """
    try:
        path = nx.shortest_path(G, source, target, weight=weight, method=method)
        length = nx.shortest_path_length(G, source, target, weight=weight, method=method)
        return path, length
    except nx.NetworkXNoPath:
        return None, float("inf")
    except nx.NodeNotFound as e:
        # `from e` keeps the root cause visible in the traceback.
        raise ValueError(f"Node not in graph: {e}") from e
def all_pairs_distances(
    G: nx.Graph,
    weight: str = "weight",
    cutoff: int = None,
) -> dict[object, dict[object, float]]:
    """Return {source: {target: distance}} for every reachable pair,
    computed with Dijkstra; `cutoff` bounds the search depth."""
    pairwise = nx.all_pairs_dijkstra_path_length(G, cutoff=cutoff, weight=weight)
    return {src: lengths for src, lengths in pairwise}
def k_shortest_paths(
    G: nx.Graph,
    source: object,
    target: object,
    k: int = 3,
    weight: str = "weight",
) -> list[list]:
    """
    Find the k shortest simple paths (Yen's algorithm).

    Uses islice so only the first k paths are generated; the previous
    `list(...)[:k]` exhausted the generator first, which enumerates ALL
    simple paths — exponential on dense graphs.
    """
    from itertools import islice
    return list(islice(nx.shortest_simple_paths(G, source, target, weight=weight), k))
# ── 4. Centrality analysis ────────────────────────────────────────────────────
def compute_centrality(
    G: nx.Graph,
    measures: list[str] = None,
    weight: str = "weight",
) -> pd.DataFrame:
    """
    Compute multiple centrality measures for every node.

    Parameters
    ----------
    G : graph to analyse.
    measures : subset of {"degree", "betweenness", "closeness", "eigenvector",
        "pagerank", "katz", "in_degree", "out_degree"};
        defaults to ["degree", "betweenness", "closeness"].
    weight : edge attribute used by weighted measures.

    Returns a DataFrame with one column per successfully computed measure and
    one row per node, sorted by degree centrality when that column exists.
    Failed or inapplicable measures are skipped with a warning instead of the
    previous silent `except: pass` (which also left all-NaN columns for
    directed-only measures on undirected graphs).
    """
    if measures is None:
        measures = ["degree", "betweenness", "closeness"]
    measure_fns = {
        "degree": lambda: nx.degree_centrality(G),
        "betweenness": lambda: nx.betweenness_centrality(G, weight=weight, normalized=True),
        "closeness": lambda: nx.closeness_centrality(G),
        "eigenvector": lambda: nx.eigenvector_centrality_numpy(G, weight=weight),
        "pagerank": lambda: nx.pagerank(G, alpha=0.85, weight=weight),
        "katz": lambda: nx.katz_centrality_numpy(G, weight=weight),
        # Directed-only measures yield {} on undirected graphs → skipped below.
        "in_degree": lambda: nx.in_degree_centrality(G) if G.is_directed() else {},
        "out_degree": lambda: nx.out_degree_centrality(G) if G.is_directed() else {},
    }
    results = {}
    for measure in measures:
        fn = measure_fns.get(measure)
        if fn is None:
            warnings.warn(f"Unknown centrality measure skipped: {measure!r}")
            continue
        try:
            scores = fn()
        except Exception as exc:  # e.g. eigenvector failing to converge
            # Keep best-effort semantics, but say why a column is missing.
            warnings.warn(f"Centrality {measure!r} failed: {exc}")
            continue
        if scores:
            results[measure] = scores
    df = pd.DataFrame(results)
    df.index.name = "node"
    return df.sort_values("degree", ascending=False) if "degree" in df else df
def top_nodes(
    G: nx.Graph,
    measure: str = "pagerank",
    top_k: int = 10,
    weight: str = "weight",
) -> list[tuple[object, float]]:
    """Return the top_k (node, score) pairs for the chosen centrality
    measure; unknown measures fall back to degree centrality."""
    dispatch = {
        "degree": nx.degree_centrality,
        "betweenness": lambda g: nx.betweenness_centrality(g, weight=weight),
        "pagerank": lambda g: nx.pagerank(g, weight=weight),
        "closeness": nx.closeness_centrality,
    }
    score_fn = dispatch.get(measure, nx.degree_centrality)
    ranked = sorted(score_fn(G).items(), key=lambda kv: kv[1], reverse=True)
    return ranked[:top_k]
# ── 5. Community detection ────────────────────────────────────────────────────
def detect_communities(
    G: nx.Graph,
    method: str = "louvain",  # "louvain" | "greedy" | "label_propagation"
    seed: int = 42,
) -> list[frozenset]:
    """
    Partition the (undirected view of) graph into communities.

    Returns a list of frozensets of node IDs; raises ValueError for an
    unrecognised method name.
    """
    # Community algorithms require an undirected graph.
    undirected = G.to_undirected() if G.is_directed() else G
    if method == "louvain":
        return louvain_communities(undirected, seed=seed)
    if method == "greedy":
        return greedy_modularity_communities(undirected)
    if method == "label_propagation":
        from networkx.algorithms.community import label_propagation_communities
        return list(label_propagation_communities(undirected))
    raise ValueError(f"Unknown method: {method}")
def community_stats(
    G: nx.Graph,
    communities: list[frozenset],
) -> pd.DataFrame:
    """Build a per-community table (size, edges, density), largest first,
    and print the overall modularity Q of the partition."""
    records = []
    for comm_id, members in enumerate(sorted(communities, key=len, reverse=True)):
        sub = G.subgraph(members)
        records.append({
            "community_id": comm_id,
            "size": len(members),
            "n_edges": sub.number_of_edges(),
            "density": round(nx.density(sub), 4),
        })
    # Modularity is defined on undirected graphs.
    base = G.to_undirected() if G.is_directed() else G
    q = modularity(base, communities)
    print(f"Modularity Q = {q:.4f} | {len(communities)} communities")
    return pd.DataFrame(records)
# ── 6. Graph generators ───────────────────────────────────────────────────────
def generate_random_graph(
    n: int,
    model: str = "barabasi_albert",  # "erdos_renyi" | "barabasi_albert" | "watts_strogatz"
    seed: int = 42,
    **kwargs,
) -> nx.Graph:
    """
    Generate a synthetic network with n nodes.

    Model-specific keyword arguments:
    - erdos_renyi: p=edge_probability (e.g. p=0.1)
    - barabasi_albert: m=edges_per_new_node (scale-free, e.g. m=2)
    - watts_strogatz: k=ring_neighbors p=rewiring_prob (small-world)
    - powerlaw_cluster: m=edges_per_node p=triangle_probability

    `seed` generalizes the previously hard-coded 42 (same default, so
    existing callers get identical graphs). Raises ValueError for an
    unknown model name instead of an opaque KeyError.
    """
    generators = {
        "erdos_renyi": lambda: nx.erdos_renyi_graph(n, kwargs.get("p", 0.05), seed=seed),
        "barabasi_albert": lambda: nx.barabasi_albert_graph(n, kwargs.get("m", 2), seed=seed),
        "watts_strogatz": lambda: nx.watts_strogatz_graph(
            n, kwargs.get("k", 6), kwargs.get("p", 0.1), seed=seed),
        "powerlaw_cluster": lambda: nx.powerlaw_cluster_graph(
            n, kwargs.get("m", 2), kwargs.get("p", 0.3), seed=seed),
    }
    if model not in generators:
        raise ValueError(f"Unknown model: {model!r}; expected one of {sorted(generators)}")
    return generators[model]()
# ── 7. Subgraph and ego network ───────────────────────────────────────────────
def ego_network(
    G: nx.Graph,
    node: object,
    radius: int = 1,
) -> nx.Graph:
    """Return the ego network of `node`: the subgraph induced by the node
    itself and every node within `radius` hops of it."""
    ego = nx.ego_graph(G, node, radius=radius)
    return ego
def extract_largest_component(G: nx.Graph) -> nx.Graph:
    """Return an independent copy of the largest connected component
    (weakly connected for directed graphs)."""
    component_iter = (
        nx.weakly_connected_components(G)
        if G.is_directed()
        else nx.connected_components(G)
    )
    biggest = max(component_iter, key=len)
    # .copy() detaches the result from the original graph's data.
    return G.subgraph(biggest).copy()
# ── Demo ──────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    print("NetworkX Demo")
    print("=" * 50)

    # Small weighted friendship network.
    friendships = [
        ("Alice", "Bob", 1.0), ("Alice", "Carol", 0.8),
        ("Bob", "Carol", 0.9), ("Bob", "Dave", 0.5),
        ("Carol", "Eve", 0.7), ("Dave", "Eve", 0.6),
        ("Eve", "Frank", 0.4), ("Frank", "Alice", 0.3),
        ("Alice", "Dave", 0.2), ("Carol", "Frank", 0.8),
    ]
    G = build_graph(friendships, directed=False)

    # Basic statistics.
    stats = graph_stats(G)
    print(f"\nGraph stats: {stats}")

    # Weighted shortest path.
    path, length = shortest_path(G, "Alice", "Frank", weight="weight")
    print(f"\nShortest path Alice→Frank: {path} (length={length:.2f})")

    # Node importance via three centrality measures.
    centrality = compute_centrality(G, measures=["degree", "betweenness", "pagerank"])
    print(f"\nCentrality (top 3):\n{centrality.head(3)}")

    # Louvain communities plus a per-community summary table.
    communities = detect_communities(G, method="louvain")
    comm_df = community_stats(G, communities)
    print(f"\nCommunities:\n{comm_df}")
    for i, c in enumerate(communities):
        print(f" Community {i}: {sorted(c)}")

    # Most influential nodes.
    top = top_nodes(G, measure="pagerank", top_k=3)
    print(f"\nTop-3 by PageRank: {top}")

    # Synthetic scale-free benchmark graph.
    sf = generate_random_graph(1000, "barabasi_albert", m=2)
    sf_stats = graph_stats(sf)
    print(f"\nScale-free network (n=1000, m=2): {sf_stats}")
Consider the graph-tool alternative when you need maximum performance on billion-edge graphs with C++ acceleration and comprehensive statistical tests: graph-tool’s C++ backend is 10-100x faster for large graphs, while NetworkX’s Python API is far more accessible, integrates seamlessly with pandas and numpy, supports 200+ built-in graph algorithms, and the from_pandas_edgelist function allows instant graph loading from any structured dataset without custom parsers, making it the default choice for exploratory network analysis and knowledge graph construction. Consider the PyG/DGL alternative when applying graph neural networks for node classification or link prediction: PyTorch Geometric provides GCN/GAT/GraphSAGE layers that require GPU training, while NetworkX handles the classic graph algorithm layer (centrality, community detection, shortest paths) that complements GNNs in production systems where you need interpretable network metrics alongside learned representations. The Claude Skills 360 bundle includes NetworkX skill sets covering graph construction from edges and DataFrames, graph statistics, Dijkstra shortest paths, degree/betweenness/pagerank centrality, Louvain community detection, ego networks, connected components, graph generators, and matplotlib visualization. Start with the free tier to try graph analysis code generation.