PYTHON
Implementing the Disjoint Set Union (DSU) Data Structure
Understand how to build a Disjoint Set Union (DSU) data structure for efficiently tracking connected components or equivalence relations in Python with optimizations.
class DSU:
def __init__(self, n):
# parent[i] stores the parent of element i. If parent[i] == i, i is a representative.
self.parent = list(range(n))
# rank[i] stores the height/size of the tree rooted at i, used for optimization.
self.rank = [0] * n
def find(self, i):
# Path compression: make every node on the path point directly to the root
if self.parent[i] == i:
return i
self.parent[i] = self.find(self.parent[i])
return self.parent[i]
def union(self, i, j):
root_i = self.find(i)
root_j = self.find(j)
if root_i != root_j:
# Union by rank: attach smaller rank tree under root of higher rank tree
if self.rank[root_i] < self.rank[root_j]:
self.parent[root_i] = root_j
elif self.rank[root_i] > self.rank[root_j]:
self.parent[root_j] = root_i
else:
self.parent[root_j] = root_i
self.rank[root_i] += 1
return True # Merged two sets
return False # Already in the same set
# Example Usage
num_elements = 5 # Elements 0, 1, 2, 3, 4
dsu = DSU(num_elements)
print(f"Initial parent array: {dsu.parent}")
dsu.union(0, 1)
print(f"Union(0, 1) -> parent: {dsu.parent}, find(0): {dsu.find(0)}, find(1): {dsu.find(1)}")
dsu.union(2, 3)
print(f"Union(2, 3) -> parent: {dsu.parent}, find(2): {dsu.find(2)}, find(3): {dsu.find(3)}")
dsu.union(0, 2)
print(f"Union(0, 2) -> parent: {dsu.parent}, find(0): {dsu.find(0)}, find(1): {dsu.find(1)}, find(2): {dsu.find(2)}, find(3): {dsu.find(3)}")
print(f"Are 1 and 3 connected? {dsu.find(1) == dsu.find(3)}")
print(f"Are 0 and 4 connected? {dsu.find(0) == dsu.find(4)}")
How it works: The Disjoint Set Union (DSU) data structure, also known as Union-Find, efficiently manages a collection of disjoint sets. It supports two primary operations: `find(i)` which determines the representative (root) of the set containing element `i`, and `union(i, j)` which merges the sets containing `i` and `j`. This implementation includes path compression and union by rank optimizations for nearly constant time operations on average, making it crucial for problems like finding connected components in a graph or Kruskal's algorithm.