PYTHON
Implement Disjoint Set Union (DSU) Data Structure
Understand and implement the Disjoint Set Union (DSU) data structure in Python, a powerful tool for efficiently tracking connected components in graphs or managing dynamic sets.
class DSU:
def __init__(self, n):
# parent[i] stores the parent of element i. If parent[i] == i, i is a root.
self.parent = list(range(n))
# size[i] stores the size of the set rooted at i.
self.size = [1] * n
def find(self, i):
# Path compression: make every node on the path point directly to the root
if self.parent[i] == i:
return i
self.parent[i] = self.find(self.parent[i])
return self.parent[i]
def union(self, i, j):
root_i = self.find(i)
root_j = self.find(j)
if root_i != root_j:
# Union by size: attach smaller tree under root of larger tree
if self.size[root_i] < self.size[root_j]:
self.parent[root_i] = root_j
self.size[root_j] += self.size[root_i]
else:
self.parent[root_j] = root_i
self.size[root_i] += self.size[root_j]
return True # Merged
return False # Already in the same set
def count_sets(self):
# Count the number of distinct roots
return sum(1 for i, p in enumerate(self.parent) if i == p)
# Example Usage:
# We have 5 elements (0, 1, 2, 3, 4)
dsu = DSU(5)
print(f"Initial sets count: {dsu.count_sets()}") # Expected: 5
dsu.union(0, 1) # Merge 0 and 1
dsu.union(2, 3) # Merge 2 and 3
print(f"Sets after two unions: {dsu.count_sets()}") # Expected: 3
dsu.union(0, 2) # Merge the set containing 0 (and 1) with the set containing 2 (and 3)
print(f"Sets after third union: {dsu.count_sets()}") # Expected: 2 (sets are {0,1,2,3} and {4})
print(f"Are 1 and 3 connected? {dsu.find(1) == dsu.find(3)}") # Expected: True
print(f"Are 1 and 4 connected? {dsu.find(1) == dsu.find(4)}") # Expected: False
How it works: The Disjoint Set Union (DSU) data structure, also known as Union-Find, efficiently manages a collection of disjoint sets. It provides two main operations: `find`, which determines the representative (root) of the set an element belongs to (with path compression for optimization), and `union`, which merges two sets into one (with union by size/rank for optimization). This is incredibly useful for solving problems related to connected components in graphs, network connectivity, clustering algorithms, and more, offering nearly constant time complexity for these operations.