Source code for graph

# -*- coding: utf-8 -*-
"""
Created on Wed Dec 23 15:11:11 2020

@author: Youri.Baeyens
"""
import pandas as pd

[docs]class connectedComponentsLabeler(object): """ A tool to identify the connected components of an undirected graph. """ hash2seq = {} seq2hash = {} hashes = [] edges = pd.DataFrame({}) numberOfNodes = 0 forest = [] def __init__(self, edges): """ :parameter edges: Edges DataFame (two columns named "a" and "b"). """ self.edges = edges self.hashes = [h for h in set(self.edges["a"]).union(self.edges["b"])] for (hash, seq) in zip(self.hashes, range(len(self.hashes))): self.hash2seq[hash] = seq self.seq2hash[seq] = hash self.numberOfNodes = len(self.hashes) self.forest = [i for i in range(0, self.numberOfNodes)] for row in self.edges.itertuples(): self.link(self.hash2seq[row.a], self.hash2seq[row.b])
[docs] def connectedComponentIdentifier(self, node): """ Identifies the connected component of a specific node. """ if self.forest[node] != node: r = self.connectedComponentIdentifier(self.forest[node]) self.forest[node] = r return r else: return node
[docs] def simplifyForest(self): """ Transform the forest into a list. Function simplifyForest is used to store the connectedComponentIdentifier of every node in forest. """ for i in range(0, len(self.forest)): self.forest[i] = self.connectedComponentIdentifier(i)
def getConnectedCompontents(self): self.simplifyForest() return pd.DataFrame({'id': self.hashes, 'cc': self.forest})