TD10 - Graphs¶

0. Preliminaries¶

import numpy as np

class G:
    def __init__(self,value=None,children=None,color='white'):
        self.value = value        
        self.children=children
        self.color=color
        
    def exploration(self,print_call_graph=False):
        seen_node_list=[self]
        print_call_graph and (print("-->",self.value) )
        for node in seen_node_list:
            for child in node.children:
                if child != None and child not in set(seen_node_list):
                    print_call_graph and print("-->",child.value)
                    seen_node_list.append(child)
            print_call_graph and print("<--",node.value)
        return len(seen_node_list)
    
    def exploration_rec(self,seen_node_list=[],print_call_graph=False,parent=None):
        if not self in set(seen_node_list):
            print_call_graph and (print("-->",self.value) )
            seen_node_list.append(self)
            for child in self.children:
                if child != None:                              
                    child.exploration_rec(seen_node_list,print_call_graph,parent=self)
            print_call_graph and parent!=None and print("<--",parent.value)
        return len(seen_node_list)
    
    def nbPaths(self,destination):
        if self == destination:
            return 1
        nb_paths = 0
        for child in self.children:
            if child != None:
                nb_paths += child.nbPaths(destination)            
        return nb_paths           
    
    def nbColor_from_node(self,color,seen_node_list=[]):
        reachable_color=0        
        if not self in set(seen_node_list):
            seen_node_list.append(self)
            if self.color==color:
                reachable_color+=1
            
            for child in self.children:
                if child != None:                              
                    reachable_color+=child.nbColor_from_node(color,seen_node_list)[0]
        return reachable_color,len(seen_node_list)

    def print_graph(self,depth=0,node_list=[]):
        if self in set(node_list):
            print("|","---" * depth,">",self.value)
            return
        else:
            print("|","---" * depth,">",self.value)
            node_list.append(self)
            for child in self.children:
                if child != None:                                    
                    child.print_graph(depth+1,node_list)

1. Connectivity¶

1.1. Trees¶

To explore the graph in a tree-fashion, we can use two approaches:

an iterative version which scans the tree-form of the graph layer after layer (from the root, gather all children, then all subchildren, etc.) and stop down the tree whenever an already seen node is met; from a graph standpoint, this corresponds to scanning the affinity matrix line by line for every line met in previous steps, starting from the root
a recursive version which, in the tree-form, goes recursively down the tree through every child until a known node is met (or None)

Remark: For pure tree-graphs, no child node already met can be met again, so the condition is never enforced

The exploration in tree-form of the whole graph $\mathcal{G}(\mathcal{V},\mathcal{E})$ with vertices (or nodes) $\mathcal{V}$ and edges (or children for each node) $\mathcal{E}$ requires that every vertex $i$ of the $n=|\mathcal{V}|$ vertices identifies its $m_i$ edges (or "children" nodes). In total, calling $m=|\mathcal{E}|=\sum_{i=1}^n$ the number of directed edges in the graph, the total storage cost of the graph information is $$O(n+m).$$

When storing the graph as its adjacency matrix of size $n\times n$, the cost is instead $$O(n^2).$$

It is thus more convenient to use a list of edges representation on sparse graphs (that is, graphs with few edges per vertex).

Function: exploration INPUT : self of type G , boolean print_call_graph OUTPUT : the number of graph elements seen from the node centered as G [+printout of print_call_graph if argument is True; iterative version]
Function: exploration_rec INPUT : self of type G , boolean print_call_graph OUTPUT : the number of graph elements seen from the node centered as G [+printout of print_call_graph if argument is True; recursive version]

def exploration(self,print_call_graph=False):
    seen_node_list=[self]
    print_call_graph and (print("-->",self.value) )
    for node in seen_node_list:
        for child in node.children:
            if child != None and child not in set(seen_node_list):
                print_call_graph and print("-->",child.value)
                seen_node_list.append(child)
        print_call_graph and print("<--",node.value)
    return len(seen_node_list)

def exploration_rec(self,seen_node_list=[],print_call_graph=False,parent=None):
    if not self in set(seen_node_list):
        print_call_graph and (print("-->",self.value) )
        seen_node_list.append(self)
        for child in self.children:
            if child != None:                              
                child.exploration_rec(seen_node_list,print_call_graph,parent=self)
        print_call_graph and parent!=None and print("<--",parent.value)
    return len(seen_node_list)

### Deterministic tree graph

tree_graph = G('A',[G('B',[G('F',[]),G('G',[])]),G('D',[G('H',[])])])

print("Visual representation of tree_graph:\n")
tree_graph.print_graph(node_list=[])

print("\nIterative exploration of tree_graph:\n")
print("\nNumber of nodes seen from root:\n",tree_graph.exploration(print_call_graph=True),"\n")

print("\nRecursive exploration of tree_graph:\n")
print("\nNumber of nodes seen from root, recursive search:\n",tree_graph.exploration_rec([],print_call_graph=True),"\n")

Visual representation of tree_graph:

|  > A
| --- > B
| ------ > F
| ------ > G
| --- > D
| ------ > H

Iterative exploration of tree_graph:

--> A
--> B
--> D
<-- A
--> F
--> G
<-- B
--> H
<-- D
<-- F
<-- G
<-- H

Number of nodes seen from root:
 6 


Recursive exploration of tree_graph:

--> A
--> B
--> F
<-- B
--> G
<-- B
<-- A
--> D
--> H
<-- D
<-- A

Number of nodes seen from root, recursive search:
 6

1.2. Generic graphs¶

### Random graph generator

graph_size = 10
list_values = np.random.randint(0,100,graph_size)

# Affinity matrix of the graph with 'p' probability of connection
p = .2
A = np.random.binomial(1,p,(graph_size,graph_size))
A = A-np.diag(np.diag(A))

nodes = []

print("List of values in the graph:",list_values)

for i in range(len(list_values)):
    nodes.append(G(value=list_values[i],children=[]))

for i in range(len(nodes)):
    for j in range(len(nodes)):
        if A[i,j]>0:
            nodes[i].children.append(nodes[j])            
            
print("\nAdjacency matrix:\n",A)
        
node_index = 1
print("\nTree representation of the graph starting from node[",node_index,"]:\n")   
    
nodes[node_index].print_graph(node_list=[])

print("\nNumber of nodes seen from node[",node_index,"]:\n",nodes[node_index].exploration())

print("\nIterative exploration of random graph:\n")
print("\nNumber of nodes seen from node[",node_index,"]:\n",nodes[node_index].exploration(print_call_graph=True),"\n")

print("\nRecursive exploration of random graph:\n")
print("\nNumber of nodes seen from node[",node_index,"]:\n",nodes[node_index].exploration_rec([],print_call_graph=True),"\n")

List of values in the graph: [22 72 38 38 93 44 97 66  0 67]

Adjacency matrix:
 [[0 1 1 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 1 0 0]
 [0 0 0 0 0 1 0 0 0 0]
 [0 0 1 0 0 0 0 0 1 0]
 [1 0 0 0 0 0 0 0 1 0]
 [0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 1 0 0 0 0]
 [1 0 0 0 0 0 0 0 1 0]]

Tree representation of the graph starting from node[ 1 ]:

|  > 72
| --- > 93
| ------ > 22
| --------- > 72
| --------- > 38
| ------------ > 44
| --------------- > 93
| ------ > 0
| --------- > 93
| --------- > 44
| --- > 66

Number of nodes seen from node[ 1 ]:
 7

Iterative exploration of random graph:

--> 72
--> 93
--> 66
<-- 72
--> 22
--> 0
<-- 93
<-- 66
--> 38
<-- 22
--> 44
<-- 0
<-- 38
<-- 44

Number of nodes seen from node[ 1 ]:
 7 


Recursive exploration of random graph:

--> 72
--> 93
--> 22
--> 38
--> 44
<-- 38
<-- 22
<-- 93
--> 0
<-- 93
<-- 72
--> 66
<-- 72

Number of nodes seen from node[ 1 ]:
 7

2. Directed Acyclic Graphs (DAG)¶

To evaluate the number of paths from a source node to a destination node, it suffices to accumulate $1$ whenever source == destination and, otherwise, to add the number of paths from each child of the source to the destination. The complexity of the operation is in the worst case similar to previously $O(n+m)$ whenever all children of all nodes need to scanned.

Function: nbPaths INPUT : self of node type , destination of node type OUTPUT : number of paths between self and destination in an acyclic graph

def nbPaths(self,destination):
    if self == destination:
        return 1
    nb_paths = 0
    for child in self.children:
        if child != None:
            nb_paths += child.nbPaths(destination)            
    return nb_paths

I use here a ugly technique to generate a random DAG: the affinity matrix of DAGs is nilpotent, so that all its eigenvalues are zeros. I generate random affinity matrices until this condition is met. Particular care must be taken to avoid sufficiently many nodes while at the same time ensuring convergence of the search.

graph_size = 25
list_values = np.random.randint(0,200,graph_size)
                                
p = .1
A = np.ones( (graph_size,graph_size) )
while(np.max(np.abs(np.linalg.eig(A)[0])) > 1e-10 ):
    A = np.random.binomial(1,p,(graph_size,graph_size))
    A = A-np.diag(np.diag(A))
    
nodes = []

print("List of values in the graph:",list_values)

for i in range(len(list_values)):
    nodes.append(G(value=list_values[i],children=[]))

for i in range(len(nodes)):
    for j in range(len(nodes)):
        if A[i,j]>0:
            nodes[i].children.append(nodes[j])            
            
print("\nAdjacency matrix:\n",A)
        
node_index = 0
print("\nTree representation of the graph starting from node[",node_index,"]:\n")   
    
nodes[node_index].print_graph(node_list=[])

List of values in the graph: [104 102  78  88  25 123  32 192 174 170 188 199 187 116 106  25   0 113
 166 166   5 177  75 180  47]

Adjacency matrix:
 [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0]
 [0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]]

Tree representation of the graph starting from node[ 0 ]:

|  > 104
| --- > 0
| ------ > 47
| --------- > 166
| --- > 180
| ------ > 106
| ------ > 113
| --------- > 170
| ------------ > 25
| --------------- > 166
| --------------- > 177
| ------------ > 188
| --------------- > 47
| --------- > 116
| ------------ > 106
| ------------ > 166
| --------------- > 25
| ------ > 166

node_s=nodes[0]
node_t=nodes[14]
print("\nNumber of paths from node[",node_s.value,"] to node[",node_t.value,"]:\n",node_s.nbPaths(node_t))

Number of paths from node[ 104 ] to node[ 106 ]:
 2

3. Colored graphs¶

To evaluate the probability of falling on two connected red nodes $N_1$ and $N_2$ in a colored graph, it suffices to observe that this probability equals: $$ \sum_{ \mathcal{C}\in \{ \text{connected components} \} } \mathbb{P} ( N_1,N_2\in\mathcal C ) \cdot \mathbb{P}(N_1,N_2\text{ red}) = \sum_{ \mathcal{C}\in \{ \text{connected components} \} } \frac{|\text{red nodes in $\mathcal C$}|(|\text{red nodes in $\mathcal C$}|-1)}{|\mathcal C|(|\mathcal C|-1)} \frac{|\mathcal C|(|\mathcal C|-1)}{|\mathcal V|(|\mathcal V|-1)} = \sum_{ \mathcal{C}\in \{ \text{connected components} \} } \frac{|\text{red nodes in $\mathcal C$}|(|\text{red nodes in $\mathcal C$}|-1)}{|\mathcal V|(|\mathcal V|-1)}.$$

It thus "suffices" to evaluate the number of red nodes in every connected component of the graph. This can be done by exploration from every node in the graph, keeping in memory the already met nodes.

Function : nbColor_from_node INPUT : self of type node, color, seen_node_list OUTPUT : number of reachable color nodes , number of seen nodes during exploration

Function : probaColor INPUT : nodes (list of nodes in graph), color, print_option OUTPUT : sample probability to draw two random connected red nodes in the graph; if print_option is True, prints out the number of red nodes and total number of nodes in each connected component of the graph.

# At G level:
def nbColor_from_node(self,color,seen_node_list=[]):
    reachable_color=0        
    if not self in set(seen_node_list):
        seen_node_list.append(self)
        if self.color==color:
            reachable_color+=1

        for child in self.children:
            if child != None:                              
                reachable_color+=child.nbColor_from_node(color,seen_node_list)[0]
    return reachable_color,len(seen_node_list)

# Independent function

def probaColor(nodes,color,print_option=False):
    probaC = 0
    seen_node_list = []
    nbTotal_running = 0
    for node in nodes:
        if node not in set(seen_node_list):
            nbColor,nbTotal = node.nbColor_from_node(color,seen_node_list)
            probaC += nbColor*(nbColor-1)/(len(nodes)*(len(nodes)-1))
            print_option and print("\n New connected component found with",nbColor,"'red' nodes out of",nbTotal-nbTotal_running,"total nodes.")
            nbTotal_running = nbTotal
    return probaC

### Random symmetric colored graph generator

graph_size = 10
list_values = np.random.randint(0,100,graph_size)

# Affinity matrix of the graph with 'p' probability of connection
p = .2
A = np.random.binomial(1,p,(graph_size,graph_size))
A = A-np.diag(np.diag(A))
A = np.tril(A)+np.tril(A).T

nodes = []

pRed = .5
for i in range(len(list_values)):
    if np.random.randint(0,2) == 0:
        color='red'
    else:
        color='white'
    nodes.append(G(value=list_values[i],children=[],color=color))


print("List of values in the graph and their colors:\n",list_values,"\n",[1*(node.color=='red') for node in nodes])    
    
for i in range(len(nodes)):
    for j in range(len(nodes)):
        if A[i,j]>0:
            nodes[i].children.append(nodes[j])
            
print("\nAdjacency matrix:\n",A)
        
node_index = 0
print("\nTree representation of the graph starting from node[",node_index,"]:\n")   
    
nodes[node_index].print_graph(node_list=[])



print("\nProba 'red'=",probaColor(nodes,'red',print_option=True))

List of values in the graph and their colors:
 [34 11 43 87 43 14 60  9 82 54] 
 [0, 1, 0, 1, 1, 0, 1, 1, 0, 0]

Adjacency matrix:
 [[0 0 1 0 1 0 1 0 0 0]
 [0 0 1 0 0 0 0 1 0 0]
 [1 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 1]
 [1 0 0 0 0 0 0 0 1 0]
 [0 0 0 1 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0]]

Tree representation of the graph starting from node[ 0 ]:

|  > 34
| --- > 43
| ------ > 34
| ------ > 11
| --------- > 43
| --------- > 9
| ------------ > 11
| --- > 43
| ------ > 34
| ------ > 82
| --------- > 43
| --- > 60
| ------ > 34

 New connected component found with 4 'red' nodes out of 7 total nodes.

 New connected component found with 1 'red' nodes out of 3 total nodes.

Proba 'red'= 0.13333333333333333

EXTRAS¶

from functools import wraps

def trace(func):
    func_name = func.__name__
    separator = '|  '

    trace.recursion_depth = 0

    @wraps(func)
    def traced_func(*args, **kwargs):

        # repeat separator N times (where N is recursion depth)
        # `map(str, args)` prepares the iterable with str representation of positional arguments
        # `", ".join(map(str, args))` will generate comma-separated list of positional arguments
        # `"x"*5` will print `"xxxxx"` - so we can use multiplication operator to repeat separator
        print(f'{separator * trace.recursion_depth}|-- {func_name}({", ".join(map(str, args))})')
        # we're diving in
        trace.recursion_depth += 1
        result = func(*args, **kwargs)
        # going out of that level of recursion
        trace.recursion_depth -= 1
        # result is printed on the next level
        print(f'{separator * (trace.recursion_depth + 1)}|-- return {result}')

        return result

    return traced_func