A graph is a concise data structure for representing relationships in the form of networks. Graphs consist of nodes, the objects of the network, and edges, the relationships. Below you will find an example of how to define a graph based on an adjacency matrix, and how to compute properties of the graph.
#c combines numbers/characters into a vector
#rbind builds matrices by combining vectors row-wise
adj <- rbind(c(0,1,0,0,0,1),
c(1,0,1,1,0,0),
c(0,1,0,1,0,0),
c(0,1,1,0,1,1),
c(0,0,0,1,0,1),
c(1,0,0,1,1,0)
)
graph <- from_adj_matrix( #function from DiagrammeR for building graph from an adjacency matrix
adj,
mode = "undirected"
)
render_graph(graph) #function from DiagrammeR visualizing a graph
apply(adj,1,sum) #Counts number of edges (1's) across each row of the adjacency matrix
## [1] 2 3 2 4 2 3
#For an undirected graph # of neighbors is the same as node degree
nNeigh <- rep(0,nrow(adj)) #Initializes variable for storing the number of neighbors
for(i in 1:nrow(adj)){#Loops over nodes
nNeigh[i] <- sum(adj[i,]==1|adj[,i]==1) #For each node counts the number of nodes that shares an edge
}
cc <- rep(0,nrow(adj)) #Initializes variable for storing the clustering coef
for(i in 1:nrow(adj)){#Loops over nodes
if(nNeigh[i]>1){ # If the node has more than one neighbor calculate clustering coef
neigh <- which(adj[i,]==1|adj[,i]==1) #Identifies which nodes share an edge with the current node
nMat <- adj[neigh,neigh] #Generates an adjacency matrix (subgraph) describing the edges between the neighbors
cc[i] <- (sum(nMat)/2)/(nNeigh[i]*(nNeigh[i]-1)/2) # Computes clustering coef for current node
} else{#Otherwise assign zero as the value
cc[i] <- 0
}
}
round(cc,digits = 2) #Round clust coef to 2 significant digits
## [1] 0.00 0.33 1.00 0.33 1.00 0.33
mean(cc) #Averages the clustering coeficients for the nodes
## [1] 0.5
sd <- matrix(0,nrow = nrow(adj),ncol = ncol((adj))) #Initializes variable for shortest distance
adjPow <- adj #Initalizes matrix for adjacency matrix raised to a power
for(pow in 1:nrow(adj)){ #Loops over possible powers/lengths
#Checks if the shortest distance has been identified and if atleast one path of the current length exists between nodes. If so, the current length/lower is store in the shortest distance matrix.
sd[adjPow!=0 & sd==0] <- pow
adjPow <- adjPow%*%adj #Increase the power adj matrix with matraix multiplication with the adj matrix
}
#Any node combination that still has a shortest distance of 0 doesn't have a path and therefore has an infinite distance.
sd[(lower.tri(sd)|upper.tri(sd)) & sd==0] <- Inf
sd
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 2 1 2 2 2 1
## [2,] 1 2 1 1 2 2
## [3,] 2 1 2 1 2 2
## [4,] 2 1 1 2 1 1
## [5,] 2 2 2 1 2 1
## [6,] 1 2 2 1 1 2
max(sd) #The diameter is the max of the shortest distances
## [1] 2
deg <- apply(adj,1,sum) #Calculates node degree
nodeDegs <- unique(deg) #Identifies unique values of node degree
propN <- rep(0,length(nodeDegs)) #Initializes variable for proportion of nodes with each degree value
for(i in 1:length(nodeDegs)){# Loops over unique node degree values
propN[i] <- sum(deg==nodeDegs[i])/nrow(adj) #Computes nodes with a given degree
}
logD <- log(nodeDegs) #Log of node degrees
logP <- log(propN) #Log proportion of nodes with each degree
model <- lm(logP~logD) #Trains a linear trend line in log-log space
#Plots degree distribution on log-log plot
plot(nodeDegs,propN,log="xy",cex = 1.5, pch = 16,col="blue",xlab="Degree of node",ylab="Fraction of nodes")
lines(nodeDegs, exp(predict(model, newdata=list(logD=logD)))) #Plots trend line