Here is the complete R script that we have used to demonstrate product network analysis:
library(igraph, quietly = TRUE)
## Creating a simple graph
simple.graph <- graph_from_literal(A-B, B-C, C-D, E-F, A-E, E-C)
plot.igraph(simple.graph)
## Graph properties
E(simple.graph) # Edges
V(simple.graph) # Vertices
## Graph attributes
V(simple.graph)$name <- c('alice', 'bob','charlie','david', 'eli','francis')
simple.graph <- set_vertex_attr(simple.graph ,"age", value = c('11', '11','15','9', '8','11'))
plot.igraph(simple.graph)
V(simple.graph)$color <- ifelse(V(simple.graph)$age == '11', "blue","green")
plot.igraph(simple.graph)
# Structural properties
degree(simple.graph) # degree of nodes
E(simple.graph)$weight <- c(10,20,35,15,25,35)
strength(simple.graph) # strength of nodes
get.adjacency(simple.graph) # adjacency matrix
# Delete edges and nodes
simple.graph <- delete.edges(simple.graph, "alice|bob" )
simple.graph <- delete.vertices(simple.graph, 'francis')
plot(simple.graph)
# use case date
data <- read.csv('data.csv')
head(data)
## Prepare the data
library(arules)
transactions.obj <- read.transactions(file = 'data.csv', format = "single",
sep = ",",
cols = c("order_id", "product_id"),
rm.duplicates = FALSE,
quote = "", skip = 0,
encoding = "unknown")
transactions.obj
# Interest Measures
support <- 0.015
# Frequent item sets
parameters = list(
support = support,
minlen = 2, # Minimal number of items per item set
maxlen = 2, # Maximal number of items per item set
target = "frequent itemsets"
)
freq.items <- apriori(transactions.obj, parameter = parameters)
# Let us examine our freq item sites
freq.items.df <- data.frame(item_set = labels(freq.items)
, support = freq.items@quality)
freq.items.df$item_set <- as.character(freq.items.df$item_set)
head(freq.items.df)
# Clean up for item pairs
library(tidyr)
freq.items.df <- separate(data = freq.items.df, col = item_set, into = c("item.1", "item.2"), sep = ",")
freq.items.df[] <- lapply(freq.items.df, gsub, pattern='\{', replacement='')
freq.items.df[] <- lapply(freq.items.df, gsub, pattern='\}', replacement='')
head(freq.items.df)
# Prepare data for graph
network.data <- freq.items.df[,c('item.1','item.2','support.count')]
names(network.data) <- c("from","to","weight")
head(network.data)
## Build the graph
library(igraph, quietly = TRUE)
set.seed(29)
my.graph <- graph_from_data_frame(network.data)
plot.igraph(my.graph,
layout=layout.fruchterman.reingold,
vertex.label.cex=.5,
edge.arrow.size=.1)
## Clustering
random.cluster <- walktrap.community(my.graph)
str(random.cluster)
random.cluster
groupings.df <- data.frame(products = random.cluster$names, group = random.cluster$membership)
head(groupings.df)
groupings.df[groupings.df$group == 2,]
groupings.df[groupings.df$group == 1,]
plot(random.cluster,my.graph,
layout=layout.fruchterman.reingold,
vertex.label.cex=.5,
edge.arrow.size=.1)
get.adjacency(my.graph)
strength(my.graph)
degree(my.graph)
## Random walks
for(var in seq(1,5)){
print (random_walk(my.graph, start = "Banana", steps = 2))
}
random_walk(my.graph, start = "Banana", steps = 3)
random_walk(my.graph, start = "Banana", steps = 4)
random_walk(my.graph, start = "Banana", steps = 5)
RShiny app code:
library(shiny)
library(arules)
library(igraph, quietly = TRUE)
server <- function(input, output) {
trans.obj <- reactive({
data <- input$datafile
transactions.obj <- read.transactions(file = data$datapath, format = "single",
sep = ",",
cols = c("order_id", "product_id"),
rm.duplicates = FALSE,
quote = "", skip = 0,
encoding = "unknown")
transactions.obj
})
trans.df <- reactive({
data <- input$datafile
if(is.null(data)){return(NULL)}
trans.df <- read.csv(data$datapath)
return(trans.df)
})
network.data <- reactive({
transactions.obj <- trans.obj()
support <- 0.015
# Frequent item sets
parameters = list(
support = support,
minlen = 2, # Minimal number of items per item set
maxlen = 2, # Maximal number of items per item set
target = "frequent itemsets"
)
freq.items <- apriori(transactions.obj, parameter = parameters)
# Let us examine our freq item sites
freq.items.df <- data.frame(item_set = labels(freq.items)
, support = freq.items@quality)
freq.items.df$item_set <- as.character(freq.items.df$item_set)
# Clean up for item pairs
library(tidyr)
freq.items.df <- separate(data = freq.items.df, col = item_set, into = c("item.1", "item.2"), sep = ",")
freq.items.df[] <- lapply(freq.items.df, gsub, pattern='\{', replacement='')
freq.items.df[] <- lapply(freq.items.df, gsub, pattern='\}', replacement='')
# Prepare data for graph
network.data <- freq.items.df[,c('item.1','item.2','support.count')]
names(network.data) <- c("from","to","weight")
return(network.data)
})
output$transactions <- renderDataTable({
trans.df()
})
output$ppairs <- renderDataTable({
network.data()
})
output$community <- renderPlot({
network.data <- network.data()
my.graph <- graph_from_data_frame(network.data)
random.cluster <- walktrap.community(my.graph)
plot(random.cluster,my.graph,
layout=layout.fruchterman.reingold,
vertex.label.cex=.5,
edge.arrow.size=.1,height = 1200, width = 1200)
})
}
ui <- fluidPage(
navbarPage("Product Pairs",
tabPanel("Transactions"
, fileInput("datafile", "select transactions csv file",
accept = c(
"text/csv",
"text/comma-separated-values,text/plain",
".csv"
)
)
, dataTableOutput("transactions")
),
tabPanel("Product Pairs"
,dataTableOutput("ppairs")),
tabPanel("Community"
,plotOutput("community"))
)
)
shinyApp(ui = ui, server = server)