The complete R script

Here is the complete R script that we have used to demonstrate product network analysis:


library(igraph, quietly = TRUE)

## Creating a simple graph
simple.graph <- graph_from_literal(A-B, B-C, C-D, E-F, A-E, E-C)
plot.igraph(simple.graph)

## Graph properties
E(simple.graph) # Edges
V(simple.graph) # Vertices

## Graph attributes
V(simple.graph)$name <- c('alice', 'bob','charlie','david', 'eli','francis')
simple.graph <- set_vertex_attr(simple.graph ,"age", value = c('11', '11','15','9', '8','11'))
plot.igraph(simple.graph)
V(simple.graph)$color <- ifelse(V(simple.graph)$age == '11', "blue","green")
plot.igraph(simple.graph)

# Structural properties
degree(simple.graph) # degree of nodes
E(simple.graph)$weight <- c(10,20,35,15,25,35)
strength(simple.graph) # strength of nodes
get.adjacency(simple.graph) # adjacency matrix


# Delete edges and nodes
simple.graph <- delete.edges(simple.graph, "alice|bob" )
simple.graph <- delete.vertices(simple.graph, 'francis')
plot(simple.graph)


# use case date
data <- read.csv('data.csv')
head(data)

## Prepare the data
library(arules)
transactions.obj <- read.transactions(file = 'data.csv', format = "single",
sep = ",",
cols = c("order_id", "product_id"),
rm.duplicates = FALSE,
quote = "", skip = 0,
encoding = "unknown")
transactions.obj

# Interest Measures
support <- 0.015

# Frequent item sets
parameters = list(
support = support,
minlen = 2, # Minimal number of items per item set
maxlen = 2, # Maximal number of items per item set
target = "frequent itemsets"
)

freq.items <- apriori(transactions.obj, parameter = parameters)

# Let us examine our freq item sites
freq.items.df <- data.frame(item_set = labels(freq.items)
, support = freq.items@quality)
freq.items.df$item_set <- as.character(freq.items.df$item_set)
head(freq.items.df)

# Clean up for item pairs
library(tidyr)
freq.items.df <- separate(data = freq.items.df, col = item_set, into = c("item.1", "item.2"), sep = ",")
freq.items.df[] <- lapply(freq.items.df, gsub, pattern='\{', replacement='')
freq.items.df[] <- lapply(freq.items.df, gsub, pattern='\}', replacement='')
head(freq.items.df)

# Prepare data for graph
network.data <- freq.items.df[,c('item.1','item.2','support.count')]
names(network.data) <- c("from","to","weight")
head(network.data)

## Build the graph
library(igraph, quietly = TRUE)
set.seed(29)
my.graph <- graph_from_data_frame(network.data)
plot.igraph(my.graph,
layout=layout.fruchterman.reingold,
vertex.label.cex=.5,
edge.arrow.size=.1)


## Clustering
random.cluster <- walktrap.community(my.graph)
str(random.cluster)
random.cluster
groupings.df <- data.frame(products = random.cluster$names, group = random.cluster$membership)
head(groupings.df)
groupings.df[groupings.df$group == 2,]
groupings.df[groupings.df$group == 1,]

plot(random.cluster,my.graph,
layout=layout.fruchterman.reingold,
vertex.label.cex=.5,
edge.arrow.size=.1)

get.adjacency(my.graph)
strength(my.graph)
degree(my.graph)

## Random walks
for(var in seq(1,5)){
print (random_walk(my.graph, start = "Banana", steps = 2))
}

random_walk(my.graph, start = "Banana", steps = 3)
random_walk(my.graph, start = "Banana", steps = 4)
random_walk(my.graph, start = "Banana", steps = 5)

RShiny app code:

library(shiny)
library(arules)
library(igraph, quietly = TRUE)


server <- function(input, output) {


trans.obj <- reactive({
data <- input$datafile
transactions.obj <- read.transactions(file = data$datapath, format = "single",
sep = ",",
cols = c("order_id", "product_id"),
rm.duplicates = FALSE,
quote = "", skip = 0,
encoding = "unknown")
transactions.obj


})

trans.df <- reactive({

data <- input$datafile
if(is.null(data)){return(NULL)}
trans.df <- read.csv(data$datapath)
return(trans.df)
})

network.data <- reactive({
transactions.obj <- trans.obj()
support <- 0.015

# Frequent item sets
parameters = list(
support = support,
minlen = 2, # Minimal number of items per item set
maxlen = 2, # Maximal number of items per item set
target = "frequent itemsets"
)

freq.items <- apriori(transactions.obj, parameter = parameters)

# Let us examine our freq item sites
freq.items.df <- data.frame(item_set = labels(freq.items)
, support = freq.items@quality)
freq.items.df$item_set <- as.character(freq.items.df$item_set)

# Clean up for item pairs
library(tidyr)
freq.items.df <- separate(data = freq.items.df, col = item_set, into = c("item.1", "item.2"), sep = ",")
freq.items.df[] <- lapply(freq.items.df, gsub, pattern='\{', replacement='')
freq.items.df[] <- lapply(freq.items.df, gsub, pattern='\}', replacement='')

# Prepare data for graph
network.data <- freq.items.df[,c('item.1','item.2','support.count')]
names(network.data) <- c("from","to","weight")
return(network.data)

})

output$transactions <- renderDataTable({
trans.df()
})
output$ppairs <- renderDataTable({

network.data()

})

output$community <- renderPlot({
network.data <- network.data()
my.graph <- graph_from_data_frame(network.data)
random.cluster <- walktrap.community(my.graph)
plot(random.cluster,my.graph,
layout=layout.fruchterman.reingold,
vertex.label.cex=.5,
edge.arrow.size=.1,height = 1200, width = 1200)
})


}

ui <- fluidPage(
navbarPage("Product Pairs",
tabPanel("Transactions"
, fileInput("datafile", "select transactions csv file",
accept = c(
"text/csv",
"text/comma-separated-values,text/plain",
".csv"
)
)
, dataTableOutput("transactions")
),
tabPanel("Product Pairs"
,dataTableOutput("ppairs")),
tabPanel("Community"
,plotOutput("community"))
)
)

shinyApp(ui = ui, server = server)
..................Content has been hidden....................

You can't read the all page of ebook, please click here login for view all page.
Reset