## Identify click sprees ("sessions") based on how far apart clicks are in time.
##
## Functional sequence to be applied to a data frame with the columns
## `userid`, `campaignid` and `datumzeit` (click timestamp; the arithmetic
## below assumes a POSIXct date-time). Rows are sorted by user, campaign and
## time, and these columns are added:
##   difftime            seconds since the previous click of the same user and
##                       campaign; the first click of each group is measured
##                       against the Unix epoch, so its value is very large
##   clickspree_id       running spree counter within user and campaign; a new
##                       spree starts whenever the gap exceeds 20 minutes
##   clickspree_nclicks  number of clicks in the spree
##   clickspree_nmins    minutes between the first and last click of the spree
##   clickspree_ageDays  days between the spree's first click and Sys.time()
## The result is ungrouped.
identify_clicksprees <- . %>%
  arrange(userid, campaignid, datumzeit) %>%
  group_by(userid, campaignid) %>%
  mutate(
    # POSIXct values are seconds since 1970-01-01 UTC, so working on the
    # numeric values gives the gap in seconds directly. The numeric default
    # of 0 keeps lag() type-stable and makes the first click of every group
    # look "far away", which opens spree number 1 below.
    difftime = as.numeric(datumzeit),
    difftime = difftime - lag(difftime, default = 0)
  ) %>%
  mutate(clickspree_id = cumsum(difftime > 60 * 20)) %>% # 60 seconds x 20 minutes
  # `.add = TRUE` keeps the user/campaign grouping and adds the spree id
  # (`add =` is the deprecated spelling of this argument).
  group_by(clickspree_id, .add = TRUE) %>%
  mutate(
    clickspree_nclicks = n(),
    # base::difftime is spelled out because the data also has a column that
    # is called `difftime`.
    clickspree_nmins = as.numeric(
      base::difftime(max(datumzeit), min(datumzeit), units = "mins")
    ),
    clickspree_ageDays = as.numeric(
      base::difftime(Sys.time(), min(datumzeit), units = "days")
    )
  ) %>%
  ungroup()

## Identify click sprees ("sessions") based on how far apart clicks are in time.
##
## Functional sequence to be applied to a data frame with the columns
## `userid`, `campaignid` and `datumzeit` (click timestamp; the arithmetic
## below assumes a POSIXct date-time). Rows are sorted by user, campaign and
## time, and these columns are added:
##   difftime            seconds since the previous click of the same user and
##                       campaign; the first click of each group is measured
##                       against the Unix epoch, so its value is very large
##   clickspree_id       running spree counter within user and campaign; a new
##                       spree starts whenever the gap exceeds 20 minutes
##   clickspree_nclicks  number of clicks in the spree
##   clickspree_nmins    minutes between the first and last click of the spree
##   clickspree_ageDays  days between the spree's first click and Sys.time()
## The result is ungrouped.
identify_clicksprees <- . %>%
  arrange(userid, campaignid, datumzeit) %>%
  group_by(userid, campaignid) %>%
  mutate(
    # POSIXct values are seconds since 1970-01-01 UTC, so working on the
    # numeric values gives the gap in seconds directly. The numeric default
    # of 0 keeps lag() type-stable and makes the first click of every group
    # look "far away", which opens spree number 1 below.
    difftime = as.numeric(datumzeit),
    difftime = difftime - lag(difftime, default = 0)
  ) %>%
  mutate(clickspree_id = cumsum(difftime > 60 * 20)) %>% # 60 seconds x 20 minutes
  # `.add = TRUE` keeps the user/campaign grouping and adds the spree id
  # (`add =` is the deprecated spelling of this argument).
  group_by(clickspree_id, .add = TRUE) %>%
  mutate(
    clickspree_nclicks = n(),
    # base::difftime is spelled out because the data also has a column that
    # is called `difftime`.
    clickspree_nmins = as.numeric(
      base::difftime(max(datumzeit), min(datumzeit), units = "mins")
    ),
    clickspree_ageDays = as.numeric(
      base::difftime(Sys.time(), min(datumzeit), units = "days")
    )
  ) %>%
  ungroup()

For the clustering, I used:

#' Cluster the timestamps of a user by time of day
#'
#' The time of day of every timestamp (hours and minutes; seconds are ignored)
#' is turned into an angle on a 24-hour circle and represented by its sine and
#' cosine, so that times shortly before and shortly after midnight lie close
#' together. These two coordinates, optionally extended by `othervars`, are
#' passed to the `dbscan` clusterer defined elsewhere in this script.
#'
#' @param datumzeit POSIXct, response timestamp. The time of day is read in
#'   the time zone carried by `datumzeit`.
#' @param eps DBSCAN epsilon, maximum distance for merging points into clusters
#' @param othervars properly scaled matrix of other variables to include into
#'   clustering, besides the time of day of the response (e.g. age of click,
#'   weekday, ...)
#' @return The `class_ids` element of the fitted clusterer (presumably one
#'   cluster id per timestamp -- confirm against the RWeka documentation);
#'   `integer(0)` for empty input.
getClusters <- function(datumzeit, eps = 0.5, othervars = NULL) {
  # Example input for interactive debugging:
  # datumzeit <- seq(as.POSIXct("2017-12-18 00:00:00"), as.POSIXct("2017-12-18 23:00:00"), "1 hour")

  # Nothing to cluster: return early instead of handing an empty matrix to Weka.
  if (length(datumzeit) == 0L) {
    return(integer(0))
  }

  # Time of day as a decimal hour, read directly from the timestamp (this
  # replaces the detour through the deprecated hms::as.hms()).
  hour_of_day <- lubridate::hour(datumzeit) + lubridate::minute(datumzeit) / 60

  # Position on the 24-hour circle.
  angle <- 2 * pi * hour_of_day / 24
  m <- cbind(x = sin(angle), y = cos(angle))

  # Debugging plots:
  # as_data_frame(m) %>% ggplot(aes(x,y)) + geom_point() + theme_minimal()
  # data.frame(x = 0:23) %>% ggplot(aes(x)) + geom_rug() + theme_minimal() + theme(panel.grid = element_blank())

  if (!is.null(othervars)) {
    m <- cbind(m, othervars)
  }

  # Weka options: -E is passed `eps`, -M is passed 1.
  # NOTE(review): in Weka's DBSCAN these should be epsilon and minPoints -- confirm.
  res <- dbscan(m, c("-E", eps, "-M", 1))
  res$class_ids
}

#' Cluster the timestamps of a user by time of day
#'
#' The time of day of every timestamp (hours and minutes; seconds are ignored)
#' is turned into an angle on a 24-hour circle and represented by its sine and
#' cosine, so that times shortly before and shortly after midnight lie close
#' together. These two coordinates, optionally extended by `othervars`, are
#' passed to the `dbscan` clusterer defined elsewhere in this script.
#'
#' @param datumzeit POSIXct, response timestamp. The time of day is read in
#'   the time zone carried by `datumzeit`.
#' @param eps DBSCAN epsilon, maximum distance for merging points into clusters
#' @param othervars properly scaled matrix of other variables to include into
#'   clustering, besides the time of day of the response (e.g. age of click,
#'   weekday, ...)
#' @return The `class_ids` element of the fitted clusterer (presumably one
#'   cluster id per timestamp -- confirm against the RWeka documentation);
#'   `integer(0)` for empty input.
getClusters <- function(datumzeit, eps = 0.5, othervars = NULL) {
  # Example input for interactive debugging:
  # datumzeit <- seq(as.POSIXct("2017-12-18 00:00:00"), as.POSIXct("2017-12-18 23:00:00"), "1 hour")

  # Nothing to cluster: return early instead of handing an empty matrix to Weka.
  if (length(datumzeit) == 0L) {
    return(integer(0))
  }

  # Time of day as a decimal hour, read directly from the timestamp (this
  # replaces the detour through the deprecated hms::as.hms()).
  hour_of_day <- lubridate::hour(datumzeit) + lubridate::minute(datumzeit) / 60

  # Position on the 24-hour circle.
  angle <- 2 * pi * hour_of_day / 24
  m <- cbind(x = sin(angle), y = cos(angle))

  # Debugging plots:
  # as_data_frame(m) %>% ggplot(aes(x,y)) + geom_point() + theme_minimal()
  # data.frame(x = 0:23) %>% ggplot(aes(x)) + geom_rug() + theme_minimal() + theme(panel.grid = element_blank())

  if (!is.null(othervars)) {
    m <- cbind(m, othervars)
  }

  # Weka options: -E is passed `eps`, -M is passed 1.
  # NOTE(review): in Weka's DBSCAN these should be epsilon and minPoints -- confirm.
  res <- dbscan(m, c("-E", eps, "-M", 1))
  res$class_ids
}

I also used the following libraries:

# Set the WEKA_HOME environment variable before the Weka-related packages are
# loaded.
# NOTE(review): this is a machine-specific Windows path; point it at your own
# Weka directory.
Sys.setenv("WEKA_HOME"="C:\\Users\\Rene\\Weka")

# Java bridge and Weka interface packages.
library("rJava")

library("RWekajars")

library("RWeka")

# Ask the Weka package manager to load the "optics_dbScan" package.
# NOTE(review): presumably this is what provides the DBSCAN class used below,
# and the package must already be installed -- confirm.
WPM("load-package", "optics_dbScan")

# R interface to the Weka class weka/clusterers/DBSCAN; getClusters() calls it
# as dbscan().
dbscan <- make_Weka_clusterer('weka/clusterers/DBSCAN')

# Data manipulation (dplyr and the %>% pipe), date-time helpers and
# time-of-day class.
library(tidyverse)

library(lubridate)

library(hms)

# Java bridge and Weka interface packages.
library("rJava")

library("RWekajars")

library("RWeka")

# Ask the Weka package manager to load the "optics_dbScan" package.
# NOTE(review): presumably this is what provides the DBSCAN class used below,
# and the package must already be installed -- confirm.
WPM("load-package", "optics_dbScan")

# R interface to the Weka class weka/clusterers/DBSCAN; getClusters() calls it
# as dbscan().
dbscan <- make_Weka_clusterer('weka/clusterers/DBSCAN')

# Data manipulation (dplyr and the %>% pipe), date-time helpers and
# time-of-day class.
library(tidyverse)

library(lubridate)

library(hms)

Let me know if you have questions. The code is a bit messy… I also left in some commented-out debugging code.

]]>I, also, seek an animated flashing red dot (or red cop car flasher) emoji for email subject lines.

]]>Sorry for late reply. Thank you so much!

I would be more than happy to see your analysis from 3, 4 and the results part of your analysis, if possible.

Since we have little experience with this kind of project, it would be a good reference for us.

Let’s keep in touch by email! Thanks. ]]>

I am a master's student majoring in data analytics at the Santa Clara Leavey School of Business. I was very impressed by your analysis. As a school project, I am trying to find the best algorithm for STO, but it has been hard to find any related code resources. Would you mind sharing your R analysis? After I finish my research, I will share mine as well. If you would rather not share it, that is fine too.

Thank you so much! ]]>

Would you mind sharing some of the publicly available providers that allow per-user delivery times?

Which tools (marketing automation) can be used to calculate and/or set those times?

TY!

]]>