## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----eval = F----------------------------------------------------------------- # library(xgboost) ## ----eval = F----------------------------------------------------------------- # # create 30 diagrams from circles, tori and spheres # # up to dimension 2 # diags <- lapply(X = 1:30,FUN = function(X){ # # if(X <= 10) # { # return(TDAstats::calculate_homology(TDA::circleUnif(n = 100), # dim = 2,threshold = 2)) # } # # if(X > 10 & X <= 20) # { # return(TDAstats::calculate_homology(TDA::torusUnif(n = 100,a = 0.25,c = 0.75), # dim = 2,threshold = 2)) # } # # if(X > 20) # { # return(TDAstats::calculate_homology(TDA::sphereUnif(n = 100,d = 2), # dim = 2,threshold = 2)) # } # # }) # # # subset into two features, dimension 1 and dimension 2 # T1 <- lapply(X = diags,FUN = function(X){ # # df <- X[which(X[,1] == 1),] # if(!is.matrix(df)) # { # df <- as.data.frame(t(df)) # } # return(as.data.frame(df)) # # }) # T2 <- lapply(X = diags,FUN = function(X){ # # df <- X[which(X[,1] == 2),] # if(!is.matrix(df)) # { # df <- as.data.frame(t(df)) # } # return(as.data.frame(df)) # # }) # # # calculate max persistence of each diagram # max_pers_H1 <- unlist(lapply(X = T1,FUN = function(X){ # # return(max(X[[3]] - X[[2]])) # # })) # max_pers_H2 <- unlist(lapply(X = T2,FUN = function(X){ # # if(nrow(X) == 0) # { # return(0) # } # return(max(X[[3]] - X[[2]])) # # })) # # # create random binary vector # rand_bin <- sample(factor(c("yes","no")),size = 30,replace = T) # # # specify data labels, 0 for circle, 1 for torus, 2 for sphere # labs <- rep(c(0,1,2),each = 10) ## ----eval = F----------------------------------------------------------------- # # form non-topological feature matrix # NT <- cbind(max_pers_H1,max_pers_H2,rand_bin) # # # calculate the approximate Gram matrix for each topological feature # G1 <- gram_matrix(diagrams = T1,sigma = 0.01,dim = 1,rho = 0.0001) # G2 <- gram_matrix(diagrams = T2,sigma = 0.01,dim = 2,rho = 0.0001) # # # column bind G_i's into 30x60 feature matrix # G <- cbind(G1,G2) # # # column bind G and NT into 30x63 feature matrix # Fmat <- cbind(G,NT) # # # fit XGBoost model with maximum 50 boosting iterations # model <- xgboost(data = Fmat,label = rep(c(0,1,2),each = 10),nrounds = 50,verbose = 0, # objective = "multi:softmax",num_class = 3) ## ----eval = F----------------------------------------------------------------- # new_diags <- list(TDAstats::calculate_homology(TDA::circleUnif(n = 100), # dim = 2,threshold = 2), # TDAstats::calculate_homology(TDA::torusUnif(n = 100,a = 0.25,c = 0.75), # dim = 2,threshold = 2), # TDAstats::calculate_homology(TDA::sphereUnif(n = 100,d = 2), # dim = 2,threshold = 2)) # # # subset into two features, dimension 1 and dimension 2 # T1_prime <- lapply(X = new_diags,FUN = function(X){ # # df <- X[which(X[,1] == 1),] # if(!is.matrix(df)) # { # df <- as.data.frame(t(df)) # } # return(as.data.frame(df)) # # }) # T2_prime <- lapply(X = new_diags,FUN = function(X){ # # df <- X[which(X[,1] == 2),] # if(!is.matrix(df)) # { # df <- as.data.frame(t(df)) # } # return(as.data.frame(df)) # # }) # # # calculate max persistence of each new diagram # max_pers_H1_prime <- unlist(lapply(X = T1_prime,FUN = function(X){ # # return(max(X[,3] - X[,2])) # # })) # max_pers_H2_prime <- unlist(lapply(X = T2_prime,FUN = function(X){ # # if(nrow(X) == 0) # { # return(0) # } # return(max(X[,3] - X[,2])) # # })) # # # create random binary vector # rand_bin_prime <- sample(factor(c("yes","no")),size = 3,replace = T) ## ----eval = F----------------------------------------------------------------- # # form non-topological feature matrix # NT_prime <- cbind(max_pers_H1_prime,max_pers_H2_prime,rand_bin_prime) # # # calculate the approximate cross Gram matrix for each topological feature # G1_prime <- gram_matrix(diagrams = T1_prime,other_diagrams = T1,sigma = 0.01,dim = 1, # rho = 0.0001) # G2_prime <- gram_matrix(diagrams = T2_prime,other_diagrams = T2,sigma = 0.01,dim = 2, # rho = 0.0001) # # # column bind G_i prime's into 3x60 feature matrix # G_prime <- cbind(G1_prime,G2_prime) # # # column bind G_prime and NT_prime into 3x63 feature matrix # Fmat_prime <- cbind(G_prime,NT_prime) # # # fix column names of Fmat_prime to be the same as Fmat # colnames(Fmat_prime) <- colnames(Fmat) # # # predict data labels # stats::predict(model,Fmat_prime) ## ----echo = F----------------------------------------------------------------- c(0,1,2)