R - distribution comparison for a large number of data sets
I have data named "data" that looks like this:
centre_blinded val_list 1 1104 c(-13, -1, 0, 28, -88, 28, -1, -6, -5, -58, 28, 28, 28, 28, 2, 0, 28, -26, 28, 28, 2, 28, 28, -2, -29, 0, 28, -34, -6, 0, 28, 1, 0, 0, -1, 28, 28, 0, 28, 6, 28, 0, 28, 28, 28, 0, -2, -6, -1, 4, 6, 1, -16, -7, 2, 3, 7, 0, 1, 11, 0, 1, -6, -5, 0, 3, 8, 7, 0, 0, 6, -6, 2, 36, -8, 0, -7, -7, -1, -1, -1, 7, -3, 7, 2) 2 1204 c(2, -9, 28, 28, -2, 1, -3, -1, 0, 28, 28, 28, 28, 28, 28, 3, 10, -5, -8, 9, -8, 0, 13, 0, -1, 2, -1, 0, 6, 1, 0, -7, 6, -6, 1) 3 1403 c(0, 2, 0, 2, 28, 0, -1, -35, -36, 2, 1, 1, 28, 28, 28, 0, 0, 28, -7, -35, 28, -3, -18, 28, 28, 28, -5, 0, 28, -2, 4, 5, 0, 56, 1, 0, 1, -7, -20, 0, 0, -3, 0, 1, 3, 0, 4, -2, 42, -13, 7, 10, 7, 56, 0, -5, 10, 56, 8, 56, 84, -4, 1, 0, -14, -7, -1, -48, -6, -3, 0, 7) 4 1110 c(0, 1, 0, -3, 28, 28, 0, -5, 0, 9, 15, 56, -11, -1, -7)
The first column contains the id of the centre, and the second contains a list of values. I want to build empirical distributions of these values for each centre and compare them pairwise using e.g. the Kolmogorov-Smirnov test (ks.test in R). That way I would get an n x n matrix of p-values. The question is how to do this and preserve the ids of the centres for each K-S test.
My attempt was:
# Build a list of value vectors, one per centre, named by centre id.
# NOTE(review): the call as captured read `as.list(data, = "centre_blinded"))`,
# which does not parse (empty argument name, unbalanced parenthesis). The
# intended extra argument cannot be recovered from here, so it is dropped.
# Assumes `data` has the columns `centre_blinded` (ids) and `val_list`
# (list column of numeric vectors), in that order -- TODO confirm.
val_list_temp <- as.list(data)
val_list <- val_list_temp[[2]]
names(val_list) <- val_list_temp[[1]]
Here I have the ids for each centre, but when I use expand.grid I don't know how to store them any more:
# Enumerate every ordered pair of centre ids (rather than pairs of the value
# vectors themselves), so each test stays traceable to the two centres it
# compares. The first id varies fastest, as with any expand.grid() result.
centre_ids <- names(val_list)
val_table <- as.data.table(expand.grid(
  centre_a = centre_ids,
  centre_b = centre_ids,
  stringsAsFactors = FALSE
))

# One two-sample Kolmogorov-Smirnov test per row of `val_table`. Map() walks
# the two id columns in parallel and looks the samples up by name; this avoids
# apply(), whose first argument is `X` (upper case) and which would coerce the
# table to a matrix first.
ks_tests <- Map(
  function(id_a, id_b) ks.test(val_list[[id_a]], val_list[[id_b]]),
  val_table$centre_a,
  val_table$centre_b
)

# Label every result with the pair of centres it belongs to.
names(ks_tests) <- paste(val_table$centre_a, val_table$centre_b, sep = "_vs_")
Besides, how can I put the results in a matrix later, for visualisation?
P.S. Maybe there is a better way than using the Kolmogorov-Smirnov test and making a matrix of p-values?
Is this the type of solution you are looking for? It loops 6 times (4 choose 2) and makes the matrix symmetrical.
# Pairwise two-sample Kolmogorov-Smirnov tests between centres, collected in a
# symmetric matrix of p-values whose rows and columns carry the centre names.

# Data provided ----
a1104 <- c(
  -13, -1, 0, 28, -88, 28, -1, -6, -5, -58, 28, 28, 28, 28, 2, 0, 28, -26,
  28, 28, 2, 28, 28, -2, -29, 0, 28, -34, -6, 0, 28, 1, 0, 0, -1, 28, 28, 0,
  28, 6, 28, 0, 28, 28, 28, 0, -2, -6, -1, 4, 6, 1, -16, -7, 2, 3, 7, 0, 1,
  11, 0, 1, -6, -5, 0, 3, 8, 7, 0, 0, 6, -6, 2, 36, -8, 0, -7, -7, -1, -1,
  -1, 7, -3, 7, 2
)
a1204 <- c(
  2, -9, 28, 28, -2, 1, -3, -1, 0, 28, 28, 28, 28, 28, 28, 3, 10, -5, -8, 9,
  -8, 0, 13, 0, -1, 2, -1, 0, 6, 1, 0, -7, 6, -6, 1
)
a1403 <- c(
  0, 2, 0, 2, 28, 0, -1, -35, -36, 2, 1, 1, 28, 28, 28, 0, 0, 28, -7, -35,
  28, -3, -18, 28, 28, 28, -5, 0, 28, -2, 4, 5, 0, 56, 1, 0, 1, -7, -20, 0,
  0, -3, 0, 1, 3, 0, 4, -2, 42, -13, 7, 10, 7, 56, 0, -5, 10, 56, 8, 56, 84,
  -4, 1, 0, -14, -7, -1, -48, -6, -3, 0, 7
)
a1110 <- c(0, 1, 0, -3, 28, 28, 0, -5, 0, 9, 15, 56, -11, -1, -7)

data <- list(a1104, a1204, a1403, a1110)
names(data) <- c("a1104", "a1204", "a1403", "a1110")

# Combinations to compare ----
# Each column of `varcomb` holds the indices of one unordered pair of samples,
# so every pair is tested exactly once (choose(n, 2) tests in total).
varcomb <- combn(seq_along(data), 2)

# Matrix of p-values ----
result.matrix <- matrix(
  0,
  nrow = length(data),
  ncol = length(data),
  dimnames = list(names(data), names(data))
)

for (i in seq_len(ncol(varcomb))) {
  first <- varcomb[1, i]
  second <- varcomb[2, i]
  p_value <- ks.test(data[[first]], data[[second]])$p.value
  # The test is symmetric in its two samples, so mirror the result.
  result.matrix[first, second] <- p_value
  result.matrix[second, first] <- p_value
}

# A sample compared with itself has KS statistic 0 and therefore p-value 1;
# leaving the diagonal at 0 would read as "significantly different from itself".
diag(result.matrix) <- 1

result.matrix
Comments
Post a Comment