Last updated: 2018-02-11
Code version: d53227e
creat a file by mkdir Breastcancer
and put the data file into this folder. in this case, we use matlab function as well, so we use .mat
file for the data.
creat a folder to track the code mkdir outlog
miss_gen = function(Y, percent = 0.2){
N = dim(Y)[1]
P = dim(Y)[2]
colindex = sample(P,(P*percent))
rowindex = sample(N,(N*percent))
missindex = list(colindex = colindex, rowindex = rowindex)
return(missindex)
}
# methods
flashG_wrapper = function(Y, missindex, rank_max,type = "constant"){
# delete the value in the missing position
Y_miss = Y
Y_miss[missindex$rowindex, missindex$colindex] = NA
data = flashr::flash_set_data(Y_miss)
g_flash = flashr::flash_add_greedy(data,Kmax=rank_max,ash_param=list(method = "fdr"),var_type = type)
misshat_flash = g_flash$EL %*% t(g_flash$EF)
rmse = sqrt(mean((Y[missindex$rowindex, missindex$colindex] - misshat_flash[missindex$rowindex, missindex$colindex])^2))
return(rmse)
}
flashB_wrapper = function(Y, missindex, rank_max,type = "constant"){
# delete the value in the missing position
Y_miss = Y
Y_miss[missindex$rowindex, missindex$colindex] = NA
data = flashr::flash_set_data(Y_miss)
g_flash = flashr::flash_add_greedy(data,Kmax=rank_max,ash_param=list(method = "fdr"),var_type = type)
b_flash = flash_backfit(data,g_flash)
misshat_flash = b_flash$EL %*% t(b_flash$EF)
rmse = sqrt(mean((Y[missindex$rowindex, missindex$colindex] - misshat_flash[missindex$rowindex, missindex$colindex])^2))
return(rmse)
}
softImpute_wrapper = function(Y, missindex, rank_max){
# delete the value in the missing position
Y_miss = Y
Y_miss[missindex$rowindex, missindex$colindex] = NA
gsoft = softImpute::softImpute(Y_miss, rank.max = rank_max, lambda = 30)
misshat_soft = gsoft$u %*% diag(gsoft$d) %*% t(gsoft$v)
rmse = sqrt(mean((Y[missindex$rowindex, missindex$colindex] - misshat_soft[missindex$rowindex, missindex$colindex])^2))
return(rmse)
}
PMD_wrapper = function(Y, missindex, rank_max){
# delete the value in the missing position
Y_miss = Y
Y_miss[missindex$rowindex, missindex$colindex] = NA
out = PMA::PMD(Y_miss, K = rank_max)
misshat_PMD = out$u %*% diag(out$d) %*% t(out$v)
rmse = sqrt(mean((Y[missindex$rowindex, missindex$colindex] - misshat_PMD[missindex$rowindex, missindex$colindex])^2))
return(rmse)
}
library(R.matlab)
# call the R functions
source("~/HG/flash/data/CVmissflashr/template/Rfunction.R")
# read the data
# you need to change the path for different data
Y_data = R.matlab::readMat("~/HG/flash/data/CVmissflashr/Breastcancer/example.mat")
Y = Y_data$Y
N = dim(Y)[1]
P = dim(Y)[2]
Y = Y - rowMeans(Y) %*% t(rep(1,P))
result = c()
missindex = miss_gen(Y, percent = 0.1)
# creat a folder for the output and matlab in the folder we make
if(file.exists("NBSFAout")){
unlink("NBSFAout", recursive= T)
}
system("mkdir NBSFAout")
# write the missing index
writeMat("./NBSFAout/missindex.mat", missindex = missindex)
flashG_rmse = flashG_wrapper(Y, missindex, rank_max = 50)
pmd_rmse = PMD_wrapper(Y, missindex, rank_max= 50 )
soft_rmse = softImpute_wrapper(Y, missindex, rank_max = 50)
# then call the matlab estiamtion
# this for the folder of the data
system("matlab -nosplash -nodesktop -r \"addpath(\'~/HG/flash/data/CVmissflashr/template\'); run(\'missvalue.m\');exit;\" ")
g_nsfa = readMat("./NBSFAout/NSFAresult.mat")
nsfa_rmse = as.vector(g_nsfa$rmse.NSFA)
result = c(flashG_rmse, nsfa_rmse,pmd_rmse,soft_rmse)
saveRDS(result, "./NBSFAout/output.rds")
addpath('~/HG/flash/data/missingvalue/methods/NBSF/nsfa-master/');
addpath('~/HG/flash/data/missingvalue/methods/NBSF/nsfa-master/utils/');
% you need to change the path and the data file (not centered)
load ../example.mat;
Ycentered=Y-repmat(mean(Y,2),1,size(Y,2));
settings=defaultsettings();
[settings.D,settings.N]=size(Ycentered);
settings.iterations=100;
% now the missing is decide by the this is the missing index.(or you can upload the missing matrix)
file_path = fullfile(pwd,'NBSFAout/missindex.mat');
load(file_path);
mvmask = ones(settings.D,settings.N);
mvmask(missindex.rowindex,missindex.colindex) = 0;
initialsample=init_nsfa(settings);
[finalsample,resultstable]=nsfa(Ycentered,mvmask,initialsample,settings);
Y_hat = (finalsample.G * finalsample.X);
sse_NSFA = sum(sum((Ycentered(missindex.rowindex,missindex.colindex) - Y_hat(missindex.rowindex,missindex.colindex)).^2));
mse_NSFA = sse_NSFA/(length(missindex.rowindex)*length(missindex.colindex))
rmse_NSFA = sqrt(mse_NSFA)
save_path = fullfile(pwd,'NBSFAout/NSFAresult');
save(save_path,'rmse_NSFA');
#!/bin/bash
#SBATCH --job-name=arrayJob
#SBATCH --output=/home/weidong/HG/flash/data/CVmissflashr/Breastcancer/outlog/arrayJob_%A_%a.out
#SBATCH --error=/home/weidong/HG/flash/data/CVmissflashr/Breastcancer/outlog/arrayJob_%A_%a.err
#SBATCH --array=1-100
#SBATCH --time=06:00:00
#SBATCH --partition=broadwl
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=6000
######################
# Begin work section #
######################
# Print this sub-job's task ID
cd /home/weidong/HG/flash/data/CVmissflashr/Breastcancer
mkdir test${SLURM_ARRAY_TASK_ID}
cd test${SLURM_ARRAY_TASK_ID}
Rscript --verbose /home/weidong/HG/flash/data/CVmissflashr/Breastcancer/runR.R
sbatch Jobs.sbatch
T = 100
results = matrix(NA,ncol = 4, nrow = T)
for(i in 1:T){
test_folder = paste("test", i, sep = "")
out_folder = "NBSFAout"
out_file = "output.rds"
file_name = file.path(test_folder,out_folder,out_file)
results[i,] = try(readRDS(file_name))
}
sessionInfo()
R version 3.3.0 (2016-05-03)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: OS X 10.13.3 (unknown)
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] reshape2_1.4.3 flashr_0.4-6 workflowr_0.4.0 rmarkdown_1.6
[5] ggplot2_2.2.1 R.matlab_3.6.1 softImpute_1.4 Matrix_1.2-11
[9] PMA_1.0.9 impute_1.48.0 plyr_1.8.4 ssvd_1.0
loaded via a namespace (and not attached):
[1] Rcpp_0.12.14 git2r_0.19.0 R.methodsS3_1.7.1
[4] R.utils_2.5.0 iterators_1.0.9 tools_3.3.0
[7] digest_0.6.13 memoise_1.1.0 evaluate_0.10.1
[10] tibble_1.3.4 gtable_0.2.0 lattice_0.20-35
[13] rlang_0.1.6 foreach_1.4.4 rstudioapi_0.6
[16] curl_2.8.1 yaml_2.1.16 parallel_3.3.0
[19] gridExtra_2.3 httr_1.3.0 withr_2.1.1
[22] stringr_1.2.0 knitr_1.18 devtools_1.13.3
[25] rprojroot_1.2 grid_3.3.0 R6_2.2.2
[28] flashr2_0.4-0 ashr_2.2-3 magrittr_1.5
[31] scales_0.4.1 backports_1.1.2 codetools_0.2-15
[34] htmltools_0.3.6 MASS_7.3-47 colorspace_1.3-2
[37] labeling_0.3 stringi_1.1.6 lazyeval_0.2.0
[40] doParallel_1.0.11 munsell_0.4.3 pscl_1.5.2
[43] truncnorm_1.0-7 SQUAREM_2017.10-1 R.oo_1.21.0
This R Markdown site was created with workflowr