Last updated: 2017-09-16
Code version: f8e928e
sim_hd = function(N, P, SF, SL, signal, a = rchisq(N,3),b = rchisq(P,1),mu = 0){
E = matrix(rep(0,N*P),nrow=N)
sig2_true = matrix(rep(0,N*P),nrow=N)
for(i in 1:N){
for(j in 1:P){
sig2_true[i,j] = mu + a[i] * b[j]
E[i,j] = rnorm(1,0,sqrt(mu + a[i] * b[j]))
}
}
K=1
lstart = rnorm(N, 0, signal)
fstart = rnorm(P, 0, signal)
index = sample(seq(1:N),(N*SL))
lstart[index] = 0
index = sample(seq(1:P),(P*SF))
fstart[index] = 0
Y = lstart %*% t(fstart) + E
return(list(Y = Y, L_true = lstart, F_true = fstart, Error = E,sig2_true = sig2_true))
}
sim_K = function(K, N, P, SF, SL, signal,noise){
E = matrix(rnorm(N*P,0,noise),nrow=N)
Y = E
L_true = array(0, dim = c(N,K))
F_true = array(0, dim = c(P,K))
for(k in 1:K){
lstart = rnorm(N, 0, signal)
fstart = rnorm(P, 0, signal)
index = sample(seq(1:N),(N*SL))
lstart[index] = 0
index = sample(seq(1:P),(P*SF))
fstart[index] = 0
L_true[,k] = lstart
F_true[,k] = fstart
Y = Y + lstart %*% t(fstart)
}
return(list(Y = Y, L_true = L_true, F_true = F_true, Error = E))
}
library(flashr2)
library(softImpute)
set.seed(999)
N = 100
P = 200
dat = sim_K(K=1,N, P, SF = 0.5, SL = 0.5, signal = 3,noise = 5)
# a = rchisq(N,3)
# b = rchisq(P,1)
# data = sim_hd(N, P, SF = 0.5, SL = 0.5, signal = 1, a ,b,mu = 0)
Y = dat$Y
Y_miss = Y
Y_miss[1:50,1:100] = NA
Y_miss[51:100,101:200] = NA
Here I just run flash on this data sets four times and provide different result due to the unstable initial value.
par(mfrow = c(2,2))
for(i in 1:4){
data.miss = set_flash_data(Y_miss)
g1.miss = flash_r1(data.miss,verbose=F)
plot(as.vector(dat$L_true %*% t(dat$F_true)), as.vector(g1.miss$EL %*% t(g1.miss$EF)), xlab = "truth",ylab = "estimated", main = "vectorized LF")
}
Here is a example of use softimpute. maybe I was not using it in the right way.
Here I use default lambda
, and we can try type = "svd"
. I tried different combination and this method is still not stable.
par(mfrow = c(2,2))
for(i in 1:4){
gsoft = softImpute(Y_miss, rank.max = 1,type = "als")
sfl = (gsoft$u)
sff = gsoft$d * (gsoft$v)
plot(as.vector(dat$L_true %*% t(dat$F_true)), as.vector(sfl %*% t(sff)), xlab = "truth",ylab = "estimated", main = "vectorized LF")
}
sessionInfo()
R version 3.3.0 (2016-05-03)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: OS X 10.12.6 (unknown)
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] workflowr_0.4.0 rmarkdown_1.6 softImpute_1.4 Matrix_1.2-11
[5] PMA_1.0.9 impute_1.48.0 plyr_1.8.4 flashr2_0.1-2
loaded via a namespace (and not attached):
[1] Rcpp_0.12.12 git2r_0.19.0 iterators_1.0.8
[4] tools_3.3.0 digest_0.6.12 evaluate_0.10.1
[7] tibble_1.3.3 gtable_0.2.0 lattice_0.20-35
[10] rlang_0.1.2 foreach_1.4.3 rstudioapi_0.6
[13] yaml_2.1.14 parallel_3.3.0 stringr_1.2.0
[16] knitr_1.17 REBayes_0.85 rprojroot_1.2
[19] grid_3.3.0 irlba_2.2.1 ggplot2_2.2.1
[22] flashr_0.1.1 ashr_2.1-25 magrittr_1.5
[25] scales_0.4.1 backports_1.1.0 codetools_0.2-15
[28] htmltools_0.3.6 MASS_7.3-47 assertthat_0.2.0
[31] colorspace_1.3-2 stringi_1.1.5 Rmosek_7.1.2
[34] lazyeval_0.2.0 pscl_1.4.9 doParallel_1.0.10
[37] munsell_0.4.3 truncnorm_1.0-7 SQUAREM_2016.8-2
This R Markdown site was created with workflowr