# Started 4/2/99 by Michael A. Newton ######################################################################### # Code to implement Markov chain Monte Carlo for the instability-selection # model described in Technical Report 135 from the Department of # Biostatistics and Medical Informatics, University of Wisconsin, Madison. # Contact newton@stat.wisc.edu for questions or comments. ######################################################################### # The prior is uniform and independent on (delta,xs,eta) # where eta <- (omega-delta)/(1-delta); and xs. # This uniformity induces a non-uniform prior on omega=delta+eta(1-delta) #--------------------------------------------- # Data foo <- dget("pro.data") # Prostate #foo <- dget("fjp.data") # Juvenile Polyposis coli #foo <- dget("thy.data") # Thyroid realdat <- foo$data # loss information eps <- 0.000000001 pos <- c( 0, foo$pos+eps, 1 ) # marker postions & tips # nmark <- ncol(realdat) nn <- length(pos) # nmark + 2 ncols <- nmark + 3 # number of columns in main imputed data array ntum <- nrow(realdat) #--------------------------------------------- # Some MCMC run characteristics # Typical production run magnitude #mcmc <- list( nskip=500, # block size for each saved state # nsave=2000, # number of saved states # eps1=0.003 ) # 1/2 window width for delta update # Typical size for initial testing mcmc <- list( nskip=5, # block size for each saved state nsave=10, # number of saved states eps1=0.003 ) # 1/2 window width for delta update # Prepare for update mechanism for xs, the suppressor gene position source("s.q") #--------------------------------------------- # Prior information # # xs ~ Uniform(0,1) # delta ~ Uniform(0,1) # eta ~ Uniform(0,1) where eta = (omega-delta)/(1-delta) (unconstrained) # lambda ~ Exponential( lambda0 ) # all independent lam0 <- 10 #--------------------------------------------- # Starting configuration (shouldn't matter) # # 1. parameter values lambda <- 2 delta <- 0.4 # Current marker interval; there are, nmark+1 one intervals within which xs may land interval <- max( (1:nn)[xs >= pos] ) # 2. LOH status at xs (imputed) los <- matrix( sample( c(0,1), size=ntum, replace=T ), ntum, 1 ) # 3. Missing and tip data lohdat <- cbind( rep(-1,ntum), realdat, rep(-1,ntum) ) noninf <- matrix(F,ntum,nmark+2) noninf[lohdat<0] <- T lohdat[noninf] <- 1 # one possible starting configuration # second version including los: These objects contain entire imputed data structure ord <- order( c(xs,pos) ) loh <- cbind( los, lohdat )[,ord] alltrue <- rep(T,ntum) nodat <- ( cbind( alltrue, noninf ) )[,ord] sloc <- (1:(ncols))[ord==1] # column location of Loh(Xs) must be 10,0->1,1->0,1->1 # and so order is important return( c( sum((1-x)*(1-y)),sum((1-x)*y),sum(x*(1-y)),sum(x*y)) ) } counts <- matrix(NA,4,ncols-1) # move to the right of xs for( i in sloc:(ncols-1) ) { counts[,i] <- trans( loh[,i], loh[,(i+1)] ) } # move to the left of xs for( i in sloc:2 ) { counts[,(i-1)] <- trans( loh[,i], loh[,(i-1)] ) } cstar <- counts # Initialize transition probabilities tprob <- matrix(NA,4,ncols-1) foo <- sort( c(xs,pos) ) gaps <- foo[2:ncols] - foo[1:(ncols-1)] foo <- 1-exp(-lambda*gaps) tprob[1,] <- 1 - delta*foo tprob[2,] <- delta*foo tprob[3,] <- (1-delta)*foo tprob[4,] <- 1-(1-delta)*foo tprobstar <- tprob #---------------------------------------------------------------- # Initialize storage acrate <- rep(0,4) # Metropolis acceptance rates B <- mcmc$nsave*mcmc$nskip # number of cycles thsave <- matrix(NA,mcmc$nsave,4) # saved thetas logpost <- numeric(mcmc$nsave) # log posterior dimnames(thsave) <- list( NULL, c("delta","omega","lambda","xs") ) skipcount <- 0 pskip <- 0 isave <- 1 #---------------------------------------------------------------- # Big loop for( i in 1:B ) { # Update counters skipcount <- skipcount + 1 pskip <- pskip + 1 #---------------------------------------------------------- # Update omega (note omega|delta still Unif(delta,1) in this prior) # propose from a likelihood-like Beta los <- loh[,sloc] shape1 <- sum(los) + 1 shape2 <- ntum - shape1 + 2 omegastar <- rbeta(1,shape1,shape2) if( omegastar > delta ) { mh <- 1 ok <- ifelse( runif(1) < mh, T, F ) } else{ ok <- F } if(ok){ omega <- omegastar acrate[1] <- acrate[1] + 1 } #---------------------------------------------------------- # Next update lambda and delta simultaneously (use gaps/change) # lambda update is via exponential centered at current value # delta update is uniform centered at current value reflected in (0,omega) lambdastar <- -log(runif(1))*lambda tmp <- abs(delta + runif(1,min=-mcmc$eps1,max=mcmc$eps1)) #reflect neg. deltastar <- ifelse( tmp > omega, 2*omega-tmp, tmp ) # reflect from above foo <- 1-exp( -lambdastar*gaps ) tprobstar[1,] <- 1 - deltastar*foo tprobstar[2,] <- deltastar*foo tprobstar[3,] <- (1-deltastar)*foo tprobstar[4,] <- 1-(1-deltastar)*foo # expoential(lam0) piece fee <- (lambda-lambdastar)/lam0 # loglikelihood ratio (note cancellations); and Hastings ratio tmp <- lambda/lambdastar logqrat <- log(tmp) - tmp + 1/tmp logmh <- sum(counts*log(tprobstar/tprob)) + logqrat + fee ok <- ifelse( log(runif(1)) <= logmh, T, F ) if( ok ){ lambda <- lambdastar delta <- deltastar tprob <- tprobstar acrate[2] <- acrate[2] + 1 } #---------------------------------------------------------- # Update xs, and modify downstream variates # use the cumqprob matrix from s.q tmp <- cumqprob[interval,] intervalstar <- min( (interval+c(-1,0,1))[ runif(1) <= tmp ] ) # select an interval xsstar <- runif(1, min=pos[intervalstar], max=pos[intervalstar+1] ) # select a value bar <- sort( c(xsstar,pos) ) gapsstar <- bar[2:(nn+1)] - bar[1:nn] cstar <- counts if( intervalstar == interval ) { # Proposal is symmetric and there is no change in `counts' logqrat <- 0 slocstar <- sloc } else if( intervalstar > interval ) # I might just need one else here { # Incorporate asymmetry of the proposal mechanism logqrat <- log( triplet.len[interval] ) - log( triplet.len[intervalstar] ) # change counts cstar[,interval] <- trans(loh[,(sloc+1)],loh[,(sloc-1)]) cstar[,(interval+1)] <- trans(los,loh[,(sloc+1)]) cstar[,(interval+2)] <- trans(los,loh[,(sloc+2)] ) slocstar <- sloc + 1 } else # so intervalstar = interval-1 { # Incorporate asymmetry of the proposal mechanism logqrat <- log( triplet.len[interval] ) - log( triplet.len[intervalstar] ) # change counts cstar[,(interval-1)] <- trans(los,loh[,(sloc-2)]) cstar[,interval] <- trans(los,loh[,(sloc-1)]) cstar[,(interval+1)] <- trans(loh[,(sloc-1)],loh[,(sloc+1)] ) slocstar <- sloc - 1 } foo <- 1-exp( -lambda*gapsstar ) tprobstar[1,] <- 1 - delta*foo tprobstar[2,] <- delta*foo tprobstar[3,] <- (1-delta)*foo tprobstar[4,] <- 1-(1-delta)*foo logmh <- sum(cstar*log(tprobstar) - counts*log(tprob)) + logqrat ok <- ifelse( log(runif(1)) <= logmh, T, F ) if( ok ) { xs <- xsstar loh[,sloc] <- loh[,slocstar] # rearrange the full Loh information loh[,slocstar] <- los nodat[,sloc] <- nodat[,slocstar] # rearrange the imputation information nodat[,slocstar] <- alltrue sloc <- slocstar interval <- intervalstar tprob <- tprobstar counts <- cstar gaps <- gapsstar acrate[3] <- acrate[3] + 1 } #---------------------------------------------------------- # Update imputed LOH status ind <- sample(1:ncols,size=1) # a random column in loh yy <- loh[,ind] subset <- nodat[,ind] # T means data imputed, F means real data lfac <- c(1,1) xx <- rep(1,ntum) if( ind > 1 ) { lfac <- c(tprob[3,(ind-1)]/tprob[1,(ind-1)], tprob[4,(ind-1)]/tprob[2,(ind-1)] ) xx <- loh[,(ind-1)] } rfac <- c(1,1) zz <- rep(1,ntum) if( ind < ncols ) { rfac <- c(tprob[3,ind]/tprob[1,ind], tprob[4,ind]/tprob[2,ind] ) zz <- loh[,(ind+1)] } # P(yy=1|else )/P(yy=0|else) odds <- lfac[(xx+1)]*rfac[(zz+1)] odds <- odds * ifelse( ind==sloc, (omega/(1-omega)), (delta/(1-delta)) ) mh <- odds mh[yy==1] <- 1/odds[yy==1] # odds for a change ok <- rep(F,ntum) ok[ (runif(ntum) <= mh) & (subset) ] <- T acrate[4] <- acrate[4]+mean(ok) foo <- 2*yy-1 # put on -1,1 scale instead of 0,1 foo[ok] <- -foo[ok] # swap # update state loh[,ind] <- (foo+1)/2 # back to 0,1 scale # update counts ft1 <- c(ind,ind-1) # from/to transition on left of ind ft2 <- c(ind+1,ind) # from/to transition on right of ind if( ind > sloc ){ ft1 <- rev(ft1); ft2 <- rev(ft2) } if( ind>1 ) { counts[,(ind-1)] <- trans( loh[,(ft1[1])],loh[,(ft1[2])] ) } if( ind