# Started 4/2/99 by Michael A. Newton
#########################################################################
# Code to implement Markov chain Monte Carlo for the instability-selection
# model described in Technical Report 135 from the Department of 
# Biostatistics and Medical Informatics, University of Wisconsin, Madison.
# Contact newton@stat.wisc.edu for questions or comments.
#########################################################################

# The prior is uniform and independent on (delta,xs,eta)
# where eta <- (omega-delta)/(1-delta); and xs.
# This uniformity induces a non-uniform prior on omega=delta+eta(1-delta)

#---------------------------------------------
# Data
foo <- dget("pro.data")  # Prostate
#foo <- dget("fjp.data")  # Juvenile Polyposis coli
#foo <- dget("thy.data")  # Thyroid
realdat <- foo$data      # loss information
eps <- 0.000000001
pos <- c( 0, foo$pos+eps, 1 ) # marker postions & tips
#
nmark <- ncol(realdat)
nn <- length(pos)  # nmark + 2
ncols <- nmark + 3 # number of columns in main imputed data array
ntum <- nrow(realdat)

#---------------------------------------------
# Some MCMC run characteristics

# Typical production run magnitude
#mcmc <- list(   nskip=500,    # block size for each saved state
#                nsave=2000,    # number of saved states
#                eps1=0.003 )   # 1/2 window width for delta update
# Typical size for initial testing
mcmc <- list(   nskip=5,    # block size for each saved state
                nsave=10,    # number of saved states
                eps1=0.003 )   # 1/2 window width for delta update


# Prepare for update mechanism for xs, the suppressor gene position

source("s.q")

#---------------------------------------------
# Prior information 
#
# xs ~ Uniform(0,1)
# delta ~ Uniform(0,1)
# eta ~ Uniform(0,1)   where eta = (omega-delta)/(1-delta)  (unconstrained)
# lambda ~ Exponential( lambda0 )
#  all independent

lam0 <- 10


#---------------------------------------------
# Starting configuration (shouldn't matter)
#
# 1. parameter values

lambda <- 2
delta <- 0.4

# Current marker interval; there are, nmark+1 one intervals within which xs may land
interval <-  max( (1:nn)[xs >= pos] )


# 2. LOH status at xs (imputed)
los <- matrix( sample( c(0,1), size=ntum, replace=T ), ntum, 1 )

# 3. Missing and tip data  
lohdat <- cbind( rep(-1,ntum), realdat, rep(-1,ntum) )
noninf <- matrix(F,ntum,nmark+2)
noninf[lohdat<0] <- T
lohdat[noninf] <- 1  # one possible starting configuration

# second version including los: These objects contain entire imputed data structure
ord <- order( c(xs,pos) )
loh <- cbind( los, lohdat )[,ord]
alltrue <- rep(T,ntum)
nodat <- ( cbind( alltrue, noninf ) )[,ord]
sloc <- (1:(ncols))[ord==1]  # column location of Loh(Xs) must be 1<sloc<ncols

  # Initialize counts matrix 

     # A useful function
     trans <- function(x,y)
      {
       # x and y are vectors of equal length and are binary (0,1)
       # trans returns the set of 4 transition counts; 0->0,0->1,1->0,1->1
       # and so order is important
       return( c( sum((1-x)*(1-y)),sum((1-x)*y),sum(x*(1-y)),sum(x*y)) )
      }

  counts <- matrix(NA,4,ncols-1)
  # move to the right of xs
  for( i in sloc:(ncols-1) ) { counts[,i] <- trans( loh[,i], loh[,(i+1)] )  }
  # move to the left of xs
  for( i in sloc:2 ) { counts[,(i-1)] <- trans( loh[,i], loh[,(i-1)] ) }
  cstar <- counts

  # Initialize transition probabilities
  tprob <- matrix(NA,4,ncols-1)   
  foo <- sort( c(xs,pos) )
  gaps <- foo[2:ncols] - foo[1:(ncols-1)]
  foo <- 1-exp(-lambda*gaps)
  tprob[1,] <- 1 - delta*foo
  tprob[2,] <-  delta*foo
  tprob[3,] <- (1-delta)*foo
  tprob[4,] <- 1-(1-delta)*foo
  tprobstar <- tprob


#----------------------------------------------------------------
# Initialize storage
acrate <- rep(0,4)                  # Metropolis acceptance rates
B <- mcmc$nsave*mcmc$nskip          # number of cycles
thsave <- matrix(NA,mcmc$nsave,4)   # saved thetas
logpost <- numeric(mcmc$nsave)      # log posterior
dimnames(thsave) <- list( NULL, c("delta","omega","lambda","xs") )

skipcount <- 0
pskip <- 0
isave <- 1
#----------------------------------------------------------------
# Big loop
for( i in 1:B )
 {
  # Update counters
  skipcount <- skipcount + 1
  pskip <- pskip + 1
  #----------------------------------------------------------
  # Update omega (note omega|delta still Unif(delta,1) in this prior)
  #   propose from a likelihood-like Beta
  los <- loh[,sloc]
  shape1 <-  sum(los) + 1
  shape2 <-  ntum - shape1 + 2
  omegastar <- rbeta(1,shape1,shape2)
  if( omegastar > delta )
   {
    mh <- 1  
    ok <- ifelse( runif(1) < mh, T, F ) }
  else{ ok <- F }
  if(ok){
    omega <- omegastar
    acrate[1] <- acrate[1] + 1 }
  #----------------------------------------------------------
  # Next update lambda and delta simultaneously (use gaps/change)
  # lambda update is via exponential centered at current value
  # delta update is uniform centered at current value reflected in (0,omega)

  lambdastar <- -log(runif(1))*lambda
  tmp <- abs(delta + runif(1,min=-mcmc$eps1,max=mcmc$eps1)) #reflect neg.
  deltastar <- ifelse( tmp > omega, 2*omega-tmp, tmp )  # reflect from above

  foo <- 1-exp( -lambdastar*gaps )
  tprobstar[1,] <- 1 - deltastar*foo
  tprobstar[2,] <-  deltastar*foo
  tprobstar[3,] <- (1-deltastar)*foo
  tprobstar[4,] <- 1-(1-deltastar)*foo
 
# expoential(lam0) piece 
  fee <- (lambda-lambdastar)/lam0

  # loglikelihood ratio (note cancellations); and Hastings ratio
  tmp <- lambda/lambdastar
  logqrat <- log(tmp) - tmp + 1/tmp
  logmh <- sum(counts*log(tprobstar/tprob)) + logqrat + fee
  ok <- ifelse( log(runif(1)) <= logmh, T, F )
  if( ok ){
     lambda <- lambdastar
     delta <- deltastar
     tprob <- tprobstar  
     acrate[2] <- acrate[2] + 1 }	

  #----------------------------------------------------------
  # Update xs, and modify downstream variates
  # use the cumqprob matrix from s.q   
  
  tmp <- cumqprob[interval,]
  intervalstar <- min( (interval+c(-1,0,1))[ runif(1) <= tmp ] ) # select an interval 
  xsstar <- runif(1, min=pos[intervalstar], max=pos[intervalstar+1] ) # select a value
  bar <- sort( c(xsstar,pos) )
  gapsstar <- bar[2:(nn+1)] - bar[1:nn]
  cstar <- counts
  if( intervalstar == interval )
   {
    # Proposal is symmetric and there is no change in `counts' 
    logqrat <- 0
    slocstar <- sloc  
   }
  else if( intervalstar > interval )   # I might just need one else here
   {
    # Incorporate asymmetry of the proposal mechanism
    logqrat <- log( triplet.len[interval] ) - log( triplet.len[intervalstar] )
    # change counts 
    cstar[,interval]     <- trans(loh[,(sloc+1)],loh[,(sloc-1)])
    cstar[,(interval+1)] <- trans(los,loh[,(sloc+1)])
    cstar[,(interval+2)] <- trans(los,loh[,(sloc+2)] )
    slocstar <- sloc + 1
   }
  else # so intervalstar = interval-1
   {
    # Incorporate asymmetry of the proposal mechanism
    logqrat <- log( triplet.len[interval] ) - log( triplet.len[intervalstar] )
    # change counts 
    cstar[,(interval-1)] <- trans(los,loh[,(sloc-2)])
    cstar[,interval]     <- trans(los,loh[,(sloc-1)])
    cstar[,(interval+1)] <- trans(loh[,(sloc-1)],loh[,(sloc+1)] )
    slocstar <- sloc - 1
   } 
  foo <- 1-exp( -lambda*gapsstar )
  tprobstar[1,] <- 1 - delta*foo
  tprobstar[2,] <-  delta*foo
  tprobstar[3,] <- (1-delta)*foo
  tprobstar[4,] <- 1-(1-delta)*foo

  logmh <- sum(cstar*log(tprobstar) - counts*log(tprob)) + logqrat
  ok <- ifelse( log(runif(1)) <= logmh, T, F )
  if( ok )
   {
    xs <- xsstar
    loh[,sloc] <- loh[,slocstar]       # rearrange the full Loh information
    loh[,slocstar] <- los
    nodat[,sloc] <- nodat[,slocstar]   # rearrange the imputation information
    nodat[,slocstar] <- alltrue
    sloc <- slocstar
    interval <- intervalstar
    tprob <- tprobstar
    counts <- cstar
    gaps <- gapsstar
    acrate[3] <- acrate[3] + 1
   }

  #----------------------------------------------------------
  # Update imputed LOH status

  ind <- sample(1:ncols,size=1)  # a random column in loh
  yy <- loh[,ind]
  subset <- nodat[,ind]   # T means data imputed, F means real data

  lfac <- c(1,1) 
  xx <-  rep(1,ntum)
  if( ind > 1 ) {
    lfac <- c(tprob[3,(ind-1)]/tprob[1,(ind-1)], tprob[4,(ind-1)]/tprob[2,(ind-1)] )  
    xx <- loh[,(ind-1)] }
  rfac <- c(1,1)
  zz <-  rep(1,ntum)
  if( ind < ncols ) {
    rfac <- c(tprob[3,ind]/tprob[1,ind], tprob[4,ind]/tprob[2,ind] )  
    zz <- loh[,(ind+1)] }

  # P(yy=1|else )/P(yy=0|else)
  odds <- lfac[(xx+1)]*rfac[(zz+1)]
  odds <- odds * ifelse( ind==sloc, (omega/(1-omega)), (delta/(1-delta)) )
  mh <- odds
  mh[yy==1] <- 1/odds[yy==1]    # odds for a change
  ok <- rep(F,ntum)
  ok[ (runif(ntum) <= mh) & (subset) ] <- T
  acrate[4] <- acrate[4]+mean(ok)
  foo <- 2*yy-1       # put on -1,1 scale instead of 0,1
  foo[ok] <- -foo[ok]  # swap
  # update state
  loh[,ind] <- (foo+1)/2     # back to 0,1 scale
  # update counts

  ft1 <- c(ind,ind-1) # from/to transition on left of ind
  ft2 <- c(ind+1,ind) # from/to transition on right of ind
  if( ind > sloc ){ ft1 <- rev(ft1); ft2 <- rev(ft2) }

  if( ind>1 ) { counts[,(ind-1)] <- trans( loh[,(ft1[1])],loh[,(ft1[2])] )  }         
  if( ind<ncols ) { counts[,ind] <- trans( loh[,(ft2[1])],loh[,(ft2[2])] )  }         
  
  #update los, in case it was changed
  los <- loh[,sloc]

  #----------------------------------------------------------
  # Store output, periodically
  if( skipcount == mcmc$nskip )
   {
    skipcount <- 0
    # evaluate the logposterior
    lp <- sum( counts*log(tprob) ) + sum(los)*log( omega/(1-omega) ) +
		ntum*log(1-omega)
    print( c(isave,lp) )
    logpost[isave] <- lp
    thsave[isave,] <- c(delta,omega,lambda,xs)
    isave <- isave+1
   }

 }

acrate <- acrate/B
acrate <- matrix(acrate,1,4)
dimnames(acrate) <- list(NULL,c("omega","lambda&delta","xs","impute"))

results <- list( mcmc=mcmc, acrate=acrate, prior=lam0,
                 thsave=thsave,logpost=logpost )

dput(results,"results")