# Load etm package # Reference : # Arthur Allignol (2009). etm: Empirical Transition Matrix. R package version 0.4-6. > library(etm) # Load a sample data set. # type2 = "cens" indicates a censoring state. > event <- read.table("sampledataEC.dat", header=T) > event$type2 <- ifelse(event$estatus == 0, "cens", event$type2) > event[1:6,] id estart estop estatus type1 type2 1 1 0.000000 0.258846 1 0 1 2 1 0.258846 1.000000 0 1 cens 3 2 0.000000 0.083535 1 0 1 4 2 0.083535 0.338997 1 1 2 5 2 0.338997 0.364257 1 2 3 6 2 0.364257 1.000000 0 3 cens # Create a data frame for etm function > trdata <- event[,c("id","type1","type2","estop")] > dimnames(trdata)[[2]] <- c("id","from","to","time") > trdata$from <- as.character(trdata$from) > trdata$to <- as.character(trdata$to) > trdata[1:6,] id from to time 1 1 0 1 0.258846 2 1 1 cens 1.000000 3 2 0 1 0.083535 4 2 1 2 0.338997 5 2 2 3 0.364257 6 2 3 cens 1.000000 # Define the transition matrix > tra <- matrix(ncol=9, nrow=9, FALSE) > tra[1,2] <- TRUE > tra[2,3] <- TRUE > tra[3,4] <- TRUE > tra[4,5] <- TRUE > tra[5,6] <- TRUE > tra[6,7] <- TRUE > tra[7,8] <- TRUE > tra[8,9] <- TRUE > tra [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [1,] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [2,] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE [3,] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE [4,] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE [5,] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE [6,] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE [7,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE [8,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE [9,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE # data = trdata is the input data frame # state.names = 0, 1, 2, ..., 8 are the possible transition states # tra = tra is the transition matrix # cens.name = "cens" indicates a censoring time # s = 0 is the starting value for computing the transition probabilities > tr <- etm(trdata, c("0","1","2","3","4","5","6","7","8"), tra, "cens", 0) # List of output available from the tr object > attributes(tr) $names [1] "est" "cov" "time" "s" "t" "trans" [7] "state.names" "cens.name" "n.risk" "n.event" "delta.na" $class [1] "etm" # To list some event times at which the transition probabilities are computed > tr$time[c(1:5,length(tr$time))] [1] 0.001238 0.003151 0.006937 0.008268 0.009119 1.000000 # Obtaining Aalen-Johansen estimate of transition probability matrix at t = 1 # Reference: # Aalen 0. and Johansen S. (1978). An empirical transition matrix for non-homogeneous # Markov chains based on censored observations. Scandinavian Journal of Statistics, # 5: 141-150 > est <- tr$est[,,length(tr$time)] > est 0 1 2 3 4 5 6 7 8 0 0.1639261 0.2969284 0.2470731 0.1319243 0.09236413 0.04047041 0.02194808 0.005365594 0 1 0.0000000 0.1791447 0.2908358 0.1983015 0.17016376 0.08938665 0.05677938 0.015388232 0 2 0.0000000 0.0000000 0.1035794 0.1683914 0.25143687 0.22286741 0.19229340 0.061431524 0 3 0.0000000 0.0000000 0.0000000 0.1079637 0.24448346 0.28555386 0.27148810 0.090510835 0 4 0.0000000 0.0000000 0.0000000 0.0000000 0.13808193 0.34325842 0.38244565 0.136213992 0 5 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.35555556 0.46444444 0.180000000 0 6 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000 0.50000000 0.500000000 0 7 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 1.000000000 0 8 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000 1 # To get the estimated covariance matrix at t = 1 # Reference : # Andersen P.K., Borgan O., Gill R.D. and Keiding N. (1993). Statistical models based on # counting processes. Springer Series in Statistics. New York, NY: Springer > est.cov <- tr$cov[,,length(tr$time)] > est.cov[1:9,1:12] 0 0 1 0 2 0 3 0 4 0 5 0 6 0 7 0 8 0 0 1 1 1 2 1 ... 0 0 0.0008337584 0 0 0 0 0 0 0 0 -0.0003337303 0 0 1 0 0.0000000000 0 0 0 0 0 0 0 0 0.0000000000 0 0 2 0 0.0000000000 0 0 0 0 0 0 0 0 0.0000000000 0 0 3 0 0.0000000000 0 0 0 0 0 0 0 0 0.0000000000 0 0 4 0 0.0000000000 0 0 0 0 0 0 0 0 0.0000000000 0 0 5 0 0.0000000000 0 0 0 0 0 0 0 0 0.0000000000 0 0 6 0 0.0000000000 0 0 0 0 0 0 0 0 0.0000000000 0 0 7 0 0.0000000000 0 0 0 0 0 0 0 0 0.0000000000 0 0 8 0 0.0000000000 0 0 0 0 0 0 0 0 0.0000000000 0 0 : # To get P(0,1) from state 1 to 2 only > trprob(tr, "1 2", 1) [1] 0.2908358 # To get the variance of P(0,1) from state 1 to 2 only > trcov(tr, "1 2", 1) [1] 0.001813091 # To get all state occupancy probabilities in matrix form, P(0,t) > u <- tr$time > > probu <- NULL > for (k in 1:length(u)) { > tmp <- tr$est[,,k] > probu <- rbind(probu, tmp[1,]) > } > > probu <- data.frame(probu) > dimnames(probu)[[2]] <- c("P00","P01","P02","P03","P04","P05","P06","P07","P08") > probu[c(1:5,(nrow(probu)-5):nrow(probu)),] P00 P01 P02 P03 P04 P05 P06 P07 P08 1 0.9950000 0.0050000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000 0 2 0.9900000 0.0100000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000 0 3 0.9850000 0.0150000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000 0 4 0.9800000 0.0200000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000 0 5 0.9750000 0.0250000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000 0 : 380 0.1639261 0.3038337 0.2466588 0.1386309 0.07916644 0.04047041 0.02194808 0.005365594 0 381 0.1639261 0.3038337 0.2466588 0.1320295 0.08576791 0.04047041 0.02194808 0.005365594 0 382 0.1639261 0.3038337 0.2401677 0.1385205 0.08576791 0.04047041 0.02194808 0.005365594 0 383 0.1639261 0.2969284 0.2470731 0.1385205 0.08576791 0.04047041 0.02194808 0.005365594 0 384 0.1639261 0.2969284 0.2470731 0.1319243 0.09236413 0.04047041 0.02194808 0.005365594 0 385 0.1639261 0.2969284 0.2470731 0.1319243 0.09236413 0.04047041 0.02194808 0.005365594 0 # To compute the cumulative mean function > mu <- rep(0, nrow(probu)) > for (k in 1:8) { > mu <- mu + k*probu[,k+1] > } > mu[c(1:5,(length(mu)-4):length(mu))] [1] 0.005000 0.010000 0.015000 0.020000 0.025000 [6] 1.907911 1.914402 1.921307 1.927904 1.927904 # Plot of the cumulative mean functions - Aalen-Johansen estimate and true value postscript("etm.cmf.eventEC.eps", onefile=F, print.it=F) plot(u, mu, type="s", xlim=c(0,1), ylim=c(0,2), xlab="DAYS SINCE STUDY ENTRY", ylab="CUMULATIVE MEAN FUNCTION") lines(u, 2*u, lty=2) legend(0, 2, c("AALEN-JOHANSEN ESTIMATE","TRUE MEAN FUNCTION"), lty=c(1,2), bty="n") dev.off() # Plot of the state occupancy probabilities for state 1, 2, 3, and 4 postscript("etm.state.prob.eventEC.eps", onefile=F, print.it=F) par(mfrow=c(2,2), mai=c(0.75,0.75,0.5,0.5)) plot(u, probu$P01, type="s", xlim=c(0,1), ylim=c(0,0.4), xlab="DAYS SINCE STUDY ENTRY", ylab="STATE OCCUPANCY PROBABILITY") mtext("STATE 1", side=3, line=0.5) plot(u, probu$P02, type="s", xlim=c(0,1), ylim=c(0,0.4), xlab="DAYS SINCE STUDY ENTRY", ylab="STATE OCCUPANCY PROBABILITY") mtext("STATE 2", side=3, line=0.5) plot(u, probu$P03, type="s", xlim=c(0,1), ylim=c(0,0.4), xlab="DAYS SINCE STUDY ENTRY", ylab="STATE OCCUPANCY PROBABILITY") mtext("STATE 3", side=3, line=0.5) plot(u, probu$P04, type="s", xlim=c(0,1), ylim=c(0,0.4), xlab="DAYS SINCE STUDY ENTRY", ylab="STATE OCCUPANCY PROBABILITY") mtext("STATE 4", side=3, line=0.5) dev.off()