# Functions for the book 「Rによる保健医療データ解析演習」 # (C) Minato Nakazawa, 2007. SIQR <- function(x) { (fivenum(x)[4]-fivenum(x)[2])/2 } truemedian <- function(X,h=1) { YY <- rep(0,length(X)) XX <- table(X) q <- length(XX) k <- 0 for (i in 1:q) { L <- as.numeric(names(XX)[i])-h/2 for (j in 1:XX[[i]]) { k <- k+1 YY[k] <- L+h*(2*j-1)/(2*XX[[i]]) } } # 真の値を表示するにはprint(YY) median(YY) } geary.test <- function(X) { m.X <- mean(X) l.X <- length(X) G <- sum(abs(X-m.X))/sqrt(l.X*sum((X-m.X)^2)) p <- (1-pnorm((G-sqrt(2/pi))/sqrt(1-3/pi)*sqrt(l.X)))*2 cat("Geary's test for normality: G=",G," / p=",p,"\n") } gstem <- function (X,D=1) { .stem.out <- capture.output(stem(X,D)) .stem.len <- length(.stem.out) plot(c(1,2),c(1,.stem.len),type="n",axes=F,xlab="",ylab="") text(rep(1,.stem.len),.stem.len:1,.stem.out,pos=4) } riskratio2 <- function(X,Y,m1,m2) { # c10-4.Rより採録 data <- matrix(c(X,Y,m1-X,m2-Y,m1,m2),nr=2) colnames(data) <- c("疾病あり","疾病なし","合計") rownames(data) <- c("曝露群","対照群") print(data) RR <- (X/m1)/(Y/m2) n1 <- X+Y; T <- m1+m2; n2 <- T-n1 p.v <- 2*(1-pnorm(abs((X-n1*m1/T)/sqrt(n1*n2*m1*m2/T/T/(T-1))))) RRL <- RR*exp(-qnorm(0.975)*sqrt(1/X-1/m1+1/Y-1/m2)) RRU <- RR*exp(qnorm(0.975)*sqrt(1/X-1/m1+1/Y-1/m2)) cat("リスク比の点推定量:",RR,"(p=",p.v,") 95%信頼区間=[",RRL,",",RRU,"]\n") } oddsratio2 <- function(a,b,c,d) { # c10-5.Rより採録 data <- matrix(c(a,b,a+b,c,d,c+d,a+c,b+d,a+b+c+d),nr=3) colnames(data) <- c("疾病あり","疾病なし","合計") rownames(data) <- c("曝露群","対照群","合計") print(data) OR <- (a*d)/(b*c) N1 <- a+c; M1 <- a+b; N0 <- b+d; M0 <- c+d; T <- a+b+c+d p.v <- 2*(1-pnorm(abs((a-N1*M1/T)/sqrt(N1*N0*M1*M0/T/T/(T-1))))) ORL <- OR*exp(-qnorm(0.975)*sqrt(1/a+1/b+1/c+1/d)) ORU <- OR*exp(qnorm(0.975)*sqrt(1/a+1/b+1/c+1/d)) cat("オッズ比の点推定量:",OR," (p=",p.v, ") 95%信頼区間 = [",ORL,",",ORU,"]\n") } kappa.test <- function(x) { # c10-7.Rより採録 x <- as.matrix(x) a <- x[1,1]; b <- x[1,2]; c <- x[2,1]; d <- x[2,2] m1 <- a+b; m2 <- c+d; n1 <- a+c; n2 <- b+d; N <- sum(x) Pe <- (n1*m1/N+n2*m2/N)/N Po <- (a+d)/N kappa <- (Po-Pe)/(1-Pe) seK0 <- sqrt(Pe/(N*(1-Pe))) seK <- sqrt(Po*(1-Po)/(N*(1-Pe)^2)) p.value <- 1-pnorm(kappa/seK0) kappaL<-kappa-qnorm(0.975)*seK kappaU<-kappa+qnorm(0.975)*seK list(kappa=kappa,conf.int=c(kappaL,kappaU),p.value=p.value) } roc <- function(values,iscase) { cutoffs <- unique(sort(values)) cutoffs <- c(cutoffs,max(values)+1) ns <- length(cutoffs) sensitivity <- rep(0,ns) falsepositive <- rep(0,ns) dist <- rep(0,ns) aucp <- rep(0,ns) D <- sum(iscase==1) H <- sum(iscase==0) for (i in 1:ns) { cutoff <- cutoffs[i] positives <- ifelse(values >= cutoff,1,0) PinD <- sum(positives==1 & iscase==1) NinH <- sum(positives==0 & iscase==0) sensitivity[i] <- PinD/D falsepositive[i] <- 1-NinH/H dist[i] <- sqrt((PinD/D)^2+(NinH/H)^2) aucp[i] <- ifelse(i==1,(1-falsepositive[i])*sensitivity[i], (falsepositive[i-1]-falsepositive[i])*sensitivity[i]) } list(cutoffs,sensitivity,falsepositive,dist,aucp) } rocc <- function(...) { res <- roc(...) cat("cutoff\tsensitivity\t1-specificity\tdistance\n", sprintf("%5.3f\t%5.3f\t%5.3f\t%5.3f\n", res[[1]],res[[2]],res[[3]],res[[4]])) mlcs <- "最適カットオフポイント:%5.3f,感度:%5.3f,特異度%5.3f\nAUC=%5.3f\n" mlcc <- which.max(res[[4]]) cat(sprintf(mlcs,res[[1]][mlcc],res[[2]][mlcc],1-res[[3]][mlcc],sum(res[[5]]))) plot(res[[3]],res[[2]],type="l",lwd=2,xlab="1-特異度(specificity)", ylab="感度(sensitivity)") lines(c(0,1),c(0,1),lwd=1,lty=2) } # NagelkerkeのR^2を計算する関数定義。引数はglm()の結果 NagelkerkeR2 <- function(rr) { print(n <- nrow(rr$model)) (1-exp((rr$dev-rr$null)/n))/(1-exp(-rr$null/n)) } VIF <- function(X) { 1/(1-summary(X)$r.squared) } # 二重対数プロット。引数はsurvfitの結果。 loglogplot <- function(X) { S <- X$surv T <- X$time G <- X$ntimes.strata GG <- names(X$strata) GX <- rep(GG,G) xr <- c(0,max(T)*1.5) mas <- ifelse(max(S)==1,0.99,max(S)) mis <- ifelse(min(S)==0,0.01,min(S)) yr <- c(log(-log(mas)),log(-log(mis))) plot(T[GX==GG[1]],log(-log(S[GX==GG[1]])),type="l",lty=1, xlim=xr,ylim=yr,xlab="time",ylab="log(-log(S))", main="二重対数プロット") for (i in 2:length(GG)) { lines(T[GX==GG[i]],log(-log(S[GX==GG[i]])),lty=i) } legend(max(T),-2,legend=GG,lty=1:length(GG)) }