diff --git a/GlnLM-EffLocDisp-DT-fctNM-01_2021.R b/GlnLM-EffLocDisp-DT-fctNM-01_2021.R deleted file mode 100644 index 37d822e554106bcdbcf803a9436fb3a83fa14bb3..0000000000000000000000000000000000000000 --- a/GlnLM-EffLocDisp-DT-fctNM-01_2021.R +++ /dev/null @@ -1,345 +0,0 @@ -##################################################################################################### -## Modele ln-lineaire generalise ## -## Gestion donnees en DataTable - Optimisation Nelder-Mead en fonction ## -## 2020-01-20 ## -## source("ANC/GlnLM-EffLocDisp-DT-fctNM.R") ## -##################################################################################################### - -options(width=100); -ok <-1; -if (ok == 1) { - rm(list = ls()); -} - -library(data.table); -library(numDeriv); - -## Repertoire de travail ## -chem <- "/Users/eva.falipou/Documents/Eva/ANC/ln_linaire/"; - -##################################################################################################### -## Lecture donnees - Exemple : analyse NTK avec loq=1 ## -##################################################################################################### -ok <-1; -if (ok == 1) { - fin <- paste(chem,"BDD_exemple_ln-lineaire.csv",sep=""); - don <- fread(fin,dec=",",sep=";"); - don[,y:=NTK]; # choix de la variable dependante - ## Libelle variable dependante pour documenter affichages ## - labvdep <- "NTK"; - ## Left-censoring ## - loq <- 1.0; # LQ correspondant a la variable dependante - don[,lb:=y]; - don[y<=loq,lb:=0]; - don[,ub:=y]; - don[y<=loq,ub:=loq]; - ## Variables explicatives ## - don[,D0:=ifelse(Dispositif=="D0",1,0)]; - don[,D2:=ifelse(Dispositif=="D2",1,0)]; - don[,inf2ans:=ifelse(classe_age=="< 2 ans",1,0)]; - don[,inf70pct:=ifelse(classe_charge=="< 70%",1,0)]; - ## Nettoyage ## - don <- don[,.(y,lb,ub,D0,D2,inf2ans,inf70pct)]; - don <- na.omit(don); - ## Effects on location (median) ## - zm <- c("D0"); - #zm <- c();## No effect on location ## - nbzm <- length(zm); - ## Effects on dispersion ## - zd <- c(); - #zd <- c("D1","D2","inf2ans","inf70pct");## No effect on dispersion ## - nbzd <- length(zd); -} - -##################################################################################################### -## Log-Likelihood calculation - Based on log-normal distribution ## -##################################################################################################### -funk <- function(theta) {## theta : parameter vector (beta0,beta1,gamma0,gamma1) ## - n <- don[,.N]; - ## Regression - Effects on median ## - if (nbzm > 0) { - Zm <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zm,with=F])); - } else { - Zm <- array(1, dim=c(n,1)); - } - zbm <- Zm%*%theta[1:(1+nbzm)]; - ## Regression - Effects on dispersion ## - if (nbzd > 0) { - Zd <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zd,with=F])); - } else { - Zd <- array(1, dim=c(n,1)); - } - zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)]; - s <- exp(zbd); - ## Log-Likelihood calculation ## - ll <- 0; - li <- don[, which(lb == 0)]; - if (length(li) > 0) { - ll <- sum(log(plnorm(q=don[li, ub],meanlog=zbm[li],sdlog=s[li]))); - } - li <- don[, which(lb > 0 & ub > lb)]; - if (length(li) > 0) { - Fu <- plnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]); - Fl <- plnorm(don[li, lb],meanlog=zbm[li],sdlog=s[li]); - ll <- ll + sum(log(Fu - Fl)); - } - li <- don[, which(lb > 0 & ub == lb)]; - if (length(li) > 0) { - f <- dlnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]); - ll <- ll + sum(log(f)); - } - if(is.na(ll) | is.infinite(ll)) { - ll <- -1.0e200; - ## Checking calculation in case of data problem ## - #cat("theta = ", theta, " - ll = ", -ll, "\n"); - } - -ll; ## minimize -ll <=> maximize ll ## -} - -## Amotry Function ########################################################################### -amotry <- function(p, y, psum, ndim, ihi, fac) { - ptry <- numeric(); - fac1 <- (1 - fac) / ndim; - fac2 <- fac1 - fac; - for(j in 1:ndim) ptry[j] <- psum[j] * fac1 - p[ihi,j] * fac2; - ptry <- pmin(pmax(ptry, lowb), uppb); ## Bounds - ytry <- funk(ptry); - if(ytry < y[ihi]) { - y[ihi] <- ytry; - for(j in 1:ndim) { - psum[j] <- psum[j] + ptry[j] - p[ihi,j]; - p[ihi,j] <- ptry[j]; - } - } - list(ytry, p, y, psum); -} - -## Nelder-Mead calibration ## -NMcal <- function(NMpar) { - ## Libelle variable dependante ## - labvdep <- NMpar[[1]]; - ## Libelle parametres ## - labtheta <- NMpar[[2]]; - ## Nb parametres a estimer ## - nbp <- length(labtheta); - ## Valeurs de reference des parametres pour test H0 ## - thref <- NMpar[[3]]; - ## Valeurs initiales des parametres et bornes ## - theta <- NMpar[[4]];print(theta); - lowb <- NMpar[[5]]; - uppb <- NMpar[[6]]; - ## Pas de recherche initial ## - lbd <- NMpar[[7]]; - ## Initialisation simplex #################################################################### - ndim <- length(theta); - mpts <- ndim + 1; - p <- array(0, c(mpts, ndim)); - p[1,] <- theta; - for(i in 2:mpts) { - p[i,] <- p[1,]; - p[i,i-1] <- p[i,i-1] + lbd[i-1]; - } - #print(p); - psum <- numeric(); - y <- numeric(); - yr <- numeric(); - for(i in 1:mpts) y[i] <- funk(p[i,]); - ftol <- 1e-8; - maxiter <- 5000; - #maxiter <- 1; - maxcycle <- 0; - tiny <- 1e-10; - ############################################################################################## - iter <- 0; - cycle <- 0; - for(j in 1:ndim) psum[j] <- sum(p[,j]); - swap <- numeric(); - repeat { - iter <- iter + 1; - if (iter == 1 | round(iter / 100) == iter / 100) { - cat("Iteration", iter, "\n"); - print(cbind(p, y)); - } - ## Rank points of simplex ################################################################## - yr <- rank(y, ties.method="first"); - for(i in 1:mpts) { - if(yr[i] == 1) ilo <- i; - if(yr[i] == ndim) inhi <- i; - if(yr[i] == mpts) {ihi <- i;yhi <- y[i];} - } - ## Test whether convergence achieved ####################################################### - rtol <- 2 * abs(y[ihi] - y[ilo]) / (abs(y[ihi]) + abs(y[ilo]) + tiny); - if(rtol < ftol){ - print("precision reached"); - converge <- 1; - } - if(iter >= maxiter){ - print("maxiter reached"); - converge <- 0; - } - if(rtol < ftol || iter >= maxiter) { - swap[1] <- y[1];y[1] <- y[ilo];y[ilo] <- swap[1]; - swap <- p[1,];p[1,] <- p[ilo,];p[ilo,] <- swap; - print("Iteration", quote=FALSE);print(iter, quote=FALSE); - print(cbind(p, y)); - print("", quote=FALSE); - ## Display Calibration Results ######################################################## - print("Final", quote=FALSE);print(c(p[1,], y[1])); - ## Covariance matrix ## - covmat <- tryCatch(expr={solve(hessian(funk,p[1,]))},error=function(e){hinv <- 0;return(array(NA,dim=c(nbp,nbp)))}); - hinv <- ifelse(sum(is.na(covmat))>0,0,1); - cat("Covariance matrix :\n"); - print(covmat); - cat("\n"); - cat("Dependent variable : ",labvdep,"\n"); - ## Table of estimates ## - cat(format("Label", width=15)); - cat(format("Estimate", width=15, justify="right")); - cat(format("Std. Dev.", width=15, justify="right")); - cat(format("Ref", width=7, justify="right")); - cat(format("Chi2", width=15, justify="right")); - cat(format("DF", width=4, justify="right")); - cat(format("Pval", width=11, justify="right"), "\n"); - for (j in 1:length(theta)) { - thtest <- p[1,]; - thtest[j] <- thref[j]; - #chi2 <- 2 * abs(y[1] - funk(thtest)); - #stdev <- abs(p[1,j] - thref[j]) / sqrt(chi2); - stdev <- sqrt(covmat[j,j]); - chi2 <- ((p[1,j] - thref[j])^2/covmat[j,j]); - df <- 1; - pval <- pchisq(chi2, df, lower.tail=FALSE); - cat(format(labtheta[j], width=15)); - cat(formatC(p[1,j], digits=4, format="e", width=15)); - cat(formatC(stdev, digits=4, format="e", width=15)); - cat(format(thref[j], digits=2, width=7)); - cat(formatC(chi2, digits=4, format="e", width=15)); - cat(format(df, digits=1, width=4)); - cat(formatC(pval, digits=4, format="f", width=11), "\n"); - } - if (cycle < maxcycle) { - for(i in 2:mpts) { - p[i,] <- p[1,]; - p[i,i-1] <- p[i,i-1] + lbd[i-1]; - } - for(i in 1:mpts) y[i] <- funk(p[i,]); - iter <- 0; - cycle <- cycle + 1; - } - else break; - } - ## New iteration ########################################################################### - lamo <- amotry(p, y, psum, ndim, ihi, -1); - ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; - ##print("Factor -1"); - ##print(cbind(p, y)); - if(ytry <= y[ilo]) { - lamo <- amotry(p, y, psum, ndim, ihi, 2); - ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; - ##print("Factor 2"); - ##print(cbind(p, y)); - } - else if(ytry >= y[inhi]) { - ysave <- y[ihi]; - lamo <- amotry(p, y, psum, ndim, ihi, .5); - ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; - ##print("Factor .5"); - ##print(cbind(p, y)); - if(ytry >= ysave) { - for(i in 1:mpts) { - if(i != ilo) { - for(j in 1:ndim) p[i,j] <- 0.5 * (p[i,j] + p[ilo,j]); - y[i] <- funk(p[i,]); - } - } - for(j in 1:ndim) psum[j] <- sum(p[,j]); - } - } - } - list(p[1,],converge,hinv,covmat); -} - -##################################################################################################### -## Variance of theoretical percent-quantile ## -##################################################################################################### -delt <- function(tab,percent,covm) { - n <- tab[,.N]; - if (nbzm > 0) { - Zm <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zm,with=F])); - } else { - Zm <- array(1, dim=c(n,1)); - } - if (nbzd > 0) { - Zd <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zd,with=F])); - } else { - Zd <- array(1, dim=c(n,1)); - } - zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)]; - Zmd <- Zm; - for(j in 1:(nbzd+1)) Zmd <- cbind(Zmd, Zd[,j] * exp(zbd) * qnorm(percent)); - diag(Zmd%*%covm%*%t(Zmd)); -} - -################################################################################################### -## Model parameter estimation ## -################################################################################################### -ok <- 1; -if (ok == 1) { - ## Libelle parametres ## - labtheta <- c("beta0",zm,"gamma0",zd); - ## Nb parametres a estimer ## - nbp <- length(labtheta); - ## Valeurs de reference des parametres pour test H0 ## - thref <- rep(0,nbp); - ## Valeurs initiales des parametres et bornes ## - theta <- rep(0,nbp); - lowb <- rep(-Inf,nbp); - uppb <- rep(+Inf,nbp); - ## Pas de recherche initial ## - lbd <- rep(0.5,nbp); - NMpar <- list(labvdep,labtheta,theta,thref,lowb,uppb,lbd); - calage <- NMcal(NMpar); - thopt <- calage[[1]]; - converge <- calage[[2]]; - hinv <- calage[[3]]; - covmat <- calage[[4]]; - ##################################################################################################### - ## Quantile estimation ## - ##################################################################################################### - percent <- 0.5;## Quantile probability ## - ## Quantiles are estimated by fac variables ## - fac <- unique(c(zm,zd)); - ## Empirical (observed) quantiles ## - qua <- don[,.(qobs=quantile(ub,probs=percent,na.rm=T),nbobs=.N),by=fac]; - n <- qua[,.N]; - ## Regression - Effects on median ## - if (nbzm > 0) { - Zm <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zm,with=F])); - } else { - Zm <- array(1,dim=c(n,1)); - } - zbm <- Zm%*%thopt[1:(1+nbzm)]; - ## Regression - Effects on dispersion ## - if (nbzd > 0) { - Zd <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zd,with=F])); - } else { - Zd <- array(1,dim=c(n,1)); - } - zbd <- Zd%*%thopt[(2+nbzm):(2+nbzm+nbzd)]; - #s <- exp(zbd); - qua[,qcal:=exp(zbm+exp(zbd)*qnorm(percent))]; - ## Left-censored counts ## - lce <- don[lb<ub,.(nblce=.N),by=fac]; - qua <- merge(qua,lce,by=fac,all=TRUE); - qua[is.na(nblce),nblce:=0]; - ## Variance of theoretical quantile ## - qua[,vqc:=delt(qua,percent,covmat)]; - ## Lower and upper bounds of theoretical quantile ## - qua[,lbqc:=qcal*exp(-1.96*vqc^0.5)]; - qua[,ubqc:=qcal*exp(+1.96*vqc^0.5)]; - ## Displaying observed and theoretical quantile ## - cat("\n"); - cat("Dependent variable : ",labvdep," - Table of observed and theoretical ",percent*100,"%-quantiles:\n",sep=""); - #print(qua[, .(Z1,Z2,nbobs,nblce,qobs,qcal,vqc,lbqc,ubqc)]); - print(qua[,c(fac,"nbobs","nblce","qobs","qcal","vqc","lbqc","ubqc"),with=F]); -}