GlnLM-EffLocDisp-DT-fctNM-01_2021.R 12.45 KiB
#####################################################################################################
## Modele ln-lineaire generalise                                                                   ##
## Gestion donnees en DataTable - Optimisation Nelder-Mead en fonction                             ##
## 2020-01-20                                                                                      ##
## source("ANC/GlnLM-EffLocDisp-DT-fctNM.R")                                                       ##
#####################################################################################################
options(width=100);
ok <-1;
if (ok == 1) {
  rm(list = ls());
library(data.table);
library(numDeriv);
## Repertoire de travail ##
chem <- "/Users/eva.falipou/Documents/Eva/ANC/ln_linaire/";
#####################################################################################################
## Lecture donnees - Exemple : analyse NTK avec loq=1                                              ##
#####################################################################################################
ok <-1;
if (ok == 1) {
  fin <- paste(chem,"BDD_exemple_ln-lineaire.csv",sep="");
  don <- fread(fin,dec=",",sep=";");
  don[,y:=NTK]; # choix de la variable dependante
  ## Libelle variable dependante pour documenter affichages ##
  labvdep <- "NTK";
  ## Left-censoring ##
  loq <- 1.0; # LQ correspondant a la variable dependante 
  don[,lb:=y];
  don[y<=loq,lb:=0];
  don[,ub:=y];
  don[y<=loq,ub:=loq];
  ## Variables explicatives ##
  don[,D0:=ifelse(Dispositif=="D0",1,0)];
  don[,D2:=ifelse(Dispositif=="D2",1,0)];
  don[,inf2ans:=ifelse(classe_age=="< 2 ans",1,0)];
  don[,inf70pct:=ifelse(classe_charge=="< 70%",1,0)];
  ## Nettoyage ##
  don <- don[,.(y,lb,ub,D0,D2,inf2ans,inf70pct)];
  don <- na.omit(don);
  ## Effects on location (median) ##
  zm <- c("D0");
  #zm <- c();## No effect on location ##
  nbzm <- length(zm);
  ## Effects on dispersion ##
  zd <- c();
  #zd <- c("D1","D2","inf2ans","inf70pct");## No effect on dispersion ##
  nbzd <- length(zd);
#####################################################################################################
## Log-Likelihood calculation - Based on log-normal distribution                                   ##
#####################################################################################################
funk <- function(theta) {## theta : parameter vector (beta0,beta1,gamma0,gamma1) ##
  n <- don[,.N];
  ## Regression - Effects on median ##
  if (nbzm > 0) {
    Zm <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zm,with=F]));
  } else {
    Zm <- array(1, dim=c(n,1));
  zbm <- Zm%*%theta[1:(1+nbzm)];
  ## Regression - Effects on dispersion ##
  if (nbzd > 0) {
    Zd <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zd,with=F]));
  } else {
    Zd <- array(1, dim=c(n,1));
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
} zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)]; s <- exp(zbd); ## Log-Likelihood calculation ## ll <- 0; li <- don[, which(lb == 0)]; if (length(li) > 0) { ll <- sum(log(plnorm(q=don[li, ub],meanlog=zbm[li],sdlog=s[li]))); } li <- don[, which(lb > 0 & ub > lb)]; if (length(li) > 0) { Fu <- plnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]); Fl <- plnorm(don[li, lb],meanlog=zbm[li],sdlog=s[li]); ll <- ll + sum(log(Fu - Fl)); } li <- don[, which(lb > 0 & ub == lb)]; if (length(li) > 0) { f <- dlnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]); ll <- ll + sum(log(f)); } if(is.na(ll) | is.infinite(ll)) { ll <- -1.0e200; ## Checking calculation in case of data problem ## #cat("theta = ", theta, " - ll = ", -ll, "\n"); } -ll; ## minimize -ll <=> maximize ll ## } ## Amotry Function ########################################################################### amotry <- function(p, y, psum, ndim, ihi, fac) { ptry <- numeric(); fac1 <- (1 - fac) / ndim; fac2 <- fac1 - fac; for(j in 1:ndim) ptry[j] <- psum[j] * fac1 - p[ihi,j] * fac2; ptry <- pmin(pmax(ptry, lowb), uppb); ## Bounds ytry <- funk(ptry); if(ytry < y[ihi]) { y[ihi] <- ytry; for(j in 1:ndim) { psum[j] <- psum[j] + ptry[j] - p[ihi,j]; p[ihi,j] <- ptry[j]; } } list(ytry, p, y, psum); } ## Nelder-Mead calibration ## NMcal <- function(NMpar) { ## Libelle variable dependante ## labvdep <- NMpar[[1]]; ## Libelle parametres ## labtheta <- NMpar[[2]]; ## Nb parametres a estimer ## nbp <- length(labtheta); ## Valeurs de reference des parametres pour test H0 ## thref <- NMpar[[3]]; ## Valeurs initiales des parametres et bornes ## theta <- NMpar[[4]];print(theta); lowb <- NMpar[[5]]; uppb <- NMpar[[6]]; ## Pas de recherche initial ## lbd <- NMpar[[7]]; ## Initialisation simplex #################################################################### ndim <- length(theta); mpts <- ndim + 1; p <- array(0, c(mpts, ndim)); p[1,] <- theta; for(i in 2:mpts) { p[i,] <- p[1,]; p[i,i-1] <- p[i,i-1] + lbd[i-1];
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
} #print(p); psum <- numeric(); y <- numeric(); yr <- numeric(); for(i in 1:mpts) y[i] <- funk(p[i,]); ftol <- 1e-8; maxiter <- 5000; #maxiter <- 1; maxcycle <- 0; tiny <- 1e-10; ############################################################################################## iter <- 0; cycle <- 0; for(j in 1:ndim) psum[j] <- sum(p[,j]); swap <- numeric(); repeat { iter <- iter + 1; if (iter == 1 | round(iter / 100) == iter / 100) { cat("Iteration", iter, "\n"); print(cbind(p, y)); } ## Rank points of simplex ################################################################## yr <- rank(y, ties.method="first"); for(i in 1:mpts) { if(yr[i] == 1) ilo <- i; if(yr[i] == ndim) inhi <- i; if(yr[i] == mpts) {ihi <- i;yhi <- y[i];} } ## Test whether convergence achieved ####################################################### rtol <- 2 * abs(y[ihi] - y[ilo]) / (abs(y[ihi]) + abs(y[ilo]) + tiny); if(rtol < ftol){ print("precision reached"); converge <- 1; } if(iter >= maxiter){ print("maxiter reached"); converge <- 0; } if(rtol < ftol || iter >= maxiter) { swap[1] <- y[1];y[1] <- y[ilo];y[ilo] <- swap[1]; swap <- p[1,];p[1,] <- p[ilo,];p[ilo,] <- swap; print("Iteration", quote=FALSE);print(iter, quote=FALSE); print(cbind(p, y)); print("", quote=FALSE); ## Display Calibration Results ######################################################## print("Final", quote=FALSE);print(c(p[1,], y[1])); ## Covariance matrix ## covmat <- tryCatch(expr={solve(hessian(funk,p[1,]))},error=function(e){hinv <- 0;return(array(NA,dim=c(nbp,nbp)))}); hinv <- ifelse(sum(is.na(covmat))>0,0,1); cat("Covariance matrix :\n"); print(covmat); cat("\n"); cat("Dependent variable : ",labvdep,"\n"); ## Table of estimates ## cat(format("Label", width=15)); cat(format("Estimate", width=15, justify="right")); cat(format("Std. Dev.", width=15, justify="right")); cat(format("Ref", width=7, justify="right")); cat(format("Chi2", width=15, justify="right")); cat(format("DF", width=4, justify="right")); cat(format("Pval", width=11, justify="right"), "\n"); for (j in 1:length(theta)) { thtest <- p[1,]; thtest[j] <- thref[j]; #chi2 <- 2 * abs(y[1] - funk(thtest)); #stdev <- abs(p[1,j] - thref[j]) / sqrt(chi2); stdev <- sqrt(covmat[j,j]); chi2 <- ((p[1,j] - thref[j])^2/covmat[j,j]); df <- 1;
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
pval <- pchisq(chi2, df, lower.tail=FALSE); cat(format(labtheta[j], width=15)); cat(formatC(p[1,j], digits=4, format="e", width=15)); cat(formatC(stdev, digits=4, format="e", width=15)); cat(format(thref[j], digits=2, width=7)); cat(formatC(chi2, digits=4, format="e", width=15)); cat(format(df, digits=1, width=4)); cat(formatC(pval, digits=4, format="f", width=11), "\n"); } if (cycle < maxcycle) { for(i in 2:mpts) { p[i,] <- p[1,]; p[i,i-1] <- p[i,i-1] + lbd[i-1]; } for(i in 1:mpts) y[i] <- funk(p[i,]); iter <- 0; cycle <- cycle + 1; } else break; } ## New iteration ########################################################################### lamo <- amotry(p, y, psum, ndim, ihi, -1); ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; ##print("Factor -1"); ##print(cbind(p, y)); if(ytry <= y[ilo]) { lamo <- amotry(p, y, psum, ndim, ihi, 2); ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; ##print("Factor 2"); ##print(cbind(p, y)); } else if(ytry >= y[inhi]) { ysave <- y[ihi]; lamo <- amotry(p, y, psum, ndim, ihi, .5); ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; ##print("Factor .5"); ##print(cbind(p, y)); if(ytry >= ysave) { for(i in 1:mpts) { if(i != ilo) { for(j in 1:ndim) p[i,j] <- 0.5 * (p[i,j] + p[ilo,j]); y[i] <- funk(p[i,]); } } for(j in 1:ndim) psum[j] <- sum(p[,j]); } } } list(p[1,],converge,hinv,covmat); } ##################################################################################################### ## Variance of theoretical percent-quantile ## ##################################################################################################### delt <- function(tab,percent,covm) { n <- tab[,.N]; if (nbzm > 0) { Zm <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zm,with=F])); } else { Zm <- array(1, dim=c(n,1)); } if (nbzd > 0) { Zd <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zd,with=F])); } else { Zd <- array(1, dim=c(n,1)); } zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)]; Zmd <- Zm; for(j in 1:(nbzd+1)) Zmd <- cbind(Zmd, Zd[,j] * exp(zbd) * qnorm(percent)); diag(Zmd%*%covm%*%t(Zmd));
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
} ################################################################################################### ## Model parameter estimation ## ################################################################################################### ok <- 1; if (ok == 1) { ## Libelle parametres ## labtheta <- c("beta0",zm,"gamma0",zd); ## Nb parametres a estimer ## nbp <- length(labtheta); ## Valeurs de reference des parametres pour test H0 ## thref <- rep(0,nbp); ## Valeurs initiales des parametres et bornes ## theta <- rep(0,nbp); lowb <- rep(-Inf,nbp); uppb <- rep(+Inf,nbp); ## Pas de recherche initial ## lbd <- rep(0.5,nbp); NMpar <- list(labvdep,labtheta,theta,thref,lowb,uppb,lbd); calage <- NMcal(NMpar); thopt <- calage[[1]]; converge <- calage[[2]]; hinv <- calage[[3]]; covmat <- calage[[4]]; ##################################################################################################### ## Quantile estimation ## ##################################################################################################### percent <- 0.5;## Quantile probability ## ## Quantiles are estimated by fac variables ## fac <- unique(c(zm,zd)); ## Empirical (observed) quantiles ## qua <- don[,.(qobs=quantile(ub,probs=percent,na.rm=T),nbobs=.N),by=fac]; n <- qua[,.N]; ## Regression - Effects on median ## if (nbzm > 0) { Zm <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zm,with=F])); } else { Zm <- array(1,dim=c(n,1)); } zbm <- Zm%*%thopt[1:(1+nbzm)]; ## Regression - Effects on dispersion ## if (nbzd > 0) { Zd <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zd,with=F])); } else { Zd <- array(1,dim=c(n,1)); } zbd <- Zd%*%thopt[(2+nbzm):(2+nbzm+nbzd)]; #s <- exp(zbd); qua[,qcal:=exp(zbm+exp(zbd)*qnorm(percent))]; ## Left-censored counts ## lce <- don[lb<ub,.(nblce=.N),by=fac]; qua <- merge(qua,lce,by=fac,all=TRUE); qua[is.na(nblce),nblce:=0]; ## Variance of theoretical quantile ## qua[,vqc:=delt(qua,percent,covmat)]; ## Lower and upper bounds of theoretical quantile ## qua[,lbqc:=qcal*exp(-1.96*vqc^0.5)]; qua[,ubqc:=qcal*exp(+1.96*vqc^0.5)]; ## Displaying observed and theoretical quantile ## cat("\n"); cat("Dependent variable : ",labvdep," - Table of observed and theoretical ",percent*100,"%-quantiles:\n",sep=""); #print(qua[, .(Z1,Z2,nbobs,nblce,qobs,qcal,vqc,lbqc,ubqc)]); print(qua[,c(fac,"nbobs","nblce","qobs","qcal","vqc","lbqc","ubqc"),with=F]); }