Script Yves

2c074ed1 · Falipou Eva · d1d12e3e · 2c074ed1
Commit 2c074ed1 authored 4 years ago by Falipou Eva
Hide whitespace changes
Inline Side-by-side

Showing

with 345 additions and 0 deletions
+345 -0
--- a/GlnLM-EffLocDisp-DT-fctNM-01_2021.R
+++ b/GlnLM-EffLocDisp-DT-fctNM-01_2021.R
+###############################e#####################################################################
+## Modele ln-lineaire generalis                                                                   ##
+## Gestion donnees en DataTable - Optimisation Nelder-Mead en fonction                             ##
+## 2020-01-20                                                                                      ##
+## source("ANC/GlnLM-EffLocDisp-DT-fctNM.R")                                                       ##
+#####################################################################################################
+options(width=100);
+ok <-1;
+if (ok == 1) {
+  rm(list = ls());
+}
+library(data.table);
+library(numDeriv);
+## Repertoire de travail ##
+chem <- "/Users/eva.falipou/Documents/Eva/ANC/ln_linaire/";
+#####################################################################################################
+## Lecture donnees - Exemple : analyse NTK avec loq=1                                              ##
+#####################################################################################################
+ok <-1;
+if (ok == 1) {
+  fin <- paste(chem,"BDD_exemple_ln-lineaire.csv",sep="");
+  don <- fread(fin,dec=",",sep=";");
+  don[,y:=NTK]; # choix de la variable dependante
+  ## Libelle variable dependante pour documenter affichages ##
+  labvdep <- "NTK";
+  ## Left-censoring ##
+  loq <- 1.0; # LQ correspondant a la variable dependante 
+  don[,lb:=y];
+  don[y<=loq,lb:=0];
+  don[,ub:=y];
+  don[y<=loq,ub:=loq];
+  ## Variables explicatives ##
+  don[,D0:=ifelse(Dispositif=="D0",1,0)];
+  don[,D2:=ifelse(Dispositif=="D2",1,0)];
+  don[,inf2ans:=ifelse(classe_age=="< 2 ans",1,0)];
+  don[,inf70pct:=ifelse(classe_charge=="< 70%",1,0)];
+  ## Nettoyage ##
+  don <- don[,.(y,lb,ub,D0,D2,inf2ans,inf70pct)];
+  don <- na.omit(don);
+  ## Effects on location (median) ##
+  zm <- c("D0");
+  #zm <- c();## No effect on location ##
+  nbzm <- length(zm);
+  ## Effects on dispersion ##
+  zd <- c();
+  #zd <- c("D1","D2","inf2ans","inf70pct");## No effect on dispersion ##
+  nbzd <- length(zd);
+}
+#####################################################################################################
+## Log-Likelihood calculation - Based on log-normal distribution                                   ##
+#####################################################################################################
+funk <- function(theta) {## theta : parameter vector (beta0,beta1,gamma0,gamma1) ##
+  n <- don[,.N];
+  ## Regression - Effects on median ##
+  if (nbzm > 0) {
+    Zm <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zm,with=F]));
+  } else {
+    Zm <- array(1, dim=c(n,1));
+  }
+  zbm <- Zm%*%theta[1:(1+nbzm)];
+  ## Regression - Effects on dispersion ##
+  if (nbzd > 0) {
+    Zd <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zd,with=F]));
+  } else {
+    Zd <- array(1, dim=c(n,1));
+  }
+  zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)];
+  s <- exp(zbd);
+  ## Log-Likelihood calculation ##
+  ll <- 0;
+  li <- don[, which(lb == 0)];
+  if (length(li) > 0) {
+    ll <- sum(log(plnorm(q=don[li, ub],meanlog=zbm[li],sdlog=s[li])));
+  }
+  li <- don[, which(lb > 0 & ub > lb)];
+  if (length(li) > 0) {
+    Fu <- plnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]);
+    Fl <- plnorm(don[li, lb],meanlog=zbm[li],sdlog=s[li]);
+    ll <- ll + sum(log(Fu - Fl));
+  }
+  li <- don[, which(lb > 0 & ub == lb)];
+  if (length(li) > 0) {
+    f <- dlnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]);
+    ll <- ll + sum(log(f));
+  }
+  if(is.na(ll) | is.infinite(ll)) {
+    ll <- -1.0e200;
+    ## Checking calculation in case of data problem ##
+    #cat("theta = ", theta, " - ll = ", -ll, "\n");
+  }
+  -ll; ## minimize -ll <=> maximize ll ##
+}
+## Amotry Function ###########################################################################
+amotry <- function(p, y, psum, ndim, ihi, fac) {
+  ptry <- numeric();
+  fac1 <- (1 - fac) / ndim;
+  fac2 <- fac1 - fac;
+  for(j in 1:ndim) ptry[j] <- psum[j] * fac1 - p[ihi,j] * fac2;
+  ptry <- pmin(pmax(ptry, lowb), uppb); ## Bounds
+  ytry <- funk(ptry);
+  if(ytry < y[ihi]) {
+    y[ihi] <- ytry;
+    for(j in 1:ndim) {
+      psum[j] <- psum[j] + ptry[j] - p[ihi,j];
+      p[ihi,j] <- ptry[j];
+    }
+  }
+  list(ytry, p, y, psum);
+}
+## Nelder-Mead calibration ##
+NMcal <- function(NMpar) {
+  ## Libelle variable dependante ##
+  labvdep <- NMpar[[1]];
+  ## Libelle parametres ##
+  labtheta <- NMpar[[2]];
+  ## Nb parametres a estimer ##
+  nbp <- length(labtheta);
+  ## Valeurs de reference des parametres pour test H0 ##
+  thref <- NMpar[[3]];
+  ## Valeurs initiales des parametres et bornes ##
+  theta <- NMpar[[4]];print(theta);
+  lowb <- NMpar[[5]];
+  uppb <- NMpar[[6]];
+  ## Pas de recherche initial ##
+  lbd <- NMpar[[7]];
+  ## Initialisation simplex ####################################################################
+  ndim <- length(theta);
+  mpts <- ndim + 1;
+  p <- array(0, c(mpts, ndim));
+  p[1,] <- theta;
+  for(i in 2:mpts) {
+    p[i,] <- p[1,];
+    p[i,i-1] <- p[i,i-1] + lbd[i-1];
+  }
+  #print(p);
+  psum <- numeric();
+  y <- numeric();
+  yr <- numeric();
+  for(i in 1:mpts) y[i] <- funk(p[i,]);
+  ftol <- 1e-8;
+  maxiter <- 5000;
+  #maxiter <- 1;
+  maxcycle <- 0;
+  tiny <- 1e-10;
+  ##############################################################################################
+  iter <- 0;
+  cycle <- 0;
+  for(j in 1:ndim) psum[j] <- sum(p[,j]);
+  swap <- numeric();
+  repeat {
+    iter <- iter + 1;
+    if (iter == 1 | round(iter / 100) == iter / 100) {
+      cat("Iteration", iter, "\n");
+      print(cbind(p, y));
+    }
+    ## Rank points of simplex ##################################################################
+    yr <- rank(y, ties.method="first");
+    for(i in 1:mpts) {
+      if(yr[i] == 1) ilo <- i;
+      if(yr[i] == ndim) inhi <- i;
+      if(yr[i] == mpts) {ihi <- i;yhi <- y[i];}
+    }
+    ## Test whether convergence achieved #######################################################
+    rtol <- 2 * abs(y[ihi] - y[ilo]) / (abs(y[ihi]) + abs(y[ilo]) + tiny);
+    if(rtol < ftol){
+      print("precision reached");
+      converge <- 1;
+    }
+    if(iter >= maxiter){ 
+      print("maxiter reached");
+      converge <- 0;
+    }
+    if(rtol < ftol || iter >= maxiter) {
+      swap[1] <- y[1];y[1] <- y[ilo];y[ilo] <- swap[1];
+      swap <- p[1,];p[1,] <- p[ilo,];p[ilo,] <- swap;
+      print("Iteration", quote=FALSE);print(iter, quote=FALSE);
+      print(cbind(p, y));
+      print("", quote=FALSE);
+      ## Display Calibration Results ########################################################
+      print("Final", quote=FALSE);print(c(p[1,], y[1]));
+      ## Covariance matrix ##
+      covmat <- tryCatch(expr={solve(hessian(funk,p[1,]))},error=function(e){hinv <- 0;return(array(NA,dim=c(nbp,nbp)))});
+      hinv <- ifelse(sum(is.na(covmat))>0,0,1);
+      cat("Covariance matrix :\n");
+      print(covmat);
+      cat("\n");
+      cat("Dependent variable : ",labvdep,"\n");
+      ## Table of estimates ##
+      cat(format("Label", width=15));
+      cat(format("Estimate", width=15, justify="right"));
+      cat(format("Std. Dev.", width=15, justify="right"));
+      cat(format("Ref", width=7, justify="right"));
+      cat(format("Chi2", width=15, justify="right"));
+      cat(format("DF", width=4, justify="right"));
+      cat(format("Pval", width=11, justify="right"), "\n");
+      for (j in 1:length(theta)) {
+        thtest <- p[1,];
+        thtest[j] <- thref[j];
+        #chi2 <- 2 * abs(y[1] - funk(thtest));
+        #stdev <- abs(p[1,j] - thref[j]) / sqrt(chi2);
+        stdev <- sqrt(covmat[j,j]);
+        chi2 <- ((p[1,j] - thref[j])^2/covmat[j,j]);
+        df <- 1;
+        pval <- pchisq(chi2, df, lower.tail=FALSE);
+        cat(format(labtheta[j], width=15));
+        cat(formatC(p[1,j], digits=4, format="e", width=15));
+        cat(formatC(stdev, digits=4, format="e", width=15));
+        cat(format(thref[j], digits=2, width=7));
+        cat(formatC(chi2, digits=4, format="e", width=15));
+        cat(format(df, digits=1, width=4));
+        cat(formatC(pval, digits=4, format="f", width=11), "\n");
+      }
+      if (cycle < maxcycle) {
+        for(i in 2:mpts) {
+          p[i,] <- p[1,];
+          p[i,i-1] <- p[i,i-1] + lbd[i-1];
+        }
+        for(i in 1:mpts) y[i] <- funk(p[i,]);
+        iter <- 0;
+        cycle <- cycle + 1;
+      }
+      else break;
+    }
+    ## New iteration ###########################################################################
+    lamo <- amotry(p, y, psum, ndim, ihi, -1);
+    ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]];
+    ##print("Factor -1");
+    ##print(cbind(p, y));
+    if(ytry <= y[ilo]) {
+      lamo <- amotry(p, y, psum, ndim, ihi, 2);
+      ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]];
+      ##print("Factor 2");
+      ##print(cbind(p, y));
+    }
+    else if(ytry >= y[inhi]) {
+      ysave <- y[ihi];
+      lamo <- amotry(p, y, psum, ndim, ihi, .5);
+      ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]];
+      ##print("Factor .5");
+      ##print(cbind(p, y));
+      if(ytry >= ysave) {
+        for(i in 1:mpts) {
+          if(i != ilo) {
+            for(j in 1:ndim) p[i,j] <- 0.5 * (p[i,j] + p[ilo,j]);
+            y[i] <- funk(p[i,]);
+          }
+        }
+        for(j in 1:ndim) psum[j] <- sum(p[,j]);
+      }
+    }
+  }
+  list(p[1,],converge,hinv,covmat);
+}
+#####################################################################################################
+## Variance of theoretical percent-quantile                                                          ##
+#####################################################################################################
+delt <- function(tab,percent,covm) {
+  n <- tab[,.N];
+  if (nbzm > 0) {
+    Zm <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zm,with=F]));
+  } else {
+    Zm <- array(1, dim=c(n,1));
+  }
+  if (nbzd > 0) {
+    Zd <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zd,with=F]));
+  } else {
+    Zd <- array(1, dim=c(n,1));
+  }
+  zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)];
+  Zmd <- Zm;
+  for(j in 1:(nbzd+1)) Zmd <- cbind(Zmd, Zd[,j] * exp(zbd) * qnorm(percent));
+  diag(Zmd%*%covm%*%t(Zmd));
+}
+###################################################################################################
+## Model parameter estimation                                                                    ##
+###################################################################################################
+ok <- 1;
+if (ok == 1) {
+  ## Libelle parametres ##
+  labtheta <- c("beta0",zm,"gamma0",zd);
+  ## Nb parametres a estimer ##
+  nbp <- length(labtheta);
+  ## Valeurs de reference des parametres pour test H0 ##
+  thref <- rep(0,nbp);
+  ## Valeurs initiales des parametres et bornes ##
+  theta <- rep(0,nbp);
+  lowb <-  rep(-Inf,nbp);
+  uppb <-  rep(+Inf,nbp);
+  ## Pas de recherche initial ##
+  lbd <- rep(0.5,nbp);
+  NMpar <- list(labvdep,labtheta,theta,thref,lowb,uppb,lbd);
+  calage <- NMcal(NMpar);
+  thopt <- calage[[1]];
+  converge <- calage[[2]];
+  hinv <- calage[[3]];
+  covmat <- calage[[4]];
+  #####################################################################################################
+  ## Quantile estimation                                                                             ##
+  #####################################################################################################
+  percent <- 0.5;## Quantile probability ##
+  ## Quantiles are estimated by fac variables ##
+  fac <- unique(c(zm,zd));
+  ## Empirical (observed) quantiles ##
+  qua <- don[,.(qobs=quantile(ub,probs=percent,na.rm=T),nbobs=.N),by=fac];
+  n <- qua[,.N];
+  ## Regression - Effects on median ##
+  if (nbzm > 0) {
+    Zm <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zm,with=F]));
+  } else {
+    Zm <- array(1,dim=c(n,1));
+  }
+  zbm <- Zm%*%thopt[1:(1+nbzm)];
+  ## Regression - Effects on dispersion ##
+  if (nbzd > 0) {
+    Zd <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zd,with=F]));
+  } else {
+    Zd <- array(1,dim=c(n,1));
+  }
+  zbd <- Zd%*%thopt[(2+nbzm):(2+nbzm+nbzd)];
+  #s <- exp(zbd);
+  qua[,qcal:=exp(zbm+exp(zbd)*qnorm(percent))];
+  ## Left-censored counts ##
+  lce <- don[lb<ub,.(nblce=.N),by=fac];
+  qua <- merge(qua,lce,by=fac,all=TRUE);
+  qua[is.na(nblce),nblce:=0];
+  ## Variance of theoretical quantile ##
+  qua[,vqc:=delt(qua,percent,covmat)];
+  ## Lower and upper bounds of theoretical quantile ##
+  qua[,lbqc:=qcal*exp(-1.96*vqc^0.5)];
+  qua[,ubqc:=qcal*exp(+1.96*vqc^0.5)];
+  ## Displaying observed and theoretical quantile ##
+  cat("\n");
+  cat("Dependent variable : ",labvdep," - Table of observed and theoretical ",percent*100,"%-quantiles:\n",sep="");
+  #print(qua[, .(Z1,Z2,nbobs,nblce,qobs,qcal,vqc,lbqc,ubqc)]);
+  print(qua[,c(fac,"nbobs","nblce","qobs","qcal","vqc","lbqc","ubqc"),with=F]);
+}