Commit 2c074ed1 authored by Falipou Eva's avatar Falipou Eva
Browse files

Script Yves

No related merge requests found
Showing with 345 additions and 0 deletions
+345 -0
###############################e#####################################################################
## Modele ln-lineaire generalis ##
## Gestion donnees en DataTable - Optimisation Nelder-Mead en fonction ##
## 2020-01-20 ##
## source("ANC/GlnLM-EffLocDisp-DT-fctNM.R") ##
#####################################################################################################
options(width=100);
ok <-1;
if (ok == 1) {
rm(list = ls());
}
library(data.table);
library(numDeriv);
## Repertoire de travail ##
chem <- "/Users/eva.falipou/Documents/Eva/ANC/ln_linaire/";
#####################################################################################################
## Lecture donnees - Exemple : analyse NTK avec loq=1 ##
#####################################################################################################
ok <-1;
if (ok == 1) {
fin <- paste(chem,"BDD_exemple_ln-lineaire.csv",sep="");
don <- fread(fin,dec=",",sep=";");
don[,y:=NTK]; # choix de la variable dependante
## Libelle variable dependante pour documenter affichages ##
labvdep <- "NTK";
## Left-censoring ##
loq <- 1.0; # LQ correspondant a la variable dependante
don[,lb:=y];
don[y<=loq,lb:=0];
don[,ub:=y];
don[y<=loq,ub:=loq];
## Variables explicatives ##
don[,D0:=ifelse(Dispositif=="D0",1,0)];
don[,D2:=ifelse(Dispositif=="D2",1,0)];
don[,inf2ans:=ifelse(classe_age=="< 2 ans",1,0)];
don[,inf70pct:=ifelse(classe_charge=="< 70%",1,0)];
## Nettoyage ##
don <- don[,.(y,lb,ub,D0,D2,inf2ans,inf70pct)];
don <- na.omit(don);
## Effects on location (median) ##
zm <- c("D0");
#zm <- c();## No effect on location ##
nbzm <- length(zm);
## Effects on dispersion ##
zd <- c();
#zd <- c("D1","D2","inf2ans","inf70pct");## No effect on dispersion ##
nbzd <- length(zd);
}
#####################################################################################################
## Log-Likelihood calculation - Based on log-normal distribution ##
#####################################################################################################
funk <- function(theta) {## theta : parameter vector (beta0,beta1,gamma0,gamma1) ##
n <- don[,.N];
## Regression - Effects on median ##
if (nbzm > 0) {
Zm <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zm,with=F]));
} else {
Zm <- array(1, dim=c(n,1));
}
zbm <- Zm%*%theta[1:(1+nbzm)];
## Regression - Effects on dispersion ##
if (nbzd > 0) {
Zd <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zd,with=F]));
} else {
Zd <- array(1, dim=c(n,1));
}
zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)];
s <- exp(zbd);
## Log-Likelihood calculation ##
ll <- 0;
li <- don[, which(lb == 0)];
if (length(li) > 0) {
ll <- sum(log(plnorm(q=don[li, ub],meanlog=zbm[li],sdlog=s[li])));
}
li <- don[, which(lb > 0 & ub > lb)];
if (length(li) > 0) {
Fu <- plnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]);
Fl <- plnorm(don[li, lb],meanlog=zbm[li],sdlog=s[li]);
ll <- ll + sum(log(Fu - Fl));
}
li <- don[, which(lb > 0 & ub == lb)];
if (length(li) > 0) {
f <- dlnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]);
ll <- ll + sum(log(f));
}
if(is.na(ll) | is.infinite(ll)) {
ll <- -1.0e200;
## Checking calculation in case of data problem ##
#cat("theta = ", theta, " - ll = ", -ll, "\n");
}
-ll; ## minimize -ll <=> maximize ll ##
}
## Amotry Function ###########################################################################
amotry <- function(p, y, psum, ndim, ihi, fac) {
ptry <- numeric();
fac1 <- (1 - fac) / ndim;
fac2 <- fac1 - fac;
for(j in 1:ndim) ptry[j] <- psum[j] * fac1 - p[ihi,j] * fac2;
ptry <- pmin(pmax(ptry, lowb), uppb); ## Bounds
ytry <- funk(ptry);
if(ytry < y[ihi]) {
y[ihi] <- ytry;
for(j in 1:ndim) {
psum[j] <- psum[j] + ptry[j] - p[ihi,j];
p[ihi,j] <- ptry[j];
}
}
list(ytry, p, y, psum);
}
## Nelder-Mead calibration ##
NMcal <- function(NMpar) {
## Libelle variable dependante ##
labvdep <- NMpar[[1]];
## Libelle parametres ##
labtheta <- NMpar[[2]];
## Nb parametres a estimer ##
nbp <- length(labtheta);
## Valeurs de reference des parametres pour test H0 ##
thref <- NMpar[[3]];
## Valeurs initiales des parametres et bornes ##
theta <- NMpar[[4]];print(theta);
lowb <- NMpar[[5]];
uppb <- NMpar[[6]];
## Pas de recherche initial ##
lbd <- NMpar[[7]];
## Initialisation simplex ####################################################################
ndim <- length(theta);
mpts <- ndim + 1;
p <- array(0, c(mpts, ndim));
p[1,] <- theta;
for(i in 2:mpts) {
p[i,] <- p[1,];
p[i,i-1] <- p[i,i-1] + lbd[i-1];
}
#print(p);
psum <- numeric();
y <- numeric();
yr <- numeric();
for(i in 1:mpts) y[i] <- funk(p[i,]);
ftol <- 1e-8;
maxiter <- 5000;
#maxiter <- 1;
maxcycle <- 0;
tiny <- 1e-10;
##############################################################################################
iter <- 0;
cycle <- 0;
for(j in 1:ndim) psum[j] <- sum(p[,j]);
swap <- numeric();
repeat {
iter <- iter + 1;
if (iter == 1 | round(iter / 100) == iter / 100) {
cat("Iteration", iter, "\n");
print(cbind(p, y));
}
## Rank points of simplex ##################################################################
yr <- rank(y, ties.method="first");
for(i in 1:mpts) {
if(yr[i] == 1) ilo <- i;
if(yr[i] == ndim) inhi <- i;
if(yr[i] == mpts) {ihi <- i;yhi <- y[i];}
}
## Test whether convergence achieved #######################################################
rtol <- 2 * abs(y[ihi] - y[ilo]) / (abs(y[ihi]) + abs(y[ilo]) + tiny);
if(rtol < ftol){
print("precision reached");
converge <- 1;
}
if(iter >= maxiter){
print("maxiter reached");
converge <- 0;
}
if(rtol < ftol || iter >= maxiter) {
swap[1] <- y[1];y[1] <- y[ilo];y[ilo] <- swap[1];
swap <- p[1,];p[1,] <- p[ilo,];p[ilo,] <- swap;
print("Iteration", quote=FALSE);print(iter, quote=FALSE);
print(cbind(p, y));
print("", quote=FALSE);
## Display Calibration Results ########################################################
print("Final", quote=FALSE);print(c(p[1,], y[1]));
## Covariance matrix ##
covmat <- tryCatch(expr={solve(hessian(funk,p[1,]))},error=function(e){hinv <- 0;return(array(NA,dim=c(nbp,nbp)))});
hinv <- ifelse(sum(is.na(covmat))>0,0,1);
cat("Covariance matrix :\n");
print(covmat);
cat("\n");
cat("Dependent variable : ",labvdep,"\n");
## Table of estimates ##
cat(format("Label", width=15));
cat(format("Estimate", width=15, justify="right"));
cat(format("Std. Dev.", width=15, justify="right"));
cat(format("Ref", width=7, justify="right"));
cat(format("Chi2", width=15, justify="right"));
cat(format("DF", width=4, justify="right"));
cat(format("Pval", width=11, justify="right"), "\n");
for (j in 1:length(theta)) {
thtest <- p[1,];
thtest[j] <- thref[j];
#chi2 <- 2 * abs(y[1] - funk(thtest));
#stdev <- abs(p[1,j] - thref[j]) / sqrt(chi2);
stdev <- sqrt(covmat[j,j]);
chi2 <- ((p[1,j] - thref[j])^2/covmat[j,j]);
df <- 1;
pval <- pchisq(chi2, df, lower.tail=FALSE);
cat(format(labtheta[j], width=15));
cat(formatC(p[1,j], digits=4, format="e", width=15));
cat(formatC(stdev, digits=4, format="e", width=15));
cat(format(thref[j], digits=2, width=7));
cat(formatC(chi2, digits=4, format="e", width=15));
cat(format(df, digits=1, width=4));
cat(formatC(pval, digits=4, format="f", width=11), "\n");
}
if (cycle < maxcycle) {
for(i in 2:mpts) {
p[i,] <- p[1,];
p[i,i-1] <- p[i,i-1] + lbd[i-1];
}
for(i in 1:mpts) y[i] <- funk(p[i,]);
iter <- 0;
cycle <- cycle + 1;
}
else break;
}
## New iteration ###########################################################################
lamo <- amotry(p, y, psum, ndim, ihi, -1);
ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]];
##print("Factor -1");
##print(cbind(p, y));
if(ytry <= y[ilo]) {
lamo <- amotry(p, y, psum, ndim, ihi, 2);
ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]];
##print("Factor 2");
##print(cbind(p, y));
}
else if(ytry >= y[inhi]) {
ysave <- y[ihi];
lamo <- amotry(p, y, psum, ndim, ihi, .5);
ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]];
##print("Factor .5");
##print(cbind(p, y));
if(ytry >= ysave) {
for(i in 1:mpts) {
if(i != ilo) {
for(j in 1:ndim) p[i,j] <- 0.5 * (p[i,j] + p[ilo,j]);
y[i] <- funk(p[i,]);
}
}
for(j in 1:ndim) psum[j] <- sum(p[,j]);
}
}
}
list(p[1,],converge,hinv,covmat);
}
#####################################################################################################
## Variance of theoretical percent-quantile ##
#####################################################################################################
delt <- function(tab,percent,covm) {
n <- tab[,.N];
if (nbzm > 0) {
Zm <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zm,with=F]));
} else {
Zm <- array(1, dim=c(n,1));
}
if (nbzd > 0) {
Zd <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zd,with=F]));
} else {
Zd <- array(1, dim=c(n,1));
}
zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)];
Zmd <- Zm;
for(j in 1:(nbzd+1)) Zmd <- cbind(Zmd, Zd[,j] * exp(zbd) * qnorm(percent));
diag(Zmd%*%covm%*%t(Zmd));
}
###################################################################################################
## Model parameter estimation ##
###################################################################################################
ok <- 1;
if (ok == 1) {
## Libelle parametres ##
labtheta <- c("beta0",zm,"gamma0",zd);
## Nb parametres a estimer ##
nbp <- length(labtheta);
## Valeurs de reference des parametres pour test H0 ##
thref <- rep(0,nbp);
## Valeurs initiales des parametres et bornes ##
theta <- rep(0,nbp);
lowb <- rep(-Inf,nbp);
uppb <- rep(+Inf,nbp);
## Pas de recherche initial ##
lbd <- rep(0.5,nbp);
NMpar <- list(labvdep,labtheta,theta,thref,lowb,uppb,lbd);
calage <- NMcal(NMpar);
thopt <- calage[[1]];
converge <- calage[[2]];
hinv <- calage[[3]];
covmat <- calage[[4]];
#####################################################################################################
## Quantile estimation ##
#####################################################################################################
percent <- 0.5;## Quantile probability ##
## Quantiles are estimated by fac variables ##
fac <- unique(c(zm,zd));
## Empirical (observed) quantiles ##
qua <- don[,.(qobs=quantile(ub,probs=percent,na.rm=T),nbobs=.N),by=fac];
n <- qua[,.N];
## Regression - Effects on median ##
if (nbzm > 0) {
Zm <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zm,with=F]));
} else {
Zm <- array(1,dim=c(n,1));
}
zbm <- Zm%*%thopt[1:(1+nbzm)];
## Regression - Effects on dispersion ##
if (nbzd > 0) {
Zd <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zd,with=F]));
} else {
Zd <- array(1,dim=c(n,1));
}
zbd <- Zd%*%thopt[(2+nbzm):(2+nbzm+nbzd)];
#s <- exp(zbd);
qua[,qcal:=exp(zbm+exp(zbd)*qnorm(percent))];
## Left-censored counts ##
lce <- don[lb<ub,.(nblce=.N),by=fac];
qua <- merge(qua,lce,by=fac,all=TRUE);
qua[is.na(nblce),nblce:=0];
## Variance of theoretical quantile ##
qua[,vqc:=delt(qua,percent,covmat)];
## Lower and upper bounds of theoretical quantile ##
qua[,lbqc:=qcal*exp(-1.96*vqc^0.5)];
qua[,ubqc:=qcal*exp(+1.96*vqc^0.5)];
## Displaying observed and theoretical quantile ##
cat("\n");
cat("Dependent variable : ",labvdep," - Table of observed and theoretical ",percent*100,"%-quantiles:\n",sep="");
#print(qua[, .(Z1,Z2,nbobs,nblce,qobs,qcal,vqc,lbqc,ubqc)]);
print(qua[,c(fac,"nbobs","nblce","qobs","qcal","vqc","lbqc","ubqc"),with=F]);
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment