diff --git a/generalized ln-linear.R b/generalized ln-linear.R new file mode 100644 index 0000000000000000000000000000000000000000..734d05297c305828dcb0d21d5a5f984e60576482 --- /dev/null +++ b/generalized ln-linear.R @@ -0,0 +1,571 @@ + +library(shiny) +library(dplyr) +library(rhandsontable) +library(data.table) +library(numDeriv) + +# UI +ui <- fluidPage( + + # App title ---- + titlePanel("Generalized ln-linear"), + + sidebarLayout( + # Sidebar panel for inputs ---- + sidebarPanel( + + h3("Create data table"), + + # Input: ---- + fileInput("file1", "Choose csv file with semicolon separator", + multiple = FALSE, + accept = c("text/csv", + "text/comma-separated-values,text/plain", + ".csv")), + + uiOutput("var_ui"), + + #selectInput("var","Select parameter:",choices=c("MES"="MES","DCO"="DCO","DBO5"="DBO5","NK"="NK","NNH4"="NNH4","NNO3"="NNO3")), + + numericInput("LQ", "Quantification limit of the selected parameter", min=0, value=1), + + numericInput("nbcov", "Number of explanatory variable modalities", min=1, value=2, max=10), + + rHandsontableOutput("tabEdit"), # affichage de la table a editer + + actionButton("go", "Create"), + + br(), + br(), + br(), + + h3("Effects on median"), + uiOutput("choose_median"), + + h3("Effects on dispersion"), + uiOutput("choose_disp"), + + h3("Choose quantile (%)"), + numericInput("alpha", "Quantile", min=1, value=50, max=100), + + actionButton("run", "Show results")), + + + # Main panel for displaying outputs ---- + mainPanel( + + tabsetPanel(type="tabs", + tabPanel("Tutorial", + h3("How to use the Generalized ln-linear Shiny App"), + br(), + p(strong("> Select the csv file containing the data to be analyzed")), + p("It has to be a csv file with semicolon separator"), + br(), + p(strong("> Choose the dependent variable")), + p("For example TSS"), + br(), + p(strong("> State the value of the corresponding quantification limit")), + p("For example 2.0 (mg/L) for TSS"), + br(), + p(strong("> Indicate the number of explanatory variable modalities to be investigated")), + p("Other than those corresponding to the reference set"), + br(), + p(strong("> Complete the table describing each explanatory variable modality with the following information:")), + p("- the label of the modality that you want to see in the different results tables",em("(for example 'inf2ans')")), + p("- the column header in your csv file corresponding to the explanatory variable",em("(for example 'classe_age')")), + p("- the expression of the modality inside this column in your csv file",em("(for example '< 2 ans')")), + br(), + p(strong("> Click on the 'Create' button and then check in the 'Data Table' panel if the table is well-built")), + br(), + p(strong("> Choose the modalities to consider in order to evaluate their effets on median and dispersion")), + br(), + p(strong("> Choose the quantile you want to be returned by the model")), + br(), + p(strong("> Lauch the model by clicking on the 'Show results' button. The table of estimates and the table of quantiles appear in the 'Results' panel.")) + ), + + tabPanel("Data Table", + textOutput("lab_don"), + tableOutput("don")), + + tabPanel("Results", + + h3("> Table of estimates"), + textOutput("lab_res"), + dataTableOutput("test"), + + h3("> Quantiles"), + textOutput("lab_quant"), + dataTableOutput("quantile")) + ) + ) + ) +) + + +ln_lineaire <- function(labvdep,don,zm,zd,alpha){ + + nbzm <- length(zm); + nbzd <- length(zd); + outputs <- list() + + ##################################################################################################### + ## Log-Likelihood calculation - Based on log-normal distribution ## + ##################################################################################################### + funk <- function(theta) {## theta : parameter vector (beta0,beta1,gamma0,gamma1) ## + n <- don[,.N]; + ## Regression - Effects on median ## + if (nbzm > 0) { + Zm <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zm,with=F])); + } else { + Zm <- array(1, dim=c(n,1)); + } + zbm <- Zm%*%theta[1:(1+nbzm)]; + ## Regression - Effects on dispersion ## + if (nbzd > 0) { + Zd <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zd,with=F])); + } else { + Zd <- array(1, dim=c(n,1)); + } + zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)]; + s <- exp(zbd); + ## Log-Likelihood calculation ## + ll <- 0; + li <- don[, which(lb == 0)]; + if (length(li) > 0) { + ll <- sum(log(plnorm(q=don[li, ub],meanlog=zbm[li],sdlog=s[li]))); + } + li <- don[, which(lb > 0 & ub > lb)]; + if (length(li) > 0) { + Fu <- plnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]); + Fl <- plnorm(don[li, lb],meanlog=zbm[li],sdlog=s[li]); + ll <- ll + sum(log(Fu - Fl)); + } + li <- don[, which(lb > 0 & ub == lb)]; + if (length(li) > 0) { + f <- dlnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]); + ll <- ll + sum(log(f)); + } + if(is.na(ll) | is.infinite(ll)) { + ll <- -1.0e200; + ## Checking calculation in case of data problem ## + #cat("theta = ", theta, " - ll = ", -ll, "\n"); + } + -ll; ## minimize -ll <=> maximize ll ## + } + + ## Amotry Function ########################################################################### + amotry <- function(p, y, psum, ndim, ihi, fac) { + ptry <- numeric(); + fac1 <- (1 - fac) / ndim; + fac2 <- fac1 - fac; + for(j in 1:ndim) ptry[j] <- psum[j] * fac1 - p[ihi,j] * fac2; + ptry <- pmin(pmax(ptry, lowb), uppb); ## Bounds + ytry <- funk(ptry); + if(ytry < y[ihi]) { + y[ihi] <- ytry; + for(j in 1:ndim) { + psum[j] <- psum[j] + ptry[j] - p[ihi,j]; + p[ihi,j] <- ptry[j]; + } + } + list(ytry, p, y, psum); + } + + ## Nelder-Mead calibration ## + NMcal <- function(NMpar) { + ## Libelle variable dependante ## + labvdep <- NMpar[[1]]; + ## Libelle parametres ## + labtheta <- NMpar[[2]]; + ## Nb parametres a estimer ## + nbp <- length(labtheta); + ## Valeurs de reference des parametres pour test H0 ## + thref <- NMpar[[3]]; + ## Valeurs initiales des parametres et bornes ## + theta <- NMpar[[4]]; + #print(theta); + lowb <- NMpar[[5]]; + uppb <- NMpar[[6]]; + ## Pas de recherche initial ## + lbd <- NMpar[[7]]; + ## Initialisation simplex #################################################################### + ndim <- length(theta); + mpts <- ndim + 1; + p <- array(0, c(mpts, ndim)); + p[1,] <- theta; + for(i in 2:mpts) { + p[i,] <- p[1,]; + p[i,i-1] <- p[i,i-1] + lbd[i-1]; + } + #print(p); + psum <- numeric(); + y <- numeric(); + yr <- numeric(); + for(i in 1:mpts) y[i] <- funk(p[i,]); + ftol <- 1e-8; + maxiter <- 5000; + #maxiter <- 1; + maxcycle <- 0; + tiny <- 1e-10; + ############################################################################################## + iter <- 0; + cycle <- 0; + for(j in 1:ndim) psum[j] <- sum(p[,j]); + swap <- numeric(); + repeat { + iter <- iter + 1; + # if (iter == 1 | round(iter / 100) == iter / 100) { + # cat("Iteration", iter, "\n"); + # print(cbind(p, y)); + # } + ## Rank points of simplex ################################################################## + yr <- rank(y, ties.method="first"); + for(i in 1:mpts) { + if(yr[i] == 1) ilo <- i; + if(yr[i] == ndim) inhi <- i; + if(yr[i] == mpts) {ihi <- i;yhi <- y[i];} + } + ## Test whether convergence achieved ####################################################### + rtol <- 2 * abs(y[ihi] - y[ilo]) / (abs(y[ihi]) + abs(y[ilo]) + tiny); + if(rtol < ftol){ + print("precision reached"); + converge <- 1; + } + if(iter >= maxiter){ + print("maxiter reached"); + converge <- 0; + } + if(rtol < ftol || iter >= maxiter) { + swap[1] <- y[1];y[1] <- y[ilo];y[ilo] <- swap[1]; + swap <- p[1,];p[1,] <- p[ilo,];p[ilo,] <- swap; + # print("Iteration", quote=FALSE);print(iter, quote=FALSE); + # print(cbind(p, y)); + # print("", quote=FALSE); + + ## Display Calibration Results ######################################################## + #print("Final", quote=FALSE);print(c(p[1,], y[1])); + ## Covariance matrix ## + covmat <- tryCatch(expr={solve(hessian(funk,p[1,]))},error=function(e){hinv <- 0;return(array(NA,dim=c(nbp,nbp)))}); + hinv <- ifelse(sum(is.na(covmat))>0,0,1); + + ## Table of estimates ## + result <- data.frame(Label=character(0),Estimate=numeric(0), Std_Dev=numeric(0),Ref=numeric(0),Chi2=numeric(),DF=numeric(0),Pval=numeric(0)) + + for (j in 1:length(theta)) { + thtest <- p[1,]; + thtest[j] <- thref[j]; + #chi2 <- 2 * abs(y[1] - funk(thtest)); + #stdev <- abs(p[1,j] - thref[j]) / sqrt(chi2); + stdev <- sqrt(covmat[j,j]); + chi2 <- ((p[1,j] - thref[j])^2/covmat[j,j]); + df <- 1; + pval <- pchisq(chi2, df, lower.tail=FALSE); + + new <- list(Label=labtheta[j], + Estimate=formatC(p[1,j], digits=4, format="e"), + Std_Dev=formatC(stdev, digits=4, format="e"), + Ref=thref[j], + Chi2=formatC(chi2, digits=4, format="e"), + DF=df, + Pval=formatC(pval,digits=4,format="f")) + + result <- rbind(result, new, stringsAsFactors=FALSE) + } + + if (cycle < maxcycle) { + for(i in 2:mpts) { + p[i,] <- p[1,]; + p[i,i-1] <- p[i,i-1] + lbd[i-1]; + } + for(i in 1:mpts) y[i] <- funk(p[i,]); + iter <- 0; + cycle <- cycle + 1; + } + else break; + } + ## New iteration ########################################################################### + lamo <- amotry(p, y, psum, ndim, ihi, -1); + ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; + ##print("Factor -1"); + ##print(cbind(p, y)); + if(ytry <= y[ilo]) { + lamo <- amotry(p, y, psum, ndim, ihi, 2); + ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; + ##print("Factor 2"); + ##print(cbind(p, y)); + } + else if(ytry >= y[inhi]) { + ysave <- y[ihi]; + lamo <- amotry(p, y, psum, ndim, ihi, .5); + ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; + ##print("Factor .5"); + ##print(cbind(p, y)); + if(ytry >= ysave) { + for(i in 1:mpts) { + if(i != ilo) { + for(j in 1:ndim) p[i,j] <- 0.5 * (p[i,j] + p[ilo,j]); + y[i] <- funk(p[i,]); + } + } + for(j in 1:ndim) psum[j] <- sum(p[,j]); + } + } + } + list(p[1,],converge,hinv,covmat,result); + } + + ##################################################################################################### + ## Variance of theoretical alpha-quantile ## + ##################################################################################################### + delt <- function(tab,alpha,covm) { + n <- tab[,.N]; + if (nbzm > 0) { + Zm <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zm,with=F])); + } else { + Zm <- array(1, dim=c(n,1)); + } + if (nbzd > 0) { + Zd <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zd,with=F])); + } else { + Zd <- array(1, dim=c(n,1)); + } + zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)]; + Zmd <- Zm; + for(j in 1:(nbzd+1)) Zmd <- cbind(Zmd, Zd[,j] * exp(zbd) * qnorm(alpha)); + diag(Zmd%*%covm%*%t(Zmd)); + } + + ################################################################################################### + ## Model parameter estimation ## + ################################################################################################### + + ## Libelle parametres ## + labtheta <- c("beta0",zm,"gamma0",zd); + ## Nb parametres a estimer ## + nbp <- length(labtheta); + ## Valeurs de reference des parametres pour test H0 ## + thref <- rep(0,nbp); + ## Valeurs initiales des parametres et bornes ## + theta <- rep(0,nbp); + lowb <- rep(-Inf,nbp); + uppb <- rep(+Inf,nbp); + ## Pas de recherche initial ## + lbd <- rep(0.5,nbp); + NMpar <- list(labvdep,labtheta,theta,thref,lowb,uppb,lbd); + calage <- NMcal(NMpar); + thopt <- calage[[1]]; + converge <- calage[[2]]; + hinv <- calage[[3]]; + covmat <- calage[[4]]; + result <- calage[[5]]; + + outputs[[1]] <- result + + ##################################################################################################### + ## Quantile estimation ## + ##################################################################################################### + + ## Quantiles are estimated by fac variables ## + fac <- unique(c(zm,zd)); + ## Empirical (observed) quantiles ## + qua <- don[,.(qobs=quantile(ub,probs=alpha,na.rm=T),nbobs=.N),by=fac]; + n <- qua[,.N]; + ## Regression - Effects on median ## + if (nbzm > 0) { + Zm <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zm,with=F])); + } else { + Zm <- array(1,dim=c(n,1)); + } + zbm <- Zm%*%thopt[1:(1+nbzm)]; + ## Regression - Effects on dispersion ## + if (nbzd > 0) { + Zd <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zd,with=F])); + } else { + Zd <- array(1,dim=c(n,1)); + } + zbd <- Zd%*%thopt[(2+nbzm):(2+nbzm+nbzd)]; + #s <- exp(zbd); + qua[,qcal:=exp(zbm+exp(zbd)*qnorm(alpha))]; + ## Left-censored counts ## + lce <- don[lb<ub,.(nblce=.N),by=fac]; + qua <- merge(qua,lce,by=fac,all=TRUE); + qua[is.na(nblce),nblce:=0]; + ## Variance of theoretical quantile ## + qua[,vqc:=delt(qua,alpha,covmat)]; + ## Lower and upper bounds of theoretical quantile ## + qua[,lbqc:=qcal*exp(-1.96*vqc^0.5)]; + qua[,ubqc:=qcal*exp(+1.96*vqc^0.5)]; + + ## Displaying observed and theoretical quantile ## + qua <- qua[,c(fac,"nbobs","nblce","qobs","qcal","vqc","lbqc","ubqc"),with=F] + qua$qcal <- round(qua$qcal, digits = 3) + qua[,(ncol(qua)-2):ncol(qua)] <- round(qua[,(ncol(qua)-2):ncol(qua)],digits=4) + + outputs[[2]] <- qua + + return(outputs) +} + + +# server +server <- function(input, output) { + + # Importation du fichier de donnees + data <- reactive({ + req(input$file1) + tryCatch( + { + df <- read.csv(input$file1$datapath, + header = TRUE, + sep = ";", dec = ",") + }, + error = function(e) { + # return a safeError if a parsing error occurs + stop(safeError(e)) + } + ) + }) + + output$var_ui <- renderUI({ + selectInput("var","Select parameter", choices=names(data())) + }) + + # Selection covariables + values <- reactiveValues(mat=NULL, don=NULL) + observe({ + if (!is.null(input$tabEdit)) + values$mat <- hot_to_r(input$tabEdit) + }) + + observe({ + nb<-input$nbcov + tab <- data.frame(matrix(NA_character_, nrow=nb, ncol=3)) + colnames(tab) <- c("Label","Column header","Expression") + rownames(tab) <- paste("mod", 1:nb) + values$mat<-tab + }) + + output$tabEdit <- renderRHandsontable({ + rhandsontable(values$mat, selectCallback = TRUE) + }) + + ## Creation de la DataTable don + + # don <- eventReactive(input$go, { + # + # dt <- cbind(data(),y=data()[input$var]) + # colnames(dt)[ncol(dt)] <- "y" + # + # # Left-censoring + # loq <- input$LQ + # + # dt <- cbind(dt,lb=data()[input$var]) + # colnames(dt)[ncol(dt)] <- "lb" + # dt$lb[dt$y <= loq] <- 0 + # + # dt <- cbind(dt,ub=data()[input$var]) + # colnames(dt)[ncol(dt)] <- "ub" + # dt$ub[dt$y <= loq] <- loq + # + # ## Covariates ## + # dt <- as.data.table(dt) + # for (i in 1:input$nbcov){ + # dt[,values$mat[i,1]:=ifelse(get(values$mat[i,2])==values$mat[i,3],1,0)] + # } + # + # ## Nettoyage ## + # dt <- select(dt, -1:-(grep("y",colnames(dt))-1)) + # dt <- na.omit(dt) + # + # # Effects on location (median) and dispersion + # output$choose_median <- renderUI({ + # checkboxGroupInput("median","Effects on median", choices = values$mat[,1],selected = values$mat[,1]) + # }) + # + # output$choose_disp <- renderUI({ + # checkboxGroupInput("disp","Effects on dispersion", choices = values$mat[,1],selected = values$mat[,1]) + # }) + # + # output$lab_don <- renderText({ + # paste("Parameter: ",isolate(input$var),"\n") + # }) + # + # dt + # }) + + observeEvent(input$go, { + + don <- cbind(data(),y=data()[input$var]) + colnames(don)[ncol(don)] <- "y" + + # Left-censoring + loq <- input$LQ + + don <- cbind(don,lb=data()[input$var]) + colnames(don)[ncol(don)] <- "lb" + don$lb[don$y <= loq] <- 0 + + don <- cbind(don,ub=data()[input$var]) + colnames(don)[ncol(don)] <- "ub" + don$ub[don$y <= loq] <- loq + + ## Covariates ## + don <- as.data.table(don) + for (i in 1:input$nbcov){ + don[,values$mat[i,1]:=ifelse(get(values$mat[i,2])==values$mat[i,3],1,0)] + } + + ## Nettoyage ## + don <- select(don, -1:-(grep("y",colnames(don))-1)) + don <- na.omit(don) + + values$don <- don + + + # Effects on location (median) and dispersion + output$choose_median <- renderUI({ + checkboxGroupInput("median","Effects on median", choices = values$mat[,1],selected = values$mat[,1]) + }) + + output$choose_disp <- renderUI({ + checkboxGroupInput("disp","Effects on dispersion", choices = values$mat[,1],selected = values$mat[,1]) + }) + + output$lab_don <- renderText({ + paste("Parameter: ",isolate(input$var),"\n") + }) + + }) + + + #output$don <- renderTable(don()) #verification don + output$don <- renderTable(values$don) #verification don + + ## Lancement modele + observeEvent(input$run,{ + + #outputs <- ln_lineaire(labvdep=input$var,don=don(),zm=input$median,zd=input$disp,alpha=input$alpha/100) + outputs <- ln_lineaire(labvdep=input$var,don=values$don,zm=input$median,zd=input$disp,alpha=input$alpha/100) + + # return table of estimates + output$lab_res <- renderText({ + paste("Parameter: ",isolate(input$var),"\n") + }) + output$test <- renderDataTable(outputs[[1]], options = list(dom = 't', searching = FALSE) + ) + + # return table of quantiles + output$lab_quant <- renderText({ + paste("Dependent variable: ",isolate(input$var)," - Table of observed and theoretical ",input$alpha,"% quantiles:\n") + }) + output$quantile <- renderDataTable(outputs[[2]], options = list(dom = 't', searching = FALSE)) + + }) + +} + +# Create Shiny app ---- +shinyApp(ui, server) +