Ajout de la première version du code

b2fe7bf2 · Falipou Eva · 91f67c2a · b2fe7bf2
Commit b2fe7bf2 authored 4 years ago by Falipou Eva
Hide whitespace changes
Inline Side-by-side

Showing

with 571 additions and 0 deletions
+571 -0
--- a/generalized ln-linear.R
+++ b/generalized ln-linear.R
+
+library(shiny)
+library(dplyr)
+library(rhandsontable)
+library(data.table)
+library(numDeriv)
+
+# UI
+ui <- fluidPage(
+  
+  # App title ----
+  titlePanel("Generalized ln-linear"),
+  
+  sidebarLayout(
+    # Sidebar panel for inputs ----
+    sidebarPanel(
+      
+      h3("Create data table"),
+      
+      # Input:  ----
+      fileInput("file1", "Choose csv file with semicolon separator",
+                multiple = FALSE,
+                accept = c("text/csv",
+                           "text/comma-separated-values,text/plain",
+                           ".csv")),
+      
+      uiOutput("var_ui"),
+      
+      #selectInput("var","Select parameter:",choices=c("MES"="MES","DCO"="DCO","DBO5"="DBO5","NK"="NK","NNH4"="NNH4","NNO3"="NNO3")),
+      
+      numericInput("LQ", "Quantification limit of the selected parameter", min=0, value=1),
+      
+      numericInput("nbcov", "Number of explanatory variable modalities", min=1, value=2, max=10),
+      
+      rHandsontableOutput("tabEdit"), # affichage de la table a editer
+    
+      actionButton("go", "Create"),
+ 
+      br(),
+      br(),
+      br(),
+    
+      h3("Effects on median"),
+        uiOutput("choose_median"),
+    
+      h3("Effects on dispersion"),
+        uiOutput("choose_disp"),
+    
+      h3("Choose quantile (%)"),
+        numericInput("alpha", "Quantile", min=1, value=50, max=100),
+    
+      actionButton("run", "Show results")),
+
+    
+    # Main panel for displaying outputs ----
+    mainPanel(
+      
+      tabsetPanel(type="tabs",
+                  tabPanel("Tutorial",
+                       h3("How to use the Generalized ln-linear Shiny App"),
+                       br(),
+                       p(strong("> Select the csv file containing the data to be analyzed")),
+                       p("It has to be a csv file with semicolon separator"),
+                       br(),
+                       p(strong("> Choose the dependent variable")),
+                       p("For example TSS"),
+                       br(),
+                       p(strong("> State the value of the corresponding quantification limit")),
+                       p("For example 2.0 (mg/L) for TSS"),
+                       br(),
+                       p(strong("> Indicate the number of explanatory variable modalities to be investigated")),
+                       p("Other than those corresponding to the reference set"),
+                       br(),
+                       p(strong("> Complete the table describing each explanatory variable modality with the following information:")),
+                       p("- the label of the modality that you want to see in the different results tables",em("(for example 'inf2ans')")),
+                       p("- the column header in your csv file corresponding to the explanatory variable",em("(for example 'classe_age')")),
+                       p("- the expression of the modality inside this column in your csv file",em("(for example '< 2 ans')")),
+                       br(),
+                       p(strong("> Click on the 'Create' button and then check in the 'Data Table' panel if the table is well-built")),
+                       br(),
+                       p(strong("> Choose the modalities to consider in order to evaluate their effets on median and dispersion")),
+                       br(),
+                       p(strong("> Choose the quantile you want to be returned by the model")),
+                       br(),
+                       p(strong("> Lauch the model by clicking on the 'Show results' button. The table of estimates and the table of quantiles appear in the 'Results' panel."))
+                       ),
+                  
+                  tabPanel("Data Table",
+                           textOutput("lab_don"),
+                           tableOutput("don")),
+                  
+                  tabPanel("Results",
+                           
+                       h3("> Table of estimates"),
+                       textOutput("lab_res"),
+                       dataTableOutput("test"),
+                       
+                       h3("> Quantiles"),
+                       textOutput("lab_quant"),
+                       dataTableOutput("quantile"))
+                  )
+    )
+  )
+)
+
+
+ln_lineaire <- function(labvdep,don,zm,zd,alpha){
+  
+  nbzm <- length(zm);
+  nbzd <- length(zd);
+  outputs <- list()
+  
+  #####################################################################################################
+  ## Log-Likelihood calculation - Based on log-normal distribution                                   ##
+  #####################################################################################################
+  funk <- function(theta) {## theta : parameter vector (beta0,beta1,gamma0,gamma1) ##
+    n <- don[,.N];
+    ## Regression - Effects on median ##
+    if (nbzm > 0) {
+      Zm <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zm,with=F]));
+    } else {
+      Zm <- array(1, dim=c(n,1));
+    }
+    zbm <- Zm%*%theta[1:(1+nbzm)];
+    ## Regression - Effects on dispersion ##
+    if (nbzd > 0) {
+      Zd <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zd,with=F]));
+    } else {
+      Zd <- array(1, dim=c(n,1));
+    }
+    zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)];
+    s <- exp(zbd);
+    ## Log-Likelihood calculation ##
+    ll <- 0;
+    li <- don[, which(lb == 0)];
+    if (length(li) > 0) {
+      ll <- sum(log(plnorm(q=don[li, ub],meanlog=zbm[li],sdlog=s[li])));
+    }
+    li <- don[, which(lb > 0 & ub > lb)];
+    if (length(li) > 0) {
+      Fu <- plnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]);
+      Fl <- plnorm(don[li, lb],meanlog=zbm[li],sdlog=s[li]);
+      ll <- ll + sum(log(Fu - Fl));
+    }
+    li <- don[, which(lb > 0 & ub == lb)];
+    if (length(li) > 0) {
+      f <- dlnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]);
+      ll <- ll + sum(log(f));
+    }
+    if(is.na(ll) | is.infinite(ll)) {
+      ll <- -1.0e200;
+      ## Checking calculation in case of data problem ##
+      #cat("theta = ", theta, " - ll = ", -ll, "\n");
+    }
+    -ll; ## minimize -ll <=> maximize ll ##
+  }
+  
+  ## Amotry Function ###########################################################################
+  amotry <- function(p, y, psum, ndim, ihi, fac) {
+    ptry <- numeric();
+    fac1 <- (1 - fac) / ndim;
+    fac2 <- fac1 - fac;
+    for(j in 1:ndim) ptry[j] <- psum[j] * fac1 - p[ihi,j] * fac2;
+    ptry <- pmin(pmax(ptry, lowb), uppb); ## Bounds
+    ytry <- funk(ptry);
+    if(ytry < y[ihi]) {
+      y[ihi] <- ytry;
+      for(j in 1:ndim) {
+        psum[j] <- psum[j] + ptry[j] - p[ihi,j];
+        p[ihi,j] <- ptry[j];
+      }
+    }
+    list(ytry, p, y, psum);
+  }
+  
+  ## Nelder-Mead calibration ##
+  NMcal <- function(NMpar) {
+    ## Libelle variable dependante ##
+    labvdep <- NMpar[[1]];
+    ## Libelle parametres ##
+    labtheta <- NMpar[[2]];
+    ## Nb parametres a estimer ##
+    nbp <- length(labtheta);
+    ## Valeurs de reference des parametres pour test H0 ##
+    thref <- NMpar[[3]];
+    ## Valeurs initiales des parametres et bornes ##
+    theta <- NMpar[[4]];
+    #print(theta);
+    lowb <- NMpar[[5]];
+    uppb <- NMpar[[6]];
+    ## Pas de recherche initial ##
+    lbd <- NMpar[[7]];
+    ## Initialisation simplex ####################################################################
+    ndim <- length(theta);
+    mpts <- ndim + 1;
+    p <- array(0, c(mpts, ndim));
+    p[1,] <- theta;
+    for(i in 2:mpts) {
+      p[i,] <- p[1,];
+      p[i,i-1] <- p[i,i-1] + lbd[i-1];
+    }
+    #print(p);
+    psum <- numeric();
+    y <- numeric();
+    yr <- numeric();
+    for(i in 1:mpts) y[i] <- funk(p[i,]);
+    ftol <- 1e-8;
+    maxiter <- 5000;
+    #maxiter <- 1;
+    maxcycle <- 0;
+    tiny <- 1e-10;
+    ##############################################################################################
+    iter <- 0;
+    cycle <- 0;
+    for(j in 1:ndim) psum[j] <- sum(p[,j]);
+    swap <- numeric();
+    repeat {
+      iter <- iter + 1;
+      # if (iter == 1 | round(iter / 100) == iter / 100) {
+      #   cat("Iteration", iter, "\n");
+      #   print(cbind(p, y));
+      # }
+      ## Rank points of simplex ##################################################################
+      yr <- rank(y, ties.method="first");
+      for(i in 1:mpts) {
+        if(yr[i] == 1) ilo <- i;
+        if(yr[i] == ndim) inhi <- i;
+        if(yr[i] == mpts) {ihi <- i;yhi <- y[i];}
+      }
+      ## Test whether convergence achieved #######################################################
+      rtol <- 2 * abs(y[ihi] - y[ilo]) / (abs(y[ihi]) + abs(y[ilo]) + tiny);
+      if(rtol < ftol){
+        print("precision reached");
+        converge <- 1;
+      }
+      if(iter >= maxiter){ 
+        print("maxiter reached");
+        converge <- 0;
+      }
+      if(rtol < ftol || iter >= maxiter) {
+        swap[1] <- y[1];y[1] <- y[ilo];y[ilo] <- swap[1];
+        swap <- p[1,];p[1,] <- p[ilo,];p[ilo,] <- swap;
+        # print("Iteration", quote=FALSE);print(iter, quote=FALSE);
+        # print(cbind(p, y));
+        # print("", quote=FALSE);
+        
+        ## Display Calibration Results ########################################################
+        #print("Final", quote=FALSE);print(c(p[1,], y[1]));
+        ## Covariance matrix ##
+        covmat <- tryCatch(expr={solve(hessian(funk,p[1,]))},error=function(e){hinv <- 0;return(array(NA,dim=c(nbp,nbp)))});
+        hinv <- ifelse(sum(is.na(covmat))>0,0,1);
+        
+        ## Table of estimates ##
+        result <- data.frame(Label=character(0),Estimate=numeric(0), Std_Dev=numeric(0),Ref=numeric(0),Chi2=numeric(),DF=numeric(0),Pval=numeric(0))
+        
+        for (j in 1:length(theta)) {
+          thtest <- p[1,];
+          thtest[j] <- thref[j];
+          #chi2 <- 2 * abs(y[1] - funk(thtest));
+          #stdev <- abs(p[1,j] - thref[j]) / sqrt(chi2);
+          stdev <- sqrt(covmat[j,j]);
+          chi2 <- ((p[1,j] - thref[j])^2/covmat[j,j]);
+          df <- 1;
+          pval <- pchisq(chi2, df, lower.tail=FALSE);
+          
+          new <- list(Label=labtheta[j],
+                      Estimate=formatC(p[1,j], digits=4, format="e"),
+                      Std_Dev=formatC(stdev, digits=4, format="e"),
+                      Ref=thref[j],
+                      Chi2=formatC(chi2, digits=4, format="e"),
+                      DF=df,
+                      Pval=formatC(pval,digits=4,format="f"))
+          
+          result <- rbind(result, new, stringsAsFactors=FALSE)
+        }
+        
+        if (cycle < maxcycle) {
+          for(i in 2:mpts) {
+            p[i,] <- p[1,];
+            p[i,i-1] <- p[i,i-1] + lbd[i-1];
+          }
+          for(i in 1:mpts) y[i] <- funk(p[i,]);
+          iter <- 0;
+          cycle <- cycle + 1;
+        }
+        else break;
+      }
+      ## New iteration ###########################################################################
+      lamo <- amotry(p, y, psum, ndim, ihi, -1);
+      ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]];
+      ##print("Factor -1");
+      ##print(cbind(p, y));
+      if(ytry <= y[ilo]) {
+        lamo <- amotry(p, y, psum, ndim, ihi, 2);
+        ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]];
+        ##print("Factor 2");
+        ##print(cbind(p, y));
+      }
+      else if(ytry >= y[inhi]) {
+        ysave <- y[ihi];
+        lamo <- amotry(p, y, psum, ndim, ihi, .5);
+        ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]];
+        ##print("Factor .5");
+        ##print(cbind(p, y));
+        if(ytry >= ysave) {
+          for(i in 1:mpts) {
+            if(i != ilo) {
+              for(j in 1:ndim) p[i,j] <- 0.5 * (p[i,j] + p[ilo,j]);
+              y[i] <- funk(p[i,]);
+            }
+          }
+          for(j in 1:ndim) psum[j] <- sum(p[,j]);
+        }
+      }
+    }
+    list(p[1,],converge,hinv,covmat,result);
+  }
+  
+  #####################################################################################################
+  ## Variance of theoretical alpha-quantile                                                          ##
+  #####################################################################################################
+  delt <- function(tab,alpha,covm) {
+    n <- tab[,.N];
+    if (nbzm > 0) {
+      Zm <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zm,with=F]));
+    } else {
+      Zm <- array(1, dim=c(n,1));
+    }
+    if (nbzd > 0) {
+      Zd <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zd,with=F]));
+    } else {
+      Zd <- array(1, dim=c(n,1));
+    }
+    zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)];
+    Zmd <- Zm;
+    for(j in 1:(nbzd+1)) Zmd <- cbind(Zmd, Zd[,j] * exp(zbd) * qnorm(alpha));
+    diag(Zmd%*%covm%*%t(Zmd));
+  }
+  
+  ###################################################################################################
+  ## Model parameter estimation                                                                    ##
+  ###################################################################################################
+  
+  ## Libelle parametres ##
+  labtheta <- c("beta0",zm,"gamma0",zd);
+  ## Nb parametres a estimer ##
+  nbp <- length(labtheta);
+  ## Valeurs de reference des parametres pour test H0 ##
+  thref <- rep(0,nbp);
+  ## Valeurs initiales des parametres et bornes ##
+  theta <- rep(0,nbp);
+  lowb <-  rep(-Inf,nbp);
+  uppb <-  rep(+Inf,nbp);
+  ## Pas de recherche initial ##
+  lbd <- rep(0.5,nbp);
+  NMpar <- list(labvdep,labtheta,theta,thref,lowb,uppb,lbd);
+  calage <- NMcal(NMpar);
+  thopt <- calage[[1]];
+  converge <- calage[[2]];
+  hinv <- calage[[3]];
+  covmat <- calage[[4]];
+  result <- calage[[5]];
+  
+  outputs[[1]] <- result
+  
+  #####################################################################################################
+  ## Quantile estimation                                                                             ##
+  #####################################################################################################
+  
+  ## Quantiles are estimated by fac variables ##
+  fac <- unique(c(zm,zd));
+  ## Empirical (observed) quantiles ##
+  qua <- don[,.(qobs=quantile(ub,probs=alpha,na.rm=T),nbobs=.N),by=fac];
+  n <- qua[,.N];
+  ## Regression - Effects on median ##
+  if (nbzm > 0) {
+    Zm <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zm,with=F]));
+  } else {
+    Zm <- array(1,dim=c(n,1));
+  }
+  zbm <- Zm%*%thopt[1:(1+nbzm)];
+  ## Regression - Effects on dispersion ##
+  if (nbzd > 0) {
+    Zd <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zd,with=F]));
+  } else {
+    Zd <- array(1,dim=c(n,1));
+  }
+  zbd <- Zd%*%thopt[(2+nbzm):(2+nbzm+nbzd)];
+  #s <- exp(zbd);
+  qua[,qcal:=exp(zbm+exp(zbd)*qnorm(alpha))];
+  ## Left-censored counts ##
+  lce <- don[lb<ub,.(nblce=.N),by=fac];
+  qua <- merge(qua,lce,by=fac,all=TRUE);
+  qua[is.na(nblce),nblce:=0];
+  ## Variance of theoretical quantile ##
+  qua[,vqc:=delt(qua,alpha,covmat)];
+  ## Lower and upper bounds of theoretical quantile ##
+  qua[,lbqc:=qcal*exp(-1.96*vqc^0.5)];
+  qua[,ubqc:=qcal*exp(+1.96*vqc^0.5)];
+  
+  ## Displaying observed and theoretical quantile ##
+  qua <- qua[,c(fac,"nbobs","nblce","qobs","qcal","vqc","lbqc","ubqc"),with=F]
+  qua$qcal <- round(qua$qcal, digits = 3)
+  qua[,(ncol(qua)-2):ncol(qua)] <- round(qua[,(ncol(qua)-2):ncol(qua)],digits=4)
+  
+  outputs[[2]] <- qua
+  
+  return(outputs)
+}
+
+
+# server
+server <- function(input, output) {
+  
+  # Importation du fichier de donnees
+  data <- reactive({
+    req(input$file1)
+    tryCatch(
+      {
+        df <- read.csv(input$file1$datapath,
+                       header = TRUE,
+                       sep = ";", dec = ",")
+      },
+      error = function(e) {
+        # return a safeError if a parsing error occurs
+        stop(safeError(e))
+      }
+    )
+  })
+  
+  output$var_ui <- renderUI({
+    selectInput("var","Select parameter", choices=names(data()))
+  })
+
+  # Selection covariables
+  values <- reactiveValues(mat=NULL, don=NULL)
+  observe({
+    if (!is.null(input$tabEdit))
+      values$mat <- hot_to_r(input$tabEdit)
+  })
+  
+  observe({
+    nb<-input$nbcov
+    tab <- data.frame(matrix(NA_character_, nrow=nb, ncol=3))
+    colnames(tab) <- c("Label","Column header","Expression")
+    rownames(tab) <- paste("mod", 1:nb)
+    values$mat<-tab
+  })
+  
+  output$tabEdit <- renderRHandsontable({
+    rhandsontable(values$mat, selectCallback = TRUE)
+  })
+  
+  ## Creation de la DataTable don
+  
+  # don <- eventReactive(input$go, {
+  # 
+  #   dt <- cbind(data(),y=data()[input$var])
+  #   colnames(dt)[ncol(dt)] <- "y"
+  # 
+  #   # Left-censoring
+  #   loq <- input$LQ
+  #   
+  #   dt <- cbind(dt,lb=data()[input$var])
+  #   colnames(dt)[ncol(dt)] <- "lb"
+  #   dt$lb[dt$y <= loq] <- 0
+  #   
+  #   dt <- cbind(dt,ub=data()[input$var])
+  #   colnames(dt)[ncol(dt)] <- "ub"
+  #   dt$ub[dt$y <= loq] <- loq
+  # 
+  #   ## Covariates ##
+  #   dt <- as.data.table(dt)
+  #   for (i in 1:input$nbcov){
+  #   dt[,values$mat[i,1]:=ifelse(get(values$mat[i,2])==values$mat[i,3],1,0)]
+  #   }
+  #   
+  #   ## Nettoyage ##
+  #   dt <- select(dt, -1:-(grep("y",colnames(dt))-1))
+  #   dt <- na.omit(dt)
+  #   
+  #   # Effects on location (median) and dispersion
+  #   output$choose_median <- renderUI({
+  #     checkboxGroupInput("median","Effects on median", choices = values$mat[,1],selected = values$mat[,1])
+  #   })
+  #   
+  #   output$choose_disp <- renderUI({
+  #     checkboxGroupInput("disp","Effects on dispersion", choices = values$mat[,1],selected = values$mat[,1])
+  #   })
+  #   
+  #   output$lab_don <- renderText({
+  #     paste("Parameter: ",isolate(input$var),"\n")
+  #   })
+  #   
+  #   dt
+  # })
+  
+  observeEvent(input$go, {
+
+    don <- cbind(data(),y=data()[input$var])
+    colnames(don)[ncol(don)] <- "y"
+
+    # Left-censoring
+    loq <- input$LQ
+
+    don <- cbind(don,lb=data()[input$var])
+    colnames(don)[ncol(don)] <- "lb"
+    don$lb[don$y <= loq] <- 0
+
+    don <- cbind(don,ub=data()[input$var])
+    colnames(don)[ncol(don)] <- "ub"
+    don$ub[don$y <= loq] <- loq
+
+    ## Covariates ##
+    don <- as.data.table(don)
+    for (i in 1:input$nbcov){
+    don[,values$mat[i,1]:=ifelse(get(values$mat[i,2])==values$mat[i,3],1,0)]
+    }
+
+    ## Nettoyage ##
+    don <- select(don, -1:-(grep("y",colnames(don))-1))
+    don <- na.omit(don)
+    
+    values$don <- don
+
+    
+    # Effects on location (median) and dispersion
+    output$choose_median <- renderUI({
+      checkboxGroupInput("median","Effects on median", choices = values$mat[,1],selected = values$mat[,1])
+    })
+
+    output$choose_disp <- renderUI({
+      checkboxGroupInput("disp","Effects on dispersion", choices = values$mat[,1],selected = values$mat[,1])
+    })
+
+    output$lab_don <- renderText({
+      paste("Parameter: ",isolate(input$var),"\n")
+    })
+
+  })
+  
+
+  #output$don <- renderTable(don())  #verification don
+  output$don <- renderTable(values$don)  #verification don
+  
+  ## Lancement modele
+  observeEvent(input$run,{
+    
+    #outputs <- ln_lineaire(labvdep=input$var,don=don(),zm=input$median,zd=input$disp,alpha=input$alpha/100)
+    outputs <- ln_lineaire(labvdep=input$var,don=values$don,zm=input$median,zd=input$disp,alpha=input$alpha/100)
+    
+    # return table of estimates
+    output$lab_res <- renderText({
+      paste("Parameter: ",isolate(input$var),"\n")
+    })
+    output$test <- renderDataTable(outputs[[1]], options = list(dom = 't', searching = FALSE)
+    )
+    
+    # return table of quantiles
+    output$lab_quant <- renderText({
+      paste("Dependent variable: ",isolate(input$var)," - Table of observed and theoretical ",input$alpha,"% quantiles:\n")
+    })
+    output$quantile <- renderDataTable(outputs[[2]], options = list(dom = 't', searching = FALSE))
+
+  })
+    
+}
+
+# Create Shiny app ----
+shinyApp(ui, server)
+