generalized ln-linear.R 19.59 KiB
library(shiny)
library(dplyr)
library(rhandsontable)
library(data.table)
library(numDeriv)
library(gridExtra)
ln_lineaire <- function(labvdep,don,zm,zd,alpha){
  nbzm <- length(zm);
  nbzd <- length(zd);
  outputs <- list()
  #####################################################################################################
  ## Log-Likelihood calculation - Based on log-normal distribution                                   ##
  #####################################################################################################
  funk <- function(theta) {## theta : parameter vector (beta0,beta1,gamma0,gamma1) ##
    n <- don[,.N];
    ## Regression - Effects on median ##
    if (nbzm > 0) {
      Zm <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zm,with=F]));
    } else {
      Zm <- array(1, dim=c(n,1));
    zbm <- Zm%*%theta[1:(1+nbzm)];
    ## Regression - Effects on dispersion ##
    if (nbzd > 0) {
      Zd <- cbind(array(1, dim=c(n,1)), as.matrix(don[,zd,with=F]));
    } else {
      Zd <- array(1, dim=c(n,1));
    zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)];
    s <- exp(zbd);
    ## Log-Likelihood calculation ##
    ll <- 0;
    li <- don[, which(lb == 0)];
    if (length(li) > 0) {
      ll <- sum(log(plnorm(q=don[li, ub],meanlog=zbm[li],sdlog=s[li])));
    li <- don[, which(lb > 0 & ub > lb)];
    if (length(li) > 0) {
      Fu <- plnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]);
      Fl <- plnorm(don[li, lb],meanlog=zbm[li],sdlog=s[li]);
      ll <- ll + sum(log(Fu - Fl));
    li <- don[, which(lb > 0 & ub == lb)];
    if (length(li) > 0) {
      f <- dlnorm(don[li, ub],meanlog=zbm[li],sdlog=s[li]);
      ll <- ll + sum(log(f));
    if(is.na(ll) | is.infinite(ll)) {
      ll <- -1.0e200;
      ## Checking calculation in case of data problem ##
      #cat("theta = ", theta, " - ll = ", -ll, "\n");
    -ll; ## minimize -ll <=> maximize ll ##
  ## Amotry Function ###########################################################################
  amotry <- function(p, y, psum, ndim, ihi, fac) {
    ptry <- numeric();
    fac1 <- (1 - fac) / ndim;
    fac2 <- fac1 - fac;
    for(j in 1:ndim) ptry[j] <- psum[j] * fac1 - p[ihi,j] * fac2;
    ptry <- pmin(pmax(ptry, lowb), uppb); ## Bounds
    ytry <- funk(ptry);
    if(ytry < y[ihi]) {
      y[ihi] <- ytry;
      for(j in 1:ndim) {
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
psum[j] <- psum[j] + ptry[j] - p[ihi,j]; p[ihi,j] <- ptry[j]; } } list(ytry, p, y, psum); } ## Nelder-Mead calibration ## NMcal <- function(NMpar) { ## Libelle variable dependante ## labvdep <- NMpar[[1]]; ## Libelle parametres ## labtheta <- NMpar[[2]]; ## Nb parametres a estimer ## nbp <- length(labtheta); ## Valeurs de reference des parametres pour test H0 ## thref <- NMpar[[3]]; ## Valeurs initiales des parametres et bornes ## theta <- NMpar[[4]]; #print(theta); lowb <- NMpar[[5]]; uppb <- NMpar[[6]]; ## Pas de recherche initial ## lbd <- NMpar[[7]]; ## Initialisation simplex #################################################################### ndim <- length(theta); mpts <- ndim + 1; p <- array(0, c(mpts, ndim)); p[1,] <- theta; for(i in 2:mpts) { p[i,] <- p[1,]; p[i,i-1] <- p[i,i-1] + lbd[i-1]; } #print(p); psum <- numeric(); y <- numeric(); yr <- numeric(); for(i in 1:mpts) y[i] <- funk(p[i,]); ftol <- 1e-8; maxiter <- 5000; #maxiter <- 1; maxcycle <- 0; tiny <- 1e-10; ############################################################################################## iter <- 0; cycle <- 0; for(j in 1:ndim) psum[j] <- sum(p[,j]); swap <- numeric(); repeat { iter <- iter + 1; # if (iter == 1 | round(iter / 100) == iter / 100) { # cat("Iteration", iter, "\n"); # print(cbind(p, y)); # } ## Rank points of simplex ################################################################## yr <- rank(y, ties.method="first"); for(i in 1:mpts) { if(yr[i] == 1) ilo <- i; if(yr[i] == ndim) inhi <- i; if(yr[i] == mpts) {ihi <- i;yhi <- y[i];} } ## Test whether convergence achieved ####################################################### rtol <- 2 * abs(y[ihi] - y[ilo]) / (abs(y[ihi]) + abs(y[ilo]) + tiny); if(rtol < ftol){ print("precision reached"); converge <- 1; } if(iter >= maxiter){ print("maxiter reached"); converge <- 0;
141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
} if(rtol < ftol || iter >= maxiter) { swap[1] <- y[1];y[1] <- y[ilo];y[ilo] <- swap[1]; swap <- p[1,];p[1,] <- p[ilo,];p[ilo,] <- swap; # print("Iteration", quote=FALSE);print(iter, quote=FALSE); # print(cbind(p, y)); # print("", quote=FALSE); ## Display Calibration Results ######################################################## #print("Final", quote=FALSE);print(c(p[1,], y[1])); ## Covariance matrix ## covmat <- tryCatch(expr={solve(hessian(funk,p[1,]))},error=function(e){hinv <- 0;return(array(NA,dim=c(nbp,nbp)))}); hinv <- ifelse(sum(is.na(covmat))>0,0,1); ## Table of estimates ## result <- data.frame(Label=character(0),Estimate=numeric(0), Std_Dev=numeric(0),Ref=numeric(0),Chi2=numeric(),DF=numeric(0),Pval=numeric(0)) for (j in 1:length(theta)) { thtest <- p[1,]; thtest[j] <- thref[j]; #chi2 <- 2 * abs(y[1] - funk(thtest)); #stdev <- abs(p[1,j] - thref[j]) / sqrt(chi2); stdev <- sqrt(covmat[j,j]); chi2 <- ((p[1,j] - thref[j])^2/covmat[j,j]); df <- 1; pval <- pchisq(chi2, df, lower.tail=FALSE); new <- list(Label=labtheta[j], Estimate=formatC(p[1,j], digits=4, format="e"), Std_Dev=formatC(stdev, digits=4, format="e"), Ref=thref[j], Chi2=formatC(chi2, digits=4, format="e"), DF=df, Pval=formatC(pval,digits=4,format="f")) result <- rbind(result, new, stringsAsFactors=FALSE) } if (cycle < maxcycle) { for(i in 2:mpts) { p[i,] <- p[1,]; p[i,i-1] <- p[i,i-1] + lbd[i-1]; } for(i in 1:mpts) y[i] <- funk(p[i,]); iter <- 0; cycle <- cycle + 1; } else break; } ## New iteration ########################################################################### lamo <- amotry(p, y, psum, ndim, ihi, -1); ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; ##print("Factor -1"); ##print(cbind(p, y)); if(ytry <= y[ilo]) { lamo <- amotry(p, y, psum, ndim, ihi, 2); ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; ##print("Factor 2"); ##print(cbind(p, y)); } else if(ytry >= y[inhi]) { ysave <- y[ihi]; lamo <- amotry(p, y, psum, ndim, ihi, .5); ytry <- lamo[[1]];p <- lamo[[2]];y <- lamo[[3]];psum <- lamo[[4]]; ##print("Factor .5"); ##print(cbind(p, y)); if(ytry >= ysave) { for(i in 1:mpts) { if(i != ilo) { for(j in 1:ndim) p[i,j] <- 0.5 * (p[i,j] + p[ilo,j]);
211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
y[i] <- funk(p[i,]); } } for(j in 1:ndim) psum[j] <- sum(p[,j]); } } } list(p[1,],converge,hinv,covmat,result); } ##################################################################################################### ## Variance of theoretical alpha-quantile ## ##################################################################################################### delt <- function(tab,alpha,covm) { n <- tab[,.N]; if (nbzm > 0) { Zm <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zm,with=F])); } else { Zm <- array(1, dim=c(n,1)); } if (nbzd > 0) { Zd <- cbind(array(1, dim=c(n,1)), as.matrix(tab[,zd,with=F])); } else { Zd <- array(1, dim=c(n,1)); } zbd <- Zd%*%theta[(2+nbzm):(2+nbzm+nbzd)]; Zmd <- Zm; for(j in 1:(nbzd+1)) Zmd <- cbind(Zmd, Zd[,j] * exp(zbd) * qnorm(alpha)); diag(Zmd%*%covm%*%t(Zmd)); } ################################################################################################### ## Model parameter estimation ## ################################################################################################### ## Libelle parametres ## labtheta <- c("beta0",zm,"gamma0",zd); ## Nb parametres a estimer ## nbp <- length(labtheta); ## Valeurs de reference des parametres pour test H0 ## thref <- rep(0,nbp); ## Valeurs initiales des parametres et bornes ## theta <- rep(0,nbp); lowb <- rep(-Inf,nbp); uppb <- rep(+Inf,nbp); ## Pas de recherche initial ## lbd <- rep(0.5,nbp); NMpar <- list(labvdep,labtheta,theta,thref,lowb,uppb,lbd); calage <- NMcal(NMpar); thopt <- calage[[1]]; converge <- calage[[2]]; hinv <- calage[[3]]; covmat <- calage[[4]]; result <- calage[[5]]; outputs[[1]] <- result ##################################################################################################### ## Quantile estimation ## ##################################################################################################### ## Quantiles are estimated by fac variables ## fac <- unique(c(zm,zd)); ## Empirical (observed) quantiles ## qua <- don[,.(qobs=quantile(ub,probs=alpha,na.rm=T),nbobs=.N),by=fac]; n <- qua[,.N]; ## Regression - Effects on median ## if (nbzm > 0) { Zm <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zm,with=F])); } else {
281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
Zm <- array(1,dim=c(n,1)); } zbm <- Zm%*%thopt[1:(1+nbzm)]; ## Regression - Effects on dispersion ## if (nbzd > 0) { Zd <- cbind(array(1,dim=c(n,1)),as.matrix(qua[,zd,with=F])); } else { Zd <- array(1,dim=c(n,1)); } zbd <- Zd%*%thopt[(2+nbzm):(2+nbzm+nbzd)]; #s <- exp(zbd); qua[,qcal:=exp(zbm+exp(zbd)*qnorm(alpha))]; ## Left-censored counts ## lce <- don[lb<ub,.(nblce=.N),by=fac]; qua <- merge(qua,lce,by=fac,all=TRUE); qua[is.na(nblce),nblce:=0]; ## Variance of theoretical quantile ## qua[,vqc:=delt(qua,alpha,covmat)]; ## Lower and upper bounds of theoretical quantile ## qua[,lbqc:=qcal*exp(-1.96*vqc^0.5)]; qua[,ubqc:=qcal*exp(+1.96*vqc^0.5)]; ## Displaying observed and theoretical quantile ## qua <- qua[,c(fac,"nbobs","nblce","qobs","qcal","vqc","lbqc","ubqc"),with=F] qua$qcal <- round(qua$qcal, digits = 3) qua[,(ncol(qua)-2):ncol(qua)] <- round(qua[,(ncol(qua)-2):ncol(qua)],digits=4) outputs[[2]] <- qua return(outputs) } # UI ui <- fluidPage( # App title ---- titlePanel("Generalized ln-linear"), sidebarLayout( # Sidebar panel for inputs ---- sidebarPanel( h3("Create data table"), # Input: ---- fileInput("file1", "Choose csv file with semicolon separator", multiple = FALSE, accept = c("text/csv", "text/comma-separated-values,text/plain", ".csv")), uiOutput("var_ui"), numericInput("LQ", "Quantification limit of the selected parameter", min=0, value=1), numericInput("nbcov", "Number of explanatory variable modalities", min=1, value=2, max=10), rHandsontableOutput("tabEdit"), # affichage de la table a editer actionButton("go", "Create"), br(), br(), br(), h3("Effects on median"), uiOutput("choose_median"),
351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
h3("Effects on dispersion"), uiOutput("choose_disp"), h3("Choose quantile (%)"), numericInput("alpha", "Quantile", min=1, value=50, max=100), actionButton("run", "Show results"), br(), br(), br(), h3("Download results"), textInput("title","Document title","Results"), downloadButton("download","Download")), # Main panel for displaying outputs ---- mainPanel( tabsetPanel(type="tabs", tabPanel("Tutorial", h3("How to use the Generalized ln-linear Shiny App"), br(), p(strong("> Select the csv file containing the data to be analyzed")), p("It has to be a csv file with semicolon separator"), p("The file should contain the different dependent variables (ex : TSS, CDO, BDO,...) and the different explanatory variables with corresponding column headers (ex: facility, age group,...)"), br(), p(strong("> Choose the dependent variable")), p("For example TSS"), br(), p(strong("> State the value of the corresponding quantification limit")), p("For example 2.0 (mg/L) for TSS"), br(), p(strong("> Indicate the number of explanatory variable modalities to be investigated")), p("Other than those corresponding to the reference set"), br(), p(strong("> Complete the table describing each explanatory variable modality with the following information:")), p("- the label of the modality that you want to see in the different results tables",em("(for example 'inf2ans')")), p("- the column header in your csv file corresponding to the explanatory variable",em("(for example 'classe_age')")), p("- the expression of the modality inside this column in your csv file",em("(for example '< 2 ans')")), br(), p(strong("> Click on the 'Create' button and then check in the 'Data Table' panel if the table is well-built")), br(), p(strong("> Choose the modalities to consider in order to evaluate their effets on median and dispersion")), br(), p(strong("> Choose the quantile you want to be returned by the model")), br(), p(strong("> Lauch the model by clicking on the 'Show results' button. The table of estimates and the table of quantiles appear in the 'Results' panel.")), br(), p(strong("> Export results")), p("It is finaly possible to comment the results (dataset used, reference set, conclusions,...) and to export them in a word file using the 'Download' button (with a title choosen by the user).") ), tabPanel("Data Table", textOutput("lab_don"), tableOutput("don")), tabPanel("Results", h3("> Table of estimates"), textOutput("lab_res"), dataTableOutput("test"), h3("> Quantiles"), textOutput("lab_quant"), dataTableOutput("quantile"), h3("Comment"), tags$textarea(id="description", rows=6, cols=80,))
421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
) ) ) ) # server server <- function(input, output) { #Reactive values values <- reactiveValues(mat=NULL, don=NULL) # Importation du fichier de donnees data <- reactive({ req(input$file1) tryCatch( { df <- read.csv(input$file1$datapath, header = TRUE, sep = ";", dec = ",") }, error = function(e) { # return a safeError if a parsing error occurs stop(safeError(e)) } ) }) output$var_ui <- renderUI({ selectInput("var","Select parameter", choices=names(data())) }) # Selection covariables observe({ if (!is.null(input$tabEdit)) values$mat <- hot_to_r(input$tabEdit) }) observe({ nb<-input$nbcov tab <- data.frame(matrix(NA_character_, nrow=nb, ncol=3)) colnames(tab) <- c("Label","Column header","Expression") rownames(tab) <- paste("mod", 1:nb) values$mat<-tab }) output$tabEdit <- renderRHandsontable({ rhandsontable(values$mat, selectCallback = TRUE) }) ## Creation de la DataTable don observeEvent(input$go, { don <- cbind(data(),y=data()[input$var]) colnames(don)[ncol(don)] <- "y" # Left-censoring loq <- input$LQ don <- cbind(don,lb=data()[input$var]) colnames(don)[ncol(don)] <- "lb" don$lb[don$y <= loq] <- 0 don <- cbind(don,ub=data()[input$var]) colnames(don)[ncol(don)] <- "ub" don$ub[don$y <= loq] <- loq ## Covariates ## don <- as.data.table(don) for (i in 1:input$nbcov){
491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
don[,values$mat[i,1]:=ifelse(get(values$mat[i,2])==values$mat[i,3],1,0)] } ## Nettoyage ## don <- select(don, -1:-(grep("y",colnames(don))-1)) don <- na.omit(don) values$don <- don # Effects on location (median) and dispersion output$choose_median <- renderUI({ checkboxGroupInput("median","Effects on median", choices = values$mat[,1],selected = values$mat[,1]) }) output$choose_disp <- renderUI({ checkboxGroupInput("disp","Effects on dispersion", choices = values$mat[,1],selected = values$mat[,1]) }) output$lab_don <- renderText({ paste("Parameter: ",isolate(input$var),"\n") }) }) output$don <- renderTable(values$don) #verification don ## Lancement modele observeEvent(input$run,{ outputs <- ln_lineaire(labvdep=input$var,don=values$don,zm=input$median,zd=input$disp,alpha=input$alpha/100) # return table of estimates output$lab_res <- renderText({ paste("Parameter: ",isolate(input$var),"\n") }) output$test <- renderDataTable(outputs[[1]], options = list(dom = 't', searching = FALSE) ) # return table of quantiles output$lab_quant <- renderText({ paste("Dependent variable: ",isolate(input$var)," - Table of observed and theoretical ",input$alpha,"% quantiles:\n") }) output$quantile <- renderDataTable(outputs[[2]], options = list(dom = 't', searching = FALSE)) output$download <- downloadHandler( filename = function(){paste0(input$title, ".doc")}, content = function(file) { # Copy the report file to a temporary directory before processing it, in # case we don't have write permissions to the current working dir (which # can happen when deployed). tempReport <- file.path(tempdir(), "results.Rmd") file.copy("results.Rmd", tempReport, overwrite = TRUE) # Set up parameters to pass to Rmd document params <- list(description = input$description, estimates = outputs[[1]], quantile = outputs[[2]] ) # Knit the document, passing in the `params` list, and eval it in a # child of the global environment (this isolates the code in the document # from the code in this app). rmarkdown::render(tempReport, output_file = file, params = params, envir = new.env(parent = globalenv()) ) } )
561562563564565566567568569570
}) } # Create Shiny app ---- shinyApp(ui, server)