diff --git a/R/format.function.R b/R/format.function.R
index 72cffcd651b315d379596f00cd0c5ac94c5b9611..011ed1b7d7e3d88792c2e888179983a4f667c04a 100644
--- a/R/format.function.R
+++ b/R/format.function.R
@@ -41,7 +41,7 @@ names( DATA.BASP) <- c("id.plot",colnames(BASP))
 
 #### MERGE with indivudal tree
 data.indiv <- data.frame(id.tree,sp,id.plot,diam)
-data.merge <-  merge(data.indiv,DATA.BASP,by="id.plot")
+data.merge <-  merge(data.indiv,DATA.BASP,by="id.plot", sort = FALSE)
 rm(data.indiv,DATA.BASP)
 gc()
 
diff --git a/merge.data.FRANCE.R b/merge.data.FRANCE.R
index ac9e4864fea20025cbc115578dc90e1c8e09e309..69f32dbe6877e1db4dc073d8e244ae95808717b7 100644
--- a/merge.data.FRANCE.R
+++ b/merge.data.FRANCE.R
@@ -132,7 +132,8 @@ dataIFN.FRANCE$ecocode <- GRECO.temp ## a single code for each ecoregion
 ###compute numer of dead per plot to remove plot with disturbance
 perc.dead <- tapply(dataIFN.FRANCE[["dead"]],INDEX=dataIFN.FRANCE[["idp"]],FUN=function.perc.dead)
 ## VARIABLE TO SELECT PLOT WITH NOT BIG DISTURBANCE KEEP OFTHER VARIABLES IF AVAILABLE (disturbance record)
-dataIFN.FRANCE <- merge(dataIFN.FRANCE,data.frame(idp=as.numeric(names(perc.dead)),perc.dead=perc.dead))
+dataIFN.FRANCE <- merge(dataIFN.FRANCE,data.frame(idp=as.numeric(names(perc.dead)),perc.dead=perc.dead),
+                        sort=FALSE)
 
 
 ###########################################################################################
diff --git a/ms/FrenchNFIdataformatting_workshop.md b/ms/FrenchNFIdataformatting_workshop.md
new file mode 100644
index 0000000000000000000000000000000000000000..e0ee048d417986d5fe1904d03594d0a08aee3626
--- /dev/null
+++ b/ms/FrenchNFIdataformatting_workshop.md
@@ -0,0 +1,157 @@
+% Description of the French National Forest Inventory data and climatic data
+% Georges Kunstler
+
+## Department of Biological Sciences Macquarie University, Sydney, NSW / Irstea EMGR Grenoble France <georges.kunstler@gmail.com>
+
+# Introduction
+
+This document describes the data from the French National Forest Inventory (NFI) and the climatic data used in the working group on traits and competition. 
+
+
+# Data description
+
+The French National Forest Inventory comprises a network of temporary plots established on a grid of approximately 1000 x 1000 m. Ten percent of the cells are sampled each year. If a particular grid node falls within a forested area, a plot is established (randomly located in a square of 450m around the center of the cell), the soil type is characterized and dendrometric data are measured. Measurements are taken in three concentric circular subplots of different radii, based on circumference at breast height ($C_{130}$). All trees with $C_{130}$  > 23.5 cm, > 70.5 cm and > 117.5 cm were measured within a radius of 6 m, 9 m and 15 m, respectively.  For each measured tree, stem circumference, species, status (dead or alive, including only trees that died less than five years ago, judged from the state of the bark and small branches), and radial growth over five years were recorded.  The radial growth was determined from two short cores taken at breast height.  Soil properties were analysed using a soil pit of up to 1 m depth located in the center of the plot.  One or two soil horizons were distinguished from the soil pit, and depth, texture (based on eight classes) and coarse fragment content were recorded for each horizon.  Maximum soil water content ($SWC_{max}$) was computed based on these three variables, using standard values of water retention for each texture class (following Piedallu et al. 2011).
+
+
+The following document gives details about data formatting and the computation of the competition index.
+
+## Data downloaded
+ The data were downloaded from the [IGN website](http://inventaire-forestier.ign.fr) for each of the following inventory years: 2005, 2006, 2007, 2008, 2009, 2010 and 2011. For each year, four files are provided: individual alive trees data, individual dead trees data, plots data, ecological data. It was necessary to merge the data across years and to homogenize the variables, because the variables and their categories changed between years (see the end of the document). In addition, we purchased from IGN the exact elevation of each plot (the exact coordinates of the plots are not available, only the center of the 1x1 km cell in which the plot is located).
+ 
+ 
+# Structure of data for analysis
+
+For the analysis we need for each country a list with three elements.
+
+* First element is a  data.frame with columns
+
+    - $tree.id$ a unique identifier of each tree
+    - $sp$ the species code
+    - $plot$ the plot code
+    - $ecocode$ the ecoregion code (similar ecoregions are merged so that each ecoregion has enough observations)
+    - $D$ diameter growth in cm
+    - $G$ the diameter growth rate in mm / yr.
+    - $dead$ a dummy variable 0 alive 1 dead
+    - $year$ the number of year for the growth measurement
+    - $htot$ the height of the individual (m), for the databases in which it is available, used to compute the maximum height per species
+    - $Lon$ Longitude of the plot in WGS84
+    - $Lat$ Latitude of the plot in WGS84
+    - $perc.dead$ the percentage of dead trees computed on each plot, used to exclude plots with perturbation (equal to 1 for plots with known perturbation)
+
+* Second element is a data.frame with columns
+
+    - $tree.id$ a unique identifier of each tree
+    - $ecocode$ the ecoregion code
+    - one column per species, named with the species code $sp$ used in the previous table
+	- $BATOT.COMPET$ the sum of the basal area of all species
+
+* Third element is a data.frame with columns
+
+	- $sp$ the species code as in previous table
+	- $Latin_name$ the latin name of the species
+    - $Leaf.N.mean$ Leaf Nitrogen per mass mean for the species [need to add unit for all]
+	- $Seed.mass.mean$ 
+	- $SLA.mean$
+	- $Wood.density.mean$
+	- $Max.height.mean$
+	- and the same columns with $sd$ instead of $mean$ with either the mean sd within species if species mean or the mean sd with genus if genus mean because no species data
+
+# Competition index
+
+We computed the sum of basal area (BA) per plot (including the weighting of each tree to have a basal area in $m^2/ha$) total and per species without the  BA of the target tree (see the R function BA.SP.FUN in the file format.function.R).
+
+
+# Climatic data
+
+For each plot, the monthly temperature and total precipitation were taken from a GIS database at 1 $km^2$ resolution developed by Piedallu et al. (2012). The solar radiation, accounting for cloud cover, was also retrieved for each plot from a database at 1 $km^2$ resolution (Piedallu & Gegout 2008). The temperature was corrected for the actual elevation of the plot using geospatial kriging of the temperature lapse rate.
+
+Based on these data we computed the sum of degree days above 5.56 $^\circ C$ ($SDD$) and a water stress index ($WS$) derived from a monthly water budget (using the model of Bugmann & Cramer 1998).
+
+* To compute the $SDD$ we used a spline of the average monthly temperatures.
+* The water stress index ($WS$) is based on the ratio of the actual evapotranspiration to the potential evapotranspiration (the details of the calculation are presented below).
+
+## Water budget model
+The monthly potential evapotranspiration ($PET_m$) was computed using the Turc equation (Turc, 1961).
+\begin{equation} 
+PET_m = n \times 0.0133333 \times (Rg_m + 50) \times \frac{t_m}{t_m + 15}
+\end{equation}
+with $n$ = number of days of the month, $t_m$ = the monthly temperature and $Rg_m$ = the monthly radiation. 
+	The water budget computed monthly soil water content ($SWC_m$), with initial condition for January $SWC_m$ set as $SWC_{max}$ (the maximum soil water content). Then monthly soil water content was iteratively computed using the following equation.
+\begin{equation}  
+SWC_{m+1} =  min(SWC_m+Ps_m - AET_m,SWC_{max}) 	
+\end{equation}
+
+with $Ps_m$ = the infiltrating precipitation, $AET_m$ = the monthly actual evapotranspiration.
+$AET_m = min(D_m,S_m)$
+with $D_m = PET_m - Pi_m$ where $Pi_m$ is the intercepted precipitation,
+and $S_m = cw *SWC_m/SWC_{max}$ where $cw$ is a parameter denoting the maximum evapotranspiration from a saturated soil under conditions of high demand (as in Bugmann & Cramer 1998 we assume that $cw$ = 12 cm/month).
+
+$Pi_m$ and $Ps_m$ are computed as:
+$Pi_m = min(fi * P_m , PET_m)$
+with $fi$ = a parameter denoting the fraction of precipitation that is intercepted and is set at a value of 0.3 following Bugmann & Cramer (1998), and $P_m$ = the monthly precipitation.
+$Ps_m =  P_m - Pi_m$
+
+The water stress index was computed as 
+\begin{equation}  
+WS =   \frac{\sum_{m=1}^{12} AET_m}{\sum_{m=1}^{12} PET_m}
+\end{equation}
+
+### Maximum soil water content
+
+$SWC_{max}$ was computed following (Piedallu et al. 2011) as 
+\begin{equation} 
+SWC_{max} =  (1-RO)\times (\sum_{i=1}^n {(1-{\sqrt{SC_i}}^3) \times (\theta_i^{2.0} - \theta_i^{4.2}) \times T_i}) 	
+\end{equation}
+with $n$ the number of horizons in the soil profile. $SC_i$ is the proportional stone content in horizon $i$, $\theta_i^{2.0}$ and $\theta_i^{4.2}$ are the water contents at matric potentials of -100 hPa and -15000 hPa, respectively, in horizon $i$ (according to Al Majou et al. 2008), $T_i$ is the thickness of horizon $i$ in millimeters and $RO$ is the proportion of rock outcrop recorded for the plot.
+
+
+# Matching of different years
+
+See R script READ.DATA.NFI.R
+
+## Alive tree
+
+(@) In 2005 and 2006 the variable $veget$ had either the value 0 (not damaged) or $Z$ (damaged).  From 2007 onward, damage has been recorded in the variable $acci$, with the value 0 for no damage and 1 to 5 for different types of damage. A variable $vege$ with value 0 (no damage) or 1 (damage) has been created for all years.
+
+(@) Variable $orir$ give the origin of the tree: recruit from seed (1) or from resprouting (0 only in 2005 and 2006 - but 0 for resprout and 2 for resprout from wind thrown tree from 2007 and onward). 
+
+(@) Variable $simplif$, which identifies the trees that were simplified, was provided only after 2009.
+
+(@) Variables $sfgui$ $sfgeliv$ $sfpied$ $sfdorge$ $sfcoeur$ were provided only after 2009.
+
+
+Note that radial growth was not measured for all trees in a plot. If the number of trees of a given species and a given size class ($C_{130}$ classes 23.5-70.5, 70.5-117.5, 117.5-164.5, >164.5cm) is greater than 6, the radial growth is measured on only 6 individuals.
+
+## Dead tree
+
+(@) Before 2008 only $C_0$, the circumference at the base of the tree, was provided for dead trees, not $C_{130}$, the circumference at 1.30 m. Using data from the previous French NFI in the Alps and the Jura (see Kunstler et al. 2011 for a description of the data), which recorded both $C_0$ and $C_{130}$, we fitted an RMA linear regression (with package $lmodel2$ in R) between $C_0$ and $C_{130}$ and then predicted $C_{130}$ for each dead individual from 2005 to 2007. Note that the species description was not as accurate in the previous NFI data (numeric codes only, with rare species grouped together), so the predictions were made at this taxonomic level, and when no data were available the model was fitted with all species together.
+
+(@) The variable $datemort$, providing an estimate of the date of death, was not recorded before 2008.
+
+
+Then data for alive and data for dead tree were merged.
+
+## Plot data
+
+(@) The variable $plisi$ occurrence of an edge was not recorded in 2005 and 2006.
+
+(@) The variable $sfo$ stand structure was not recorded in 2005.
+
+(@) The variable $incid$ occurrence of a disturbance was not recorded before 2009.
+
+## Ecological data
+
+We only use the pedological variables. There were no changes between years in the variables we used (soil description).
+
+
+
+# References
+- Al Majou, H., Bruand, A., Duval, O., (2008) The use of in situ volumetric water content at field capacity to improve the prediction of soil water retention properties. Canadian Journal of Soil Science, **88**, 533-541.
+- Bugmann, H. & Cramer, W. (1998) Improving the behaviour of forest gap models along drought gradients. Forest Ecology and Management,**103**, 247-263.
+- Kunstler, G., Albert, C.H., Courbaud, B., Lavergne, S., Thuiller, W., Vieilledent, G., Zimmermann, N.E., Coomes, D.A. (2011) Effects of competition on tree radial-growth vary in importance but not in intensity along climatic gradients. Journal of Ecology, **99**, 300–312.
+- Piedallu, C., and Gegout, G. (2008) Efficient Assessment of Topographic Solar Radiation to Improve Plant Distribution Models. Agricultural and Forest Meteorology, **148**, 1696–1706. 
+- Piedallu, C., J. C. Gégout, A. Bruand, & Seynave, I. (2011) Mapping Soil Water Holding Capacity over Large Areas to Predict Potential Production of Forest Stands. Geoderma, **160**, 355–366.
+- Piedallu, C., J. C. Gégout, V. Perez, F. Lebourgeois, and Field, R. (2012) Soil Water Balance Performs Better Than Climatic Water Variables in Tree Species Distribution Modelling. Global Ecology and Biogeography, **22**, 478-482.
+- Plummer, M. (2003) JAGS: A program for analysis of Bayesian graphical models using Gibbs sampling. In Proceedings of the 3rd International Workshop on Distributed Statistical Computing (DSC 2003). March, pp. 20–22.
+- Turc, L. (1961) Evaluation des besoins en eau d’irrigation, évapotranspiration potentielle. Annales Agronomiques, **12**, 13-49.
+
diff --git a/read.data.Canada.R b/read.data.Canada.R
index 885c744ebbbf2f46716218402561d7c786d315a2..24f157647c5719d49485642f64453bea9b6fdb2b 100644
--- a/read.data.Canada.R
+++ b/read.data.Canada.R
@@ -1,19 +1,14 @@
 ##########################
 ##########################
-### READ the funny format of the data
+### READ CANADA DATA
 
-##  read fiel per lines and remove "------" and leading and trailling "|  " "   |"
-texts <- grep("^[^-]",readLines("./data/raw/DataCanada/Canada_data2George.csv"),value=TRUE)
-texts <- sub("^[|] +","",texts)
-texts <- sub(" +[|]$","",texts)
-writeLines(texts,con="./data/raw/DataCanada/Canada_data2George_cleaned.txt")
+data.canada <- read.csv("./data/raw/DataCanada/Canada_Data2George_20130808.csv",header=TRUE,stringsAsFactors =FALSE)
+head(data.canada)
+dim(data.canada)
+plot(data.canada[["Lon"]],data.canada[["Lat"]])
 
-## read data cleaned
-data.Canada <- read.table("./data/raw/DataCanada/Canada_data2George_cleaned.txt",sep="|",header=TRUE,stringsAsFactors =FALSE)
-head(data.Canada)
-plot(data.Canada[["Lon"]],data.Canada[["Lat"]],cex=0.3)
-
-## plots
-length(table(data.Canada[["All_Trees_PlotID"]]))
-## plots and subplots
-length(table(paste( data.Canada[["All_Trees_PlotID"]],data.Canada[["SubPlotID"]])))
+## plot on world map
+library(rworldmap)
+newmap <- getMap(resolution = "coarse")  # different resolutions available
+plot(newmap)
+points(data.canada[["Lon"]],data.canada[["Lat"]],cex=0.2,col="red")