En raison du déménagement des baies serveurs, les services gitlab.irstea.fr et mattermost.irstea.fr seront interrompus le samedi 2 octobre 2021 au matin. Ils devraient revenir à la normale dans la journée.

Commit 14746c75 authored by Harmel Tristan's avatar Harmel Tristan
Browse files

change figure output

test application on large data set with post processing
parent 6dffd8d5
......@@ -2,11 +2,8 @@
import sys, os
import re
import datetime as dt
import julian
dublin_julian_day = 2415020
import pandas as pd
djd = 42349.44914
jd = djd + dublin_julian_day
dt = julian.from_jd(jd)
print(dt)
\ No newline at end of file
excel_jd = [42877.57571]# 42349.44914
date = pd.TimedeltaIndex(excel_jd, unit='d') + dt.datetime(1899,12,30)
print(date)
\ No newline at end of file
import os
import numpy as np
import pandas as pd
import fiona
import geopandas as gpd
# Enable fiona driver
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
import glob
import re
import matplotlib as mpl
import matplotlib.pyplot as plt
import subprocess
from multiprocessing import Pool
import plotly.graph_objects as go
import plotly.express as px
import plotly.offline as po
# ---- module-level configuration: input paths and station inventory ----
idir = '/DATA/OBS2CO/data/rogerio/data'  # root data directory
coordf = os.path.join(idir, '../metadata', 'Coordenadas_Rrs__2016_2019.xlsx')  # station coordinates (Excel)
infofile = os.path.join(idir, '../metadata/datainfo.csv')  # inventory CSV produced by generate_database_info
L1dir = os.path.join(idir, 'L1')  # level-1 acquisition directories
coords = pd.read_excel(coordf)  # NOTE(review): read at import time — module import fails if the file is absent
idirs = glob.glob(L1dir + '/2*[0-9]')  # date-named acquisition directories (e.g. .../L1/20170123)
idirs.sort()
def get_meta(file):
    """Parse the '='-separated metadata header of a TriOS data file.

    Reads the first 18 lines as key/value pairs, transposes them into a
    single-row DataFrame and removes spaces from the key (column) names.

    :param file: path to a file whose header lines look like '%Key = value'
    :return: one-row DataFrame whose columns are the (space-stripped) header keys
    """
    raw = pd.read_csv(file, sep='=', nrows=18, header=None)
    meta = raw.set_index(0).T
    meta.columns = meta.columns.str.replace(' ', '')
    return meta
def generate_database_info(idirs, infofile):
    """Scan L1 acquisition directories and write a metadata inventory CSV.

    For each date directory in `idirs`, each KML sample point and each Ed
    acquisition file, locate the matching Lsky ("Ld") and Lt ("Lu") files,
    read the instrument metadata headers and append one row to the inventory.

    :param idirs: list of date-named L1 directories to scan
    :param infofile: path of the output CSV inventory
    """
    opb = os.path.basename
    meta_attrs = ['%IDDevice', '%IntegrationTime', '%IDDataBack', '%IDDataCal', '%CalFactor']
    df = pd.DataFrame(columns=np.concatenate([['ID', 'date', 'lat', 'lon', 'Ed_file', 'Lsky_file', 'Lt_file'],
                                              ['Ed' + s for s in meta_attrs],
                                              ['Lsky' + s for s in meta_attrs],
                                              ['Lt' + s for s in meta_attrs]]))
    i = 0
    # loop on directories (one per acquisition date)
    for idir_ in idirs:
        date = os.path.basename(idir_)
        print(date)
        kmlfs = glob.glob(idir_ + '/*.kml')
        # loop on kml files (one per sample point)
        for kmlf in kmlfs:
            coord = gpd.read_file(kmlf, driver='KML')
            ID, lon, lat = coord.Name.values[0], coord.geometry.x.values[0], coord.geometry.y.values[0]
            ID_ = os.path.basename(kmlf).replace('.kml', '')
            if ID != ID_:
                # placemark name and file name disagree — report for manual check
                print(ID, ID_)
            files = glob.glob(idir_ + '/*Ed*_' + ID_ + '*.mlb')
            # loop on data files (one per acquisition sequence)
            for Edf in files:
                ID_ = Edf.split('_')[-1].replace('.mlb', '')
                pattern = Edf.split('_')[0:-2]
                try:
                    # matching Lsky ("Ld") file; IndexError when glob finds nothing
                    Lskyf = glob.glob(Edf.replace('Ed', 'Ld').replace(Edf.split('_')[-2], '*'))[0]
                except IndexError:
                    print('problem with: ', date, ID_, pattern, Edf)
                    continue
                try:
                    # matching Lt ("Lu") file; IndexError when glob finds nothing
                    Ltf = glob.glob(Edf.replace('Ed', 'Lu').replace(Edf.split('_')[-2], '*'))[0]
                except IndexError:
                    print('problem with: ', date, ID_, pattern, Edf)
                    continue
                # read metadata from files
                Edmeta, Lskymeta, Ltmeta = get_meta(Edf), get_meta(Lskyf), get_meta(Ltf)
                # save into dataframe df
                info = np.concatenate([[ID_, date, lat, lon, opb(Edf), opb(Lskyf), opb(Ltf)],
                                       Edmeta[meta_attrs].values[0], Lskymeta[meta_attrs].values[0],
                                       Ltmeta[meta_attrs].values[0]])
                df.loc[i] = info
                i += 1
    # warning
    # 20170123/aw_Lu_SAM84EC_45a.mlb is missing
    df.to_csv(infofile, index=False)
def plot_map(coords, datafile=''):
    """Plot median Rrs values on an interactive satellite map for band `wl` and `method`.

    Reads the per-sample Rrs statistics file, attaches latitude/longitude from the
    module-level `infofile` inventory, and writes an HTML plotly map animated by month.

    :param coords: station coordinates DataFrame (currently unused; kept for interface
        compatibility — TODO confirm with callers before removing)
    :param datafile: Rrs statistics CSV; defaults to <idir>/L2/all/Rrs_stat.csv
    """
    wl = 860  # wavelength (nm) to display; 659 nm is the other band of interest
    method = 'M99'
    if datafile == '':
        datafile = os.path.join(idir, 'L2/all/Rrs_stat.csv')
    df = pd.read_csv(datafile, parse_dates=True)
    df['datetime'] = df['date']
    df['date'] = pd.DatetimeIndex(df.date).normalize()
    df.set_index(['date', 'ID'], inplace=True)
    # attach coordinates from the metadata inventory (indexed by date, ID)
    df['latitude'] = np.nan
    df['longitude'] = np.nan
    info = pd.read_csv(infofile, index_col=[1, 0], parse_dates=True)
    for idx, info_ in info.iterrows():
        print(idx)
        df.loc[idx, 'longitude'] = info_.lon
        df.loc[idx, 'latitude'] = info_.lat
    df.reset_index(inplace=True)
    df['month'] = df['date'].dt.month
    df['date'] = df['date'].dt.strftime('%Y-%m-%d')
    # .copy() so that adding 'size' does not trigger SettingWithCopyWarning on a slice
    df_ = df.loc[(df.wl == wl) & (df.method == method)].copy()
    df_['size'] = 8
    fig = px.scatter_mapbox(df_, lat="latitude", lon="longitude", color='0.5', hover_name="ID",
                            hover_data=["date", 'method'],
                            size='size',
                            range_color=[0, df_['0.5'].max() * 0.7],
                            animation_frame='month',
                            color_continuous_scale=px.colors.cyclical.IceFire,
                            zoom=5.5, opacity=0.45, height=900, width=1100,
                            title='Sample points from Rogerio; Rrs at ' + str(wl) + ' nm')
    fig.update_traces(
        line=dict(
            width=0.5,
        ), )
    fig.update_layout(mapbox_style="open-street-map")
    # overlay ESRI world imagery tiles below the data points
    fig.update_layout(title_font_size=24,
                      mapbox_style="white-bg",
                      mapbox_layers=[
                          {
                              "below": 'traces',
                              "sourcetype": "raster",
                              "source": [
                                  "https://services.arcgisonline.com/arcgis/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}"
                              ]
                          },
                      ])
    po.plot(fig, filename=os.path.join(idir, '../fig/map_rogerio_Rrs' + str(wl) + '.html'))
def call(command):
    """Run a shell command, logging failures to err.txt.

    SECURITY NOTE: shell=True is only acceptable because `command` is built
    internally (see process()); never pass untrusted input here.

    :param command: full command line to execute through the shell
    :return: None
    """
    print(command)
    cp = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    if cp.returncode != 0:
        print('ERROR for ', command.split(' ')[1:3])
        with open('err.txt', 'a') as ferr:
            # newline added so successive failed commands do not concatenate
            ferr.write(command + '\n')
    return
def process(infofile, method='M99', ncore=10):
    """Run trios_processing on every acquisition sequence listed in `infofile`.

    Builds one command line per inventory row and executes them in parallel
    through `call` with a multiprocessing Pool.

    :param infofile: inventory CSV written by generate_database_info
    :param method: processing method name passed to trios_processing
    :param ncore: number of parallel worker processes
    """
    odir = '/DATA/OBS2CO/data/rogerio/data/L2'
    figdir = '/DATA/OBS2CO/data/rogerio/fig/L2'
    info = pd.read_csv(infofile)
    utc_conv = '4'  # local time to UTC offset (hours), passed through to trios_processing
    command = []
    for i, info_ in info.iterrows():
        ID = str(info_.ID)
        date = str(info_.date)
        print(date, ID)
        odir_ = os.path.join(odir, method, date)
        if not os.path.exists(odir_):
            os.makedirs(odir_)
        lat, lon = str(info_.lat), str(info_.lon)
        Edf, Lskyf, Ltf = info_.Ed_file, info_.Lsky_file, info_.Lt_file
        command.append('trios_processing ' + os.path.join(L1dir, date) + ' ' + ID +
                       ' awr --lat ' + lat + ' --lon ' + lon + ' --name _' + method +
                       ' --data_files "' + Edf + ' ' + Lskyf + ' ' + Ltf +
                       '" --odir ' + odir_ + ' --utc_conv=' + utc_conv +
                       ' --method ' + method + ' --plot --figdir ' + figdir + ' --format mlb')
    with Pool(processes=ncore) as pool:
        print(pool.map(call, command))
        # fix: was `pool.close` (bare attribute access, never actually called)
        pool.close()
def post_process(create_stat_file=False):
    """Aggregate per-sequence Rrs results and plot a per-station method comparison.

    Optionally rebuilds the Rrs statistics file (25/50/75% quantiles per
    wavelength, per method, per sequence), then draws one subplot per
    (date, ID) group comparing the three processing methods and saves the
    figure to compare_method.pdf.

    :param create_stat_file: when True, rescan the L2 result files and
        regenerate <idir>/all/Rrs_stat.csv before plotting
    """
    idir = '/DATA/OBS2CO/data/rogerio/data/L2'
    stat_file = os.path.join(idir, 'all/Rrs_stat.csv')
    if create_stat_file:
        odf = pd.DataFrame()
        for method in ['M99', 'osoaa', 'temp_opt']:
            files = glob.glob(os.path.join(idir, method) + '/2*[0-9]/Rrs*csv')
            for file in files:
                info = file.split('_')
                ID = info[-2].replace('idpr', '')
                if method == 'temp_opt':
                    # temp_opt result files carry one extra '_'-separated suffix
                    ID = info[-3].replace('idpr', '')
                df = pd.read_csv(file, header=[0, 1], index_col=0, parse_dates=True)
                # mean acquisition time used as the sequence timestamp
                date = df.index.mean()
                Rrs = df.Rrs.quantile([0.25, 0.5, 0.75]).T
                wl = Rrs.index.values
                Rrs.index = pd.MultiIndex.from_product([[date], [ID], [method], wl],
                                                       names=['date', 'ID', 'method', 'wl'])
                odf = pd.concat([odf, Rrs])
        # save data file
        odf.to_csv(stat_file)
    df = pd.read_csv(stat_file, index_col=[0], parse_dates=True)
    df.sort_values(['date', 'ID', 'method', 'wl'], inplace=True)
    # (removed: redundant local matplotlib import and unused trios imports /
    #  type_list / type_description dead code)
    ncols = 4
    plt.rcParams.update({'font.size': 18})
    obj = df.groupby(['date', 'ID'])
    N = len(obj.groups)  # fix: was obj.groups.__len__()
    rows = N // ncols + (1 if N % ncols else 0)
    aspect_ratio = 1 * ncols
    fig, axs = plt.subplots(nrows=rows, ncols=ncols, figsize=(20, rows * aspect_ratio))
    fig.subplots_adjust(left=0.1, right=0.9, hspace=.5, wspace=0.45)
    c = dict(M99='red', osoaa='black', temp_opt='blue')
    for (idx, group), ax in zip(obj, axs.flatten()):
        print(group.ID.unique())
        for method, g in group.groupby('method'):
            # median line plus 25-75% envelope, one color per method
            ax.plot(g.wl, g['0.5'], label=method, color=c[method])
            ax.fill_between(g.wl, g['0.25'], g['0.75'], alpha=0.35, color=c[method])
        ax.legend(loc='best', frameon=False)
        ax.set_xlabel(r'Wavelength (nm)')
        ax.set_title('ID ' + idx[1] + ', ' + idx[0].date().__str__())
    plt.savefig('/DATA/OBS2CO/data/rogerio/fig/compare_method.pdf', bbox_inches='tight')
    plt.close()
df,Lskyf,Ltf = [os.path.join(idir,f) for f in filenames.split(' ')]
print('process files Ed: '+ Edf+', Lsky: '+Lskyf+', Lt: '+Ltf )
uawr = u.awr_data(idpr, Edf=Edf, Lskyf=Lskyf, Ltf=Ltf)
if uawr.Edf:
df, wl = uawr.reader(lat, lon, alt)
date = df.index[0].date().__str__()
if ofile:
ofile = os.path.join(odir, ofile)
else:
ofile = os.path.join(odir, 'Rrs_awr_' + date + '_idpr' + idpr + name + '.csv')
# ----------------------------
# to execute
# ----------------------------
if noclobber and os.path.exists(ofile):
print('Skip processing: data already processed with "--no_clobber" set')
return
# plot_map(coords)
# generate_database_info(idirs,infofile)
method = 'M99'
process(infofile, method=method, ncore=10)
if plot:
figfile = os.path.join(figdir, 'trios_awr_' + date + '_idpr' + idpr + name + '.png')
else:
figfile = ""
for method in ['M99','osoaa','temp_opt']:
process(infofile, method=method, ncore=10)
awr = awr_process(df, wl, name, idpr)
Rrs = awr.call_process(method, ofile, vza=vza, azi=azi, ws=ws, aot=aot, plot_file=figfile)
\ No newline at end of file
post_process(create_stat_file=True)
\ No newline at end of file
......@@ -2,7 +2,7 @@
Usage:
trios_processing <input_dir> <IDpr> <measurement_type> --lat <lat> --lon <lon> \
[--data_files=<filenames>] \
[--data_files=<filenames>] [--format=<file_format>] \
[--altitude=alt] [--ofile <ofile>] [--odir <odir>] [--plot] [--figdir <figdir>] \
[--name <name>] [--method <method>] [--no_clobber] \
[--vza <vza>] [--azi <azi>] [--ws <ws>] [--aot <aot>] [--utc_conv <hours>]
......@@ -16,6 +16,11 @@ Options:
<input_dir> Input directory where the trios.csv files are located
<IDpr> IDpr label of the measurement sequence to process
<measurement_type> awr, swr, iwr
--data_files <filenames> File names (space separated) of the files to be processed in <input_dir>
--format <file_format> File suffix which can be recognized
('csv': for pre processed files given in csv format;
'mlb': for files given by Trios software)
[default: csv]
--lat <lat> Latitude in decimal degrees
--lon <lon> Longitude in decimal degrees
......@@ -67,6 +72,7 @@ def main():
idir = os.path.abspath(args['<input_dir>'])
idpr = args['<IDpr>']
filenames = args['--data_files']
file_format = args['--format']
meas_type = args['<measurement_type>']
method = args['--method']
lat = float(args['--lat'])
......@@ -130,6 +136,7 @@ def main():
# -----------------------------------------------
# AWR processing
# -----------------------------------------------
if filenames is None:
uawr = u.awr_data(idpr, files)
else:
......@@ -137,8 +144,8 @@ def main():
print('process files Ed: '+ Edf+', Lsky: '+Lskyf+', Lt: '+Ltf )
uawr = u.awr_data(idpr, Edf=Edf, Lskyf=Lskyf, Ltf=Ltf)
if uawr.Edf:
df, wl = uawr.reader(lat, lon, alt, utc_conv=utc_conv)
try:
df, wl = uawr.reader(lat, lon, alt, utc_conv=utc_conv, file_format=file_format)
date = df.index[0].date().__str__()
if ofile:
ofile = os.path.join(odir, ofile)
......@@ -156,6 +163,8 @@ def main():
awr = awr_process(df, wl, name, idpr)
Rrs = awr.call_process(method, ofile, vza=vza, azi=azi, ws=ws, aot=aot, plot_file=figfile)
except:
raise('Problem for: '+ ofile)
elif meas_type == 'iwr':
# -----------------------------------------------
......
......@@ -135,66 +135,77 @@ class awr_process:
if plot_file:
# ------------------
# plotting
# plotting envelope
# ------------------
Ltm = Lt.mean(axis=0)
Edm = Ed.mean(axis=0)
def add_envelope(ax,wl,values,label='',**kwargs):
up.add_curve(ax, wl, values.mean(axis=0), label=label, c='black',**kwargs)
ax.fill_between(wl,np.quantile(values,0.05, axis=0),np.quantile(values,0.95, axis=0), alpha=0.25, color='grey')
ax.fill_between(wl,np.quantile(values,0.25, axis=0),np.quantile(values,0.75, axis=0), alpha=0.35, color='red')
mpl.rcParams.update({'font.size': 18})
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(20, 12))
fig.subplots_adjust(left=0.1, right=0.9, hspace=.5, wspace=0.45)
# ---- Ed
ax = axs[0, 0]
up.add_curve(ax, wl, Ed.mean(axis=0),
label=r'$L_{sky}$', c='red') # just to put the two labels
up.add_curve(ax, wl, Ed.mean(axis=0), Ed.std(axis=0),
label=r'$E_s$', c='black')
ax.set_ylabel(r'$E_{d}(0^{+})$')
add_envelope(ax,wl,Ed,label=r'$E_s$')
#up.add_curve(ax, wl, Ed.mean(axis=0), Ed.std(axis=0), label=r'$E_s$', c='black')
ax.set_ylabel(r'$E_{d}(0^{+})\ (mW\ m^{-2}\ nm^{-1})$')
ax.set_xlabel(r'Wavelength (nm)')
ax.legend(loc='best', frameon=False)
# ---- Lsky
ax2 = ax.twinx()
up.add_curve(ax2, wl, Lsky.mean(axis=0), Lsky.std(axis=0),
label=r'$L_{sky}$', c='red')
ax2.set_ylabel(r'$L_{sky}$', color='r')
ax2.tick_params('y', colors='r')
ax = axs[0, 1] #ax2 = ax.twinx()
add_envelope(ax,wl,Lsky,label=r'$L_{sky}$')
ax.set_ylabel(r'$L_{sky}\ (mW\ m^{-2}\ nm^{-1}\ sr^{-1})$')#, color='r')
#ax.tick_params('y', colors='r')
ax.set_xlabel(r'Wavelength (nm)')
ax.legend(loc='best', frameon=False)
# ---- Lt vs Lsurf
ax = axs[0, 1]
up.add_curve(ax, wl, Lt.mean(axis=0), Lt.std(axis=0),
label=r'$L_t$', c='black')
up.add_curve(ax, wl, Lsky.mean(axis=0) * rho, Lsky.std(axis=0) * rho,
label=method + ' (' + str(round(rho, 4)) + ')', c='violet')
ax.set_ylabel(r'$L_t\ or L_{surf}$')
ax = axs[0, 2]
add_envelope(ax,wl,Lt,label=r'$L_t$')
#up.add_curve(ax, wl, Lt.mean(axis=0), Lt.std(axis=0), label=r'$L_t$', c='black')
add_envelope(ax,wl,Lsky* rho,label=r'$L_{surf}$',linestyle=':')
# up.add_curve(ax, wl, Lsky.mean(axis=0) * rho, Lsky.std(axis=0) * rho,
# label=method + ' (' + str(round(rho, 4)) + ')', c='violet')
ax.set_ylabel(r'$L_t\ or\ L_{surf}\ (mW\ m^{-2}\ nm^{-1}\ sr^{-1})$')
ax.set_xlabel(r'Wavelength (nm)')
ax.legend(loc='best', frameon=False)
# ---- Proportion o(Lt - Lsurf ) /Lt
ax = axs[0, 2]
up.add_curve(ax, wl, Lsky.mean(axis=0) * rho / Ltm, Lsky.std(axis=0) * rho,
label=method + ' (' + str(round(rho, 4)) + ')', c='violet')
ax = axs[1, 0]
add_envelope(ax,wl,Lsky * rho / Ltm,label=r'$L_{surf}/L_t$')
# up.add_curve(ax, wl, Lsky.mean(axis=0) * rho / Ltm, Lsky.std(axis=0) * rho,
# label=method + ' (' + str(round(rho, 4)) + ')', c='violet')
ax.set_ylabel(r'$L_{surf}/L_t$')
ax.set_xlabel(r'Wavelength (nm)')
# ---- Lw
ax = axs[1, 0]
up.add_curve(ax, wl, Rrs.mean(axis=0) * Edm, Rrs.std(axis=0) * Edm,
label=method + ' (' + str(round(rho, 4)) + ')', c='violet')
ax = axs[1, 1]
add_envelope(ax,wl,Rrs* Edm)
# up.add_curve(ax, wl, Rrs.mean(axis=0) * Edm, Rrs.std(axis=0) * Edm,
# label=method + ' (' + str(round(rho, 4)) + ')', c='blue')
ax.set_ylabel(r'$L_{w}\ (sr^{-1})$')
ax.set_ylabel(r'$L_{w}\ (mW\ m^{-2}\ nm^{-1}\ sr^{-1})$')
ax.set_xlabel(r'Wavelength (nm)')
# ---- Rrs
ax = axs[1, 1]
up.add_curve(ax, wl, Rrs.transpose().mean(axis=1), Rrs.transpose().std(axis=1),
label=method + ' (' + str(round(rho, 4)) + ')', c='violet')
ax = axs[1, 2]
add_envelope(ax,wl,Rrs)
# up.add_curve(ax, wl, Rrs.transpose().mean(axis=1), Rrs.transpose().std(axis=1),
# label=method + ' (' + str(round(rho, 4)) + ')', c='blue')
ax.set_ylabel(r'$R_{rs}\ (sr^{-1})$')
ax.set_xlabel(r'Wavelength (nm)')
ax.set_title('azi=' + str(azi) + ', vza=' + str(vza) + ', sza=' + str(round(sza.mean(), 2)))
fig.suptitle('trios_awr ' + self.name + ' idpr' + self.idpr, fontsize=16)
fig.suptitle('trios_awr ' + self.name + ' idpr' + self.idpr, fontsize=18)
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
fig.savefig(plot_file)
plt.close()
......
import pandas as pd
import datetime as dt
from scipy.interpolate import interp1d
from trios.utils.sunposition import sunpos
......@@ -28,7 +29,7 @@ class awr_data:
self.idpr = idpr
def reader(self, lat, lon, alt=0, name='', index_idx=[0], utc_conv=0):
def reader(self, lat, lon, alt=0, name='', index_idx=[0], utc_conv=0, file_format='csv'):
'''
Read above-water data files for a given acquisition series (idpr),
merge the different data types:
......@@ -48,13 +49,14 @@ class awr_data:
:return:
'''
df = pd.DataFrame()
# ''' read files with pandas format '''
d = data(index_idx)
Ed, wl_Ed = d.load_csv(self.Edf, utc_conv=utc_conv)
Lsky, wl_Lsky = d.load_csv(self.Lskyf, utc_conv=utc_conv)
Lt, wl_Lt = d.load_csv(self.Ltf, utc_conv=utc_conv)
d = data(index_idx, file_type=file_format)
Ed, wl_Ed = d.load_file(self.Edf, utc_conv=utc_conv)
Lsky, wl_Lsky = d.load_file(self.Lskyf, utc_conv=utc_conv)
Lt, wl_Lt = d.load_file(self.Ltf, utc_conv=utc_conv)
# ''' interpolate Ed, Lt and Lsky data upon common wavelength'''
wl = wl_common
......@@ -277,12 +279,20 @@ class fit:
class data:
    """Reader for radiometric data files (pre-processed csv or raw TriOS mlb)."""

    def __init__(self, index_idx=None, file_type='csv'):
        """
        :param index_idx: positions of the index columns; the first position must
            be the datetime index, followed by the other parameters used for
            indexing (e.g. azimuth, view angle). Defaults to [0].
        :param file_type: 'csv' for pre-processed files, 'mlb' for raw TriOS files
        """
        # fix: default was the mutable literal [0], shared across all instances
        self.index_idx = [0] if index_idx is None else index_idx
        self.file_type = file_type
def load_file(self, file, utc_conv=0):
    """Dispatch to the reader matching self.file_type.

    :param file: path (or list of paths) of the data file to read
    :param utc_conv: local-time to UTC conversion (hours)
    :return: (DataFrame, wavelengths) from load_csv or load_mlb
    :raises ValueError: for an unsupported file_type (previously returned None silently)
    """
    if self.file_type == 'csv':
        return self.load_csv(file, utc_conv=utc_conv)
    elif self.file_type == 'mlb':
        return self.load_mlb(file, utc_conv=utc_conv)
    else:
        raise ValueError('unknown file_type: ' + str(self.file_type))
def load_csv(self, file, utc_conv=0):
print(file)
# dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S') + pd.to_timedelta(utc_conv, 'h')
......@@ -309,6 +319,37 @@ class data:
wl = df.columns
return df, wl
def load_mlb(self, file, utc_conv=0):
    """Read a raw TriOS .mlb file into a time-indexed spectra DataFrame.

    :param file: path to the .mlb file; a list/tuple of paths is tolerated,
        in which case only the first element is read (with a warning if more)
    :param utc_conv: local-time to UTC conversion (hours)
    :return: (df, wl) where df is indexed by UTC datetime with float
        wavelength columns, sorted in increasing time order, and wl is the
        wavelength index
    """
    print(file)
    # fix: original condition `len(file) > 1 and not isinstance(file, str)`
    # never matched a single-element list, which was then passed whole to read_csv
    if isinstance(file, str):
        file_ = file
    else:
        if len(file) > 1:
            print('Warning! Multiple files found but only one expected, taking first file of the list:')
            print(file)
        file_ = file[0]
    # the wavelength labels sit on the line after the 19 skipped header lines
    header = pd.read_csv(file_, sep=r'\s+', skiprows=19, nrows=1)
    df = pd.read_csv(file_, sep=r'\s+', na_values=['-NAN'], engine='python', skiprows=21, header=None)
    df.columns = header.values[0]
    # first index position is the datetime column (Excel serial day number)
    col = self.index_idx[0]
    # Excel serial date -> datetime (epoch 1899-12-30), then local-to-UTC shift
    df.iloc[:, col] = pd.to_timedelta(df.iloc[:, col], unit='d') + dt.datetime(1899, 12, 30) \
                      + pd.to_timedelta(utc_conv, 'h')
    df.set_index(df.iloc[:, col], inplace=True)
    # keep spectra/radiometric data only (columns with a valid wavelength label)
    df = df.loc[:, df.columns.notnull()]
    df = df.dropna(axis=1, how='all').dropna(axis=0, how='all')
    df.columns = df.columns.astype('float')
    # resort to get data in increasing time order
    df.sort_index(inplace=True)
    wl = df.columns
    return df, wl
class reshape:
def __init__(self):
......@@ -333,8 +374,8 @@ class plot:
@staticmethod
def add_curve(ax, x, mean, std=None, c='red', label='', **kwargs):
ax.plot(x, mean, linestyle='solid', c=c, lw=2.5,
alpha=0.8, label=label, *kwargs)
ax.plot(x, mean, c=c, lw=2.5,
alpha=0.8, label=label, **kwargs)
if np.any(std):
ax.fill_between(x,
mean - std,
......