An error occurred while loading the file. Please try again.
-
Guillaume Perréal authored1540157a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of PHYMOBAT 3.0.
# Copyright 2016 Sylvio Laventure (IRSTEA - UMR TETIS)
#
# PHYMOBAT 3.0 is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# PHYMOBAT 3.0 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PHYMOBAT 3.0. If not, see <http://www.gnu.org/licenses/>.
import os, sys, glob, re, time
import math, subprocess, json
import urllib.request
import tarfile, zipfile

import requests

# GDAL/OGR : older installations expose a top-level 'ogr' module, recent ones
# only provide the 'osgeo' namespace.
try:
    import ogr
except ImportError:
    # FIX: the fallback previously bound the module to the name 'org' (typo),
    # leaving 'ogr' undefined and crashing coord_box_dd(); bind it as 'ogr'.
    import osgeo.ogr as ogr

import numpy as np
# FIX: 'import lxml' alone does not guarantee the 'etree' submodule is
# loaded; the code calls lxml.etree.parse(), so import it explicitly.
import lxml.etree
from collections import defaultdict, UserDict

import Constantes, Satellites, Outils
import RasterSat_by_date
class Archive():
    """
    Class to list, download and unpack Theia image archives using a shapefile
    (box) giving the extent of the study area.

    :param captor: Name of the satellite (ex: Landsat or SpotWorldHeritage ...).
            Name used in the url of the Theia Land website.
    :type captor: str
    :param list_year: Processing years separated by ";" (one item may also be
            a "first-date,end-date" couple, see :meth:`listing`)
    :type list_year: str
    :param box: Path of the study area shapefile
    :type box: str
    :param repertory: Path of the archives' folder
    :type repertory: str
    :param proxy_enabled: Proxy configuration — presumably an object exposing
            a ``proxy`` attribute (see :meth:`download_auto`); confirm against caller
    """

    def __init__(self, captor, list_year, box, repertory, proxy_enabled):
        """
        Create a new 'Archive' instance.
        """
        self._captor = captor
        self._list_year = list_year.split(";")
        self._box = box
        self._repertory = repertory
        self._proxy_enabled = proxy_enabled

        # Archive's list (two dimensions) :
        # 1. List of the website path archives
        # 2. List of the local path archives
        # (listing() appends a third element : the feature id)
        self.list_archive = []

        self.logger = Outils.Log("Log", "archive")

        self.server = Satellites.SATELLITE[self._captor]["server"]
        self.resto = Satellites.SATELLITE[self._captor]["resto"]
        self.token_type = Satellites.SATELLITE[self._captor]["token_type"]

        self.url = ''  # str : complete website JSON database

        # List (dim 5) to get year, month, day, path of multispectral's
        # images and path of cloud's images
        self.list_img = []
        self.single_date = []  # date list without duplication

    def set_list_archive_to_try(self, few_list_archive):
        """
        Test function to download a few archives.

        :param few_list_archive: [archive_download, out_archive] with :

                * archive_download : Archives downloaded
                * out_archive : Output archives path

        :type few_list_archive: list dimension 2
        """
        _few_list_archive = np.array(few_list_archive)
        # Refuse flat lists : every item must be an [archive, output] pair.
        if _few_list_archive.ndim < 2:
            self.logger.info('Few information in the list')
        else:
            self.list_archive = few_list_archive

    def utm_to_latlng(self, zone, easting, northing, northernHemisphere=True):
        """
        Convert UTM coordinates to geographic coordinates (decimal degrees).

        :param zone: UTM zone
        :type zone: int
        :param easting: Coordinates UTM in x
        :type easting: float
        :param northing: Coordinates UTM in y
        :type northing: float
        :param northernHemisphere: North hemisphere or not
        :type northernHemisphere: boolean
        :returns: tuple -- (**longitude**, **latitude**) in decimal degrees

        Source : http://www.ibm.com/developerworks/java/library/j-coordconvert/index.html
        """
        # Southern-hemisphere northings are offset by the 10,000 km false northing.
        if not northernHemisphere:
            northing = 10000000 - northing

        # WGS84 ellipsoid constants (semi-major axis, eccentricity, scale factor).
        a = 6378137
        e = 0.081819191
        e1sq = 0.006739497
        k0 = 0.9996

        arc = northing / k0
        mu = arc / (a * (1 - math.pow(e, 2) / 4.0 - 3 * math.pow(e, 4) / 64.0 - 5 * math.pow(e, 6) / 256.0))

        ei = (1 - math.pow((1 - e * e), (1 / 2.0))) / (1 + math.pow((1 - e * e), (1 / 2.0)))

        # Series coefficients for the footpoint latitude.
        ca = 3 * ei / 2 - 27 * math.pow(ei, 3) / 32.0
        cb = 21 * math.pow(ei, 2) / 16 - 55 * math.pow(ei, 4) / 32
        cc = 151 * math.pow(ei, 3) / 96
        cd = 1097 * math.pow(ei, 4) / 512

        phi1 = mu + ca * math.sin(2 * mu) + cb * math.sin(4 * mu) + cc * math.sin(6 * mu) + cd * math.sin(8 * mu)

        n0 = a / math.pow((1 - math.pow((e * math.sin(phi1)), 2)), (1 / 2.0))
        r0 = a * (1 - e * e) / math.pow((1 - math.pow((e * math.sin(phi1)), 2)), (3 / 2.0))
        fact1 = n0 * math.tan(phi1) / r0

        # Distance from the central meridian (500 km false easting).
        _a1 = 500000 - easting
        dd0 = _a1 / (n0 * k0)
        fact2 = dd0 * dd0 / 2

        t0 = math.pow(math.tan(phi1), 2)
        Q0 = e1sq * math.pow(math.cos(phi1), 2)
        fact3 = (5 + 3 * t0 + 10 * Q0 - 4 * Q0 * Q0 - 9 * e1sq) * math.pow(dd0, 4) / 24
        fact4 = (61 + 90 * t0 + 298 * Q0 + 45 * t0 * t0 - 252 * e1sq - 3 * Q0 * Q0) * math.pow(dd0, 6) / 720

        lof1 = _a1 / (n0 * k0)
        lof2 = (1 + 2 * t0 + Q0) * math.pow(dd0, 3) / 6.0
        lof3 = (5 - 2 * Q0 + 28 * t0 - 3 * math.pow(Q0, 2) + 8 * e1sq + 24 * math.pow(t0, 2)) * math.pow(dd0, 5) / 120

        _a2 = (lof1 - lof2 + lof3) / math.cos(phi1)
        _a3 = _a2 * 180 / math.pi

        latitude = 180 * (phi1 - fact1 * (fact2 + fact3 + fact4)) / math.pi

        if not northernHemisphere:
            latitude = -latitude

        # Central meridian of UTM zone 'zone' is (6 * zone - 183) degrees.
        longitude = ((zone > 0) and (6 * zone - 183.0) or 3.0) - _a3

        return (longitude, latitude)

    def coord_box_dd(self):
        """
        Get the area's coordinates of the study shapefile.

        :returns: str -- **area_coord_corner** : Area coordinates corner
                --> Left bottom on x, Left bottom on y, Right top on x, Right top on y

        :Example:

        >>> import Archive
        >>> test = Archive(captor, list_year, box, folder, repertory, proxy_enabled)
        >>> coor_test = test.coord_box_dd()
        >>> coor_test
        '45.52, 2.25, 46.71, 3.27'
        """
        # Reproject the shapefile to UTM 31N with ogr2ogr in command line
        # (overwrites any previous conversion).
        utm_outfile = os.path.split(self._box)[0] + '/UTM_' + os.path.split(self._box)[1]
        if os.path.exists(utm_outfile):
            os.remove(utm_outfile)

        process_tocall = ['ogr2ogr', '-f', 'Esri Shapefile', '-t_srs', 'EPSG:32631', utm_outfile, self._box]
        subprocess.call(process_tocall)

        # To get shapefile extent
        driver = ogr.GetDriverByName('ESRI Shapefile')
        # Open shapefile (read-only)
        data_source = driver.Open(utm_outfile, 0)

        if data_source is None:
            self.logger.error('Could not open file')
            sys.exit(1)

        shp_ogr = data_source.GetLayer()
        # Extent as (xMin, xMax, yMin, yMax)
        extent_ = shp_ogr.GetExtent()

        # Close source data
        data_source.Destroy()

        # Coordinates min in decimal degrees
        LL_min = self.utm_to_latlng(31, extent_[0], extent_[2], northernHemisphere=True)
        # Coordinates max in decimal degrees
        LL_max = self.utm_to_latlng(31, extent_[1], extent_[3], northernHemisphere=True)

        area_coord_corner = str(LL_min[0]) + ',' + str(LL_min[1]) + ',' + str(LL_max[0]) + ',' + str(LL_max[1])
        return area_coord_corner

    def listing(self):
        """
        List available archives on the Theia Land platform for the study
        area and the requested period(s).

        :returns: int -- number of archives matching the criteria
        """
        # Loop on the years
        self.logger.info("Images availables")
        for year in self._list_year:

            first_date = year.split(',')[0]
            # Tricks to put whether a year or a date (year-month-day)
            try:
                end_date = year.split(',')[1]
                self.logger.info("=============== {0} to {1} ===============".format(first_date, end_date))
                self.url = "{0}/{1}/api/collections/{2}/search.json?lang=fr&_pretty=true&completionDate={3}&box={4}&maxRecord=500&startDate={5}".format(self.server, self.resto, self._captor, end_date, self.coord_box_dd(), first_date)
            except IndexError:
                self.logger.info("=============== {0} ===============".format(first_date))
                self.url = "{0}/{1}/api/collections/{2}/search.json?lang=fr&_pretty=true&q={3}&box={4}&maxRecord=500".format(self.server, self.resto, self._captor, year, self.coord_box_dd())

            # Initialisation variable for a next page
            # There is a next page, next_ = 1
            # There isn't next page, next_ = 0
            next_ = 1

            # To know path to download images
            while next_ == 1:
                try:
                    request_headers = {"User-Agent": "Magic-browser"}
                    req = urllib.request.Request(self.url, headers=request_headers)  # Connexion in the database
                    data = urllib.request.urlopen(req).read()  # Read in the database

                    # FIX: parse the server response with json.loads instead of
                    # eval() on regex-patched remote data — eval() on a remote
                    # payload is an arbitrary-code-execution risk, and json
                    # handles 'null'/'false' natively.
                    data_Dict = json.loads(data)

                    # Select archives to download
                    for feature in data_Dict['features']:
                        name_archive = feature['properties']['productIdentifier']
                        feature_id = feature['id']
                        link_archive = feature['properties']['services']['download']['url'].replace("\\", "")
                        url_archive = link_archive.replace(self.resto, "rocket/#")
                        archive_download = url_archive.replace("/download", "")  # Path to download
                        out_archive = "{0}/{1}.tgz".format(self._repertory, name_archive)  # Name after download
                        self.list_archive.append([archive_download, out_archive, feature_id])

                    # Verify if there is another page (next) : the server marks
                    # it with a last link titled 'suivant'.
                    last_link = data_Dict['properties']['links'][-1]
                    if last_link['title'] == 'suivant':
                        self.url = last_link['href'].replace("\\", "")
                    else:
                        next_ = 0

                except Exception as e:
                    self.logger.error("Error connexion or error variable : {0}".format(e))
                    sys.exit(1)

        self.logger.info("{0} image(s) matched the criteria.".format(len(self.list_archive)))
        return len(self.list_archive)

    def download_auto(self, user_theia, password_theia):
        """
        Download images archive automatically on Theia land data center.

        Source : https://github.com/olivierhagolle/theia_download

        :param user_theia: Username Theia Land data center
        :type user_theia: str
        :param password_theia: Password Theia Land data center
        :type password_theia: str
        """
        #============================================================
        # get a token to be allowed to bypass the authentification.
        # The token is only valid for two hours. If your connection is slow
        # or if you are downloading lots of products
        #=============================================================
        proxyDict = {
            "http": "{0}".format(self._proxy_enabled.proxy),
            "https": "{0}".format(self._proxy_enabled.proxy),
            "ftp": "{0}".format(self._proxy_enabled.proxy)
        }

        url = "{0}/services/authenticate/".format(self.server)
        payload = {'ident': "{0}".format(user_theia), 'pass': "{0}".format(password_theia)}
        reponse = requests.post(url, data=payload, proxies=proxyDict)

        try:
            self.logger.debug(reponse.json())
            token = reponse.json()["access_token"]
        except Exception as e:
            # FIX: include the underlying error so authentication failures
            # can actually be diagnosed from the log.
            self.logger.error("Error during the identification request : {0}".format(e))
            sys.exit(-1)

        #====================
        # Download
        #====================
        # Loop on list archive to download images that are not on disk yet.
        head = {"Authorization": "Bearer {0}".format(token)}

        for archive in self.list_archive:
            # Download if not exist
            if not os.path.exists(archive[1]):
                self.logger.info("Téléchargement : {0}".format(os.path.basename(archive[1])))
                url = "{0}/{1}/collections/{2}/{3}/download/?issuerId=theia".format(self.server, self.resto, self._captor, archive[2])
                reponse = requests.get(url, headers=head, proxies=proxyDict)
                with open(archive[1], "wb") as fichier:
                    fichier.write(reponse.content)

        self.logger.info("All images have been downloaded !")

    def decompress(self):
        """
        Unpack archives and store informations of the images
        (date, path of multispectral image, path of cloud mask).
        """
        self.logger.info("Unpack archives")

        for annee in self._list_year:

            first_date = annee.split(',')[0]
            # Tricks to put whether a year or a date (year-month-day)
            try:
                end_date = annee.split(',')[1]
                self.logger.info("=============== {0} to {1} ===============".format(first_date, end_date))
            except IndexError:
                self.logger.info("=============== {0} ===============".format(first_date))

        img_in_glob = sorted(glob.glob("{0}/*gz".format(self._repertory)))

        if not img_in_glob:
            self.logger.error("There isn't tgzfile in the folder")
            sys.exit(1)
        else:
            # Create a folder "Unpack"
            folder_unpack = "{0}/Unpack".format(self._repertory)

            if not os.path.isdir(folder_unpack):
                os.mkdir(folder_unpack)

            for img in img_in_glob:
                # Unpack the archives if they aren't again!
                if self._captor == 'Landsat':

                    out_folder_unpack = folder_unpack + '/' + os.path.split(img)[1][:len(os.path.split(img)[1])-4]

                    if not os.path.exists(out_folder_unpack):
                        self.logger.info('Unpack :' + os.path.split(img)[1])
                        # FIX: close the tar archive (resource leak) by using
                        # a context manager.
                        with tarfile.open(img, 'r:gz') as tfile:
                            tfile.extractall(str(folder_unpack))

                    # On xmlfile, extract dates, path of images, cloud's images
                    xmlfile = lxml.etree.parse(str(out_folder_unpack) + '/' + os.path.split(img)[1][:len(os.path.split(img)[1])-4] + '.xml')

                    # Date images
                    # Exemple : '2015-09-27 10:41:25.956749'
                    # To get year, month and day ---> .split(' ')[0].split('-')
                    di = xmlfile.xpath("/METADATA/HEADER/DATE_PDV")[0].text.split(' ')[0].split('-')
                    # Multispectral images
                    hi = out_folder_unpack + '/' + xmlfile.xpath("/METADATA/FILES/ORTHO_SURF_CORR_ENV")[0].text
                    # Cloud images
                    # Cloud's path not exists in xmlfile, then replace 'SAT' by 'NUA'
                    ci = out_folder_unpack + '/' + xmlfile.xpath("/METADATA/FILES/MASK_SATURATION")[0].text.replace('_SAT', '_NUA')

                elif self._captor == 'SENTINEL2':

                    # Extract band 2 (Blue), 3 (Green), 4 (Red), and 8 (NIR) at
                    # surface reflectance 10m, XML file, Cloud mask at 10m
                    extent_img = ['_SRE_B2.tif', '_SRE_B3.tif', '_SRE_B4.tif', '_SRE_B8.tif', '_MTD_ALL.xml', '_CLM_R1.tif']

                    # FIX: close the zip archive (resource leak) by using a
                    # context manager.
                    with zipfile.ZipFile(img) as tzip:
                        zip_img = tzip.namelist()  # Images list in the archive
                        zip_folder = zip_img[0].split('/')[0]
                        out_folder_unpack = folder_unpack + '/' + zip_folder

                        self.logger.info('Unpack :' + os.path.split(zip_folder)[1])

                        extraction_img = []
                        for e in extent_img:
                            z_out = [f for f in zip_img if e in f][0]
                            extraction_img.append(folder_unpack + '/' + str(z_out))
                            if not os.path.exists(folder_unpack + '/' + str(z_out)):
                                self.logger.info('Extract :%s' % (z_out))
                                tzip.extract(str(z_out), str(folder_unpack))

                    # On xmlfile, extract dates, path of images, cloud's images
                    xmlfile = lxml.etree.parse(str(extraction_img[4]))

                    # Date images
                    di = xmlfile.xpath("/Muscate_Metadata_Document/Product_Characteristics/ACQUISITION_DATE")[0].text.split('T')[0].split('-')
                    # Multispectral images
                    hi = out_folder_unpack + '/' + xmlfile.xpath("/Muscate_Metadata_Document/Product_Characteristics/PRODUCT_ID")[0].text + '.tif'

                    if not os.path.exists(hi):
                        # For Sentinel2 from Theia plateform, we need to make a stack layer for rasters
                        # There is a function that make this in RasterSat_by_date class
                        stack_img = RasterSat_by_date.RasterSat_by_date('', '', [0])
                        input_stack = extraction_img[:4]
                        input_stack.insert(0, '-separate')
                        stack_img.vrt_translate_gdal('vrt', input_stack, hi[:-4] + '.VRT')
                        stack_img.vrt_translate_gdal('translate', hi[:-4] + '.VRT', hi)

                    # Cloud images
                    ci = out_folder_unpack + '/' + xmlfile.xpath("/Muscate_Metadata_Document/Product_Organisation/Muscate_Product/Mask_List/Mask/Mask_File_List/MASK_FILE")[2].text

                self.list_img.append([di[0], di[1], di[2], str(hi), str(ci)])

                # Create a list with dates without duplicates
                if di not in self.single_date:
                    self.single_date.append(di)

        self.logger.info("End of unpack archives.")