# Last authored by Cédric Traizet (revision ba09b3e7).
library(geonapi)
library(geometa)
library(uuid)
library(stringr)
# Read params
# The dataset catalogue is looked up in the "params" folder of the current
# working directory.
working_dir <- getwd()
datasets_csv <- paste0(working_dir, "/params/datasets.csv")
datasets <- read.csv(file = datasets_csv, sep = ",")
# Starts empty; rows describing problems are appended to it with rbind().
csv_error <- data.frame()
# Build one ISO 19115/19139 metadata record per row of `datasets` and save it
# as an XML file under "xml_generated/". Problems found in a row are appended
# to `csv_error`; rows with a blocking problem ("error" level) are skipped.

# Output folder for the generated XML files; created up front so that
# md$save() does not fail when the folder is missing.
dir.create("xml_generated", showWarnings = FALSE, recursive = TRUE)

# TRUE when a CSV cell is absent, NA, or contains only whitespace.
is_blank <- function(value) {
  if (length(value) == 0) {
    return(TRUE)
  }
  text <- as.character(value[[1]])
  is.na(text) || !nzchar(trimws(text))
}

# Split a ", "-separated CSV cell into trimmed, non-empty items.
# A blank or NA cell yields character(0) instead of the literal string "NA".
split_values <- function(value) {
  if (is_blank(value)) {
    return(character(0))
  }
  items <- trimws(unlist(strsplit(as.character(value), ", ")))
  items[nzchar(items)]
}

# Build one row of the error report for the given catalogue record.
make_issue <- function(record, message, level) {
  data.frame(
    n = record$n,
    uuid = record$uuid,
    title = record$Title.Identifier..if.no.DOI.available.,
    error = message,
    contact = record$Producer,
    error_level = level
  )
}

# Add a named online resource to `transfer_options`, unless `url` is blank
# (an online resource with an empty linkage carries no usable link).
add_link <- function(transfer_options, name, url) {
  if (is_blank(url)) {
    return(invisible(FALSE))
  }
  resource <- ISOOnlineResource$new()
  resource$setName(name)
  resource$setLinkage(trimws(as.character(url)))
  resource$setProtocol("WWW:LINK-1.0-http--link")
  transfer_options$addOnlineResource(resource)
  invisible(TRUE)
}

# Iterate over row positions rather than over the values of column `n`:
# indexing with `n` only works when the catalogue is numbered exactly 1..N.
for (row in seq_len(nrow(datasets))) {
  record <- datasets[row, , drop = FALSE]
  title <- as.character(record$Title.Identifier..if.no.DOI.available.)
  producer <- as.character(record$Producer)
  thumbnail_cell <-
    record$Link..html..to.an.image.logo.figure.representing.the.database

  print(paste0("Working on: ", title))

  # ---- Validation -----------------------------------------------------------
  has_blocking_error <- FALSE

  # No abstract: blocking.
  if (is_blank(record$Description.of.the.dataset.Abstract)) {
    csv_error <- rbind(csv_error, make_issue(record, "missing abstract", "error"))
    has_blocking_error <- TRUE
  }

  # Records from this producer are harvested, so they are not generated here.
  if (producer %in% c("mundialis")) {
    csv_error <- rbind(csv_error, make_issue(record, "already harvested", "error"))
    has_blocking_error <- TRUE
  }

  # No thumbnail: reported as a warning only, the record is still generated.
  if (is_blank(thumbnail_cell)) {
    csv_error <- rbind(csv_error, make_issue(record, "missing thumbnail", "warning"))
  }

  # Skip when error
  if (has_blocking_error) {
    next
  }

  # ---- Metadata creation ----------------------------------------------------
  metadata_id <- paste(record$uuid)
  md <- ISOMetadata$new()
  md$setFileIdentifier(metadata_id)
  md$setCharacterSet("utf8")
  md$setMetadataStandardName("ISO 19115:2003/19139")
  md$setLanguage("eng")
  md$setDateStamp(Sys.time())
  # Hierarchy level is taken from the catalogue (lower-cased), not hard-coded.
  md$setHierarchyLevel(tolower(paste(record$Dataset.or.Software)))

  # ---- Identification -------------------------------------------------------
  ident <- ISODataIdentification$new()
  ident$setAbstract(paste(record$Description.of.the.dataset.Abstract))
  ident$setLanguage("eng")

  if (producer %in% c("ERGO")) {
    ident$addTopicCategory("environment")
  } else {
    # disease data
    ident$addTopicCategory("health")
  }

  # Keywords: general keywords, related diseases and dataset type are merged
  # into a single keyword block.
  keyword_values <- c(
    split_values(record$Database.Key.words),
    split_values(record$Usefull.for.which.diseases),
    split_values(record$Dataset.type)
  )
  if (length(keyword_values) > 0) {
    # An ISOKeywords block without any keyword carries no information.
    dynamic_keywords <- ISOKeywords$new()
    for (kw in keyword_values) {
      dynamic_keywords$addKeyword(kw)
    }
    ident$addKeywords(dynamic_keywords)
  }

  # ---- Distribution: links to data, code and publication --------------------
  distrib <- ISODistribution$new()
  dto <- ISODigitalTransferOptions$new()
  add_link(
    dto, "Access to data",
    record$Give.the.DOI..or.URL..to.access.the.dataset.in.the.data.repository
  )
  add_link(dto, "Access to code", record$Code.available..link.)
  add_link(
    dto, "Access to publication",
    record$If.the.dataset.is.linked.to.a.publication..specify.the.DOI.of.the.publication
  )
  distrib$setDigitalTransferOptions(dto)
  md$setDistributionInfo(distrib)

  # ---- Producer -------------------------------------------------------------
  rp <- ISOResponsibleParty$new()
  rp$setOrganisationName(producer)
  rp$setRole("principalInvestigator")
  ident$addPointOfContact(rp)

  # ---- Legal constraint(s) --------------------------------------------------
  if (!is_blank(record$Licence)) {
    lc <- ISOLegalConstraints$new()
    lc$addUseLimitation(as.character(record$Licence))
    ident$setResourceConstraints(lc)
  }

  # ---- Title and identifier -------------------------------------------------
  ct <- ISOCitation$new()
  ct$setTitle(title)
  ct$setIdentifier(ISOMetaIdentifier$new(code = metadata_id))
  ident$setCitation(ct)

  # ---- Thumbnail(s) ---------------------------------------------------------
  for (thumbnail in split_values(thumbnail_cell)) {
    go <- ISOBrowseGraphic$new(
      fileName = thumbnail,
      fileDescription = "thumbnail",
      fileType = "image/png"
    )
    ident$addGraphicOverview(go)
  }

  md$addIdentificationInfo(ident)

  # ---- Conversion to ISO 19139 and saving the XML file ----------------------
  md$encode(inspire = FALSE)
  # Spaces and slashes in the title are replaced so it can be used as a file name.
  nom_fichier <- str_replace_all(title, "[ /]", "_")
  nom_fichier <- paste0(nom_fichier, ".xml")
  chemin_fichier <- file.path("xml_generated", nom_fichier)
  md$save(chemin_fichier)
}

# Disabled: publication of the generated files to GeoNetwork.
# require(XML)
# filenames = list.files("xml_generated", pattern="*.xml")
# for (file in filenames) {# walk through all generated XML files
# chemin_fichier = paste("xml_generated", file, sep="/")
# xml = xmlParse(chemin_fichier)
# md = ISOMetadata$new(xml = xml) # build the ISOMetadata object
# created = gn$insertMetadata( # insert into GeoNetwork
# xml = md$encode(),
# group = "1",
# )
# }
# Write the collected errors/warnings to a timestamped CSV report.
# The "logs" folder is created on demand so that write.csv() does not fail
# when it is missing.
dir.create("logs", showWarnings = FALSE, recursive = TRUE)
current_datetime <- format(Sys.time(), "%Y%m%d_%H%M%S")
filename <- paste0("logs/csv_error_", current_datetime, ".csv")
write.csv(csv_error, filename, row.names = FALSE)