// Fize Jacques authored 3ecbe3e1
import BioTex.Execution;
import BuildListToValidate.BuildFilterManyLists;
import Object.CandidatTerm;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import java.lang.Integer;
/**
*
* @author juanlossio
*/
public class Principal {
/**
* @param args the command line arguments
*/
static ArrayList<CandidatTerm> list_candidat_terms_validated = new ArrayList<CandidatTerm>();
public static void main(String[] args) throws IOException{
System.out.println("Read Configuration");
HashMap<String,String> mapConf=new HashMap<>();
File configuration=new File("configuration.txt");
if(configuration.exists()){
LineIterator it=FileUtils.lineIterator(configuration);
while(it.hasNext()){
String lineConf=it.next();
String[] sp = lineConf.split("=");
if(sp.length == 2){
mapConf.put(sp[0].trim(), sp[1].trim());
}
else{
System.err.println("Error: "+lineConf+" is not valid !");
System.exit(1);
}
}
}
else{
System.out.println("No 'configuration.txt' file found.");
System.exit(1);
}
String[] arr_key=new String[]{"patronNumber","patternsSrc","datasetSrc","stopwordsSrc","treetaggerSrc","typeOfTerms","language","score"};
boolean flag=false;
for (String key : arr_key) {
if (!mapConf.containsKey(key)){
flag=true;
System.err.println("Config Var: "+key+" is missing !");
}
}
if(flag){System.exit(1);}
/*
* Variables to find: the Pattern List, DataSetReference for Validation, and file where the Tagger Tool is installed
*/
String source_patterns = mapConf.get("patternsSrc");
String source_dataset_reference = mapConf.get("datasetSrc");
String source_stop_words = mapConf.get("stopwordsSrc");
String source_tagger =mapConf.get("treetaggerSrc");
int patronNb=Integer.parseInt(mapConf.get("patronNumber"));
// String basePath="/Users/jacquesfize/Downloads/BioTex";
// String source_patterns = basePath+"/patterns";
// String source_dataset_reference = basePath+"/dataSetReference";
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
// String source_stop_words = basePath+"/stopWords";
// String source_tagger = "/Users/jacquesfize/.treetagger";
System.out.println("Configuration is loaded");
if (args.length <1){
System.out.println("Usage : java Principal <inputFile> [<outputDir>]");
System.exit(1);
}
/*
* Variable that saves the extracted terms
*/
String source_OUTPUT = System.getProperty("user.dir")+"/output"; //Mettre le dossier où vous voulez que les fichiers se sauvegardent
/*
* File to be analized for the term extraction
*/
// String file_to_be_analyzed = args[0];
// if (args.length == 2){
// source_OUTPUT=args[1];
// }
boolean mkdir = new File(source_OUTPUT).mkdir();
/*
* Language : english, french, spanish
* number_patrons : number of first pattern to take into account
* typeTerms : all (single word + multi words terms),
* multi (multi words terms)
* measure = 15 possible measures
* tool_Tagger: TreeTagger by default
*/
String type_of_terms = mapConf.get("typeOfTerms"); // all multi
String language = mapConf.get("language"); // english french spanish
int frequency_min_of_terms = 1; // frequency minimal to extract the terms
list_candidat_terms_validated = Execution.main_execution(
language, //english french spanish
patronNb, // nombre de patrons
type_of_terms,
mapConf.get("score"), // For one document : L_value C_value
// For a set of documents : LIDF_value F-OCapi_A F-OCapi_M F-OCapi_S F-TFIDF-C_A F-TFIDF-C_M F-TFIDF-C_S
// TFIDF_A TFIDF_M TFIDF_S Okapi_A Okapi_M Okapi_S
2,/* 1 = single file (only for L_value or C_value)
2 = set of files (for LIDF-value or any measure)
*/
frequency_min_of_terms,
args[0],
"TreeTagger",
source_patterns,
source_dataset_reference,
source_tagger,
(args.length == 2)?args[1]:source_OUTPUT
);
BuildFilterManyLists.createList(list_candidat_terms_validated,source_stop_words,source_OUTPUT,type_of_terms,language);
System.out.println("Fin de l'exécution");
}
}