// Principal.java
import BioTex.Execution;
import BuildListToValidate.BuildFilterManyLists;
import Object.CandidatTerm;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import java.lang.Integer;
/**
 * @author juanlossio
public class Principal {
    /**
     * @param args the command line arguments
    static ArrayList<CandidatTerm> list_candidat_terms_validated = new ArrayList<CandidatTerm>();
    public static void main(String[] args) throws IOException{
    	System.out.println("Read Configuration");
        HashMap<String,String> mapConf=new HashMap<>();
        File configuration=new File("configuration.txt");
        if(configuration.exists()){
            LineIterator it=FileUtils.lineIterator(configuration);
            while(it.hasNext()){
                String lineConf=it.next();
                String[] sp = lineConf.split("=");
                if(sp.length == 2){
                    mapConf.put(sp[0].trim(), sp[1].trim());
                else{
                    System.err.println("Error: "+lineConf+" is not valid !");
                    System.exit(1);
        else{
            System.out.println("No 'configuration.txt' file found.");
            System.exit(1);
        String[] arr_key=new String[]{"patronNumber","patternsSrc","datasetSrc","stopwordsSrc","treetaggerSrc","typeOfTerms","language","score"};
        boolean flag=false;
        for (String key : arr_key) {
            if (!mapConf.containsKey(key)){
                flag=true;
                System.err.println("Config Var: "+key+" is missing !");
        if(flag){System.exit(1);}
    	 * Variables to find: the Pattern List, DataSetReference for Validation, and file where the Tagger Tool is installed
    	String source_patterns = mapConf.get("patternsSrc");
    	String source_dataset_reference = mapConf.get("datasetSrc");
    	String source_stop_words = mapConf.get("stopwordsSrc");
    	String source_tagger =mapConf.get("treetaggerSrc");
	int patronNb=Integer.parseInt(mapConf.get("patronNumber"));
//        String basePath="/Users/jacquesfize/Downloads/BioTex";
//    	String source_patterns = basePath+"/patterns";
//    	String source_dataset_reference = basePath+"/dataSetReference";
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
// String source_stop_words = basePath+"/stopWords"; // String source_tagger = "/Users/jacquesfize/.treetagger"; System.out.println("Configuration is loaded"); if (args.length <1){ System.out.println("Usage : java Principal <inputFile> [<outputDir>]"); System.exit(1); } /* * Variable that saves the extracted terms */ String source_OUTPUT = System.getProperty("user.dir")+"/output"; //Mettre le dossier où vous voulez que les fichiers se sauvegardent /* * File to be analized for the term extraction */ // String file_to_be_analyzed = args[0]; // if (args.length == 2){ // source_OUTPUT=args[1]; // } boolean mkdir = new File(source_OUTPUT).mkdir(); /* * Language : english, french, spanish * number_patrons : number of first pattern to take into account * typeTerms : all (single word + multi words terms), * multi (multi words terms) * measure = 15 possible measures * tool_Tagger: TreeTagger by default */ String type_of_terms = mapConf.get("typeOfTerms"); // all multi String language = mapConf.get("language"); // english french spanish int frequency_min_of_terms = 1; // frequency minimal to extract the terms list_candidat_terms_validated = Execution.main_execution( language, //english french spanish patronNb, // nombre de patrons type_of_terms, mapConf.get("score"), // For one document : L_value C_value // For a set of documents : LIDF_value F-OCapi_A F-OCapi_M F-OCapi_S F-TFIDF-C_A F-TFIDF-C_M F-TFIDF-C_S // TFIDF_A TFIDF_M TFIDF_S Okapi_A Okapi_M Okapi_S 2,/* 1 = single file (only for L_value or C_value) 2 = set of files (for LIDF-value or any measure) */ frequency_min_of_terms, args[0], "TreeTagger", source_patterns, source_dataset_reference, source_tagger, (args.length == 2)?args[1]:source_OUTPUT ); BuildFilterManyLists.createList(list_candidat_terms_validated,source_stop_words,source_OUTPUT,type_of_terms,language); System.out.println("Fin de l'exécution"); } }