diff --git a/.gitignore b/.gitignore index fd972571f02065e55440157eb91a91e4af1543db..dd029ad06e516449caf4624c57e728de671ba3f1 100644 --- a/.gitignore +++ b/.gitignore @@ -28,4 +28,5 @@ __pycache__/ *cache.json *.gexf temp_cluster_2/ -agromada* \ No newline at end of file +agromada* +output* \ No newline at end of file diff --git a/auto_fill_annotation.py b/auto_fill_annotation.py index f47d2364708bc8ccd70c49e807f146fc06927dad..8ccf6f917a0965b1bfa0e3e86a4911859fe31d4c 100644 --- a/auto_fill_annotation.py +++ b/auto_fill_annotation.py @@ -4,8 +4,7 @@ import argparse, os import warnings -import os, re, glob -import pandas as pd +import os, re, glob,json import networkx as nx import numpy as np @@ -19,18 +18,24 @@ from strpython.models.str import STR from strpython.helpers.sim_matrix import matrix_to_pandas_dataframe, read_bz2_matrix -def main(dataset, matrix_sim_dir, raw_graph_dir, selected_graphs, threshold, inclusion_fn, adjacency_fn): +def main(dataset, matrix_sim_dir, raw_graph_dir, selected_graphs, threshold, inclusion_fn, adjacency_fn,min_carac_fn, min_size_G1,min_size_G2,n_car_min_doc1,n_car_min_doc2): annotater = AnnotationAutomatic(dataset, threshold, inclusion_fn, adjacency_fn) first_step_output = "output_first_step_{0}_{1}".format(dataset, threshold) last_step_output = "output_final_{0}_{1}".format(dataset, threshold) generate_annotation_dataframe(matrix_sim_dir, selected_graphs, first_step_output) - extract_criteria_4_all(annotater, first_step_output, raw_graph_dir, dataset, threshold) + # size_str = extract_criteria_4_all(annotater, first_step_output, raw_graph_dir, dataset, threshold) if not os.path.exists(last_step_output): os.makedirs(last_step_output) - for fn in glob.glob(os.path.join(first_step_output,"*.csv")): - annotate_eval_sample(annotater, fn, os.path.join(last_step_output, os.path.basename(fn))) - synthesize(last_step_output,"{0}_{1}.csv".format(dataset,threshold)) + + # for fn in tqdm(glob.glob(os.path.join(first_step_output,"*.csv")),desc="Annotate sample"): + # annotate_eval_sample(annotater, fn, os.path.join(last_step_output, os.path.basename(fn)),size_str) + + min_carac_dict=None + if min_carac_fn != "" and os.path.exists(min_carac_fn): + min_carac_dict=json.load(open(min_carac_fn)) + + synthesize(last_step_output,"{0}_{1}.csv".format(dataset,threshold),min_size_G1,min_size_G2,min_carac_dict,n_car_min_doc1,n_car_min_doc2) @@ -47,6 +52,7 @@ def generate_annotation_dataframe(matrix_sim_dir, selected_graphs, output_dir): ------- """ + if not os.path.exists(matrix_sim_dir): raise FileNotFoundError("Similarity matrix directory not found : {0}".format(matrix_sim_dir)) @@ -55,6 +61,8 @@ def generate_annotation_dataframe(matrix_sim_dir, selected_graphs, output_dir): type_ = "_".join(os.path.basename(fn).split("_")[1:]).replace(".npy.bz2", "") print("Proceeding...", measure, type_) + if os.path.exists(os.path.join(output_dir, "{0}_{1}.csv".format(measure, type_))): + continue df = matrix_to_pandas_dataframe(np.nan_to_num(read_bz2_matrix(fn)), selected_graphs, measure, type_) @@ -96,30 +104,38 @@ def extract_criteria_4_all(annotater, csv_input_dir, raw_graph_dir, dataset, thr # Load STRs strs = {} + size_STR={} + + def load(fn): + id_ = int(re.findall("\d+", fn)[-1]) + strs[id_] = STR.from_networkx_graph(nx.read_gexf(fn)) + size_STR[id_] = len(strs[id_]) + for file in tqdm(glob.glob(os.path.join(raw_graph_dir, "*.gexf")), desc="Load Graphs"): id_ = int(re.findall("\d+", file)[-1]) strs[id_] = STR.from_networkx_graph(nx.read_gexf(file)) - - # Do the annotation for a match between two STR + size_STR[id_]= len(strs[id_]) + #Do the annotation for a match between two STR def annotate(x): try: return annotater.all(strs[int(x.G1)], strs[int(x.G2)], int(x.G1), int(x.G2)) except KeyError as e: - annotater.matching_cache.add(int(x.G1), int(x.G2), *(0, 0, 0, 0)) - return [0, 0, 0, 0] + annotater.matching_cache.add(int(x.G1), int(x.G2), *(0, 0, 0, 0,300000)) + return [0, 0, 0, 0,300000,0] # Annotation Time + print("Computing Criteria for each match") matching_dataframe["res"] = matching_dataframe.progress_apply(lambda x: annotate(x), axis=1) - matching_dataframe.res = matching_dataframe.res.apply(lambda x: list(map(int, x)) if x else []) - for ix, col in enumerate("c1 c2 c3 c4".split()): + matching_dataframe.res = matching_dataframe.res.apply(lambda x: [int(x[0]),int(x[1]),int(x[2]),int(x[3]),float(x[4])] if x else []) + for ix, col in enumerate("c1 c2 c3 c4 c5".split()): matching_dataframe[col] = matching_dataframe.res.apply(lambda x: x[ix] if len(x) > 0 else 0) del matching_dataframe["res"] # Writiting output - matching_dataframe.to_csv(output_file) + return size_STR -def annotate_eval_sample(annotater, csv_file, output_file): +def annotate_eval_sample(annotater, csv_file, output_file, size_str): """ Third Step Parameters @@ -141,21 +157,23 @@ def annotate_eval_sample(annotater, csv_file, output_file): try: return annotater.all(None, None, x.G1, x.G2) except Exception as e: - return [0, 0, 0, 0] + return [0, 0, 0, 0,300000] df["res"] = df.apply(lambda x: foo(x), axis=1) - df.res = df.res.apply(lambda x: list(map(int, x)) if x else []) # if bool + df.res = df.res.apply(lambda x: list(map(float, x)) if x else []) # if bool df[["c1"]] = df.res.apply(lambda x: x[0] if len(x) > 0 else 0) df[["c2"]] = df.res.apply(lambda x: x[1] if len(x) > 0 else 0) df[["c3"]] = df.res.apply(lambda x: x[2] if len(x) > 0 else 0) df[["c4"]] = df.res.apply(lambda x: x[3] if len(x) > 0 else 0) - + df[["c5"]] = df.res.apply(lambda x: x[4] if len(x) > 0 else 300000) + df["size_G1"] =df.apply(lambda x: size_str[x.G1] if x.G1 in size_str else 0, axis=1) + df["size_G2"] = df.apply(lambda x: size_str[x.G2] if x.G2 in size_str else 0, axis=1) del df["res"] df.to_csv(output_file) -def synthesize(last_step_output,output_filename): +def synthesize(last_step_output,output_filename,min_size_G1=None,min_size_G2=None,min_carac_dict=None,ncar_min_doc1=0,ncar_min_doc2=0): """ Fourth Step Parameters @@ -168,16 +186,41 @@ def synthesize(last_step_output,output_filename): """ fns = glob.glob(os.path.join(last_step_output, "*.csv")) - + if min_size_G1: + output_filename= output_filename+"_ming1_{0}".format(min_size_G1) + if min_size_G2: + output_filename= output_filename+"_ming2_{0}".format(min_size_G2) + if min_carac_dict and ncar_min_doc1 > 0: + output_filename= output_filename+"_mindoc1len_{0}".format(ncar_min_doc1) + if min_carac_dict and ncar_min_doc2 > 0: + output_filename= output_filename+"_mindoc2len_{0}".format(ncar_min_doc2) data = [] - for fn in fns: + for fn in tqdm(fns,desc="Synthetise Results"): df = pd.read_csv(fn) + if min_size_G1: + df= df[df.size_G1 >= min_size_G1] + + if min_size_G2: + df = df[df.size_G2 >= min_size_G2] + + if min_carac_dict and ncar_min_doc1>0: + df["len_doc1"]=df.apply(lambda x:min_carac_dict[str(x.G1)],axis=1) + df =df[df.len_doc1 >= ncar_min_doc1] + + if min_carac_dict and ncar_min_doc2>0: + df["len_doc2"]=df.apply(lambda x:min_carac_dict[str(x.G2)] if str(x.G2) in min_carac_dict else 0,axis=1) + df =df[df.len_doc2 >= ncar_min_doc2] + + df = df.replace([np.inf, -np.inf], 300000) + df["c5"] = 1 - (df.c5 - df.c5.min()) / (df.c5.max() - df.c5.min()) + if len(df) <1: + continue mes = np.unique(df.sim_measure)[0] type_ = np.unique(df.type_str)[0] - val = df.groupby("G1").mean().mean()["c1 c2 c3 c4".split()].values.tolist() + val = df.groupby("G1").mean().mean()["c1 c2 c3 c4 c5".split()].values.tolist() val.insert(0, type_) val.insert(0, mes) data.append(val) - pd.DataFrame(data, columns="mesure type c1 c2 c3 c4".split()) - res = pd.DataFrame(data, columns="mesure type c1 c2 c3 c4".split()) + + res = pd.DataFrame(data, columns="mesure type c1 c2 c3 c4 c5".split()) res.to_csv(output_filename) \ No newline at end of file diff --git a/notebooks/MatchingAnalysis/.ipynb_checkpoints/output-checkpoint.png b/notebooks/MatchingAnalysis/.ipynb_checkpoints/output-checkpoint.png deleted file mode 100644 index 75f7e9c75d4441c3e92dc5ba1e0b6eda544c1fda..0000000000000000000000000000000000000000 Binary files a/notebooks/MatchingAnalysis/.ipynb_checkpoints/output-checkpoint.png and /dev/null differ diff --git a/notebooks/MatchingAnalysis/MADA_growth_criteria.pdf b/notebooks/MatchingAnalysis/MADA_growth_criteria.pdf deleted file mode 100644 index af5d011f655f22b1ea9c2700cce35a10227f99ff..0000000000000000000000000000000000000000 Binary files a/notebooks/MatchingAnalysis/MADA_growth_criteria.pdf and /dev/null differ diff --git a/notebooks/MatchingAnalysis/Result_AnaysisV2_MADA.ipynb b/notebooks/MatchingAnalysis/Result_AnaysisV2_MADA.ipynb index dcbb6caf95d7abe25da059ca880e9202cfda9b35..a43373c0651f76a98f00ea2c33fa65a235c9b5db 100644 --- a/notebooks/MatchingAnalysis/Result_AnaysisV2_MADA.ipynb +++ b/notebooks/MatchingAnalysis/Result_AnaysisV2_MADA.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2018-09-28T05:03:09.093753Z", @@ -36,7 +36,7 @@ }, "outputs": [], "source": [ - "data=pd.read_csv(\"../../agromad_05_mean_nbpoint.csv\",index_col=0)\n", + "data=pd.read_csv(\"../../bvlac_2_0.5.csvming1_10\",index_col=0)\n", "data=data[data.mesure != \"BP\"]\n", "data[\"mean\"]=np.mean(data[\"c1 c2 c3 c4\".split()].values,axis=1)\n", "data[\"sum\"]=np.sum(data[\"c1 c2 c3 c4\".split()].values,axis=1)\n", @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -65,234 +65,19 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 4, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "<style type=\"text/css\" >\n", - " #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col8 {\n", - " : ;\n", - " background-color: #d64541;\n", - " color: white;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col9 {\n", - " background-color: yellow;\n", - " : ;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col10 {\n", - " background-color: yellow;\n", - " : ;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col12 {\n", - " background-color: yellow;\n", - " : ;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col11 {\n", - " : ;\n", - " background-color: #d64541;\n", - " color: white;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col11 {\n", - " : ;\n", - " background-color: #d64541;\n", - " color: white;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col11 {\n", - " background-color: yellow;\n", - " : ;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col8 {\n", - " background-color: yellow;\n", - " : ;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col11 {\n", - " background-color: yellow;\n", - " : ;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col11 {\n", - " background-color: yellow;\n", - " : ;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col11 {\n", - " background-color: yellow;\n", - " : ;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col9 {\n", - " : ;\n", - " background-color: #d64541;\n", - " color: white;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col11 {\n", - " background-color: yellow;\n", - " : ;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col10 {\n", - " : ;\n", - " background-color: #d64541;\n", - " color: white;\n", - " } #T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col12 {\n", - " : ;\n", - " background-color: #d64541;\n", - " color: white;\n", - " }</style><table id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820\" ><thead> <tr> <th class=\"blank level0\" ></th> <th class=\"col_heading level0 col0\" >mesure</th> <th class=\"col_heading level0 col1\" >type</th> <th class=\"col_heading level0 col2\" >c1</th> <th class=\"col_heading level0 col3\" >c2</th> <th class=\"col_heading level0 col4\" >c3</th> <th class=\"col_heading level0 col5\" >c4</th> <th class=\"col_heading level0 col6\" >mean</th> <th class=\"col_heading level0 col7\" >sum</th> <th class=\"col_heading level0 col8\" >c1_w</th> <th class=\"col_heading level0 col9\" >c2_w</th> <th class=\"col_heading level0 col10\" >c3_w</th> <th class=\"col_heading level0 col11\" >c4_w</th> <th class=\"col_heading level0 col12\" >sum_w</th> </tr></thead><tbody>\n", - " <tr>\n", - " <th id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820level0_row0\" class=\"row_heading level0 row0\" >6</th>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col0\" class=\"data row0 col0\" >BOW</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col1\" class=\"data row0 col1\" >inra_gen_country</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col2\" class=\"data row0 col2\" >0.664</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col3\" class=\"data row0 col3\" >0.246</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col4\" class=\"data row0 col4\" >0.342</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col5\" class=\"data row0 col5\" >0.194</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col6\" class=\"data row0 col6\" >0.3615</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col7\" class=\"data row0 col7\" >1.446</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col8\" class=\"data row0 col8\" >0.0664</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col9\" class=\"data row0 col9\" >0.0984</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col10\" class=\"data row0 col10\" >0.1368</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col11\" class=\"data row0 col11\" >0.0194</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row0_col12\" class=\"data row0 col12\" >0.321</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820level0_row1\" class=\"row_heading level0 row1\" >3</th>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col0\" class=\"data row1 col0\" >BOW</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col1\" class=\"data row1 col1\" >inra</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col2\" class=\"data row1 col2\" >0.732</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col3\" class=\"data row1 col3\" >0.23</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col4\" class=\"data row1 col4\" >0.336</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col5\" class=\"data row1 col5\" >0.19</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col6\" class=\"data row1 col6\" >0.372</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col7\" class=\"data row1 col7\" >1.488</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col8\" class=\"data row1 col8\" >0.0732</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col9\" class=\"data row1 col9\" >0.092</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col10\" class=\"data row1 col10\" >0.1344</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col11\" class=\"data row1 col11\" >0.019</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row1_col12\" class=\"data row1 col12\" >0.3186</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820level0_row2\" class=\"row_heading level0 row2\" >101</th>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col0\" class=\"data row2 col0\" >BOW</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col1\" class=\"data row2 col1\" >inra_ext_2</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col2\" class=\"data row2 col2\" >0.724</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col3\" class=\"data row2 col3\" >0.226</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col4\" class=\"data row2 col4\" >0.336</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col5\" class=\"data row2 col5\" >0.19</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col6\" class=\"data row2 col6\" >0.369</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col7\" class=\"data row2 col7\" >1.476</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col8\" class=\"data row2 col8\" >0.0724</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col9\" class=\"data row2 col9\" >0.0904</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col10\" class=\"data row2 col10\" >0.1344</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col11\" class=\"data row2 col11\" >0.019</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row2_col12\" class=\"data row2 col12\" >0.3162</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820level0_row3\" class=\"row_heading level0 row3\" >137</th>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col0\" class=\"data row3 col0\" >MCS</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col1\" class=\"data row3 col1\" >gen_region</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col2\" class=\"data row3 col2\" >0.69</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col3\" class=\"data row3 col3\" >0.228</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col4\" class=\"data row3 col4\" >0.334</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col5\" class=\"data row3 col5\" >0.198</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col6\" class=\"data row3 col6\" >0.3625</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col7\" class=\"data row3 col7\" >1.45</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col8\" class=\"data row3 col8\" >0.069</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col9\" class=\"data row3 col9\" >0.0912</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col10\" class=\"data row3 col10\" >0.1336</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col11\" class=\"data row3 col11\" >0.0198</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row3_col12\" class=\"data row3 col12\" >0.3136</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820level0_row4\" class=\"row_heading level0 row4\" >106</th>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col0\" class=\"data row4 col0\" >BOW</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col1\" class=\"data row4 col1\" >ext_2</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col2\" class=\"data row4 col2\" >0.744</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col3\" class=\"data row4 col3\" >0.22</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col4\" class=\"data row4 col4\" >0.326</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col5\" class=\"data row4 col5\" >0.198</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col6\" class=\"data row4 col6\" >0.372</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col7\" class=\"data row4 col7\" >1.488</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col8\" class=\"data row4 col8\" >0.0744</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col9\" class=\"data row4 col9\" >0.088</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col10\" class=\"data row4 col10\" >0.1304</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col11\" class=\"data row4 col11\" >0.0198</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row4_col12\" class=\"data row4 col12\" >0.3126</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820level0_row5\" class=\"row_heading level0 row5\" >92</th>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col0\" class=\"data row5 col0\" >BOW</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col1\" class=\"data row5 col1\" >ext_1</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col2\" class=\"data row5 col2\" >0.736</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col3\" class=\"data row5 col3\" >0.216</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col4\" class=\"data row5 col4\" >0.328</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col5\" class=\"data row5 col5\" >0.198</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col6\" class=\"data row5 col6\" >0.3695</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col7\" class=\"data row5 col7\" >1.478</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col8\" class=\"data row5 col8\" >0.0736</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col9\" class=\"data row5 col9\" >0.0864</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col10\" class=\"data row5 col10\" >0.1312</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col11\" class=\"data row5 col11\" >0.0198</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row5_col12\" class=\"data row5 col12\" >0.311</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820level0_row6\" class=\"row_heading level0 row6\" >73</th>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col0\" class=\"data row6 col0\" >Jaccard</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col1\" class=\"data row6 col1\" >biotex_bvlac</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col2\" class=\"data row6 col2\" >0.726</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col3\" class=\"data row6 col3\" >0.214</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col4\" class=\"data row6 col4\" >0.334</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col5\" class=\"data row6 col5\" >0.192</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col6\" class=\"data row6 col6\" >0.3665</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col7\" class=\"data row6 col7\" >1.466</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col8\" class=\"data row6 col8\" >0.0726</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col9\" class=\"data row6 col9\" >0.0856</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col10\" class=\"data row6 col10\" >0.1336</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col11\" class=\"data row6 col11\" >0.0192</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row6_col12\" class=\"data row6 col12\" >0.311</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820level0_row7\" class=\"row_heading level0 row7\" >51</th>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col0\" class=\"data row7 col0\" >MCS</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col1\" class=\"data row7 col1\" >ext_2</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col2\" class=\"data row7 col2\" >0.7</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col3\" class=\"data row7 col3\" >0.216</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col4\" class=\"data row7 col4\" >0.332</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col5\" class=\"data row7 col5\" >0.198</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col6\" class=\"data row7 col6\" >0.3615</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col7\" class=\"data row7 col7\" >1.446</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col8\" class=\"data row7 col8\" >0.07</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col9\" class=\"data row7 col9\" >0.0864</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col10\" class=\"data row7 col10\" >0.1328</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col11\" class=\"data row7 col11\" >0.0198</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row7_col12\" class=\"data row7 col12\" >0.309</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820level0_row8\" class=\"row_heading level0 row8\" >67</th>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col0\" class=\"data row8 col0\" >BOW</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col1\" class=\"data row8 col1\" >str_object</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col2\" class=\"data row8 col2\" >0.732</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col3\" class=\"data row8 col3\" >0.208</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col4\" class=\"data row8 col4\" >0.332</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col5\" class=\"data row8 col5\" >0.198</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col6\" class=\"data row8 col6\" >0.3675</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col7\" class=\"data row8 col7\" >1.47</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col8\" class=\"data row8 col8\" >0.0732</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col9\" class=\"data row8 col9\" >0.0832</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col10\" class=\"data row8 col10\" >0.1328</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col11\" class=\"data row8 col11\" >0.0198</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row8_col12\" class=\"data row8 col12\" >0.309</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820level0_row9\" class=\"row_heading level0 row9\" >134</th>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col0\" class=\"data row9 col0\" >VertexEdgeOverlap</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col1\" class=\"data row9 col1\" >inra_ext_2</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col2\" class=\"data row9 col2\" >0.71</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col3\" class=\"data row9 col3\" >0.23</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col4\" class=\"data row9 col4\" >0.316</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col5\" class=\"data row9 col5\" >0.194</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col6\" class=\"data row9 col6\" >0.3625</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col7\" class=\"data row9 col7\" >1.45</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col8\" class=\"data row9 col8\" >0.071</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col9\" class=\"data row9 col9\" >0.092</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col10\" class=\"data row9 col10\" >0.1264</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col11\" class=\"data row9 col11\" >0.0194</td>\n", - " <td id=\"T_6958a7d0_4af9_11e9_9cd7_6a0002e84820row9_col12\" class=\"data row9 col12\" >0.3088</td>\n", - " </tr>\n", - " </tbody></table>" - ], - "text/plain": [ - "<pandas.io.formats.style.Styler at 0x13d4df3c8>" - ] - }, - "execution_count": 116, - "metadata": {}, - "output_type": "execute_result" + "ename": "NameError", + "evalue": "name 'colorize' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-4-d2caa3de5718>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"c5_w\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mc5\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;36m0.1\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"sum_w\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"c1_w c2_w c3_w c4_w c5_w\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mcolorize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"sum\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mascending\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"sum_w\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mascending\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"c1_w c2_w c3_w c4_w c5_w sum_w\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'colorize' is not defined" + ] } ], "source": [ @@ -300,8 +85,9 @@ "data[\"c2_w\"]=data.c2.apply(lambda x: 0.4*x)\n", "data[\"c3_w\"]=data.c3.apply(lambda x: 0.4*x)\n", "data[\"c4_w\"]=data.c4.apply(lambda x: 0.1*x)\n", - "data[\"sum_w\"]=np.sum(data[\"c1_w c2_w c3_w c4_w\".split()].values,axis=1)\n", - "colorize(data.sort_values(\"sum\",ascending=False).head(10).sort_values(\"sum_w\",ascending=False),\"c1_w c2_w c3_w c4_w sum_w\".split())" + "data[\"c5_w\"]=data.c5.apply(lambda x: 0.1*x)\n", + "data[\"sum_w\"]=np.sum(data[\"c1_w c2_w c3_w c4_w c5_w\".split()].values,axis=1)\n", + "colorize(data.sort_values(\"sum\",ascending=False).head(10).sort_values(\"sum_w\",ascending=False),\"c1_w c2_w c3_w c4_w c5_w sum_w\".split())" ] }, { @@ -802,7 +588,7 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2018-09-26T12:55:10.491478Z", @@ -827,23 +613,7 @@ " i+=1\n", " return indices,pareto_frontier\n", "\n", - "def highlight_max(s):\n", - " '''\n", - " highlight the maximum in a Series yellow.\n", - " '''\n", - " is_max = s == s.max()\n", - " return ['background-color: yellow' if v else '' for v in is_max]\n", - "def highlight_min(s):\n", - " '''\n", - " highlight the maximum in a Series yellow.\n", - " '''\n", - " is_max = s == s.min()\n", - " return ['background-color: #d64541;color:white;' if v else '' for v in is_max]\n", - "\n", - "def colorize(df,fields):\n", - " return df.style.apply(highlight_max,subset=fields).apply(highlight_min,subset=fields)\n", - "\n", - "to_colorize=\"c1 c2 c3 c4 mean sum\".split()" + "to_colorize=\"c1 c2 c3 c4 c5 mean sum\".split()\n" ] }, { @@ -857,7 +627,7 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2018-09-26T12:55:10.899176Z", @@ -869,182 +639,159 @@ "data": { "text/html": [ "<style type=\"text/css\" >\n", - " #T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col0 {\n", + " #T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col0 {\n", + " background-color: yellow;\n", + " : ;\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col2 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col2 {\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col3 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col4 {\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col5 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col5 {\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col6 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col3 {\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row1_col3 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col1 {\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row1_col4 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col0 {\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row2_col3 {\n", + " background-color: yellow;\n", + " : ;\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row5_col1 {\n", + " background-color: yellow;\n", + " : ;\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col0 {\n", + " : ;\n", + " background-color: #d64541;\n", + " color: white;\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col1 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col1 {\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col2 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col2 {\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col3 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col3 {\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col4 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col4 {\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col5 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col5 {\n", + " } #T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col6 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " }</style><table id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820\" ><thead> <tr> <th class=\"blank level0\" ></th> <th class=\"col_heading level0 col0\" >c1</th> <th class=\"col_heading level0 col1\" >c2</th> <th class=\"col_heading level0 col2\" >c3</th> <th class=\"col_heading level0 col3\" >c4</th> <th class=\"col_heading level0 col4\" >mean</th> <th class=\"col_heading level0 col5\" >sum</th> <th class=\"col_heading level0 col6\" >c1_w</th> <th class=\"col_heading level0 col7\" >c2_w</th> <th class=\"col_heading level0 col8\" >c3_w</th> <th class=\"col_heading level0 col9\" >c4_w</th> <th class=\"col_heading level0 col10\" >sum_w</th> </tr> <tr> <th class=\"index_name level0\" >mesure</th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> </tr></thead><tbody>\n", + " }</style><table id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820\" ><thead> <tr> <th class=\"blank level0\" ></th> <th class=\"col_heading level0 col0\" >c1</th> <th class=\"col_heading level0 col1\" >c2</th> <th class=\"col_heading level0 col2\" >c3</th> <th class=\"col_heading level0 col3\" >c4</th> <th class=\"col_heading level0 col4\" >c5</th> <th class=\"col_heading level0 col5\" >mean</th> <th class=\"col_heading level0 col6\" >sum</th> </tr> <tr> <th class=\"index_name level0\" >mesure</th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> <th class=\"blank\" ></th> </tr></thead><tbody>\n", " <tr>\n", - " <th id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820level0_row0\" class=\"row_heading level0 row0\" >BOW</th>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col0\" class=\"data row0 col0\" >0.667391</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col1\" class=\"data row0 col1\" >0.215652</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col2\" class=\"data row0 col2\" >0.324174</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col3\" class=\"data row0 col3\" >0.193739</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col4\" class=\"data row0 col4\" >0.350239</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col5\" class=\"data row0 col5\" >1.40096</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col6\" class=\"data row0 col6\" >0.667391</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col7\" class=\"data row0 col7\" >0.215652</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col8\" class=\"data row0 col8\" >0.324174</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col9\" class=\"data row0 col9\" >0.193739</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row0_col10\" class=\"data row0 col10\" >1.40096</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820level0_row1\" class=\"row_heading level0 row1\" >VertexEdgeOverlap</th>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row1_col0\" class=\"data row1 col0\" >0.664609</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row1_col1\" class=\"data row1 col1\" >0.212174</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row1_col2\" class=\"data row1 col2\" >0.316174</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row1_col3\" class=\"data row1 col3\" >0.194435</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row1_col4\" class=\"data row1 col4\" >0.346848</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row1_col5\" class=\"data row1 col5\" >1.38739</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row1_col6\" class=\"data row1 col6\" >0.664609</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row1_col7\" class=\"data row1 col7\" >0.212174</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row1_col8\" class=\"data row1 col8\" >0.316174</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row1_col9\" class=\"data row1 col9\" >0.194435</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row1_col10\" class=\"data row1 col10\" >1.38739</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820level0_row2\" class=\"row_heading level0 row2\" >MCS</th>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col0\" class=\"data row2 col0\" >0.649913</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col1\" class=\"data row2 col1\" >0.215304</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col2\" class=\"data row2 col2\" >0.318174</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col3\" class=\"data row2 col3\" >0.194696</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col4\" class=\"data row2 col4\" >0.344522</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col5\" class=\"data row2 col5\" >1.37809</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col6\" class=\"data row2 col6\" >0.649913</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col7\" class=\"data row2 col7\" >0.215304</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col8\" class=\"data row2 col8\" >0.318174</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col9\" class=\"data row2 col9\" >0.194696</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row2_col10\" class=\"data row2 col10\" >1.37809</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820level0_row3\" class=\"row_heading level0 row3\" >Jaccard</th>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row3_col0\" class=\"data row3 col0\" >0.609043</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row3_col1\" class=\"data row3 col1\" >0.217217</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row3_col2\" class=\"data row3 col2\" >0.303739</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row3_col3\" class=\"data row3 col3\" >0.182957</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row3_col4\" class=\"data row3 col4\" >0.328239</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row3_col5\" class=\"data row3 col5\" >1.31296</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row3_col6\" class=\"data row3 col6\" >0.609043</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row3_col7\" class=\"data row3 col7\" >0.217217</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row3_col8\" class=\"data row3 col8\" >0.303739</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row3_col9\" class=\"data row3 col9\" >0.182957</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row3_col10\" class=\"data row3 col10\" >1.31296</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820level0_row4\" class=\"row_heading level0 row4\" >DeepWalk</th>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row4_col0\" class=\"data row4 col0\" >0.463217</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row4_col1\" class=\"data row4 col1\" >0.348609</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row4_col2\" class=\"data row4 col2\" >0.127043</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row4_col3\" class=\"data row4 col3\" >0.060087</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row4_col4\" class=\"data row4 col4\" >0.249739</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row4_col5\" class=\"data row4 col5\" >0.998957</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row4_col6\" class=\"data row4 col6\" >0.463217</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row4_col7\" class=\"data row4 col7\" >0.348609</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row4_col8\" class=\"data row4 col8\" >0.127043</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row4_col9\" class=\"data row4 col9\" >0.060087</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row4_col10\" class=\"data row4 col10\" >0.998957</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820level0_row5\" class=\"row_heading level0 row5\" >WeisfeleirLehmanKernel</th>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col0\" class=\"data row5 col0\" >0.352435</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col1\" class=\"data row5 col1\" >0.541913</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col2\" class=\"data row5 col2\" >0.0561739</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col3\" class=\"data row5 col3\" >0.00417391</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col4\" class=\"data row5 col4\" >0.238674</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col5\" class=\"data row5 col5\" >0.954696</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col6\" class=\"data row5 col6\" >0.352435</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col7\" class=\"data row5 col7\" >0.541913</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col8\" class=\"data row5 col8\" >0.0561739</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col9\" class=\"data row5 col9\" >0.00417391</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row5_col10\" class=\"data row5 col10\" >0.954696</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820level0_row6\" class=\"row_heading level0 row6\" >GraphEditDistance</th>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row6_col0\" class=\"data row6 col0\" >0.372727</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row6_col1\" class=\"data row6 col1\" >0.144182</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row6_col2\" class=\"data row6 col2\" >0.150545</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row6_col3\" class=\"data row6 col3\" >0.107091</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row6_col4\" class=\"data row6 col4\" >0.193636</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row6_col5\" class=\"data row6 col5\" >0.774545</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row6_col6\" class=\"data row6 col6\" >0.372727</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row6_col7\" class=\"data row6 col7\" >0.144182</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row6_col8\" class=\"data row6 col8\" >0.150545</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row6_col9\" class=\"data row6 col9\" >0.107091</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row6_col10\" class=\"data row6 col10\" >0.774545</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820level0_row7\" class=\"row_heading level0 row7\" >PolyIntersect</th>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row7_col0\" class=\"data row7 col0\" >0.1696</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row7_col1\" class=\"data row7 col1\" >0.1124</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row7_col2\" class=\"data row7 col2\" >0.0596</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row7_col3\" class=\"data row7 col3\" >0.0408</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row7_col4\" class=\"data row7 col4\" >0.0956</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row7_col5\" class=\"data row7 col5\" >0.3824</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row7_col6\" class=\"data row7 col6\" >0.1696</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row7_col7\" class=\"data row7 col7\" >0.1124</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row7_col8\" class=\"data row7 col8\" >0.0596</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row7_col9\" class=\"data row7 col9\" >0.0408</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row7_col10\" class=\"data row7 col10\" >0.3824</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820level0_row8\" class=\"row_heading level0 row8\" >Graph2Vec</th>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col0\" class=\"data row8 col0\" >0.0206087</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col1\" class=\"data row8 col1\" >0.0264348</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col2\" class=\"data row8 col2\" >0.00243478</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col3\" class=\"data row8 col3\" >0</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col4\" class=\"data row8 col4\" >0.0123696</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col5\" class=\"data row8 col5\" >0.0494783</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col6\" class=\"data row8 col6\" >0.0206087</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col7\" class=\"data row8 col7\" >0.0264348</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col8\" class=\"data row8 col8\" >0.00243478</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col9\" class=\"data row8 col9\" >0</td>\n", - " <td id=\"T_6afb48ae_4af9_11e9_8332_6a0002e84820row8_col10\" class=\"data row8 col10\" >0.0494783</td>\n", + " <th id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820level0_row0\" class=\"row_heading level0 row0\" >BOW</th>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col0\" class=\"data row0 col0\" >0.67284</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col1\" class=\"data row0 col1\" >0.245059</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col2\" class=\"data row0 col2\" >0.395483</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col3\" class=\"data row0 col3\" >0.223377</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col4\" class=\"data row0 col4\" >0.683918</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col5\" class=\"data row0 col5\" >0.38419</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row0_col6\" class=\"data row0 col6\" >1.53676</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820level0_row1\" class=\"row_heading level0 row1\" >VertexEdgeOverlap</th>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row1_col0\" class=\"data row1 col0\" >0.672501</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row1_col1\" class=\"data row1 col1\" >0.245398</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row1_col2\" class=\"data row1 col2\" >0.38611</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row1_col3\" class=\"data row1 col3\" >0.223377</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row1_col4\" class=\"data row1 col4\" >0.693066</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row1_col5\" class=\"data row1 col5\" >0.381846</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row1_col6\" class=\"data row1 col6\" >1.52739</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820level0_row2\" class=\"row_heading level0 row2\" >MCS</th>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row2_col0\" class=\"data row2 col0\" >0.657256</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row2_col1\" class=\"data row2 col1\" >0.247883</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row2_col2\" class=\"data row2 col2\" >0.388707</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row2_col3\" class=\"data row2 col3\" >0.223377</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row2_col4\" class=\"data row2 col4\" >0.684032</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row2_col5\" class=\"data row2 col5\" >0.379305</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row2_col6\" class=\"data row2 col6\" >1.51722</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820level0_row3\" class=\"row_heading level0 row3\" >Jaccard</th>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row3_col0\" class=\"data row3 col0\" >0.640994</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row3_col1\" class=\"data row3 col1\" >0.240542</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row3_col2\" class=\"data row3 col2\" >0.372671</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row3_col3\" class=\"data row3 col3\" >0.210615</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row3_col4\" class=\"data row3 col4\" >0.660207</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row3_col5\" class=\"data row3 col5\" >0.366206</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row3_col6\" class=\"data row3 col6\" >1.46482</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820level0_row4\" class=\"row_heading level0 row4\" >DeepWalk</th>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row4_col0\" class=\"data row4 col0\" >0.49712</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row4_col1\" class=\"data row4 col1\" >0.389723</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row4_col2\" class=\"data row4 col2\" >0.152005</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row4_col3\" class=\"data row4 col3\" >0.0658385</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row4_col4\" class=\"data row4 col4\" >0.610745</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row4_col5\" class=\"data row4 col5\" >0.276172</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row4_col6\" class=\"data row4 col6\" >1.10469</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820level0_row5\" class=\"row_heading level0 row5\" >WeisfeleirLehmanKernel</th>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row5_col0\" class=\"data row5 col0\" >0.394128</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row5_col1\" class=\"data row5 col1\" >0.493619</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row5_col2\" class=\"data row5 col2\" >0.0725014</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row5_col3\" class=\"data row5 col3\" >0.00496894</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row5_col4\" class=\"data row5 col4\" >0.528249</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row5_col5\" class=\"data row5 col5\" >0.241304</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row5_col6\" class=\"data row5 col6\" >0.965217</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820level0_row6\" class=\"row_heading level0 row6\" >GraphEditDistance</th>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row6_col0\" class=\"data row6 col0\" >0.379693</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row6_col1\" class=\"data row6 col1\" >0.170248</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row6_col2\" class=\"data row6 col2\" >0.186068</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row6_col3\" class=\"data row6 col3\" >0.121842</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row6_col4\" class=\"data row6 col4\" >0.430875</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row6_col5\" class=\"data row6 col5\" >0.214463</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row6_col6\" class=\"data row6 col6\" >0.857851</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820level0_row7\" class=\"row_heading level0 row7\" >PolyIntersect</th>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row7_col0\" class=\"data row7 col0\" >0.189091</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row7_col1\" class=\"data row7 col1\" >0.122597</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row7_col2\" class=\"data row7 col2\" >0.0774026</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row7_col3\" class=\"data row7 col3\" >0.052987</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row7_col4\" class=\"data row7 col4\" >0.227495</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row7_col5\" class=\"data row7 col5\" >0.110519</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row7_col6\" class=\"data row7 col6\" >0.442078</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820level0_row8\" class=\"row_heading level0 row8\" >Graph2Vec</th>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col0\" class=\"data row8 col0\" >0.0246189</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col1\" class=\"data row8 col1\" >0.0280068</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col2\" class=\"data row8 col2\" >0.00316206</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col3\" class=\"data row8 col3\" >0</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col4\" class=\"data row8 col4\" >0.0395227</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col5\" class=\"data row8 col5\" >0.0139469</td>\n", + " <td id=\"T_1102a8f4_4c06_11e9_af47_6a0002e84820row8_col6\" class=\"data row8 col6\" >0.0557877</td>\n", " </tr>\n", " </tbody></table>" ], "text/plain": [ - "<pandas.io.formats.style.Styler at 0x13c658668>" + "<pandas.io.formats.style.Styler at 0x10ede50f0>" ] }, - "execution_count": 120, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -1094,7 +841,7 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2018-09-26T12:55:10.937714Z", @@ -1106,342 +853,310 @@ "data": { "text/html": [ "<style type=\"text/css\" >\n", - " #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col2 {\n", + " #T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col2 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col6 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col4 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col7 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col5 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col4 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col7 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col5 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col8 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col3 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row1_col5 {\n", " background-color: yellow;\n", " : ;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col5 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row2_col5 {\n", + " background-color: yellow;\n", + " : ;\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row3_col5 {\n", + " background-color: yellow;\n", + " : ;\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row3_col6 {\n", + " background-color: yellow;\n", + " : ;\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row4_col5 {\n", + " background-color: yellow;\n", + " : ;\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row5_col5 {\n", + " background-color: yellow;\n", + " : ;\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row6_col5 {\n", + " background-color: yellow;\n", + " : ;\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row7_col5 {\n", + " background-color: yellow;\n", + " : ;\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row9_col3 {\n", + " background-color: yellow;\n", + " : ;\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row10_col5 {\n", + " : ;\n", + " background-color: #d64541;\n", + " color: white;\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col2 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col2 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col3 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col3 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col4 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col4 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col5 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col5 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col6 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col6 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col7 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col7 {\n", + " } #T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col8 {\n", " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " }</style><table id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820\" ><thead> <tr> <th class=\"blank level0\" ></th> <th class=\"col_heading level0 col0\" >mesure</th> <th class=\"col_heading level0 col1\" >type</th> <th class=\"col_heading level0 col2\" >c1</th> <th class=\"col_heading level0 col3\" >c2</th> <th class=\"col_heading level0 col4\" >c3</th> <th class=\"col_heading level0 col5\" >c4</th> <th class=\"col_heading level0 col6\" >mean</th> <th class=\"col_heading level0 col7\" >sum</th> <th class=\"col_heading level0 col8\" >c1_w</th> <th class=\"col_heading level0 col9\" >c2_w</th> <th class=\"col_heading level0 col10\" >c3_w</th> <th class=\"col_heading level0 col11\" >c4_w</th> <th class=\"col_heading level0 col12\" >sum_w</th> </tr></thead><tbody>\n", + " }</style><table id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820\" ><thead> <tr> <th class=\"blank level0\" ></th> <th class=\"col_heading level0 col0\" >mesure</th> <th class=\"col_heading level0 col1\" >type</th> <th class=\"col_heading level0 col2\" >c1</th> <th class=\"col_heading level0 col3\" >c2</th> <th class=\"col_heading level0 col4\" >c3</th> <th class=\"col_heading level0 col5\" >c4</th> <th class=\"col_heading level0 col6\" >c5</th> <th class=\"col_heading level0 col7\" >mean</th> <th class=\"col_heading level0 col8\" >sum</th> </tr></thead><tbody>\n", " <tr>\n", - " <th id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820level0_row0\" class=\"row_heading level0 row0\" >51</th>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col0\" class=\"data row0 col0\" >MCS</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col1\" class=\"data row0 col1\" >ext_2</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col2\" class=\"data row0 col2\" >0.7</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col3\" class=\"data row0 col3\" >0.216</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col4\" class=\"data row0 col4\" >0.332</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col5\" class=\"data row0 col5\" >0.198</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col6\" class=\"data row0 col6\" >0.3615</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col7\" class=\"data row0 col7\" >1.446</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col8\" class=\"data row0 col8\" >0.7</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col9\" class=\"data row0 col9\" >0.216</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col10\" class=\"data row0 col10\" >0.332</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col11\" class=\"data row0 col11\" >0.198</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row0_col12\" class=\"data row0 col12\" >1.446</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820level0_row1\" class=\"row_heading level0 row1\" >7</th>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col0\" class=\"data row1 col0\" >BOW</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col1\" class=\"data row1 col1\" >dev_du</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col2\" class=\"data row1 col2\" >0.698</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col3\" class=\"data row1 col3\" >0.196</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col4\" class=\"data row1 col4\" >0.336</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col5\" class=\"data row1 col5\" >0.2</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col6\" class=\"data row1 col6\" >0.3575</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col7\" class=\"data row1 col7\" >1.43</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col8\" class=\"data row1 col8\" >0.698</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col9\" class=\"data row1 col9\" >0.196</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col10\" class=\"data row1 col10\" >0.336</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col11\" class=\"data row1 col11\" >0.2</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row1_col12\" class=\"data row1 col12\" >1.43</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820level0_row2\" class=\"row_heading level0 row2\" >29</th>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col0\" class=\"data row2 col0\" >MCS</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col1\" class=\"data row2 col1\" >biotex_lda_bvlac_ext_1</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col2\" class=\"data row2 col2\" >0.656</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col3\" class=\"data row2 col3\" >0.226</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col4\" class=\"data row2 col4\" >0.306</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col5\" class=\"data row2 col5\" >0.194</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col6\" class=\"data row2 col6\" >0.3455</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col7\" class=\"data row2 col7\" >1.382</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col8\" class=\"data row2 col8\" >0.656</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col9\" class=\"data row2 col9\" >0.226</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col10\" class=\"data row2 col10\" >0.306</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col11\" class=\"data row2 col11\" >0.194</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row2_col12\" class=\"data row2 col12\" >1.382</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820level0_row3\" class=\"row_heading level0 row3\" >21</th>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col0\" class=\"data row3 col0\" >VertexEdgeOverlap</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col1\" class=\"data row3 col1\" >biotex_lda_bvlac_gen_country</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col2\" class=\"data row3 col2\" >0.63</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col3\" class=\"data row3 col3\" >0.224</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col4\" class=\"data row3 col4\" >0.318</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col5\" class=\"data row3 col5\" >0.192</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col6\" class=\"data row3 col6\" >0.341</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col7\" class=\"data row3 col7\" >1.364</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col8\" class=\"data row3 col8\" >0.63</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col9\" class=\"data row3 col9\" >0.224</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col10\" class=\"data row3 col10\" >0.318</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col11\" class=\"data row3 col11\" >0.192</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row3_col12\" class=\"data row3 col12\" >1.364</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820level0_row4\" class=\"row_heading level0 row4\" >20</th>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col0\" class=\"data row4 col0\" >MCS</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col1\" class=\"data row4 col1\" >inra_gen_country</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col2\" class=\"data row4 col2\" >0.628</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col3\" class=\"data row4 col3\" >0.226</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col4\" class=\"data row4 col4\" >0.306</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col5\" class=\"data row4 col5\" >0.192</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col6\" class=\"data row4 col6\" >0.338</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col7\" class=\"data row4 col7\" >1.352</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col8\" class=\"data row4 col8\" >0.628</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col9\" class=\"data row4 col9\" >0.226</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col10\" class=\"data row4 col10\" >0.306</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col11\" class=\"data row4 col11\" >0.192</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row4_col12\" class=\"data row4 col12\" >1.352</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820level0_row5\" class=\"row_heading level0 row5\" >8</th>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col0\" class=\"data row5 col0\" >BOW</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col1\" class=\"data row5 col1\" >biotex_lda_bvlac</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col2\" class=\"data row5 col2\" >0.624</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col3\" class=\"data row5 col3\" >0.232</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col4\" class=\"data row5 col4\" >0.302</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col5\" class=\"data row5 col5\" >0.19</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col6\" class=\"data row5 col6\" >0.337</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col7\" class=\"data row5 col7\" >1.348</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col8\" class=\"data row5 col8\" >0.624</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col9\" class=\"data row5 col9\" >0.232</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col10\" class=\"data row5 col10\" >0.302</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col11\" class=\"data row5 col11\" >0.19</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row5_col12\" class=\"data row5 col12\" >1.348</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820level0_row6\" class=\"row_heading level0 row6\" >55</th>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col0\" class=\"data row6 col0\" >Jaccard</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col1\" class=\"data row6 col1\" >dev_du_ext_2</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col2\" class=\"data row6 col2\" >0.598</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col3\" class=\"data row6 col3\" >0.232</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col4\" class=\"data row6 col4\" >0.32</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col5\" class=\"data row6 col5\" >0.198</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col6\" class=\"data row6 col6\" >0.337</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col7\" class=\"data row6 col7\" >1.348</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col8\" class=\"data row6 col8\" >0.598</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col9\" class=\"data row6 col9\" >0.232</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col10\" class=\"data row6 col10\" >0.32</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col11\" class=\"data row6 col11\" >0.198</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row6_col12\" class=\"data row6 col12\" >1.348</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820level0_row7\" class=\"row_heading level0 row7\" >30</th>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col0\" class=\"data row7 col0\" >WeisfeleirLehmanKernel</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col1\" class=\"data row7 col1\" >biotex_bvlac</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col2\" class=\"data row7 col2\" >0.41</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col3\" class=\"data row7 col3\" >0.574</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col4\" class=\"data row7 col4\" >0.032</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col5\" class=\"data row7 col5\" >0.004</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col6\" class=\"data row7 col6\" >0.255</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col7\" class=\"data row7 col7\" >1.02</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col8\" class=\"data row7 col8\" >0.41</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col9\" class=\"data row7 col9\" >0.574</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col10\" class=\"data row7 col10\" >0.032</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col11\" class=\"data row7 col11\" >0.004</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row7_col12\" class=\"data row7 col12\" >1.02</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820level0_row8\" class=\"row_heading level0 row8\" >28</th>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col0\" class=\"data row8 col0\" >DeepWalk</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col1\" class=\"data row8 col1\" >biotex_lda_bvlac_ext_2</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col2\" class=\"data row8 col2\" >0.426</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col3\" class=\"data row8 col3\" >0.4</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col4\" class=\"data row8 col4\" >0.1</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col5\" class=\"data row8 col5\" >0.05</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col6\" class=\"data row8 col6\" >0.244</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col7\" class=\"data row8 col7\" >0.976</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col8\" class=\"data row8 col8\" >0.426</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col9\" class=\"data row8 col9\" >0.4</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col10\" class=\"data row8 col10\" >0.1</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col11\" class=\"data row8 col11\" >0.05</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row8_col12\" class=\"data row8 col12\" >0.976</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820level0_row9\" class=\"row_heading level0 row9\" >49</th>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col0\" class=\"data row9 col0\" >GraphEditDistance</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col1\" class=\"data row9 col1\" >dev_du_gen_region</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col2\" class=\"data row9 col2\" >0.106</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col3\" class=\"data row9 col3\" >0.098</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col4\" class=\"data row9 col4\" >0.002</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col5\" class=\"data row9 col5\" >0</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col6\" class=\"data row9 col6\" >0.0515</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col7\" class=\"data row9 col7\" >0.206</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col8\" class=\"data row9 col8\" >0.106</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col9\" class=\"data row9 col9\" >0.098</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col10\" class=\"data row9 col10\" >0.002</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col11\" class=\"data row9 col11\" >0</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row9_col12\" class=\"data row9 col12\" >0.206</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820level0_row10\" class=\"row_heading level0 row10\" >1</th>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col0\" class=\"data row10 col0\" >Graph2Vec</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col1\" class=\"data row10 col1\" >biotex_bvlac_ext_1</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col2\" class=\"data row10 col2\" >0.012</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col3\" class=\"data row10 col3\" >0.026</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col4\" class=\"data row10 col4\" >0</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col5\" class=\"data row10 col5\" >0</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col6\" class=\"data row10 col6\" >0.0095</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col7\" class=\"data row10 col7\" >0.038</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col8\" class=\"data row10 col8\" >0.012</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col9\" class=\"data row10 col9\" >0.026</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col10\" class=\"data row10 col10\" >0</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col11\" class=\"data row10 col11\" >0</td>\n", - " <td id=\"T_f54c6818_4afa_11e9_bcf6_6a0002e84820row10_col12\" class=\"data row10 col12\" >0.038</td>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row0\" class=\"row_heading level0 row0\" >6</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col0\" class=\"data row0 col0\" >BOW</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col1\" class=\"data row0 col1\" >inra_gen_country</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col2\" class=\"data row0 col2\" >0.685714</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col3\" class=\"data row0 col3\" >0.290909</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col4\" class=\"data row0 col4\" >0.418182</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col5\" class=\"data row0 col5\" >0.223377</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col6\" class=\"data row0 col6\" >0.708985</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col7\" class=\"data row0 col7\" >0.404545</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row0_col8\" class=\"data row0 col8\" >1.61818</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row1\" class=\"row_heading level0 row1\" >5</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row1_col0\" class=\"data row1 col0\" >VertexEdgeOverlap</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row1_col1\" class=\"data row1 col1\" >gen_country</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row1_col2\" class=\"data row1 col2\" >0.654545</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row1_col3\" class=\"data row1 col3\" >0.280519</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row1_col4\" class=\"data row1 col4\" >0.397403</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row1_col5\" class=\"data row1 col5\" >0.223377</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row1_col6\" class=\"data row1 col6\" >0.706389</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row1_col7\" class=\"data row1 col7\" >0.388961</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row1_col8\" class=\"data row1 col8\" >1.55584</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row2\" class=\"row_heading level0 row2\" >21</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row2_col0\" class=\"data row2 col0\" >VertexEdgeOverlap</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row2_col1\" class=\"data row2 col1\" >biotex_lda_bvlac_gen_country</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row2_col2\" class=\"data row2 col2\" >0.662338</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row2_col3\" class=\"data row2 col3\" >0.272727</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row2_col4\" class=\"data row2 col4\" >0.38961</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row2_col5\" class=\"data row2 col5\" >0.223377</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row2_col6\" class=\"data row2 col6\" >0.703791</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row2_col7\" class=\"data row2 col7\" >0.387013</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row2_col8\" class=\"data row2 col8\" >1.54805</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row3\" class=\"row_heading level0 row3\" >29</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row3_col0\" class=\"data row3 col0\" >MCS</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row3_col1\" class=\"data row3 col1\" >biotex_lda_bvlac_ext_1</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row3_col2\" class=\"data row3 col2\" >0.67013</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row3_col3\" class=\"data row3 col3\" >0.267532</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row3_col4\" class=\"data row3 col4\" >0.374026</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row3_col5\" class=\"data row3 col5\" >0.223377</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row3_col6\" class=\"data row3 col6\" >0.708985</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row3_col7\" class=\"data row3 col7\" >0.383766</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row3_col8\" class=\"data row3 col8\" >1.53506</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row4\" class=\"row_heading level0 row4\" >19</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row4_col0\" class=\"data row4 col0\" >MCS</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row4_col1\" class=\"data row4 col1\" >biotex_lda_bvlac_ext_2</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row4_col2\" class=\"data row4 col2\" >0.664935</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row4_col3\" class=\"data row4 col3\" >0.267532</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row4_col4\" class=\"data row4 col4\" >0.371429</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row4_col5\" class=\"data row4 col5\" >0.223377</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row4_col6\" class=\"data row4 col6\" >0.708985</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row4_col7\" class=\"data row4 col7\" >0.381818</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row4_col8\" class=\"data row4 col8\" >1.52727</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row5\" class=\"row_heading level0 row5\" >69</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row5_col0\" class=\"data row5 col0\" >VertexEdgeOverlap</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row5_col1\" class=\"data row5 col1\" >dev_du_gen_region</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row5_col2\" class=\"data row5 col2\" >0.667532</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row5_col3\" class=\"data row5 col3\" >0.249351</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row5_col4\" class=\"data row5 col4\" >0.381818</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row5_col5\" class=\"data row5 col5\" >0.223377</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row5_col6\" class=\"data row5 col6\" >0.690807</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row5_col7\" class=\"data row5 col7\" >0.380519</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row5_col8\" class=\"data row5 col8\" >1.52208</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row6\" class=\"row_heading level0 row6\" >20</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row6_col0\" class=\"data row6 col0\" >MCS</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row6_col1\" class=\"data row6 col1\" >inra_gen_country</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row6_col2\" class=\"data row6 col2\" >0.646753</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row6_col3\" class=\"data row6 col3\" >0.262338</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row6_col4\" class=\"data row6 col4\" >0.374026</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row6_col5\" class=\"data row6 col5\" >0.223377</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row6_col6\" class=\"data row6 col6\" >0.66224</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row6_col7\" class=\"data row6 col7\" >0.376623</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row6_col8\" class=\"data row6 col8\" >1.50649</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row7\" class=\"row_heading level0 row7\" >54</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row7_col0\" class=\"data row7 col0\" >Jaccard</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row7_col1\" class=\"data row7 col1\" >inra</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row7_col2\" class=\"data row7 col2\" >0.654545</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row7_col3\" class=\"data row7 col3\" >0.212987</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row7_col4\" class=\"data row7 col4\" >0.394805</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row7_col5\" class=\"data row7 col5\" >0.223377</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row7_col6\" class=\"data row7 col6\" >0.649255</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row7_col7\" class=\"data row7 col7\" >0.371429</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row7_col8\" class=\"data row7 col8\" >1.48571</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row8\" class=\"row_heading level0 row8\" >28</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row8_col0\" class=\"data row8 col0\" >DeepWalk</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row8_col1\" class=\"data row8 col1\" >biotex_lda_bvlac_ext_2</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row8_col2\" class=\"data row8 col2\" >0.462338</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row8_col3\" class=\"data row8 col3\" >0.433766</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row8_col4\" class=\"data row8 col4\" >0.119481</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row8_col5\" class=\"data row8 col5\" >0.0623377</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row8_col6\" class=\"data row8 col6\" >0.659632</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row8_col7\" class=\"data row8 col7\" >0.269481</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row8_col8\" class=\"data row8 col8\" >1.07792</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row9\" class=\"row_heading level0 row9\" >30</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row9_col0\" class=\"data row9 col0\" >WeisfeleirLehmanKernel</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row9_col1\" class=\"data row9 col1\" >biotex_bvlac</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row9_col2\" class=\"data row9 col2\" >0.47013</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row9_col3\" class=\"data row9 col3\" >0.527273</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row9_col4\" class=\"data row9 col4\" >0.0415584</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row9_col5\" class=\"data row9 col5\" >0.00519481</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row9_col6\" class=\"data row9 col6\" >0.568792</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row9_col7\" class=\"data row9 col7\" >0.261039</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row9_col8\" class=\"data row9 col8\" >1.04416</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row10\" class=\"row_heading level0 row10\" >52</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row10_col0\" class=\"data row10 col0\" >Graph2Vec</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row10_col1\" class=\"data row10 col1\" >biotex_lda_bvlac</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row10_col2\" class=\"data row10 col2\" >0.0285714</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row10_col3\" class=\"data row10 col3\" >0.0415584</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row10_col4\" class=\"data row10 col4\" >0.0025974</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row10_col5\" class=\"data row10 col5\" >0</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row10_col6\" class=\"data row10 col6\" >0.0545411</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row10_col7\" class=\"data row10 col7\" >0.0181818</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row10_col8\" class=\"data row10 col8\" >0.0727273</td>\n", + " </tr>\n", + " <tr>\n", + " <th id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820level0_row11\" class=\"row_heading level0 row11\" >1</th>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col0\" class=\"data row11 col0\" >Graph2Vec</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col1\" class=\"data row11 col1\" >biotex_bvlac_ext_1</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col2\" class=\"data row11 col2\" >0.0155844</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col3\" class=\"data row11 col3\" >0.0207792</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col4\" class=\"data row11 col4\" >0</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col5\" class=\"data row11 col5\" >0</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col6\" class=\"data row11 col6\" >0.0363613</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col7\" class=\"data row11 col7\" >0.00909091</td>\n", + " <td id=\"T_239dc4d8_4c06_11e9_895b_6a0002e84820row11_col8\" class=\"data row11 col8\" >0.0363636</td>\n", " </tr>\n", " </tbody></table>" ], "text/plain": [ - "<pandas.io.formats.style.Styler at 0x13d4df048>" + "<pandas.io.formats.style.Styler at 0x135903da0>" ] }, - "execution_count": 125, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "index,data_pa=pareto_frontier_multi(data[\"c1 c2 c3 c4\".split()].values)\n", + "index,data_pa=pareto_frontier_multi(data[\"c1 c2 c3 c4 c5\".split()].values)\n", "colorize(data.iloc[index].sort_values(\"sum\",ascending=False),to_colorize)\n" ] }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<style type=\"text/css\" >\n", - " #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col2 {\n", + " #T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col2 {\n", " background-color: yellow;\n", - " : ;\n", - " } #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col3 {\n", - " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col4 {\n", - " background-color: yellow;\n", - " : ;\n", - " } #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col5 {\n", + " } #T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col3 {\n", " background-color: yellow;\n", - " : ;\n", - " } #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col6 {\n", - " background-color: yellow;\n", - " : ;\n", - " } #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col7 {\n", + " background-color: #d64541;\n", + " color: white;\n", + " } #T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col4 {\n", " background-color: yellow;\n", - " : ;\n", - " } #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col2 {\n", - " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col3 {\n", + " } #T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col5 {\n", " background-color: yellow;\n", - " : ;\n", - " } #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col4 {\n", - " : ;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col5 {\n", - " : ;\n", + " } #T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col6 {\n", + " background-color: yellow;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col6 {\n", - " : ;\n", + " } #T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col7 {\n", + " background-color: yellow;\n", " background-color: #d64541;\n", " color: white;\n", - " } #T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col7 {\n", - " : ;\n", + " } #T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col8 {\n", + " background-color: yellow;\n", " background-color: #d64541;\n", " color: white;\n", - " }</style><table id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820\" ><thead> <tr> <th class=\"blank level0\" ></th> <th class=\"col_heading level0 col0\" >mesure</th> <th class=\"col_heading level0 col1\" >type</th> <th class=\"col_heading level0 col2\" >c1</th> <th class=\"col_heading level0 col3\" >c2</th> <th class=\"col_heading level0 col4\" >c3</th> <th class=\"col_heading level0 col5\" >c4</th> <th class=\"col_heading level0 col6\" >mean</th> <th class=\"col_heading level0 col7\" >sum</th> <th class=\"col_heading level0 col8\" >c1_w</th> <th class=\"col_heading level0 col9\" >c2_w</th> <th class=\"col_heading level0 col10\" >c3_w</th> <th class=\"col_heading level0 col11\" >c4_w</th> <th class=\"col_heading level0 col12\" >sum_w</th> </tr></thead><tbody>\n", + " }</style><table id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820\" ><thead> <tr> <th class=\"blank level0\" ></th> <th class=\"col_heading level0 col0\" >mesure</th> <th class=\"col_heading level0 col1\" >type</th> <th class=\"col_heading level0 col2\" >c1</th> <th class=\"col_heading level0 col3\" >c2</th> <th class=\"col_heading level0 col4\" >c3</th> <th class=\"col_heading level0 col5\" >c4</th> <th class=\"col_heading level0 col6\" >c5</th> <th class=\"col_heading level0 col7\" >mean</th> <th class=\"col_heading level0 col8\" >sum</th> <th class=\"col_heading level0 col9\" >c1_w</th> <th class=\"col_heading level0 col10\" >c2_w</th> <th class=\"col_heading level0 col11\" >c3_w</th> <th class=\"col_heading level0 col12\" >c4_w</th> <th class=\"col_heading level0 col13\" >sum_w</th> <th class=\"col_heading level0 col14\" >c5_w</th> </tr></thead><tbody>\n", " <tr>\n", - " <th id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820level0_row0\" class=\"row_heading level0 row0\" >7</th>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col0\" class=\"data row0 col0\" >BOW</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col1\" class=\"data row0 col1\" >dev_du</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col2\" class=\"data row0 col2\" >0.698</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col3\" class=\"data row0 col3\" >0.196</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col4\" class=\"data row0 col4\" >0.336</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col5\" class=\"data row0 col5\" >0.2</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col6\" class=\"data row0 col6\" >0.3575</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col7\" class=\"data row0 col7\" >1.43</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col8\" class=\"data row0 col8\" >0.698</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col9\" class=\"data row0 col9\" >0.196</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col10\" class=\"data row0 col10\" >0.336</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col11\" class=\"data row0 col11\" >0.2</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row0_col12\" class=\"data row0 col12\" >1.43</td>\n", - " </tr>\n", - " <tr>\n", - " <th id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820level0_row1\" class=\"row_heading level0 row1\" >32</th>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col0\" class=\"data row1 col0\" >BOW</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col1\" class=\"data row1 col1\" >biotex_bvlac_gen_region</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col2\" class=\"data row1 col2\" >0.622</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col3\" class=\"data row1 col3\" >0.208</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col4\" class=\"data row1 col4\" >0.314</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col5\" class=\"data row1 col5\" >0.192</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col6\" class=\"data row1 col6\" >0.334</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col7\" class=\"data row1 col7\" >1.336</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col8\" class=\"data row1 col8\" >0.622</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col9\" class=\"data row1 col9\" >0.208</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col10\" class=\"data row1 col10\" >0.314</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col11\" class=\"data row1 col11\" >0.192</td>\n", - " <td id=\"T_35e6ba34_4afb_11e9_bbce_6a0002e84820row1_col12\" class=\"data row1 col12\" >1.336</td>\n", + " <th id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820level0_row0\" class=\"row_heading level0 row0\" >7</th>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col0\" class=\"data row0 col0\" >BOW</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col1\" class=\"data row0 col1\" >dev_du</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col2\" class=\"data row0 col2\" >0.698</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col3\" class=\"data row0 col3\" >0.194</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col4\" class=\"data row0 col4\" >0.342</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col5\" class=\"data row0 col5\" >0.202</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col6\" class=\"data row0 col6\" >0.589836</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col7\" class=\"data row0 col7\" >0.359</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col8\" class=\"data row0 col8\" >1.436</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col9\" class=\"data row0 col9\" >0.0698</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col10\" class=\"data row0 col10\" >0.0776</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col11\" class=\"data row0 col11\" >0.1368</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col12\" class=\"data row0 col12\" >0.0202</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col13\" class=\"data row0 col13\" >0.363384</td>\n", + " <td id=\"T_f8f26ac6_4bfe_11e9_94f0_6a0002e84820row0_col14\" class=\"data row0 col14\" >0.0589836</td>\n", " </tr>\n", " </tbody></table>" ], "text/plain": [ - "<pandas.io.formats.style.Styler at 0x13d608f28>" + "<pandas.io.formats.style.Styler at 0x12b1a1940>" ] }, - "execution_count": 131, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "index,data_pa=pareto_frontier_multi(data[data.mesure==\"BOW\"][\"c1 c2 c3 c4\".split()].values)\n", + "index,data_pa=pareto_frontier_multi(data[data.mesure==\"BOW\"][\"c1 c2 c5\".split()].values)\n", "colorize(data[data.mesure==\"BOW\"].iloc[index].sort_values(\"sum\",ascending=False).head(15),to_colorize)" ] }, diff --git a/notebooks/MatchingAnalysis/c1.png b/notebooks/MatchingAnalysis/c1.png deleted file mode 100644 index 191cd4ae246c1f9da15eec5b1f2508e8f11ba482..0000000000000000000000000000000000000000 Binary files a/notebooks/MatchingAnalysis/c1.png and /dev/null differ diff --git a/notebooks/MatchingAnalysis/c2.png b/notebooks/MatchingAnalysis/c2.png deleted file mode 100644 index 62deb108f8a718c943653cd7f5bda9b37244c0e0..0000000000000000000000000000000000000000 Binary files a/notebooks/MatchingAnalysis/c2.png and /dev/null differ diff --git a/notebooks/MatchingAnalysis/c3.png b/notebooks/MatchingAnalysis/c3.png deleted file mode 100644 index 562751ac827e31e5a1a43f37c63a5b0e55287f1d..0000000000000000000000000000000000000000 Binary files a/notebooks/MatchingAnalysis/c3.png and /dev/null differ diff --git a/notebooks/MatchingAnalysis/c4.png b/notebooks/MatchingAnalysis/c4.png deleted file mode 100644 index b6108914869ed739cc0cd9e5b81a6b497d697b24..0000000000000000000000000000000000000000 Binary files a/notebooks/MatchingAnalysis/c4.png and /dev/null differ diff --git a/notebooks/MatchingAnalysis/output.png b/notebooks/MatchingAnalysis/output.png deleted file mode 100644 index fac3f62694d1a0e876daec43f554ffaa6e146873..0000000000000000000000000000000000000000 Binary files a/notebooks/MatchingAnalysis/output.png and /dev/null differ diff --git a/notebooks/MatchingAnalysis/sum.png b/notebooks/MatchingAnalysis/sum.png deleted file mode 100644 index 25411987e939bf07c175a8b83d7a71df2e8090db..0000000000000000000000000000000000000000 Binary files a/notebooks/MatchingAnalysis/sum.png and /dev/null differ diff --git a/run_automatic_annotation.py b/run_automatic_annotation.py index 8e64ae4a52a871dd4b648d2816d13190e768568c..bf04d415750a94c6b84688df6d857f685997f7ea 100644 --- a/run_automatic_annotation.py +++ b/run_automatic_annotation.py @@ -1,22 +1,44 @@ # coding = utf-8 -import argparse -import logging,json +import argparse, shutil, os +import logging, json +from mytoolbox.env import yes_or_no +from auto_fill_annotation import main + for _ in ("boto", "elasticsearch", "urllib3", "sklearn"): logging.getLogger(_).setLevel(logging.CRITICAL) -parser=argparse.ArgumentParser() +parser = argparse.ArgumentParser() -parser.add_argument("dataset",help="Name of the dataset") -parser.add_argument("sim_matrix_dir",help="Similarity Matrix Directory") +parser.add_argument("dataset", help="Name of the dataset") +parser.add_argument("sim_matrix_dir", help="Similarity Matrix Directory") parser.add_argument("graph_data_dir", help="STR without transformation graph directory") -parser.add_argument("adjacency_fn", help ="Adjacency Information json filename") -parser.add_argument("inclusion_fn", help ="Inclusion Information json filename") -parser.add_argument("selected_json_file",help="Filename containing the STR graph you want to make your evaluation on") -parser.add_argument("-t","--threshold",default=0.5,help="Threshold for the third criteria") +parser.add_argument("adjacency_fn", help="Adjacency Information json filename") +parser.add_argument("inclusion_fn", help="Inclusion Information json filename") +parser.add_argument("selected_json_file", help="Filename containing the STR graph you want to make your evaluation on") +parser.add_argument("length_json_file", help="Filename containing the STR text length") +parser.add_argument("-t", "--threshold", default=0.5, help="Threshold for the third criteria") +parser.add_argument("-g", "--ming1",type=int, default=0, help="Return evaluation results based on min size for G1") +parser.add_argument("-j", "--ming2",type=int, default=0, help="Return evaluation results based on min size for G2") +parser.add_argument("-m", "--nb_car_doc1",type=int, default=0, help="Return evaluation results based on min size of associated text for G1") +parser.add_argument("-n", "--nb_car_doc2",type=int, default=0, help="Return evaluation results based on min size of associated text for G2") + +args = parser.parse_args() +if os.path.exists("temp_cluster") and yes_or_no("Do you want to compute STR's clusters all over again ?"): + shutil.rmtree('temp_cluster', ignore_errors=True) + os.makedirs("temp_cluster") -args=parser.parse_args() -from auto_fill_annotation import main -main(args.dataset,args.sim_matrix_dir,args.graph_data_dir,json.load(open(args.selected_json_file)),args.threshold,args.inclusion_fn, args.adjacency_fn) \ No newline at end of file +main(args.dataset, + args.sim_matrix_dir, + args.graph_data_dir, + json.load(open(args.selected_json_file)), + args.threshold, + args.inclusion_fn, + args.adjacency_fn, + args.length_json_file, + args.ming1, + args.ming2, + args.nb_car_doc1, + args.nb_car_doc2) diff --git a/run_test.py b/run_test.py new file mode 100644 index 0000000000000000000000000000000000000000..11246ec8168050af89811ecd4272e1713051f4c8 --- /dev/null +++ b/run_test.py @@ -0,0 +1,128 @@ +# coding = utf-8 +import argparse +import os + +import pandas as pd +import numpy as np + +from tqdm import tqdm +from skcriteria.madm import closeness, simple +from skcriteria import Data, MIN, MAX + + +def pareto_frontier_multi(myArray): + # Sort on first dimension + myArray = myArray[myArray[:, 0].argsort()] + # Add first row to pareto_frontier + pareto_frontier = myArray[0:1, :] + indices, i = [], 1 + # Test next row against the last row in pareto_frontier + for row in myArray[1:, :]: + if sum([row[x] >= pareto_frontier[-1][x] + for x in range(len(row))]) == len(row): + # If it is better on all features add the row to pareto_frontier + pareto_frontier = np.concatenate((pareto_frontier, [row])) + indices.append(i) + i += 1 + return indices, pareto_frontier + + +parser = argparse.ArgumentParser() +parser.add_argument("input") +parser.add_argument("output_fn") +parser.add_argument("-t","--topn",type=int,default=5) +args = parser.parse_args() + +writer = pd.ExcelWriter(args.output_fn, engine='xlsxwriter') + +if not os.path.exists(args.input): + raise FileNotFoundError("{0} does not exists !".format(args.input)) + +data = pd.read_csv(args.input, index_col=0) +data["mesure"] = data.mesure.apply(lambda x: "BOW" if x == "BagOfNodes" else x) +data["sum"] = data["c1 c2 c3 c4 c5".split()].sum(axis=1) + +combination_pareto_criteria = [ + ("c1_c2_c3_c4_c5", "c1 c2 c3 c4 c5".split()), + ("c1_c2_c5", "c1 c2 c5".split()), + ("c1_c2_c3", "c1 c2 c3".split()), + ("c3_c4", "c3 c4".split()), + ("c5", "c5".split()), + ("c2", "c2".split()), +] + +weight_criteria = [ + ("all_0.2", [0.2, 0.2, 0.2, 0.2, 0.2]), + ("c1_0.5_c5_0.5", [0.5, 0., 0., 0., 0.5]), + ("c2_0.5_c5_0.5", [0., 0.5, 0., 0., 0.5]), + ("c1_0.33_c2_0.33_c3_0,33", [0.33, 0.33, 0.33, 0., 0.]), + ("c1_0.5_c2_0.5", [0.5, 0.5, 0., 0., 0.]), + ("c3_0.5_c4_0.5", [0., 0., 0.5, 0.5, 0.]) +] + + +def get_top_combination_wsm(dataframe, weights,topn): + data = dataframe["c1 c2 c3 c4 c5".split()].values + dd = Data(data, criteria=[MAX, MAX, MAX, MAX, MAX], weights=weights[1]) + index_max = np.argsort(simple.WeightedSum().decide(dd)._rank)[:topn] + df = dataframe.iloc[index_max] + df["name"]=weights[0] + df["type_score"] = "wsm" + return df + + +def get_top_combination_pareto(dataframe, columns,topn): + index, data_pa = pareto_frontier_multi(dataframe[columns[1]].values) + df = data.iloc[index] + df = df.sort_values(by = "sum",ascending=False).head(topn) + df["name"]=columns[0] + df["type_score"] = "pareto" + return df + + +def write_excel(writer, dataframe, title): + dataframe.to_excel(writer, "result", index=False) + number_of_rows=len(dataframe) + worksheet = writer.sheets["result"] + workbook = writer.book + C_letter = 67 + I_letter= 73 + + format1 = workbook.add_format({'bg_color': '#FFC7CE', + 'font_color': '#9C0006'}) + + # Add a format. Green fill with dark green text. + format2 = workbook.add_format({'bg_color': '#C6EFCE', + 'font_color': '#006100'}) + for i in range(C_letter,I_letter): + begin=2 + for end in range(6,number_of_rows + 1,5): + ch_=chr(i) + color_range = "{0}{1}:{0}{2}".format(ch_,begin,end) + worksheet.conditional_format(color_range, {'type': 'bottom', + 'value': '1', + 'format': format1}) + + worksheet.conditional_format(color_range, {'type': 'top', + 'value': '1', + 'format': format2}) + begin=end+1 + writer.save() + +result = None +for comb_ in tqdm(combination_pareto_criteria, desc="Pareto computation"): + dd = get_top_combination_pareto(data, comb_,args.topn) + if not isinstance(result,pd.DataFrame): + result = dd + else: + result = pd.concat((result,dd),axis=0) + +for weight in tqdm(weight_criteria, desc="WSM computation"): + dd= get_top_combination_wsm(data, weight,args.topn) + if not isinstance(result,pd.DataFrame): + result = dd + else: + result = pd.concat((result,dd),axis=0) + + +write_excel(writer,result,args.output_fn.split("/")[-1]) \ No newline at end of file diff --git a/strpython/eval/automatic_annotation.py b/strpython/eval/automatic_annotation.py index c5fb261983fb0c492e4398cb58dbcfa7c4606bff..1ff91506b9f18b13bb97196163214a382c22c7b3 100644 --- a/strpython/eval/automatic_annotation.py +++ b/strpython/eval/automatic_annotation.py @@ -9,6 +9,17 @@ from ..models.str import STR from ..helpers.match_cache import MatchingCache from ..helpers.relation_extraction import AdjacencyRelation, InclusionRelation +import sys + +class JsonProgress(object): + def __init__(self,fn): + self.count = 0 + self.fn= fn + def __call__(self, obj): + self.count += 1 + if self.count %10 == 0: + sys.stdout.write("\rLoading"+self.fn+": %8d" % self.count) + return obj class AnnotationAutomatic(object): """ @@ -23,10 +34,11 @@ class AnnotationAutomatic(object): self.inc_rel_db = InclusionRelation() self.inclusion,self.adjacency = {},{} if inclusion_fn: - self.inclusion = json.load(open(inclusion_fn)) + self.inclusion = json.load(open(inclusion_fn),object_hook=JsonProgress(inclusion_fn)) if adjacency_fn: - self.adjacency = json.load(open(adjacency_fn)) + self.adjacency = json.load(open(adjacency_fn),object_hook=JsonProgress(adjacency_fn)) self.threshold = threshold_c3 + def all(self, str1, str2, id1=None, id2=None): """ @@ -47,7 +59,7 @@ class AnnotationAutomatic(object): return list(value) crit_ = [self.criterion1(str1, str2), self.criterion2(str1, str2), self.criterion3(str1, str2, id1, id2), - self.criterion4(str1, str2, id1, id2)] + self.criterion4(str1, str2, id1, id2),self.criteria5(str1, str2, id1, id2)] self.matching_cache.add(id1, id2, *crit_) return crit_ diff --git a/strpython/helpers/geo_relation_database.py b/strpython/helpers/geo_relation_database.py index 772d43cb5b2b94a08c34d1cb0be7268f2864ef13..a51cbffa10619f97bd49abf1955374349592eec0 100644 --- a/strpython/helpers/geo_relation_database.py +++ b/strpython/helpers/geo_relation_database.py @@ -27,7 +27,7 @@ class GeoRelationMatchingDatabase(): (idse1 text, idse2 text, value integer) """ matching_schema = """CREATE TABLE matching - (dataset text, g1 integer, g2 integer, c1 integer, c2 integer, c3 integer,c4 integer) + (dataset text, g1 integer, g2 integer, c1 integer, c2 integer, c3 integer,c4 integer, c5 REAL ) """ cursor.execute(inclusion_schema) cursor.execute(adjacency_schema) @@ -74,7 +74,7 @@ class GeoRelationMatchingDatabase(): self._db_connection.commit() cursor.close() - def add_matching(self, dataset: str, G1: int, G2: int, c1: bool, c2: bool, c3: bool, c4: bool): + def add_matching(self, dataset: str, G1: int, G2: int, c1: bool, c2: bool, c3: bool, c4: bool,c5: float): """ Add a matching criteria result within the database Parameters @@ -96,8 +96,8 @@ class GeoRelationMatchingDatabase(): """ cursor = self._db_connection.cursor() - cursor.execute('INSERT INTO matching VALUES(?,?,?,?,?,?,?)', - (dataset, G1, G2, int(c1), int(c2), int(c3), int(c4))) + cursor.execute('INSERT INTO matching VALUES(?,?,?,?,?,?,?,?)', + (dataset, G1, G2, int(c1), int(c2), int(c3), int(c4),float(c5))) self._db_connection.commit() cursor.close() @@ -169,7 +169,7 @@ class GeoRelationMatchingDatabase(): result_ = cursor.fetchone() cursor.close() if result_: - return True, tuple(map(int, result_[-4:])) + return True, tuple(map(float, result_[-5:])) return False, False @@ -185,9 +185,9 @@ if __name__ == "__main__": assert g.get_inclusion("GD1", "GD2") == (True, True) assert g.get_inclusion("GD2", "GD1") == (False, False) - g.add_matching("test", 1, 2, True, True, False, True) - g.add_matching("test2", 1, 2, True, False, False, True) - assert g.get_matching(1, 2, "test") == (True, (True, True, False, True)) - assert g.get_matching(1, 2, "test2") != (True, (True, True, False, True)) + g.add_matching("test", 1, 2, True, True, False, True,0.) + g.add_matching("test2", 1, 2, True, False, False, True,0.) + assert g.get_matching(1, 2, "test") == (True, (True, True, False, True,0.)) + assert g.get_matching(1, 2, "test2") != (True, (True, True, False, True,0.)) print("Passed the tests !") diff --git a/strpython/helpers/match_cache.py b/strpython/helpers/match_cache.py index 82669a2f04823d9d28edad8586c5114a82183afd..7e79c28ecf0bee7b37eb51c40cdabf096a491911 100644 --- a/strpython/helpers/match_cache.py +++ b/strpython/helpers/match_cache.py @@ -12,6 +12,6 @@ class MatchingCache: def is_match(self, id_str1: int, id_str2: int): return self.db_rel_match.get_matching(id_str1, id_str2, self.dataset) - def add(self, id_str1: int, id_str2: int, c1: int, c2: int, c3: int, c4: int): + def add(self, id_str1: int, id_str2: int, c1: int, c2: int, c3: int, c4: int, c5: float): if not self.is_match(id_str1, id_str2)[0]: - self.db_rel_match.add_matching(self.dataset, id_str1, id_str2, c1, c2, c3, c4) + self.db_rel_match.add_matching(self.dataset, id_str1, id_str2, c1, c2, c3, c4,c5) diff --git a/strpython/helpers/sim_matrix.py b/strpython/helpers/sim_matrix.py index 0462de5815333d0256c52b9401baee570988816c..1d57994345950e68021711f00d0b981ab05a2f23 100644 --- a/strpython/helpers/sim_matrix.py +++ b/strpython/helpers/sim_matrix.py @@ -36,6 +36,6 @@ def matrix_to_pandas_dataframe(matrix, selected, sim_measure, type_str, n=5): top_n = np.argsort(matrix[line])[::-1][1:n + 1] rank = 1 for val in top_n: - tab_array.append([line, val, sim, type_, rank, 0, 0, 0, 0]) + tab_array.append([line, val, sim, type_, rank, 0, 0, 0, 0,300000]) rank += 1 - return pd.DataFrame(tab_array, columns="G1 G2 sim_measure type_str rank c1 c2 c3 c4".split()) + return pd.DataFrame(tab_array, columns="G1 G2 sim_measure type_str rank c1 c2 c3 c4 c5".split())