diff --git a/auto_fill_annotation.py b/auto_fill_annotation.py index e1de0cbcddf1866d0a4c280b2eda755ec17f4111..f5c77f626242b1e6f39ad818ad64e4309065677e 100644 --- a/auto_fill_annotation.py +++ b/auto_fill_annotation.py @@ -28,16 +28,13 @@ str_graph_path = args.graph_dir strs = {} for file in glob.glob(os.path.join(str_graph_path, "*.gexf")): id_ = int(re.findall("\d+", file)[-1]) - try: - strs[id_] = STR.from_networkx_graph(nx.read_gexf(file)) - except: - strs[id_] = STR({}, []) - + strs[id_] = STR.from_networkx_graph(nx.read_gexf(file)) +#print(strs) def foo(x): try: return annotater.all(strs[x.G1], strs[x.G2]) - except: + except Exception as e: return [0, 0, 0, 0] diff --git a/exp_17_avril.sh b/bash_script/exp_17_avril.sh similarity index 100% rename from exp_17_avril.sh rename to bash_script/exp_17_avril.sh diff --git a/exp_22_may.sh b/bash_script/exp_22_may.sh similarity index 100% rename from exp_22_may.sh rename to bash_script/exp_22_may.sh diff --git a/exp_30mars.sh b/bash_script/exp_30mars.sh similarity index 100% rename from exp_30mars.sh rename to bash_script/exp_30mars.sh diff --git a/exp_fev_18.sh b/bash_script/exp_fev_18.sh similarity index 100% rename from exp_fev_18.sh rename to bash_script/exp_fev_18.sh diff --git a/generate_data.py b/depreciated/generate_data.py similarity index 100% rename from generate_data.py rename to depreciated/generate_data.py diff --git a/generate_data_csv.py b/depreciated/generate_data_csv.py similarity index 100% rename from generate_data_csv.py rename to depreciated/generate_data_csv.py diff --git a/eval.py b/eval.py deleted file mode 100644 index cb21a8c33ba3d61539b6b31478f1513ca530048d..0000000000000000000000000000000000000000 --- a/eval.py +++ /dev/null @@ -1,259 +0,0 @@ -# coding: utf-8 -import glob -import json -import os -import re -import time - -from progressbar import ProgressBar, Timer, Bar, ETA - -from gmatch4py.bag_of_cliques import BagOfCliques -from gmatch4py.ged.approximate_ged import ApproximateGraphEditDistance -from gmatch4py.ged.bipartite_graph_matching_2 import BP_2 -from gmatch4py.ged.greedy_edit_distance import GreedyEditDistance -from gmatch4py.ged.hausdorff_edit_distance import HED -from gmatch4py.jaccard import Jaccard -from gmatch4py.kernels.weisfeiler_lehman import * -from gmatch4py.mcs import MCS -from gmatch4py.vertex_edge_overlap import VertexEdgeOverlap -from strpython.nlp.bow_se import BOWSE -from strpython.pipeline import * - -# Function for output generation -def_temp = [36, -36] -temp = def_temp -max_temp = -30 -dec = 5 - - -def getLocInfo(id_): - global temp, dec - try: - data = get_data(id_) - if 'coord' in data: - return [data["coord"]["lat"], data["coord"]["lon"]] - else: - temp = [temp[0], temp[1] + dec] - if temp[1] >= max_temp: - temp = [temp[0] + dec, def_temp[1]] - return temp - except: - pass - - -def get_associated_es(associated_es_data): - global temp - new_ = {} - temp = def_temp - for id_ in associated_es_data: - try: - new_[id_] = {"label": get_data(id_)["en"], "coord": getLocInfo(id_)} - except: - new_[id_] = {"label": id_, "coord": getLocInfo(id_)} - return new_ - - -def getEdges4Draw(data, edges): - lines = [] - for ed in edges: - lines.append([data[ed[0]]["coord"], data[ed[1]]["coord"], ed[2]["color"]]) - if lines[-1][-1] == "cyan": - lines[-1][-1] = "blue"; - - return lines - - -# Similarity Function between graph and a set of graphs - -def compareMCS(graphs, selected): - return 1 - MCS.compare(graphs, selected) - - -# GED algorithm -def compareGED(graphs, selected): - return ApproximateGraphEditDistance.compare(graphs, selected) - - -def compareBP2(graphs, selected): - return BP_2.compare(graphs, selected) - - -def compareHED(graphs, selected): - return HED.compare(graphs, selected) - - -def compareGreedy(graphs, selected): - return GreedyEditDistance.compare(graphs, selected) - - -def compareWLSubTreeKernel(graphs, selected): - return 1 - WeisfeleirLehmanKernel.compare(graphs, selected, h=3) - - -def compareBOWSE(graphs, selected): - return 1 - BOWSE.compare(graphs, selected) - - -def compareBOC(graphs_array, selected): - return np.ones((len(graphs_array),len(graphs_array))) - BagOfCliques.compare(graphs_array, selected) - - -def compareVEO(graphs_array, selected): - return 1 - VertexEdgeOverlap.compare(graphs_array, selected) - - -def compareJaccard(graphs_array, selected): - return 1 - Jaccard.compare(graphs_array, selected) - - -funcDict = { - "MCS": compareMCS, - "VEO": compareVEO, - "GED": compareGED, - "BP2": compareBP2, - "HED": compareHED, - "GREEDY": compareGreedy, - "WLSUBTREE": compareWLSubTreeKernel, - "BOWSE": compareBOWSE, - "BOC": compareBOC, - "JACCARD": compareJaccard -} - -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument("distance") -parser.add_argument("texts_dir") -parser.add_argument("graphs_dir") -parser.add_argument("metadata_fn") -parser.add_argument("original_dir") -parser.add_argument("-s", "--selectedGraph") -parser.add_argument("-a", "--all", action="store_true") -parser.add_argument("-o", "--output", help="Output Filename") -args = parser.parse_args() - -original_dir = args.original_dir -if not args.distance in funcDict.keys(): - raise NotFoundDistance(args.distance, funcDict) - exit() - -# Load all the text from the corpus -texts = [] -if os.path.exists(args.texts_dir): - files_glob = glob.glob(args.texts_dir + "/*.txt") - texts = [""] * len(files_glob) - for fn in files_glob: - id = int(re.findall("\d+", fn)[-1]) - texts[id] = open(fn).read() - # if not files_: - # print("No .txt files found in {0}".format(args.texts_dir)) - # exit() - # for fn in files_: - # try: - # texts.append() - # except: - # print("{0} could'nt be read !".format(fn)) - -# If output Dir doesn't exists -if not os.path.exists(args.graphs_dir): - print("No graph files were loaded !") - exit() -if not texts: - print("No text files were loaded !") - exit() - -# Load graph data and associated spatial entities of each graph - -assC = json.load(open(args.metadata_fn)) -associated_es, count_per_doc = assC[0], assC[1] - -graphs = {} -for file in glob.glob(args.graphs_dir.rstrip("/") + "/*.gexf"): - id = int(re.findall("\d+", file)[-1]) - graphs[id] = nx.read_gexf(file) - -graphs_array = [nx.Graph() for i in range(max(graphs.keys()) + 1)] -for i, g in graphs.items(): - graphs_array[i] = g - -# We take 50 documents chosen randomly. Then we test, if the top-10 returned documents are relevant ! - -if args.all: - selected_documents_ = list(graphs.keys()) -elif args.selectedGraph: - selected_documents_ = json.load(open(args.selectedGraph)) -# if args.all: -# selected_documents_=list(graphs.keys()) -# else: -# selected_documents_ = [] -# ids=[] -# for i in range(len(graphs)): -# if len(graphs[i])>1: -# ids.append(i) -# -# import random -# random.shuffle(ids) -# try: -# selected_documents_=ids[:50] -# except: -# selected_documents_=ids[:int(len(ids)/2)] - - -# Generating Evaluation Output -top_ten_documents = [] -final_data = {} - -deb = time.time() -print("Computing Similarity Matrix ...") -similarity_matrix = funcDict[args.distance](graphs_array, selected_documents_) -print("Similarity Matrix Computed in {0} s.".format(time.time() - deb)) - -graphs = {} -for file in glob.glob(original_dir.rstrip("/") + "/*.gexf"): - id = int(re.findall("\d+", file)[-1]) - graphs[id] = nx.read_gexf(file) - -nn_ = 5 - -with ProgressBar(max_value=len(selected_documents_), widgets=[' [', Timer(), '] ', Bar(), ' (', ETA(), ') ', ]) as pg: - inc = 0 - for doc_s in selected_documents_: - if not len(graphs[doc_s]) > 0: - continue - bow_score = similarity_matrix[doc_s] - top_docs_score = np.sort(bow_score).astype(float) - top_docs = np.argsort(bow_score).astype(int) - final_data[doc_s] = { - "sp_entities": get_associated_es(graphs[doc_s].nodes()), - "text": texts[doc_s], - } - final_data[doc_s]["edges"] = getEdges4Draw(final_data[doc_s]["sp_entities"], graphs[doc_s].edges(data=True)) - final_data[doc_s]["topk"] = [] - n_top_docs = len(top_docs) - for d in range(n_top_docs): - if not top_docs[d] in graphs or top_docs[d] == doc_s: - continue - if len(final_data[doc_s]["topk"]) == nn_: - break - doc_data = {} - doc_data["score"] = top_docs_score[d] - doc_data["id_txt"] = int(top_docs[d]) - doc_data["text"] = "" # texts[int(top_10_docs[d])] - doc_data["sp_entities"] = get_associated_es(graphs[doc_data["id_txt"]].nodes()) - doc_data["edges"] = getEdges4Draw(doc_data["sp_entities"], graphs[doc_data["id_txt"]].edges(data=True)) - doc_data["relevant"] = None - final_data[doc_s]["topk"].append(doc_data) - inc += 1 - pg.update(inc) - -if not args.output: - print("Saved in gui_graph_viewer/evalTop10STR_{0}.json".format(args.distance)) - open("gui_graph_viewer/evalTop10STR_{0}.json".format(args.distance), 'w').write(json.dumps(final_data, indent=4)) -else: - print("Saved in {0}/evalTop10STR_{1}.json".format(args.output, args.distance)) - if not os.path.exists(args.output): - os.makedirs(args.output) - open("{0}/evalTop10STR_{1}.json".format(args.output.rstrip("/"), args.distance), 'w').write( - json.dumps(final_data, indent=4)) - - diff --git a/eval_disambiguation.py b/eval_disambiguation.py index 0a719c3357cf9e9b5aaa4be030dbdaf8b54ca39e..69968a9518e0af564fae8a282afc8c918c7f0659 100644 --- a/eval_disambiguation.py +++ b/eval_disambiguation.py @@ -29,16 +29,12 @@ else: data_lang = json.load(open("/Users/jacquesfize/LOD_DATASETS/raw_bvlac/associated_lang.json")) data_lang = {int(k): v for k, v in data_lang.items()} - corpus_files=glob.glob("{0}/*.csv".format(corpus_dir)) - acc_MC,acc_GEO,acc_wiki=[],[],[] i=0 - for fn in corpus_files: i+=1 id_=int(re.findall(r"\d+",fn)[-1]) - #sys.stdout.write("\r{0}/{1}".format(i,len(fns))) try: df=pd.read_csv(fn) lang=data_lang[id_] @@ -61,9 +57,6 @@ for fn in corpus_files: ) -# In[63]: - - print("\naccGEO",np.mean(np.nan_to_num(acc_GEO))) print("acc_MC",np.mean(np.nan_to_num(acc_MC))) print("accWiki",np.mean(np.nan_to_num(acc_wiki))) diff --git a/generate_transform.py b/generate_transform.py index 0e8cd2d799dbff51f00f2e52cef6885823e04593..53daca51c2f9f4af8010894b14d8d6bf160156ed 100644 --- a/generate_transform.py +++ b/generate_transform.py @@ -10,7 +10,6 @@ from concurrent.futures import ThreadPoolExecutor import networkx as nx from progressbar import ProgressBar, Timer, Bar, ETA, Counter -from strpython.helpers.boundary import get_all_shapes from strpython.models.str import STR from strpython.nlp.disambiguator.geodict_gaurav import * from strpython.pipeline import * diff --git a/notebooks/EvalDesambiguisationMada.ipynb b/notebooks/EvalDesambiguisationMada.ipynb deleted file mode 100644 index 3d58d2ac2442e408c4fb916f4cae1f1451ddd073..0000000000000000000000000000000000000000 --- a/notebooks/EvalDesambiguisationMada.ipynb +++ /dev/null @@ -1,379 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:18:40.551515Z", - "start_time": "2018-08-24T14:18:40.137529Z" - } - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "%load_ext autoreload" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:18:40.558929Z", - "start_time": "2018-08-24T14:18:40.553463Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/jacquesfize/nas_cloud/Code/str-python\n" - ] - } - ], - "source": [ - "cd .." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:18:40.565725Z", - "start_time": "2018-08-24T14:18:40.560729Z" - } - }, - "outputs": [], - "source": [ - "import glob,re,sys\n", - "fns=glob.glob(\"data/mada_disambiguisation/*.csv\")\n", - "ids_list=[int(re.findall(r\"\\d+\",fn)[-1]) for fn in fns]" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:18:40.582053Z", - "start_time": "2018-08-24T14:18:40.567425Z" - } - }, - "outputs": [], - "source": [ - "import json\n", - "data_lang=json.load(open(\"/Users/jacquesfize/LOD_DATASETS/raw_bvlac/associated_lang.json\"))\n", - "data_lang={int(k):v for k,v in data_lang.items()}" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:18:43.957963Z", - "start_time": "2018-08-24T14:18:40.585425Z" - } - }, - "outputs": [], - "source": [ - "%autoreload\n", - "from strpython.nlp.disambiguator.geodict_gaurav import GauravGeodict\n", - "from strpython.nlp.disambiguator.most_common import MostCommonDisambiguator\n", - "from strpython.nlp.disambiguator.wikipedia_cooc import WikipediaDisambiguator\n", - "disMost_common=MostCommonDisambiguator()\n", - "disGaurav=GauravGeodict()\n", - "disWiki=WikipediaDisambiguator()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:18:44.015575Z", - "start_time": "2018-08-24T14:18:43.960053Z" - } - }, - "outputs": [], - "source": [ - "df=pd.read_csv(\"data/mada_disambiguisation/11.csv\")\n", - "\n", - "def accuracyMostCommon(df,lang):\n", - " df2=df[-df[\"GID\"].isin([\"O\",\"NR\",\"o\"])][[\"text\",\"GID\"]]\n", - " df2[\"disambiguation\"]=df2.text.apply(lambda x:disMost_common.disambiguate_(x,lang)[0])\n", - " return (df2.GID == df2.disambiguation).sum()/len(df2)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:18:44.023135Z", - "start_time": "2018-08-24T14:18:44.017778Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%load_ext autoreload" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:18:44.027539Z", - "start_time": "2018-08-24T14:18:44.024973Z" - } - }, - "outputs": [], - "source": [ - "import re" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:18:44.061164Z", - "start_time": "2018-08-24T14:18:44.029278Z" - } - }, - "outputs": [], - "source": [ - "%autoreload\n", - "def accuracyGeodict(df,lang):\n", - " df2=df[-df[\"GID\"].isin([\"O\",\"NR\",\"o\"])][[\"text\",\"GID\"]]\n", - " res_dis=disGaurav.eval(df2[\"text\"].unique(),lang)\n", - " df2[\"disambiguation\"]=df2.text.apply(lambda x:res_dis[x] if x in res_dis else \"0\")\n", - " return (df2.GID == df2.disambiguation).sum()/len(df2)\n", - "\n", - "def accuracyWiki(df,lang):\n", - " df2=df[-df[\"GID\"].isin([\"O\",\"NR\",\"o\"])][[\"text\",\"GID\"]]\n", - " res_dis=disWiki.disambiguate_wiki(df2[\"text\"].unique(),lang)\n", - " df2[\"disambiguation\"]=df2.text.apply(lambda x:res_dis[x] if x in res_dis else \"0\")\n", - " return (df2.GID == df2.disambiguation).sum()/len(df2)\n", - "#df" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:42:35.179291Z", - "start_time": "2018-08-24T14:18:44.063336Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:12: RuntimeWarning: invalid value encountered in long_scalars\n", - " if sys.path[0] == '':\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-10-f81592812190>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0macc_wiki\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maccuracyWiki\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata_lang\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mid_\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;31m#acc_MC.append(accuracyMostCommon(df,data_lang[id_]))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;31m#acc_GEO.append(accuracyGeodict(df,data_lang[id_]))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m<ipython-input-9-7d392d282df9>\u001b[0m in \u001b[0;36maccuracyWiki\u001b[0;34m(df, lang)\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0maccuracyWiki\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlang\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"GID\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"O\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"NR\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"o\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"text\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"GID\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mres_dis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdisWiki\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisambiguate_wiki\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"text\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlang\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"disambiguation\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mres_dis\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres_dis\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m\"0\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGID\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisambiguation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/nas_cloud/Code/str-python/strpython/nlp/disambiguator/wikipedia_cooc.py\u001b[0m in \u001b[0;36mdisambiguate_wiki\u001b[0;34m(self, entities, lang)\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcand\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mcand2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0;31m# take the lowest co-occurrency between two candidates\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 81\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mcand2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcand\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 82\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcand2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcand\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"weight\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mprob\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;32mcontinue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/networkx/classes/reportviews.py\u001b[0m in \u001b[0;36m__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1025\u001b[0m \u001b[0mseen\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1026\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbrs\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_nodes_nbrs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1027\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mnbr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mnbrs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1028\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnbr\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mseen\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1029\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "acc_MC,acc_GEO,acc_wiki=[],[],[]\n", - "for fn in fns:\n", - " id_=int(re.findall(r\"\\d+\",fn)[-1])\n", - " \n", - " df=pd.read_csv(fn)\n", - " acc_wiki.append(accuracyWiki(df,data_lang[id_]))\n", - " acc_MC.append(accuracyMostCommon(df,data_lang[id_]))\n", - " acc_GEO.append(accuracyGeodict(df,data_lang[id_]))\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:42:35.180200Z", - "start_time": "2018-08-24T14:18:40.127Z" - } - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "np.mean(np.nan_to_num(acc_GEO))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:42:35.181124Z", - "start_time": "2018-08-24T14:18:40.128Z" - } - }, - "outputs": [], - "source": [ - "np.mean(np.nan_to_num(acc_MC))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:42:35.182157Z", - "start_time": "2018-08-24T14:18:40.130Z" - } - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "np.mean(np.nan_to_num(acc_wiki))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:42:35.182992Z", - "start_time": "2018-08-24T14:18:40.131Z" - } - }, - "outputs": [], - "source": [ - "from strpython.helpers.gazeteer_helpers import count_of_se\n", - "sum_,count=0,0\n", - "for fn in fns:\n", - " try:\n", - " id_=int(re.findall(r\"\\d+\",fn)[-1])\n", - " df=pd.read_csv(fn)\n", - " df2=df[-df[\"GID\"].isin([\"O\",\"NR\",\"o\"])][[\"text\",\"GID\"]]\n", - " counts_t=df2.text.apply(lambda x: count_of_se(x,lang=data_lang[id_]))\n", - " sum_+=counts_t.sum()\n", - " count+=len(counts_t)\n", - " except:\n", - " pass\n", - "print(sum_,count)\n", - "print(sum_/count)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-24T14:42:35.184004Z", - "start_time": "2018-08-24T14:18:40.133Z" - } - }, - "outputs": [], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - }, - "toc": { - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "toc_cell": false, - "toc_position": {}, - "toc_section_display": "block", - "toc_window_display": false - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "position": { - "height": "297px", - "left": "914px", - "right": "20px", - "top": "120px", - "width": "350px" - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/EvalDesambiguisationPADIWEB.ipynb b/notebooks/EvalDesambiguisationPADIWEB.ipynb deleted file mode 100644 index 189ca41d2ee687c5ceed5c474a554606e6fcfdb3..0000000000000000000000000000000000000000 --- a/notebooks/EvalDesambiguisationPADIWEB.ipynb +++ /dev/null @@ -1,524 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:11:06.231565Z", - "start_time": "2018-08-27T15:11:05.795641Z" - } - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import json" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:11:06.238529Z", - "start_time": "2018-08-27T15:11:06.233600Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/jacquesfize/nas_cloud/Code/str-python\n" - ] - } - ], - "source": [ - "cd .." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:11:06.330207Z", - "start_time": "2018-08-27T15:11:06.240613Z" - } - }, - "outputs": [], - "source": [ - "from elasticsearch import Elasticsearch\n", - "\n", - "from strpython.config.configuration import config\n", - "\n", - "es = Elasticsearch(config.es_server)\n", - "def get_data_by_geoname_id(id):\n", - " res = es.search(\"gazetteer\", \"place\",\n", - " body={\"query\": {\"bool\": {\"must\": [{\"term\": {\"geonameID\": id}}], \"must_not\": [], \"should\": []}}, \"from\": 0,\n", - " \"size\": 10, \"sort\": [], \"aggs\": {}})\n", - " if res[\"hits\"][\"total\"] > 0:\n", - " res = res[\"hits\"][\"hits\"][0][\"_source\"]\n", - " return res\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:11:06.346204Z", - "start_time": "2018-08-27T15:11:06.332072Z" - } - }, - "outputs": [], - "source": [ - "test=pd.read_csv(\"ens2.csv\")\n", - "def foo(x):\n", - " try:\n", - " test[test[\"sp_en\"] == x[\"id\"]].geonames_id.values[0]\n", - " except:\n", - " \"nan\"\n", - "def parse_file(fn):\n", - " id_=int(re.findall(r\"\\d+\",fn)[-1])\n", - " lang=langdetect.detect(open(\"data/EPI_ELENA/raw_text/{0}.txt\".format(id_)).read())\n", - " df=pd.read_json(fn,orient=\"index\")\n", - " try:\n", - " df=df[(df[\"type\"]==\"location\") & (df[\"annotation\"]==\"correct\")]\n", - " except:\n", - " return\n", - " df[\"geoname\"]=df[\"info\"].apply(lambda x:foo(x))\n", - " df[\"GID\"]=df[\"info\"].apply(lambda x:get_data_by_geoname_id(x[\"id\"])[\"id\"])\n", - " df[\"content\"]=df[\"content\"].apply(lambda x:re.sub(r\"\\s+\",\" \",x.strip()))\n", - " return df,lang\n", - "\n", - "def parse_file2(fn):\n", - " id_=int(re.findall(r\"\\d+\",fn)[-1])\n", - " lang=langdetect.detect(open(\"data/EPI_ELENA/raw_text/{0}.txt\".format(id_)).read())\n", - " df=pd.read_json(fn,orient=\"index\")\n", - " try:\n", - " df=df[(df[\"type\"]==\"location\") & (df[\"annotation\"]==\"correct\")]\n", - " except:\n", - " return\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:08:33.366321Z", - "start_time": "2018-08-27T15:08:33.358349Z" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:11:06.356525Z", - "start_time": "2018-08-27T15:11:06.348143Z" - } - }, - "outputs": [], - "source": [ - "import glob,re,sys\n", - "fns=glob.glob(\"data/EPI_ELENA/final_annotations/*.json\")\n", - "ids_list=[int(re.findall(r\"\\d+\",fn)[-1]) for fn in fns]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:11:06.370866Z", - "start_time": "2018-08-27T15:11:06.358409Z" - } - }, - "outputs": [], - "source": [ - "import langdetect" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:11:09.749290Z", - "start_time": "2018-08-27T15:11:06.373193Z" - } - }, - "outputs": [], - "source": [ - "\n", - "from strpython.nlp.disambiguator.geodict_gaurav import GauravGeodict\n", - "from strpython.nlp.disambiguator.most_common import MostCommonDisambiguator\n", - "from strpython.nlp.disambiguator.wikipedia_cooc import WikipediaDisambiguator\n", - "disMost_common=MostCommonDisambiguator()\n", - "disGaurav=GauravGeodict()\n", - "disWiki=WikipediaDisambiguator()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:11:09.759142Z", - "start_time": "2018-08-27T15:11:09.751214Z" - } - }, - "outputs": [], - "source": [ - "df=pd.read_csv(\"data/mada_disambiguisation/11.csv\")\n", - "\n", - "def accuracyMostCommon(df,lang):\n", - " df2=df[-df[\"GID\"].isin([\"O\",\"NR\",\"o\"])][[\"content\",\"GID\"]]\n", - " df2[\"disambiguation\"]=df2.content.apply(lambda x:disMost_common.disambiguate_(x,lang)[0])\n", - " return (df2.GID == df2.disambiguation).sum()/len(df2)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:11:09.831909Z", - "start_time": "2018-08-27T15:11:09.760876Z" - } - }, - "outputs": [], - "source": [ - "%load_ext autoreload" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:11:10.512110Z", - "start_time": "2018-08-27T15:11:09.833822Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "( after annotation content index \\\n", - " 17 NaN correct Latvia 165 \n", - " 3 1.0 correct Latvia 13 \n", - " 7 NaN correct Latvia 35 \n", - " \n", - " info length type \\\n", - " 17 {'coordinates': [57, 25], 'countryCode': 'LV',... 1 location \n", - " 3 {'coordinates': [57, 25], 'countryCode': 'LV',... 1 location \n", - " 7 {'coordinates': [57, 25], 'countryCode': 'LV',... 1 location \n", - " \n", - " use_for_all geoname GID \n", - " 17 NaN None GD5551940 \n", - " 3 1.0 None GD5551940 \n", - " 7 NaN None GD5551940 , 'en')" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "parse_file(fns[0])\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:11:10.542154Z", - "start_time": "2018-08-27T15:11:10.514743Z" - } - }, - "outputs": [], - "source": [ - "%autoreload\n", - "def accuracyGeodict(df,lang):\n", - " df2=df[-df[\"GID\"].isin([\"O\",\"NR\",\"o\"])][[\"content\",\"GID\"]]\n", - " res_dis=disGaurav.eval(df2[\"content\"].unique(),lang)\n", - " df2[\"disambiguation\"]=df2.content.apply(lambda x:res_dis[x] if x in res_dis else \"0\")\n", - " return (df2.GID == df2.disambiguation).sum()/len(df2)\n", - "def accuracyWiki(df,lang):\n", - " df2=df[-df[\"GID\"].isin([\"O\",\"NR\",\"o\"])][[\"content\",\"GID\"]]\n", - " res_dis=disWiki.disambiguate_wiki(df2[\"content\"].unique(),lang)\n", - " df2[\"disambiguation\"]=df2.content.apply(lambda x:res_dis[x] if x in res_dis else \"0\")\n", - " return (df2.GID == df2.disambiguation).sum()/len(df2)\n", - "#df\n", - "#df" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:13:54.566181Z", - "start_time": "2018-08-27T15:11:10.544793Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in long_scalars\n", - " # This is added back by InteractiveShellApp.init_path()\n", - "/usr/local/lib/python3.6/site-packages/pandas/core/ops.py:816: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n", - " result = getattr(x, name)(y)\n", - "GET http://localhost:9200/gazetteer/place/_search [status:400 request:0.006s]\n", - "GET http://localhost:9200/gazetteer/place/_search [status:400 request:0.004s]\n", - "GET http://localhost:9200/gazetteer/place/_search [status:400 request:0.003s]\n" - ] - } - ], - "source": [ - "acc_MC,acc_GEO,acc_wiki=[],[],[]\n", - "for fn in fns:\n", - " \n", - " try:\n", - " df,lang=parse_file(fn)\n", - " #acc_MC.append(accuracyMostCommon(df,lang))\n", - " #acc_GEO.append(accuracyGeodict(df,lang))\n", - " acc_wiki.append(accuracyWiki(df,lang))\n", - " except:\n", - " pass\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:13:54.577715Z", - "start_time": "2018-08-27T15:13:54.568059Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/site-packages/numpy/core/fromnumeric.py:2957: RuntimeWarning: Mean of empty slice.\n", - " out=out, **kwargs)\n", - "/usr/local/lib/python3.6/site-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n" - ] - }, - { - "data": { - "text/plain": [ - "nan" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "np.mean(np.nan_to_num(acc_GEO))" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:13:54.584996Z", - "start_time": "2018-08-27T15:13:54.579637Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/site-packages/numpy/core/fromnumeric.py:2957: RuntimeWarning: Mean of empty slice.\n", - " out=out, **kwargs)\n", - "/usr/local/lib/python3.6/site-packages/numpy/core/_methods.py:80: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n" - ] - }, - { - "data": { - "text/plain": [ - "nan" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.mean(np.nan_to_num(acc_MC))" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:13:54.591617Z", - "start_time": "2018-08-27T15:13:54.587000Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.5782357139650866" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "np.mean(np.nan_to_num(acc_wiki))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2018-06-19T13:01:36.778853Z", - "start_time": "2018-06-19T13:01:36.775832Z" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "ExecuteTime": { - "end_time": "2018-08-27T15:13:54.802963Z", - "start_time": "2018-08-27T15:13:54.593650Z" - } - }, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'helpers'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-16-d620a808fc3e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mhelpers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgazeteer_helpers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mcount_of_se\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0msum_\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfns\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlang\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparse_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'helpers'" - ] - } - ], - "source": [ - "from helpers.gazeteer_helpers import count_of_se\n", - "sum_,count=0,0\n", - "for fn in fns:\n", - " try:\n", - " df,lang=parse_file(fn)\n", - " counts_t=df.content.apply(lambda x: count_of_se(x,lang=lang))\n", - " sum_+=counts_t.sum()\n", - " count+=len(counts_t)\n", - " except:\n", - " pass\n", - "print(sum_,count)\n", - "print(sum_/count)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - }, - "toc": { - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "toc_cell": false, - "toc_position": {}, - "toc_section_display": "block", - "toc_window_display": false - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "position": { - "height": "297px", - "left": "914px", - "right": "20px", - "top": "120px", - "width": "350px" - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/EvalTopoMadagascar.ipynb b/notebooks/EvalTopoMadagascar.ipynb deleted file mode 100644 index 9f6a358562f87d907751b06b45c0a0568cb0509f..0000000000000000000000000000000000000000 --- a/notebooks/EvalTopoMadagascar.ipynb +++ /dev/null @@ -1,719 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:15:39.543009Z", - "start_time": "2018-05-17T06:15:39.538598Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/jacquesfize/nas_cloud/Code/str-python\n" - ] - } - ], - "source": [ - "cd .." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:15:39.906690Z", - "start_time": "2018-05-17T06:15:39.545042Z" - } - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from nlp.disambiguator.disambiguator import Disambiguator\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:15:41.165016Z", - "start_time": "2018-05-17T06:15:39.908807Z" - } - }, - "outputs": [], - "source": [ - "from pipeline import *\n", - "from nlp.pos_tagger.tagger import Tagger\n", - "from nlp.disambiguator.pagerank import *\n", - "from nlp.disambiguator.geodict_gaurav import *\n", - "from nlp.pos_tagger.treetagger import TreeTagger\n", - "from nlp.ner.stanford_ner import StanfordNER\n", - "from nlp.ner.polyglot import Polyglot\n", - "from nlp.ner.nltk import NLTK\n", - "from nlp.ner.gate_annie import GateAnnie\n", - "from nlp.ner.spacy import Spacy\n", - "from nlp.ner.ner import NER\n", - "from progressbar import ProgressBar\n", - "from polyglot.text import Text" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:15:50.113793Z", - "start_time": "2018-05-17T06:15:41.167223Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Language may not be supported by NTLK !\n" - ] - } - ], - "source": [ - "pipStanford={\n", - " \"en\":Pipeline(lang=\"english\",tagger=Tagger(),ner=StanfordNER(lang=\"en\")),\n", - " \"fr\":Pipeline(lang=\"french\",tagger=Tagger(),ner=StanfordNER(lang=\"fr\"))\n", - "}\n", - "\n", - "pipNLTK={\n", - " \"en\":Pipeline(lang=\"english\",tagger=Tagger(),ner=NLTK(lang=\"en\")),\n", - " \"fr\":Pipeline(lang=\"french\",tagger=Tagger(),ner=NLTK(lang=\"fr\"))\n", - "}\n", - "\n", - "pipPolyglot={\n", - " \"en\":Pipeline(lang=\"english\",tagger=Tagger(),ner=Polyglot(lang=\"en\")),\n", - " \"fr\":Pipeline(lang=\"english\",tagger=Tagger(),ner=Polyglot(lang=\"fr\"))\n", - "}\n", - "\n", - "pipGate={\n", - " \"en\":Pipeline(lang=\"english\",tagger=Tagger(),ner=GateAnnie(lang=\"en\")),\n", - " \"fr\":Pipeline(lang=\"french\",tagger=Tagger(),ner=GateAnnie(lang=\"fr\"))\n", - "}\n", - "\n", - "pipSpacy={\n", - " \"en\":Pipeline(lang=\"english\",tagger=Tagger(),ner=Spacy(lang=\"en\")),\n", - " \"fr\":Pipeline(lang=\"french\",tagger=Tagger(),ner=Spacy(lang=\"fr\"))\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:15:50.130340Z", - "start_time": "2018-05-17T06:15:50.115895Z" - } - }, - "outputs": [], - "source": [ - "import json\n", - "data_lang=json.load(open(\"/Users/jacquesfize/LOD_DATASETS/raw_bvlac/associated_lang.json\"))\n", - "data_lang={int(k):v for k,v in data_lang.items()}" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:15:50.138305Z", - "start_time": "2018-05-17T06:15:50.132448Z" - } - }, - "outputs": [], - "source": [ - "import glob,re,sys\n", - "fns=glob.glob(\"data/mada_disambiguisation/*.csv\")\n", - "ids_list=[int(re.findall(r\"\\d+\",fn)[-1]) for fn in fns]" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:15:50.143454Z", - "start_time": "2018-05-17T06:15:50.139829Z" - } - }, - "outputs": [], - "source": [ - "from ipywidgets import IntProgress\n", - "from IPython.display import display\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:15:50.169663Z", - "start_time": "2018-05-17T06:15:50.145641Z" - } - }, - "outputs": [], - "source": [ - "input_dir=\"/Users/jacquesfize/LOD_DATASETS/raw_bvlac/\"\n", - "\n", - "def compute_precision_recall(pipeline):\n", - " precision=[]\n", - " recall=[]\n", - " co=0\n", - " for i in ids_list:\n", - " sys.stdout.write(\"\\r{0}/{1}\".format(co,len(ids_list)))\n", - " lang=data_lang[i]\n", - " data_real=pd.read_csv(\"data/mada_disambiguisation/{0}.csv\".format(i))\n", - " data_real=data_real[-data_real[\"GID\"].isin([\"O\",\"NR\",\"o\"])][[\"text\",\"GID\"]]\n", - " text=open(\"{0}/{1}.txt\".format(input_dir.rstrip(\"/\"),i)).read()\n", - " \n", - " try:\n", - " res_ner=pipeline[lang].ner.identify(text)\n", - " res_ner=Disambiguator.parse_corpus(res_ner)\n", - " except Exception as e:\n", - " print(e)\n", - " continue\n", - " system_data=pd.DataFrame(res_ner,columns=[\"text\",\"pos\"])\n", - " system_data=system_data[system_data[\"pos\"]==\"LOC\"]\n", - " #count_tp=system_data[\"text\"].str.lower().isin(data_real[\"text\"].str.lower()).sum()\n", - " count_tp=len(set(data_real[\"text\"].str.lower().unique())&(set(system_data[\"text\"].str.lower().unique())))\n", - " count_fp=len(system_data)-count_tp\n", - " try:\n", - " precision.append(count_tp/len(system_data[\"text\"].unique()))\n", - " except:\n", - " print(1)\n", - " precision.append(0)\n", - " try:\n", - " recall.append(count_tp/len(data_real[\"text\"].unique()))\n", - " except:\n", - " print(2)\n", - " recall.append(0)\n", - " co+=1\n", - " return precision,recall\n", - " #pd.DataFrame(res_ner,columns=[\"text\",\"pos\"])\n", - "#compute_precision_recall(pipSpacy)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:15:50.201209Z", - "start_time": "2018-05-17T06:15:50.171396Z" - } - }, - "outputs": [], - "source": [ - "%load_ext autoreload" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:27:25.917340Z", - "start_time": "2018-05-17T06:17:25.038572Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "117/2322\n", - "231/232" - ] - } - ], - "source": [ - "%autoreload\n", - "prec_sp,rec_sp=compute_precision_recall(pipSpacy)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:43:36.230684Z", - "start_time": "2018-05-17T06:27:55.927495Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3/2321\n", - "4/2321\n", - "41/2321\n", - "42/2321\n", - "43/2321\n", - "44/2321\n", - "46/2321\n", - "48/2321\n", - "51/232list index out of range\n", - "54/2321\n", - "61/2321\n", - "65/2321\n", - "76/2321\n", - "78/2321\n", - "79/2321\n", - "82/2321\n", - "83/2321\n", - "114/2321\n", - "116/2321\n", - "2\n", - "117/2321\n", - "156/2321\n", - "157/2321\n", - "174/2321\n", - "193/2321\n", - "194/2321\n", - "205/2321\n", - "211/2321\n", - "214/2321\n", - "215/2321\n", - "220/232list index out of range\n", - "222/2321\n", - "223/2321\n", - "229/232" - ] - } - ], - "source": [ - "%autoreload\n", - "prec_st,rec_st=compute_precision_recall(pipStanford)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T06:56:10.536873Z", - "start_time": "2018-05-17T06:43:36.284258Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "117/2322\n", - "231/232" - ] - } - ], - "source": [ - "prec_nl,rec_nl=compute_precision_recall(pipNLTK)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T07:05:03.304819Z", - "start_time": "2018-05-17T06:56:10.591028Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "41/232Package 'ner2.mg' not found in index\n", - "41/232Package 'ner2.mg' not found in index\n", - "67/232Package 'ner2.mg' not found in index\n", - "114/2321\n", - "2\n", - "228/232" - ] - } - ], - "source": [ - "prec_po,rec_po=compute_precision_recall(pipPolyglot)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T07:19:35.445903Z", - "start_time": "2018-05-17T07:05:03.362992Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2/232list index out of range\n", - "3/232list index out of range\n", - "5/232list index out of range\n", - "8/232list index out of range\n", - "15/232list index out of range\n", - "15/232list index out of range\n", - "15/232list index out of range\n", - "15/232list index out of range\n", - "15/232list index out of range\n", - "15/232list index out of range\n", - "15/232list index out of range\n", - "16/232list index out of range\n", - "21/232list index out of range\n", - "27/232list index out of range\n", - "27/232list index out of range\n", - "27/232list index out of range\n", - "27/232list index out of range\n", - "27/232list index out of range\n", - "27/232list index out of range\n", - "28/232list index out of range\n", - "28/2321\n", - "29/232list index out of range\n", - "29/232list index out of range\n", - "34/232list index out of range\n", - "34/232list index out of range\n", - "34/232list index out of range\n", - "34/232list index out of range\n", - "34/232list index out of range\n", - "35/232list index out of range\n", - "36/232list index out of range\n", - "38/232list index out of range\n", - "38/232list index out of range\n", - "38/232list index out of range\n", - "38/232list index out of range\n", - "38/232list index out of range\n", - "44/232list index out of range\n", - "49/232list index out of range\n", - "50/232list index out of range\n", - "51/232list index out of range\n", - "51/232list index out of range\n", - "52/232list index out of range\n", - "52/232list index out of range\n", - "53/232list index out of range\n", - "54/232list index out of range\n", - "56/232list index out of range\n", - "58/232list index out of range\n", - "58/232list index out of range\n", - "60/232list index out of range\n", - "60/232list index out of range\n", - "61/2321\n", - "62/2321\n", - "63/232list index out of range\n", - "63/232list index out of range\n", - "63/232list index out of range\n", - "63/232list index out of range\n", - "64/232list index out of range\n", - "64/2321\n", - "2\n", - "65/2321\n", - "66/232list index out of range\n", - "66/232list index out of range\n", - "66/232list index out of range\n", - "66/232list index out of range\n", - "66/232list index out of range\n", - "72/232list index out of range\n", - "72/232list index out of range\n", - "72/232list index out of range\n", - "72/232list index out of range\n", - "72/232list index out of range\n", - "72/232list index out of range\n", - "72/232list index out of range\n", - "72/232list index out of range\n", - "73/232list index out of range\n", - "73/232list index out of range\n", - "73/232list index out of range\n", - "73/232list index out of range\n", - "73/232list index out of range\n", - "74/232list index out of range\n", - "77/232list index out of range\n", - "80/232list index out of range\n", - "80/232list index out of range\n", - "82/232list index out of range\n", - "84/232list index out of range\n", - "84/232list index out of range\n", - "89/232list index out of range\n", - "89/232list index out of range\n", - "89/232list index out of range\n", - "89/232list index out of range\n", - "89/232list index out of range\n", - "89/232list index out of range\n", - "95/232list index out of range\n", - "95/2321\n", - "96/232list index out of range\n", - "100/232list index out of range\n", - "101/232list index out of range\n", - "102/232list index out of range\n", - "102/232list index out of range\n", - "102/232list index out of range\n", - "105/232list index out of range\n", - "108/232" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Python 2.7, use buffering of HTTP responses\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 380\u001b[0;31m \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuffering\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 381\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mTypeError\u001b[0m: getresponse() got an unexpected keyword argument 'buffering'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-15-ddd472848dde>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprec_ga\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mrec_ga\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcompute_precision_recall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpipGate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m<ipython-input-8-f7e3a40e4d49>\u001b[0m in \u001b[0;36mcompute_precision_recall\u001b[0;34m(pipeline)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mres_ner\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpipeline\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlang\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0midentify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0mres_ner\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDisambiguator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse_corpus\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres_ner\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/jacquesfize/nas_cloud/Code/str-python/nlp/ner/gate_annie.py\u001b[0m in \u001b[0;36midentify\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhost\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"/ner\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\n\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"\\t\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/requests/api.py\u001b[0m in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \"\"\"\n\u001b[1;32m 111\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 112\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'post'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 113\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/requests/api.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 58\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 59\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 506\u001b[0m }\n\u001b[1;32m 507\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 508\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 509\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 510\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 616\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 617\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 618\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 619\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 620\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 439\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 440\u001b[0;31m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 441\u001b[0m )\n\u001b[1;32m 442\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 599\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout_obj\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 600\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 601\u001b[0;31m chunked=chunked)\n\u001b[0m\u001b[1;32m 602\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 603\u001b[0m \u001b[0;31m# If we're going to release the connection in ``finally:``, then\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 381\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 382\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 383\u001b[0;31m \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 384\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 385\u001b[0m \u001b[0;31m# Remove the TypeError from the exception chain in Python 3;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1329\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1330\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1331\u001b[0;31m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1332\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1333\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mbegin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;31m# read until we get a non-100 response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 297\u001b[0;31m \u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 298\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 258\u001b[0;31m \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"iso-8859-1\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 259\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"status line\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 584\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 585\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 586\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 587\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "prec_ga,rec_ga=compute_precision_recall(pipGate)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T07:25:36.506464Z", - "start_time": "2018-05-17T07:25:36.496991Z" - } - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "def m(x):\n", - " return np.mean(np.nan_to_num(x))\n", - "cols=[\"NER\",\"P\",\"R\"]\n", - "df=pd.DataFrame(columns=cols)\n", - "df=pd.DataFrame([[\"StanfordNER\",m(prec_st),m(rec_st)],\n", - " [\"Polyglot\",m(prec_po),m(rec_po)],[\"NLTK\",m(prec_nl),m(rec_nl)],\n", - " [\"Spacy\",m(prec_sp),m(rec_sp)]],columns=cols)\n", - "df[\"F\"]= df.apply(lambda x: 2*((x[\"P\"]*x[\"R\"])/(x[\"P\"]+x[\"R\"])), axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T07:25:37.723293Z", - "start_time": "2018-05-17T07:25:37.713231Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>NER</th>\n", - " <th>P</th>\n", - " <th>R</th>\n", - " <th>F</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>StanfordNER</td>\n", - " <td>0.319804</td>\n", - " <td>0.169799</td>\n", - " <td>0.221822</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>Polyglot</td>\n", - " <td>0.207006</td>\n", - " <td>0.356064</td>\n", - " <td>0.261805</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>NLTK</td>\n", - " <td>0.137581</td>\n", - " <td>0.158004</td>\n", - " <td>0.147087</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>Spacy</td>\n", - " <td>0.147053</td>\n", - " <td>0.849829</td>\n", - " <td>0.250722</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " NER P R F\n", - "0 StanfordNER 0.319804 0.169799 0.221822\n", - "1 Polyglot 0.207006 0.356064 0.261805\n", - "2 NLTK 0.137581 0.158004 0.147087\n", - "3 Spacy 0.147053 0.849829 0.250722" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": { - "ExecuteTime": { - "end_time": "2018-05-17T07:51:46.198366Z", - "start_time": "2018-05-17T07:51:46.192160Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\\begin{tabular}{llrrr}\n", - "\\toprule\n", - "{} & NER & P & R & F \\\\\n", - "\\midrule\n", - "0 & StanfordNER & 0.319804 & 0.169799 & 0.221822 \\\\\n", - "1 & Polyglot & 0.207006 & 0.356064 & 0.261805 \\\\\n", - "2 & NLTK & 0.137581 & 0.158004 & 0.147087 \\\\\n", - "3 & Spacy & 0.147053 & 0.849829 & 0.250722 \\\\\n", - "\\bottomrule\n", - "\\end{tabular}\n", - "\n" - ] - } - ], - "source": [ - "print(df.to_latex())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - }, - "toc": { - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "toc_cell": false, - "toc_position": {}, - "toc_section_display": "block", - "toc_window_display": false - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "position": { - "height": "217px", - "left": "915px", - "right": "28px", - "top": "120px", - "width": "341px" - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/NER Evaluation.ipynb b/notebooks/NER Evaluation.ipynb index 85610ef95ef519d44f7473ca9ad66df867c74de9..d01c1dc0421a83c4cb88b71dec3c1ddfcd3aba9d 100644 --- a/notebooks/NER Evaluation.ipynb +++ b/notebooks/NER Evaluation.ipynb @@ -1175,9 +1175,9 @@ }, "varInspector": { "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 + "lenName": 16.0, + "lenType": 16.0, + "lenVar": 40.0 }, "kernels_config": { "python": { diff --git a/notebooks/TF TF-IDF IDF.ipynb b/notebooks/TF TF-IDF IDF.ipynb deleted file mode 100644 index 26f99660dfe3bdd64ffde85219bca8d87dcf6244..0000000000000000000000000000000000000000 --- a/notebooks/TF TF-IDF IDF.ipynb +++ /dev/null @@ -1,243 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/jacquesfize/nas_cloud/Code/str-python\n" - ] - } - ], - "source": [ - "cd .." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import glob,re,json,os\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "dataEPI=[open(f).read() for f in glob.glob(\"data/EPI_ELENA/raw_text/*.txt\")]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "%autoreload\n", - "from pipeline import *\n", - "PipEn=Pipeline(lang=\"english\",tagger=Tagger(),ner=StanfordNER(lang=\"en\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "count_global=[]\n", - "for text in dataEPI:\n", - " if not text:\n", - " count_global.append({})\n", - " continue\n", - " counting,_,_= PipEn.parse(text)\n", - " count_global.append(counting)" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": {}, - "outputs": [], - "source": [ - "count_all={}\n", - "for counting in count_global:\n", - " for k,v in counting.items():\n", - " if not k in count_all:count_all[k]=0\n", - " count_all[k]+=v\n", - "count_all=np.array(list(count_all.items()),dtype=[(\"dd\",\"<U10\"),(\"de\",np.int)])" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "metadata": {}, - "outputs": [], - "source": [ - "tf=np.sort(count_all, order='de')[::-1]" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [], - "source": [ - "count_idf={}\n", - "for counting in count_global:\n", - " for k,v in counting.items():\n", - " if not k in count_idf:count_idf[k]=0\n", - " count_idf[k]+=1\n", - "idf=[[k,int(v)] for k,v in count_idf.items()]\n", - "for k in range(len(idf)):\n", - " idf[k]=[get_data(idf[k][0])[\"en\"],np.log(len(dataEPI)/idf[k][1])]\n", - "idf=np.array(idf)\n", - "sorted_=np.argsort(idf[:,1].astype(float))\n", - "idf=idf[sorted_]" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "metadata": {}, - "outputs": [], - "source": [ - "with open(\"resources/tf_epi.csv\",'w') as tf_w:\n", - " for t in tf:\n", - " tf_w.write(\"{0}\\t{1}\\n\".format(get_data(t[0])[\"en\"],t[1]))" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "metadata": {}, - "outputs": [], - "source": [ - "with open(\"resources/idf_epi.csv\",'w') as tf_w:\n", - " for t in idf:\n", - " tf_w.write(\"{0}\\t{1}\\n\".format(t[0],t[1]))" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "metadata": {}, - "outputs": [], - "source": [ - "dataBVLAC=[open(f).read() for f in glob.glob(\"data/BV_LAC21/*.txt\")]" - ] - }, - { - "cell_type": "code", - "execution_count": 105, - "metadata": {}, - "outputs": [], - "source": [ - "count_global_bv=json.load(open(\"associateJPT.json\"))[1]" - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "metadata": {}, - "outputs": [], - "source": [ - "count_idf={}\n", - "for _, counting in count_global_bv.items():\n", - " for k,v in counting.items():\n", - " if not k in count_idf:count_idf[k]=0\n", - " count_idf[k]+=1\n", - "idf=[[k,int(v)] for k,v in count_idf.items()]\n", - "for k in range(len(idf)):\n", - " idf[k]=[get_data(idf[k][0])[\"en\"],np.log(len(dataBVLAC)/idf[k][1])]\n", - "idf=np.array(idf)\n", - "sorted_=np.argsort(idf[:,1].astype(float))\n", - "idf=idf[sorted_]" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [], - "source": [ - "with open(\"resources/idf_bvlac.csv\",'w') as tf_w:\n", - " for t in idf:\n", - " tf_w.write(\"{0}\\t{1}\\n\".format(t[0],t[1]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.0" - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/Update Criteria Value .ipynb b/notebooks/Update Criteria Value .ipynb deleted file mode 100644 index 03bc0820016a487c21cffcfcdf60fe798d11088a..0000000000000000000000000000000000000000 --- a/notebooks/Update Criteria Value .ipynb +++ /dev/null @@ -1,214 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2018-03-07T12:45:14.694851Z", - "start_time": "2018-03-07T12:45:14.245401Z" - } - }, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2018-03-07T12:46:20.365335Z", - "start_time": "2018-03-07T12:46:20.361055Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'/Users/jacquesfize/nas_cloud/Code/str-python'" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%pwd\n" - ] - }, - { - "cell_type": "code", - "execution_count": 136, - "metadata": { - "ExecuteTime": { - "end_time": "2018-03-07T14:39:47.452843Z", - "start_time": "2018-03-07T14:39:47.445671Z" - } - }, - "outputs": [], - "source": [ - "df=pd.read_csv(\"resources/test.tsv\",delimiter=\"\\t\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 137, - "metadata": { - "ExecuteTime": { - "end_time": "2018-03-07T14:39:47.860139Z", - "start_time": "2018-03-07T14:39:47.853669Z" - } - }, - "outputs": [], - "source": [ - "freq_couples=df.groupby([\"id_g1\",\"id_g2\"]).size().reset_index(name='Freq')" - ] - }, - { - "cell_type": "code", - "execution_count": 138, - "metadata": { - "ExecuteTime": { - "end_time": "2018-03-07T14:39:55.422633Z", - "start_time": "2018-03-07T14:39:48.242558Z" - } - }, - "outputs": [], - "source": [ - "new_data=[]\n", - "for index, row in freq_couples.iterrows():\n", - " df_temp=df.query('id_g1 == {0} & id_g2 == {1}'.format(row.id_g1,row.id_g2))\n", - " freq_c_values=df_temp.groupby([\"c1_val\",\"c2_val\",\"c3_val\",\"c4_val\"]).size().reset_index(name='Freq')\n", - " n=len(freq_c_values.index)\n", - " if n >1:\n", - " #max_key=freq_c_values['Freq'].argmax()\n", - " #new_data.append([row.id_g1,row.id_g2,list(freq_c_values.iloc[max_key].drop('Freq').values)])\n", - " #new_data.append([row.id_g1,row.id_g2,df_temp.tail(1)[[\"c1_val\",\"c2_val\",\"c3_val\",\"c4_val\"]].values.tolist()[0]])\n", - " new_val=df_temp.tail(1)[[\"c1_val\",\"c2_val\",\"c3_val\",\"c4_val\"]].values.tolist()[0]\n", - " #print(new_val)\n", - " df.loc[(df.id_g1 == row.id_g1) & (df.id_g2 == row.id_g2),['c1_val']] = new_val[0]\n", - " df.loc[(df.id_g1 == row.id_g1) & (df.id_g2 == row.id_g2),['c2_val']] = new_val[1]\n", - " df.loc[(df.id_g1 == row.id_g1) & (df.id_g2 == row.id_g2),['c4_val']] = new_val[2]\n", - " df.loc[(df.id_g1 == row.id_g1) & (df.id_g2 == row.id_g2),['c3_val']] = new_val[3]" - ] - }, - { - "cell_type": "code", - "execution_count": 139, - "metadata": { - "ExecuteTime": { - "end_time": "2018-03-07T14:39:55.498705Z", - "start_time": "2018-03-07T14:39:55.492502Z" - } - }, - "outputs": [], - "source": [ - "freq_couples=df.groupby([\"id_g1\",\"id_g2\"]).size().reset_index(name='Freq')" - ] - }, - { - "cell_type": "code", - "execution_count": 140, - "metadata": { - "ExecuteTime": { - "end_time": "2018-03-07T14:40:00.850421Z", - "start_time": "2018-03-07T14:39:55.566732Z" - } - }, - "outputs": [], - "source": [ - "new_data=[]\n", - "for index, row in freq_couples.iterrows():\n", - " df_temp=df.query('id_g1 == {0} & id_g2 == {1}'.format(row.id_g1,row.id_g2))\n", - " freq_c_values=df_temp.groupby([\"c1_val\",\"c2_val\",\"c3_val\",\"c4_val\"]).size().reset_index(name='Freq')\n", - " n=len(freq_c_values.index)\n", - " if n >1:\n", - " print(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 142, - "metadata": { - "ExecuteTime": { - "end_time": "2018-03-07T14:41:31.263194Z", - "start_time": "2018-03-07T14:41:31.221996Z" - } - }, - "outputs": [], - "source": [ - "df.to_csv(\"resources/test_updated.tsv\",sep=\"\\t\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.0" - }, - "toc": { - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "toc_cell": false, - "toc_position": {}, - "toc_section_display": "block", - "toc_window_display": false - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/WorthItEval.ipynb b/notebooks/WorthItEval.ipynb deleted file mode 100644 index 767999f659bbdd8a80996c9e0cd79d3e03c5f2d1..0000000000000000000000000000000000000000 --- a/notebooks/WorthItEval.ipynb +++ /dev/null @@ -1,1391 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:36:07.248674Z", - "start_time": "2018-04-17T21:36:05.452628Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/jacquesfize/nas_cloud/Code/str-python\n" - ] - }, - { - "data": { - "text/html": [ - "<script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window.Plotly) {{require(['plotly'],function(plotly) {window.Plotly=plotly;});}}</script>" - ], - "text/vnd.plotly.v1+html": [ - "<script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window.Plotly) {{require(['plotly'],function(plotly) {window.Plotly=plotly;});}}</script>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%cd ..\n", - "\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "%matplotlib inline\n", - "import numpy as np\n", - "import networkx as nx\n", - "import json,glob,re,operator\n", - "from math import*\n", - "\n", - "from helpers.gazeteer_helpers import get_data\n", - "from eval.pareto import is_pareto_front\n", - "from eval.visualize import *\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Chargement des données" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:36:08.365942Z", - "start_time": "2018-04-17T21:36:07.251065Z" - } - }, - "outputs": [], - "source": [ - "df=pd.read_csv(\"resources/results_graph_exp18fev.tsv\",delimiter=\"\\t\",index_col=0)\n", - "new_df=pd.DataFrame(columns=df.columns)\n", - "\n", - "selected_graph=json.load(open(\"data/graph_exp_fev_18/selected.json\"))\n", - "types=df.type.unique()\n", - "graph_size={}\n", - "graphs_={}\n", - "\n", - "files_glob= glob.glob(\"data/graph_exp_fev_18/normal/*.gexf\")\n", - "for fn in files_glob:\n", - " id_ = int(re.findall(\"\\d+\", fn)[-1])\n", - " graphs_[id_]=nx.read_gexf(fn)\n", - " graph_size[id_]=len(graphs_[id_])\n", - "graph_size[999]=0\n", - "nb_of_g_w_es_com={}\n", - "for g in graphs_:\n", - " if not g in nb_of_g_w_es_com:\n", - " nb_of_g_w_es_com[g]=0\n", - " for g2 in graphs_:\n", - " if not g2 == g:\n", - " if set(graphs_[g].nodes()).intersection(set(graphs_[g2].nodes())):\n", - " nb_of_g_w_es_com[g]+=1 \n", - "\n", - "df_mesure=pd.read_csv(\"resources/mesures.tsv\",delimiter=\"\\t\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:36:13.607526Z", - "start_time": "2018-04-17T21:36:08.368161Z" - } - }, - "outputs": [], - "source": [ - "\n", - "def get_main_class_graph(g):\n", - " class_n={}\n", - " for node in g.nodes():\n", - " data=get_data(node)\n", - " if \"class\" in data:\n", - " class_=data[\"class\"]\n", - " if isinstance(class_,str):\n", - " class_=[class_]\n", - " if not class_:\n", - " continue\n", - " if len(class_)>1:\n", - " for i in class_:\n", - " if not i == \"P-PPL\":\n", - " if not i in class_n:class_n[i]=0\n", - " class_n[i]+=1\n", - " else:\n", - " if not class_[0] in class_n:class_n[class_[0]]=0\n", - " class_n[class_[0]]+=1\n", - " return class_n\n", - "sets=set([])\n", - "for i in range(len(graphs_)):\n", - " st=get_main_class_graph(graphs_[i])\n", - " if not st:\n", - " continue\n", - " sets.add(max(st.items(), key=operator.itemgetter(1))[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Parameters" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:36:13.617123Z", - "start_time": "2018-04-17T21:36:13.609478Z" - } - }, - "outputs": [], - "source": [ - "granularity={\"A-ADM1\":1,\n", - " \"A-ADM2\":1,\n", - " \"A-ADM3\":1,\n", - " \"A-ADM4\":0,\n", - " \"A-PCLI\":2,\n", - " \"A-PCLS\":2,\n", - " \"H-RVN\":0,\n", - " \"H-SEA\":3,\n", - " \"H-STM\":3,\n", - " \"L-CONT\":3,\n", - " \"L-PRK\":0,\n", - " \"L-RESW\":0,\n", - " \"L-RGN\":0,\n", - " \"P-PPL\":0,\n", - " \"P-PPLA\":2,\n", - " \"P-PPLA2\":2,\n", - " \"P-PPLA3\":2,\n", - " \"S-BLDG\":0,\n", - " \"S-HSP\":0,\n", - " \"S-RSTN\":0,\n", - " \"T-ISL\":2,\n", - " \"T-ISLS\":1\n", - " }\n", - "n=5" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Traitement sur les données" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:36:58.126619Z", - "start_time": "2018-04-17T21:36:13.619448Z" - } - }, - "outputs": [], - "source": [ - "df[\"g1_size\"]=df[\"id_g1\"].apply(lambda x:graph_size[int(x)])\n", - "df[\"g2_size\"]=df[\"id_g2\"].apply(lambda x:graph_size[int(x)])\n", - "df[\"id_g1\"]=df[\"id_g1\"].astype(int)\n", - "df[\"id_g2\"]=df[\"id_g2\"].astype(int)\n", - "df[\"granularity\"]=df[\"id_g1\"].apply(lambda x:max(get_main_class_graph(graphs_[x]).items(), key=operator.itemgetter(1))[0])\n", - "df[\"granularity\"]=df[\"granularity\"].apply(lambda x:granularity[x])\n", - "#df[\"mesure\"]=df[\"mesure\"].apply(lambda x:df_mesure[df_mesure.id==x].values[0][-1])\n", - "df['c1_val']=df.c1_val.astype(int)\n", - "df['c2_val']=df.c2_val.astype(int)\n", - "df['c3_val']=df.c3_val.astype(int)\n", - "df['c4_val']=df.c4_val.astype(int)\n", - "df[\"mesure\"]=df[\"mesure\"].apply(lambda x:int(x))\n", - "df['c1+c2']=df.c1_val | df.c2_val\n", - "\n", - "df['(c1+c2)*c3']=(df.c1_val | df.c2_val) & df.c3_val\n", - "df['(c1+c2)*c3*c4']=((df.c1_val | df.c2_val) & df.c3_val) & df.c4_val \n", - "df[\"for_c\"]=df[\"id_g2\"].apply(lambda x:1)\n", - "df[\"es_in_common\"]=df[\"id_g1\"].apply(lambda x:nb_of_g_w_es_com[x])\n", - "normal=df[df.type == \"normal\"]\n", - "gen_country=df[df.type == \"gen_country\"]\n", - "gen_region=df[df.type == \"gen_region\"]\n", - "extension_1=df[df.type == \"extension_1\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:37:27.045346Z", - "start_time": "2018-04-17T21:36:58.128814Z" - } - }, - "outputs": [], - "source": [ - "df_mesure=pd.read_csv(\"resources/mesures.tsv\",delimiter=\"\\t\")\n", - "df[\"mesureL\"]=df[\"mesure\"].apply(lambda x:df_mesure[df_mesure.id==int(x)].values[0][-1])\n", - "rank_data=json.load(open(\"data/graph_exp_fev_18/rank.json\"))\n", - "new_df=pd.DataFrame(data=None,columns=df.columns)\n", - "for id,row in df.iterrows():\n", - " ranks=set(rank_data[row.type][row.mesureL][str(row.id_g1)][:n])\n", - " if row.id_g2 in ranks:\n", - " new_df=new_df.append(row)\n", - "df=new_df" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:37:31.340138Z", - "start_time": "2018-04-17T21:37:27.047369Z" - } - }, - "outputs": [], - "source": [ - "df_copy=pd.DataFrame(columns=df.columns)\n", - "for t in types:\n", - " mesures=df[df.type == t].mesure.unique()\n", - " for m in mesures:\n", - " data=df[(df.mesure == m) & (df.type == t)]\n", - " for g in selected_graph:\n", - " subset=data[data.id_g1 == g].iloc[:n]\n", - " if len(subset)<1:#No graph found\n", - " df_2=pd.DataFrame([[g,999,m,t,3,0,0,0,0,0,0,0,0,0,0,0,0,0]],columns=df.columns)\n", - " for i in range(n):df_copy=df_copy.append(df_2)\n", - " elif len(subset)<n: # not 5 associated graphs\n", - " df_2=pd.DataFrame([[g,999,m,t,3,0,0,0,0,0,0,0,0,0,0,0,0,0]],columns=df.columns)\n", - " for i in range(n-len(subset)):df_copy=df_copy.append(df_2)\n", - " else:# perfecto ! :P\n", - " df_copy=df_copy.append(subset)\n", - "df=df_copy" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Données finales" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:37:31.369122Z", - "start_time": "2018-04-17T21:37:31.343145Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>id_g1</th>\n", - " <th>id_g2</th>\n", - " <th>mesure</th>\n", - " <th>type</th>\n", - " <th>id_user</th>\n", - " <th>c1_val</th>\n", - " <th>c2_val</th>\n", - " <th>c3_val</th>\n", - " <th>c4_val</th>\n", - " <th>g1_size</th>\n", - " <th>g2_size</th>\n", - " <th>granularity</th>\n", - " <th>c1+c2</th>\n", - " <th>(c1+c2)*c3</th>\n", - " <th>(c1+c2)*c3*c4</th>\n", - " <th>for_c</th>\n", - " <th>es_in_common</th>\n", - " <th>mesureL</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>7574</th>\n", - " <td>101.0</td>\n", - " <td>101.0</td>\n", - " <td>1.0</td>\n", - " <td>gen_region</td>\n", - " <td>3.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>2.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>37.0</td>\n", - " <td>MCS</td>\n", - " </tr>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>57.0</td>\n", - " <td>999.0</td>\n", - " <td>10.0</td>\n", - " <td>gen_region</td>\n", - " <td>3.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2610</th>\n", - " <td>527.0</td>\n", - " <td>450.0</td>\n", - " <td>5.0</td>\n", - " <td>gen_country</td>\n", - " <td>3.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>6.0</td>\n", - " <td>6.0</td>\n", - " <td>2.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>47.0</td>\n", - " <td>HED</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6554</th>\n", - " <td>503.0</td>\n", - " <td>508.0</td>\n", - " <td>8.0</td>\n", - " <td>gen_region</td>\n", - " <td>3.0</td>\n", - " <td>0.0</td>\n", - " <td>1.0</td>\n", - " <td>0.0</td>\n", - " <td>1.0</td>\n", - " <td>2.0</td>\n", - " <td>5.0</td>\n", - " <td>0.0</td>\n", - " <td>1.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>1.0</td>\n", - " <td>3.0</td>\n", - " <td>BOWSE</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3352</th>\n", - " <td>426.0</td>\n", - " <td>31.0</td>\n", - " <td>2.0</td>\n", - " <td>gen_country</td>\n", - " <td>3.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>0.0</td>\n", - " <td>1.0</td>\n", - " <td>9.0</td>\n", - " <td>7.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>1.0</td>\n", - " <td>113.0</td>\n", - " <td>VEO</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5947</th>\n", - " <td>249.0</td>\n", - " <td>272.0</td>\n", - " <td>9.0</td>\n", - " <td>gen_region</td>\n", - " <td>3.0</td>\n", - " <td>1.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>6.0</td>\n", - " <td>3.0</td>\n", - " <td>2.0</td>\n", - " <td>1.0</td>\n", - " <td>0.0</td>\n", - " <td>0.0</td>\n", - " <td>1.0</td>\n", - " <td>78.0</td>\n", - " <td>BOC</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " id_g1 id_g2 mesure type id_user c1_val c2_val c3_val \\\n", - "7574 101.0 101.0 1.0 gen_region 3.0 1.0 1.0 1.0 \n", - "0 57.0 999.0 10.0 gen_region 3.0 0.0 0.0 0.0 \n", - "2610 527.0 450.0 5.0 gen_country 3.0 1.0 1.0 1.0 \n", - "6554 503.0 508.0 8.0 gen_region 3.0 0.0 1.0 0.0 \n", - "3352 426.0 31.0 2.0 gen_country 3.0 1.0 1.0 0.0 \n", - "5947 249.0 272.0 9.0 gen_region 3.0 1.0 0.0 0.0 \n", - "\n", - " c4_val g1_size g2_size granularity c1+c2 (c1+c2)*c3 (c1+c2)*c3*c4 \\\n", - "7574 1.0 1.0 1.0 2.0 1.0 1.0 1.0 \n", - "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", - "2610 1.0 6.0 6.0 2.0 1.0 1.0 1.0 \n", - "6554 1.0 2.0 5.0 0.0 1.0 0.0 0.0 \n", - "3352 1.0 9.0 7.0 1.0 1.0 0.0 0.0 \n", - "5947 0.0 6.0 3.0 2.0 1.0 0.0 0.0 \n", - "\n", - " for_c es_in_common mesureL \n", - "7574 1.0 37.0 MCS \n", - "0 0.0 0.0 0 \n", - "2610 1.0 47.0 HED \n", - "6554 1.0 3.0 BOWSE \n", - "3352 1.0 113.0 VEO \n", - "5947 1.0 78.0 BOC " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.sample(frac=0.001)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:37:31.374617Z", - "start_time": "2018-04-17T21:37:31.371305Z" - } - }, - "outputs": [], - "source": [ - " colorized_subset=['c1_val', 'c2_val', 'c3_val',\n", - " 'c4_val', 'c1+c2', '(c1+c2)*c3', '(c1+c2)*c3*c4']" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:37:31.379290Z", - "start_time": "2018-04-17T21:37:31.376527Z" - } - }, - "outputs": [], - "source": [ - "keys_alone=['c1_val', 'c2_val', 'c3_val', 'c4_val']\n", - "keys_combined=['c1+c2', '(c1+c2)*c3', '(c1+c2)*c3*c4']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Quelle mesure maximise les 4 critères ?\n", - "\n", - "Procédure de test:\n", - "\n", - " * On récupére la valeur de précision pour chaque mesure et critère.\n", - " * On calcule le front de Pareto sur les 4 critères de validation\n", - " \n", - "Résultat :\n", - " MCS et VEO maximise les différents critères selon la valeur de précision moyenne sur l'ensemble des couples de graphes.\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:45:48.576123Z", - "start_time": "2018-04-17T21:45:48.534907Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>mesure</th>\n", - " <th>c1_val</th>\n", - " <th>c2_val</th>\n", - " <th>c3_val</th>\n", - " <th>c4_val</th>\n", - " <th>mesureL</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>2.0</td>\n", - " <td>0.872576</td>\n", - " <td>0.828255</td>\n", - " <td>0.472299</td>\n", - " <td>0.391967</td>\n", - " <td>VEO</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " mesure c1_val c2_val c3_val c4_val mesureL\n", - "1 2.0 0.872576 0.828255 0.472299 0.391967 VEO" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d_pc=df.groupby(['mesure'],as_index=False).mean()[['mesure','c1_val','c2_val','c3_val','c4_val']]\n", - "df_is_pareto = d_pc.apply(lambda row: is_pareto_front(d_pc,row, ['c1_val','c2_val','c3_val','c4_val']), axis=1)\n", - "df_pareto = d_pc.ix[df_is_pareto].sort_values(by=['c1_val','c2_val','c3_val','c4_val'])\n", - "df_pareto[\"mesureL\"]=df_pareto[\"mesure\"].apply(lambda x:df_mesure[df_mesure.id==x].values[0][-1])\n", - "\n", - "df_pareto" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:47:10.116120Z", - "start_time": "2018-04-17T21:47:10.107056Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ",mesure,c1_val,c2_val,c3_val,c4_val,mesureL\n", - "0,1.0,0.8636363636363636,0.8128964059196617,0.4492600422832981,0.38054968287526425,MCS\n", - "1,2.0,0.8725761772853186,0.8282548476454293,0.47229916897506924,0.39196675900277006,VEO\n", - "2,3.0,0.5844748858447488,0.5114155251141552,0.2100456621004566,0.1643835616438356,GED\n", - "3,5.0,0.532051282051282,0.47596153846153844,0.21634615384615385,0.16506410256410256,HED\n", - "4,6.0,0.40425531914893614,0.3829787234042553,0.176759410801964,0.12929623567921442,GREEDY\n", - "5,7.0,0.7154471544715447,0.6097560975609756,0.33739837398373984,0.25203252032520324,WLSUBTREE\n", - "6,8.0,0.8598984771573605,0.7796954314720812,0.43756345177664974,0.3817258883248731,BOWSE\n", - "7,9.0,0.8313131313131313,0.7202020202020202,0.4101010101010101,0.34040404040404043,BOC\n", - "8,10.0,0.7272727272727273,0.7272727272727273,0.39335664335664333,0.3146853146853147,JACCARD\n", - "\n" - ] - } - ], - "source": [ - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Quelles couple \"Mesure-TypeSTR\" maximise la validation des 4 critères ?\n", - "\n", - "Procèdure :\n", - " * On récupére la valeur de précision moyenne pour chaque critère, en fonction de la mesure et du type.\n", - " * On récupére les tuples appartenant au front de pareto sur les 4 critères.\n", - " \n", - "Résultat:\n", - " Comme dans les résultats précédents, les mesures MCS, VEO obtiennent les meilleurs scores. Enfin, les types de STR associées, donnant les meilleurs scores sont : gen_region, extension1, puis normal. On peut déjà conclure que la généralisation --**bornée Pays**-- déforme trop l'information contenue dans les graphes, on perd trop en finesse. " - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T22:01:09.854543Z", - "start_time": "2018-04-17T22:01:09.745770Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>mesure</th>\n", - " <th>type</th>\n", - " <th>c1_val</th>\n", - " <th>c2_val</th>\n", - " <th>c3_val</th>\n", - " <th>c4_val</th>\n", - " <th>mesureL</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>1.0</td>\n", - " <td>gen_country</td>\n", - " <td>0.768293</td>\n", - " <td>0.886179</td>\n", - " <td>0.451220</td>\n", - " <td>0.369919</td>\n", - " <td>MCS</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>2.0</td>\n", - " <td>gen_country</td>\n", - " <td>0.777311</td>\n", - " <td>0.882353</td>\n", - " <td>0.487395</td>\n", - " <td>0.378151</td>\n", - " <td>VEO</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17</th>\n", - " <td>8.0</td>\n", - " <td>gen_region</td>\n", - " <td>0.894309</td>\n", - " <td>0.780488</td>\n", - " <td>0.447154</td>\n", - " <td>0.406504</td>\n", - " <td>BOWSE</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>1.0</td>\n", - " <td>gen_region</td>\n", - " <td>0.917355</td>\n", - " <td>0.801653</td>\n", - " <td>0.466942</td>\n", - " <td>0.400826</td>\n", - " <td>MCS</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>2.0</td>\n", - " <td>gen_region</td>\n", - " <td>0.921488</td>\n", - " <td>0.814050</td>\n", - " <td>0.466942</td>\n", - " <td>0.404959</td>\n", - " <td>VEO</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " mesure type c1_val c2_val c3_val c4_val mesureL\n", - "1 1.0 gen_country 0.768293 0.886179 0.451220 0.369919 MCS\n", - "5 2.0 gen_country 0.777311 0.882353 0.487395 0.378151 VEO\n", - "17 8.0 gen_region 0.894309 0.780488 0.447154 0.406504 BOWSE\n", - "2 1.0 gen_region 0.917355 0.801653 0.466942 0.400826 MCS\n", - "6 2.0 gen_region 0.921488 0.814050 0.466942 0.404959 VEO" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# On regroupe les données selon la mesure et le type de STR utilisé --> Pour chaque critère, on aura la valeur moyenne\n", - "# retourné par le critère sur l'ensemble des couples de graphes de la mesure.\n", - "d_pc=df.groupby(['mesure','type'],as_index=False).mean()[['mesure','type','c1_val','c2_val','c3_val','c4_val']]\n", - "df_is_pareto = d_pc.apply(lambda row: is_pareto_front(d_pc,row, ['c1_val','c2_val','c3_val','c4_val']), axis=1)\n", - "df_pareto = d_pc.ix[df_is_pareto].sort_values(by=['c1_val','c2_val','c3_val','c4_val'])\n", - "df_pareto[\"mesureL\"]=df_pareto[\"mesure\"].apply(lambda x:df_mesure[df_mesure.id==x].values[0][-1])\n", - "\n", - "\n", - "%matplotlib inline\n", - "df_pareto" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "format": "row" - }, - "source": [ - "## Et si on combine les critères ?\n", - "### Pareto sur : c1 ou c2" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T22:10:28.612698Z", - "start_time": "2018-04-17T22:10:28.553467Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>mesure</th>\n", - " <th>type</th>\n", - " <th>c1+c2</th>\n", - " <th>mesureL</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>1.0</td>\n", - " <td>gen_country</td>\n", - " <td>0.955285</td>\n", - " <td>MCS</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " mesure type c1+c2 mesureL\n", - "1 1.0 gen_country 0.955285 MCS" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d_pc=df.groupby(['mesure','type'],as_index=False).mean()[['mesure','type','c1+c2']]\n", - "df_is_pareto = d_pc.apply(lambda row: is_pareto_front(d_pc,row, ['c1+c2']), axis=1)\n", - "df_pareto = d_pc.ix[df_is_pareto].sort_values(by=['c1+c2'])\n", - "df_pareto[\"mesureL\"]=df_pareto[\"mesure\"].apply(lambda x:df_mesure[df_mesure.id==x].values[0][-1])\n", - "\n", - "\n", - "df_pareto" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pareto sur : (c1 ou c2) et c3" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T22:13:30.339282Z", - "start_time": "2018-04-17T22:13:30.281363Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>mesure</th>\n", - " <th>type</th>\n", - " <th>(c1+c2)*c3</th>\n", - " <th>mesureL</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>2.0</td>\n", - " <td>gen_country</td>\n", - " <td>0.487395</td>\n", - " <td>VEO</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " mesure type (c1+c2)*c3 mesureL\n", - "5 2.0 gen_country 0.487395 VEO" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d_pc=df.groupby(['mesure','type'],as_index=False).mean()[['mesure','type','(c1+c2)*c3']]\n", - "df_is_pareto = d_pc.apply(lambda row: is_pareto_front(d_pc,row, ['(c1+c2)*c3']), axis=1)\n", - "df_pareto = d_pc.ix[df_is_pareto].sort_values(by=['(c1+c2)*c3'])\n", - "df_pareto[\"mesureL\"]=df_pareto[\"mesure\"].apply(lambda x:df_mesure[df_mesure.id==x].values[0][-1])\n", - "\n", - "df_pareto" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pareto sur : (c1 ou c2) et c3 et c4" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T22:13:52.690652Z", - "start_time": "2018-04-17T22:13:52.631224Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>mesure</th>\n", - " <th>type</th>\n", - " <th>(c1+c2)*c3*c4</th>\n", - " <th>mesureL</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>22</th>\n", - " <td>9.0</td>\n", - " <td>normal</td>\n", - " <td>0.301653</td>\n", - " <td>BOC</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " mesure type (c1+c2)*c3*c4 mesureL\n", - "22 9.0 normal 0.301653 BOC" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d_pc=df.groupby(['mesure','type'],as_index=False).mean()[['mesure','type','(c1+c2)*c3*c4']]\n", - "df_is_pareto = d_pc.apply(lambda row: is_pareto_front(d_pc,row, ['(c1+c2)*c3*c4']), axis=1)\n", - "df_pareto = d_pc.ix[df_is_pareto].sort_values(by=['(c1+c2)*c3*c4'])\n", - "df_pareto[\"mesureL\"]=df_pareto[\"mesure\"].apply(lambda x:df_mesure[df_mesure.id==x].values[0][-1])\n", - "\n", - "df_pareto" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T22:13:53.665536Z", - "start_time": "2018-04-17T22:13:53.554698Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ",mesure,type,(c1+c2)*c3*c4,mesureL\n", - "22,9.0,normal,0.30165289256198347,BOC\n", - "\n" - ] - } - ], - "source": [ - "d_pc[\"mesureL\"]=d_pc[\"mesure\"].apply(lambda x:df_mesure[df_mesure.id==x].values[0][-1])\n", - "print(df_pareto.to_csv())\n", - "#print(d_pc.sort_values(by=\"c1+c2\").to_csv())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-10T21:01:00.801428Z", - "start_time": "2018-04-10T21:01:00.787430Z" - } - }, - "source": [ - "## Impact de la granularité\n", - "\n", - "### Selon la granularité" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:37:31.723094Z", - "start_time": "2018-04-17T21:37:31.698512Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>granularity</th>\n", - " <th>c1_val</th>\n", - " <th>c2_val</th>\n", - " <th>c3_val</th>\n", - " <th>c4_val</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>3.0</td>\n", - " <td>0.946341</td>\n", - " <td>0.819512</td>\n", - " <td>0.517073</td>\n", - " <td>0.770732</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " granularity c1_val c2_val c3_val c4_val\n", - "3 3.0 0.946341 0.819512 0.517073 0.770732" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d_pc=df.groupby(['granularity'],as_index=False).mean()[['granularity','c1_val','c2_val','c3_val','c4_val']]\n", - "df_is_pareto = d_pc.apply(lambda row: is_pareto_front(d_pc,row, ['c1_val','c2_val','c3_val','c4_val']), axis=1)\n", - "df_pareto = d_pc.ix[df_is_pareto].sort_values(by=['c1_val','c2_val','c3_val','c4_val'])\n", - "\n", - "df_pareto" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:37:31.976361Z", - "start_time": "2018-04-17T21:37:31.725188Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "<matplotlib.axes._subplots.AxesSubplot at 0x10e0f9668>" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAEFCAYAAAAG45eHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xd8VNed9/HP9FGZGbVRoUuALkUSYGNMdccVDMYF24ntOLGdbJzEib3JPslmn5RN1kn2cbJpzsZOnLjEMWAbF2zj3hBgwMYgAbogelMZlRnNaPq9zx+jMkISCCFN03m/XrxUpp0jia+Ozj3ndzSqqiIIgiCkJm28GyAIgiAMHxHygiAIKUyEvCAIQgoTIS8IgpDCRMgLgiCkMH2sXzAUCqstLe2xftmYyc5OR/QvOaVy30D0L9nZ7RbNYB4X85G8Xq+L9UvGlOhf8krlvoHo30glpmsEQRBSmAh5QRCEFCZCXhAEIYWJkBcEQUhhIuQFQRBSmAh5QRCEFCZCXhAEIYXFfDOUIAiC0D9VVXF5AjS0emlo8dLY6sXh9PH9uy8c1POJkBcEQYixsKLQ5PLT2OKloaW9R6A3tvrwB8ND9loi5E9j165q/vSn3/GHPzw2JM/317/+mdzcXJYvv2lInk8QhMTlD4RpbPX2CPCGVi+NLV6aXD7CSu8Dm0wGHfnZaeRnpWE/5e1gJVzIr36vlq01DUP6nBdMyeeWyyad1WP+8Y8nefPN1zGbB//FFQQhdamqSps3GBmNd4R3Q1SQOz2BPh9nTTcwocgSCfCstI5QT8eenYY13YBGM6gSNf1KuJCPB7/fx3/910+oq6sjGAzy4IPfY/ToMfz85//Nf/7n/z3tY9eseY62Nhdf/vJ9BAIBrr76ap544ln++tc/U1OzG5fLyaRJpfzgBz+KUW8EQRgqiqLS3ObrFeANrZGRudffe1pFo4Fcq5lpE7J7jsg7/qWZYhu7CRfyt1w26axH3efqpZdeoLBwFD/5ycMcPXqETZs2cMstt3Py5IkzPvaqq67l61+/h7vvvpcNGz7i0ksvJRDwY7FY+J//eRRFUbjjjltobBzav04EQRgawVCYhlZfnyPyJqeXULj3tIpRr42E9tiOkXjU1Equ1YxelzgLFxMu5OPhyJHDzJ07H4CxY8cxduztA36s1WqltFRi587PeeONV/mP//h3TCYzLS0t/OhHPyA9PR2v10soFBqu5guCcAYeX7BrXry+pedovKXN3+djMtMMjM23kJ/dMa3SMbViz0ojK9M45NMqw0WEPDB+fDF79uxm0aJLOH78GI8//id+/OOfD/jxS5cuZ/XqZ/H7/UycOJG1a9fR0FDPT3/6MC0tLXz00fuoau/RgCAIQ0NRVRytXuQjLTREBXhnsHt8vQdZGiDHamLKuKzuIM9O75paSTenRjymRi/O0bJlK3j44Z/yjW/cRzgc5oEHHjqrx8+adT6/+tXPufPOLwMwdep0/v73v3L//fei0WgYNWo0DkfjcDRdEEaMYEjB4ewO7+ipFYfTRzCk9HqMXqfFnmVm4mhb13RKQUeg59nSMOgTZ1pluGjiMMJUGxvbYv2aMWO3WxD9S06p3DdIjv55/aEeyw273m9pp9nlp6+0SjfpsWenMbbAgi3d0GNqJctiQpsk0ypnMtiToc44kpckSQs8CswA/MA9sizXRt3+b8BtgAv4lSzL6wbTkET38ssv8vbb63t9/mtf+wZlZRVxaJEgJB9VVXF6AlHh3TPQ3d5gn4/LtpiYPDarx2qVzimWzDQDkBy/xOJhINM1ywGzLMvzJEmaCzwCLAOQJKkcuB3o3G+7UZKk92RZTrmDFpctW8GyZSvi3QxBSHihsEKzy9c1pdJjI1Crl0Cw97SKTqshz2buWj9+6tJDo0Ec7TdYAwn5hcB6AFmWN0uSNDvqtqnAB7Is+wAkSdoHVACbh7qhgiAkDn8g3MdOzvaOZYd+lD6mgc1GHYXZ6b12cuZnpZFjNaPVpsa0SqIZSMhbAWfUx2FJkvSyLIeAKuD7kiRZACMwHzhjDQC73TKYtiYN0b/klcp9g4H3T1VVnO4AdU0eTjZ5qHN0vG1q52STh9Z+lh1mWUxI47MpzE2nKC+Totx0CvMyKMrNwJox/MsOU/37NxgDCXkXEP2V03YEPLIs75Ek6Q9ERvpHgE8Ax5meMJXnzVJ9XjCV+5fKfYPe/VMUNTKtcspOzs7RuS/QezenVqMh12Zi+oRs7FHLDSPz42bMxr4jJeAN4PD2vc1/qIyE799gDCTkK4GlwOqOOfmqzhskSbIDFlmWF0iSZAPeAqoH1RJBEIZcuy+Iw+nD4fTh3d3AwWMtXYHucPZdJMto0J5SV6V7aiUnwXZzCmc2kJBfCyyWJGkjkf0Dd0uS9CBQC7wKTJUkaSsQAL4ry/LQ1ciMk1AoxMMP/4STJ08SDAa4666vsHDhxef8vKIKpTCUVFXF4wvR1BHiTU5vV6A7nD6aXD68/r53WmemGRhfaDllJB5ZQx6LaRUhds4Y8rIsK8DXTvl0TdT7Xx3KBr1Yu47tDVVnvuNZmJVfzopJSwZ8/zfffB2rNYv/+I//xOVy8qUv3T4kIS8IZ6MzxB1OL47WziCPhLejI9D7mlIBMBl15NnM5Flt5NrM5NnSKBmbjUkL+dmxL5IlxI/4TtO7CuUDDzzEpZdeDkT+o+l0/X+ZRBVKYbA6S9V2j8S7w7vzc/0dHmE26sizpZFnM3eEuDnq/TQyzPpeo/FUn7MW+pZwIb9i0pKzGnUPhb6qUM6YMYv2dg8//OG/ce+9/9LvY0UVSqE/qqriag/icHojI/BTplIczr7XjAOkmfTkZ6f1CO5ca0eYZ5lJN/UOcUHoS8KFfDz0VYWyvr6OH/zgu9xww01ceeXV/T5WVKEcuTrP4uyeB4+EucPl6wr1QB/1VAAyzHoKc9J7hnfUqDzdbIhxb4RUJUKe3lUof/3rX1FXd4LvfOd7zJ4954yPF1UoU5PSsVa8cxolMvqOGo07fYTCfYd4ZpqBorwM8qzR0ylp5NrM5FrNKVPhUEh8okAZ4Pf7efjhn+JwNBIOh8nMzESWaxg3bnzXfR555HeYTOZ+n+O221Zw551f5s47b6Om5iD/9m8PYjKZ0Gg0+P0+vvnNB9m69ZOkX12TSvO6iqLS6vZ3hbc3pHDkhLPHBc6+DoyASIjnnRrenaNxqzkhL2ym0veuLyOgf4OanxMhP8RGwA9a0vSvM8QdfVzQ7AzxvtaJQ+Qcztw+L2ymkWc1YzImXy2VZPreDcYI6N/wVKEUIkQVysQTVhRa2vynrE7pDvSWNn+/IW7LMDKh0BI1Ak9j4rhsDKjkWM2YREEsIUWIkB8gUYUy9kLhniHedWGzYxTe7Oq7EBaALdPIhCJLz2WGHfPjuVZzn1UNU30kKIxMIuSFuAmFFZrb/DS1ek9ZWhjZvdnc5qevDNcQKYRVMtraNQfeOTeeZzOTYzVh0IuRuCCACHlhGAVDCs1tvlPmwrsDvdXdf4hnW01MHm3rmE5Ji7rIaRb1UwThLIiQFwYtGIocDtHrwmbHOvHWtr6Pa9NoIMdiYvKYrJ7rw61mcrPSyLGYRIgLwhARIS8MSDCk8M62o9Q7fRxvaMPh9OF09106VqvRkGM1IY3L6poDz4sajWeJEBeEmBEh34dwOMwvf/kzjh49DGj47ne/T0nJpHN+3mStQqkoKn9Zt5utNZHSDDptJMSnjMvqVT8l12Ym22JCpxUhLgiJIOFCvnHNc7Rt2zqkz2mZfQH2m28d8P0rKz8G4E9/eoLPPtvGY489yi9+8eshbVOyUFWVZ96S2VrTQOkYG/921xzUYEgc1SYISSLhQj4eTq1C+eCD3+N73/t3AOrr68jM7P9EllSvQrn244N88PkJxuZn8q2bZpCfky6WGQpCDAXCQdoCbdgZvpOhYsp+861nNeoeCn1VoSwrq+BnP/sRH330AT/72S/7fWwqV6F8a+tR1m08RH52Gg+unCnqrQjCEAorYVyBNlr9LpwBF05/979Wv7Prc+0hLwCrx/1pUK8j/tfSdxVKgB/+8Cc0NTm4774v8cwza0hLS+v12FStQllZdZLn3t2HLdPIQytnYsswxrtJgpAUFFXBHfT0DOxTgrw14MId8KD2uf4sIk2fhs1kZZxlDDaTddDtESFP7yqUK1cu56tfvZ877rgbs9mMVqs97Rx0qlWh/Hyfg7+9XkO6Sc9DK2diz+r9y00QRhpVVWkPeXsEddfoO9Ad5q5AG4rad3VSAKPWQJbJRmFWPjaTFZvRGnnb8X6WyYbNZMGoG5qB1RlDXpIkLfAoMAPwA/fIslwbdftDwO2AAvyXLMtrh6RlMbRs2QoefvinfOMb9xEOh/n97//Miy+u4f777yUUCvGtbz142gqUs2adz69+9XPuvPPLAEydOp2///2v3H//vWg0GkaNGo3D0Rir7pwT+UgLf3q5Gr1ew7dvmcEYe2a8myQIw84X8vccaUdNl0QHeVDp/y9ynUaHzWRlvGUsNpOVrKjgjv7YrDPH9MCXM1ahlCRpBXC9LMtfkiRpLvB9WZaXddyWBewEJgEZwOeyLI/v/9kAUYUyYR2pb+OXz35GIKjwrZsqKC/J7XWfZO7fmaRy32Bk9i+ohHqEdHeId34cGX37wv5+n1eDBqvR0j3aNlnJihp9Z5ls2IxW0g1paDXDt3R4OKtQLgTWA8iyvFmSpNlRt3mAw0QCPoPIaD4lpXoVyvrmdn696nN8/jD3Xj+tz4AXhEQRVsK0Bd3dge13EazzcqLF0SPUPcH20z5PpiGD3LScjmmSntMmne9bDJnotMlbC2kgIW8FnFEfhyVJ0suy3Pl3y1FgN6ADHh7Ii9rtg1sKFE/33HMX99xz14Dum2z9a3J6+c3zO3G1B/naigquW1B82vsnW//ORir3DRK/f4qq4PZ7aPa20ux10uJtpcXn7H7f66TZ24rT13bai5bphjSy02yU5Iwl25xFdpqN7DQbOWlZ5KRFPs4yWzHoUv+YxYGEvAt6LNDURgX8NUAR0JkKb0qSVCnL8pbTPeFI+5Mxkbm9QX757Gc0NLezfGExc0rzTtv+ZOvf2UjlvkF8+6eqKt6Q77Tz3q0dFy3Darjf5zFoDdhMViZmTegx2s4yWhlfUIjq1WMz2TCd7qKlCrRDa7sP8A15X4fLYH9BDyTkK4GlwOqOOfmqqNtaAC/gl2VZlSSpFcgaVEuEmPMHwvz2+R0cb/Rw+fljWLpgQrybJCQhfzjQEdTOfladdEynKMF+n0Or0WIzWhlnGd3HSpPuj9P0/V+0TPVf0oM1kJBfCyyWJGkjkSqwd0uS9CBQK8vyK5IkXQFsliRJATYAbw9fc4WhEgor/PGlKvYfdzF3WgG3XTE5plf8hcQXUkI4/W2nX3UScOEN9T8a1qDBYsykKKPv5YI2k40sk5UMQ/qwXrQcyc4Y8rIsK8DXTvl0TdTtPwKSc8/+CKWokYJj1QeaKS/J5cvXTUUrAn7EUFSFtoC77ymTqPfdQc9pnyfDkE62KYsJ1ugVJ7YeywWT/aJlKhCboU6jpaWZr3zlDn7zmz8yfvyEc36+RKhCqaoqz769ly17Gpg0xsbXbygTZX/PgaIqhFUl8lYJd3zc/Tbc5+d7fi4cfVtf91XDKEr3+9HP3/Mx3c/X53NpQjR5WnEFTn/R0qwzYTPZGJVZ1OeqkyyTFavRMiIuWqaChAv5je/t50DN0NZ5KZmSz/zLJp7VY0KhEL/61X9hNJqGtC3x9vKGg7z32XHG2DN44KaKYT2wWlXVqBCMCh81TFhReodVVGiFT3lcfwGqnMV9w0rvdnS/XhitXoM/EOj5fIpy2tc9XVgmGoPOgM1gocQ2vnt9d68pFAtmff8b/4TYUxSF7ZuOcPXy8kE9PuFCPh76qkL5zjtvsXz5jTz99N9O+9hkqkL5zrajvFJ5iDybmQdXziTDPPCRWJ2ngRdr19Ee9uAPBnsHdz8j02ShQYNOq0Or0aLTaNFpOt+PvDXo9Gg1uo7bet7e/bjux2s1OnTavp8r+v6dz6fV6vq9b9fzdb52r9eLbksf7dBo0Wq05OdbxYXJJBMKhXn7pd0cqm1KnZCff9nEsx51n6u+qlBmZWVx4YXzzhjyyVKFctOuOp59Zx/WDCP/eutMsjIH/hfKjsZqntq9Cl/YH1ndgKZHCOm1ekw6U3dgdYRNd5CdGm7dAXdq6PUMPx1a7Sn37TcoTw3F3p/rP2wj9xerM4REEvCHeOOFak4caWX0+MEvWky4kI+HU6tQ/uIX76PRaNi2bQu1tXv52c/+L7/4xa/Jzc3r9dhkqEK5c7+DJ17bQ1pHwbH87PQBPU5RFV47+DbrD72LQWvg7mm3cU35RSIIBWGY+bxBXlu9k4aTbRSX5rH4+mmDfi4R8vSuQmm35/PjH/8cgG984z6++90f9BnwnRK5CuW+Y608urYanVbDAzdVMDZ/YAXH2oPt/G33P9ndJJNnzuG+irsYnVk0zK0VBMHd5mfdqh20ONqRygu55JpStOdwnKYIeXpXoXzggYfO6vGJWoXyaIOb367ZSVhR+eaN5ZSOHdiffMfdJ3ms6ikc3iam5pRy9/TbyTAMbPQvCMLgOVvaefW5nbQ5fVTMHsP8yyee8/6VM1ahHAaiCmUMNLR6efjpT3F6Aty7dBrzphcO6HGf1u/gmT2rCShBrhp/GUtKruyxSSVR+jccUrlvIPqX6Joa3Ly6agdeT5ALFk3g/PnjewT8cFahFEiuKpROt59HntuO0xPgtismDyjgw0qYVw6s550jH2LSGbm37A5m5g/uar4gCGen7riT11ZXEfCHWHjFJMpnjxmy5xYhP0DLlq1g2bIV8W7GGbX7gjyyageNrT6uXzCBxbPHnvEx7oCHJ3b9A7mllvy0PO6ruIuijIIYtFYQhKMHm1n/YjXhkMJlS6YglQ3sr+6BEiGfQvzBML99fifHGt1cet5oli08fclggKNtx3ms6imafS2U503lrmm3kqYXx/0JQizsr2nknVd2o9HAVTeUUVza/wKPwRIhnyJCYYU/vVTNvmNO5kzN5wuLS894weaTk5/yT/kFgkqIa4sXc82Ey0WRKEGIkT07TvLhehm9Qcc1N5Yxenz2sLyOCPkUoKgqT7y+h537mygrzuGeJdNOW3AsrIR5sXYdHxyrxKwz85WKL1KeN/h1uIIgnJ0dW46y8b39mMx6lqysIL/IOmyvJUI+yamqyj/f2cfmXfVMHGXl/hvKT1twzBVo46/Vz1DbepDCjALuK7+TgnR7DFssCCOXqqps+fggn208QkamkSW3ziAnL2NYX1OEfJJ7deMh3v30GKPzMnjg5hmYjP0XHDvkOsLjVU/T6ncy017OHVNvFsWoBCFGVFVlw9v7qP7sBNYsM0tvnYE1a/ivf4mQT2LvfXaMlz4+2FVwLDOt/4JjG09sYZW8lrCqsKzkGhaPv0QcEiIIMRIOK7z/eg37djWQY89gycoKMs6iftS5ECGfpD7ZXc8/3tqLNd3AQytnkm3p+wcmqIR4fu/LbDjxCen6NO6efjvTcqUYt1YQRq5QMMxbL+3m8P4mCkZbue7mckxnUQH2XImQT0JVB5r4y7rdmE06vnPLTApy+i450Op38peqpznoOsLozCLuK7+TvLTcGLdWEEaugD/EG89XceKokzETsrl6RRmG00ypDoczhrwkSVrgUWAG4AfukWW5tuO2mcD/RN19LrBcluXeW0OFIVF73Mkf11ah1Wr41o0VjC/s+wT32taD/KX6adoCbmYXzOQLU27CeLoT7AVBGFLe9gCvrd5JY52bEimPK5ZOQ6eP/RLlgYzklwNmWZbnSZI0F3gEWAYgy/LnwCUAkiTdDBwXAT98jjW6+e2aHYRCKvevKEMa13tdraqqfHx8E2v2vQLAjZOWcOnYRWL+XRBiyO3y8eqqnbQ2tTOlopCLrz63SpLnYiAhvxBYDyDL8mZJkmafegdJkjKAnwAXDeRF7fa+R5+pYjj6V9fk4X/W7MDjC/Gd22Zx2exxve4TCAf5y7Z/8sGhTVhMmXxn3j2UFQz9/Hsqf/9SuW8g+hcLTY1uXvnnDpwtXuZeXMLipdPiOsgaSMhbAWfUx2FJkvSyLEefgvEVYI0sy46BvGgyV4o7k+GohOf0BHj4mU9pdvm59bJJlI/P7vUazb4WHq96miNtxxhnGc295XeSo+19v3OV7JX+TieV+waif7HgqHezbtUOvO1B5lxUzMy5Y3E43EPy3IP9BTaQkHcB0c+uPSXgAb4A3DSoFgin1e4L8ZtVn9PQ4uW6eeO5ck7vEfzellr+Wv0P3EEPcwtns1K6AaMudlfvBUGAk8ecvL5mJwF/mEVXTqbsvNHxbhIwsJCvBJYCqzvm5Kuib5QkyQaYZFk+OgztG9ECwTC/e2EnRxrcXDxzFCsuKulxu6qqvH/0Y9bufx2AlaXLWTR6nph/F4QYO3KgiTdf3EU4rHD50qmUTk+cKq4DCfm1wGJJkjYCGuBuSZIeBGplWX4FKAUODV8TR6awovC/L+9i79FWZkt27rhS6hHegXCAf9Q8z7b6z7EaLXyl7ItMyjpz1UlBEIZW7Z4G3n11DxqthqtvLGPCpKGvJHkuzhjysiwrwNdO+XRN1O1biazAEYaIoqr8/fUaPq91MG1CNvcunY5W2x3wDm8Tj1U9xXH3SYqt47mn/ItkmWxxbLEgjEy7Pz/Bh+v3YjDquPamckaNG9gRm7EkNkMlGFVVWf1eLZXVdRQXWfnGinIMUWtr9zTt5Yld/6A95GXhqAu5qXQZBq34NgpCrG3/5Aib3z+AOU3PkpUzsPezZyXeRDokmNc3H+atrUcpyk3n2zdXYDZGvkWqqvL24Q945cB6dBott0+5kQWjLoxzawVh5FFVlU8+PMj2zUfIsJhYurKC7GGuJHkuRMgnkA8+P84LHx4g12rioZUzsaRHdqj6Qj6e2bOG7Y1VZJls3FN2B8W23qtsBEEYXoqi8vHb+9i9/QS27DSW3joDiy2xK7mKkE8Q22oaeHq9jCXdwEO3ziLHGvnBqW9v5LGqp6jz1DMpq5ivlH0RqzEx/ywUhFQWDiu8t66G2j0N5OZnsGTlDNIzEr9UiAj5BLDrYDN/fmUXJqOO79wyg8KOgmNVjt38fddz+MI+Lh2zkBsmXYdOG9viRoIgQDAY5q2XdnFkfzOFY6xce1NsK0meCxHycXbghIs/vFiFRhMpODah0IqiKrxx6F1eP/g2Bq2eu6bdypzC8+LdVEEYkfy+SCXJk8ecjC3J4arl02NeSfJciJCPoxMOD79Z/TmBUJj7byhnyvhsvCEvT+5+jirHHnLM2dxbfgfjLGPi3VRBGJHaPZFKko56NxOn2Ll86VR0pzleMxGJkI8Th9PLI6s+x+MLcfe1Uziv1M5JTz2PVT1JQ7sDKXsSX57+BTKNiXvVXhBSWZvTx6urduBs9jJ1RhEXXVXaY79KshAhHwcuT4BHVu2gpc3PLZdOYlHFKD5vqOKpPavwhwNcMe5iri+5Wsy/C0KctDS1s27VDtwuPzMvHMvcS0qStlyICPkY8/pD/Gb1Duqb27lm7jiunDOGl/e/wVuH38eoNfDl6bdzfsHMeDdTEEasxro21q3eia89yIUXF3PevPHxbtI5ESEfQ8FQmN+/sJPD9W0sqijimvlFPLrjCfY07yUvLZf7yu9kdGZRvJspCCPWiaOtvPF8FQF/mIuumsz0WYlRSfJciJCPkc6CYzVHWjm/1M7lCy38atvvafI1My1X4u5pt5Fu6PusVkEQht/h/U28uXYXqqJyxfVTmTwtcSpJngsR8jGgqipPviGzfZ+DqeOzOX9ugF9/9ieCSpCrJ1zOdcWL0WqS64q9IKSSfbvreW9dTVclyfETU+fAexHyMbDmg/1sqDrJ+MIMxs06ytM1GzDrTNxdficz7GXxbp4gjGi7tp/gozf3YjTpuOamckaNTbxKkudChPwwe2PzYdZ/coT8PB2Wsu18dOIABel27iu/k8KM1PhzUBCS1WebDvPJhwcxpxtYcktFwlaSPBci5IfRRztOsOaD/djsXrTS5+x3OanIm86d01aSpk/sokaCkMpUVWXzBwf4/JOjZFpNLFk5g+zc1LwmJkJ+mHwqN/Dk+hrSi+oIj6vCFVBYUnwVV024VMy/C0IcKYrKR2/uZc+Ok9hy0li6MvErSZ6LM4a8JEla4FFgBuAH7pFluTbq9muAHxE5GvBT4H5ZltXhaW5y2HOomT+/WoVpQg2q/TBGnZkvTbuNsryp8W6aIIxo4bDCu6/uYX9NI3kFmVx3S0VSVJI8FwMZUi4HzLIszwP+D/BI5w2SJFmA/waWyLJ8IZGzXhPrgMMY23e0hd+9shXd5C1o7IcZlVHI92Z/SwS8IMRZMBjmjReq2V/TSOEYG9ffNjPlAx4GNl2zEFgPIMvyZkmSZkfdNh+oAh6RJKkE+Issy41D38zkcLLJw8Nr30VTuhWN0c+s/Aq+OOVmzHpTvJsmCCOa3xfk9eerqDvmYlxJDlfeMB2DYWSUDRlIyFsBZ9THYUmS9LIsh4iM2i8FZgJu4GNJkjbJsrz3dE9ot6feFezGFi+/eutlQhN2oNXCF2bcwFJpcdLWuzidVPz+dUrlvsHI7J+7zc+LT31G/QkX02eOYvlts9DpR851sYGEvAuI/sppOwIeoAnYKstyHYAkSR8RCfzThnxjY9sgmpq4Wtzt/PSdvxMoOIRJY+a+GV9kak4pDoc73k0bcna7JeW+f51SuW8wMvvX5vTx6nM7cLZ4mTZrFAsXT6a5xROnFp6bwf6CHkjIVwJLgdWSJM0lMj3T6TOgTJKkPKAVmAs8PqiWJKmTriZ+Wfk4QWszGeTyi2seQOtL3Sv1gpAsWpo8vPrcTjxtfmbNG8eFFxWn5F/WZzKQkF8LLJYkaSORFTR3S5L0IFAry/IrkiR9H3iz476rZVmuHqa2JpwaRy1/3P4kislPTqiEH152NwWWPBp9qTtaEoRk0FjXxrpVO/F5g8y9pIRZc0fuwfdnDHlZlhXga6d8uibq9ueA54a4XQlUQYPQAAAgAElEQVRNVVU+OFrJ8/teRdVCgWc2/37tjeh1I+NCjiAkshNHWnn9+SqCgTAXX13KtJmj4t2kuBKboc5SIBzkn/ILbKn7DDVkpMC1kO8vXywCXhASwKFaB2+9tBtVUVm8bBqTpubHu0lxJ0L+LDR5W3i8+imOth1HcdvIa13Ad29ZiHGELMUShERW9dkx3nxxF1qthqtvKmNcSepUkjwXIuQHqKZ5H0/s+geeYDuhxtFkO8/nu1+YQ7pZfAkFId6qPzvOx2/vw2jUce3NFRSNscW7SQlDJNQZqKrKu0c/4qXa19GgIXBwGhbfJP71i+djHQG75QQhkamqymebjrDlo4NkZBq59uZy8gpSey/A2RIhfxr+cIB/7FnDpw07SNdm0FpdjjmUx0NfnEWeLS3ezROEEU1VVTa9v58dW46RaTVx19fnozCiy2b1SYR8Pxrbm3is6klOeOooMo/myCcSeiWNb982g9F5GfFuniCMaIqi8uF6mZqddWTlprN0ZQW59syU3uw1WCLk+7CrSeZvu57FG/IyK3s2n31gRw3C/TeXMXGUmOsThHgKhxTeeXU3B2QH9sJIJcm0dDF12h8R8lFUVeXNw++z7sCb6LQ6lo5dxhtvKPj9Qb66bDplxeJqvSDEUzAQ5s211Rw92ELRWBvX3lSO0SRi7HTEV6eDN+Tj6T2r2dFYTZbJxsqSW3l6bR1t7UHuuEpizlRxVJ8gxJPfF+S1NVXUH3cxfmIuVy6fhl4sXz4jEfJAvaeBP1c9RX17A5OzSrh10kr+sHovTS4/N1xUwqWzRse7iYIworW7/by6aifNjR4mT8vn0uumoNONnEqS52LEh/yOxl08tfs5fGE/l41dxNXjruQ3q6o44fCwePZYlswbH+8mCsKI5mr18upzO3C1+ph+3igWLZ48IguNDdaIDXlFVXj94Nu8cehdDFoDX5p2GzPzZvC753dw4ISLedMLWXn5JPHDJAhx1OzwsO65HXjcAc6bP445i0ZmJclzMSJDvj3o5cnd/6S6qYZcczb3lt/F6Iwi/vzKLnYdamHGxFzuvnYKWvHDJAhx03DSxWurd+Lzhph36URmXjg23k1KSiMu5E+463is6kkavU1MyZ7M3WW3k6FP5+k3ZbbWNFA6xsa/LC9DL+b7BCFujh9u4Y0XqgkFw1xyjcTUGUXxblLSGlEh/1nDTp7es5pAOMDicZdw/cSr0Wq0vPjRAT74/ARj8zP51k0VouCYIMTRwX0O3n5pF6oKi5dNY+IUUUnyXIyIkFdUhVf2r+ftIx9g1Bn5StkXOS+/AoC3th5l3cZD5Gel8eAtM0g3G+LcWkEYueTqOt5/rQadXss1K8oYW5wT7ybFhRIMEGxoJNhQT6ChnqDDgf3bXx/Uc6V8yLuDHv5W/Sw1Lfuwp+VyX/ldjMosBGBj9Umee3cftkwjD906E1umKc6tFYSRq2rbMTa8U4vRpOe6W8opHJ3au8uVQIBgY0MkyOvrCTY0RAK9oZ5QSwuop9ThESHf29G2Ezxe9SRNvhbKcqdw17TbSDdECot9vs/BE6/VkG7S89AtM7FniYJjghAPqqry6cbDbP34EGkZBpaunEFufma8mzUkFL+fYGNDV4gHG7sDPdTS3Odj9NnZpJVKGPLzMeYXYMgvwJg/+M2YZwx5SZK0wKPADMAP3CPLcm3U7b8FFgKdlYGWybLsHHSLhsiWus94tuYFgkqQayZcwbXFV6DVRC6m7j3ayp9erkav0/Dtm2cwJkV+oAQh2aiqysZ397Nz2zEsNjNLb63Alp0e72adFcXv755WaegM9MjH4dbWPh+jz8khbcrUjhDPjwR5QQGGPDta09DOKAxkJL8cMMuyPE+SpLnAI8CyqNvPB66SZdkxpC0bpLASZm3ta7x/bANmnYkvl99FhX161+1H6tv47fM7UBSVb95YwSRxuIAgxIWiKHzwxl7kqjqyc9NZcusMMi2JOWWq+LwEGiJTK13TKvX1BBoaCDv7CHKNBn12DulTp3WHeMeo3GC3ozXGrqDaQEJ+IbAeQJblzZIkze68oWOUPxl4TJKkAuCvsiw/MSwtHYC2gJu/Vj/DvtYDFKTn89XyOynI6L4yX9/Szq9X78DnD3Pv9dOomCgKjglCPIRDCm+/spuDex3kF1m49ubyuFeSDHu9vUI82Bh5P+zsY3JCo0Gfm0v61OmRqZWCjhDPL8Bgz0NrSIzKmAMJeSsQ3cOwJEl6WZZDQAbwe+DXgA54X5KkbbIs7zzdE9rtQ39yS23TIR759DGavC3MGT2Tr194Z9f8O0CT08tv1uzE5Qnw1RvKWbKwZMjb0Gk4+pdIUrl/qdw3SIz+BfwhVv1tKwf3OZgwKZeVd8/BNETHaJ6pfyGPB9/JOrwn6/CdPInv5MnI+ydOEuwryLVaTPY8LMUzMBcVYS4qJG1U5K25oACtIfFX4w3kK+sCor9y2o6AB2gHfivLcjuAJEnvEZm7P23ID3Vh/40ntrJq71rCSpilJVdz5fhL8LSG8HRcJvD4gvziH5/R0NzOsoXFXCjZh+1wAbvdktIHF6Ry/1K5b5AY/fN5g7y2ZicNJ9qYMCmXxcun4Wrzdl/ROwed/Qu3e3rNjQc7plrCbX28kFaLITeP9Oll3XPjnRc98+xo9D1jMgx4AE+rD/Cde8MHaLC/oAcS8pXAUmB1x5x8VdRtpcAqSZJmAVoiUztPDqolgxBSQjy/71U+Pr6JNH0a95XfxfRcqcd9/IEwv12zk+ONHi4/bwzXL5gQq+YJghDF4/azrqOSZOn0Ai69TkKrHdzO8rDHEwnxxs6LnXWcbGnCc/wEitvd+wFaLYY8O6bxE7rnxjunWHLzegV5KhlIz9YCiyVJ2ghogLslSXoQqJVl+RVJkp4GNgNB4ClZlncNX3O7Of0u/lL9NAechxmVUch95XdhT+85xx4KK/zxpSpqjzu5cFoBt4nqdYIQF9GVJMvPH82CK85c/C/sdnetG+9agtgxMlc8nl731+h06PPspBWXREK8oABjfj4GewGG3NyUDvLT0ainLrgffuq5/sl4wHmIv1Q9jTPQxvn5M/jC1Jsx6Xpe5FBUlcdf3c0nu+spL8nlmzeWx6QeTSL8STycUrl/qdw3iF//mhrdrFu1k3Z3gNkLxjN74QQ0Gg2qqqJEB3lDQ8eKlUigK+29gxydDoPdHrV+PL8r0EdJE3A0t8e8f7Fit1sGNUJNql9tqqry8fHNPL/vFRRV4YZJ13H52It6jQhUVeWfb+/jk931TBpj4+s3iIJjghAPdcedvL56J35/mPMnailu+oy6x9d3BHoditfb6zEavR5Dnh3D5Mk9gtyYX4A+NxdNP1M8Gp2oOdWXpAn5YDjIqr0vsenkVjIM6Xx5+heYkjO5z/u+UnmIdz87xhh7Bg/cVIFJFBwThGGjqiphl6tj6WFd17TKiYYAn+qmE9bomNqwkazaWjr3eGr0+sj6cWlKrw1B+uycfoNcOHtJEfItvlYer3qaw21HGWsZzb1ld5Kblt3nfd/ZdpSXNxwkz2bmwZUzyRAFxwThnEWC3NlrbrzzfcXXc5VJQ8Y4qgsvBjRckHaE8fMnYchf0DXNos/OFkEeIwkf8vta9vOX6mdwBz1cWHg+t0orMOr6Du5Nu+p49p19WDOM/OutM8kSBccEYcBUVSXsbO3a2dm5BLFzvlz1+3s9RmM0YrDnd4/GCwo44rNSvd0VqSR5YzljJlwWh94InRI25FVV5YNjlbxYuw6Am0uXcfHo+f1ekd+538ETr+0hzaTnwVtmkJ9k9S8EIRZURSHkdEbCu+siZ31XsKuBQK/HaIzG7vXjnYHesbtTb7P1GJHv3HqMyo21mMx6rrulgoJR1lh2T+hDQoZ8IBzg2ZoX2Fq/HYshk3vK72BSVnG/9993rJVH11aj1Wp44KYKxhXEf1efICQK//FjuDZ8zLG9e/CerOs7yE0mjAWFPYtldVz01NmyzrjcUVVVtm04xLbKw6RnGlmysoJcuyj8lwgSLuQd3mYer3qKY+4TTLCO497yO8gy9V9E7GiDm9+u2UkorPLNG8spHZsVw9YKQmIKu920bdmMs3ID/sOHANCazRgLi3qUsO3cEKSz2ga9h0RVVSrfqaXq0+NYbGauv20GVlG6O2EkVMjvad7L36qfxRNqZ8GoOdxcuhyDtv8mNrR6+fWqz2n3h7h3yTRmTMqLYWsFIbGo4TCeXdW4Nm7A8/l21FAItFoyKmZgXbCICZcvpKl1aLfhK4rC+6/L7K2uJ8eewZJbKshI0EqSI1VChLyqqrxz5ENe3v8GOo2W26UbWTD6wtM+xun288hz23F6Atx2xWTmlRXGqLWCkFj8J07g2rgB16aNXWVvjaNGYV2wCOvceehtkb9uI8W0hi7kQ6Ewb7+8m0P7msgfZeG6myswp4nVbIkm7iHvC/l5pmYN2xt2YjNaubf8Dopt40/7mHZfkEdW7aCx1cfS+RNYPHtsjForCIkh3O6hbesWXJUf4ztwAABtejq2Sy/DNn8hpgnFw1rCI+AP8cYL1Zw40sro8Vlcc2MZBmPc40ToQ1y/Kw3tDh6repKTnnom2ibwlbI7sJlOf9HUHwzz2+d3cqzRzaWzRrN8Uf8XZAUhlaiKQvue3bgqN+De/ilqMAgaDell5dgWLCJj5syY1DD3eYO8tnonDSfbKC7NY/H109DpxZr3RBW3kK927OHvu/+JN+Tj4jHzWTFpCfrTzL9DpODYn16qZt8xJ3Om5vOFxaWi4JiQ8gL1dbgqI9MxneeCGgoKsS1YiGXeAgzZfW8MHA7uNj/rVu2gxdGOVF7IJdeUDrqSpBAbMQ95RVV4/eDbvH7wHXRaHXdMvYW5RbMH8DiVv72+h537m5henMM9S6ah1YqAF1JT2OvFvW0LzsoN+Gr3AaBNS8N20cVYFyzCXDIx5gMcZ0ukkmSb00fF7DHMvzz2bRDOXsxD/v9VPsa24zvINmVxX/mdjLOOOeNjVFXluXf2sWlXPRNHWfnGDbGpKCkIsaQqCl65BufGDbg/3RZZz67RkD51OtYFC8icdf6QH/I8UE0NHZUkPQEuWDSB8+ePFwGfJGIe8tuO76A0exJfnn47FuPANkus23iIdz49xui8DB64eQYmoyg4JqSOYGMjzo0bcG3cQKipCQCDPR/rgoVY5y3AkBvfs4jrjjt5bXUVAX+IhVdMonz2mQdmQuKIech/ccYK5mRfgE47sKB+/7NjrP34ILnWSMGxTLFES0gBit9P27atuCo/xrtXBiK7Tq0LFmFdsJC0yYlxvenowWbWv1hNOKRw2ZIpSGKpctKJechfP2XxgA8u2LKnnmfe2osl3cC/3jqTbLHJQkhiqqri3bcXV+UG2rZtRfVH1qynSVOwzl+I5fzZaM3mOLey2/6aRt55ZTcaDVx1QxnFpWKzYTJK2IWt1QeaePzV3ZhNOh68ZSYFOaLgmJCcgk1NuDZV4qrcQLCxAQB9bi7WK6/COn8BRnt+nFvY254dJ/lwvYzeoOOaG8sYPT52K3iEoXXGkJckSQs8CswA/MA9sizX9nGf14CXZVn+33Nt1P7jTv6wtgqNRsO3bqxgfKEoOCYkF8Xvx739U1yVlbTX7AZVRWM0Ypk3H9uCRaSVSglbT33HlqNsfG8/JrOeJSsryC8SlSST2UBG8ssBsyzL8yRJmgs8Aiw75T4/A4bkV/3xRjf/s2YHoZDK/SvKkMaJEYSQHFRVxXdgP67Kj2nbuqXraLu0yaVY5y8gc/YcdGmJW7hLVVW2fnyITzceJiPTyJJbZ5CTlxHvZgnnaCAhvxBYDyDL8mZJknosapck6SZA6bzPQNjtfY/M65vb+c2anXh8Ib596ywuv2DcQJ8yofTXv1SRyv0bTN/8TU00vv8hDe+9j/f4CQCMubnkL7mW/MsuIW3UqCFu5eD11z9VUVn/UjWfbjxMdm46X/zqPLJzk2+KNJV/NgdrICFvBZxRH4clSdLLshySJKkMuB24Cfi/A33Rvi68Oj0BHn7mU5pdPlZeNomKCdlxOVn+XNntlqRs90Clcv/Opm9KMIBn+3acGzfQvqs6Mh1jMGCZMxfrgoWkT52GRqvFDbgT5OvVX//CYYX3X69h366GSCXJlRWElHDSfZ9T+WcTBv8LbCAh7wKin10ry3Ko4/07gdHAe8AEICBJ0iFZlgc8qgdo94X4zerPaWjxct288Vw1JzlH8EJqU1UV/6GDOCs30LZlM0p7OwDmkhKsCxZhuWAOuvTkmt4IBcO89fJuDtc2UTDaynU3l2MS5yKnlIGEfCWwFFjdMSdf1XmDLMvf63xfkqQfA3VnG/CBYJjfvbCTI/VuLp45ihUXlZzNwwVh2IWcrbg2b8JVuYHAieMA6GxZZF99Cdb5CzEl0HTM2Qj4Q7zxfBUnjjoZMyGbq1eUYRAbDVPOQEJ+LbBYkqSNgAa4W5KkB4FaWZZfOZcXDysK//vyLvYebWW2ZOeOK6WE2AAiCGoohHvHdlyVG/BUV4GioNHryZx9Adb5C8mYXoZGl7yB6G0P8NrqnTTWuSmR8rhiqagkmarOGPKyLCvA1075dE0f9/vx2bywoqr8/fUaPq91MG1CNvcunS4Kjglx5ztyGNeGj3Ft2YzidgNgGj8hUmJgzlx0mcl/bqnb5ePVVTtpbWpnSkUhF18tKkmmsrhshlJVldXv1VJZXUdxkZVvrCjHIEYRQpyE2ly0bd7E8S2b8Bw8BIDOYiV78VVYFyzENCZ1DqVpbW5n3XM7aHP5mTFnDPMuFZUkU11cQv71zYd5a+tRinLT+fbNFZjFiTJCjKmhEJ7qKpyVH+PZuQPCYTQ6HRmzzoscwFFWjkafWj+XdSecvPSP7Xg9QeZcVMx588aJgB8BYv5TvH7TIV748AC5VhMPrZyJJX34T7IRhE7+Y0cjB3Bs3kS4zQWAccxYbAsWUnzdYloDqfcXpc8b5MDeRja/fwC/L8SiKydTdt7oeDdLiJGYh/yjL+wgM83AgytnkmNNnGJMQuoKu924tmzGVbkB/+FDAGgzM8m6fDHWBQsxj4ucKWywWSBF1ll72vwc3OfggNzIiSOtqCpotRouXzqV0ukF8W6eEEMxD3mzUceDK2dQlJtc64mF5KKGw3h2VePauAHP59tRQyHQasmomIF1wSIyKmagNaTWenBXq5cDsoMDexupP+7q+nz+KAslkp0L5hUTUsJxbKEQDzEP+T//nysI+YOxfllhhPCfOIFrY+Q81LCzFQDjqFGROu1z56G3ZcW5hUNHVVVamto5KDdyQHbgaIisBtJoYNS4LEqkPIon55HZ8Rdzdm56Su8IFfoW85DPtpppbBQhLwydcLuHtq1bcFV+jO/AAQC06enYLr0M2/yFmCYUp8wFRlVVcdS7OSA3ckBupLU5UgRNq9UwriSHEsnOhMm5pIlrXUKH1Fo+IIwYqqLQvmc3rsoNuLd/ihoMRs5DLSuPrI6ZOROtITWCTlFU6o87u6Zi3C4/AHq9luLSPEokO+Mn5mIyi//OQm/ip0JIKoH6usjqmE0bCbU0A2AoKMS2YCGWeQswZKdGaepwWOHEkVYOyI0c3OfA64n89Ws06SidXkBxaR5jS3IwGJJ3160QGyLkhYQX9npxb9uCs3IDvtp9AGjT0rBddDHWBYswl6TGhp5QMMzRQy0ckBs5tK+JgD9SB9CcbmDqjCJKpDxGj89Gp0u9ZZ7C8BEhLyQkVVHwyjU4N27A/ek21EAgMh0zdTrWBQvInHU+WlPyn/kb8Ic4cqCZA3Ijh/c3EQoqAGRYTEhlBZRIdgrH2ETJD2HQRMgLCSXY2Ihz4wZcGzcQamoCwGDPj9SOmbcAQ25unFt47nzeIIf2OTggOzh2qJlwWAXAlp0WWRFTaie/yJISf50I8SdCXog7xe+nbdtWXJUf490rA6AxmSLLHhcsJG1yadIHnsft5+DenpuTAHLsGZRIdkpK88ixZyR9P4XEI0JeiAtVVfHu24urcgNt27ai+n0ApElTsM5fiOX82WjNyb0junNz0sG9jdRFb04qimxOKi7NIysn+Y7YE5KLCHkhpoJNTbg2VeKq3ECwsQEAfW4u1iuvwjp/AUZ7fpxbeG5aHJ7IGva9Dhz1UZuTxtq6gj1TlPMQYkiEvDDsFL8f9/ZPcVVW0l6zO3IeqtGIZd58bAsWkVYqoUnSeuY9NiftddDaFDkSUKvVMLYkhxIpjwmT8kjPSI01+0LyESEvDAtVVfEd2I+r8mPatm5B8UZ2ZqZNLsU6fwGZs+egS0uLcysHR1VV6o67ImvY5Uba+tyclCPOShUSwhlDXpIkLfAoMAPwA/fIslwbdfv9wJcAFfh/siyvHp6mCskg2NJC26ZKnBs3EKyrA0CfnUPWZVdEpmMKCuPcwsEJhxVOHm1lv+zg0F4H7Z4AENmcNHl6PiWldsYW54gzUoWEM5CR/HLALMvyvI6DvB8BlgFIkpQH/AswCzADuyVJWiPLsjpcDRYSjxIM4Nm+HefGDbTvqo5MxxgMWObMxbpgIelTpyXldEwoFObowRYOyo0cqm3C7+vYnJQW2ZxUXJrHmPHZ4mxUIaENJOQXAusBZFneLEnS7M4bZFl2SJI0U5blkCRJEwCfCPiRQVVV/IcO4qzcQNuWzSjtkbloc0kJ1gWLsFwwB1168pWTjt6cdORAM8FApDRvhsVI6fTRFJfmUTTWJs5EFZLGQELeCjijPg5LkqSXZTkE0BHw3wB+AvxuIC9qt1vOuqHJJJX7F2hpIbDhIxree5/2I0cBMGRnU3T1leRfdinpY8fEuYVnz9seQK6u591X9rB/byPhUGTXaXZuOlMriphSXsTosVloUmDXaSr/bELq928wBhLyLiD6K6ftDPhOsiz/QZKkx4A3JEm6VJbl90/3hKlc09put6RE/1RFIdTaSrChnkB9HcH6evzHj9G+ZzcoChq9nszZF2Cdv5CM6WVodDo8gCdJ+t7u9nNgr4ODex0cP9zSc3NSx8XT6M1JjiZ3HFs7NFLlZ7M/I6F/gzGQkK8ElgKrO+bkqzpvkCRJAh4GbgSCRC7MKoNqiRBzqqoSdjoJNNQTrK8jUF/fEer1BBsbIvViTpExcSIZF87DMmcuuszMOLR68Fyt3siu072N1B3rvTnp/LnjURCzjUJqGUjIrwUWS5K0EdAAd0uS9CBQK8vyK5Ik7QA2EVld84Ysyx8OX3OFs6WqKuG2NoL19QQa6jre1ne8bejaaRpNYzJjLCzCkF+AsaCg+21BAUUlo5NqtNTS5InUYZcbuzYnARR1bk6anIfFFtmclGvPTKq+CcJAnDHkZVlWgK+d8umaqNt/QmQ+XoijsNsdmVbpHIlHve1cox5NYzRisOdjLCzsFeY6qy1pa6h0bU7a28hB2UFL9Oak4uyOk5PE5iRh5BCboZJIuL39lBDvDnXF4+l1f41ejyE/H4M0pSPEC7vCXJ+VlZTLGvuiqir1HZuTDux10OaM/HXStTmpNI/xk3LF5iRhRBIhn2AUn697OuWUkXm4rY+pBJ0Og91O2sRJGAoKMeZHplWMBQXos3NSJshPFdmc5Ow6OandHbl+YDDqmDwtn+JSO+NKxOYkQRAhHweK30+woaGPOfJ6wk5n7wdotRhy8zCNL+6aG+8Mc0NOLhrdyAiyUCjMsUMtHJAdHNrniNqcpGdKRSElkl1sThKEU4iQHyZKMECwsbFXiAfr67vOJu1Bo0Gfm0v6tOk9QtyYX4ghLw+NfmR+q4KB6JOTojYnZRqZfN5oSiSxOUkQTmdkJscQUUMhgo7GyHRKR4jXtzjwHDtOqLmZrsXXUfQ5OaRNmRp1oTNy4dNgt6M1iDljAL8vyMF9TRyUGzl6sPvkJGuWmemzRlFcmkfBKGvSXhwWhFgSIX8GajhM0OGImhvvXk8ebGoCpfe2AF1WFmmTS7tH4h1z5AZ7PlqjWNXRl3a3n4MdR+KdONKKokSCPTsvvePkJDu5+eLkJEE4WyLk6djd2dzUa+lhoL6OoMMB4XCvx+gsVswlE6MudEZWrhRNm0hzWzAOvUg+bU5f14qYumPd1yLshZaus06zc8XJSYJwLkZMyEe26bf0OUcebGxADYV6PUabmYl5/ITec+QFBf3WQteZzSBCvl8tTe2RFTF7G2msi9qcNKb75KTOzUmCIJy7lAr5yDb91h5z5F1v+9mmr01PxzhmbM+dnR3ryXUZyVdFMdGoqkpTgzuy63RvIy0OsTlJEGIp6UK+z236nevJGxpQ/f5ej+ncpt+9/LCwa3SuzcwU87xDTFVV6k90bE6Suzcn6fRaiifnUSzlMUFsThKEmEjYkB/UNv1Ta62kwDb9ZKEoCieOOCPlBPb23Jw0aVo+JaV5HZuTEvZHThBSUlz/x4XbPT2nVaLCXGnvb5t+AYYp0XPkBRgKCiPb9EWQx1Q4pHRsTmrkUK0Dn/eUzUmldkZPyEKvHxmbtQQhEcU85Pf99ve4Dh8jWF9P2N33Nn2jPR/D5Mk9Vq4Y8gvQZ2en7Db9ZBHwh9hf08AB2cHh/U1dm5PSM42UnTeK4lI7o8aJzUmCkChiHvIN730Q2aafZ8c0IbJNPzJXHqm7os/NFUEeZ4qi4GkL4G7z43b5cLf58bj8OFu8nDjSSqjj5CSLzcy0maMokcTmJEFIVDEP+fP+94+0YRqx2/TjTVVVvJ7OAO/41+br8X67O9DXZl0A7AWZjJuYS4mUR26+uGgtCIku5kmbVlSIWxzMMCxUVcXvC/UM7o5ReJsr8rHH7UcJ953gGg1kWEwUjLaRaTWRaTF1vDVH3lpNjBufKw7WEIQkIobTSSQYCHUFd2Tk7ev1fijY/+mL6ZlG8vIzewV3hsVEptVMeoYRbQocVi0IQjcR8gkiHFJ6zIH3DnE/AX/vXbmdzGl6srLTybCaokbh5m0UTd0AAAljSURBVK7ReIbFhE4nrnUIwkhzxpCXJEkLPArMIHJQ9z2yLNdG3f4d4NaOD1/vOA5QiKIoCu3uQNQo3NdrPtzb3n8pBINRR6bVRMEoS4/gjoR3ZERuMIhlioIg9DaQkfxywCzL8jxJkuYCjwDLACRJKgG+AFwIKMAGSZLWyrK8c7ganGhUVcXbHuwK7gN7Gqk76ewxrdLu9vd7IVOn05BpNZOdl9EV3F1TKR1hbjTpxQVOQRAGZSAhvxBYDyDL8mZJkmZH3XYUuFqW5TCAJEkGwDfkrYwTVVUJ+ENdo+62qOWEXVMpbQO5kGntFdyZ1sjH5jSDCHBBEIbNQELeCkSfSReWJEkvy3JIluUg4JAkSQP8N7BdluW9Z3pCu90yuNYOsYA/hKvVi7PV1/HWi6vrnw9nq7drs09fMi0mCkfZsGaZsWWlYe34Z8tOw5plJtNiTskLmYny/RsOqdw3EP0biQYS8i4g+iunlWW56wqgJElm4AmgDfj6QF40FkvwwiEFjztqBN61nLD7/c4zQvtiMuu7gjrz1IuZp7mQabdbaGxswx8I4W9y9/HMya2zf6kolfsGon/JbrC/wAYS8pXAUmB1x5x8VecNHSP4l4H3ZFn+5aBaMAiKotLu9ve+kNnWfTHT6znDhUyLifwiS9fyQUv0hUyLCYNRXMgUBCH5DSTk1wKLJUnaCGiAuyVJehCoBXTAxYBJkqRrOu7/fVmWNw22QadeyOwMbk/05p6201/IzLCYyB6XERl528SFTEEQRq4zhrwsywrwtVM+XRP1/lkd4+PzBmlqcPexnT4yIve0+bsObj5V14XMUdbui5dRFzIzLCbS0sWFTEEQhE4x3wz1qx+u7/e2tAwDOfbM3ksJO0bh6ZlGUd1QEAThLMQ85EunFWAw6XpfyMw0odOLABcEQRhKMQ/5W78yJ6WvgAuCICQSMXQWBEFIYSLkBUEQUpgIeUEQhBQmQl4QBCGFiZAXBEFIYSLkBUEQUpgIeUEQhBQmQl4QBCGFadT+Kn0JgiAISU+M5AVBEFKYCHlBEIQUJkJeEAQhhYmQFwRBSGEi5AVBEFKYCHlBEIQUJkJeEAQhhQ3boSGSJGmBR4EZgB+4R5bl2v/f3r3HSlVdcRz/EgVNeFStprZitSbyC7ExRWsBe6MoEo3vR/+Rig9iKzHWYFONUl8xGmLUCO1fQqQiUcF30RpbBBsExAdUC8b8EjVN01KrGB8ggiL4x943HK935r7mwTmuT3LDnbPnnlmLlbtmz547+xTGfwVcBmwHbrX9dLNiabRe5DYb6AA6r45ylu2PWx7oAEkaC9xue0KX42cAN5JqN8/23DaEN2B18rsKuBR4Px+6zLZbHF6/SRoMzAMOBfYi/X4tLoyXtn69yK3stdsDmAsI2AlMs72+MN7n2jXzylBnA3vbHi9pHHAXcFYO9EDgSuCnpAuBr5C0xPa2JsbTSDVzy44GTra9sS3RNYCka4ApwKddjg8G7gaOyWMrJS22/f/WR9l/tfLLjgYutL2mtVE1zAXAB7anSNoPeA1YDJWoX83csrLX7gwA2z+XNAG4jV19s1+1a+ZyTQfwbA54Namhd/oZsNL2tjzDfQs4somxNFrN3PIs/3BgjqSVkqa2J8QBexs4t5vjo4G3bH9o+3NgBXBcSyNrjFr5QWoU10laIem6FsbUKI8AN+TvB5FmfZ3KXr96uUHJa2f7SeDX+eYhwEeF4X7VrplNfgRQXKL4UtKeNcY2Ad9pYiyNVi+3ocAfSTOOU4DLJZXpCQwA248BX3QzVPbaAXXzA1gITANOBDoknd6ywBrA9mbbmyQNBx4Fri8Ml7p+PeQGJa8dgO3tkuaT+sgDhaF+1a6ZTf4TYHjxsWxvrzE2nK8/Y+3u6uW2BZhte4vtTcAy0tp9VZS9dnVJGgTMsr0xz5b+Aoxpc1h9Julg4Hlgge0HC0Olr1+t3KpSOwDbFwGjgLmShubD/apdM9fkV5LWlx7O69brCmMvA7dJ2pv05sloYP03T7HbqpfbKGCRpDGkJ9EOYH7rQ2yaN4HD83roZtLLxTvbG1JDjQDWSxpNWvc8kfRGX2lI+h7wN+AK20u7DJe6fj3kVoXaTQFG2p5JmjDuyF/Qz9o1s8k/AUyStIq0dnaJpN+S1pQWS/oD8AKpEf7e9tYmxtJoPeW2AFhNWg643/YbbYy1ISRNBobZnpNz/SupdvNs/7e90Q1cl/xmkGaK24Cltp9pb3R9NgPYF7hBUuf69VxgaAXq11NuZa/d48CfJC0HBgPTgXMk9ft3L7YaDiGECosPQ4UQQoVFkw8hhAqLJh9CCBUWTT6EECosmnwIIVRYNPkQuiGpT392JukWSWfm759vTlQh9F0z/04+hG8N2zcWbk5oVxwhdBVNPpSGpJnAL4CNwP9Iuw9em29vJW04di8wEvgBsBy4EDie9CGaLaRPV68DJuf7/N32ofn8NwPYvrnwmAflc+4DfB94yPa1ki4GLgL2B57qPBdwVP65l4A5wETbk/Oxm4Cttm9v6H9MCHXEck0ohbyPdgdwBHAqu/YkEXCB7ZOA04DXbI8n7QQ6ntx0gWOBK0hN/ofAyb186PNJjX0caafUyyXtn8dGAmNsz+i8s+0r879jgUXAREnD8r4qvwQW9DX3EAYimnwoi0nAw7Y/t/0h8GQ+/p7tfwHYfghYImk6aQe/7wLD8v3W2/6P7R2kPUD2682D2r4T+Lek3wGzgSGknUYB1hY2puvuZzcDzwDnkZ6g3ra9obcJh9AIsVwTyuJLup+UfNb5jaTfkJZz5gDPAT8m7S0EaTmn0858fGdhHNJeIV/bfljSXcBhwIOkJ5aTCj/zGT2bR9oO9x3gvl7cP4SGipl8KIslwHmShkgaAZxOuqhC0STgHtsPkBr4T4A96pzzI2BfSQdI2ou0/39Xk4A7bD8CHAwc1MM5oXB9AdsvkJZ1TmDXq48QWiaafCiFvJvgcuAfpH3CN/DNmfQs4CZJa0nX4F0F/KjOOT8G7gBeIc38X+7mbjOBBZLWAFcDr9Y7Z/Zn4PW8lTaknQWXlejylqFCYhfKUAqSxgOjbM/P17p8EZhq+59tDq2m/GbrENKrkOm217Y5pPAtFDP5UBYGzpf0OrAWWLg7N/jsQOBdYHU0+NAuMZMPIYQKi5l8CCFUWDT5EEKosGjyIYRQYdHkQwihwqLJhxBChX0FklhOEgSWEzEAAAAASUVORK5CYII=\n", - "text/plain": [ - "<matplotlib.figure.Figure at 0x10d5d3828>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "df.groupby(['granularity'],as_index=False).mean()[['granularity','c1_val','c2_val','c3_val','c4_val']].plot.line(x=\"granularity\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Selon la granularité et le type" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:37:32.047610Z", - "start_time": "2018-04-17T21:37:31.978664Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>granularity</th>\n", - " <th>type</th>\n", - " <th>c1_val</th>\n", - " <th>c2_val</th>\n", - " <th>c3_val</th>\n", - " <th>c4_val</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>12</th>\n", - " <td>3.0</td>\n", - " <td>extension_1</td>\n", - " <td>0.909091</td>\n", - " <td>0.781818</td>\n", - " <td>0.490909</td>\n", - " <td>0.727273</td>\n", - " </tr>\n", - " <tr>\n", - " <th>14</th>\n", - " <td>3.0</td>\n", - " <td>gen_region</td>\n", - " <td>0.927273</td>\n", - " <td>0.781818</td>\n", - " <td>0.472727</td>\n", - " <td>0.727273</td>\n", - " </tr>\n", - " <tr>\n", - " <th>15</th>\n", - " <td>3.0</td>\n", - " <td>normal</td>\n", - " <td>0.960000</td>\n", - " <td>0.800000</td>\n", - " <td>0.540000</td>\n", - " <td>0.740000</td>\n", - " </tr>\n", - " <tr>\n", - " <th>13</th>\n", - " <td>3.0</td>\n", - " <td>gen_country</td>\n", - " <td>1.000000</td>\n", - " <td>0.933333</td>\n", - " <td>0.577778</td>\n", - " <td>0.911111</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " granularity type c1_val c2_val c3_val c4_val\n", - "12 3.0 extension_1 0.909091 0.781818 0.490909 0.727273\n", - "14 3.0 gen_region 0.927273 0.781818 0.472727 0.727273\n", - "15 3.0 normal 0.960000 0.800000 0.540000 0.740000\n", - "13 3.0 gen_country 1.000000 0.933333 0.577778 0.911111" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d_pc=df.groupby(['granularity','type'],as_index=False).mean()[['granularity','type','c1_val','c2_val','c3_val','c4_val']]\n", - "df_is_pareto = d_pc.apply(lambda row: is_pareto_front(d_pc,row, ['granularity','c1_val','c2_val','c3_val','c4_val']), axis=1)\n", - "df_pareto = d_pc.ix[df_is_pareto].sort_values(by=['granularity','c1_val','c2_val','c3_val','c4_val'])\n", - "\n", - "df_pareto" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T22:36:27.688362Z", - "start_time": "2018-04-17T22:36:26.308894Z" - } - }, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAA2gAAAOyCAYAAAAGs/Z0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzs3X+UnXV9L/p3kqmJsZOawpyLWoplId/DKgW9stpAw1GxqFBpUo/r2KLHAjf8aMmqSzxSpFbAIijctJ7bIxYNiPa0crCKYFug9mCLBLn9pRVFvtz0VLG26OAaSVogYZK5f8ykaw4mmT07Mzvfeeb1Wsu1Zu/n2c/zee8ZWLz9PvvZSyYmJgIAAMDBt/RgDwAAAMAkBQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAaoaABAAA0QkEDoDNKKUtKKTeVUv7LPB3/rFLKH83HsQEgUdAA6IhSyjFJ/meS/3SwZwGAfg0d7AEAYLZKKeckeVuSXUkeS/JLSS5M8pEkj/R4jPOS/Fyt9bVTj/99Jgvej04d7/wkz0ryw0neW2v94BzHAIDvYwUNgAWllHJ8kvcleU2t9bgktyf59Vrrxlrr783iUB9PsraUctjU47MzWfCeneTcJKfXWl+S5A1JrpmzAACwH1bQAFhoXpnkrlrrN5Ok1vr+fg5Sa91eSvnDJG8qpfx2kjclWVtr/ZdSymuT/Gwp5UVJXpzkB+dodgDYLytoACw040km9jwopTx76vLEfmxO8uYkr0nyYK31H0opP5LkS0mOSHJvknce4LwA0DMFDYCF5nNJfqaU8rypx+enz0sQa633J1mS5F1JPjz19AlJRpNcWWu9K8mez6gtO5ChAaAXChoAC0qt9YEkb09yZynl7zK5+nXBARzyw0mOTPLpqcd/muQfk9RSyhczedOQ0SRHHcA5AKAnSyYmJmbeCwAAgHnnJiEAdFYp5e1J3riPzdfWWn9/kPMAwEysoAEAADTCZ9AAAAAaoaABAAA0YuCfQRsd3T6QaypXr16ZsbEnBnGqgelaJnna1rU8SfcyydO+rmWSp31dyyRP+7qWaVB5RkaGl+xrW2dX0IaGuvd1NV3LJE/bupYn6V4medrXtUzytK9rmeRpX9cytZCnswUNAABgoVHQAAAAGqGgAQAANKKnglZK+alSyp/v5fkzSil/VUr5Qinl3DmfDgAAYBGZsaCVUi5OsjnJimc8/wNJfjvJq5K8LMl5pZT/Yz6GBAAAWAx6WUH7+ySv28vzxyTZWmsdq7XuTHJvkv8wl8MBAAAsJjN+D1qt9ZOllBfuZdOqJI9Pe7w9yQ/NdLzVq1cO7PaVIyPDAznPIHUtkzxt61qepHuZ5Glf1zLJ076uZWo1zxlvu20g5/nMpnUDOc+BaPV31K+DnedAvqh6W5Lp0w8n+d5ML+rli9/Oee/d/U+1FzdecsqcHu9gGRkZzujo9oM9xpyRp21dy5N0L5M87etaJnna17VMXcvTj9bzd+13NKg8+yuBB3IXx68leVEp5YdLKc/K5OWNXziA4zXnq1/9SjZuPG/OjnfDDdfn05/+wzk7HgAA0C2zXkErpZyZ5AdrrR8qpVyU5K5MFr0ba63fmusBD5bf//2P5q67/iQrVjz7YI8CAAAsEj0VtFrr15Osmfr5D6Y9/5kkn5mXyQZox46nctVVV+TRRx/N008/nYsuujgveMGP5D3vuTa/+Zvv2u9rP/GJm7N9+7acc8552blzZ8466xfz0Y/enBtuuD4PPfRgtm17PEcddXQuvfSyAaUBAAAWKl9UneTTn/5kDjvs+bn++o/kiiuuyoMPfiUvf/krMzQ0c3999atPz913/1kmJiZy77335KSTTs7OnTsyPDyc97//umze/Hv56lcfyOjodwaQBAAAWMgO5CYhnfHII9/ImjUnJUkOP/xHc/jhZ/b82lWrVuXoo0u+/OUv5Y47PpONG9+a5ctXZGxsLJdddmlWrlyZJ598MuPj4/M1PgAA0BEKWpIjjvixfO1rD+bkk1+eb33rH/PhD38wl1/+np5ff8YZ63PLLX+QHTt25IgjXph77/2LfOc738673311xsbGcs89n8vExMQ8JgAAAKab6zvD78tc3zG+yYI2FyFnc4vMdetel6uvfnc2bjwvu3btylve8rZZneslL3lprrnmPXnzm89JkhxzzI/npptuyIUXnpslS5bk+c9/QR57bHTWGQAAgMWlyYI2aMuXL9/ritnznvf8fOhDN/V0jI9//FP/9vMhhxyazZs/9n37HHfci/ueEYCDa6H+P7EALCwKWo9uu+1T+exn7/y+5y+4YGOOPfa4gzARAADQNQpaj9ate13WrXvdwR4DAADoMAUNAABwKXcjfA8aAABAIxQ0AACARrjEERaBQV2ykLhsATg4XJoFdIWCBgDAvFOioTcucQQAAGiEggYAANAIBQ0AAKARPoMGANAgn9mCxUlBA2iA/xADABKXOAIAADRDQQMAAGiESxwBmHMu2QSA/lhBAwAAaISCBgAA0AgFDQAAoBEKGgAAQCMUNAAAgEYoaAAAAI2Y8Tb7pZSlSa5LcnySHUk21Fq3Ttv+tiRnJtmd5Kpa663zNCsAAECn9fI9aOuTrKi1nlhKWZNkU5J1SVJKeW6StyQ5KslzknwpiYIGAI3zXXUAberlEse1Se5Mklrr/UlOmLbtX5N8I5Pl7DmZXEUDAACgD72soK1K8vi0x7tKKUO11vGpx99M8mCSZUmunulgq1evzNDQslkP2o+RkeGBnGeQupZJnu5p/T1ofb751rX8XcuTdC+TPO3rWiZ52te1THOdp5eCti3J9LMunVbOTkvyvCQ/NvX4rlLKllrrX+7rYGNjT/Q16GyNjAxndHT7QM41KF3LJE83tfwe+B21/fvpR9fyJN3LJE/7upZJnvZ1LVM/efZX6nq5xHFLktOTZOozaA9M2zaW5MkkO2qtTyX5XpLnznpCAAAAelpBuzXJqaWU+5IsSXJ2KeWiJFtrrbeXUn4myf2llN1J7k3y2fkbFwAAoLtmLGi11t1JLnjG0w9N235ZksvmeC4AAIBFxxdVAwAANEJBAwAAaISCBgAA0AgFDQAAoBEKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARQwd7AIB+nPPeuwdynhsvOWUg5wEASKygAQAANENBAwAAaISCBgAA0AgFDQAAoBEKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAjhmbaoZSyNMl1SY5PsiPJhlrr1mnbT0tyWZIlSf4myYW11on5GRcAAKC7ellBW59kRa31xCSXJNm0Z0MpZTjJtUleW2v9qSRfT3LoPMwJAADQeb0UtLVJ7kySWuv9SU6Ytu2kJA8k2VRK+XySb9daR+d8SgAAgEVgxksck6xK8vi0x7tKKUO11vFMrpa9IsmLk/xLks+XUr5Qa314XwdbvXplhoaWHcjMPRsZGR7IeQapa5nk6Z6uvQfytK1reZLuZZKnfV3LJE/7upZprvP0UtC2JZl+1qVT5SxJvpvkr2qtjyZJKeWeTJa1fRa0sbEn+hx1dkZGhjM6un0g5xqUrmWSp5u69h7I07au5Um6l0me9nUtkzzt61qmfvLsr9T1conjliSnJ0kpZU0mL2nc42+THFtKObSUMpRkTZIHZz0hAAAAPa2g3Zrk1FLKfZm8U+PZpZSLkmyttd5eSnlHkrum9r2l1vqVeZoVAACg02YsaLXW3UkueMbTD03bfnOSm+d4LgAAgEXHF1UDAAA0QkEDAABohIIGAADQCAUNAACgEQoaAABAIxQ0AACARihoAAAAjVDQAAAAGqGgAQAANEJBAwAAaISCBgAA0AgFDQAAoBEKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABoxdLAH6MU57717IOe58ZJTBnIeAACAvbGCBgAA0AgFDQAAoBEKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGjEjF9UXUpZmuS6JMcn2ZFkQ6116172+eMkt9Vaf3c+BgUAAOi6XlbQ1idZUWs9McklSTbtZZ8rk6yey8EAAAAWm14K2tokdyZJrfX+JCdM31hKeX2S3Xv2AQAAoD8zXuKYZFWSx6c93lVKGaq1jpdSjk1yZpLXJ3lXLydcvXplhoaWzX7SARgZGT7YI8xoIcw4G/J0T9feA3na1rU8SfcyydO+rmWSp31dyzTXeXopaNuSTD/r0lrr+NTPb07ygiR3J3lhkp2llK/XWve5mjY29kSfo86/0dHtB3uE/RoZGW5+xtmQp5u69h7I07au5Um6l0me9nUtkzzt61qmfvLsr9T1UtC2JDkjyS2llDVJHtizodZ68Z6fSymXJ3l0f+UMAACAfeuloN2a5NRSyn1JliQ5u5RyUZKttdbb53U6AACARWTGglZr3Z3kgmc8/dBe9rt8jmbqvHPee/fAznXjJacM7FwAAMCB8UXVAAAAjVDQAAAAGtHLZ9BgRoO6bNMlmwAAdJkVNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAaoaABAAA0QkEDAABohIIGAADQiKGDPQC06Jz33j2Q89x4ySkDOQ8AAAuDFTQAAIBGKGgAAACNUNAAAAAaoaABAAA0QkEDAABohIIGAADQCAUNAACgEQoaAABAI2b8oupSytIk1yU5PsmOJBtqrVunbX9rkl+YevgntdYr5mNQAACArutlBW19khW11hOTXJJk054NpZQjk7wxyUlJ1iR5VSnluPkYFAAAoOt6KWhrk9yZJLXW+5OcMG3bN5O8pta6q9Y6keQHkjw151MCAAAsAjNe4phkVZLHpz3eVUoZqrWO11qfTvJYKWVJkmuTfLHW+vD+DrZ69coMDS3rf+J5NDIyfLBHmHNdyyRP+7qWSZ62dS1P0r1M8rSva5nkaV/XMs11nl4K2rYk08+6tNY6vudBKWVFkhuTbE/yKzMdbGzsidnOODCjo9sP9ghzrmuZ5Glf1zLJ07au5Um6l0me9nUtkzzt61qmfvLsr9T1conjliSnJ0kpZU2SB/ZsmFo5uy3J39Vaz6+17pr1dAAAACTpbQXt1iSnllLuS7IkydmllIuSbE2yLMnLkiwvpZw2tf87aq1fmJdpAQAAOmzGglZr3Z3kgmc8/dC0n1fM6UQAAACLlC+qBgAAaISCBgAA0AgFDQAAoBEKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAaoaABAAA0QkEDAABohIIGAADQCAUNAACgEQoaAABAIxQ0AACARihoAAAAjVDQAAAAGqGgAQAANEJBAwAAaISCBgAA0IihmXYopSxNcl2S45PsSLKh1rp12vZzk5yfZDzJlbXWP5qnWQEAADqtlxW09UlW1FpPTHJJkk17NpRSDkvyq0l+Osmrk1xdSlk+H4MCAAB0XS8FbW2SO5Ok1np/khOmbfvJJFtqrTtqrY8n2ZrkuDmfEgAAYBFYMjExsd8dSimbk3yy1nrH1ONHkhxZax0vpbwpyU/UWn9tatvHknys1vpn8zw3AABA5/SygrYtyfD019Rax/exbTjJ9+ZoNgAAgEWll4K2JcnpSVJKWZPkgWnb/jLJyaWUFaWUH0pyTJKvzPmUAAAAi0AvlzjuuYvjcUmWJDk7k4Vta6319qm7OJ6XybJ3Va31k/M7MgAAQDfNWNAAAAAYDF9UDQAA0AgFDQAAoBEKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAaoaABAAA0QkEDAABohIIGAADQCAUNAACgEUMHewAAmAullDcleXuSiSRPJPnVWutfz/E5Xp7kv9Vaj53L4wLAHlbQAFjwSiklybVJXlNrfXGSK5N86uBOBQCzZwUNgAWnlHJOkrcl2ZXksSSXJdlQa/3nqV3+OslhpZRn1Vp37uMYr0qyqdb6E1OPn5vkH5IcmeSnk1ya5FlJ/l2Sj9Zaf2MeIwFAEitoACwwpZTjk7wvk6tlxyW5Pckba61/PLV9SZLfSnL7vsrZlM8m+cFSyglTj38xyR8n+V4my98v1VpPSLImyTtKKYfOSyAAmMYKGgALzSuT3FVr/WaS1Frfv2dDKeU5SW5KcniS1+zvILXWiVLKDUnOyuSK29lJLp56/owkry2lnJnkmCRLkjxn7qMAwP9OQQNgoRnP5I1AkiSllGcnOSKTNwb5TJKvJXlFrfXJHo71kSRfLKVsTvLcWuufT5W8Lya5Ncnnk9yYZH0mSxoAzCuXOAKw0Hwuyc+UUp439fj8JNck+Yskn6q1/kKP5Sy11m8l+X+TXJ9k89TTL0qyKsk7a62fSfKyJMuTLJu7CACwd1bQAFhQaq0PlFLenuTOyZs35p8zudL1s0l+vpTy89N2f2Wt9bszHPLDSf4wyc9NPf5ykj9K8lAp5XtJtiZ5MMlRSXbMWRAA2IslExMTM+8FAADAvLOCBkBnlVLemMkvr96b36+1XjvIeQBgJlbQAAAAGuEmIQAAAI0Y+CWOo6PbB7Jkt3r1yoyNPTGIUw1M1zLJ07au5Um6l0me9nUtkzzt61omedrXtUyDyjMyMrzPr27p7Ara0FD37obctUzytK1reZLuZZKnfV3LJE/7upZJnvZ1LVMLeTpb0AAAABYaBQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAaoaABAAA0YuBfVA0AAAvdhXdfPJDzfOCUawZynkGZ6/eta+9PYgUNAACgGVbQAACARWF8fDxXX31F/vmf/zlPP70zv/RL/1fWrn3ZAR/3hhuuzyGHHJL1619/wMdS0IAFyaUlAMBs3XXXn2TVqufmN37jN7Nt2+M566wz56SgzSUFDQAA6KQdO57KVVddkUcffTRPP/103vKWt+UVr3hlkmRiYiLLlu27Dn3iEzdn+/ZtOeec87Jz586cddYv5qMfvTk33HB9HnrowWzb9niOOuroXHrpZXM6s8+gAQAAnfTpT38yhx32/Fx//UdyxRVXpdavZeXK5+SJJ/4173znr+Xcc395n6999atPz913/1kmJiZy77335KSTTs7OnTsyPDyc97//umze/Hv56lcfyOjod+Z0ZitoAABAJz3yyDeyZs1JSZLDD//RHH74mfn2tx/NpZe+PT//86/Pq171mn2+dtWqVTn66JIvf/lLueOOz2Tjxrdm+fIVGRsby2WXXZqVK1fmySefzPj4+JzOrKABAAADMejPdh9xxI/la197MCef/PJ861v/mN/6rWvy6KP/lLe+9eKccMJPzvj6M85Yn1tu+YPs2LEjRxzxwtx771/kO9/5dt797qszNjaWe+75XCYmJuZ0Zpc4AgAAnbRu3evyT//0rWzceF6uvPKyLF26JNu3b89NN23Oxo3nZePG87Jjx1P7fP1LXvLS/K//9fc57bTXJkmOOebH80//9K1ceOG5+Y3f+LU8//kvyGOPjc7pzFbQAACATlq+fHkuv/w9B3SMj3/8U//28yGHHJrNmz/2ffscd9yLD+gc0yloAADAonXbbZ/KZz97Z5LkWc8ays6dk58pu+CCjTn22OMGPo+CBgAALFrr1r0u69a9LkkyMjKc0dHtB3Uen0EDAABohIIGAADQCAUNAACgEQoaAABAIxQ0AACARihoAAAAjVDQAAAAGuF70ABgEbrw7osHcp4PnHLNQM4D0BVW0AAAABox4wpaKWVpkuuSHJ9kR5INtdat07a/LcmZSXYnuarWeus8zQoAANBpvaygrU+yotZ6YpJLkmzas6GU8twkb0lyYpJXJXn/fAwJAACwGPRS0NYmuTNJaq33Jzlh2rZ/TfKNJM+Z+t/uuR4QAABgsejlJiGrkjw+7fGuUspQrXV86vE3kzyYZFmSq2c62OrVKzM0tGzWg/ZjZGR4IOcZpK5lkqdtXcvTj9bfg9bnm62u5Um6mWk2Ws/f+nz96FqmruWZrYWQv9UZ/9P/+OWBnOeWN3xwTo/XS0HblmT6u750Wjk7LcnzkvzY1OO7Silbaq1/ua+DjY090degszUyMpzR0e0DOdegdC2TPG3rWp5+tfwedO131LU8STczzVbL+bv4++lapq7l6Ufr+f2O+vsd7a/U9nKJ45YkpydJKWVNkgembRtL8mSSHbXWp5J8L8lzZz0hAAAAPa2g3Zrk1FLKfUmWJDm7lHJRkq211ttLKT+T5P5Syu4k9yb57PyNCwDAQuS799rnd9SGGQtarXV3kgue8fRD07ZfluSyOZ4LAABg0fFF1QAAAI3o5RJHAFj0XPrTNr8foCusoAEAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAj3MURFoFB3d0scYczAIADoaABADTIVwfA4uQSRwAAgEYoaAAAAI1Q0AAAABqhoAEAADTCTUIAGtC1mwF0LQ8ADIoVNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAaoaABAAA0QkEDAABohIIGAADQiKGZdiilLE1yXZLjk+xIsqHWunXa9tOSXJZkSZK/SXJhrXVifsYFAADorl5W0NYnWVFrPTHJJUk27dlQShlOcm2S19ZafyrJ15McOg9zAgAAdF4vBW1tkjuTpNZ6f5ITpm07KckDSTaVUj6f5Nu11tE5nxIAAGARmPESxySrkjw+7fGuUspQrXU8k6tlr0jy4iT/kuTzpZQv1Fof3tfBVq9emaGhZQcyc89GRoYHcp5B6lomebqna++BPG3rWp6ke5nkaV/XMsnTvq5lmus8vRS0bUmmn3XpVDlLku8m+ata66NJUkq5J5NlbZ8FbWzsiT5HnZ2RkeGMjm4fyLlm68K7Lx7YuT5wyjUDO9dstfw76kfX8vSra++BPG3rWp6ke5nkaV/XMsnTvq5l6ifP/kpdL5c4bklyepKUUtZk8pLGPf42ybGllENLKUNJ1iR5cNYTAgAA0NMK2q1JTi2l3JfJOzWeXUq5KMnWWuvtpZR3JLlrat9baq1fmadZAQAAOm3GglZr3Z3kgmc8/dC07TcnuXmO5wIAAFh0fFE1AABAIxQ0AACARihoAAAAjVDQAAAAGqGgAQAANEJBAwAAaISCBgAA0AgFDQAAoBEKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANCIoYM9QC8uvPvigZznA6dcM5DzAAAA7I0VNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAaoaABAAA0YmimHUopS5Ncl+T4JDuSbKi1bt3LPn+c5LZa6+/Ox6AAAABd18sK2vokK2qtJya5JMmmvexzZZLVczkYAADAYtNLQVub5M4kqbXen+SE6RtLKa9PsnvPPgAAAPRnxksck6xK8vi0x7tKKUO11vFSyrFJzkzy+iTv6uWEq1evzNDQstlPOgAjI8MHe4Q513qm1uebra7l6UfX3gN52ta1PEn3MsnTvq5lkqd9Xcs013l6KWjbkkw/69Ja6/jUz29O8oIkdyd5YZKdpZSv11r3uZo2NvZEn6POv9HR7Qd7hDnXcqaRkeGm55utruXpV9feA3na1rU8SfcyydO+rmWSp31dy9RPnv2Vul4K2pYkZyS5pZSyJskDezbUWi/e83Mp5fIkj+6vnAEAALBvvRS0W5OcWkq5L8mSJGeXUi5KsrXWevu8TgcAALCIzFjQaq27k1zwjKcf2st+l8/RTAAAAItSLytoMKML77545p3mwAdOuWYg5wEAgIOhl9vsAwAAMAAKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAaoaABAAA0QkEDAABohIIGAADQCAUNAACgEUMHewBo0YV3XzyQ83zglGsGch4AABYGK2gAAACNUNAAAAAaoaABAAA0QkEDAABohIIGAADQCAUNAACgEQoaAABAIxQ0AACARihoAAAAjRiaaYdSytIk1yU5PsmOJBtqrVunbX9rkl+YevgntdYr5mNQAACArutlBW19khW11hOTXJJk054NpZQjk7wxyUlJ1iR5VSnluPkYFAAAoOt6KWhrk9yZJLXW+5OcMG3bN5O8pta6q9Y6keQHkjw151MCAAAsAjNe4phkVZLHpz3eVUoZqrWO11qfTvJYKWVJkmuTfLHW+vB8DAoAANB1vRS0bUmGpz1eWmsd3/OglLIiyY1Jtif5lZkOtnr1ygwNLZvtnAMxMjI8804LTNcyydO+rmWSp21dy5N0L5M87etaJnna17VMc52nl4K2JckZSW4ppaxJ8sCeDVMrZ7clubvW+r5eTjg29kQ/cw7E6Oj2gz3CnOtaJnna17VM8rSta3mS7mWSp31dyyRP+7qWqZ88+yt1vRS0W5OcWkq5L8mSJGeXUi5KsjXJsiQvS7K8lHLa1P7vqLV+YdZTAgAALHIzFrRa6+4kFzzj6Yem/bxiTicCAABYpHxRNQAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAaoaABAAA0QkEDAABohIIGAADQCAUNAACgEQoaAABAIxQ0AACARihoAAAAjVDQAAAAGqGgAQAANEJBAwAAaISCBgAA0AgFDQAAoBEKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRiaKYdSilLk1yX5PgkO5JsqLVunbb93CTnJxlPcmWt9Y8JAWmrAAAgAElEQVTmaVYAAIBO62UFbX2SFbXWE5NckmTTng2llMOS/GqSn07y6iRXl1KWz8egAAAAXddLQVub5M4kqbXen+SEadt+MsmWWuuOWuvjSbYmOW7OpwQAAFgElkxMTOx3h1LK5iSfrLXeMfX4kSRH1lrHSylvSvITtdZfm9r2sSQfq7X+2TzPDQAA0Dm9rKBtSzI8/TW11vF9bBtO8r05mg0AAGBR6aWgbUlyepKUUtYkeWDatr9McnIpZUUp5YeSHJPkK3M+JQAAwCLQyyWOe+7ieFySJUnOzmRh21prvX3qLo7nZbLsXVVr/eT8jgwAANBNMxY0AAAABsMXVQMAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAaoaABAAA0QkEDAABohIIGAADQCAUNAACgEQoaAABAIxQ0AACARihoAAAAjVDQAAAAGqGgAQAANEJBAwAAaMTQwR4AAOZCKWVjkl9OMpHk75OcW2v9zhyf46wkr6+1vnYujwsAe1hBA2DBK6W8NMl/SXJSrfXYJP9fkt88uFMBwOxZQQNgwSmlnJPkbUl2JXksyS8leVGt9elSyookL0jyDzMc47wkP7dnNayU8u+T/M8kPzp1vPOTPCvJDyd5b631g/MUBwD+jRU0ABaUUsrxSd6X5DW11uOS3J7k16fK2fok/5jkPyT5yAyH+niStaWUw6Yenz31mmcnOTfJ6bXWlyR5Q5Jr5j4JAHy/JRMTEwd7BgDoWSnloiT/Z631TfvZ59wk70hyVK11937225zkoSS/neSRJGtrrf9QSjkkyc8meVGSFyd5ba11ic+gATDfrKABsNCMZ/JGIEmSUsqzSynHllLWTtvnxiRHJFk9w7E2J3lzktckeXCqnP1Iki9Nvf7eJO+cy+EBYH8UNAAWms8l+ZlSyvOmHp+f5HeS3FxKOXTquTcm+Uqt9bv7O1Ct9f4kS5K8K8mHp54+IclokitrrXcl2fMZtWVzmgIA9kJBA2BBqbU+kOTtSe4spfxdJle/3pjkPUn+vJTypSS/kGR9j4f8cJIjk3x66vGfZvJzbLWU8sVM3jRkNMlRcxYCAPbBZ9AAAAAa4Tb7AHRWKeXtmVxd25tra62/P8h5AGAmVtAAAAAaMeMKWillaZLrkhyfZEeSDbXWrdO2n5bkskx+yPpvklxYa9X6AAAAZqmXSxzXJ1lRaz2xlLImyaYk65KklDKc5NokL6+1PlZKuTjJoZn8MPVejY5uH0h5W716ZcbGnhjEqQama5nkaVvX8iTdyyRP+7qWSZ72dS2TPO3rWqZB5RkZGV6yr2293MVxbZI7k3+7HfEJ07adlOSBJJtKKZ9P8u1a6z7L2SANDXXvbshdyyRP27qWJ+leJnna17VM8rSva5nkaV/XMrWQZ8bPoJVSNif5ZK31jqnHjyQ5stY6Xkp5YyZX1F6c5F+SfD7JG2qtD+/reOPjuyZaCA4AAHCQ7HMFrZdLHLclGZ72eGmtdXzq5+8m+ata66NJUkq5J5NlbZ8FbVBLoCMjwxkd3T6Qcw1K1zLJ07au5Um6l0me9nUtkzzt61omedrXtUyDyjMyMrzPbb1c4rglyelJMvUZtAembfvbJMeWUg4tpQwlWZPkwf5HBQAAWLx6WUG7NcmppZT7MrkUd3Yp5aIkW2utt5dS3pHkrql9b6m1fmWeZgUAAOi0GQtarXV3kgue8fRD07bfnOTmOZ4LAABg0enlEkcAAAAGQEEDAABoRC+fQRu4hzecdeDHmPbz0ZtvOuDjAQDAHv389+o+b3O+H/47dvGxggYAANCIJlfQDrZdu3blfe+7Mt/85jeSLMnb3/6OHHnkUQd83BtuuD6HHHJI1q9//YEPCQAAdI4VtL3YsuXzSZIPfvDGnHvuL+dDH7ruIE8EAAAsBlbQkuzY8VSuuuqKPProo3n66adz0UUX5+KLfz1J8u1vP5of/MF9f9P3Jz5xc7Zv35ZzzjkvO3fuzFln/WI++tGbc8MN1+ehhx7Mtm2P56ijjs6ll142qDgAAMACZQUtyac//ckcdtjzc/31H8kVV1yVBx/8SoaGhnLllZflt3/72rzqVaft87WvfvXpufvuP8vExETuvfeenHTSydm5c0eGh4fz/vdfl82bfy9f/eoDGR39zgATAQAAC5EVtCSPPPKNrFlzUpLk8MN/NIcffmaS5J3vvCLf/e5jOe+8s/Lf//sn8uxnP/v7Xrtq1aocfXTJl7/8pdxxx2eyceNbs3z5ioyNjeWyyy7NypUr8+STT2Z8fHygmQAAgIWnyYI2F7cTHRkZzujo9p72PeKIH8vXvvZgTj755fnWt/4xb3jD+px//oX5z//57KxYsSJLly7N0qVL9vn6M85Yn1tu+YPs2LEjRxzxwtx771/kO9/5dt797qszNjaWe+75XCYmJg44EwAA0G1NFrRBW7fudbn66ndn48bzsmvXrvzO71yfT33qE7nwwnMzPj6eX/3Vi7J8+Yp9vv4lL3lprrnmPXnzm89JkhxzzI/npptuyIUXnpslS5bk+c9/QR57bHRQcQAAgAVKQUuyfPnyXH75e/63517ykpfO6hgf//in/u3nQw45NJs3f+z79jnuuBf3NyAAALAoKGg9uu22T+Wzn73z+56/4IKNOfbY4w7CRAAAQNcoaD1at+51WbfudQd7DAAAoAcPbzhr9q/p4zxzcf+M6dxmHwAAoBEKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGjEjN+DVkpZmuS6JMcn2ZFkQ61167Tt/zXJ2iTbp55aV2t9fB5mBQAA6LRevqh6fZIVtdYTSylrkmxKsm7a9pcmeXWt9bH5GBAAAGCx6OUSx7VJ7kySWuv9SU7Ys2Fqde1FST5UStlSSjlnXqYEAABYBHpZQVuVZPoli7tKKUO11vEkz0nyO0l+K8myJJ8rpfx1rfXL+zrY6tUrMzS07EBm7tnIyPBAzjNIXcskT9u6lifpXiZ5BmfLuv8469c83Md5fvq2T/bxqsFp+XfUj67lSbqXqdU8/fzz3Y9W80/X6owL9XfUS0HblmT6WZdOlbMkeSLJf621PpEkpZS7M/lZtX0WtLGxJ/ocdXZGRoYzOrp95h0XkK5lkqdtXcuTdC+TPN3U8nvQtd9R1/Ik3cvUtTz9aD2/31F/v6P9lbpeLnHckuT0JJn6DNoD07YdnWRLKWVZKeUHMnk55N/OekIAAAB6WkG7NcmppZT7kixJcnYp5aIkW2utt5dSfi/J/UmeTvKxWutX529cAAAWooc3nDX71/RxnqM339THq6AdMxa0WuvuJBc84+mHpm2/Nsm1czwXAADAouOLqgEAABqhoAEAADRCQQMAAGhELzcJAQAAOs6NXNpgBQ0AAKARVtAAgAXP//MPdIUVNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAjfFE1AECDfPk2LE5W0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKAR7uIIAIuQOwQCtElBAxYk/3EJAHTRjAWtlLI0yXVJjk+yI8mGWuvWvezzx0luq7X+7nwMCgAA0HW9fAZtfZIVtdYTk1ySZNNe9rkyyeq5HAwAAGCx6aWgrU1yZ5LUWu9PcsL0jaWU1yfZvWcfAAAA+tPLZ9BWJXl82uNdpZShWut4KeXYJGcmeX2Sd/VywtWrV2ZoaNnsJ+3DyMjwQM4zSF3LJE/bWs7Tz+fJ+tHye5C0P99stZyna39z8vRnkH+jXcskT3/8zfVvoebppaBtSzL9rEtrreNTP785yQuS3J3khUl2llK+Xmvd52ra2NgTfY46OyMjwxkd3T6Qcw1K1zLJ07au5elXy+9B135HXcvTr669B/K0r2uZ5Glf1zL1k2d/pa6XgrYlyRlJbimlrEnywJ4NtdaL9/xcSrk8yaP7K2cAAADsWy8F7dYkp5ZS7kuyJMnZpZSLkmyttd4+r9MBAAAsIjMWtFrr7iQXPOPph/ay3+VzNBMAAMCi1MtdHAEAABgABQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAa0cv3oDHHHt5wVn+v6+M1R2++qa9zAQAAg2cFDQAAoBEKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARChoAAEAjFDQAAIBGKGgAAACNUNAAAAAaoaABAAA0YmimHUopS5Ncl+T4JDuSbKi1bp22/cIkZyWZSPJ/11pvmZ9RAQAAuq2XFbT1SVbUWk9MckmSTXs2lFIOTfLLSU5K8sokm0opS+ZjUAAAgK7rpaCtTXJnktRa709ywp4NtdbHkry41vp0ksOSPFVrnZiPQQEAALpuxksck6xK8vi0x7tKKUO11vEkqbWOl1I2Jrkiyf8z08FWr16ZoaFlfQ07WyMjwwM5z2w9PMBztfoe7NH6fLMlz+AM6p+jlt+DpP35ZqvlPF37m5OnP4P8G+1aJnn642+ufws1Ty8FbVuS6Wdduqec7VFr/W+llA8luaOU8opa6+f2dbCxsSf6m3SWRkaGMzq6fSDnalnL70HXfkfydFPL70HXfkddy9Ovrr0H8rSva5nkaV/XMvWTZ3+lrpeCtiXJGUluKaWsSfLAng2llJLk6iT/McnTmbyJyO5ZTwgAAEBPBe3WJKeWUu5LsiTJ2aWUi5JsrbXeXkr5uyRfyORdHO+otf7F/I0LAADQXTMWtFrr7iQXPOPph6ZtvyKTnz8DAADgAPiiagAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGhEL7fZP+ge3nDW7F/Tx3mO3nxTH68COHD+PQcAJFbQAAAAmqGgAQAANEJBAwAAaISCBgAA0AgFDQAAoBEKGgAAQCMUNAAAgEYsiO9BAw7MoL5jK/E9WwAAB0JBA2DO+eJtAOiPSxwBAAAaoaABAAA0QkEDAABohIIGAADQCAUNAACgEQoaAABAI2a8zX4pZWmS65Icn2RHkg211q3Ttr81yS9MPfyTWusV8zEoAABA1/WygrY+yYpa64lJLkmyac+GUsqRSd6Y5KQka5K8qpRy3HwMCgAA0HW9FLS1Se5Mklrr/UlOmLbtm0leU2vdVWudSPIDSZ6a8ykBAAAWgRkvcUyyKsnj0x7vKqUM1VrHa61PJ3mslLIkybVJvlhrfXh/B1u9emWGhpbNasj9HnAOjYwMD+Q8g8qTDC5Tv1qfb7ZazdPFvzn/XuiPPP3rWiZ5+uNvrn/y9MffXP8Wap5eCtq2JNPPurTWOr7nQSllRZIbk2xP8iszHWxs7InZzjgwo6PbD/YIc67lTCMjw03PN1tdy9Ovrr0H8rSta3mS7mWSp31dyyRP+7qWqZ88+yt1vVziuCXJ6UlSSlmT5IE9G6ZWzm5L8ne11vNrrbtmPR0AAABJeltBuzXJqaWU+5IsSXJ2KeWiJFuTLEvysiTLSymnTe3/jlrrF+ZlWgAAgA6bsaDVWncnueAZTz807ecVczoRC9LDG86a/Wv6OM/Rm2/q41UAALAw+KJqAACARihoAAAAjVDQAAAAGqGgAQAANEJBAwAAaISCBgAA0AgFDQAAoBEKGgAAQCMUNAAAgEYoaAAAAI1Q0AAAABqhoAEAADRCQQMAAGiEggYAANAIBQ0AAKARQwd7AGjRwxvOmv1r+jjP0Ztv6uNVAAB0lRU0AACARihoAAAAjVDQAAAAGqGgAQAANEJBAwAAaMSMd3EspSxNcl2S45PsSLKh1rr1GfuMJNmS5Lha61PzMSgAAEDX9bKCtj7JilrriUkuSbJp+sZSyquT/Gn+//buPMyuqsz3+DchQASDRIgizlzNK90I2NJNQBAccMBGuGr3VduLwEUmaQdsFXEAuhFwwKkBlQsIzjOCiiA0ctVg1BYREHk1tmMrEiUMCgSS1P1jn5JDTEhVnXNqr73q+3mePKlzdtU578qqX6XePS3YavjlSZIkSdLMMZEGbTfgIoDMXALstMb21cDTgZuGW5okSZIkzSwTWah6M+CWvserImJOZq4EyMxLACJiQm84f/4mzJmzwaSKnMoCwFOxYMG8aXmf6RoP1DcmxzM1fs9NneOZmtrGA/WNyfFMjd9zU+d4psbvuanr6ngm0qDdCvS/6+zx5mwqli+/fapfOnLLlt3WdglDV9uYHE/5ahuT4ylbbeOB+sbkeMpX25gcT/lqG9NUxnNfTd1ETnFcDOwNEBGLgGsmXYEkSZIkab0mcgTtPGCviLgCmAUcGBFHAUsz84KRVidJkiRJM8h6G7TMXA0ctsbT16/l8x41pJokSZIkaUZyoWpJkiRJKoQNmiRJkiQVwgZNkiRJkgphgyZJkiRJhbBBkyRJkqRC2KBJkiRJUiFs0CRJkiSpEDZokiRJklQIGzRJkiRJKoQNmiRJkiQVwgZNkiRJkgphgyZJkiRJhbBBkyRJkqRC2KBJkiRJUiFs0CRJkiSpEDZokiRJklQIGzRJkiRJKoQNmiRJkiQVwgZNkiRJkgphgyZJkiRJhbBBkyRJkqRCzFnfJ0TEbOB0YAdgBXBwZi7t2/4y4FBgJXBCZn5pRLVKkiRJUtUmcgRtP2BuZu4CHA2cMr4hIrYCXgE8CXgmcFJEbDyKQiVJkiSpdhNp0HYDLgLIzCXATn3b/g5YnJkrMvMWYCmw/dCrlCRJkqQZYNbY2Nh9fkJEnAl8LjO/0nv8S2CbzFwZES8BHp+Zr+9t+zDw4cy8dMR1S5IkSVJ1JnIE7VZgXv/XZObKdWybB9w8pNokSZIkaUaZSIO2GNgbICIWAdf0bfsOsHtEzI2IBwDbAtcOvUpJkiRJmgEmcorj+F0ctwdmAQfSNGxLM/OC3l0cD6Fp9k7MzM+NtmRJkiRJqtN6GzRJkiRJ0vRwoWpJkiRJKoQNmiRJkiQVwgZNkiRJkgphgyZJkiRJhbBBkyRJkqRC2KBJkiRJUiFs0CRJkiSpEDZokiRJklQIGzRJkiRJKoQNmiRJkiQVwgZNkiRJkgphgyZJkiRJhbBBkyRJkqRC2KBJkiRJUiFs0CRJkiSpEDZokiRJklQIGzRJkiRJKoQNmiRJkiQVwgZNklSViNgvIm4d0WvvGRHXjuK1JUkCGzRJUkUi4rHAO/H/N0lSR81puwBJkiYrIg4CXgOsAn4PvBT4A/BR4Cjg4xN4jWcAp2Tm43uPNwd+BmwDPAk4BtgIeBBwbma+efgjkSTp3tzDKEnqlIjYAXgb8KzM3B64AHgj8MHen6sn+FKXAPePiJ16j18EfBm4mab5e2lm7gQsAt4QEVsObxSSJK2dDZokqWueBlycmb8CyMz30DRlKzPz7Im+SGaOAWcBB/SeOhA4s/f8PsATI+JY4F3ALGDToY1AkqR1sEGTJHXNSmBs/EFE3A84DvjbiLgKuBC4X0RcFRFbr+e1PgT8Y0TsCGyemZdHxKbA94G/Aa4EXgvcTdOkSZI0UjZokqSu+Rrw9Ih4SO/xocCSzNwuM3cE9gbuyMwdM/M39/VCmfnfwLdpTo08s/f0Y4HNgDdl5heBPYCNgQ2GPxRJku7Nm4RIkjolM6+JiNcCF0UEwG+BgwZ4yf8LfBZ4bu/x1cCXgOsj4mZgKXAd8BhgxQDvI0nSes0aGxtb/2dJkiRJkkbOI2iSpGpFxD/RXEO2Nh/LzHdMZz2SJK2PR9AkSZIkqRDeJESSJEmSCmGDJkmSJEmFmPZr0JYtu21azqmcP38Tli+/fTreatrUNibHU7baxgP1jcnxlK+2MTme8tU2JsdTvtrGNF3jWbBg3jrX1qz2CNqcOfUtV1PbmBxP2WobD9Q3JsdTvtrG5HjKV9uYHE/5ahtTCeOptkGTJEmSpK6xQZMkSZKkQtigSZIkSVIhbNAkSZIkqRA2aJIkSZJUCBs0SZIkSSrEtK+DJkmSJHXd+0++fFre5/Cj95yW91E5bNAkSZIkTYthN7Y1NrCe4ihJkiRpRlm+/Cae97zn8Itf/Hwor3fWWR/kC1/47FBeywZNkiRJ0oyxcuVK3v72E9loo43bLmWtPMVRkiRJUpVWrLiTE088nhtuuIG7776bo456HZde+lX22+/5fOQjH7rPr/3MZz7JbbfdykEHHcJdd93FAQe8iHPP/SRnnfVBrr/+Om699RYe85iFHHPMsUOt2SNokiRJkqr0hS98jq222poPfvBDHH/8iVx33bVsvvnm7LzzLuv92mc+c28uu+xSxsbG+OY3v86uu+7OXXetYN68ebznPadz5pkf4Yc/vIZly24cas0eQZMkSZJUpV/+8hcsWrQrAA9/+CM4+eSvMWvWLP7zP7/D0qU/5oQT3sLJJ7+LLbbY8i++drPNNmPhwuDqq6/iK1/5Ikce+Wo23nguy5cv59hjj2GTTTbhjjvuYOXKlUOt2SNokiRJkqr0yEc+mh/96DoA/vu/f82CBQ/i1FPP4NRTz+Axj1nIm970r2ttzsbts89+fPrTH2fFihU88pGPYsmSxdx44+84/vgTOeSQl7NixZ2MjY0NtWaPoEmSJEmaFtN9W/x9930eJ530rxx55CGsWrWKV77yNZP6+ic84Ym8/e1vZf/9DwJg223/mnPOOYuXv/xlzJo1i623fii///2yodZsgyZJkiSpShtvvDHHHffWtW479dQzJvQan/jE5//88RZbbMmZZ374Lz5n++13nFqBa2GDJkmSJGnGOv/8z3PJJRcBsNFGc7jrruaassMOO5Ltttt+2uuxQZMkSZI0Y+277/PYd9/nAbBgwTyWLbut1Xq8SYgkSZIkFcIGTZIkSZIKYYMmSZIkSYWwQZMkSZKkQtigSZIkSVIhbNAkSZIkqRDrvc1+RMwGTgd2AFYAB2fm0r7trwFeDKwGTszM80ZUqyRJkiRVbSJH0PYD5mbmLsDRwCnjGyJic+CVwC7AM4D3jKJISZIkSZoJJtKg7QZcBJCZS4Cd+rb9CfgFsGnvz+phFyhJkiRJM8V6T3EENgNu6Xu8KiLmZObK3uNfAdcBGwAnre/F5s/fhDlzNph0oVOxYMG8aXmf6VTbmBxP2WobD9Q3JsdTvtrG5HjKV9uYahvPZHVh/F2ocTLaHs9EGrRbgf4qZ/c1Z88GHgI8uvf44ohYnJnfWdeLLV9++5QKnawFC+axbNlt0/Je06W2MTmestU2HqhvTI6nfLWNyfGUr7Yx1TaeqSh9/LXN0XSN576awImc4rgY2BsgIhYB1/RtWw7cAazIzDuBm4HNp1ypJEmSJM1gEzmCdh6wV0RcAcwCDoyIo4ClmXlBRDwdWBIRq4FvApeMrlxJkiRJqtd6G7TMXA0ctsbT1/dtPxY4dsh1SZIkSdKM40LVkiRJklQIGzRJkiRJKoQNmiRJkiQVwgZNkiRJkgphgyZJkiRJhbBBkyRJkqRCTGQdNEmSJEnqlPeffPm0vM/hR+851NfzCJokSZIkFcIjaJIkSRq5rh7NmEmcozJ4BE2SJEmSCmGDJkmSJEmFsEGTJEmSpELYoEmSJElSIWzQJEmSJKkQNmiSJEmSVAgbNEmSJEkqhA2aJEmSJBXCBk2SJEmSCmGDJkmSJEmFsEGTJEmSpELYoEmSJElSIWzQJEmSJKkQNmiSJEmSVAgbNEmSJEkqhA2aJEmSJBXCBk2SJEmSCmGDJkmSJEmFsEGTJEmSpELYoEmSJElSIeas7xMiYjZwOrADsAI4ODOX9m1/NnAsMAv4HvDyzBwbTbmSJEmSVK+JHEHbD5ibmbsARwOnjG+IiHnAO4C/z8ydgZ8DW46gTkmSJEmq3kQatN2AiwAycwmwU9+2XYFrgFMi4hvA7zJz2dCrlCRJkqQZYL2nOAKbAbf0PV4VEXMycyXN0bKnADsCfwS+ERHfyswfr+vF5s/fhDlzNhik5glbsGDetLzPdKptTI6nbLWNB+obk+MpX21jcjzlq3FMk1Hb+GsbD9Q3pmGPZyIN2q1A/7vO7jVnAH8AvpuZNwBExNdpmrV1NmjLl98+xVInZ8GCeSxbdtu0vNd0qW1MjqdstY0H6huT4ylfbWNyPOWrcUyTVdv4axsP1DemqYznvpq6iZziuBjYGyAiFtGc0jjuSmC7iNgyIuYAi4DrJl2hJEmSJGlCR9DOA/aKiCto7tR4YEQcBSzNzAsi4g3Axb3P/XRmXjuiWiVJkiSpautt0DJzNXDYGk9f37f9k8Anh1yXJEmSJM04LlQtSZIkSYWwQZMkSZKkQtigSZIkSVIhbNAkSZIkqRA2aJIkSZJUCBs0SZIkSSqEDZokSZIkFcIGTZIkSZIKYYMmSZIkSYWwQZMkSZKkQtigSZIkSVIhbNAkSZIkqRA2aJIkSZJUCBs0SZIkSSqEDZokSZIkFcIGTZIkSZIKYYMmSZIkSYWwQZMkSZKkQtigSZIkSVIhbNAkSZIkqRA2aJIkSZJUCBs0SZIkSSqEDZokSZIkFcIGTZIkSZIKYYMmSZIkSYWwQZMkSZKkQtigSZIkSVIhbNAkSZIkqRA2aJIkSZJUiDnr+4SImA2cDuwArAAOzsyla/mcLwPnZ+YHRlGoJEmSJNVuIkfQ9gPmZuYuwNHAKWv5nBOA+cMsTJIkSZJmmok0aLsBFwFk5hJgp/6NEfECYPX450iSJEmSpma9pzgCmwG39D1eFRFzMnNlRGwHvBh4AfCWibzh/PmbMGfOBpOvdAoWLJg3Le8znWobk+MpW23jgfrG5HjKV9uYHE/5ahzTZNQ2/trGA/WNadjjmUiDdivQ/66zM3Nl7+P9gYcClwGPAu6KiJ9n5jqPpi1ffvsUS52cBQvmsWzZbdPyXtOltjE5nrLVNh6ob0yOp3y1jcnxlK/GMU1WbeOvbTxQ35imMp77auom0qAtBvYBPh0Ri4Brxjdk5jeiG04AACAASURBVOvGP46I44Ab7qs5kySpq95/8uXT8j6HH73ntLxPbZwfSbWYSIN2HrBXRFwBzAIOjIijgKWZecFIq5MkSZKkGWS9DVpmrgYOW+Pp69fyeccNqSZJkiRJmpFcqFqSJEmSCmGDJkmSJEmFsEGTJEmSpELYoEmSJElSISZyF0dJkiRNM5cOkGYmj6BJkiRJUiFs0CRJkiSpEDZokiRJklQIr0GTJGkG8vomSSqTR9AkSZIkqRA2aJIkSZJUCBs0SZIkSSqEDZokSZIkFcIGTZIkSZIKYYMmSZIkSYWwQZMkSZKkQtigSZIkSVIhbNAkSZIkqRA2aJIkSZJUCBs0SZIkSSqEDZokSZIkFcIGTZIkSZIKMaftAiRpKt5/8uXT8j6HH73ntLyPJEkSeARNkiRJkorhETRpBpiuo03gESdJkqRBeARNkiRJkgphgyZJkiRJhbBBkyRJkqRCdOIaNO/WJkmSJGkmWG+DFhGzgdOBHYAVwMGZubRv+6uBF/YeXpiZx4+iUEmSJEmq3UROcdwPmJuZuwBHA6eMb4iIbYB/AnYFFgHPiIjtR1GoJEmSJNVuIqc47gZcBJCZSyJip75tvwKelZmrACJiQ+DOoVdZGW95LmlNnsotSZJgYg3aZsAtfY9XRcSczFyZmXcDv4+IWcA7gO9n5o/v68Xmz9+EOXM2mHrFI7Rgwby2Sxi60sdUen2TVdt4pqK2fwPHM71Kr2861PZv4HjKV9uYHE/5ahvTsMczkQbtVqD/XWdn5srxBxExFzgbuA04Yn0vtnz57ZOtcdosW3Zb2yUMXcljWrBgXtH1TVZt45mq2v4NHM/0MUON2v4NHE/5ahuT4ylfbWOaynjuq6mbSIO2GNgH+HRELAKuGd/QO3J2PnBZZr5t0pVJkqrkKZuSJE3NRBq084C9IuIKYBZwYEQcBSwFNgD2ADaOiGf3Pv8NmfmtkVQrSZIkSRVbb4OWmauBw9Z4+vq+j+cOtSJJkiRJmqEmcpt9SZIkSdI0mMgpjtJ6eb2JJEmSNDiPoEmSJElSIWzQJEmSJKkQNmiSJEmSVAgbNEmSJEkqhA2aJEmSJBXCBk2SJEmSCmGDJkmSJEmFcB00aS1c102SJElt8AiaJEmSJBXCBk2SJEmSCmGDJkmSJEmFsEGTJEmSpELYoEmSJElSIWzQJEmSJKkQNmiSJEmSVAgbNEmSJEkqhA2aJEmSJBXCBk2SJEmSCmGDJkmSJEmFsEGTJEmSpELYoEmSJElSIWzQJEmSJKkQNmiSJEmSVAgbNEmSJEkqhA2aJEmSJBXCBk2SJEmSCmGDJkmSJEmFmLO+T4iI2cDpwA7ACuDgzFzat/1lwKHASuCEzPzSiGqVJEmSpKpN5AjafsDczNwFOBo4ZXxDRGwFvAJ4EvBM4KSI2HgUhUqSJElS7SbSoO0GXASQmUuAnfq2/R2wODNXZOYtwFJg+6FXKUmSJEkzwKyxsbH7/ISIOBP4XGZ+pff4l8A2mbkyIl4CPD4zX9/b9mHgw5l56YjrliRJkqTqTOQI2q3AvP6vycyV69g2D7h5SLVJkiRJ0owykQZtMbA3QEQsAq7p2/YdYPeImBsRDwC2Ba4depWSJEmSNANM5BTH8bs4bg/MAg6kadiWZuYFvbs4HkLT7J2YmZ8bbcmSJEmSVKf1NmiSJEmSpOnhQtWSJEmSVAgbNEmSJEkqhA2aJEmSJBViTtsF6L5FxDzggcCyzLy97Xp0bxGxHbAnsAVwI/AfmfnjVosaUG3fc7052gK4MTN/1HY9w1DbHNWmxvmpLUc1zlFNapwfM6TJqOYmIRGxO/AqYDfgLmAl8C3g1My8os3apiIi9geO4J5f/DcHlgOnZ+bH26xtKiqcn22BdwK30yw98VtgPrAzzY6PYzLzh+1VOHk1fc9FxMbA64F/BH4H3EAzP1sDnwbenZl3tFfh1NQ0RwARMQt4Dmvs5AAuyczO/edU4fxUl6MK58gMFcwMla/UDFXRoEXEv9Msmv0J4LrMXN17/vHAS4B5mXlEiyVOSkScQ7P+3Gcy8+a+5x8AvBjYNTP/d0vlTVpt8wMQEcfR/GC9ZS3b5gOvzsy3THthU1Th99w5wMdojmiu7nt+FvAs4EWZuX9L5U1JhXP0VOCNwJX85U6OHYGTMvPS9iqcnNrmB+rLUW1zZIbKZ4bKVnSGxsbGOv9n4cKFD1rP9ge3XeMkxzN3kO2l/altftZS/+y2axjCGKr6nqvxT21ztHDhwkMXLly4wTq2bbBw4cLD265xJs9PjX9qmyMz5B/naODxFJuhKo6gAUTEDsDTgQcANwPfyMzvtlvV1EXEvqwxHuCzXTxlAaqcn22AdwE70ZyuOZtm78uru3oNWk3fcxExFzgU2AvYjHvGc2rXTifpV9McjYuILeiNJzNvarueQdQ2PzXmqLY5AjNUMjPUDSVmqIqbhETEW2gOR14M/AyYBxwXEVdm5ptbLW4KIuI0ml/4vwLcRjOeZwPPBA5usbQpqW1+es4E3pCZ3x5/IiIWAR8CntRaVVNU2/cczTxcBRzDvcfzceB/tljXlNU2RxGxE3A6sAHwR2Be77Sfl3f0utSq5qenqhzVNkdmqBPMUMFKzlAVDRqwV2bu3v9E77qnJUAXG4DtMnOPNZ67ICIWt1LN4GqbH4C5/c0ZQGYuiYi26hlUbd9zW2fmi9Z47uqI+EYr1QxHbXP0HuD5mfmr8Sci4hHAZ2h26HRNbfMD9eWotjkyQ+UzQ2UrNkO1NGgbRsSjMvPnfc89Cli99k8v3uyI2D0z/xzgiHgycHeLNQ2itvkB+EFEnA1cBNxCsxdpb+DqVquautq+5+7s3Wlqzfn5Y6tVDaa2Odqw/z/Fnl8BXT1Nprb5gfpyVNscmaHymaGyFZuhWhq0VwLnRcRGNHcL3AxYARzealVTdwDwroj4RO/xauD7wMtaq2gwtc0PNLeY3Y9m2YDNaMb1JeC8NosawAHU9T33YuAtNN9782jmZzHw0jaLGtAB1DVHX46IS4Gvcs8vLs8ELmy1qqk7gLrmB+rL0QHUNUdmqHxmqGzFZqiam4TAnxfN2wy4NTNva7se3dtMmJ+IeFhm/rrtOqQuiIgncO+dHIsz88p2q5K6wwxJgyk2Q23f4nLEt898U9s1DHk8p7Zdg/Oz3jGd23YNQx5Pbd9zn227BudovePZqe0anJ/1jqmqHNU2R2ao/D9mqOw/JWRodtsN4oh19aLFdTmt7QKGrLb5ITO7etrCutT2PdfV0zDuS21z9A9tFzBktc0P1Jej2ubIDJXPDJWt9QxVc4pjRGwIbM896zJcm5l3tVvV1EXEAuDJ3DOeb2Xmb9utauoqnJ+5wGHA07j3WiCdXdtE0mAiYjvgzsxc2vfczmve8bVLIuLvacZ0ad9z+2bm+S2WNWU1zlGtImIPYHX/DSm6KCIeCNwF/AnYn+a6rY92ed2wcbXMUYmqaNAi4jnAScBP6K1jADwOOCYzv9BmbVMREQcDhwDf5J51Jp4MnJmZH2iztqmobX4AehfIXsVfrgWyKDO7uLbJRuva1sVGOiIWrmtbhxcSr2qOahMRb6a5uHxD4ErgiMwci4jLMvOp7VY3NRFxOrA5zQ3FNgWel5krujqm2uYoIh4LnAzcARyfmT/pPf/+zOzcTbgi4h+AU2jG81FgD5obin0rM09os7apiohX0NxUbDZwObAxTaO2OjOPbLG0KaltjiJiU5r1224GLgM+AqwCDm/7d4Va7uL4RmC3zLx1/ImIeABwKdDFBuBA4EmZ+efblvZ+OVsMdK5Bo775gfrWNrkGeDBwEzCL5haz439v02JdU3U2Td3X04xj3BjQuV/Eeqqao4g4cV3bMvOY6axlSPbOzF0AIuIdNKf8HMG9v/+65vHja1hGxD8Dn6K5e21Xx1TbHJ1Bs/NzQ+ALEfGSzPw+zQ7QLnoN8FfAQ4Aren+votlZ3blf/nteTDOmLYGrMnNrgIj4eqtVTV1tc/RRmp3tj6dZl/dQmgMJpwF7tVhXNQ3ahsDtazx3BwWsYzBFGwL3497rSmxCt8dT0/xAfWub7AZcDDwtM5e3XcwQPAP4f8D/zsz/bruYIaltjm6kWWrjrXT3F+R+fx5DZr42Ij4WEa+l2z/nNoyIjTNzRWb+e0Q8IiLe13ZRA6hujjLzqwARsRT4fEQ8i+6OZzZwe2b+JCKOy8yVABHR5fslzAY2ycwbI+Ll8Ocd7us8I6Jwtc3RAzPz+F7912Tmf0AZ46mlQTsDuDIivknzy/JmNL/MdPU/kn8DvhcRP+Ge8TwGOKrVqqautvmBytY2ycxlEXE08DfAf7Rdz6Ay8/aIOAx4BFBFg1bhHL0nInYCftN/fVOHfSoivgM8KzNvAg4CLgAWtVvWQN4LXBsRu2bmMuB1ND/Pd2+3rCmrbY5WRsQ+wIWZmRFxJM16nBu2XNdUnQtcFRE7ZuZpABHxOQpYk2oAb6P5fW7bzBxfJ/WrwJkt1jSI2ubo7oj4p8z8WETsABARe0L7N1Gs4ho0gIh4MPB33LOOwXcy83ftVjV1ETEH2JZ7xvOj8T0VXVTb/EgaXO9mO3Mz8+a2axmGiHg08MvMXNX33H5dvdYW/jxHK/pvaBART+idStc5Nc1RRDycZofuUb2Gk4h4CvDuzNyx1eKmKCK2yMw/9D1e2Pa1QIOKiNmZubrv8bwurwVb0xxFxFbA6zPz1X3PnQa8LzOzvcoqatAkSZIkqetaP4QnSZIkSWrYoElDFBEPioj7tV2HJEmSuqnqUxwj4vU0d216Z5ev3xoXEe+muUvg2zKzq3cL/LPa5gf+vD7aT4EPZOav265nULXNUW0Zgirn6EHAbbUs+F7b/EB9OaptjsxQ+cxQ2UrIUC13cVyXq2gWo5wPLGu5lmE4h2ax51rmrbb5YS1ro3VdbXN0DnVlCOqbo/cCP42IKnZyUN/8QH05qm2OzFD5zsEMlaz1DFV9BK3rIuJU4MzMvKrtWnRvEXES61hrpqOL7FbJDEmDM0fSYMyQJquWzr1WXwKOiYiH0qx2/rHMvLXlmqYsIo6hWUfndppD4WOZuXW7VU3Z9W0XoAmpKkNQT47cydEpVeXIDKkFZqhAJWeoiiNoEXE1sOUaT3f2G2ZNEbGA5nDrc4HPAv+WmT9tt6rJi4gfALtk5u1t1zIsvfXq/pZmYdBZwNaZ+Yl2q5o8M9QdteQoIta5qHtmnjudtQxD7RmCenJkhspkhrrDDI1eLUfQngd8AnhyLRfFAkTEtsABwD7A14Ddaebs08AT26tsyn4GVDM/PefRNGcPBTYAfkPzvdg1Zqg7qsjR+H9+a9vJ0WZdA6gyQ1BljsxQmcxQd5ihEauiQcvMpRHxXuApwIVt1zNEZwBnAsf376WIiLPbK2kgGwHXRMQ1vcdjmfniNgsagi0zc5eIOBP4Z+CStguaCjPUKbXlqIqdHBVnCOrLkRkqkBnqFDM0YlU0aACZ+dF1bYuIR2bmL6azniH59doOsWbmaW0UMwRva7uAERj/QbtpZt4REZ09Z9gMdUZtOapiJwdUmyGoL0dmqFBmqDPM0IhV06Ctx4eAp7ZdxBRsGBHbAz8GVgNk5l3tljSQ7wNvBv6KZkz/1m45Q/H5iHgL8IOIWEKzrkmNzFA5astRNTs51qOrGYL6cmSGuskMlcMMjdhMadBmtV3AFAVwft/jMWCblmoZhrOB/wd8DNiDZh2Q57ZZ0KD6935FxJdp1jWpkRkqR205mik7ObqaIagvR2aom8xQOczQiM2UBq31TniKDsrM744/iIg9W6xlGLbIzH/vfXxVRLyg1WqGICL2AQ4E5vY9vXdL5YySGSpHVTmaQTs5upohqC9HZqibzFA5zNCIzZQGrVMiYneaw8avjoh39Z6eDRwJbNdaYYO7X0RslZk3RMSDaS7E7Lp3AocCy9suRPeoOENQWY5m0E6Ozqk4R2ZI08IMdUOJGZopDVrXDosvB7YCNgYe0ntuNc2igF32ZuCKiLgVmAcc0nI9w/DDzLy87SKmgRkqR205mik7ObqWIag3R2aom8xQOczQiM2UBu2ytguYjMy8Frg2Iv5vZv6m7XqGJTMvAbaJiC0z8/dt1zMk50fEt4AfjT+RmQe1WM+omKFCVJijmbKTo1MZgnpzZIY6ywwVwgyN3qyxsS6f0ntvEbEXcBTNngoAMrOrd/whIvYH3kAznlk060x07qLSiDg1M4/sNTL3+obLzF1bKmsoIuJ7wNuBm8efy8yL26toMGaoXLXmKCJeChxGJTs5assQ1JMjM9QNZqhcZmj61HYE7d3Aq4BftV3IkLyeZtX5ro9n/ParL2y1itG4ITM/1XYRQ2SGylVrjl7BGjs5Oq62DEE9OTJD3WCGymWGpkltDdovM/PStosYov/KzKVtFzGozPxd78Nj19h0d0T8CjgtM4s573eS7oiIi2jWBBkDyMxj2i1pIGaoUBXnqLadHLVlCCrJkRnqDDNUKDM0fWpr0G6MiA9w71+Wz2i3pIHcHhFfAa6ijl/+7wf8FPgGsAj4W+BG4Fy6u37GhcCqtosYIjNUvtpyVNtOjtoyBPXlyAyVzQyVzwyNWG0N2s96f2/VahXDc2HbBQzZgsx8Ue/jiyPiq5n55oj4eqtVDeaFmfmMtosYIjNUvtpyVNtOjtoyBPXlyAyVzQyVzwyNWFUNWmYeHxHPAf66eZjnr+9rCvez9X9Kp2wWEY/LzOsj4nHAvIjYArh/24UNYHlE7Askza1zycwft1vS1JmhTqgtR1Xt5KgwQ1BfjsxQwcxQJ5ihEauqQYuIk4DHAt8EXhoRu2fmv7Rc1iAO7/09i+YH1c+Bru6dgGZhxo9FxENoLpR9OfC/gLe2WtVgHkRzMfO4MaCzd5syQ51QW46q2slRYYagvhyZoYKZoU4wQyNW2232F2fmk3ofzwKWZObOLZc1FBGxEfDpzNyv7VoGEREPAB4F/DQz/9hyOUPR22v0P2guAu70eiBmqBtqylFEfG2Np8a6fEvtmjME9eTIDJXLDHWDGRqt2W2++QhsGBHjY5rFGms0dNwcoHNrZvSLiOcDlwMfBV4dEW9qt6LBRcQ/AFcAxwBLIuIlLZc0KDNUuNpylJlPAV5Acxvqf2j7P8UhqDlDUEGOzFDxzFDhzNDoVXWKI/ApYHFELAF27j3urIj4Lc0Pplk0c/Wedisa2FE0d/u5CDgB+M/e3112FPDEzPxjRMwDLqP5gdVVZqh8VeWot5PjBJoFQreLiOMy0wwVpMIcmaGymaHymaERq6pBy8xTIuJi4HHAWZl5bds1DSIzH9J2DUO2KjNXRMRYZo5FxJ/aLmgIVo8f2s/M2yLizrYLGoQZ6oTaclTVTo7aMgRV5sgMFcwMdYIZGrEqGrSIODgzz+xdWDp+KPxvIqL1dQwGERGPB84GHgbcAByUmd9vt6qBfDMiPg48rLfGyXfbLmgI/isiTqG52PfJNOuCdI4Z6pTaclTFTo5aMwRV5sgMFcgMdYoZGrEqGjSaO8gAXN9qFcP3PuDgzPxBROwInAY8qeWaBvE2YBeahQCvz8wvtlzPMBwIHArsBVwHHN1uOVNmhrqjthxVsZODejME9eXIDJXJDHWHGRqxKhq0zLy49+FngfnASuBlwIdbK2o4ZmXmDwAy86qIWNl2QQP6cmbuRnPOci1W0ew5uqb3eBc6eOtcM9QpteWoip0cFWcI6suRGSqQGeoUMzRiVTRofT4LvJ/mTizXAWcAz2y1osGsioi/B75B09GvaLmeQd0UEa/k3utMfLXdkgb2eWBLmj1/43eb6lyD1scMla+2HFWxk6NPbRmC+nJkhspmhspnhkastgZtE+CLwKsyc/+IeHrbBQ3oIOCdwMk0P6Re1m45A/sDsGPvDzTNTJcDDfDgzNy17SKGyAyVr7Yc1baTo7YMQX05MkNlM0PlM0MjVluDthHwSuB7EfFXwKYt1zOoPwJnZOYlEXEkcEvbBQ0iMw9c2/MR8f7MPHy66xmS6yNi68z8TduFDIkZKlyFOaptJ0dtGYLKcmSGimeGCmeGRq+2har/BdgaeCvwVJqAd9kngY17H99Eh2+bux7RdgED2A34ZUT8tven642aGequrubo+ojYuu0ihqi2DMHMyZEZKoMZ6i4zNCRVHUHLzMUR8RNgM+CCtusZgk0z80sAmfnxiDi47YJ0b5m5sO0ahskMqQXjOzmW9R6PZWZR/1FORoUZAnNUOjNUPjNUtuIyVFWDFhGnA88Gfss955AWdchyku6KiL2AJcDf0bsQU+3rrftx6toW0OzdQvfwzDx0+isbjBnSdKttJ0eFGQJzVDQz1AlmqGAlZqiqBo3mm/5/ZGYt3/gH01xU+j6ai0o79wt/xY4BToiInWjuYvQ7mtsC7wB8B3hTi7UNwgxpWtS6k4P6MgTmqEhmqFPMUIFKzlBtDdpSYC5we9uFDENmLgX2W/P5Dl+EuS6z2i5gsjLzJuCIiJgHLKK5+8+NwCsy80+tFjcYM9RdXctRrTs5qsoQzKgcmaEymKHuMkNDUluD9gjgFxGxtPd4rLS7sgxJJy/C7DUzz6b5wQtAZn4YeEZrRQ0oM28DLmm7jiEyQ4WrJUcV7+SYKRmCjubIDBXPDBXODI1ebQ3ai9ouQPfpfOA3NOtMQHNeOZl5d2sVaU1mqHxV5ajCnRxmqHxmqGxmqHxmaMRqa9BWAm8DHgR8Brga+EWrFanf7Mx8SdtF6D6ZofKZo7KZofKZobKZofKZoRGrrUE7AzgFeDPNCuDn0hyyVBmujoidgau4Z2/LXe2WpDWYofKZo7KZofKZobKZofKZoRGrbaHq+2XmZTTnKydwZ9sFjUjXLsIctwfNYo3X01yMeX275WgtzFD5zFHZZkqGoLs5MkNlM0PlM0MjVtsRtDsj4pnABhGxiI6HupaLMMdl5g4AEbEFcFNmjrVckv6SGSqcOSpeVRmC+nJkhopnhgpnhkavtgbtEJp1JrYE/gU4rN1yBlbVRZgR8WTgdGAD4DMR8YvMPKvlsnRvZqhw5qh4tWUIKsuRGSqeGSqcGRq92hq0Z2XmC8cfRMQraBYF7KraLsI8AXgy8DngRGAxYKDLYobKZ47KVluGoL4cmaGymaHymaERq6JBi4gXAc8FnhIRT+09PRt4PN0OdW0XYa7OzJsiYiwz74yI29ouSA0z1CnmqEAVZwjqy5EZKpAZ6hQzNGJVNGjARcBvgS2AD/aeWw38tLWKhmMPYJ++x2PANi3VMgxLI+IkYIuIOBpvm1sSM9Qd5qhMtWYI6suRGSqTGeoOMzRis8bG6rmuLyKenZlf6Xt8ZGae2mZNw1DLRZgRMQc4mGZv2I+AD3b1/OtamaHymaOy1ZohqCdHZqhsZqh8Zmj0amvQLgWWAO8FzgT+kJkHtVvV1K15ESbQ6YswI2L/NZ66G/hVZn6zjXr0l8xQ+cxR2WrLENSXIzNUNjNUPjM0erWtg7YX8Aiau+Rc3PVAc89FmDfQXIR5RLvlDOyFwEHAtsBLgSOBEyPi3a1WpX5mqHzmqGy1ZQjqy5EZKpsZKp8ZGrHaGrS3Ao+l+WY5tHfBaZetzsybaBZrvBPo+kWYGwJPzcw30PwAvi0znwzs3G5Z6mOGymeOylZbhqC+HJmhspmh8pmhEautQZsD7J6ZnwL2BF7QbjkDq+0izC1oQk3v7wf2Pt64nXK0FmaofOaobLVlCOrLkRkqmxkqnxkasaoatMx8HbBHRBwCPBzo+poTh9GE+JvAH2kuyOyy02huNft54PvA6RFxDM2dm1QAM9QJ5qhgFWYI6suRGSqYGeoEMzRitd0k5ETgYTTnxJ5Ks9hhZw+N13gRZu8ORo8BlmbmHyJig8xc1XZdapihbjBH5aotQ1BnjsxQucxQN5ih0arqCBqwW2buD/wxM88FHt12QQOq4iLMiPhARGwHkJl/yMxvZ+Yfepu3i4gP3seXa3qZoUKZo86oLUNQSY7MUGeYoUKZoelTy0LV4+ZExFxgLCI2ALreyY9fhLk6ImYDF2bmsyLiirYLm6RjgBMiYicggd8BmwM7At8B3tRibbo3M1Quc9QNtWUI6smRGeoGM1QuMzRNamvQ3gV8D1gAfLv3uMvGL8JcQYcvwuzdueiIiJgHLAK2BG4EXpmZf2q1OK3JDBXKHHVGbRmCSnJkhjrDDBXKDE2fqq5BA4iI+TTnxP4sM3/fdj2DiIj/A7wO+CHwOODtwNbAppn5xjZrU73MkDSYmjIE5kjTzwxppquiQYuIDwCnZua1a9m2I3B4Zh46/ZUNzoswNR3MkDSYmjME5kijZ4ake9TSoD2QZpX2dZ0Te2xmLmuvwslZzw+pHYAjuvxDSuUxQ9JgassQmCNNLzMk3aOKBm3cWs6JXdLFc2Jr/CGlbjBD0mBqyRCYI7XDDEmVNWi1qemHlNQGMyQNzhxJgzFDmiwbNEmSJEkqRG0LVUuSJElSZ9mgSZIkSVIhbNAkSSMTEQ+IiC+0XcewRMSkrguIiH+NiOf2Pv7aaKqSJNVkTtsFSJKqNp/mjmUzUma+pe/hnm3VIUnqDhs0SdIovQ/YOiLOA36UmccARMSHgIuAZwOrgccDDwD+LTM/EhH3B04DtgM2AN6WmZ9Y88Uj4iTgBcDvgd8CFwCX917798CdwPOAs4CHAVsDXwf2B/YAjgFuB7YFrgFe3PucyzPzUb33OA4gM4/re9+H9l5zc+AhwCcy8+iIOAB4Kc3d2r44/lrA3/S+7tvAGcDTMvPFveeOBe7MzLdN8t9WklQhT3GUJI3SK4DfAK8BXhQRsyJiU+BpwPipjw8DdgWeCrwzIrYC3gR8LzOfCDwZeGNEbNP/whGxD7Ab8NfA3sAT+jcDL8nMpwPPAa7KzF2AxwK70GuYeu97JE2D9gjgmRMc14tomrJFwPbAERGxZd94h7+6YgAAAr5JREFUnjDejAJk5it6f+8MfAp4WkTcPyJmAf8EfGSC7ytJqpwNmiRp5DLzv4Cf0zRbzwO+nJkreps/lJl3Z+avgcU0TdfTgcMi4iqaI16b0jRi/fYCPp2Zd2Xmcu5p+ABuzMyf9977E8AlEfEq4N+BLYD79z7v2sz8dWauBn4EPHCC43kn8MuI+BfgvcBGvRoBrszMlffxtX8ELgSe3xvrTzPzNxN5X0lS/TzFUZI0Xc6mOYXwEcBxfc/3NzOze483oDkCdiVARDwYuGmN11vFunc03jH+QUT8M81pkGcAl9KcNjmrt/nOvq8Z6z0/1rcdYEPg7v4Xj4hTgG2Aj9M0hk/v+5o7WL+zaY4S/hdwzgQ+X5I0Q3gETZI0Siu5Z2fgZ2lObdwqM7/d9zn/2Dv18ZHAzsA3gMuAwwEi4iHA1TSNXb9LgOdHxEYRsRnw9zTN1Zr2Aj6YmR/rbd+RpgFcl5uB+RGxICI2Bp61jtd8R2Z+Bng48ND1vCbAqoiYA5CZ36A5FfIp3PvInyRphrNBkySN0u9oTgX8WmbeAXwLWPNmH5sA/wl8GTgkM/8AHA/cLyKupWnWXpeZP42IrXunPZKZF9Kc/vj93tf+hrUfvXoPcGxEXAmcDlwBPHpdBWfmLcA7gO/SHHH7zlo+7STgIxHxPeC1vfrX+Zo95wM/iIi5vcefBy7rO9VTkiRmjY1NakkXSZImrXczjHk0DdrTMvOG3vPn0Nwx8ZxJvNZZmfl/ImIXYGFmnhsRG/Ze+6DMvHroAxii3r/FRjRHAF81fhqnJEngETRJ0vT4W5qbhJwx3pxNRe8OkF/qPUyaO0P+ALgS+GTpzVnPVsANwBKbM0nSmjyCJkmSJEmF8AiaJEmSJBXCBk2SJEmSCmGDJkmSJEmFsEGTJEmSpELYoEmSJElSIWzQJEmSJKkQ/x9u8zkDtYqKlwAAAABJRU5ErkJggg==\n", - "text/plain": [ - "<matplotlib.figure.Figure at 0x10f68a668>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "\n", - "df.groupby(['granularity','type'],as_index=False).mean()[['granularity',\"type\",'c1_val','c2_val','c3_val','c4_val']].plot.bar(x=['type','granularity'],subplots=True,figsize=(15,15))\n", - "plt.savefig(\"granularity.pdf\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Selon la granularité et la mesure" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2018-04-17T21:37:33.410198Z", - "start_time": "2018-04-17T21:37:33.303665Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>granularity</th>\n", - " <th>mesure</th>\n", - " <th>c1_val</th>\n", - " <th>c2_val</th>\n", - " <th>c3_val</th>\n", - " <th>c4_val</th>\n", - " <th>mesureL</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>29</th>\n", - " <td>3.0</td>\n", - " <td>3.0</td>\n", - " <td>1.0</td>\n", - " <td>0.600000</td>\n", - " <td>0.400000</td>\n", - " <td>0.400000</td>\n", - " <td>GED</td>\n", - " </tr>\n", - " <tr>\n", - " <th>30</th>\n", - " <td>3.0</td>\n", - " <td>5.0</td>\n", - " <td>1.0</td>\n", - " <td>0.733333</td>\n", - " <td>0.666667</td>\n", - " <td>0.666667</td>\n", - " <td>HED</td>\n", - " </tr>\n", - " <tr>\n", - " <th>33</th>\n", - " <td>3.0</td>\n", - " <td>8.0</td>\n", - " <td>1.0</td>\n", - " <td>0.828571</td>\n", - " <td>0.371429</td>\n", - " <td>0.828571</td>\n", - " <td>BOWSE</td>\n", - " </tr>\n", - " <tr>\n", - " <th>28</th>\n", - " <td>3.0</td>\n", - " <td>2.0</td>\n", - " <td>1.0</td>\n", - " <td>0.833333</td>\n", - " <td>0.500000</td>\n", - " <td>0.833333</td>\n", - " <td>VEO</td>\n", - " </tr>\n", - " <tr>\n", - " <th>27</th>\n", - " <td>3.0</td>\n", - " <td>1.0</td>\n", - " <td>1.0</td>\n", - " <td>0.900000</td>\n", - " <td>0.475000</td>\n", - " <td>0.825000</td>\n", - " <td>MCS</td>\n", - " </tr>\n", - " <tr>\n", - " <th>34</th>\n", - " <td>3.0</td>\n", - " <td>9.0</td>\n", - " <td>1.0</td>\n", - " <td>0.900000</td>\n", - " <td>0.525000</td>\n", - " <td>0.825000</td>\n", - " <td>BOC</td>\n", - " </tr>\n", - " <tr>\n", - " <th>11</th>\n", - " <td>1.0</td>\n", - " <td>3.0</td>\n", - " <td>1.0</td>\n", - " <td>0.920000</td>\n", - " <td>0.360000</td>\n", - " <td>0.200000</td>\n", - " <td>GED</td>\n", - " </tr>\n", - " <tr>\n", - " <th>35</th>\n", - " <td>3.0</td>\n", - " <td>10.0</td>\n", - " <td>1.0</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " <td>1.000000</td>\n", - " <td>JACCARD</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " granularity mesure c1_val c2_val c3_val c4_val mesureL\n", - "29 3.0 3.0 1.0 0.600000 0.400000 0.400000 GED\n", - "30 3.0 5.0 1.0 0.733333 0.666667 0.666667 HED\n", - "33 3.0 8.0 1.0 0.828571 0.371429 0.828571 BOWSE\n", - "28 3.0 2.0 1.0 0.833333 0.500000 0.833333 VEO\n", - "27 3.0 1.0 1.0 0.900000 0.475000 0.825000 MCS\n", - "34 3.0 9.0 1.0 0.900000 0.525000 0.825000 BOC\n", - "11 1.0 3.0 1.0 0.920000 0.360000 0.200000 GED\n", - "35 3.0 10.0 1.0 1.000000 1.000000 1.000000 JACCARD" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d_pc=df.groupby(['granularity','mesure'],as_index=False).mean()[['granularity','mesure','c1_val','c2_val','c3_val','c4_val']]\n", - "df_is_pareto = d_pc.apply(lambda row: is_pareto_front(d_pc,row, ['c1_val','c2_val','c3_val','c4_val']), axis=1)\n", - "df_pareto = d_pc.ix[df_is_pareto].sort_values(by=['c1_val','c2_val','c3_val','c4_val'])\n", - "df_pareto[\"mesureL\"]=df_pareto[\"mesure\"].apply(lambda x:df_mesure[df_mesure.id==int(x)].values[0][-1])\n", - "\n", - "df_pareto" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - }, - "toc": { - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "toc_cell": false, - "toc_position": {}, - "toc_section_display": "block", - "toc_window_display": false - }, - "varInspector": { - "cols": { - "lenName": 16, - "lenType": 16, - "lenVar": 40 - }, - "kernels_config": { - "python": { - "delete_cmd_postfix": "", - "delete_cmd_prefix": "del ", - "library": "var_list.py", - "varRefreshCmd": "print(var_dic_list())" - }, - "r": { - "delete_cmd_postfix": ") ", - "delete_cmd_prefix": "rm(", - "library": "var_list.r", - "varRefreshCmd": "cat(var_dic_list()) " - } - }, - "types_to_exclude": [ - "module", - "function", - "builtin_function_or_method", - "instance", - "_Feature" - ], - "window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/run_automatic_annotation.py b/run_automatic_annotation.py index c1e2d45c8ac4b3d88c2847c09673fc3627182ba2..22632827084bc298328a26471a7e33604501a132 100644 --- a/run_automatic_annotation.py +++ b/run_automatic_annotation.py @@ -1,6 +1,9 @@ # coding = utf-8 import os, re, argparse, json,sys, subprocess, glob +import logging +for _ in ("boto", "elasticsearch", "urllib3", "sklearn"): + logging.getLogger(_).setLevel(logging.CRITICAL) parser=argparse.ArgumentParser() @@ -13,10 +16,10 @@ parser.add_argument("outputAnnotation2_dir") args=parser.parse_args() print("Generating Annotation File") -# process=subprocess.run(["python3","generate_annotation_file.py",args.simMatrixInputDir,args.selectedInputFile,args.outputAnnotation_dir]) -# -# if process.returncode == 1: -# raise subprocess.CalledProcessError("The process did not end well !") +process=subprocess.run(["python3","generate_annotation_file.py",args.simMatrixInputDir,args.selectedInputFile,args.outputAnnotation_dir]) + +if process.returncode == 1: + raise subprocess.CalledProcessError("The process did not end well !") fns=glob.glob(os.path.join(args.outputAnnotation_dir,"*.csv")) @@ -25,12 +28,7 @@ if not os.path.exists(args.outputAnnotation2_dir): os.makedirs(args.outputAnnotation2_dir) for fn in fns: print("Processing {0}...".format(fn)) - if os.path.basename(fn).split("_")[-2] in ["extension","gen"]: - graph_dir = "_".join(os.path.basename(fn).split("_")[-2:]).replace(".csv", "") - else: - graph_dir = os.path.basename(fn).split("_")[-1].replace(".csv", "") - print(fn,graph_dir) - print(["python3","auto_fill_annotation.py",fn,os.path.join(args.graphDataDir,graph_dir),os.path.join(args.outputAnnotation2_dir,os.path.basename(fn))]) - process=subprocess.run(["python3","auto_fill_annotation.py",fn,os.path.join(args.graphDataDir,graph_dir),os.path.join(args.outputAnnotation2_dir,os.path.basename(fn))]) + print(["python3","auto_fill_annotation.py",fn,os.path.join(args.graphDataDir),os.path.join(args.outputAnnotation2_dir,os.path.basename(fn))]) + process=subprocess.run(["python3","auto_fill_annotation.py",fn,os.path.join(args.graphDataDir),os.path.join(args.outputAnnotation2_dir,os.path.basename(fn))]) if process.returncode == 1: raise subprocess.CalledProcessError(process,"The process did not end well !") \ No newline at end of file diff --git a/strpython/eval/automatic_annotation.py b/strpython/eval/automatic_annotation.py index 7c681bcde9a1ebd7f9b3734ccff49db34f6b202c..74099153ad9afda59ceedf76df1d70d1b39565fe 100644 --- a/strpython/eval/automatic_annotation.py +++ b/strpython/eval/automatic_annotation.py @@ -44,7 +44,7 @@ class AnnotationAutomatic(object): return True return False - def criterion3(self, str1 :STR , str2: STR): + def criterion3(self, str1 :STR , str2: STR,th=0.2): """ Return True if one or multiple cluster of spatial entities have been found in both STR. Cluster are constructed based on low distance between spatial entities. The clustering method used is Mean-Shift as @@ -54,8 +54,33 @@ class AnnotationAutomatic(object): :return: """ try: - return str1.get_cluster().intersects(str2.get_cluster()).any() - except: + c1=str1.get_cluster() + c2=str2.get_cluster() + c1["area"]=c1.area + c2["area"] = c2.area + c1=c1.sort_values(by="area",ascending=False) + c2=c2.sort_values(by="area",ascending=False) + for ind,rows in c1.iterrows(): + for ind2,rows2 in c2.iterrows(): + if rows.geometry.intersects(rows2.geometry): + #print(gpd.GeoDataFrame(geometry=[rows.geometry])) + inter = gpd.overlay( + gpd.GeoDataFrame(geometry=[rows.geometry]), + gpd.GeoDataFrame(geometry=[rows2.geometry]), + how="intersection", + use_sindex=False + ) + a1,a2=c1.area.sum(),c2.area.sum() + ia=inter.area.sum() + if a1 < a2 and ia/a1 >= th: + return True + elif a1 < a2 and ia/a2 >= th: + return True + + return False + + except Exception as e: + print(e) return False def criterion4(self, str1, str2): diff --git a/strpython/eval/stats.py b/strpython/eval/stats.py index 86e92f19f734827f1d60eb81dec9f17bd17f1be3..40cc4bd321e4eb963accce9366b5465de04a391b 100644 --- a/strpython/eval/stats.py +++ b/strpython/eval/stats.py @@ -30,5 +30,5 @@ def granularity(graph): """ class_list = flattern([get_data(n)["class"] for n in list(graph.nodes())]) if not class_list: - return [] + return "P-PPL" return most_common(class_list) diff --git a/strpython/nlp/bow_se.py b/strpython/nlp/bow_se.py deleted file mode 100644 index 1a76830582762d364bc439579a91a418cec5d7a2..0000000000000000000000000000000000000000 --- a/strpython/nlp/bow_se.py +++ /dev/null @@ -1,89 +0,0 @@ -# coding = utf-8 - -"""Weisfeiler_Lehman GEO graph kernel. - -""" - -import networkx as nx -import numpy as np - - -class BOWSE(object): - __type__ = "sim" - __depreciated__ = True - - @staticmethod - def compare(graph_list, selected,verbose=False): - """Compute the all-pairs kernel values for a list of graphs. - This function can be used to directly compute the kernel - matrix for a list of graphs. The direct computation of the - kernel matrix is faster than the computation of all individual - pairwise kernel values. - Parameters - ---------- - graph_list: list - A list of graphs (list of networkx graphs) - Return - ------ - K: numpy.array, shape = (len(graph_list), len(graph_list)) - The similarity matrix of all graphs in graph_list. - """ - - n = len(graph_list) - k = [0] - n_nodes = 0 - n_max = 0 - - inclusion_dictionnary = {} - - # Compute adjacency lists and n_nodes, the total number of - # nodes in the dataset. - for i in range(n): - n_nodes += graph_list[i].number_of_nodes() - - # Computing the maximum number of nodes in the graphs. It - # will be used in the computation of vectorial - # representation. - if n_max < graph_list[i].number_of_nodes(): - n_max = graph_list[i].number_of_nodes() - - phi = np.zeros((n_nodes, n), dtype=np.uint64) - if verbose: print(inclusion_dictionnary) - # INITIALIZATION: initialize the nodes labels for each graph - # with their labels or with degrees (for unlabeled graphs) - - labels = [0] * n - label_lookup = {} - label_counter = 0 - - # label_lookup is an associative array, which will contain the - # mapping from multiset labels (strings) to short labels - # (integers) - for i in range(n): - nodes = list(graph_list[i].nodes) - # It is assumed that the graph has an attribute - # 'node_label' - labels[i] = np.zeros(len(nodes), dtype=np.int32) - - for j in range(len(nodes)): - if not (nodes[j] in label_lookup): - label_lookup[nodes[j]] = str(label_counter) - labels[i][j] = label_counter - label_counter += 1 - else: - labels[i][j] = label_lookup[nodes[j]] - # labels are associated to a natural number - # starting with 0. - - phi[labels[i][j], i] += 1 - - graph_list[i] = nx.relabel_nodes(graph_list[i], label_lookup) - k = np.dot(phi.transpose(), phi).astype(np.float64) - - # Compute the normalized version of the kernel - k_norm = np.zeros(k.shape) - for i in range(k.shape[0]): - for j in range(k.shape[1]): - k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) - - return k_norm diff --git a/synthesize_result.py b/synthesize_result.py new file mode 100644 index 0000000000000000000000000000000000000000..1f871273586602060cd4ba93851c50ed3a88fd48 --- /dev/null +++ b/synthesize_result.py @@ -0,0 +1,20 @@ +# coding = utf-8 +import pandas as pd +import numpy as np +import glob,argparse + +fns=glob.glob("data/agromada_annotation_data_final/*") + +data=[] +for fn in fns: + df=pd.read_csv(fn) + mes=np.unique(df.sim_measure)[0] + type_=np.unique(df.type_str)[0] + val=df.groupby("G1").mean().mean()["c1 c2 c3 c4".split()].values.tolist() + val.insert(0,type_) + val.insert(0,mes) + data.append(val) +data +pd.DataFrame(data,columns="mesure type c1 c2 c3 c4".split()) +res=pd.DataFrame(data,columns="mesure type c1 c2 c3 c4".split()) +res.to_csv('result_mada.csv') \ No newline at end of file