import numpy as np
import networkx as nx
import matplotlib as mpl
import matplotlib.pyplot as plt
from geopy.geocoders import Nominatim
from mpl_toolkits.basemap import Basemap as Basemap
from operator import itemgetter
from collections import OrderedDict
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
import os

"""
0	Deal ID
1	Top parent companies
2	Location 1: Location
3	Location 1: Latitude
4	Location 1: Longitude
5	Location 1: Target country
6	Intention of the investment
7	Nature of the deal
8	Negotiation status
9	Operating company: Investor ID
10	Operating company: Name
11	Operating company: Country of registration/origin
"""



def lmiProjection():
    """Write a column projection of the transnational deals export.

    Reads the semicolon-separated deals export, keeps only the columns listed
    below (in that order) and writes them, together with the DataFrame index,
    to a semicolon-separated CSV under ``.\\data_net``.
    """
    source_csv = "D:\\Mes Donnees\\Land Matrix\\export_transnational\\deals.csv"
    target_csv = ".\\data_net\\WebsiteLandMatrix_Projection_allTransnational_wSurf_wStatus.csv"

    # Columns retained in the projection; the order here is the output order.
    kept_columns = [
        'Deal ID',
        'Top parent companies',
        'Location 1: Location',
        'Location 1: Latitude',
        'Location 1: Longitude',
        'Location 1: Target country',
        'Intention of the investment',
        'Nature of the deal',
        'Negotiation status',
        'Operating company: Investor ID',
        'Operating company: Name',
        'Operating company: Country of registration/origin',
        'Deal size',
        'Current implementation status',
    ]

    deals = pd.read_csv(source_csv, sep=";")
    projection = deals[kept_columns]
    projection.to_csv(target_csv, sep=";")


def getParentVals(line):
    """Split a "Top parent companies" cell into per-company field lists.

    Companies are separated by ``|`` and the fields of one company by ``#``.
    Returns a list with one list of fields per company; a cell without ``|``
    yields a single-element list.
    """
    # str.split returns the whole string as one chunk when '|' is absent,
    # so a single pass covers both the single- and multi-company case.
    return [company.split('#') for company in line.split('|')]

def stats():
    """Print deal-count statistics and export company rankings.

    Reads ``WebsiteLandMatrix_Projection.csv`` (semicolon-separated, first
    line is a header). Column 1 holds the parent companies (parsed with
    ``getParentVals``; only entries with exactly three ``#`` fields
    name/id/country are counted), column 5 the target country, column 9 the
    operating company id and column 10 its name.

    Prints the number of distinct target countries, parent-company countries
    and parent companies, then for parent and operating companies the number
    of companies, mean / std / max deals per company and how many companies
    have exactly one deal. Writes ``sorted_parent_companies.csv`` and
    ``sorted_operating_companies.csv`` sorted by decreasing deal count.
    """
    input_path = "WebsiteLandMatrix_Projection.csv"

    target_countries = set()
    parent_company_countries = set()
    parent_companies = set()

    parent_deals = {}  # parent company id -> number of deals
    op_deals = {}      # operating company id -> number of deals
    parent_names = {}  # parent company id -> name seen on first occurrence
    op_names = {}      # operating company id -> name seen on first occurrence

    # The context manager closes the input even if a row fails to parse.
    with open(input_path, 'r') as deals_file:
        next(deals_file)  # skip the header line
        for line in deals_file:
            vals = line.split(';')
            for pc in getParentVals(vals[1]):
                if len(pc) != 3:
                    continue
                parent_id = int(pc[-2])
                parent_company_countries.add(pc[-1])
                parent_companies.add(parent_id)
                if parent_id not in parent_deals:
                    parent_deals[parent_id] = 1
                    parent_names[parent_id] = pc[0]
                else:
                    parent_deals[parent_id] += 1

            target_countries.add(vals[5])
            op_id = int(vals[9])
            if op_id not in op_deals:
                op_deals[op_id] = 1
                op_names[op_id] = vals[10]
            else:
                op_deals[op_id] += 1

    print(len(target_countries), len(parent_company_countries), len(parent_companies))

    def _print_summary(deal_counts):
        # count, mean, std, max of deals per company, and number of single-deal companies
        counts = list(deal_counts.values())
        ones = sum(1 for c in counts if c == 1)
        print(len(deal_counts), np.mean(counts), np.std(counts), np.max(counts), ones)

    def _write_ranking(out_path, header, deal_counts, names):
        # Sorting (count, id) tuples in reverse orders by count, ties by id, both descending.
        ranking = sorted(((v, k) for k, v in deal_counts.items()), reverse=True)
        with open(out_path, 'w') as out:
            out.write(header)
            for v, k in ranking:
                out.write("%s;%d;%d\n" % (names[k], k, v))

    _print_summary(parent_deals)
    _write_ranking('sorted_parent_companies.csv',
                   "Parent Company Name;Parent Company Id;#deals\n",
                   parent_deals, parent_names)

    _print_summary(op_deals)
    _write_ranking('sorted_operating_companies.csv',
                   "Operating Company Name;Operating Company Id;#deals\n",
                   op_deals, op_names)

def parent_to_target_country_network():
    """Build the parent-country -> target-country network from the projection CSV.

    Reads ``WebsiteLandMatrix_Projection.csv`` (semicolon-separated, header on
    the first line). Every distinct country name gets an integer id in order
    of first appearance. For each row, each parent-company entry with exactly
    three ``#`` fields and a non-empty country adds 1 to the weight of the
    directed edge (parent country id -> target country id).

    Writes three files in the current directory:
      * ``parent_company_country_to_target_country_network.ncol``: ``u;v;weight``
      * ``parent_company_country_to_target_country_network_wNames.txt``:
        the same edges with country names instead of ids
      * ``ids_to_countries.csv``: ``id;country``
    and prints the number of edges and nodes.
    """
    input_path = "WebsiteLandMatrix_Projection.csv"

    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()
    ids = 0  # next free country id

    # All handles are managed together so they are released even when a row
    # fails to parse.
    with open(input_path, 'r') as deals_file, \
            open("parent_company_country_to_target_country_network.ncol", 'w') as fout, \
            open("parent_company_country_to_target_country_network_wNames.txt", 'w') as fout_n, \
            open("ids_to_countries.csv", 'w') as fout_dict:
        next(deals_file)  # skip the header line
        for line in deals_file:
            vals = line.split(';')

            ids_parents = []
            for pc in getParentVals(vals[1]):
                if len(pc) != 3:
                    continue
                parent_country = pc[-1].strip().replace('"', '')
                if parent_country == '':  # skip empty countries
                    print("Empty Parent Country: ", pc)
                    continue
                if parent_country not in countries_to_ids:
                    countries_to_ids[parent_country] = ids
                    ids_to_countries[ids] = parent_country
                    if ids == 65:
                        # debug trace kept from the original implementation
                        print(pc)
                    ids += 1
                ids_parents.append(countries_to_ids[parent_country])

            # The target country is registered even when the row has no usable parent.
            target_c = vals[5].strip().replace('"', '')
            if target_c not in countries_to_ids:
                countries_to_ids[target_c] = ids
                ids_to_countries[ids] = target_c
                ids += 1

            v = countries_to_ids[target_c]
            for u in ids_parents:
                if G.has_edge(u, v):
                    G[u][v]['weight'] += 1
                else:
                    G.add_edge(u, v, weight=1)

        for u, v in G.edges:
            weight = G[u][v]['weight']
            fout.write("%d;%d;%d\n" % (u, v, weight))
            fout_n.write("%s;%s;%d\n" % (ids_to_countries[u], ids_to_countries[v], weight))

        for country_id, country in ids_to_countries.items():
            fout_dict.write("%s;%s\n" % (country_id, country))

    print("#Edges", G.size())
    print("#Nodes", G.order())

def parent_to_target_country_network_Surface():
    """Build the parent-country -> target-country network weighted by column 12.

    Reads ``WebsiteLandMatrix_Projection_wSurface.csv`` (semicolon-separated,
    header on the first line). Every distinct country name gets an integer id
    in order of first appearance. For each row, each parent-company entry with
    exactly three ``#`` fields and a non-empty country adds ``int(vals[12])``
    to the weight of the directed edge (parent country id -> target country id).
    NOTE(review): column 12 is presumably the deal surface, judging by the
    file names; confirm against the CSV layout.

    Writes ``parent_company_country_to_target_country_network_wSurface.ncol``
    (``u;v;weight``) and ``ids_to_countries_wSurface.csv`` (``id;country``),
    then prints the number of edges and nodes.
    """
    input_path = "WebsiteLandMatrix_Projection_wSurface.csv"

    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()
    ids = 0  # next free country id

    # All handles are managed together so they are released even when a row
    # fails to parse.
    with open(input_path, 'r') as deals_file, \
            open("parent_company_country_to_target_country_network_wSurface.ncol", 'w') as fout, \
            open("ids_to_countries_wSurface.csv", 'w') as fout_dict:
        next(deals_file)  # skip the header line
        for line in deals_file:
            vals = line.split(';')

            ids_parents = []
            for pc in getParentVals(vals[1]):
                if len(pc) != 3:
                    continue
                parent_country = pc[-1].strip().replace('"', '')
                if parent_country == '':  # skip empty countries
                    print("Empty Parent Country: ", pc)
                    continue
                if parent_country not in countries_to_ids:
                    countries_to_ids[parent_country] = ids
                    ids_to_countries[ids] = parent_country
                    if ids == 65:
                        # debug trace kept from the original implementation
                        print(pc)
                    ids += 1
                ids_parents.append(countries_to_ids[parent_country])

            # The target country is registered even when the row has no usable parent.
            target_c = vals[5].strip().replace('"', '')
            if target_c not in countries_to_ids:
                countries_to_ids[target_c] = ids
                ids_to_countries[ids] = target_c
                ids += 1

            v = countries_to_ids[target_c]
            for u in ids_parents:
                # Parsed per parent so rows without parents never touch column 12.
                w = int(vals[12])
                if G.has_edge(u, v):
                    G[u][v]['weight'] += w
                else:
                    G.add_edge(u, v, weight=w)

        for u, v in G.edges:
            fout.write("%d;%d;%d\n" % (u, v, G[u][v]['weight']))

        for country_id, country in ids_to_countries.items():
            fout_dict.write("%s;%s\n" % (country_id, country))

    print("#Edges", G.size())
    print("#Nodes", G.order())


def parent_to_target_country_network_allTransnational():
    """Build deal-count and deal-size country networks from the transnational export.

    Reads the semicolon-separated deals export with pandas. Rows whose
    "Top parent companies" cell is null are skipped. Every distinct country
    name gets an integer id in order of first appearance. For each remaining
    row, each parent-company entry with exactly three ``#`` fields and a
    non-empty country contributes to the directed edge
    (parent country id -> target country id): +1 in the count graph and
    +``int(row["Deal size"])`` in the surface graph.

    Writes under ``.\\data_net``: ``net_allTransnational.ncol`` and
    ``net_allTransnational_surf.ncol`` (``u;v;weight``) and
    ``ids_to_countries_allTransnational.csv`` (``id;country``), then prints
    the number of edges and nodes of the count graph.
    """
    input_path = "D:\\Mes Donnees\\Land Matrix\\export_transnational\\deals.csv"

    df = pd.read_csv(input_path, sep=';', low_memory=False)

    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()
    G_surf = nx.DiGraph()
    ids = 0  # next free country id

    # Output handles are managed together so they are closed even if a row
    # raises (for instance int() on a missing deal size).
    with open(".\\data_net\\net_allTransnational.ncol", 'w') as fout, \
            open(".\\data_net\\net_allTransnational_surf.ncol", 'w') as fout_surf, \
            open(".\\data_net\\ids_to_countries_allTransnational.csv", 'w') as fout_dict:
        for _, row in df.iterrows():
            if pd.isnull(row["Top parent companies"]):  # skip empty rows
                continue

            ids_parents = []
            for pc in getParentVals(row["Top parent companies"]):
                if len(pc) != 3:
                    continue
                parent_country = pc[-1].strip().replace('"', '')
                if parent_country == '':  # skip empty countries
                    print("Empty Parent Country: ", pc)
                    continue
                if parent_country not in countries_to_ids:
                    countries_to_ids[parent_country] = ids
                    ids_to_countries[ids] = parent_country
                    if ids == 65:
                        # debug trace kept from the original implementation
                        print(pc)
                    ids += 1
                ids_parents.append(countries_to_ids[parent_country])

            target_c = row["Location 1: Target country"].strip().replace('"', '')
            if target_c not in countries_to_ids:
                countries_to_ids[target_c] = ids
                ids_to_countries[ids] = target_c
                ids += 1

            v = countries_to_ids[target_c]
            for u in ids_parents:
                # Parsed per parent so rows without parents never touch "Deal size".
                w_surf = int(row["Deal size"])
                # Both graphs always share the same edge set.
                if G.has_edge(u, v):
                    G[u][v]['weight'] += 1
                    G_surf[u][v]['weight'] += w_surf
                else:
                    G.add_edge(u, v, weight=1)
                    G_surf.add_edge(u, v, weight=w_surf)

        for u, v in G.edges:
            fout.write("%d;%d;%d\n" % (u, v, G[u][v]['weight']))
        for u, v in G_surf.edges:
            fout_surf.write("%d;%d;%d\n" % (u, v, G_surf[u][v]['weight']))

        for country_id, country in ids_to_countries.items():
            fout_dict.write("%s;%s\n" % (country_id, country))

    print("#Edges", G.size())
    print("#Nodes", G.order())

def parent_to_target_country_network_allTransnational_inOperation():
    """Build country networks restricted to deals currently in operation.

    Reads the semicolon-separated deals export with pandas and keeps rows
    whose "Top parent companies" is not null and whose
    "Current implementation status" equals "In operation (production)".
    Every distinct country name gets an integer id in order of first
    appearance. Each parent-company entry with exactly three ``#`` fields and
    a non-empty country contributes to the directed edge
    (parent country id -> target country id): +1 in the count graph and
    +``int(row["Deal size"])`` in the surface graph. When parent and target
    country coincide no edge is added; the country id is recorded as a
    self-investor instead.

    Writes under ``.\\data_net`` the two ``.ncol`` edge lists and the
    ``id;country`` dictionary for ``dataset_name``, writes the self-investor
    ids to ``self_investors.txt`` in the current directory, and prints the
    number of edges and nodes of the count graph.
    """
    self_investors = set()

    dataset_name = "global_07072021"

    input_path = "D:\\Mes Donnees\\Land Matrix\\_LURKER\\LM_Transnational_InOperation_07072021\\deals.csv"

    df = pd.read_csv(input_path, sep=';', low_memory=False)

    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()
    G_surf = nx.DiGraph()
    ids = 0  # next free country id

    # Output handles are managed together so they are closed even if a row
    # raises (for instance int() on a missing deal size).
    with open(".\\data_net\\net_%s_inOperation.ncol" % dataset_name, 'w') as fout, \
            open(".\\data_net\\net_%s_surf_inOperation.ncol" % dataset_name, 'w') as fout_surf, \
            open(".\\data_net\\ids_to_countries_%s_inOperation.csv" % dataset_name, 'w') as fout_dict:
        for _, row in df.iterrows():
            if pd.isnull(row["Top parent companies"]):  # skip empty rows
                continue
            if row["Current implementation status"] != "In operation (production)":
                continue

            ids_parents = []
            for pc in getParentVals(row["Top parent companies"]):
                if len(pc) != 3:
                    continue
                parent_country = pc[-1].strip().replace('"', '')
                if parent_country == '':  # skip empty countries
                    print("Empty Parent Country: ", pc)
                    continue
                if parent_country not in countries_to_ids:
                    countries_to_ids[parent_country] = ids
                    ids_to_countries[ids] = parent_country
                    if ids == 65:
                        # debug trace kept from the original implementation
                        print(pc)
                    ids += 1
                ids_parents.append(countries_to_ids[parent_country])

            target_c = row["Location 1: Target country"].strip().replace('"', '')
            if target_c not in countries_to_ids:
                countries_to_ids[target_c] = ids
                ids_to_countries[ids] = target_c
                ids += 1

            v = countries_to_ids[target_c]
            for u in ids_parents:
                # Parsed before the self-loop test, so an invalid size fails
                # for self-investments too.
                w_surf = int(row["Deal size"])
                if u == v:
                    self_investors.add(u)
                    continue
                # Both graphs always share the same edge set.
                if G.has_edge(u, v):
                    G[u][v]['weight'] += 1
                    G_surf[u][v]['weight'] += w_surf
                else:
                    G.add_edge(u, v, weight=1)
                    G_surf.add_edge(u, v, weight=w_surf)

        for u, v in G.edges:
            fout.write("%d;%d;%d\n" % (u, v, G[u][v]['weight']))
        for u, v in G_surf.edges:
            fout_surf.write("%d;%d;%d\n" % (u, v, G_surf[u][v]['weight']))

        for country_id, country in ids_to_countries.items():
            fout_dict.write("%s;%s\n" % (country_id, country))

    with open("self_investors.txt", 'w') as fsi:
        for si in self_investors:
            fsi.write("%d\n" % int(si))

    print("#Edges", G.size())
    print("#Nodes", G.order())

def parent_to_target_country_network_allTransnational_inOperation_NewLMFormat():
    """Build in-operation country networks from the newer export format.

    In this format the parent country is not embedded in the
    "Top parent companies" cell: the second ``#`` field of each entry is used
    as an investor id and its "Country of registration/origin" is looked up in
    ``investors.csv`` (indexed by its first column).

    Rows are kept when "Top parent companies" is not null,
    "Current implementation status" equals "In operation (production)" and,
    if ``filter_sectors`` is set, at least one entry of ``sectors`` appears in
    the comma-separated list after the last ``#`` of "Intention of investment".
    Each usable parent contributes to the directed edge
    (parent country id -> target country id): +1 in the count graph and
    +``int(row["Deal size"])`` in the surface graph. When parent and target
    country coincide no edge is added; the country id is recorded as a
    self-investor instead.

    Writes under ``.\\data_net`` the two edge lists, their reversed-direction
    ``_LR`` counterparts (edge rows only when ``lr_write`` is set) and the
    ``id;country`` dictionary; writes ``self_investors.txt`` in the current
    directory; prints the number of edges and nodes of the count graph.
    """
    lr_write = True  # also writes lurkerrank inversed topology
    filter_sectors = True  # network on specific intentions of investments
    self_investors = set()

    dataset_name = "energy_17032022"
    sectors = ["Renewable Energy"]

    input_path = "D:\\Mes Donnees\\Land Matrix\\_LURKER\\LM_Transnational_InOperation_17032022\\deals.csv"
    investors_path = "D:\\Mes Donnees\\Land Matrix\\_LURKER\\LM_Transnational_InOperation_17032022\\investors.csv"

    df_inv = pd.read_csv(investors_path, index_col=0, sep=';')
    df = pd.read_csv(input_path, sep=';', low_memory=False)

    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()
    G_surf = nx.DiGraph()
    ids = 0  # next free country id

    # Output handles are managed together so they are closed even if a row
    # raises (unknown investor id, missing deal size, ...).
    with open(".\\data_net\\net_%s_inOperation.ncol" % dataset_name, 'w') as fout, \
            open(".\\data_net\\net_%s_surf_inOperation.ncol" % dataset_name, 'w') as fout_surf, \
            open(".\\data_net\\net_%s_inOperation_LR.ncol" % dataset_name, 'w') as fout_lr, \
            open(".\\data_net\\net_%s_surf_inOperation_LR.ncol" % dataset_name, 'w') as fout_surf_lr, \
            open(".\\data_net\\ids_to_countries_%s_inOperation.csv" % dataset_name, 'w') as fout_dict:
        for _, row in df.iterrows():
            if pd.isnull(row["Top parent companies"]):  # skip empty rows
                continue
            if row["Current implementation status"] != "In operation (production)":
                continue

            if filter_sectors:
                intention = row["Intention of investment"]
                if pd.isnull(intention):
                    continue
                # Exact membership test on the comma-split items (no whitespace stripping).
                curr_sectors = intention.split('#')[-1].split(',')
                if not any(s in curr_sectors for s in sectors):
                    continue

            ids_parents = []
            for pc in getParentVals(row["Top parent companies"]):
                if len(pc) <= 1:
                    print("Error in parent company format (missing id?): ", pc)
                    continue
                company_id = pc[1]
                parent_country = df_inv.loc[int(company_id), "Country of registration/origin"]
                if pd.isnull(parent_country):  # skip empty countries
                    print("Empty Parent Country: ", pc)
                    continue
                if parent_country not in countries_to_ids:
                    countries_to_ids[parent_country] = ids
                    ids_to_countries[ids] = parent_country
                    ids += 1
                ids_parents.append(countries_to_ids[parent_country])

            target_c = row["Target country"].strip().replace('"', '')
            if target_c not in countries_to_ids:
                countries_to_ids[target_c] = ids
                ids_to_countries[ids] = target_c
                ids += 1

            v = countries_to_ids[target_c]
            for u in ids_parents:
                # Parsed before the self-loop test, so an invalid size fails
                # for self-investments too.
                w_surf = int(row["Deal size"])
                if u == v:
                    self_investors.add(u)
                    continue
                # Both graphs always share the same edge set.
                if G.has_edge(u, v):
                    G[u][v]['weight'] += 1
                    G_surf[u][v]['weight'] += w_surf
                else:
                    G.add_edge(u, v, weight=1)
                    G_surf.add_edge(u, v, weight=w_surf)

        for u, v in G.edges:
            weight = G[u][v]['weight']
            fout.write("%d;%d;%d\n" % (u, v, weight))
            if lr_write:
                # reversed direction for the LR variant
                fout_lr.write("%d;%d;%d\n" % (v, u, weight))
        for u, v in G_surf.edges:
            weight = G_surf[u][v]['weight']
            fout_surf.write("%d;%d;%d\n" % (u, v, weight))
            if lr_write:
                fout_surf_lr.write("%d;%d;%d\n" % (v, u, weight))

        for country_id, country in ids_to_countries.items():
            fout_dict.write("%s;%s\n" % (country_id, country))

    with open("self_investors.txt", 'w') as fsi:
        for si in self_investors:
            fsi.write("%d\n" % int(si))

    print("#Edges", G.size())
    print("#Nodes", G.order())


def parent_to_target_country_network_allTransnational_inOperation_withYear():
    """Build in-operation country networks and tag surface edges with dates.

    Reads the semicolon-separated deals export for ``dataset_name`` and keeps
    rows whose "Top parent companies" is not null and whose
    "Current implementation status" equals "In operation (production)".
    Each parent-company entry with exactly three ``#`` fields and a non-empty
    country contributes to the directed edge
    (parent country id -> target country id): +1 in the count graph and
    +``int(row["Deal size"])`` in the surface graph. Self-loops are kept.

    The first deal that creates an edge also fixes that edge's ``date``
    ("Contracts 1: Contract date") and ``year`` (text before the first ``##``
    of "Negotiation status"); later deals on the same edge only add weight.

    Writes under ``.\\data_net`` the count edge list (``u;v;weight``), the
    surface edge list (``u;v;weight;date;year``) and the ``id;country``
    dictionary, then prints the number of edges and nodes of the count graph.
    """
    dataset_name = "global"

    input_path = "D:\\Mes Donnees\\Land Matrix\\%s\\deals.csv" % dataset_name

    df = pd.read_csv(input_path, sep=';', low_memory=False)

    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()
    G_surf = nx.DiGraph()
    ids = 0  # next free country id

    # Output handles are managed together so they are closed even if a row
    # raises (for instance int() on a missing deal size).
    with open(".\\data_net\\net_%s_inOperation_dates.ncol" % dataset_name, 'w') as fout, \
            open(".\\data_net\\net_%s_surf_inOperation_dates.ncol" % dataset_name, 'w') as fout_surf, \
            open(".\\data_net\\ids_to_countries_%s_inOperation.csv" % dataset_name, 'w') as fout_dict:
        for _, row in df.iterrows():
            if pd.isnull(row["Top parent companies"]):  # skip empty rows
                continue
            if row["Current implementation status"] != "In operation (production)":
                continue

            ids_parents = []
            for pc in getParentVals(row["Top parent companies"]):
                if len(pc) != 3:
                    continue
                parent_country = pc[-1].strip().replace('"', '')
                if parent_country == '':  # skip empty countries
                    print("Empty Parent Country: ", pc)
                    continue
                if parent_country not in countries_to_ids:
                    countries_to_ids[parent_country] = ids
                    ids_to_countries[ids] = parent_country
                    if ids == 65:
                        # debug trace kept from the original implementation
                        print(pc)
                    ids += 1
                ids_parents.append(countries_to_ids[parent_country])

            target_c = row["Location 1: Target country"].strip().replace('"', '')
            if target_c not in countries_to_ids:
                countries_to_ids[target_c] = ids
                ids_to_countries[ids] = target_c
                ids += 1

            v = countries_to_ids[target_c]
            for u in ids_parents:
                # Parsed per parent so rows without parents never touch "Deal size".
                w_surf = int(row["Deal size"])
                # Both graphs always share the same edge set.
                if G.has_edge(u, v):
                    G[u][v]['weight'] += 1
                    G_surf[u][v]['weight'] += w_surf
                else:
                    G.add_edge(u, v, weight=1)
                    G_surf.add_edge(
                        u, v,
                        weight=w_surf,
                        date=row["Contracts 1: Contract date"],
                        year=str(row["Negotiation status"]).split('##')[0],
                    )

        for u, v in G.edges:
            fout.write("%d;%d;%d\n" % (u, v, G[u][v]['weight']))
        for u, v in G_surf.edges:
            attrs = G_surf[u][v]
            fout_surf.write("%d;%d;%d;%s;%s\n" % (u, v, attrs['weight'], attrs['date'], attrs['year']))

        for country_id, country in ids_to_countries.items():
            fout_dict.write("%s;%s\n" % (country_id, country))

    print("#Edges", G.size())
    print("#Nodes", G.order())


def graph_stats_directed(path):
    """Print summary statistics of a directed, weighted edge list.

    ``path`` is a ``;``-delimited edge list (``u;v;weight``) with integer node
    ids, loaded as a ``DiGraph``. Prints edge and node counts, transitivity,
    degree assortativity, average degree, the average shortest path length
    (only when the graph is weakly connected), the reachable ordered pairs
    with the mean shortest path length over them, and the fraction of edges
    whose reverse edge also exists.
    """
    digraph = nx.read_edgelist(path,delimiter=';',create_using=nx.DiGraph(), nodetype=int, data=(('weight',float),))
    print("#Edges",digraph.size())
    print("#Nodes",digraph.order())
    print("Transitivity",nx.transitivity(digraph))
    print("Assortativity", nx.degree_assortativity_coefficient(digraph))

    # Mean of the per-node degrees.
    degree_total = 0
    for _, node_degree in digraph.degree:
        degree_total += node_degree
    print("Avg Degree",degree_total / len(digraph.degree))

    if not nx.is_weakly_connected(digraph):
        print("Not weakly connected")
    else:
        print("Average Path Length",nx.average_shortest_path_length(digraph))

    # Walk every ordered pair of distinct nodes and measure reachability.
    reachable_pairs = 0
    ordered_pairs = 0
    path_length_total = 0
    for source in digraph.nodes():
        for target in digraph.nodes():
            if target == source:
                continue
            ordered_pairs += 1
            if nx.has_path(digraph, source, target):
                reachable_pairs += 1
                path_length_total += nx.shortest_path_length(digraph, source, target)

    print(reachable_pairs, ordered_pairs, reachable_pairs / ordered_pairs, path_length_total / reachable_pairs)

    # An edge is reciprocal when its reverse exists; self-loops are excluded.
    reciprocal = 0
    for tail, head in digraph.edges:
        if tail != head and digraph.has_edge(head, tail):
            reciprocal += 1
    print("Percentage of reciprocal edges",reciprocal / len(digraph.edges))



def graph_stats_undirected(path):
    """Print summary statistics of an undirected, weighted edge list.

    ``path`` is a ``;``-delimited edge list (``u;v;weight``) with integer node
    ids, loaded as an undirected ``Graph``. Prints edge and node counts,
    transitivity, average clustering, connectivity, number of connected
    components, degree assortativity, average degree, the average shortest
    path length (for the whole graph when connected, otherwise per component)
    and finally: reachable ordered pairs, all ordered pairs, and their ratio.
    """
    G = nx.read_edgelist(path, delimiter=';', nodetype=int, data=(('weight', float),))
    print("#Edges", G.size())
    print("#Nodes", G.order())
    print("Transitivity", nx.transitivity(G))
    print("Average CC", nx.average_clustering(G))
    print("Connected", nx.is_connected(G))
    print("#Connected Components", nx.number_connected_components(G))
    print("Assortativity", nx.degree_assortativity_coefficient(G))

    degrees = [degree for _, degree in G.degree]
    print("Avg Degree", sum(degrees) / len(degrees))

    # nx.connected_component_subgraphs was removed in networkx 2.4; building
    # the subgraphs from connected_components works on old and new releases.
    components = [G.subgraph(nodes) for nodes in nx.connected_components(G)]

    if nx.is_connected(G):
        print("Average Path Length", nx.average_shortest_path_length(G))
    else:
        print("Not weakly connected")
        for g in components:
            print("Average Path Length cc", nx.average_shortest_path_length(g), g.nodes())

    # In an undirected graph two distinct nodes are mutually reachable exactly
    # when they share a component, so the ordered reachable pairs are
    # sum(size * (size - 1)) without running a path search per pair.
    node_count = G.order()
    all_pairs = node_count * (node_count - 1)
    reachable_pairs = sum(len(g) * (len(g) - 1) for g in components)
    ratio = reachable_pairs / all_pairs if all_pairs else float('nan')

    print(reachable_pairs, all_pairs, ratio)


def reciprocation():
    """Print every reciprocated edge of the country network and their share.

    Loads ``parent_company_country_to_target_country_network.ncol`` as a
    directed graph, prints each edge (self-loops excluded) whose reverse edge
    also exists, then prints the fraction of such edges over all edges.
    """
    edge_file = "parent_company_country_to_target_country_network.ncol"
    network = nx.read_edgelist(edge_file,delimiter=';',create_using=nx.DiGraph(), nodetype=int, data=(('weight',float),))

    mutual = 0
    for edge in network.edges:
        tail, head = edge[0], edge[1]
        if tail == head or not network.has_edge(head, tail):
            continue
        mutual += 1
        print(edge)

    print("Percentage of reciprocal edges",mutual / len(network.edges))


def rankings():
    """Compute per-country centrality scores and join them to the meta index.

    Works inside the hard-coded ``mines_28092020`` directory. Loads the
    flattened network (``u;v;weight`` with integer weights) as a ``DiGraph``,
    maps node ids to country names through the ids dictionary file and country
    names to ISO codes through ``ISO country.xlsx`` (column ``Code ISO``).

    Builds a DataFrame indexed by ISO code holding PageRank and betweenness
    centrality (plus weighted in-degree, out-degree and their ratio when
    ``only_advanced`` is False), writes it to
    ``degrees_ranks_allTransnational.csv``, inner-joins it with
    ``Heatmap\\META_Index_Mines_Revolution_.csv``, prints the joined table and
    writes it to ``META_Index_Mines_Revolution_complete.csv``.
    """
    os.chdir("D:\\Mes Donnees\\Land Matrix\\mines_28092020\\")

    only_advanced = True

    path_d = "ids_to_countries_mines_APR21.csv"
    path = "mines_mlnet_flattened_APR21.ncol"

    graph = nx.read_edgelist(path, delimiter=';', create_using=nx.DiGraph(), nodetype=int, data=(('weight', int),))

    # node id -> country name
    names_dict = {}
    with open(path_d, 'r') as fd:
        for line in fd:
            vals = line.split(';')
            names_dict[int(vals[0])] = vals[1].strip()

    path_iso = "ISO country.xlsx"
    # read_excel has no `sep` parameter; pandas >= 1.0 rejects unknown keywords.
    iso = pd.read_excel(path_iso, index_col=0)

    # node id -> ISO code
    iso_dict = {node_id: iso.loc[name, 'Code ISO'] for node_id, name in names_dict.items()}

    size_indeg_dict = dict(graph.in_degree(weight='weight'))
    size_outdeg_dict = dict(graph.out_degree(weight='weight'))

    # in/out ratio, defined as 0 when either side is 0
    inout = {}
    for node_id, indeg in size_indeg_dict.items():
        outdeg = size_outdeg_dict[node_id]
        if indeg == 0 or outdeg == 0:
            inout[node_id] = 0
        else:
            inout[node_id] = float(indeg / outdeg)

    pr = nx.pagerank(graph)
    btw = nx.betweenness_centrality(graph)

    # column name -> per-node values, in output column order
    if only_advanced:
        metrics = {'pagerank': pr, 'betweenness': btw}
    else:
        metrics = {
            'in': size_indeg_dict,
            'out': size_outdeg_dict,
            'inout': inout,
            'pagerank': pr,
            'betweenness': btw,
        }

    df = pd.DataFrame(np.nan, index=list(iso_dict.values()), columns=list(metrics), dtype='float')
    for node_id in graph.nodes:
        for column, values in metrics.items():
            # Single .loc[row, col] assignment: the chained form
            # df.loc[row][col] = v may write to a temporary copy.
            df.loc[iso_dict[node_id], column] = values[node_id]

    df.to_csv('degrees_ranks_allTransnational.csv', sep=';')

    df_meta = pd.read_csv("Heatmap\\META_Index_Mines_Revolution_.csv", index_col=0, sep=';')

    df_merge = df.join(df_meta, how="inner")
    print(df_merge)

    df_merge.to_csv('META_Index_Mines_Revolution_complete.csv', sep=';')


def corrs():
    """Compute and plot the Pearson correlation matrix of the meta index.

    Works inside the hard-coded ``Heatmap`` directory. Reads
    ``META_Index_Mines_Revolution_final.csv``, prints the table and its
    Pearson correlation matrix, shows the matrix as a seaborn heatmap, then
    writes the matrix to ``LMI_corrs_new.csv`` and the figure to
    ``LM_corrs_new.png``.
    """
    os.chdir("D:\\Mes Donnees\\Land Matrix\\mines_28092020\\Heatmap\\")
    df = pd.read_csv('META_Index_Mines_Revolution_final.csv', sep=';', index_col=0)
    print(df)
    # Named differently from the function to avoid shadowing it locally.
    corr_matrix = df.corr(method='pearson')
    print(corr_matrix)

    # No FacetGrid is created here: it was never used and only opened an
    # extra empty figure alongside the heatmap.
    fig, ax = plt.subplots(1, 1, figsize=(12, 8))

    sns.heatmap(corr_matrix, ax=ax, xticklabels=True, yticklabels=True, square=False, linewidth=0.5)

    print(ax.get_ylim())
    # Pin the y-limits to the full matrix height so the first and last rows
    # are not cut in half; sized from the data instead of a fixed 10 rows.
    ax.set_ylim(len(corr_matrix), 0.0)
    plt.show()
    corr_matrix.to_csv('LMI_corrs_new.csv', sep=';')
    fig.savefig("LM_corrs_new.png", bbox_inches='tight')
    plt.close(fig)

def addColumnToMetaIndex():
    """Append an "Outdegree" column to the meta index table.

    Works inside the hard-coded ``Heatmap`` directory. Loads
    ``MetaIndex_new2.csv`` and the header-less ``SizesOut_mines.csv`` (both
    indexed by their first column), initialises "Outdegree" to an empty string
    and, for every index label present in both tables, copies the value of
    column ``1`` of the sizes table. The result goes to ``MetaIndex_new3.csv``.
    """
    os.chdir("D:\\Mes Donnees\\Land Matrix\\mines_28092020\\Heatmap\\")
    meta = pd.read_csv("MetaIndex_new2.csv",sep=';',index_col=0)
    sizes = pd.read_csv("SizesOut_mines.csv",sep=';',index_col=0,header=None)

    meta["Outdegree"] = ""
    for label in sizes.index:
        if label not in meta.index:
            continue
        # With header=None the first data column carries the integer label 1.
        meta.at[label, "Outdegree"] = sizes.loc[label][1]

    meta.to_csv("MetaIndex_new3.csv",sep=';')




def queryDeals(country_a, country_b):
    """Print and total the deals from parents in ``country_a`` into ``country_b``.

    Loads the hard-coded transnational ``deals.csv`` export and scans it row
    by row. A deal is selected when at least one of its top parent companies
    lists ``country_a`` as its country and the deal's first location targets
    ``country_b``. For each selected deal the parent companies, target
    country, intention and size are printed, followed by a separator line.

    Args:
        country_a: Country name compared against the parents' country field.
        country_b: Country name compared against the target country field.

    Returns:
        One ``"from;to;value\\n"`` line holding both country names and the
        summed "Deal size" of the selected deals.
    """
    deals = pd.read_csv(
        "D:\\Mes Donnees\\Land Matrix\\export_transnational\\deals.csv",
        sep=';', low_memory=False)
    shown_fields = ["Top parent companies","Location 1: Target country","Intention of the investment","Deal size"]

    def registered_in_a(parent):
        # Only entries that split into exactly three '#'-separated parts are
        # considered; the last part is read as the country.
        if len(parent) != 3:
            return False
        country = parent[-1].strip().replace('"', '')
        # Empty countries never match.
        return country != '' and country == country_a

    tot = 0
    for _, deal in deals.iterrows():
        parents_field = deal["Top parent companies"]
        if pd.isnull(parents_field):  # skip rows with no parent companies
            continue
        if not any(registered_in_a(p) for p in getParentVals(parents_field)):
            continue
        target = deal["Location 1: Target country"].strip().replace('"', '')
        if target != country_b:
            continue
        for field in shown_fields:
            print(deal[field])
        print("============================================================")
        tot += float(deal["Deal size"])

    print("Total Surface:", tot)
    return "%s;%s;%f\n" % (country_a, country_b, tot)

def queryDealsSingle(country_a):
    """Print every deal that involves ``country_a`` and the total surface.

    Works inside the hard-coded ``LM_Transnational_InOperation_17032022``
    directory, reading ``deals.csv`` and ``investors.csv`` (indexed by its
    first column). Each parent company's country is looked up in the
    investors table through the id found in the second ``#``-separated part
    of the parent entry.

    For every deal with parent companies, one ``from;to;size;intention`` line
    is printed:
      * per parent registered in ``country_a`` (outgoing investment), and
      * per parent with a known country when the deal's target country is
        ``country_a`` (incoming investment).
    A deal matching either way adds its "Deal size" once to the total, which
    is printed at the end.

    Args:
        country_a: Country name to look for as investor origin or as target.

    Side effects: changes the process working directory. Returns nothing.
    """
    os.chdir("D:\\Mes Donnees\\Land Matrix\\_LURKER\\LM_Transnational_InOperation_17032022\\")
    deals = pd.read_csv("deals.csv", sep=';', low_memory=False)
    investors = pd.read_csv("investors.csv", index_col=0, sep=';')

    def usable_origins(parents):
        # Lazily yield the registered country of each parent that has an id
        # and a non-missing, non-empty country in the investors table.
        for parent in parents:
            if len(parent) <= 1:
                continue
            origin = investors.loc[int(parent[1]), "Country of registration/origin"]
            if pd.isnull(origin) or origin == '':
                continue
            yield origin

    tot = 0
    for _, deal in deals.iterrows():
        parents_field = deal["Top parent companies"]
        if pd.isnull(parents_field):  # skip rows with no parent companies
            continue

        parents = getParentVals(parents_field)
        target_c = deal["Target country"].strip().replace('"', '')
        involved = False

        # Outgoing: a parent company is registered in country_a.
        for origin in usable_origins(parents):
            if origin == country_a:
                involved = True
                print("%s;%s;%s;%s" % (country_a, target_c, deal["Deal size"], deal["Intention of investment"]))

        # Incoming: country_a is the target, list every known investor country.
        if target_c == country_a:
            involved = True
            for origin in usable_origins(parents):
                print("%s;%s;%s;%s" % (origin, target_c, deal["Deal size"], deal["Intention of investment"]))

        # The deal size counts once, however many lines were printed for it.
        if involved:
            tot += float(deal["Deal size"])

    print("Total Surface:", tot)

def queryDealsSequence(deals):
    """Run ``queryDeals`` for each country pair in ``deals`` and print a table.

    Args:
        deals: ``;``-separated entries. In each entry, the text before the
            first ``:`` is split on ``>`` into a source and a destination;
            the last character of each side is dropped before the lookup
            (presumably a separating space -- confirm against the caller's
            input format).

    Prints each entry between separator lines, then a ``from;to;value`` table
    made of the lines returned by ``queryDeals``. Returns nothing.
    """
    table_lines = ["from;to;value\n"]
    for entry in deals.split(';'):
        print("============================================================")
        print(entry)
        print("------------------------------------------------------------")
        sides = entry.split(':')[0].split(">")
        source, destination = sides[0][:-1], sides[1][:-1]
        table_lines.append(queryDeals(source, destination))
        print("============================================================")
    print("".join(table_lines))

def LM_stats(path):
    """Print deal count and total deal size for the selected sectors.

    Reads the ``;``-separated deals file at ``path`` and keeps the rows whose
    "Intention of investment" lists one of the selected sectors (currently
    only "Biofuels"). The sector list of a row is the comma-separated text
    after the last ``#`` of that field; rows with a missing intention are
    ignored. Each kept row's "Deal size" is converted with ``int`` before
    being summed.

    Args:
        path: Location of the deals CSV file.

    Prints the number of kept deals and their summed size. Returns nothing.
    """
    wanted_sectors = ["Biofuels"]
    table = pd.read_csv(path, sep=';', low_memory=False)

    kept_sizes = []
    for _, deal in table.iterrows():
        intention = deal["Intention of investment"]
        if pd.isnull(intention):
            continue
        listed = intention.split('#')[-1].split(',')
        if any(sector in listed for sector in wanted_sectors):
            kept_sizes.append(int(deal["Deal size"]))

    print("Deals (in operation)", len(kept_sizes))
    print("total involved land (ha)", sum(kept_sizes))

# Script entry point: the body runs only when this file is executed directly.
if __name__=='__main__':

    # Active analysis: print the deals that have Mauritius as investor origin
    # or as target country, plus their total surface.
    queryDealsSingle("Mauritius")

    # The commented-out lines below are other analyses and their setup,
    # apparently kept so they can be re-enabled by hand.
    #addColumnToMetaIndex()

    #corrs()

    #rankings()

    #parent_to_target_country_network_allTransnational_inOperation_NewLMFormat()

    #parent_to_target_country_network_allTransnational_inOperation_withYear()

    #dataset = "agriculture"
    #os.chdir("D:\\Mes Donnees\\Land Matrix\\_LURKER\\net_%s\\" % dataset)
    #path = "net_%s_17032022_inOperation_LR.ncol" % dataset

    #graph_stats_directed(path)
    #print("======================================")
    #graph_stats_undirected(path)

    #rankings()
    #os.chdir("D:\\Mes Donnees\\Land Matrix\\_LURKER\\")
    #LM_stats("LM_Transnational_InOperation_17032022\\deals.csv")