"""Land Matrix (landmatrix.org) deal-export processing.

Builds parent-company-country -> target-country investment networks from the
Land Matrix CSV exports, computes graph statistics and country rankings, and
answers ad-hoc deal queries.  All input/output paths are hard-coded for the
original author's machine (Windows, "D:\\Mes Donnees\\Land Matrix\\...").
"""

import os
from collections import OrderedDict
from operator import itemgetter  # kept: part of the original module interface

import numpy as np
import networkx as nx
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from geopy.geocoders import Nominatim
from mpl_toolkits.basemap import Basemap as Basemap

# NOTE(review): the original file also did "import matplotlib.pylab as plt",
# silently rebinding plt; the duplicate import was removed.

# Column layout of the projection CSV produced by lmiProjection()
# (semicolon-separated, indices used by stats() and the *_network() builders):
#  0 Deal ID
#  1 Top parent companies
#  2 Location 1: Location
#  3 Location 1: Latitude
#  4 Location 1: Longitude
#  5 Location 1: Target country
#  6 Intention of the investment
#  7 Nature of the deal
#  8 Negotiation status
#  9 Operating company: Investor ID
# 10 Operating company: Name
# 11 Operating company: Country of registration/origin


def lmiProjection():
    """Project the raw Land Matrix transnational export down to the columns
    used by the rest of this module and write the result as a new CSV."""
    deals_path = "D:\\Mes Donnees\\Land Matrix\\export_transnational\\deals.csv"
    outp = ".\\data_net\\WebsiteLandMatrix_Projection_allTransnational_wSurf_wStatus.csv"
    df_deals = pd.read_csv(deals_path, sep=";")
    df_proj = df_deals[['Deal ID', 'Top parent companies', 'Location 1: Location',
                        'Location 1: Latitude', 'Location 1: Longitude',
                        'Location 1: Target country', 'Intention of the investment',
                        'Nature of the deal', 'Negotiation status',
                        'Operating company: Investor ID', 'Operating company: Name',
                        'Operating company: Country of registration/origin',
                        'Deal size', 'Current implementation status']]
    df_proj.to_csv(outp, sep=";")


def getParentVals(line):
    """Parse a 'Top parent companies' field.

    The field packs one or more company records separated by '|'; each record
    is itself '#'-separated (typically name#id#country).  Returns a list of
    records, each a list of its '#'-split parts.

    NOTE: this never returns None (an empty string yields [['']]); the
    `is not None` guards at the call sites are vestigial but harmless, because
    the malformed/empty records are filtered by the callers' length checks.
    """
    # str.split('|') returns [line] when there is no '|', so one comprehension
    # covers both the single- and multi-company cases of the original code.
    return [v.split('#') for v in line.split('|')]


def stats():
    """Print summary statistics on parent/operating companies and target
    countries, and dump per-company deal counts sorted descending to CSV."""
    input_path = "WebsiteLandMatrix_Projection.csv"
    target_countries = set()
    parent_company_countries = set()
    parent_companies = set()
    operating_companies = set()
    parent_deals = {}   # parent company id -> #deals
    op_deals = {}       # operating company id -> #deals
    parent_names = {}   # parent company id -> name (first occurrence)
    op_names = {}       # operating company id -> name (first occurrence)
    with open(input_path, 'r') as fin:
        next(fin)  # skip header
        for line in fin:
            vals = line.split(';')
            # NOTE(review): stray ';' inside fields shifts columns; the
            # original had a disabled len(vals)==12 guard for that.
            parent_vals = getParentVals(vals[1])
            if parent_vals is not None:  # skip empty lines (vestigial, see getParentVals)
                for pc in parent_vals:
                    if len(pc) == 3:
                        parent_c = pc[-1]
                        parent_id = int(pc[-2])
                        parent_company_countries.add(parent_c)
                        parent_companies.add(parent_id)
                        if parent_id not in parent_deals:
                            parent_deals[parent_id] = 1
                            parent_names[parent_id] = pc[0]
                        else:
                            parent_deals[parent_id] += 1
            target_c = vals[5]
            target_countries.add(target_c)
            operating_companies.add(vals[9])
            op_id = int(vals[9])
            if op_id not in op_deals:
                op_deals[op_id] = 1
                op_names[op_id] = vals[10]
            else:
                op_deals[op_id] += 1
    print(len(target_countries), len(parent_company_countries), len(parent_companies))

    x = []
    ones = 0
    for p in parent_deals:
        x.append(parent_deals[p])
        if parent_deals[p] == 1:
            ones += 1
    print(len(parent_deals), np.mean(x), np.std(x), np.max(x), ones)
    with open('sorted_parent_companies.csv', 'w') as fo_p:
        fo_p.write("Parent Company Name;Parent Company Id;#deals\n")
        for (v, k) in sorted(((v, k) for k, v in parent_deals.items()), reverse=True):
            fo_p.write("%s;%d;%d\n" % (parent_names[int(k)], int(k), v))

    x = []
    ones = 0
    for p in op_deals:
        x.append(op_deals[p])
        if op_deals[p] == 1:
            ones += 1
    print(len(op_deals), np.mean(x), np.std(x), np.max(x), ones)
    with open('sorted_operating_companies.csv', 'w') as fo_op:
        fo_op.write("Operating Company Name;Operating Company Id;#deals\n")
        for (v, k) in sorted(((v, k) for k, v in op_deals.items()), reverse=True):
            fo_op.write("%s;%d;%d\n" % (op_names[int(k)], int(k), v))


def _country_id(country, countries_to_ids, ids_to_countries):
    """Return the integer node id for *country*, assigning the next free id
    (== current dictionary size) and updating both maps on first sight."""
    if country not in countries_to_ids:
        new_id = len(countries_to_ids)
        countries_to_ids[country] = new_id
        ids_to_countries[new_id] = country
    return countries_to_ids[country]


def parent_to_target_country_network():
    """Build the directed parent-company-country -> target-country network
    (edge weight = number of deals) from the projection CSV; write the edge
    list (by id and by name) and the id->country dictionary."""
    input_path = "WebsiteLandMatrix_Projection.csv"
    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()
    with open(input_path, 'r') as fin:
        next(fin)  # skip header
        for line in fin:
            vals = line.split(';')
            parent_vals = getParentVals(vals[1])
            if parent_vals is not None:  # skip empty lines
                ids_parents = []
                for pc in parent_vals:
                    if len(pc) == 3:
                        parent_country = pc[-1].strip().replace('"', '')
                        if parent_country != '':  # skip empty countries
                            ids_parents.append(
                                _country_id(parent_country, countries_to_ids, ids_to_countries))
                        else:
                            print("Empty Parent Country: ", pc)
                target_c = vals[5].strip().replace('"', '')
                v = _country_id(target_c, countries_to_ids, ids_to_countries)
                for u in ids_parents:
                    # one deal per (parent country, target country) pair
                    if G.has_edge(u, v):
                        G[u][v]['weight'] += 1
                    else:
                        G.add_edge(u, v, weight=1)
    with open("parent_company_country_to_target_country_network.ncol", 'w') as fout, \
            open("parent_company_country_to_target_country_network_wNames.txt", 'w') as fout_n, \
            open("ids_to_countries.csv", 'w') as fout_dict:
        for e in G.edges:
            fout.write("%d;%d;%d\n" % (e[0], e[1], G[e[0]][e[1]]['weight']))
            fout_n.write("%s;%s;%d\n" % (ids_to_countries[e[0]], ids_to_countries[e[1]],
                                         G[e[0]][e[1]]['weight']))
        for cid in ids_to_countries:
            fout_dict.write("%s;%s\n" % (cid, ids_to_countries[cid]))
    print("#Edges", G.size())
    print("#Nodes", G.order())


def parent_to_target_country_network_Surface():
    """Same as parent_to_target_country_network() but edge weight is the total
    deal surface (column 12, hectares) instead of the deal count."""
    input_path = "WebsiteLandMatrix_Projection_wSurface.csv"
    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()
    with open(input_path, 'r') as fin:
        next(fin)  # skip header
        for line in fin:
            vals = line.split(';')
            parent_vals = getParentVals(vals[1])
            if parent_vals is not None:  # skip empty lines
                ids_parents = []
                for pc in parent_vals:
                    if len(pc) == 3:
                        parent_country = pc[-1].strip().replace('"', '')
                        if parent_country != '':  # skip empty countries
                            ids_parents.append(
                                _country_id(parent_country, countries_to_ids, ids_to_countries))
                        else:
                            print("Empty Parent Country: ", pc)
                target_c = vals[5].strip().replace('"', '')
                v = _country_id(target_c, countries_to_ids, ids_to_countries)
                for u in ids_parents:
                    w = int(vals[12])  # deal surface in hectares
                    if G.has_edge(u, v):
                        G[u][v]['weight'] += w
                    else:
                        G.add_edge(u, v, weight=w)
    with open("parent_company_country_to_target_country_network_wSurface.ncol", 'w') as fout, \
            open("ids_to_countries_wSurface.csv", 'w') as fout_dict:
        for e in G.edges:
            fout.write("%d;%d;%d\n" % (e[0], e[1], G[e[0]][e[1]]['weight']))
        for cid in ids_to_countries:
            fout_dict.write("%s;%s\n" % (cid, ids_to_countries[cid]))
    print("#Edges", G.size())
    print("#Nodes", G.order())


def parent_to_target_country_network_allTransnational():
    """Build both the deal-count and the surface-weighted country networks in
    one pass over the full transnational deals export."""
    input_path = "D:\\Mes Donnees\\Land Matrix\\export_transnational\\deals.csv"
    df = pd.read_csv(input_path, sep=';', low_memory=False)
    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()        # weight = #deals
    G_surf = nx.DiGraph()   # weight = total surface (ha)
    for index, row in df.iterrows():
        if not pd.isnull(row["Top parent companies"]):  # skip empty lines
            parent_vals = getParentVals(row["Top parent companies"])
            ids_parents = []
            for pc in parent_vals:
                if len(pc) == 3:
                    parent_country = pc[-1].strip().replace('"', '')
                    if parent_country != '':  # skip empty countries
                        ids_parents.append(
                            _country_id(parent_country, countries_to_ids, ids_to_countries))
                    else:
                        print("Empty Parent Country: ", pc)
            target_c = row["Location 1: Target country"].strip().replace('"', '')
            v = _country_id(target_c, countries_to_ids, ids_to_countries)
            for u in ids_parents:
                # NOTE(review): raises on NaN "Deal size" — original behavior kept
                w_surf = int(row["Deal size"])
                if G.has_edge(u, v):
                    G[u][v]['weight'] += 1
                    G_surf[u][v]['weight'] += w_surf
                else:
                    G.add_edge(u, v, weight=1)
                    G_surf.add_edge(u, v, weight=w_surf)
    with open(".\\data_net\\net_allTransnational.ncol", 'w') as fout, \
            open(".\\data_net\\net_allTransnational_surf.ncol", 'w') as fout_surf, \
            open(".\\data_net\\ids_to_countries_allTransnational.csv", 'w') as fout_dict:
        for e in G.edges:
            fout.write("%d;%d;%d\n" % (e[0], e[1], G[e[0]][e[1]]['weight']))
        for e in G_surf.edges:
            fout_surf.write("%d;%d;%d\n" % (e[0], e[1], G_surf[e[0]][e[1]]['weight']))
        for cid in ids_to_countries:
            fout_dict.write("%s;%s\n" % (cid, ids_to_countries[cid]))
    print("#Edges", G.size())
    print("#Nodes", G.order())


def parent_to_target_country_network_allTransnational_inOperation():
    """Like parent_to_target_country_network_allTransnational(), restricted to
    deals "In operation (production)"; self-loops (investor country == target
    country) are excluded from the graphs and logged to self_investors.txt."""
    self_investors = set()
    dataset_name = "global_07072021"
    input_path = "D:\\Mes Donnees\\Land Matrix\\_LURKER\\LM_Transnational_InOperation_07072021\\deals.csv"
    df = pd.read_csv(input_path, sep=';', low_memory=False)
    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()        # weight = #deals
    G_surf = nx.DiGraph()   # weight = total surface (ha)
    for index, row in df.iterrows():
        if not pd.isnull(row["Top parent companies"]):  # skip empty lines
            if row["Current implementation status"] == "In operation (production)":
                parent_vals = getParentVals(row["Top parent companies"])
                ids_parents = []
                for pc in parent_vals:
                    if len(pc) == 3:
                        parent_country = pc[-1].strip().replace('"', '')
                        if parent_country != '':  # skip empty countries
                            ids_parents.append(
                                _country_id(parent_country, countries_to_ids, ids_to_countries))
                        else:
                            print("Empty Parent Country: ", pc)
                target_c = row["Location 1: Target country"].strip().replace('"', '')
                v = _country_id(target_c, countries_to_ids, ids_to_countries)
                for u in ids_parents:
                    w_surf = int(row["Deal size"])
                    if u != v:
                        if G.has_edge(u, v):
                            G[u][v]['weight'] += 1
                            G_surf[u][v]['weight'] += w_surf
                        else:
                            G.add_edge(u, v, weight=1)
                            G_surf.add_edge(u, v, weight=w_surf)
                    else:
                        self_investors.add(u)
    with open(".\\data_net\\net_%s_inOperation.ncol" % dataset_name, 'w') as fout, \
            open(".\\data_net\\net_%s_surf_inOperation.ncol" % dataset_name, 'w') as fout_surf, \
            open(".\\data_net\\ids_to_countries_%s_inOperation.csv" % dataset_name, 'w') as fout_dict:
        for e in G.edges:
            fout.write("%d;%d;%d\n" % (e[0], e[1], G[e[0]][e[1]]['weight']))
        for e in G_surf.edges:
            fout_surf.write("%d;%d;%d\n" % (e[0], e[1], G_surf[e[0]][e[1]]['weight']))
        for cid in ids_to_countries:
            fout_dict.write("%s;%s\n" % (cid, ids_to_countries[cid]))
    with open("self_investors.txt", 'w') as fsi:
        for si in self_investors:
            fsi.write("%d\n" % int(si))
    print("#Edges", G.size())
    print("#Nodes", G.order())


def parent_to_target_country_network_allTransnational_inOperation_NewLMFormat():
    """In-operation country network for the post-2022 Land Matrix export
    format, where parent-company countries come from a separate investors.csv
    (joined on company id) instead of the deals file itself.

    Optionally filters deals by intention-of-investment sector and also writes
    the reversed-edge ("LurkerRank") topology when lr_write is True.
    """
    lr_write = True        # also write reversed-topology edge lists
    filter_sectors = True  # restrict to specific intentions of investment
    self_investors = set()
    dataset_name = "energy_17032022"
    sectors = ["Renewable Energy"]
    input_path = "D:\\Mes Donnees\\Land Matrix\\_LURKER\\LM_Transnational_InOperation_17032022\\deals.csv"
    investors_path = "D:\\Mes Donnees\\Land Matrix\\_LURKER\\LM_Transnational_InOperation_17032022\\investors.csv"
    df_inv = pd.read_csv(investors_path, index_col=0, sep=';')
    df = pd.read_csv(input_path, sep=';', low_memory=False)
    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()        # weight = #deals
    G_surf = nx.DiGraph()   # weight = total surface (ha)
    for index, row in df.iterrows():
        if not pd.isnull(row["Top parent companies"]):  # skip empty lines
            if row["Current implementation status"] == "In operation (production)":
                process_row = True
                if filter_sectors:
                    process_row = False
                    if not pd.isnull(row["Intention of investment"]):
                        # field looks like "...#sector1,sector2"; take the last segment
                        curr_sectors = row["Intention of investment"].split('#')[-1].split(',')
                        for s in sectors:
                            if s in curr_sectors:
                                process_row = True
                                break
                if process_row:
                    parent_vals = getParentVals(row["Top parent companies"])
                    ids_parents = []
                    for pc in parent_vals:
                        if len(pc) > 1:
                            company_id = pc[1]
                            # look the country up in investors.csv by company id
                            if not pd.isnull(df_inv.loc[int(company_id), "Country of registration/origin"]):
                                parent_country = df_inv.loc[int(company_id), "Country of registration/origin"]
                                ids_parents.append(
                                    _country_id(parent_country, countries_to_ids, ids_to_countries))
                            else:
                                print("Empty Parent Country: ", pc)
                        else:
                            print("Error in parent company format (missing id?): ", pc)
                    target_c = row["Target country"].strip().replace('"', '')
                    v = _country_id(target_c, countries_to_ids, ids_to_countries)
                    for u in ids_parents:
                        w_surf = int(row["Deal size"])
                        if u != v:
                            if G.has_edge(u, v):
                                G[u][v]['weight'] += 1
                                G_surf[u][v]['weight'] += w_surf
                            else:
                                G.add_edge(u, v, weight=1)
                                G_surf.add_edge(u, v, weight=w_surf)
                        else:
                            self_investors.add(u)
    with open(".\\data_net\\net_%s_inOperation.ncol" % dataset_name, 'w') as fout, \
            open(".\\data_net\\net_%s_surf_inOperation.ncol" % dataset_name, 'w') as fout_surf, \
            open(".\\data_net\\net_%s_inOperation_LR.ncol" % dataset_name, 'w') as fout_lr, \
            open(".\\data_net\\net_%s_surf_inOperation_LR.ncol" % dataset_name, 'w') as fout_surf_lr, \
            open(".\\data_net\\ids_to_countries_%s_inOperation.csv" % dataset_name, 'w') as fout_dict:
        for e in G.edges:
            fout.write("%d;%d;%d\n" % (e[0], e[1], G[e[0]][e[1]]['weight']))
            if lr_write:
                fout_lr.write("%d;%d;%d\n" % (e[1], e[0], G[e[0]][e[1]]['weight']))
        for e in G_surf.edges:
            fout_surf.write("%d;%d;%d\n" % (e[0], e[1], G_surf[e[0]][e[1]]['weight']))
            if lr_write:
                fout_surf_lr.write("%d;%d;%d\n" % (e[1], e[0], G_surf[e[0]][e[1]]['weight']))
        for cid in ids_to_countries:
            fout_dict.write("%s;%s\n" % (cid, ids_to_countries[cid]))
    with open("self_investors.txt", 'w') as fsi:
        for si in self_investors:
            fsi.write("%d\n" % int(si))
    print("#Edges", G.size())
    print("#Nodes", G.order())


def parent_to_target_country_network_allTransnational_inOperation_withYear():
    """In-operation country networks annotated with contract date / negotiation
    year.  Date and year are stored only when an edge is first created, so
    subsequent deals on the same edge do not update them (first-seen wins)."""
    dataset_name = "global"
    input_path = "D:\\Mes Donnees\\Land Matrix\\%s\\deals.csv" % dataset_name
    df = pd.read_csv(input_path, sep=';', low_memory=False)
    countries_to_ids = {}
    ids_to_countries = {}
    G = nx.DiGraph()        # weight = #deals
    G_surf = nx.DiGraph()   # weight = total surface (ha), plus date/year attrs
    for index, row in df.iterrows():
        if not pd.isnull(row["Top parent companies"]):  # skip empty lines
            if row["Current implementation status"] == "In operation (production)":
                parent_vals = getParentVals(row["Top parent companies"])
                ids_parents = []
                for pc in parent_vals:
                    if len(pc) == 3:
                        parent_country = pc[-1].strip().replace('"', '')
                        if parent_country != '':  # skip empty countries
                            ids_parents.append(
                                _country_id(parent_country, countries_to_ids, ids_to_countries))
                        else:
                            print("Empty Parent Country: ", pc)
                target_c = row["Location 1: Target country"].strip().replace('"', '')
                v = _country_id(target_c, countries_to_ids, ids_to_countries)
                for u in ids_parents:
                    w_surf = int(row["Deal size"])
                    if G.has_edge(u, v):
                        G[u][v]['weight'] += 1
                        G_surf[u][v]['weight'] += w_surf
                    else:
                        G.add_edge(u, v, weight=1)
                        G_surf.add_edge(u, v, weight=w_surf,
                                        date=row["Contracts 1: Contract date"],
                                        year=str(row["Negotiation status"]).split('##')[0])
    with open(".\\data_net\\net_%s_inOperation_dates.ncol" % dataset_name, 'w') as fout, \
            open(".\\data_net\\net_%s_surf_inOperation_dates.ncol" % dataset_name, 'w') as fout_surf, \
            open(".\\data_net\\ids_to_countries_%s_inOperation.csv" % dataset_name, 'w') as fout_dict:
        for e in G.edges:
            fout.write("%d;%d;%d\n" % (e[0], e[1], G[e[0]][e[1]]['weight']))
        for e in G_surf.edges:
            fout_surf.write("%d;%d;%d;%s;%s\n" % (e[0], e[1], G_surf[e[0]][e[1]]['weight'],
                                                  G_surf[e[0]][e[1]]['date'],
                                                  G_surf[e[0]][e[1]]['year']))
        for cid in ids_to_countries:
            fout_dict.write("%s;%s\n" % (cid, ids_to_countries[cid]))
    print("#Edges", G.size())
    print("#Nodes", G.order())


def graph_stats_directed(path):
    """Print topology statistics for a directed weighted ';'-edge list: size,
    transitivity, assortativity, average degree, path lengths over reachable
    pairs, and the fraction of reciprocal edges."""
    G = nx.read_edgelist(path, delimiter=';', create_using=nx.DiGraph(),
                         nodetype=int, data=(('weight', float),))
    print("#Edges", G.size())
    print("#Nodes", G.order())
    print("Transitivity", nx.transitivity(G))
    print("Assortativity", nx.degree_assortativity_coefficient(G))
    avg_degree = sum(d for _, d in G.degree) / len(G.degree)
    print("Avg Degree", avg_degree)
    if nx.is_weakly_connected(G):
        print("Average Path Length", nx.average_shortest_path_length(G))
    else:
        print("Not weakly connected")
    # average shortest path restricted to reachable ordered pairs
    reachable = 0
    pair_count = 0
    total_length = 0
    for u in G.nodes():
        for v in G.nodes():
            if v != u:
                if nx.has_path(G, u, v):
                    reachable += 1
                    total_length += nx.shortest_path_length(G, u, v)
                pair_count += 1
    print(reachable, pair_count, reachable / pair_count, total_length / reachable)
    reciprocal = 0
    for e in G.edges:
        if G.has_edge(e[1], e[0]) and e[0] != e[1]:
            reciprocal += 1
    print("Percentage of reciprocal edges", reciprocal / len(G.edges))


def graph_stats_undirected(path):
    """Print topology statistics for an undirected weighted ';'-edge list,
    including per-connected-component average path lengths when the graph is
    disconnected."""
    G = nx.read_edgelist(path, delimiter=';', nodetype=int, data=(('weight', float),))
    print("#Edges", G.size())
    print("#Nodes", G.order())
    print("Transitivity", nx.transitivity(G))
    print("Average CC", nx.average_clustering(G))
    print("Connected", nx.is_connected(G))
    print("#Connected Components", nx.number_connected_components(G))
    print("Assortativity", nx.degree_assortativity_coefficient(G))
    avg_degree = sum(d for _, d in G.degree) / len(G.degree)
    print("Avg Degree", avg_degree)
    if nx.is_connected(G):
        print("Average Path Length", nx.average_shortest_path_length(G))
    else:
        print("Not weakly connected")
        # nx.connected_component_subgraphs was removed in networkx 2.4;
        # build the subgraphs from the component node sets instead.
        for comp in nx.connected_components(G):
            g = G.subgraph(comp)
            print("Average Path Length cc", nx.average_shortest_path_length(g), g.nodes())
    # fraction of ordered node pairs connected by some path
    reachable = 0
    pair_count = 0
    for u in G.nodes():
        for v in G.nodes():
            if v != u:
                if nx.has_path(G, u, v):
                    reachable += 1
                pair_count += 1
    print(reachable, pair_count, reachable / pair_count)


def reciprocation():
    """Print every reciprocal (bidirectional) edge of the country network and
    the overall fraction of reciprocal edges."""
    path = "parent_company_country_to_target_country_network.ncol"
    G = nx.read_edgelist(path, delimiter=';', create_using=nx.DiGraph(),
                         nodetype=int, data=(('weight', float),))
    count = 0
    for e in G.edges:
        if G.has_edge(e[1], e[0]) and e[0] != e[1]:
            count += 1
            print(e)
    print("Percentage of reciprocal edges", count / len(G.edges))


def rankings():
    """Rank countries of the (mines) network by weighted in/out degree,
    pagerank and betweenness, join with the META index and write the result.

    With only_advanced=True only pagerank/betweenness columns are produced.
    """
    os.chdir("D:\\Mes Donnees\\Land Matrix\\mines_28092020\\")
    only_advanced = True
    path_d = "ids_to_countries_mines_APR21.csv"
    path = "mines_mlnet_flattened_APR21.ncol"
    graph = nx.read_edgelist(path, delimiter=';', create_using=nx.DiGraph(),
                             nodetype=int, data=(('weight', int),))
    # node id -> country name
    names_dict = {}
    with open(path_d, 'r') as fd:
        for line in fd:
            vals = line.split(';')
            country = vals[1].strip()
            names_dict[int(vals[0])] = country
    path_iso = "ISO country.xlsx"
    # NOTE(review): the original passed sep=';' to read_excel, which is not a
    # valid argument for Excel files and raises on current pandas — removed.
    iso = pd.read_excel(path_iso, index_col=0)
    iso_dict = {}
    for nid in names_dict:
        iso_dict[nid] = iso.loc[names_dict[nid]]['Code ISO']
    size_indeg_dict = dict(graph.in_degree(weight='weight'))
    size_outdeg_dict = dict(graph.out_degree(weight='weight'))
    ord_in = OrderedDict(sorted(size_indeg_dict.items(), key=lambda x: x[1], reverse=True))
    ord_out = OrderedDict(sorted(size_outdeg_dict.items(), key=lambda x: x[1], reverse=True))
    # in/out weighted-degree ratio; 0 when either side is 0
    inout = {}
    for k in ord_in.keys():
        if ord_in[k] == 0 or ord_out[k] == 0:
            inout[k] = 0
        else:
            inout[k] = float(ord_in[k] / ord_out[k])
    ord_inout = OrderedDict(sorted(inout.items(), key=lambda x: x[1], reverse=True))
    pr = nx.pagerank(graph)
    btw = nx.betweenness_centrality(graph)
    # NOTE(review): the original used chained assignment df.loc[k]['col'] = v,
    # which writes to a temporary and can silently fail; use df.loc[k, 'col'].
    if not only_advanced:
        df = pd.DataFrame(np.nan, index=iso_dict.values(),
                          columns=['in', 'out', 'inout', 'pagerank', 'betweenness'],
                          dtype='float')
        for nid in ord_in:
            df.loc[iso_dict[nid], 'in'] = size_indeg_dict[nid]
            df.loc[iso_dict[nid], 'out'] = size_outdeg_dict[nid]
            df.loc[iso_dict[nid], 'inout'] = inout[nid]
            df.loc[iso_dict[nid], 'pagerank'] = pr[nid]
            df.loc[iso_dict[nid], 'betweenness'] = btw[nid]
    else:
        df = pd.DataFrame(np.nan, index=iso_dict.values(),
                          columns=['pagerank', 'betweenness'], dtype='float')
        for nid in ord_in:
            df.loc[iso_dict[nid], 'pagerank'] = pr[nid]
            df.loc[iso_dict[nid], 'betweenness'] = btw[nid]
    df.to_csv('degrees_ranks_allTransnational.csv', sep=';')
    df_meta = pd.read_csv("Heatmap\\META_Index_Mines_Revolution_.csv", index_col=0, sep=';')
    df_merge = df.join(df_meta, how="inner")
    print(df_merge)
    df_merge.to_csv('META_Index_Mines_Revolution_complete.csv', sep=';')


def corrs():
    """Compute the Pearson correlation matrix of the META index columns, show
    it as a heatmap and save both the matrix (CSV) and the figure (PNG)."""
    os.chdir("D:\\Mes Donnees\\Land Matrix\\mines_28092020\\Heatmap\\")
    df = pd.read_csv('META_Index_Mines_Revolution_final.csv', sep=';', index_col=0)
    print(df)
    corr_matrix = df.corr(method='pearson')
    print(corr_matrix)
    fig, (ax) = plt.subplots(1, 1, figsize=(12, 8))
    hm = sns.heatmap(corr_matrix, ax=ax, xticklabels=True, yticklabels=True,
                     square=False, linewidth=0.5)
    print(ax.get_ylim())
    # workaround for the matplotlib 3.1.1 heatmap clipping bug (top/bottom
    # rows cut in half); 10 == expected number of index columns
    ax.set_ylim(10, 0.0)
    plt.show()
    corr_matrix.to_csv('LMI_corrs_new.csv', sep=';')
    fig.savefig("LM_corrs_new.png", bbox_inches='tight')
    plt.close(fig)


def addColumnToMetaIndex():
    """Add the 'Outdegree' column from SizesOut_mines.csv to the META index
    (matching on the index) and write the extended table."""
    os.chdir("D:\\Mes Donnees\\Land Matrix\\mines_28092020\\Heatmap\\")
    df_meta = pd.read_csv("MetaIndex_new2.csv", sep=';', index_col=0)
    df_toadd = pd.read_csv("SizesOut_mines.csv", sep=';', index_col=0, header=None)
    df_meta["Outdegree"] = ""
    for index, row in df_toadd.iterrows():
        if index in df_meta.index:
            df_meta.at[index, "Outdegree"] = df_toadd.loc[index][1]
    df_meta.to_csv("MetaIndex_new3.csv", sep=';')


def queryDeals(country_a, country_b):
    """Print every deal with a parent company in country_a targeting
    country_b, print the total surface, and return 'a;b;surface\\n'."""
    input_path = "D:\\Mes Donnees\\Land Matrix\\export_transnational\\deals.csv"
    df = pd.read_csv(input_path, sep=';', low_memory=False)
    tot = 0
    for index, row in df.iterrows():
        if not pd.isnull(row["Top parent companies"]):  # skip empty lines
            par = False
            parent_vals = getParentVals(row["Top parent companies"])
            for pc in parent_vals:
                if len(pc) == 3:
                    parent_country = pc[-1].strip().replace('"', '')
                    if parent_country != '':  # skip empty countries
                        if parent_country == country_a:
                            par = True
            if par:
                target_c = row["Location 1: Target country"].strip().replace('"', '')
                if target_c == country_b:
                    for l in ["Top parent companies", "Location 1: Target country",
                              "Intention of the investment", "Deal size"]:
                        print(row[l])
                    print("============================================================")
                    tot += float(row["Deal size"])
    print("Total Surface:", tot)
    return "%s;%s;%f\n" % (country_a, country_b, tot)


def queryDealsSingle(country_a):
    """Print every deal in which country_a appears either as a parent-company
    country (outgoing) or as the target country (incoming), using the
    new-format export, plus the total surface over the matched deals."""
    os.chdir("D:\\Mes Donnees\\Land Matrix\\_LURKER\\LM_Transnational_InOperation_17032022\\")
    input_path = "deals.csv"
    investors_path = "investors.csv"
    df = pd.read_csv(input_path, sep=';', low_memory=False)
    df_inv = pd.read_csv(investors_path, index_col=0, sep=';')
    tot = 0
    for index, row in df.iterrows():
        if not pd.isnull(row["Top parent companies"]):  # skip empty lines
            par = False
            parent_vals = getParentVals(row["Top parent companies"])
            target_c = row["Target country"].strip().replace('"', '')
            # outgoing: country_a is a parent-company country
            for pc in parent_vals:
                if len(pc) > 1:
                    company_id = pc[1]
                    if not pd.isnull(df_inv.loc[int(company_id), "Country of registration/origin"]):
                        parent_country = df_inv.loc[int(company_id), "Country of registration/origin"]
                        if parent_country != '':  # skip empty countries
                            if parent_country == country_a:
                                par = True
                                print("%s;%s;%s;%s" % (country_a, target_c,
                                                       row["Deal size"],
                                                       row["Intention of investment"]))
            # incoming: country_a is the target country
            if target_c == country_a:
                par = True
                for pc in parent_vals:
                    if len(pc) > 1:
                        company_id = pc[1]
                        if not pd.isnull(df_inv.loc[int(company_id), "Country of registration/origin"]):
                            parent_country = df_inv.loc[int(company_id), "Country of registration/origin"]
                            if parent_country != '':  # skip empty countries
                                print("%s;%s;%s;%s" % (parent_country, target_c,
                                                       row["Deal size"],
                                                       row["Intention of investment"]))
            if par:
                tot += float(row["Deal size"])
    print("Total Surface:", tot)


def queryDealsSequence(deals):
    """Run queryDeals for a ';'-separated list of 'A > B: ...' pairs and print
    the accumulated 'from;to;value' table."""
    tab = "from;to;value\n"
    vals = deals.split(';')
    for d in vals:
        print("============================================================")
        print(d)
        print("------------------------------------------------------------")
        cs = d.split(':')[0].split(">")
        country_a = cs[0][0:-1]  # drop the trailing space before '>'
        country_b = cs[1][0:-1]  # drop the leading-space artifact at the end
        tab += queryDeals(country_a, country_b)
    print("============================================================")
    print(tab)


def LM_stats(path):
    """Count deals and total involved land (ha) for the configured intention
    sectors in a new-format deals export."""
    df = pd.read_csv(path, sep=';', low_memory=False)
    sectors = ["Biofuels"]
    deals_count = 0
    ha_count = 0
    for index, row in df.iterrows():
        process_row = False
        if not pd.isnull(row["Intention of investment"]):
            curr_sectors = row["Intention of investment"].split('#')[-1].split(',')
            for s in sectors:
                if s in curr_sectors:
                    process_row = True
                    break
        if process_row:
            deals_count += 1
            ha_count += int(row["Deal size"])
    print("Deals (in operation)", deals_count)
    print("total involved land (ha)", ha_count)


if __name__ == '__main__':
    queryDealsSingle("Mauritius")
    # addColumnToMetaIndex()
    # corrs()
    # rankings()
    # parent_to_target_country_network_allTransnational_inOperation_NewLMFormat()
    # parent_to_target_country_network_allTransnational_inOperation_withYear()
    # dataset = "agriculture"
    # os.chdir("D:\\Mes Donnees\\Land Matrix\\_LURKER\\net_%s\\" % dataset)
    # path = "net_%s_17032022_inOperation_LR.ncol" % dataset
    # graph_stats_directed(path)
    # print("======================================")
    # graph_stats_undirected(path)
    # os.chdir("D:\\Mes Donnees\\Land Matrix\\_LURKER\\")
    # LM_stats("LM_Transnational_InOperation_17032022\\deals.csv")