Commit 21df6bc6 authored by Interdonato Roberto

Upload New File

parent 596b20f7
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
def barplot_peryear():
    """Plot the number of publications per year as a labelled bar chart.

    Changes the working directory to the hard-coded project folder, reads
    ``test_jeremy1_abs-title-key.csv`` (``;``-separated, first column used as
    the index) and counts the rows per year of the ``coverDate`` column.
    The chart is saved as ``barplot.png`` in that folder and then shown.

    ``coverDate`` values that are not ``YYYY-MM-DD`` strings (including
    missing values) are printed and left out of the counts. Years inside the
    observed range without any publication are drawn as zero-height bars.
    If no date can be parsed at all, a message is printed and nothing is
    plotted.
    """
    os.chdir("D:\\Mes Donnees\\Papers\\Modeling-Forecast text mining\\")
    input_f = "test_jeremy1_abs-title-key.csv"
    df = pd.read_csv(input_f, index_col=0, sep=';')

    # Count publications per year, reporting every value that cannot be parsed.
    pub_per_year = {}
    for d in df["coverDate"]:
        try:
            year = datetime.datetime.strptime(d, '%Y-%m-%d').year
        except (TypeError, ValueError):
            # TypeError: missing value (NaN); ValueError: malformed date string.
            print(d)
            continue
        pub_per_year[year] = pub_per_year.get(year, 0) + 1

    # min()/max() below would fail on an empty mapping, so stop early.
    if not pub_per_year:
        print("No valid coverDate values found in %s; nothing to plot." % input_f)
        return

    # One bar per calendar year of the observed range, gaps filled with 0.
    ind = list(range(min(pub_per_year), max(pub_per_year) + 1))
    vals = [pub_per_year.get(y, 0) for y in ind]

    fig, ax = plt.subplots()
    p1 = plt.bar(ind, vals)
    plt.xticks(ind, rotation='vertical')
    plt.yticks(np.arange(0, np.max(vals) + 100, 100))

    # Write the count just above each bar.
    for rect in p1:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., 1.01 * height,
                '%d' % int(height),
                ha='center', va='bottom')

    plt.savefig("barplot.png")
    plt.show()
def pubs_peryear(data_dir="D:\\Mes Donnees\\Papers\\Modeling-Forecast text mining\\",
                 input_f="test_jeremy1_abs-title-key.csv",
                 output_f="pubperyear_abs-title-key.csv"):
    """Write the number of publications per year to a ``;``-separated file.

    Args:
        data_dir: Directory that becomes the working directory; the input is
            read from it and the output written to it (for relative names).
        input_f: ``;``-separated CSV whose first column is the index and
            which has a ``coverDate`` column holding ``YYYY-MM-DD`` strings.
        output_f: Destination file. One ``year;count`` line is written per
            year, in the order in which the years first occur in the input.

    ``coverDate`` values that cannot be parsed (malformed strings or missing
    values) are printed and left out of the counts. When no date can be
    parsed, an empty output file is written.
    """
    os.chdir(data_dir)
    df = pd.read_csv(input_f, index_col=0, sep=';')

    pub_per_year = {}
    for d in df["coverDate"]:
        try:
            year = datetime.datetime.strptime(d, '%Y-%m-%d').year
        except (TypeError, ValueError):
            # TypeError: missing value (NaN); ValueError: malformed date string.
            print(d)
            continue
        pub_per_year[year] = pub_per_year.get(year, 0) + 1

    # The context manager closes the file even if a write fails.
    with open(output_f, 'w') as fout:
        for year, count in pub_per_year.items():
            fout.write("%d;%d\n" % (year, count))
def barplot_pertopic():
    """Plot a stacked bar chart of publications per year, split by topic.

    Changes the working directory to the hard-coded project folder and reads
    two ``;``-separated files whose first column is used as the index:
    ``test_jeremy1.csv`` (needs a ``coverDate`` column of ``YYYY-MM-DD``
    strings) and ``pubs_topics_6.csv`` (needs a ``topic`` column). Each row
    of the topics file is matched to a publication through its index value.
    The chart is saved as ``barplot.png`` in that folder and then shown.

    ``coverDate`` values that cannot be parsed are printed and skipped. If no
    date can be parsed at all, a message is printed and nothing is plotted.
    """
    os.chdir("D:\\Mes Donnees\\Papers\\Modeling-Forecast text mining\\")
    input_f = "test_jeremy1.csv"
    df = pd.read_csv(input_f, index_col=0, sep=';')
    topics = "pubs_topics_6.csv"
    df_topics = pd.read_csv(topics, index_col=0, sep=';')

    # Legend labels, positioned by merged topic id. Original topic ids map to
    # merged ids as follows:
    #   0    -> 0  Groundwater modeling
    #   1    -> 1  Biophysical models
    #   2, 6 -> 2  Medical applications
    #   3, 8 -> 3  Climate models
    #   4, 5 -> 4  Global system models
    #   7    -> 5  Economic modeling
    #   9    -> 6  Teaching applications
    topic_labels = ["Groundwater modeling", "Biophysical models",
                    "Medical applications", "Climate models",
                    "Global system models", "Economic modeling",
                    "Teaching applications"]

    # Collect every parsable year to determine the range of the x axis.
    years = []
    for d in df["coverDate"]:
        try:
            years.append(datetime.datetime.strptime(d, '%Y-%m-%d').year)
        except (TypeError, ValueError):
            print(d)

    # min()/max() below would fail on an empty list, so stop early.
    if not years:
        print("No valid coverDate values found in %s; nothing to plot." % input_f)
        return

    # Rows: merged topic ids; columns: every year of the observed range.
    ind = list(range(min(years), max(years) + 1))
    df_topicyears = pd.DataFrame(0, index=range(len(topic_labels)), columns=ind)

    for index, row in df_topics.iterrows():
        d = df.loc[index, "coverDate"]
        try:
            curr_year = datetime.datetime.strptime(d, '%Y-%m-%d').year
        except (TypeError, ValueError):
            # Same policy as above: report the bad date and skip the paper.
            print(d)
            continue
        # Single .loc[row, col] access: the chained form .loc[row][col] += 1
        # may update a temporary copy instead of the frame itself.
        df_topicyears.loc[row["topic"], curr_year] += 1

    fig, ax = plt.subplots()
    # Draw on the axes created above so that no second, empty figure appears.
    df_topicyears.transpose().plot.bar(stacked=True, ax=ax)
    ax.legend(topic_labels)

    plt.savefig("barplot.png")
    plt.show()
def preproc_topics(data_dir="D:\\Mes Donnees\\Papers\\Modeling-Forecast text mining\\",
                   input_f="pubs_topics.csv",
                   output_f="pubs_topics_6.csv"):
    """Merge the original topic ids into the reduced topic numbering.

    Args:
        data_dir: Directory that becomes the working directory; the input is
            read from it and the output written to it (for relative names).
        input_f: ``;``-separated file with one header line followed by
            ``paper_id;topic`` lines, where ``topic`` is an integer.
        output_f: Destination file. It receives a ``paper_id;topic`` header
            and one line per input line with the remapped topic id.

    Topic ids 6, 8, 5, 7 and 9 become 2, 3, 4, 5 and 6 respectively; every
    other id is written unchanged. Blank lines are skipped, and an empty
    input file yields an output containing only the header.

    Raises:
        ValueError: If a topic field is not an integer.
        IndexError: If a non-blank line has no ``;`` separator.
    """
    remap = {6: 2, 8: 3, 5: 4, 7: 5, 9: 6}

    os.chdir(data_dir)
    # The context manager closes (and flushes) both files on every exit path.
    with open(input_f, 'r') as fin, open(output_f, 'w') as fout:
        fout.write("paper_id;topic\n")
        # Skip the input header; the default keeps an empty file from raising.
        next(fin, None)
        for line in fin:
            if not line.strip():
                continue
            vals = line.split(';')
            old_t = int(vals[1].strip())
            fout.write("%s;%d\n" % (vals[0], remap.get(old_t, old_t)))
# Script entry: exactly one step is enabled per run; uncomment the one to execute.
# preproc_topics()    # writes pubs_topics_6.csv from pubs_topics.csv
# barplot_pertopic()  # stacked bar chart of publications per year and topic
# barplot_peryear()   # bar chart of publications per year
# Currently enabled: export the per-year publication counts to a CSV file.
pubs_peryear()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment