# Commit 596b20f7 authored by Interdonato Roberto

# Upload New File

# parent 68aa8cea
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Count how often each word occurs across all article titles, skipping the
# search keywords and English stop words, then print the counts and save them
# to a semicolon-separated file.

# Search keywords that must not show up in the word counts.
# NOTE: titles are split on whitespace below, so multi-word entries such as
# 'future studies' can never equal a single token and have no effect here.
group1 = [
    'prospective', 'anticipation', 'foresights', 'forecast', 'future studies',
    'prospective thinking', 'strategic planning', 'prospeccion', 'prospectiva',
]
group2 = [
    'modélisation', 'modelling', 'scenario', 'planning', 'mapping',
    'simulation', 'quantitative', 'assessement', 'qualitative', 'narrative',
    'scenarios', 'modelado',
]
diz = dict()
stop_words = set(stopwords.words('english'))
# Merge every exclusion list into one set so each token needs a single lookup
# instead of two linear list scans plus a set lookup.
excluded = set(group1) | set(group2) | stop_words

input_f = "test_jeremy1_abs-title-key.csv"
df = pd.read_csv(input_f, index_col=0, sep=';')

# Empty title cells are read as NaN (a float), which has no .strip(); drop
# them, and coerce the remaining values to str before text processing.
for d in df["title"].dropna().astype(str):
    # Strip surrounding whitespace and lowercase to avoid case mismatch.
    line = d.strip().lower()
    # split() without arguments collapses runs of whitespace, so no empty
    # strings end up being counted as words.
    for word in line.split():
        if word in excluded:
            continue
        # Start from 0 for unseen words, otherwise increment the running
        # total (the previous code reset every count back to 1).
        diz[word] = diz.get(word, 0) + 1

# Print the contents of dictionary
for key, count in diz.items():
    print(key, ":", count)

# The context manager guarantees the output file is flushed and closed.
with open("global_occs_abs-title-key.csv", 'w', encoding="utf-8") as fout:
    for key, count in diz.items():
        fout.write("%s;%s\n" % (key, count))
