Commit cf6b4066 authored by Rémy Decoupes
Browse files

debug zero shot classifier

parent e12c6931
......@@ -156,17 +156,27 @@ if __name__ == '__main__':
import json  # local import: only needed for the debug dump written below

# Zero-shot topic classification of the tweets held in df_tfidf.
# Each tweet is passed through translator_fr, the translated text is scored
# against candidate_labels_en, and the scores are joined back onto df_tfidf
# before the result is written to analysis-output/.
classifier = pipeline("zero-shot-classification",
                      model="digitalepidemiologylab/covid-twitter-bert-v2-mnli")
candidate_labels_fr = ["covid-19", "grippe aviaire", "AMR", "tiques", "autres"]
# Debug run: a single label only. The full label set is
# ["covid-19", "avian influenza", "AMR", "tick borne", "others"].
candidate_labels_en = ["avian influenza"]

classifier_results = []    # one row of scores per classified tweet
classifier_results_2 = []  # debug records: original text + raw scores
classified_index = []      # df_tfidf index labels of the classified tweets
failed_index = []          # df_tfidf index labels to drop after the loop
for i, tweets in tqdm(df_tfidf.iterrows(), total=df_tfidf.shape[0]):
    text = tweets["text"]
    # Reset per tweet so the error message never shows a stale translation
    # (or hits an unbound name when the translator itself fails).
    text_translated = None
    try:
        text_translated = translator_fr(text)[0]["translation_text"]
        # Classify once and reuse the result for both outputs.
        result = classifier(text_translated, candidate_labels_en)
    except Exception as err:
        # Best effort: report the tweet, remember it, and carry on.
        # The frame is not mutated here because it is being iterated.
        failed_index.append(i)
        print(f"text: {text} | translated: {text_translated} | error: {err!r}")
        continue
    # The pipeline returns labels/scores sorted by descending score, so
    # re-order the scores to match the column order of candidate_labels_en.
    score_by_label = dict(zip(result["labels"], result["scores"]))
    classifier_results.append(
        [score_by_label[label] for label in candidate_labels_en]
    )
    classified_index.append(i)
    classifier_results_2.append({"text": text, "scores": result["scores"]})

# Remove the tweets that could not be translated or classified.
df_tfidf.drop(failed_index, inplace=True)

# Index the scores by the original row labels so the join below lines each
# score up with its own tweet, even when rows were dropped.
# NOTE(review): assumes df_tfidf has a unique index -- confirm.
classifier_df = pd.DataFrame(classifier_results,
                             columns=candidate_labels_en,
                             index=classified_index)

# Debug dump: one JSON object per line (file.write() needs str, not dict).
with open("analysis-output/test_2.txt", "w", encoding="utf-8") as f:
    for item in classifier_results_2:
        f.write(json.dumps(item, ensure_ascii=False) + "\n")
classifier_df_2 = pd.DataFrame(classifier_results_2)
classifier_df_2.to_csv("analysis-output/acquitaine_test.csv")

df_tfidf = df_tfidf.join(classifier_df)
df_tfidf.to_csv("analysis-output/acquitaine-digitalepidemiologylab.csv")
df_tfidf.to_pickle("analysis-output/acquitaine-digitalepidemiologylab.pkl")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment