Commit 4d787b85 authored by Fize Jacques

DEBUG

parent 3e95bf0d
Showing with 86 additions and 62 deletions
@@ -26,9 +26,9 @@ if args.corpus_name == "padiweb":
 else:
     corpus_dir = "data/disambiguation_data/mada_disambiguisation"
-data_lang = json.load(open("/Users/jacquesfize/LOD_DATASETS/raw_bvlac/associated_lang.json"))
-data_lang = {int(k): v for k, v in data_lang.items()}
+data_lang = json.load(open("/Users/jacquesfize/DATA_THESIS/BVLAC/raw_bvlac/associated_lang.json"))
+data_lang = {int(k): (v if v in ["fr",'en'] else "en") for k, v in data_lang.items()}
 corpus_files=glob.glob("{0}/*.csv".format(corpus_dir))
 acc_MC,acc_GEO,acc_wiki=[],[],[]
 i=0
...
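
Editor's note on the data_lang change above: the new dict comprehension normalises every language tag outside fr/en to en. A standalone illustration (the sample values are made up):

    data_lang = {"1": "fr", "2": "mg", "3": "en"}  # made-up raw table
    data_lang = {int(k): (v if v in ["fr", "en"] else "en") for k, v in data_lang.items()}
    print(data_lang)  # {1: 'fr', 2: 'en', 3: 'en'}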
@@ -33,7 +33,12 @@ def dist(id1, id2):
 def efficiencyMostCommon(df, lang, score="accuracy",k=1):
     df2 = df[-df["GID"].isin(["O", "NR", "o"])][["text", "GID"]]
-    df2["disambiguation"] = df2.text.apply(lambda x: disMost_common.disambiguate_(x, lang)[0])
+    def foo(lang,x):
+        res = disMost_common.disambiguate(lang, toponyms=[x])
+        if x in res:
+            return res[x]
+        return "O"
+    df2["disambiguation"] = df2.text.apply(lambda x:foo(lang,x))
     if score == "mean_distance_error":
         df2["distance"] = df2.apply(lambda row: dist(row.GID, row.disambiguation) if "GID" in row else -1, axis=1)
         return df2["distance"][df2["distance"] >= 0].mean()
@@ -45,7 +50,7 @@ def efficiencyMostCommon(df, lang, score="accuracy",k=1):
 def efficiencyGeodict(df, lang, score="accuracy",k=1):
     df2 = df[-df["GID"].isin(["O", "NR", "o"])][["text", "GID"]]
-    res_dis = disGaurav.eval(df2["text"].unique(), lang)
+    res_dis = disGaurav.disambiguate(lang,toponyms=df2["text"].unique().tolist())
     df2["disambiguation"] = df2.text.apply(lambda x: res_dis[x] if x in res_dis else None)
     if score == "mean_distance_error":
         df2["distance"] = df2.apply(lambda row: dist(row.GID, row.disambiguation) if "GID" in row else -1, axis=1)
@@ -59,7 +64,7 @@ def efficiencyGeodict(df, lang, score="accuracy",k=1):
 def efficiencyWiki(df, lang, score="accuracy",k=1):
     df2 = df[-df["GID"].isin(["O", "NR", "o"])][["text", "GID"]]
-    res_dis = disWiki.disambiguate_wiki(df2["text"].unique(), lang)
+    res_dis = disWiki.disambiguate(lang,toponyms=df2["text"].unique().tolist())
     df2["disambiguation"] = df2.text.apply(lambda x: res_dis[x] if x in res_dis else None)
     if score == "mean_distance_error":
         df2["distance"] = df2.apply(lambda row: dist(row.GID, row.disambiguation) if "GID" in row else -1, axis=1)
...
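
The three hunks above switch every disambiguator to the same batch interface, disambiguate(lang, toponyms=[...]), which returns a dict mapping each toponym to a resolved identifier. A minimal sketch of how the evaluation code consumes it; the stub class and sample values are hypothetical, not part of the commit:

    class StubDisambiguator:
        # Mimics the shared interface of disMost_common / disGaurav / disWiki.
        def disambiguate(self, lang, toponyms=[]):
            return {t: "GD{0}".format(i) for i, t in enumerate(toponyms)}  # fake ids

    res = StubDisambiguator().disambiguate("fr", toponyms=["Paris", "Antananarivo"])
    # A toponym missing from the result means "unresolved"; efficiencyMostCommon's
    # new foo() helper maps that case to the "O" (outside) tag.
    print(res.get("Paris", "O"))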
@@ -259,8 +259,8 @@ class AdjacencyMetaRelation(MetaCollector):
             for se2 in spatial_entities:
                 data_se1, data_se2 = data[se1], data[se2]
                 if data_se1 and data_se2 and "coord" in data_se1 and "coord" in data_se2:
-                    not_in_stop = len(set(data_se1.class_) & stop_class) < 1 and len(
-                        set(data_se2.class_) & stop_class) < 1
+                    not_in_stop = len(set(data_se1.class_) & stop_class) < 0.5 and len(
+                        set(data_se2.class_) & stop_class) < 0.5
                     self.distances_is_inf_to[se1][se2] = dist_all[se1][se2] < max_d and not_in_stop
                 else:
                     self.distances_is_inf_to[se1][se2] = False
...
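
The threshold change from < 1 to < 0.5 does not alter behaviour: len(set(...) & stop_class) is a non-negative integer, so both comparisons are just an empty-intersection test. A quick standalone check (set.isdisjoint expresses the same predicate directly; the sample classes are illustrative):

    stop_class = {"A-ADM1", "A-ADM2"}  # illustrative stop classes
    class_ = ["P-PPL"]
    empty = len(set(class_) & stop_class) < 0.5
    assert empty == set(class_).isdisjoint(stop_class)  # both True here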
@@ -213,7 +213,7 @@ class STR(object):
             warnings.warn("Label empty. @en label from Geo-Database will be used.")
             label = data_["en"]
         self.spatial_entities[id] = label
-        self.graph.add_node(id, label=label,type="SE")
+        self.graph.add_node(id, label=label,type="S_E")

     def add_spatial_entities(self, ids: list, labels: list = []):
@@ -656,33 +656,37 @@ class STR(object):
     def plot(self, title="STR", output_fn=None,se_color ="#4183d7",te_color="#d64541",inc_edge_color="r",
-             adj_edge_color="g",them_edge_color = "b",figsize=(7,7)):
+             adj_edge_color="g",them_edge_color = "b",figsize=(7,7),scale=2,node_size=700,layout_func=nx.shell_layout,dech=0):
         import matplotlib.pyplot as plt
         plt.figure(figsize=figsize)
         G = self.graph.copy()
-        pos = nx.shell_layout(G, scale=0.5)
-        #pos = nx.layout.shell_layout(ext_2_t.graph)
-        nx.draw_networkx_nodes(G, pos,
-                               nodelist=[n[0] for n in list(G.nodes(data=True)) if n[1]["type"] == "S_E"],
-                               node_color=se_color, node_size=500)
+        pos = layout_func(G, scale=scale)
+
+        nodes = list(G.nodes(data=True))
+        max_n_char = ([len(n[1]["label"]) * node_size for n in nodes])
         nx.draw_networkx_nodes(G, pos,
-                               nodelist=[n[0] for n in list(G.nodes(data=True)) if n[1]["type"] == "T_E"],
-                               node_color=te_color, node_size=500)
-        nx.draw_networkx_labels(G, nx.shell_layout(G, scale=0.5), labels={n[0]: n[1]["label"] for n in G.nodes(data=True)})
-        nx.draw_networkx_edges(G, pos, edgelist=[ed for ed in G.edges(data=True) if ed[2]["type_"] == "inc"],
-                               edge_color=inc_edge_color, arrows=True)
-        nx.draw_networkx_edges(G, pos, edgelist=[ed for ed in G.edges(data=True) if ed[2]["type_"] == "adj"],
-                               edge_color=adj_edge_color, arrows=True)
-        nx.draw_networkx_edges(G, pos, edgelist=[ed for ed in G.edges(data=True) if ed[2]["type_"] == "them"],
-                               edge_color=them_edge_color, arrows=True)
+                               nodelist=[n[0] for n in nodes if n[1]["type"] == "S_E"],
+                               node_color=se_color, node_size=max_n_char)
+        nx.draw_networkx_nodes(G, pos,
+                               nodelist=[n[0] for n in nodes if n[1]["type"] == "T_E"],
+                               node_color=te_color, node_size=max_n_char)
+
+        edges = list(G.edges(data=True))
+        nx.draw_networkx_labels(G, pos, labels={n[0]: n[1]["label"] for n in nodes},font_color='w')
+        nx.draw_networkx_edges(G, pos, edgelist=[ed for ed in edges if ed[2]["type_"] == "inc"],
+                               edge_color=inc_edge_color, arrows=True,width=1.5)
+        nx.draw_networkx_edges(G, pos, edgelist=[ed for ed in edges if ed[2]["type_"] == "adj"],
+                               edge_color=adj_edge_color, arrows=True,width=1.5)
+        nx.draw_networkx_edges(G, pos, edgelist=[ed for ed in edges if ed[2]["type_"] == "them"],
+                               edge_color=them_edge_color, arrows=True,width=1.5)
         plt.title(title)
         plt.axis('off')
-        plt.margins(0.1)
+        plt.margins(0.2)
         if output_fn:
             plt.savefig(output_fn, bbox_inches='tight')
         else:
...
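
With the reworked signature, callers can now choose the layout function and node sizing. A hedged usage sketch: str_ stands for an already-built STR instance, and nx.circular_layout is just one layout that accepts (G, scale=...); all keyword values below are illustrative:

    import networkx as nx
    # str_ is an existing STR; every keyword value below is illustrative.
    str_.plot(title="Extended STR",
              layout_func=nx.circular_layout,  # any layout taking (G, scale=...)
              scale=2, node_size=700,
              output_fn="str_plot.png")        # saved instead of shown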
@@ -53,6 +53,7 @@ def get_extended_with_thematic(extended_str, thematic_str):
             if not them in new_ext.thematic_entities:
                 new_ext.add_thematic_entities(them, thematic_str.thematic_entities[them])
                 new_ext.graph.add_node(them, label=thematic_str.thematic_entities[them], type="T_E")
+            print(es,them)
             new_ext.graph.add_edge(es, them, color="blue",type_="them")
             new_ext.add_thematic_relationships(es, them)
     return new_ext
@@ -159,7 +159,7 @@ class Generalisation(Transformation):
 class Expansion(Transformation):

-    def getAroundEntities(self, data, score, distance=150, unit="km", n=1):
+    def getAroundEntities(self, data, score, distance=100, unit="km", n=1,lang="fr",stop_en=[]):
         if not "coord" in data:
             return []
         hits = client.search("gazetteer", "place", {
@@ -168,13 +168,14 @@ class Expansion(Transformation):
                 "must": [
                     {"match_all": {}},
                     {"exists": {"field": "score"}},  # Get place with high score
-                    {"terms": {"class": ["P-PPL", "A-ADM4", "P-PPLC"]}},
+                    #{"terms": {"class": ["P-PPL", "A-ADM4", "P-PPLC"]}},
                     # Populated Settlement, Last administration level, Capital
                     {"range": {"score": {"gt": score}}},  # Has a higher score (PR)
-                    {"term": {"country": data.other["country"]}}  # stay in the same country
+                    #{"term": {"country": data.other["country"]}} # stay in the same country
                 ],
                 "must_not": [
-                    {"terms": {"class": ["A-ADM3", "A-ADM2", "A-ADM1"]}}  # No region, departement, ... !
+                    {"terms": {"class": ["A-ADM3", "A-ADM2", "A-ADM1"]}},
+                    {"terms": {lang: stop_en}},
                 ],
                 "filter": {
                     "geo_distance": {
@@ -206,14 +207,15 @@ class Expansion(Transformation):
     def transform(self, str_: STR, **kwargs):
         type_ = "adjacency"
-        distance = kwargs.get("distance", 150)
-        unit = kwargs.get("unit", 150)
+        distance = kwargs.get("distance", 100)
+        unit = kwargs.get("unit", 100)
         n = kwargs.get("adjacent_count", 1)
         cp = kwargs.get("cp", True)
+        lang = kwargs.get("lang","fr")
         if type_ == "adjacency":
-            return self.transform_adj(str_, distance, unit, n, cp)
+            return self.transform_adj(str_, distance, unit, n, lang, cp)

-    def transform_adj(self, str_: STR, distance: int, unit: str, n: int, cp=True) -> STR:
+    def transform_adj(self, str_: STR, distance: int, unit: str, n: int,lang:str, cp=True) -> STR:
         graph = str_.graph
         median, selected_se = self.select_es(graph)
         data_se, scores_ = {}, []
@@ -230,19 +232,21 @@ class Expansion(Transformation):
         new_nodes = []
         labels = []
+        stop_en = list(str_.spatial_entities.keys())
         for node in selected_se:
             data_ = data_se[node]
             if (not "P-PPL" in data_.class_) and (not "A-ADM4" in data_.class_):
                 continue
             if not "country" in data_.other:
                 continue
-            neighbor = self.getAroundEntities(data_, median, distance, unit, n)
+            neighbor = self.getAroundEntities(data_, median, distance, unit, n,lang=lang,stop_en=stop_en)
+            stop_en.extend(neighbor)
             # if not neighbor:
             #     try:
             #         neighbor = [get_inclusion_chain(node, "P131")[0]]
             #     except:
             #         neighbor = []
-            labels.extend([gazetteer.get_by_id(n)[0].label.en for n in neighbor])
+            labels.extend([gazetteer.get_by_id(n)[0].label[lang] for n in neighbor])
             new_nodes.extend(neighbor)
         new_nodes = list(set(new_nodes))
...
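
The new stop_en parameter threads the STR's existing entity ids into the Elasticsearch must_not clause, so expansion never proposes an entity twice (stop_en also grows inside the loop via stop_en.extend(neighbor)). A minimal sketch of the resulting query body, assuming the must/must_not arrays sit inside a standard bool query as the hunk suggests; all values are illustrative:

    lang, stop_en, score = "fr", ["GD123", "GD456"], 50  # illustrative values
    query = {"query": {"bool": {
        "must": [
            {"match_all": {}},
            {"exists": {"field": "score"}},
            {"range": {"score": {"gt": score}}},
        ],
        "must_not": [
            {"terms": {"class": ["A-ADM3", "A-ADM2", "A-ADM1"]}},
            {"terms": {lang: stop_en}},  # skip entities already in the STR
        ],
    }}}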
@@ -76,8 +76,8 @@ class Disambiguator(object):
         candidates=[]
         candidates.extend(gazetteer.get_by_label(label,lang))
         candidates.extend(gazetteer.get_by_alias(label, lang,score=False))
-        #candidates.extend(gazetteer.get_n_label_similar(label,lang, n=1,score=False))
-        #candidates.extend(gazetteer.get_n_alias_similar(label, lang, n=1,score=False))
+        candidates.extend(gazetteer.get_n_label_similar(label,lang, n=1,score=False))
+        candidates.extend(gazetteer.get_n_alias_similar(label, lang, n=1,score=False))
         return pd.DataFrame([[
             r.id,
             label,
...
@@ -41,6 +41,7 @@ class BigramModel:
             if uri1 in self.cooc_freq[uri2]:
                 return self.cooc_freq[uri2][uri1]
                 #return (self.cooc_freq[uri2][uri1] / self.count_associated[uri1])+pr1
+
         return nna
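
For context, cooc_freq is a dict of dicts of pair counts and nna is the floor value returned when a pair was never observed. A self-contained sketch of the symmetric lookup this hunk touches (the nna default below is an assumption, not taken from the source):

    def cooc_lookup(cooc_freq, uri1, uri2, nna=0):
        # Counts may be stored under either key order, so check both.
        if uri1 in cooc_freq and uri2 in cooc_freq[uri1]:
            return cooc_freq[uri1][uri2]
        if uri2 in cooc_freq and uri1 in cooc_freq[uri2]:
            return cooc_freq[uri2][uri1]
        return nna  # fallback when the pair never co-occurred

    print(cooc_lookup({"A": {"B": 3}}, "B", "A"))  # 3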
@@ -73,7 +73,7 @@ class ShareProp(Disambiguator):
         # return fib_no[interP131]+fib_no[interP706]
         return self.inclusion_log(interP131) + self.inclusion_log(interP706)

-    def Adjacency_P47(self, id1, id2):
+    def Adjacency_P47(self, es1, es2):
         """
         Return true, if two spatial entities are found adjacent using the P47 property (share borders) from Wikidata.

         Parameters
@@ -88,9 +88,10 @@ class ShareProp(Disambiguator):
         bool
             true if adjacent using P47
         """
-        data_1, data_2 = gazetteer.get_by_id(id1)[0], gazetteer.get_by_id(id2)[0]
-        if "P47" in data_1 and "P47" in data_2:
-            if id1 in data_2.other.P47 or id2 in data_1.other.P47:
+        # data_1, data_2 = gazetteer.get_by_id(id1)[0], gazetteer.get_by_id(id2)[0]
+
+        if "P47" in es1 and "P47" in es2:
+            if es1.id in es2.other.P47 or es2.id in es1.other.P47:
                 return True
         return False
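
Adjacency_P47 now receives the full gazetteer records (es1, es2) instead of ids, saving two gazetteer.get_by_id round trips per candidate pair. A sketch of the record-based check with a hypothetical minimal record type; the real records additionally support "P47" in record and .other.P47 attribute access:

    from types import SimpleNamespace

    def adjacency_p47(es1, es2):
        # P47 = Wikidata "shares border with": a list of neighbouring ids.
        if "P47" in es1.other and "P47" in es2.other:
            return es1.id in es2.other["P47"] or es2.id in es1.other["P47"]
        return False

    a = SimpleNamespace(id="GD1", other={"P47": ["GD2"]})
    b = SimpleNamespace(id="GD2", other={"P47": []})
    print(adjacency_p47(a, b))  # True: GD1 lists GD2 as a border neighbour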
@@ -130,19 +131,15 @@ class ShareProp(Disambiguator):
             id_cand = cand.id
             score_dc[id_cand] = 0
             for fixed in fixed_entities:
-                id_fixed = fixed_entities[fixed].id
-                if self.Adjacency_P47(id_cand, id_fixed):
+                id_fixed = fixed.id
+                if self.Adjacency_P47(cand, fixed):
                     score_dc[id_cand] += 3
                 elif self.Adjacency_Hull(id_cand, id_fixed):
                     score_dc[id_cand] += 2
                 score_dc[id_cand] += self.get_inclusion_score(id_cand, id_fixed)

         m = max(score_dc, key=score_dc.get)
-        if score_dc[m] < 4:
-            return None
-        for cand in spat_candidates:
-            if cand.id == m:
-                return cand.id
+        return m

     def disambiguate_context_based(self,toponyms,lang):
@@ -152,21 +149,20 @@ class ShareProp(Disambiguator):
         for topo in toponyms:
             request = self.get_candidates(topo,lang)
             if len(request) > 1:
-                ambiguous_entities[topo] = request
+                ambiguous_entities[topo] = request.raw.values.tolist()
             elif len(request) == 1:
                 fixed_entities[topo] = request.iloc[0].raw

         d_amb_results = {}
-        for amb_ent in ambiguous_entities:
-            d = self.disambiguateOne(ambiguous_entities[amb_ent], fixed_entities)
-            if not d:
-                d_amb_results[amb_ent] = get_most_common_id_v3(amb_ent, lang).id
-            else:
-                d_amb_results[amb_ent] = d
+        for topo in ambiguous_entities:
+            d = self.disambiguateOne(ambiguous_entities[topo], fixed_entities.values())
+            d_amb_results[topo] = d

         for k, v in fixed_entities.items():
             selected_en[k] = v.id
         for k, v in d_amb_results.items():
             selected_en[k] = v

         return selected_en
\ No newline at end of file
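
Two behavioural changes land in this hunk: ambiguous toponyms now carry plain lists of raw records, and the most-common fallback is gone, so an ambiguous toponym simply gets whatever disambiguateOne returns (possibly None, since the score threshold was also dropped above). A runnable sketch of the slimmed-down loop with stubbed data; disambiguate_one is a stand-in, not the real method:

    def disambiguate_one(cands, fixed_records):
        # Stand-in: the real method scores candidates against fixed entities.
        return cands[0]["id"] if cands else None

    ambiguous = {"Paris": [{"id": "GD1"}, {"id": "GD2"}]}  # >1 candidate
    fixed = {"Antananarivo": {"id": "GD9"}}                # exactly 1 candidate
    selected_en = {k: v["id"] for k, v in fixed.items()}
    for topo, cands in ambiguous.items():
        selected_en[topo] = disambiguate_one(cands, fixed.values())
    print(selected_en)  # {'Antananarivo': 'GD9', 'Paris': 'GD1'}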
@@ -9,6 +9,7 @@ from ...config.configuration import config
 from ...helpers.geodict_helpers import *
 from .most_common import stop_words,common_words
 import networkx as nx
+from .most_common import MostCommonDisambiguator

 def read_pickle(fn):
     return pickle.load(open(fn,'rb'))
@@ -20,7 +21,7 @@ class WikipediaDisambiguator(Disambiguator):
         # Load model
         self.model=BigramModel(read_pickle(config.wiki_cooc_dis.cooc_freq),read_pickle(config.wiki_cooc_dis.count))
         self.measure=measure
-
+        self.mostcommon = MostCommonDisambiguator()

     def disambiguate_list(self,toponyms,lang):
         result=self.disambiguate_wiki(toponyms,lang)
@@ -50,7 +51,20 @@ class WikipediaDisambiguator(Disambiguator):
             candidates = self.get_candidates(toponym, lang)
             if len(candidates)<1:
                 continue
-            candidates = [c.id for ix,c in candidates.iterrows()]
+            f=False
+            for ix,c in candidates.iterrows():
+                if c.id in self.model.cooc_freq :
+                    f=True
+                for ij,c2 in candidates.iterrows():
+                    if c2.id in self.model.cooc_freq and c.id in self.model.cooc_freq[c2.id]:
+                        f=True
+            if not f:
+                candidates=self.mostcommon.disambiguate(lang,toponyms=[toponym])
+                if candidates :
+                    candidates= list(candidates.values())
+            if not isinstance(candidates,list):
+                candidates = [c.id for ix,c in candidates.iterrows()]
             group_candidate[toponym] = candidates
             betw_cand[toponym]=candidates
             for n in candidates:
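
The new guard checks whether any candidate of a toponym has co-occurrence evidence, either as a key of the bigram table or paired with a sibling candidate, and otherwise falls back to MostCommonDisambiguator. A condensed, runnable sketch of that test, assuming the same dict-of-dicts cooc_freq shape as above:

    def has_cooc_evidence(candidate_ids, cooc_freq):
        for c in candidate_ids:
            if c in cooc_freq:
                return True  # candidate is itself a key of the model
            if any(c2 in cooc_freq and c in cooc_freq[c2] for c2 in candidate_ids):
                return True  # candidate co-occurs with a sibling candidate
        return False

    cooc = {"GD1": {"GD2": 4}}
    print(has_cooc_evidence(["GD1", "GD2"], cooc))  # True
    print(has_cooc_evidence(["GD3"], cooc))         # False -> most-common fallback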
@@ -61,14 +75,14 @@ class WikipediaDisambiguator(Disambiguator):
                 g.add_node(candidate, label=gazetteer.get_by_id(candidate)[0].label[lang])
         data_candidate={ca :gazetteer.get_by_id(ca)[0] for ca in possible_candidates}
         for candidate in possible_candidates:
             for candidate2 in possible_candidates:
-                # Get PageRank score
+
                 d = data_candidate[candidate]
                 sc = d.score
                 # Compute probability
                 prob = self.model.get_coocurence_probability(sc, candidate, candidate2)
                 if candidate2 in betw_cand[candidate] or candidate in betw_cand[candidate2]:
                     prob = 0.0
                 if prob < 0.0000001:
@@ -82,7 +96,6 @@ class WikipediaDisambiguator(Disambiguator):
                 g.add_edge(candidate, candidate2, weight=prob)
-
         selected = {}
         #Take the candidates with the highest degree weighted
         for gr in group_candidate:
             if self.measure == "degree":
...
@@ -145,7 +145,7 @@ class Pipeline(object):
             str_.build()
         return str_

-    def pipe_transform(self,strs_, cpu_count=cpu_count(),**kwargs):
+    def pipe_transform(self,strs_,**kwargs):
         str_s = [ self.transform(str_, **kwargs) for str_ in tqdm(strs_,desc="Transform STR", disable=(not self.verbose))]
         return str_s
...
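
pipe_transform drops its unused cpu_count parameter; the list comprehension was already sequential. A minimal stand-in showing the surviving call shape (MiniPipeline and the dummy transform are hypothetical):

    from tqdm import tqdm

    class MiniPipeline:
        verbose = True
        def transform(self, s, **kwargs):
            return s.upper()  # dummy per-STR transformation
        def pipe_transform(self, strs_, **kwargs):
            return [self.transform(s, **kwargs)
                    for s in tqdm(strs_, desc="Transform STR", disable=not self.verbose)]

    print(MiniPipeline().pipe_transform(["a", "b"]))  # ['A', 'B']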