# coding=utf-8
from strpython.models.str import STR

from .models.transformation.transform import Generalisation, Expansion
from .nlp.disambiguator.disambiguator import Disambiguator
from .nlp.disambiguator.most_common import MostCommonDisambiguator
from .nlp.exception.disambiguator import NotADisambiguatorInstance
from .nlp.exception.ner import NotANERInstance
from .nlp.exception.tagger import NotATaggerInstance
from .nlp.ner.ner import NER
from .nlp.ner.stanford_ner import StanfordNER
from .nlp.pos_tagger.tagger import Tagger
from .nlp.pos_tagger.treetagger import TreeTagger
import json,re



class Pipeline(object):
    """
    Run the whole treatment on a given text.

    A pipeline chains three components: a POS tagger (used only when its
    ``active`` attribute is true), a NER and a disambiguator. ``parse`` runs
    them in that order, ``build`` wraps the result in a ``STR`` and
    ``transform`` optionally applies a generalisation or an expansion to it.
    """

    def __init__(self, lang="english", **kwargs):
        """
        Constructor

        :param lang: language name. The full name is given to the default
            ``TreeTagger``; its first two characters are given to the default
            ``StanfordNER`` and stored in ``self.lang`` for the disambiguator.
        :param kwargs: optional ``tagger``, ``ner`` and ``disambiguator``
            objects replacing the defaults. Unlike the ``set_*`` methods,
            the constructor stores them without any type check.
        """
        self.lang = lang[:2]
        # Conditional expressions (not ``kwargs.get(key, Default())``) so that
        # a default component is only instantiated when none was supplied.
        self.tagger = kwargs["tagger"] if "tagger" in kwargs else TreeTagger(language=lang)
        self.ner = kwargs["ner"] if "ner" in kwargs else StanfordNER(lang=lang[:2])
        self.disambiguator = (
            kwargs["disambiguator"] if "disambiguator" in kwargs else MostCommonDisambiguator()
        )

    def parse(self, text, debug=False):
        """
        Tag (when the tagger is active), run the NER and disambiguate ``text``.

        :param text: input text
        :param debug: when true, print the disambiguated entities
        :return: ``(count, output, se_identified)`` where ``output`` is what
            the NER returned and ``count``/``se_identified`` are the two
            values returned by the disambiguator
        :rtype: tuple
        """
        output = text
        # POS tagging is optional: an inactive tagger leaves the raw text as
        # the NER input.
        if self.tagger.active:
            output = self.tagger.tag(output)

        # NER
        output = self.ner.identify(output)

        # Disambiguation
        count, se_identified = self.disambiguator.disambiguate(output, self.lang)
        if debug:
            print(se_identified)

        return count, output, se_identified

    def set_tagger(self, tagger):
        """
        Set POS tagger used in the Pipeline

        :param tagger: a ``Tagger`` instance
        :return: None
        :raises NotATaggerInstance: if ``tagger`` is not a ``Tagger``
        """
        if isinstance(tagger, Tagger):
            self.tagger = tagger
        else:
            raise NotATaggerInstance()

    def set_ner(self, ner):
        """
        Set NER used in the pipeline

        :param ner: a ``NER`` instance
        :return: None
        :raises NotANERInstance: if ``ner`` is not a ``NER``
        """
        if isinstance(ner, NER):
            self.ner = ner
        else:
            raise NotANERInstance()

    def set_disambiguator(self, disambiguator):
        """
        Set disambiguator used in the pipeline

        :param disambiguator: a ``Disambiguator`` instance
        :return: None
        :raises NotADisambiguatorInstance: if ``disambiguator`` is not a
            ``Disambiguator``
        """
        if isinstance(disambiguator, Disambiguator):
            self.disambiguator = disambiguator
        else:
            raise NotADisambiguatorInstance()

    @staticmethod
    def _filter_toponyms(toponyms, stop_words):
        """
        Keep the toponyms that are worth disambiguating.

        A toponym is dropped when its lower-cased form is in ``stop_words``,
        when it contains a digit, or when it is 3 characters long or shorter.

        :param toponyms: list of toponym strings
        :param stop_words: collection of lower-cased words to exclude
        :return: filtered list, input order preserved
        """
        return [
            top for top in toponyms
            if top.lower() not in stop_words
            and re.search(r"\d", top) is None
            and len(top) > 3
        ]

    def build(self, text, se_identified=None, **kwargs):
        """
        Return the corresponding STR for a text.

        Spatial entities are taken, by order of priority, from the
        ``toponyms`` keyword (filtered then disambiguated; ``text`` is not
        parsed), from ``se_identified`` if it is non-empty, or from
        ``parse(text)``.

        :param text: input text
        :param se_identified: already identified spatial entities; when given
            (and ``toponyms`` is not a list) the text is still parsed but the
            entities found by the parse are ignored
        :param kwargs: ``adj`` and ``inc`` (both default to True) are passed
            to ``STR.build``; ``toponyms`` (list) and ``stop_words`` (default
            ``[]``) drive the toponym shortcut; the whole ``kwargs`` dict is
            then forwarded to ``transform`` (see ``type_trans`` there)
        :return: ``(str_, count, str_.spatial_entities)``; ``count`` is an
            empty dict when ``toponyms`` is used
        :rtype: tuple
        """
        adj = kwargs.get("adj", True)
        inc = kwargs.get("inc", True)
        toponyms = kwargs.get("toponyms", None)
        stop_words = kwargs.get("stop_words", [])

        if isinstance(toponyms, list):
            se_identified = self.disambiguator.disambiguate_list(
                self._filter_toponyms(toponyms, stop_words), self.lang
            )
            count, output = {}, text
        else:
            count, output, parsed_se = self.parse(text)
            # Entities supplied by the caller win over the parsed ones.
            if not se_identified:
                se_identified = parsed_se

        str_ = STR(output, se_identified)
        str_.build(adj=adj, inc=inc)
        str_ = self.transform(str_, **kwargs)  # TODO : Add count
        return str_, count, str_.spatial_entities

    def transform(self, str_, **kwargs):
        """
        Apply the transformation selected by ``type_trans`` to a STR.

        :param str_: the STR to transform
        :param kwargs: ``type_trans`` selects the transformation: ``"gen"``
            uses ``Generalisation``, any other value uses ``Expansion``. The
            remaining keyword arguments are forwarded to the transformation.
        :return: ``str_`` unchanged when ``type_trans`` is absent, otherwise
            the result of the transformation
        """
        if "type_trans" not in kwargs:
            return str_
        # Removed from kwargs so it is not forwarded to the transformation.
        type_trans = kwargs.pop("type_trans")
        if type_trans == "gen":
            return Generalisation().transform(str_, **kwargs)
        return Expansion().transform(str_, **kwargs)


# Script entry point: currently a no-op, nothing is executed when the module
# is run directly.
if __name__ == '__main__':
    pass