diff --git a/annotation.smk b/annotation.smk
index c92e44cf89efaecbc66d6fb8f1817bfef881088b..016375d3348080e70aa77b1c5782d770349c0cdb 100644
--- a/annotation.smk
+++ b/annotation.smk
@@ -40,10 +40,16 @@ rule prodigal:
         # "sed --in-place 's/*/N/' {output.faa} "
 
 def faa_input(wildcards):
-    if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
-        return {"faa": "work/FGS/{wildcards.sample}/{wildcards.sample}_FGS.faa".format(wildcards=wildcards)}
-    elif config["PROTEINS-PREDICTOR"] == "prodigal":
-        return {"faa": "work/prodigal/{wildcards.sample}/{wildcards.sample}_prodigal.faa".format(wildcards=wildcards)}
+    """Return the protein FASTA input for a sample as a {"faa": path} dict."""
+    # The sample name "catalogue" selects the CD-HIT catalogue proteins; it is
+    # returned under the same "faa" key as the per-sample branches below.
+    if wildcards.sample == "catalogue":
+        return {"faa": "work/cdhit/catalogue.faa"}
+    else:
+        if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
+            return {"faa": "work/FGS/{wildcards.sample}/{wildcards.sample}_FGS.faa".format(wildcards=wildcards)}
+        elif config["PROTEINS-PREDICTOR"] == "prodigal":
+            return {"faa": "work/prodigal/{wildcards.sample}/{wildcards.sample}_prodigal.faa".format(wildcards=wildcards)}
 
 def ffn_input(wildcards):
     if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
diff --git a/catalogue.smk b/catalogue.smk
index 7ff48a1f416ca8c7972fb4cda655c97c725fc663..55eae87e01cf8863d87e822a4f10552a1707f0cc 100644
--- a/catalogue.smk
+++ b/catalogue.smk
@@ -80,3 +80,35 @@ rule cd_hit_2D:
         "-d 0 "
         "-M 0 "
         "-T {threads} "
+
+# Build the catalogue protein FASTA: collect the headers of the catalogue
+# nucleotide FASTA, then pull the records with those names out of the
+# per-sample protein FASTAs with seqkit grep.
+# NOTE(review): input.faa is hard-wired to work/prodigal/{sample}/{sample}.faa,
+# while faa_input (annotation.smk) uses {sample}_prodigal.faa and also
+# supports FragGeneScan -- confirm these paths are produced by some rule.
+rule faa_catalogue:
+    input:
+        ffn = "work/cdhit/catalogue.ffn",
+        faa = expand("work/prodigal/{sample}/{sample}.faa", sample=config["SAMPLES"])
+    output:
+        list = temp("work/cdhit/catalogue.list"),
+        faa = "work/cdhit/catalogue.faa"
+    threads:
+        2
+    shell:
+        "grep '>' {input.ffn} > {output.list} "
+        " ; "
+        "sed --in-place 's/>//' {output.list} "
+        " ; "
+        "source activate seqkit-0.10.1 "
+        " ; "
+        "seqkit "
+        "grep "
+        "--by-name "
+        "--pattern-file {output.list} "
+        "--threads {threads} "
+        "--out-file {output.faa} "
+        "{input.faa} "
+        " ; "
+        "conda deactivate"