diff --git a/annotation.smk b/annotation.smk
new file mode 100644
index 0000000000000000000000000000000000000000..78b79b7a7d3e14768aa3cd218d48b65591f7dc17
--- /dev/null
+++ b/annotation.smk
@@ -0,0 +1,77 @@
+# Gene prediction on assembled contigs. config["PROTEINS-PREDICTOR"] selects
+# the predictor whose outputs faa_input/ffn_input/gff_input resolve to.
+
+# FragGeneScan gene prediction. -out receives the output prefix, taken from
+# the faa output because str(output) would join all four paths with spaces.
+# The trailing sed rewrites every '*' in the faa output to 'N'.
+rule FragGeneScan:
+    input:
+        unpack(contigs_input)
+    output:
+        faa = "work/FGS/{sample}_FGS.faa",
+        ffn = "work/FGS/{sample}_FGS.ffn",
+        gff = "work/FGS/{sample}_FGS.gff",
+        out = temp("work/FGS/{sample}_FGS.out")
+    threads:
+        4
+    params:
+        output = lambda wildcards, output: os.path.splitext(output.faa)[0]
+    shell:
+        "run_FragGeneScan.pl "
+        "-genome={input} "
+        "-out={params.output} "
+        "-complete=0 " #negative values with complete=1
+        "-train=complete "
+        "-thread={threads} "
+        " ; "
+        "sed --in-place 's/*/N/g' {output.faa} "
+
+# Prodigal gene prediction producing the faa, ffn and gff outputs.
+rule prodigal:
+    input:
+        unpack(contigs_input)
+    output:
+        faa = "work/prodigal/{sample}_prodigal.faa",
+        ffn = "work/prodigal/{sample}_prodigal.ffn",
+        gff = "work/prodigal/{sample}_prodigal.gff"
+    shell:
+        "prodigal "
+        "-f gff "
+        "-i {input} "
+        "-a {output.faa} "
+        "-d {output.ffn} "
+        "-o {output.gff} "
+        "-g 11 "
+        "-p meta "
+        # " ; "
+        # "sed --in-place 's/*/N/' {output.faa} "
+
+def _predicted_path(wildcards, extension):
+    """Return the configured predictor's output path for this sample.
+
+    Raises ValueError when config["PROTEINS-PREDICTOR"] is not a known
+    predictor, instead of silently handing None back to Snakemake.
+    """
+    predictor = config["PROTEINS-PREDICTOR"]
+    if predictor == "FragGeneScan":
+        return "work/FGS/{}_FGS.{}".format(wildcards.sample, extension)
+    if predictor == "prodigal":
+        return "work/prodigal/{}_prodigal.{}".format(wildcards.sample, extension)
+    raise ValueError(
+        "PROTEINS-PREDICTOR must be 'FragGeneScan' or 'prodigal', "
+        "got {!r}".format(predictor)
+    )
+
+def faa_input(wildcards):
+    """Input function: the configured predictor's .faa file, keyed "faa"."""
+    return {"faa": _predicted_path(wildcards, "faa")}
+
+def ffn_input(wildcards):
+    """Input function: the configured predictor's .ffn file, keyed "fnn"."""
+    # NOTE(review): the key is spelled "fnn" although the file is .ffn; kept
+    # unchanged because the rules consuming it are not visible here.
+    return {"fnn": _predicted_path(wildcards, "ffn")}
+
+def gff_input(wildcards):
+    """Input function: the configured predictor's .gff file, keyed "gff"."""
+    return {"gff": _predicted_path(wildcards, "gff")}
diff --git a/assembly.smk b/assembly.smk
index d359f42358f35befd36360118249bcb810e24a13..879adbd0815f19dcc880ad1504f12c05eef85fa3 100644
--- a/assembly.smk
+++ b/assembly.smk
@@ -79,7 +79,7 @@ rule metaspades:
         min_len = config["CONTIGS_LEN"],
         mem_tot = 250,
         input = lambda wildcards, input: " --12 ".join(input),
-        output = lambda wildcards, output: os.path.dirname(str(output))
+        output = lambda wildcards, output: os.path.dirname(str(output.contigs))
     shell:
         "spades.py "
         "--threads {threads} "
diff --git a/config.json b/config.json
index c92e0e24b40e24f91ac604e9bdf756ec7bf43b78..6b7ebc0c953843dfb451d102654cea7bd21b438b 100644
--- a/config.json
+++ b/config.json
@@ -3,5 +3,6 @@
     "NORMALIZATION": false,
     "SORTMERNA": false,
     "ASSEMBLER": "metaspades",
-    "CONTIGS_LEN": 1000
+    "CONTIGS_LEN": 1000,
+    "PROTEINS-PREDICTOR": "prodigal"
 }
diff --git a/global.smk b/global.smk
index dbb749cead309d1e01e9baaff9e319d5c46b3989..e8bd4042b14137a88e086601d09c415e37804096 100644
--- a/global.smk
+++ b/global.smk
@@ -15,3 +15,4 @@ include: "../workflow_metagenomics/quality.smk"
 include: "../workflow_metagenomics/preprocess.smk"
 include: "../workflow_metagenomics/kaiju.smk"
 include: "../workflow_metagenomics/assembly.smk"
+include: "../workflow_metagenomics/annotation.smk"