Commit da39f1fb authored by Midoux Cedric
Browse files

cdhit catalogue.ffn

No related merge requests found
Showing with 98 additions and 14 deletions
+98 -14
...@@ -2,10 +2,10 @@ rule FragGeneScan: ...@@ -2,10 +2,10 @@ rule FragGeneScan:
input: input:
unpack(contigs_input) unpack(contigs_input)
output: output:
faa = "work/FGS/{sample}_FGS.faa", faa = "work/FGS/{sample}/{sample}_FGS.faa",
ffn = "work/FGS/{sample}_FGS.ffn", ffn = "work/FGS/{sample}/{sample}_FGS.ffn",
gff = "work/FGS/{sample}_FGS.gff", gff = "work/FGS/{sample}/{sample}_FGS.gff",
out = temp("work/FGS/{sample}_FGS.out") out = temp("work/FGS/{sample}/{sample}_FGS.out")
threads: threads:
4 4
params: params:
...@@ -24,9 +24,9 @@ rule prodigal: ...@@ -24,9 +24,9 @@ rule prodigal:
input: input:
unpack(contigs_input) unpack(contigs_input)
output: output:
faa = "work/prodigal/{sample}_prodigal.faa", faa = "work/prodigal/{sample}/{sample}_prodigal.faa",
ffn = "work/prodigal/{sample}_prodigal.ffn", ffn = "work/prodigal/{sample}/{sample}_prodigal.ffn",
gff = "work/prodigal/{sample}_prodigal.gff" gff = "work/prodigal/{sample}/{sample}_prodigal.gff"
shell: shell:
"prodigal " "prodigal "
"-f gff " "-f gff "
...@@ -41,21 +41,21 @@ rule prodigal: ...@@ -41,21 +41,21 @@ rule prodigal:
def faa_input(wildcards): def faa_input(wildcards):
if config["PROTEINS-PREDICTOR"] == "FragGeneScan": if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
return {"faa": "work/FGS/{wildcards.sample}_FGS.faa".format(wildcards=wildcards)} return {"faa": "work/FGS/{wildcards.sample}/{wildcards.sample}_FGS.faa".format(wildcards=wildcards)}
elif config["PROTEINS-PREDICTOR"] == "prodigal": elif config["PROTEINS-PREDICTOR"] == "prodigal":
return {"faa": "work/prodigal/{wildcards.sample}_prodigal.faa".format(wildcards=wildcards)} return {"faa": "work/prodigal/{wildcards.sample}/{wildcards.sample}_prodigal.faa".format(wildcards=wildcards)}
def ffn_input(wildcards): def ffn_input(wildcards):
if config["PROTEINS-PREDICTOR"] == "FragGeneScan": if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
return {"fnn": "work/FGS/{wildcards.sample}_FGS.ffn".format(wildcards=wildcards)} return {"fnn": "work/FGS/{wildcards.sample}/{wildcards.sample}_FGS.ffn".format(wildcards=wildcards)}
elif config["PROTEINS-PREDICTOR"] == "prodigal": elif config["PROTEINS-PREDICTOR"] == "prodigal":
return {"fnn": "work/prodigal/{wildcards.sample}_prodigal.ffn".format(wildcards=wildcards)} return {"fnn": "work/prodigal/{wildcards.sample}/{wildcards.sample}_prodigal.ffn".format(wildcards=wildcards)}
def gff_input(wildcards): def gff_input(wildcards):
if config["PROTEINS-PREDICTOR"] == "FragGeneScan": if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
return {"gff": "work/FGS/{wildcards.sample}_FGS.gff".format(wildcards=wildcards)} return {"gff": "work/FGS/{wildcards.sample}/{wildcards.sample}_FGS.gff".format(wildcards=wildcards)}
elif config["PROTEINS-PREDICTOR"] == "prodigal": elif config["PROTEINS-PREDICTOR"] == "prodigal":
return {"gff": "work/prodigal/{wildcards.sample}_prodigal.gff".format(wildcards=wildcards)} return {"gff": "work/prodigal/{wildcards.sample}/{wildcards.sample}_prodigal.gff".format(wildcards=wildcards)}
def db_input(wildcards): def db_input(wildcards):
if wildcards.db == "nr": if wildcards.db == "nr":
...@@ -156,4 +156,4 @@ rule interproscan: ...@@ -156,4 +156,4 @@ rule interproscan:
"--formats tsv,gff3,html " "--formats tsv,gff3,html "
"--iprlookup " "--iprlookup "
"--goterms " "--goterms "
"--pathways " "--pathways "
\ No newline at end of file
catalogue.smk 0 → 100644
# Works only with prodigal: the input is the prodigal nucleotide gene file.
# Keeps the records whose name matches the regular expression
# ';partial=<x>;', where <x> is taken from the requested output file name.
rule partial_genes:
    input:
        "work/prodigal/{sample}/{sample}_prodigal.ffn"
    output:
        "work/prodigal/{sample}/{sample}_partial_{x}.ffn"
    threads: 2
    shell:
        # Activate the seqkit environment, filter, then leave the environment.
        "source activate seqkit-0.10.1  ; "
        "seqkit grep --use-regexp --by-name "
        "--pattern ';partial={wildcards.x};' "
        "--threads {threads} --out-file {output} {input}  ; "
        "conda deactivate"
# Concatenate, for one value of {x}, the per-sample partial_{x} gene files of
# every sample listed in config["SAMPLES"] into a single temporary file.
rule pool:
    input:
        expand(
            "work/prodigal/{sample}/{sample}_partial_{{x}}.ffn",
            sample=config["SAMPLES"],
        )
    output:
        temp("work/prodigal/pool/partial_{x}.ffn")
    shell:
        "cat {input}  > {output}"
# Concatenate the non-redundant "01" and "10" gene sets into one temporary
# "10-01" file.
# NOTE(review): this output path also matches the cd_hit output pattern
# (x = "10-01"); confirm Snakemake resolves the target to this rule.
rule pool_incomplet:
    input:
        "work/cdhit/non_redundant_parial_01.ffn",
        "work/cdhit/non_redundant_parial_10.ffn"
    output:
        temp("work/cdhit/non_redundant_parial_10-01.ffn")
    shell:
        "cat {input}  > {output}"
# Cluster the pooled partial_{x} genes with cd-hit-est and keep one
# representative per cluster.
#   input  : pooled nucleotide genes for one value of {x}
#   output : ffn    - representative sequences
#            clustr - cluster membership listing
rule cd_hit:
    input:
        "work/prodigal/pool/partial_{x}.ffn"
    output:
        ffn = "work/cdhit/non_redundant_parial_{x}.ffn",
        clustr = "work/cdhit/non_redundant_parial_{x}.ffn.clustr"
    threads:
        8
    shell:
        "cd-hit-est "
        "-i {input} "
        "-o {output.ffn} "
        "-c 0.95 "
        "-G 1 "
        "-aS 0.90 "
        "-d 0 "
        "-M 0 "
        "-T {threads} "
        # cd-hit-est writes its cluster listing to '<-o value>.clstr', which is
        # not the declared 'clustr' path; rename it so the declared output
        # actually exists when the job finishes.
        "&& mv {output.ffn}.clstr {output.clustr}"
# Compare the non-redundant "10-01" genes (-i2) against the non-redundant
# "00" genes (-i) with cd-hit-est-2d to produce the catalogue.
#   output : ffn    - sequences written by cd-hit-est-2d
#            clustr - cluster membership listing
# NOTE(review): per the CD-HIT guide, cd-hit-est-2d writes only the -i2
# sequences that are not similar to -i; confirm whether the "00" genes should
# also be added to catalogue.ffn.
# NOTE(review): this rule passes '-g 1' while cd_hit passes '-G 1'; confirm
# the difference is intended.
rule cd_hit_2D:
    input:
        complete = "work/cdhit/non_redundant_parial_00.ffn",
        incomplete = "work/cdhit/non_redundant_parial_10-01.ffn"
    output:
        ffn = "work/cdhit/catalogue.ffn",
        clustr = "work/cdhit/catalogue.clustr"
    threads:
        8
    shell:
        "cd-hit-est-2d "
        "-i {input.complete} "
        "-i2 {input.incomplete} "
        "-o {output.ffn} "
        "-c 0.95 "
        "-aS 0.90 "
        "-g 1 "
        "-d 0 "
        "-M 0 "
        "-T {threads} "
        # cd-hit-est-2d writes its cluster listing to '<-o value>.clstr'
        # (here catalogue.ffn.clstr), not to the declared 'clustr' path;
        # rename it so the declared output exists when the job finishes.
        "&& mv {output.ffn}.clstr {output.clustr}"
...@@ -16,6 +16,7 @@ rule all: ...@@ -16,6 +16,7 @@ rule all:
"report/diamond_nr_coassembly.tsv", "report/diamond_nr_coassembly.tsv",
"work/prokka/coassembly_prokka.tsv", "work/prokka/coassembly_prokka.tsv",
"work/interproscan/coassembly.faa.tsv", "work/interproscan/coassembly.faa.tsv",
"work/cdhit/catalogue.ffn",
include: "../workflow_metagenomics/quality.smk" include: "../workflow_metagenomics/quality.smk"
...@@ -24,3 +25,4 @@ include: "../workflow_metagenomics/kaiju.smk" ...@@ -24,3 +25,4 @@ include: "../workflow_metagenomics/kaiju.smk"
include: "../workflow_metagenomics/assembly.smk" include: "../workflow_metagenomics/assembly.smk"
include: "../workflow_metagenomics/annotation.smk" include: "../workflow_metagenomics/annotation.smk"
include: "../workflow_metagenomics/count.smk" include: "../workflow_metagenomics/count.smk"
include: "../workflow_metagenomics/catalogue.smk"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment