Commit da39f1fb authored by Midoux Cedric's avatar Midoux Cedric
Browse files

cdhit catalogue.ffn

parent c580f2fb
......@@ -2,10 +2,10 @@ rule FragGeneScan:
input:
unpack(contigs_input)
output:
faa = "work/FGS/{sample}_FGS.faa",
ffn = "work/FGS/{sample}_FGS.ffn",
gff = "work/FGS/{sample}_FGS.gff",
out = temp("work/FGS/{sample}_FGS.out")
faa = "work/FGS/{sample}/{sample}_FGS.faa",
ffn = "work/FGS/{sample}/{sample}_FGS.ffn",
gff = "work/FGS/{sample}/{sample}_FGS.gff",
out = temp("work/FGS/{sample}/{sample}_FGS.out")
threads:
4
params:
......@@ -24,9 +24,9 @@ rule prodigal:
input:
unpack(contigs_input)
output:
faa = "work/prodigal/{sample}_prodigal.faa",
ffn = "work/prodigal/{sample}_prodigal.ffn",
gff = "work/prodigal/{sample}_prodigal.gff"
faa = "work/prodigal/{sample}/{sample}_prodigal.faa",
ffn = "work/prodigal/{sample}/{sample}_prodigal.ffn",
gff = "work/prodigal/{sample}/{sample}_prodigal.gff"
shell:
"prodigal "
"-f gff "
......@@ -41,21 +41,21 @@ rule prodigal:
def faa_input(wildcards):
if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
return {"faa": "work/FGS/{wildcards.sample}_FGS.faa".format(wildcards=wildcards)}
return {"faa": "work/FGS/{wildcards.sample}/{wildcards.sample}_FGS.faa".format(wildcards=wildcards)}
elif config["PROTEINS-PREDICTOR"] == "prodigal":
return {"faa": "work/prodigal/{wildcards.sample}_prodigal.faa".format(wildcards=wildcards)}
return {"faa": "work/prodigal/{wildcards.sample}/{wildcards.sample}_prodigal.faa".format(wildcards=wildcards)}
def ffn_input(wildcards):
if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
return {"fnn": "work/FGS/{wildcards.sample}_FGS.ffn".format(wildcards=wildcards)}
return {"fnn": "work/FGS/{wildcards.sample}/{wildcards.sample}_FGS.ffn".format(wildcards=wildcards)}
elif config["PROTEINS-PREDICTOR"] == "prodigal":
return {"fnn": "work/prodigal/{wildcards.sample}_prodigal.ffn".format(wildcards=wildcards)}
return {"fnn": "work/prodigal/{wildcards.sample}/{wildcards.sample}_prodigal.ffn".format(wildcards=wildcards)}
def gff_input(wildcards):
if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
return {"gff": "work/FGS/{wildcards.sample}_FGS.gff".format(wildcards=wildcards)}
return {"gff": "work/FGS/{wildcards.sample}/{wildcards.sample}_FGS.gff".format(wildcards=wildcards)}
elif config["PROTEINS-PREDICTOR"] == "prodigal":
return {"gff": "work/prodigal/{wildcards.sample}_prodigal.gff".format(wildcards=wildcards)}
return {"gff": "work/prodigal/{wildcards.sample}/{wildcards.sample}_prodigal.gff".format(wildcards=wildcards)}
def db_input(wildcards):
if wildcards.db == "nr":
......@@ -156,4 +156,4 @@ rule interproscan:
"--formats tsv,gff3,html "
"--iprlookup "
"--goterms "
"--pathways "
\ No newline at end of file
"--pathways "
rule partial_genes: #fonctionne uniquement avec prodigal
input:
"work/prodigal/{sample}/{sample}_prodigal.ffn"
output:
"work/prodigal/{sample}/{sample}_partial_{x}.ffn"
threads:
2
shell:
"source activate seqkit-0.10.1 "
" ; "
"seqkit "
"grep "
"--use-regexp "
"--by-name "
"--pattern ';partial={wildcards.x};' "
"--threads {threads} "
"--out-file {output} "
"{input} "
" ; "
"conda deactivate"
rule pool:
input:
expand("work/prodigal/{sample}/{sample}_partial_{{x}}.ffn", sample=config["SAMPLES"])
output:
temp("work/prodigal/pool/partial_{x}.ffn")
shell:
"cat "
"{input} "
" > "
"{output}"
rule pool_incomplet:
input:
expand("work/cdhit/non_redundant_parial_{x}.ffn", x=["01", "10"])
output:
temp("work/cdhit/non_redundant_parial_10-01.ffn")
shell:
"cat "
"{input} "
" > "
"{output}"
rule cd_hit:
input:
"work/prodigal/pool/partial_{x}.ffn"
output:
ffn = "work/cdhit/non_redundant_parial_{x}.ffn",
clustr = "work/cdhit/non_redundant_parial_{x}.ffn.clustr"
threads:
8
shell:
"cd-hit-est "
"-i {input} "
"-o {output.ffn} "
"-c 0.95 "
"-G 1 "
"-aS 0.90 "
"-d 0 "
"-M 0 "
"-T {threads} "
rule cd_hit_2D:
input:
complete = "work/cdhit/non_redundant_parial_00.ffn",
incomplete = "work/cdhit/non_redundant_parial_10-01.ffn"
output:
ffn = "work/cdhit/catalogue.ffn",
clustr = "work/cdhit/catalogue.clustr"
threads:
8
shell:
"cd-hit-est-2d "
"-i {input.complete} "
"-i2 {input.incomplete} "
"-o {output.ffn} "
"-c 0.95 "
"-aS 0.90 "
"-g 1 "
"-d 0 "
"-M 0 "
"-T {threads} "
......@@ -16,6 +16,7 @@ rule all:
"report/diamond_nr_coassembly.tsv",
"work/prokka/coassembly_prokka.tsv",
"work/interproscan/coassembly.faa.tsv",
"work/cdhit/catalogue.ffn",
include: "../workflow_metagenomics/quality.smk"
......@@ -24,3 +25,4 @@ include: "../workflow_metagenomics/kaiju.smk"
include: "../workflow_metagenomics/assembly.smk"
include: "../workflow_metagenomics/annotation.smk"
include: "../workflow_metagenomics/count.smk"
include: "../workflow_metagenomics/catalogue.smk"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment