Commit 4ee40425 authored by Midoux Cedric

modularity via config.json: expose SORTMERNA, ASSEMBLER and CONTIGS_LEN options; add prodigal, prokka, metaspades and interproscan rules; replace the dedicated coassembly rules with assembler-agnostic input functions

parent 323e84ef
 rule FragGeneScan:
     input:
-        "work/megahit/{reads}/{reads}.contigs.fa"
+        unpack(contigs_input)
     output:
-        faa = "work/FGS/{reads}_FGS.faa",
-        ffn = "work/FGS/{reads}_FGS.ffn",
-        gff = "work/FGS/{reads}_FGS.gff",
-        out = temp("work/FGS/{reads}_FGS.out")
+        faa = "work/FGS/{sample}_FGS.faa",
+        ffn = "work/FGS/{sample}_FGS.ffn",
+        gff = "work/FGS/{sample}_FGS.gff",
+        out = temp("work/FGS/{sample}_FGS.out")
     threads:
         config["THREADS"]
     params:
-        output = "work/FGS/{reads}_FGS"
+        output = "work/FGS/{sample}_FGS"
     shell:
         "run_FragGeneScan.pl "
         "-genome={input} "
@@ -20,13 +20,62 @@ rule FragGeneScan:
         " ; "
         "sed --in-place 's/*/N/' {output.faa} "
+rule prodigal:
+    input:
+        unpack(contigs_input)
+    output:
+        faa = "work/prodigal/{sample}_prodigal.faa",
+        ffn = "work/prodigal/{sample}_prodigal.ffn",
+        gff = "work/prodigal/{sample}_prodigal.gff"
+    shell:
+        "prodigal "
+        "-f gff "
+        "-i {input} "
+        "-a {output.faa} "
+        "-d {output.ffn} "
+        "-o {output.gff} "
+        "-g 11 "
+        "-p meta "
+
+rule prokka:
+    input:
+        unpack(contigs_input)
+    output:
+        err = "work/prokka/{sample}_prokka.err",
+        faa = "work/prokka/{sample}_prokka.faa",
+        ffn = "work/prokka/{sample}_prokka.ffn",
+        fna = "work/prokka/{sample}_prokka.fna",
+        fsa = "work/prokka/{sample}_prokka.fsa",
+        gbk = "work/prokka/{sample}_prokka.gbk",
+        gff = "work/prokka/{sample}_prokka.gff",
+        log = "work/prokka/{sample}_prokka.log",
+        sqn = "work/prokka/{sample}_prokka.sqn",
+        tbl = "work/prokka/{sample}_prokka.tbl",
+        tsv = "work/prokka/{sample}_prokka.tsv",
+        txt = "work/prokka/{sample}_prokka.txt"
+    params:
+        output = lambda wildcards, output: os.path.dirname(str(output.gff)),
+    threads:
+        config["THREADS"]
+    shell:
+        "prokka "
+        "--output-dir {params.output} "
+        "--force "
+        "--prefix {wildcards.sample}_prokka "
+        "--gffver 3 "
+        "--metagenome "
+        "--cpus {threads} "
+        "--notrna "
+        "--norrna "
+        "{input} "
 rule diamond:
     input:
-        faa = "work/FGS/{reads}_FGS.faa",
+        faa = "work/FGS/{sample}_FGS.faa",
         database = "/db/outils/diamond-0.9.24/nr_tax.dmnd"
     output:
-        daa = "work/DIAMOND/{reads}.daa",
-        unaligned = "work/DIAMOND/{reads}_unaligned.faa"
+        daa = "work/DIAMOND/{sample}.daa",
+        unaligned = "work/DIAMOND/{sample}_unaligned.faa"
     threads:
         config["THREADS"]
     shell:
@@ -46,9 +95,9 @@ rule diamond:
 rule diamondView:
     input:
-        daa = "work/DIAMOND/{reads}.daa"
+        daa = "work/DIAMOND/{sample}.daa"
     output:
-        tsv = "report/diamond-NR_{reads}.tsv"
+        tsv = "report/diamond-NR_{sample}.tsv"
     params:
         keywords = "qseqid qlen sseqid sallseqid slen qstart qend sstart send qseq full_qseq sseq full_sseq evalue bitscore score length pident nident mismatch positive gapopen gaps ppos qframe btop stitle salltitles qcovhsp qtitle qqual" #staxids
     shell:
@@ -77,3 +126,27 @@ rule eggnog:
         "--hmm_maxhits 1 "
         "--output {params.output} "
         "-i {input.faa} "
+
+rule interproscan:
+    input:
+        faa = "work/FGS/{sample}_FGS.faa"
+    output:
+        tsv = "work/interproscan/{sample}_FGS.faa.tsv",
+        gff3 = "work/interproscan/{sample}_FGS.faa.gff3",
+        html = "work/interproscan/{sample}_FGS.faa.html.tar.gz"
+    params:
+        output = lambda wildcards, output: os.path.dirname(str(output.tsv)),
+        mem_tot = int(config["MEM"] * config["THREADS"] * 1e9)
+    threads:
+        config["THREADS"]
+    shell:
+        "interproscan.sh "
+        "--cpu {threads} "
+        "--highmem {params.mem_tot} "
+        "--input {input.faa} "
+        "--seqtype p "
+        "--output-dir {params.output} "
+        "--formats tsv,gff3,html "
+        "--iprlookup "
+        "--goterms "
+        "--pathways "
+def khmer_input(wildcards):
+    if config["SORTMERNA"]:
+        return {"R1R2": "work/sortmerna/{wildcards.reads}_mRNA.fastq.gz".format(wildcards=wildcards)}
+    else:
+        return {"R1R2": "DATA/trim/{wildcards.reads}_R1R2.fastq.gz".format(wildcards=wildcards)}
+
 rule khmer:
     input:
-        # R1R2 = "work/sortmerna/{reads}_mRNA.fastq.gz"
-        R1R2 = "DATA/trim/{reads}_R1R2.fastq.gz"
+        unpack(khmer_input)
     output:
         R1R2 = "work/khmer/{reads}_R1R2.fastq.gz",
         log = "work/khmer/{reads}.log"
     threads:
         config["THREADS"]
     params:
-        mem_tot = (MEM * config["THREADS"] * 1e9),
+        mem_tot = (config["MEM"] * config["THREADS"] * 1e9),
         K = 32,
         C = 100
     shell:
@@ -22,45 +27,118 @@ rule khmer:
         "--gzip "
         "{input.R1R2}"
-def megahit_input(wildcards):
-    if NORM:
-        return {"R1R2": "work/khmer/{wildcards.reads}_R1R2.fastq.gz".format(wildcards=wildcards)}
+def assembly_input(wildcards):
+    if wildcards.reads == "coassembly":
+        if config["NORMALIZATION"]:
+            return [expand("work/khmer/{reads}_R1R2.fastq.gz", reads=SAMPLES)]
+        elif config["SORTMERNA"]:
+            return [expand("work/sortmerna/{reads}_mRNA.fastq.gz", reads=SAMPLES)]
+        else:
+            return [expand("DATA/trim/{reads}_R1R2.fastq.gz", reads=SAMPLES)]
     else:
-        #return {"R1R2": "work/sortmerna/{wildcards.reads}_mRNA.fastq.gz".format(wildcards=wildcards)}
-        return {"R1R2": "DATA/trim/{reads}_R1R2.fastq.gz".format(wildcards=wildcards)}
+        if config["NORMALIZATION"]:
+            return {"R1R2": "work/khmer/{wildcards.reads}_R1R2.fastq.gz".format(wildcards=wildcards)}
+        elif config["SORTMERNA"]:
+            return {"R1R2": "work/sortmerna/{wildcards.reads}_mRNA.fastq.gz".format(wildcards=wildcards)}
+        else:
+            return {"R1R2": "DATA/trim/{wildcards.reads}_R1R2.fastq.gz".format(wildcards=wildcards)}
 rule megahit:
     input:
-        unpack(megahit_input)
+        unpack(assembly_input)
     output:
         "work/megahit/{reads}/{reads}.contigs.fa"
     threads:
         config["THREADS"]
     params:
-        mem_tot = (MEM * config["THREADS"] * 1e9)
+        min_len = config["CONTIGS_LEN"],
+        mem_tot = int(config["MEM"] * config["THREADS"] * 1e9),
+        input = lambda wildcards, input: ",".join(input),
+        output = lambda wildcards, output: os.path.dirname(str(output))
     shell:
         "megahit "
-        "--12 {input.R1R2} "
+        "--12 {params.input} "
         "--continue "
         "--preset meta-large "
         "--num-cpu-threads {threads} "
         "--memory {params.mem_tot} "
-        "--out-dir work/megahit/{wildcards.reads} "
+        "--out-dir {params.output} "
         "--out-prefix {wildcards.reads} "
-        "--min-contig-len 500 "
+        "--min-contig-len {params.min_len} "
         "--verbose"
+
+rule metaspades:
+    input:
+        unpack(assembly_input)
+    output:
+        "work/metaSPADES/{reads}/{reads}.contigs.fasta"
+    threads:
+        config["THREADS"]
+    params:
+        min_len = config["CONTIGS_LEN"],
+        input = lambda wildcards, input: " --12 ".join(input),
+        output = lambda wildcards, output: os.path.dirname(str(output))
+    shell:
+        "spades.py "
+        "--threads {threads} "
+        "--meta "
+        "--12 {params.input} "
+        "-o {params.output} "
+        " ; "
+        "filterSeq.py "
+        "-i work/metaSPADES/{wildcards.reads}/contigs.fasta "
+        "-o work/metaSPADES/{wildcards.reads}/{wildcards.reads}.contigs.fasta "
+        "--min-length {params.min_len} "
+def contigs_input(wildcards):
+    if config["ASSEMBLER"] == "megahit":
+        return ["work/megahit/{wildcards.sample}/{wildcards.sample}.contigs.fa".format(wildcards=wildcards)]
+    elif config["ASSEMBLER"] == "metaspades":
+        return ["work/metaSPADES/{wildcards.sample}/{wildcards.sample}.contigs.fasta".format(wildcards=wildcards)]
+
+def contigsExpand_input(wildcards):
+    if config["ASSEMBLER"] == "megahit":
+        return [expand("work/megahit/{sample}/{sample}.contigs.fa", sample=SAMPLES)]
+    elif config["ASSEMBLER"] == "metaspades":
+        return [expand("work/metaSPADES/{sample}/{sample}.contigs.fasta", sample=SAMPLES)]
+
+def coassembly_contigs_input(wildcards):
+    if config["ASSEMBLER"] == "megahit":
+        return ["work/megahit/coassembly/coassembly.contigs.fa"]
+    elif config["ASSEMBLER"] == "metaspades":
+        return ["work/metaSPADES/coassembly/coassembly.contigs.fasta"]
 rule quast:
     input:
-        expand("work/megahit/{contigs}/{contigs}.contigs.fa", contigs=SAMPLES)
+        unpack(contigsExpand_input)
     output:
         "report/quast_results/report.html"
     threads:
         config["THREADS"]
+    params:
+        output = lambda wildcards, output: os.path.dirname(str(output))
     shell:
         "quast "
         "--mgm "
+        "--output-dir {params.output} "
+        "-L "
+        "--threads {threads} "
+        "{input} "
+
+rule coassembly_quast:
+    input:
+        unpack(coassembly_contigs_input)
+    output:
+        "report/quast_coassembly/report.html"
+    threads:
+        config["THREADS"]
+    params:
+        output = lambda wildcards, output: os.path.dirname(str(output))
+    shell:
+        "quast "
+        "--mgm "
-        "--output-dir report/quast_results "
+        "--output-dir {params.output} "
         "-L "
         "--threads {threads} "
         "{input} "
@@ -26,7 +26,7 @@
         "queue" : "highmem.q,maiage.q",
         "cluster" : "-l h_vmem={MEM}G"
     },
-    "coassembly_megahit" :
+    "metaspades" :
     {
         "queue" : "highmem.q,maiage.q",
         "cluster" : "-l h_vmem={MEM}G"

-def coassembly_megahit_input(wildcards):
-    if NORM:
-        return [expand("work/khmer/{reads}_R1R2.fastq.gz", reads=SAMPLES)]
-    else:
-        #return [expand("work/sortmerna/{reads}_mRNA.fastq.gz", reads=SAMPLES)]
-        return [expand("DATA/trim/{reads}_R1R2.fastq.gz", reads=SAMPLES)]
-
-rule coassembly_megahit:
-    input:
-        unpack(coassembly_megahit_input)
-    output:
-        "work/megahit/coassembly/coassembly.contigs.fa"
-    threads:
-        config["THREADS"]
-    params:
-        mem_tot = (MEM * config["THREADS"] * 1e9),
-        input = lambda wildcards, input: ",".join(input),
-        output = "coassembly"
-    shell:
-        "megahit "
-        "--12 {params.input} "
-        "--continue "
-        "--preset meta-large "
-        "--num-cpu-threads {threads} "
-        "--memory {params.mem_tot} "
-        "--out-dir work/megahit/{params.output} "
-        "--out-prefix {params.output} "
-        "--min-contig-len 500 "
-        "--verbose"
-
-rule coassembly_quast:
-    input:
-        "work/megahit/coassembly/coassembly.contigs.fa"
-    output:
-        "report/quast_coassembly/report.html"
-    threads:
-        config["THREADS"]
-    params:
-        output = lambda wildcards, output: os.path.dirname(str(output))
-    shell:
-        "quast "
-        "--mgm "
-        "--output-dir {params.output} "
-        "-L "
-        "--threads {threads} "
-        "{input} "

@@ -2,5 +2,8 @@
     "THREADS": 12,
     "MEM": 15,
     "SAMPLES": ["J80_A", "J80_B", "J177_A", "J177_B", "J218_A", "J218_B", "J281_A", "J281_B", "J462_A", "J462_B"],
-    "NORMALIZATION": false
+    "NORMALIZATION": false,
+    "SORTMERNA": false,
+    "ASSEMBLER": "metaspades",
+    "CONTIGS_LEN": 1000
 }
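For the memory parameters used by khmer, megahit and interproscan, `MEM` reads as gigabytes per thread (an interpretation from how the value is used, not documented in the config itself), so the byte budget works out as:

```python
# Worked example with the values above: 15 GB per thread across 12 threads.
config = {"MEM": 15, "THREADS": 12}
mem_tot = int(config["MEM"] * config["THREADS"] * 1e9)
print(mem_tot)  # 180000000000 bytes, i.e. 180 GB
```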
 rule index:
     input:
-        "work/megahit/{contigs}/{contigs}.contigs.fa"
+        unpack(contigs_input)
     output:
-        expand("work/index/{{contigs}}.{ext}.bt2l", ext=["1", "2", "3", "4", "rev.1", "rev.2"])
+        expand("work/index/{{sample}}.{ext}.bt2l", ext=["1", "2", "3", "4", "rev.1", "rev.2"])
     params:
-        index = "work/index/{contigs}"
+        index = "work/index/{sample}"
     shell:
         "bowtie2-build "
         "--large-index "
         "{input} "
         "{params.index}"
@@ -16,9 +17,9 @@ rule mapping:
         R2 = "DATA/trim/{reads}_R2.fastq.gz",
         i = expand("work/index/{{contigs}}.{ext}.bt2l", ext=["1", "2", "3", "4", "rev.1", "rev.2"])
     output:
-        sam = temp("work/bowtie/align-{reads}-{contigs}.sam"),
-        bam = "work/bowtie/align-{reads}-{contigs}.bam",
-        bai = "work/bowtie/align-{reads}-{contigs}.bam.bai"
+        sam = temp("work/bowtie/align_{reads}_{contigs}.sam"),
+        bam = "work/bowtie/align_{reads}_{contigs}.bam",
+        bai = "work/bowtie/align_{reads}_{contigs}.bam.bai"
     params:
         index = "work/index/{contigs}"
     threads:
@@ -47,8 +48,8 @@ rule mapping:
 rule count_contig:
     input:
-        bam = "work/bowtie/align-{reads}-{contigs}.bam",
-        bai = "work/bowtie/align-{reads}-{contigs}.bam.bai"
+        bam = "work/bowtie/align_{reads}_{contigs}.bam",
+        bai = "work/bowtie/align_{reads}_{contigs}.bam.bai"
     output:
         "report/count-contigs-{reads}-{contigs}.tsv"
     shell:
@@ -59,11 +60,11 @@ rule count_contig:
 rule count_gene:
     input:
-        bam = "work/bowtie/align-{reads}-{contigs}.bam",
-        bai = "work/bowtie/align-{reads}-{contigs}.bam.bai",
+        bam = "work/bowtie/align_{reads}_{contigs}.bam",
+        bai = "work/bowtie/align_{reads}_{contigs}.bam.bai",
         gff = "work/{gff_tool}/{contigs}_{gff_tool}.gff"
     output:
-        "report/count-genes_{gff_tool}-{reads}-{contigs}.tsv"
+        "report/count-genes-{gff_tool}-{reads}-{contigs}.tsv"
     shell:
         "htseq-count "
         "--format bam "

 import os
 configfile: "./config.json"
-MEM=config["MEM"]
 SAMPLES=config["SAMPLES"]
-NORM = config["NORMALIZATION"]
 rule all:
     input:
@@ -26,6 +24,5 @@ include: "quality.smk"
 include: "preprocess.smk"
 include: "kaiju.smk"
 include: "assembly.smk"
-include: "coassembly.smk"
 include: "annotation.smk"
 include: "count.smk"

 def kaiju_input(wildcards):
     if wildcards.focus == "contigs":
-        return ["work/megahit/{wildcards.samples}/{wildcards.samples}.contigs.fa".format(wildcards=wildcards)]
-    if wildcards.focus == "genes":
-        return ["work/FGS/{wildcards.samples}_FGS.ffn".format(wildcards=wildcards)]
-    if wildcards.focus == "reads":
-        return {"R1": "DATA/trim/{wildcards.samples}_R1.fastq.gz".format(wildcards=wildcards), "R2": "DATA/trim/{wildcards.samples}_R2.fastq.gz".format(wildcards=wildcards)}
+        return contigs_input(wildcards)
+    elif wildcards.focus == "genes":
+        return ["work/FGS/{wildcards.sample}_FGS.ffn".format(wildcards=wildcards)]
+    elif wildcards.focus == "reads":
+        return {"R1": "DATA/trim/{wildcards.sample}_R1.fastq.gz".format(wildcards=wildcards), "R2": "DATA/trim/{wildcards.sample}_R2.fastq.gz".format(wildcards=wildcards)}
 rule kaiju:
     input:
         unpack(kaiju_input)
     output:
-        "work/kaiju/{focus,[a-z]+}_{samples}.kaijuNR"
+        "work/kaiju/{focus,[a-z]+}_{sample}.kaijuNR"
     threads:
         config["THREADS"]
     params:
@@ -25,9 +25,9 @@ rule kaiju:
 rule krona:
     input:
-        "work/kaiju/{samples}.kaijuNR"
+        "work/kaiju/{sample}.kaijuNR"
     output:
-        temp("work/kaiju/{samples}.krona")
+        temp("work/kaiju/{sample}.krona")
     shell:
         "kaiju2krona "
         "-t /db/outils/kaiju/nr/nodes.dmp "
@@ -38,9 +38,9 @@ rule krona:
 rule kronaHTML:
     input:
-        "work/kaiju/{samples}.krona"
+        "work/kaiju/{sample}.krona"
     output:
-        "report/{samples}-krona.html"
+        "report/{sample}-krona.html"
     shell:
         "ktImportText "
         "-o {output} "
@@ -48,9 +48,9 @@ rule kronaHTML:
 rule kronaNames:
     input:
-        "work/kaiju/{samples}.kaijuNR"
+        "work/kaiju/{sample}.kaijuNR"
     output:
-        "report/{samples}-taxNames.tsv"
+        "report/{sample}-taxNames.tsv"
     shell:
         "addTaxonNames "
         "-t /db/outils/kaiju/nr/nodes.dmp "

@@ -5,7 +5,7 @@ rule fastp:
     output:
         R1 = "DATA/trim/{reads}_R1.fastq.gz",
         R2 = "DATA/trim/{reads}_R2.fastq.gz",
-        html = "work/fastp/{reads}_qual-report.html",
+        html = "work/fastp/{reads}_fastp.html",
         json = "work/fastp/{reads}_fastp.json"
     threads:
         config["THREADS"]
@@ -67,5 +67,5 @@ rule sortmerna:
         " ; "
         "pigz "
         "-p {threads} "
-        "{params.R1R2_rRNA}.fastq "
-        "{params.R1R2_mRNA}.fastq "
+        "{params.R1R2_rRNA}.fq "
+        "{params.R1R2_mRNA}.fq "