Commit 47503c73 authored by Midoux Cedric
Browse files

diamond prokka interproscan

parent 53c3d377
......@@ -56,3 +56,104 @@ def gff_input(wildcards):
return {"gff": "work/FGS/{wildcards.sample}_FGS.gff".format(wildcards=wildcards)}
elif config["PROTEINS-PREDICTOR"] == "prodigal":
return {"gff": "work/prodigal/{wildcards.sample}_prodigal.gff".format(wildcards=wildcards)}
def db_input(wildcards):
    """Return the DIAMOND database file selected by the ``db`` wildcard.

    Args:
        wildcards: object exposing a ``db`` attribute (the value of the
            ``{db}`` wildcard of the requesting rule).

    Returns:
        A dict with the single key ``"db"`` mapped to the path of the
        ``.dmnd`` database, suitable for ``unpack()`` in a rule's input.

    Raises:
        ValueError: if ``wildcards.db`` is not one of the known databases.
            Previously this case silently returned ``None``, which only
            surfaced later as an unrelated-looking error.
    """
    databases = {
        "nr": "/db/outils/diamond-0.9.24/nr_tax.dmnd",
        "phrogs": "/projet/maiage/save/cmidoux/PhROGs/phrogs.dmnd",
    }
    try:
        return {"db": databases[wildcards.db]}
    except KeyError:
        raise ValueError(
            "Unknown DIAMOND database {!r}; expected one of: {}".format(
                wildcards.db, ", ".join(sorted(databases))
            )
        ) from None
rule diamond:
    # Align the predicted proteins of one sample against a DIAMOND database
    # with `diamond blastp --sensitive`, keeping a single target per query.
    # Results are written as a DAA archive (--outfmt 100); queries without
    # any hit are written to the `unaligned` FASTA file (--un).
    input:
        # faa_input is defined outside this section and is expected to
        # provide the `faa` key used below; db_input provides `db`.
        unpack(faa_input),
        unpack(db_input)
    output:
        daa = "work/DIAMOND/{db}_{sample}.daa",
        unaligned = "work/DIAMOND/{db}_{sample}_unaligned.faa"
    wildcard_constraints:
        # Without this, "{db}_{sample}" is ambiguous as soon as a sample name
        # contains "_": the greedy match would assign e.g. "nr_sample" to
        # {db}, which db_input cannot resolve. Database names therefore must
        # not contain an underscore.
        db = "[^_]+"
    threads:
        20
    shell:
        "diamond "
        "blastp "
        "--db {input.db} "
        "--query {input.faa} "
        "--sensitive "
        "--max-target-seqs 1 "
        "--threads {threads} "
        "--out {output.daa} "
        "--outfmt 100 "
        "--salltitles "
        "--sallseqid "
        "--un {output.unaligned} "
        "--verbose "
        "--tmpdir /projet/tmp/ "
rule diamondView:
    # Convert a DIAMOND DAA archive into a tabular report (--outfmt 6) with
    # a header line, using the column list given in params.keywords.
    input:
        daa = "work/DIAMOND/{db}_{sample}.daa"
    output:
        tsv = "report/diamond_{db}_{sample}.tsv"
    wildcard_constraints:
        # "{db}_{sample}" is ambiguous when a sample name contains "_";
        # forbidding underscores in {db} makes the split deterministic
        # (everything after the first "_" belongs to {sample}).
        db = "[^_]+"
    params:
        # Columns of the tabular output, in order. staxids is deliberately
        # left out (kept as a trailing comment in the original).
        keywords = "qseqid qlen sseqid sallseqid slen qstart qend sstart send qseq full_qseq sseq full_sseq evalue bitscore score length pident nident mismatch positive gapopen gaps ppos qframe btop stitle salltitles qcovhsp qtitle qqual" #staxids
    shell:
        "diamond "
        "view "
        "--daa {input.daa} "
        "--outfmt 6 {params.keywords} "
        "--out {output.tsv} "
        "--header "
rule prokka:
    # Annotate the contigs of one sample with Prokka in --metagenome mode.
    # Every declared output is named "{sample}_prokka.<ext>", matching the
    # --prefix passed on the command line.
    input:
        # contigs_input is defined outside this section; all files it
        # returns are passed to prokka as positional arguments via {input}.
        unpack(contigs_input)
    output:
        err = "work/prokka/{sample}_prokka.err",
        faa = "work/prokka/{sample}_prokka.faa",
        ffn = "work/prokka/{sample}_prokka.ffn",
        fna = "work/prokka/{sample}_prokka.fna",
        fsa = "work/prokka/{sample}_prokka.fsa",
        gbk = "work/prokka/{sample}_prokka.gbk",
        gff = "work/prokka/{sample}_prokka.gff",
        log = "work/prokka/{sample}_prokka.log",
        sqn = "work/prokka/{sample}_prokka.sqn",
        tbl = "work/prokka/{sample}_prokka.tbl",
        tsv = "work/prokka/{sample}_prokka.tsv",
        txt = "work/prokka/{sample}_prokka.txt"
    params:
        # Directory part of the gff output path, handed to --outdir.
        output = lambda wildcards, output: os.path.dirname(str(output.gff)),
    threads:
        8
    # NOTE(review): --force is presumably required because the output
    # directory already exists when the job starts - confirm.
    shell:
        "prokka "
        "--outdir {params.output} "
        "--force "
        "--prefix {wildcards.sample}_prokka "
        "--gffver 3 "
        "--metagenome "
        "--cpus {threads} "
        "--compliant "
        "{input} "
rule interproscan:
    # Run InterProScan on the predicted proteins of one sample (--seqtype p),
    # requesting tsv, gff3 and html results together with InterPro lookup,
    # GO terms and pathway annotations.
    input:
        # faa_input is defined outside this section and is expected to
        # provide the `faa` key used below.
        unpack(faa_input)
    output:
        # NOTE(review): these names assume the result files are called
        # "{sample}.faa.<format>"; verify that this matches the basename of
        # the file returned by faa_input.
        tsv = "work/interproscan/{sample}.faa.tsv",
        gff3 = "work/interproscan/{sample}.faa.gff3",
        html = "work/interproscan/{sample}.faa.html.tar.gz"
    params:
        # Directory part of the tsv output path, handed to --output-dir.
        output = lambda wildcards, output: os.path.dirname(str(output.tsv))
    threads:
        8
    shell:
        "interproscan.sh "
        "--cpu {threads} "
        "--highmem "
        "--input {input.faa} "
        "--seqtype p "
        "--output-dir {params.output} "
        "--formats tsv,gff3,html "
        "--iprlookup "
        "--goterms "
        "--pathways "
\ No newline at end of file
......@@ -29,6 +29,6 @@
"diamond" :
{
"queue" : "highmem.q,maiage.q",
"cluster" : "-l h_vmem={MEM}G"
"cluster" : "-l h_vmem=4G"
}
}
......@@ -10,6 +10,9 @@ rule all:
expand("report/reads_{sample}-krona.html", sample=config["SAMPLES"]),
"report/contigs_coassembly-taxNames.tsv",
"report/quast_coassembly/report.html",
"report/diamond_nr_coassembly.tsv",
"work/prokka/coassembly_prokka.tsv",
"work/interproscan/coassembly.faa.tsv",
include: "../workflow_metagenomics/quality.smk"
include: "../workflow_metagenomics/preprocess.smk"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment