# annotation.smk
rule FragGeneScan:
	input:
		unpack(contigs_input)
	output:
		faa = "work/FGS/{sample}_FGS.faa",
		ffn = "work/FGS/{sample}_FGS.ffn",
		gff = "work/FGS/{sample}_FGS.gff",
		out = temp("work/FGS/{sample}_FGS.out")
	threads:
		config["THREADS"]
	params:
		output = "work/FGS/{sample}_FGS"
	shell:
		"run_FragGeneScan.pl "
		"-genome={input} "
		"-out={params.output} "
		"-complete=0 " #negative values with complete=1
		"-train=complete "
		"-thread={threads} "
		" ; "
		"sed --in-place 's/*/N/' {output.faa} "
rule prodigal:
	input:
		unpack(contigs_input)
	output:
		faa = "work/prodigal/{sample}_prodigal.faa",
		ffn = "work/prodigal/{sample}_prodigal.ffn",
		gff = "work/prodigal/{sample}_prodigal.gff"
	shell:
		"prodigal "
		"-f gff "
		"-i {input} "
		"-a {output.faa} "
		"-d {output.ffn} "
		"-o {output.gff} "
		"-g 11 "
		"-po meta "
rule prokka:
	input:
		unpack(contigs_input)
	output:
		err = "work/prokka/{sample}_prokka.err",
		faa = "work/prokka/{sample}_prokka.faa",
		ffn = "work/prokka/{sample}_prokka.ffn",
		fna = "work/prokka/{sample}_prokka.fna",
		fsa = "work/prokka/{sample}_prokka.fsa",
		gbk = "work/prokka/{sample}_prokka.gbk",
		gff = "work/prokka/{sample}_prokka.gff",
		log = "work/prokka/{sample}_prokka.log",
		sqn = "work/prokka/{sample}_prokka.sqn",
		tbl = "work/prokka/{sample}_prokka.tbl",
		tsv = "work/prokka/{sample}_prokka.tsv",
		txt = "work/prokka/{sample}_prokka.txt"
	params:
		output = lambda wildcards, output: os.path.dirname(str(output.gff)),
	threads:
		config["THREADS"]
	shell:
		"prokka "
		"--output-dir {params.output} "
		"--force "
		"--prefix {wildcards.sample}_prokka "
		"--gffver 3 "
		"--metagenome "
		"--cpus {threads} "
		"--notrna "
		"--norrna "
		"{input} "
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
rule diamond: input: faa = "work/FGS/{sample}_FGS.faa", database = "/db/outils/diamond-0.9.24/nr_tax.dmnd" output: daa = "work/DIAMOND/{sample}.daa", unaligned = "work/DIAMOND/{sample}_unaligned.faa" threads: config["THREADS"] shell: "diamond " "blastp " "--db {input.database} " "--query {input.faa} " "--sensitive " "--max-target-seqs 1 " "--threads {threads} " "--out {output.daa} " "--outfmt 100 " "--salltitles " "--sallseqid " "--un {output.unaligned} " "--verbose " rule diamondView: input: daa = "work/DIAMOND/{sample}.daa" output: tsv = "report/diamond-NR_{sample}.tsv" params: keywords = "qseqid qlen sseqid sallseqid slen qstart qend sstart send qseq full_qseq sseq full_sseq evalue bitscore score length pident nident mismatch positive gapopen gaps ppos qframe btop stitle salltitles qcovhsp qtitle qqual" #staxids shell: "diamond " "view " "--daa {input.daa} " "--outfmt 6 {params.keywords} " "--out {output.tsv} " "--header " rule eggnog: input: faa = "work/FGS/{reads}_FGS.faa" output: hits = "work/eggnog/{reads}-{database}.emapper.hmm_hits", orthologs = "work/eggnog/{reads}-{database}.emapper.seed_orthologs", annotations = "work/eggnog/{reads}-{database}.emapper.annotations" threads: config["THREADS"] params: output = "work/eggnog/{reads}-{database}" shell: "emapper.py " "--cpu {threads} " "--database {wildcards.database} " "--hmm_maxhits 1 " "--output {params.output} " "-i {input.faa} " rule interproscan: input: faa = "work/FGS/{sample}_FGS.faa" output: tsv = "work/interproscan/{sample}_FGS.faa.tsv", gff3 = "work/interproscan/{sample}_FGS.faa.gff3", html = "work/interproscan/{sample}_FGS.faa.html.tar.gz" params: output = lambda wildcards, output: os.path.dirname(str(output.tsv)), mem_tot = int(config["MEM"] * config["THREADS"] * 1e9) threads:
141142143144145146147148149150151152153
config["THREADS"] shell: "interproscan.sh " "--cpu {threads} " "--highmem {params.mem_tot} " "--input {input.faa} " "--seqtype p " "--output-dir {params.output} " "--formats tsv,gff3,html " "--iprlookup " "--goterms " "--pathways "