assembly.smk 4.05 KiB
def khmer_input(wildcards):
	if config["SORTMERNA"]:
		return {"R1R2": "work/sortmerna/{wildcards.reads}_mRNA.fastq.gz".format(wildcards=wildcards)}
	else:
		return {"R1R2": "DATA/trim/{wildcards.reads}_R1R2.fastq.gz".format(wildcards=wildcards)}
rule khmer:
	input:
		unpack(khmer_input)
	output:
		R1R2 = "work/khmer/{reads}_R1R2.fastq.gz",
		log = "work/khmer/{reads}.log"
	threads:
		config["THREADS"]
	params:
		mem_tot = (config["MEM"] * config["THREADS"] * 1e9),
		K = 32,
		C = 100
	shell:
		"normalize-by-median.py "
		"--ksize {params.K} "
		"--max-memory-usage {params.mem_tot} "
		"--cutoff {params.C} "
		"--paired "
		"--report {output.log} "
		"--output {output.R1R2} "
		"--gzip "
		"{input.R1R2}"
def assembly_input(wildcards):
	if wildcards.reads == "coassembly":
		if config["NORMALIZATION"]:
			return [expand("work/khmer/{reads}_R1R2.fastq.gz", reads=SAMPLES)]
		elif config["SORTMERNA"]:
			return [expand("work/sortmerna/{reads}_mRNA.fastq.gz", reads=SAMPLES)]
		else:
			return [expand("DATA/trim/{reads}_R1R2.fastq.gz", reads=SAMPLES)]
	else:
		if config["NORMALIZATION"]:
			return {"R1R2": "work/khmer/{wildcards.reads}_R1R2.fastq.gz".format(wildcards=wildcards)}
		elif config["SORTMERNA"]:
			return {"R1R2": "work/sortmerna/{wildcards.reads}_mRNA.fastq.gz".format(wildcards=wildcards)}
		else:
			return {"R1R2": "DATA/trim/{wildcards.reads}_R1R2.fastq.gz".format(wildcards=wildcards)}
rule megahit:
	input:
		unpack(assembly_input)
	output:
		"work/megahit/{reads}/{reads}.contigs.fa"
	threads:
		config["THREADS"]
	params:
		min_len = config["CONTIGS_LEN"],
		mem_tot = int(config["MEM"] * config["THREADS"] * 1e9),
		input = lambda wildcards, input: ",".join(input),
		output = lambda wildcards, output: os.path.dirname(str(output))
	shell:
		"megahit "
		"--12 {params.input} "
		"--continue "
		"--preset meta-large "
		"--num-cpu-threads {threads} "
		"--memory {params.mem_tot} "
		"--out-dir {params.output} "
		"--out-prefix {wildcards.reads} "
		"--min-contig-len {params.min_len} "
		"--verbose"
rule metaspades:
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
input: unpack(assembly_input) output: "work/metaSPADES/{reads}/{reads}.contigs.fasta" threads: config["THREADS"] params: min_len = config["CONTIGS_LEN"], input = lambda wildcards, input: " --12 ".join(input), output = lambda wildcards, output: os.path.dirname(str(output)) shell: "spades.py " "--threads {threads} " "--meta " "--12 {params.input} " "-o {params.output} " " ; " "filterSeq.py " "-i work/metaSPADES/{wildcards.reads}/contigs.fasta " "-o work/metaSPADES/{wildcards.reads}/{wildcards.reads}.contigs.fasta " "--min-length {params.min_len} " def contigs_input(wildcards): if config["ASSEMBLER"] == "megahit": return ["work/megahit/{wildcards.sample}/{wildcards.sample}.contigs.fa".format(wildcards=wildcards)] elif config["ASSEMBLER"] == "metaspades": return ["work/metaSPADES/{wildcards.sample}/{wildcards.sample}.contigs.fasta".format(wildcards=wildcards)] def contigsExpand_input(wildcards): if config["ASSEMBLER"] == "megahit": return [expand("work/megahit/{sample}/{sample}.contigs.fa", sample=SAMPLES)] elif config["ASSEMBLER"] == "metaspades": return [expand("work/metaSPADES/{sample}/{sample}.contigs.fasta", sample=SAMPLES)] def coassembly_contigs_input(wildcards): if config["ASSEMBLER"] == "megahit": return ["work/megahit/coassembly/coassembly.contigs.fa"] elif config["ASSEMBLER"] == "metaspades": return ["work/metaSPADES/coassembly/coassembly.contigs.fasta"] rule quast: input: unpack(contigsExpand_input) output: "report/quast_results/report.html" threads: config["THREADS"] params: output = lambda wildcards, output: os.path.dirname(str(output)) shell: "quast " "--mgm " "--output-dir {params.output} " "-L " "--threads {threads} " "{input} " rule coassembly_quast: input: unpack(coassembly_contigs_input) output: "report/quast_coassembly/report.html" threads: config["THREADS"] params: output = lambda wildcards, output: os.path.dirname(str(output)) shell: "quast " "--mgm "
141142143144145
"--output-dir {params.output} " "-L " "--threads {threads} " "{input} "