# binning.smk — metagenomic binning: MetaBAT2 -> per-bin Kaiju taxonomy -> CheckM
# Bin assembled contigs for one sample with MetaBAT2.
# Declared as a checkpoint because the number of bins produced is not
# known in advance; downstream rules glob the output directory
# (see aggregate_metabat_kaiju).
checkpoint metabat:
	input:
		unpack(contigs_input)
	output:
		clusters = directory("work/metabat/{sample}/bin")
	params:
		clustrsize = 200000  # --minClsSize: minimum bin size in bp
	threads:
		# FIX: the directive was empty, which is a Snakemake syntax error.
		8
	shell:
		"conda activate metabat2-2.12.1 "
		"&& "
		"metabat2 "
		"--inFile {input} "
		"--outFile {output.clusters}/{wildcards.sample} "
		"--minContig 5000 "
		"--minClsSize {params.clustrsize} "
		"--numThreads {threads} "
		"--unbinned "
		"--verbose "
		"&& "
		"conda deactivate "
# Extract the Kaiju taxon assignments for the contigs of one bin:
# list the contig names in the bin FASTA, then select the matching rows
# of the per-sample Kaiju taxon-name table.
rule metabat_kaiju:
	input:
		clustr = "work/metabat/{sample}/bin/{sample}.{num}.fa",
		kaiju = "report/contigs_{sample}-taxNames.tsv"
	output:
		"work/metabat/{sample}/kaiju/{sample}.{num}_kaiju.tsv"
	shell:
		# FIX: the original ran `grep '>' --file {{input.clustr}}`, which
		# makes grep read its PATTERNS from the FASTA and search a file
		# literally named '>'. Extract the headers positionally instead,
		# and strip the leading '>' so the names can match the Kaiju
		# table (its rows presumably carry bare contig names — verify).
		"grep '>' {input.clustr} "
		"| sed 's/^>//' "
		"| grep "
		"--no-messages "
		"--word-regexp "
		"--file - "
		"{input.kaiju} "
		"> "
		"{output} "
def aggregate_metabat_kaiju(wildcards):
	"""Input function for metabat_kaiju_done.

	Blocks until the metabat checkpoint for this sample has run, then
	globs its bin directory and returns one per-bin Kaiju table path
	per discovered bin id (including 'unbinned' when metabat emitted it).
	"""
	checkpoint_output = checkpoints.metabat.get(**wildcards).output[0]
	# Pin the sample name in the glob pattern: the directory is already
	# per-sample, and globbing a free "{sample}.{i}" would let {i}
	# mispair if file names contain extra dots.
	pattern = os.path.join(checkpoint_output, f"{wildcards.sample}.{{i}}.fa")
	return expand(
		"work/metabat/{sample}/kaiju/{sample}.{i}_kaiju.tsv",
		sample=wildcards.sample,
		i=glob_wildcards(pattern).i,
	)
# Barrier rule: its sentinel file exists once every per-bin Kaiju table
# for the sample has been produced (the bin set comes from the metabat
# checkpoint via aggregate_metabat_kaiju).
rule metabat_kaiju_done:
	input:
		aggregate_metabat_kaiju
	output:
		# Sentinel only; safe to discard once downstream targets exist.
		temp("work/metabat/{sample}/kaiju.done")
	shell:
		"touch {output}"
# Assess bin completeness/contamination with CheckM's lineage workflow
# on the MetaBAT2 bin directory of one sample.
rule checkm:
	input:
		# FIX: was wrapped in directory(); Snakemake reserves directory()
		# for outputs — inputs are plain paths.
		"work/metabat/{sample}/bin"
	output:
		"work/checkm/{sample}/lineage.ms"
	threads:
		# FIX: the directive was empty, which is a Snakemake syntax error.
		8
	params:
		# CheckM wants the output *directory*; derive it from the output file.
		output = lambda wildcards, output: os.path.dirname(str(output))
	shell:
		# Reconstructed: the original shell block was garbled in the source
		# (fragments fused onto one line with line-number residue).
		"conda activate checkm-genome-1.0.18 "
		"&& "
		"checkm "
		"lineage_wf "
		"-t {threads} "
		"-x fa "
		"{input} "
		"{params.output} "
		"&& "
		"conda deactivate "