From 923e099cab423ec35c5c681e70b782ca1425069b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Midoux?= <cedric.midoux@inrae.fr>
Date: Mon, 1 Feb 2021 16:24:20 +0100
Subject: [PATCH] checkpoint metabat checkm

---
 binning.smk | 31 +++++++++++++++----------------
 global.smk  |  3 +--
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/binning.smk b/binning.smk
index 9a26c6a..52c8363 100644
--- a/binning.smk
+++ b/binning.smk
@@ -2,10 +2,8 @@ checkpoint metabat:
 	input:
 		unpack(contigs_input)
 	output:
-		clusters = directory("work/metabat/{sample}")
+		clusters = directory("work/metabat/{sample}/bin")
 	params:
-		#output = "{output}/{sample}",
-		output = "work/metabat/{sample}/{sample}",
 		clustrsize = 200000
 	threads:
 		8
@@ -14,7 +12,7 @@ checkpoint metabat:
 		"&& "
 		"metabat2 "
 		"--inFile {input} "
-		"--outFile {params.output} "
+		"--outFile {output}/{wildcards.sample} "
 		"--minContig 5000 "
 		"--minClsSize {params.clustrsize} "
 		"--numThreads {threads} "
@@ -25,10 +23,10 @@ checkpoint metabat:
 
 rule metabat_kaiju:
 	input:
-		clustr = "work/metabat/{sample}/{sample}.{num}.fa",
+		clustr = "work/metabat/{sample}/bin/{sample}.{num}.fa",
 		kaiju = "report/contigs_{sample}-taxNames.tsv"
 	output:
-		"work/metabat/{sample}/{sample}.{num}_kaiju.tsv"
+		"work/metabat/{sample}/kaiju/{sample}.{num}_kaiju.tsv"
 	shell:
 		"grep "
 		"'>' "
@@ -42,27 +40,28 @@ rule metabat_kaiju:
 		"> "
 		"{output} "
 
-def aggregate_metabat(wildcards):
-	checkpoint_output = checkpoints.metabat.get(**wildcards).output.clusters
-	return expand("work/metabat/{sample}/{sample}.{i}_kaiju.tsv", sample=wildcards.sample, i=glob_wildcards(os.path.join(checkpoint_output, "{sample}.{i}")).i)
+def aggregate_metabat_kaiju(wildcards):
+	checkpoint_output = checkpoints.metabat.get(**wildcards).output[0]
+	return expand("work/metabat/{sample}/kaiju/{sample}.{i}_kaiju.tsv",
+		sample=wildcards.sample,
+		i=glob_wildcards(os.path.join(checkpoint_output, "{sample}.{i}.fa")).i)
 
-rule resume_metabat:
+rule metabat_kaiju_done:
 	input:
-		aggregate_metabat
+		aggregate_metabat_kaiju
 	output:
-		done = temp("work/metabat/{sample}.done")
+		temp("work/metabat/{sample}/kaiju.done")
 	shell:
-		"touch {output.done} "
+		"touch {output}"
 
 rule checkm:
 	input:
-		"work/metabat/{sample}/{sample}.unbinned.fa"
+		"work/metabat/{sample}/bin"  # plain path: directory() is an output-only flag; still matches checkpoint metabat's output
 	output:
 		"work/checkm/{sample}/lineage.ms"
 	threads:
 		4
 	params:
-		input = lambda wildcards, input: os.path.dirname(str(input)),
 		output = lambda wildcards, output: os.path.dirname(str(output))
 	shell:
 		"conda activate checkm-genome-1.0.18 "
@@ -71,7 +70,7 @@ rule checkm:
 		"lineage_wf "
 		"-t {threads} "
 		"-x fa "
-		"{params.input} "
+		"{input} "
 		"{params.output} "
 		"&& "
 		"conda deactivate "
diff --git a/global.smk b/global.smk
index 45ac098..f27ef96 100644
--- a/global.smk
+++ b/global.smk
@@ -16,7 +16,6 @@ rule all:
 		"report/diamond_swissprot_coassembly.tsv",
 		"report/contigs_coassembly-taxNames.tsv",
 		"report/genes_coassembly-taxNames.tsv",
-		"work/metabat/coassembly.done",
 		"work/checkm/coassembly/lineage.ms",
 		#catalogue
 		"report/quast_results/report.html",
@@ -25,7 +24,7 @@ rule all:
 		"report/contigs_catalogue-taxNames.tsv",
 		#add-on
 		expand("work/addon/{sample}.small_contigs.taxNames.tsv", sample=config["SAMPLES"]),
-		#expand("work/addon/catalogue.{sample}.unmapped.taxNames.tsv", sample=config["SAMPLES"]),
+		#expand("work/addon/catalogue-{sample}.unmapped.taxNames.tsv", sample=config["SAMPLES"]),
 		#virome
 		expand("report/viromeQC-{sample}.txt", sample=config["SAMPLES"]),
 		expand("work/virhostmatcher/{sample}/done", sample=config["SAMPLES"]),
-- 
GitLab