diff --git a/binning.smk b/binning.smk
new file mode 100644
index 0000000000000000000000000000000000000000..9a26c6a2457fcdafb083a07dc4c338eb1e012d6b
--- /dev/null
+++ b/binning.smk
@@ -0,0 +1,111 @@
+# Contig binning with MetaBAT2, per-bin extraction of the Kaiju table and
+# CheckM lineage workflow on the resulting bins.
+
+# Declared as a checkpoint because the set of bin files written into the
+# output directory is only known once metabat2 has run (see aggregate_metabat).
+checkpoint metabat:
+    input:
+        # contigs_input is defined outside this file.
+        unpack(contigs_input)
+    output:
+        clusters = directory("work/metabat/{sample}")
+    params:
+        # Prefix given to --outFile; the rules below read <prefix>.<num>.fa
+        # and <prefix>.unbinned.fa from it.
+        output = "work/metabat/{sample}/{sample}",
+        # Value given to --minClsSize.
+        clustrsize = 200000
+    threads:
+        8
+    shell:
+        "conda activate metabat2-2.12.1 "
+        "&& "
+        "metabat2 "
+        "--inFile {input} "
+        "--outFile {params.output} "
+        "--minContig 5000 "
+        "--minClsSize {params.clustrsize} "
+        "--numThreads {threads} "
+        "--unbinned "
+        "--verbose "
+        "&& "
+        "conda deactivate "
+
+# For one bin, keep the lines of the sample's Kaiju table that contain one of
+# the bin's sequence names as a whole word.
+rule metabat_kaiju:
+    input:
+        clustr = "work/metabat/{sample}/{sample}.{num}.fa",
+        kaiju = "report/contigs_{sample}-taxNames.tsv"
+    output:
+        "work/metabat/{sample}/{sample}.{num}_kaiju.tsv"
+    shell:
+        # sed emits the FASTA header lines without the leading '>' so they can
+        # be used as literal patterns. The bin file is a positional argument:
+        # with grep, --file names a pattern file, not the file to search.
+        "sed "
+        "--quiet "
+        "'s/^>//p' "
+        "{input.clustr} "
+        " | "
+        "grep "
+        "--no-messages "
+        "--word-regexp "
+        "--fixed-strings "
+        "--file - "
+        "{input.kaiju} "
+        "> "
+        "{output} "
+
+def aggregate_metabat(wildcards):
+    """Return the per-bin Kaiju tables to build for one sample.
+
+    Resolving the checkpoint makes Snakemake run metabat first and call this
+    function again afterwards, when the bin files can be listed.
+    """
+    checkpoint_output = checkpoints.metabat.get(**wildcards).output.clusters
+    # Pin the sample name and require "<digits>.fa": an unconstrained
+    # "{sample}.{i}" does not isolate the bin number and also matches
+    # unbinned.fa and the *_kaiju.tsv files written into the same directory.
+    bin_ids = glob_wildcards(
+        os.path.join(checkpoint_output, wildcards.sample + ".{i,[0-9]+}.fa")
+    ).i
+    return expand(
+        "work/metabat/{sample}/{sample}.{i}_kaiju.tsv",
+        sample=wildcards.sample,
+        i=bin_ids,
+    )
+
+# Flag file marking that every per-bin Kaiju table of a sample exists.
+rule resume_metabat:
+    input:
+        aggregate_metabat
+    output:
+        done = temp("work/metabat/{sample}.done")
+    shell:
+        "touch {output.done} "
+
+rule checkm:
+    input:
+        # The checkpoint's directory output: no rule declares
+        # {sample}.unbinned.fa as an output, so it cannot be requested as a file.
+        "work/metabat/{sample}"
+    output:
+        "work/checkm/{sample}/lineage.ms"
+    threads:
+        4
+    params:
+        # checkm lineage_wf takes the bin directory and the output directory.
+        input = lambda wildcards, input: str(input),
+        output = lambda wildcards, output: os.path.dirname(str(output))
+    shell:
+        "conda activate checkm-genome-1.0.18 "
+        "&& "
+        "checkm "
+        "lineage_wf "
+        "-t {threads} "
+        "-x fa "
+        "{params.input} "
+        "{params.output} "
+        "&& "
+        "conda deactivate "
diff --git a/catalogue.smk b/catalogue.smk index 
ef0adda95db1c00f7bde422b4788ee27ef14e34e..7ddd33256d964b9933a00a0854da3a0bb09bffbb 100644 --- a/catalogue.smk +++ b/catalogue.smk @@ -114,81 +114,3 @@ rule faa_catalogue: "{input.faa} " "&& " "conda deactivate " - -checkpoint metabat: - input: - unpack(contigs_input) - output: - clusters = directory("work/metabat/{sample}") - params: - #output = "{output}/{sample}", - output = "work/metabat/{sample}/{sample}", - clustrsize = 200000 - threads: - 8 - shell: - "conda activate metabat2-2.12.1 " - "&& " - "metabat2 " - "--inFile {input} " - "--outFile {params.output} " - "--minContig 5000 " - "--minClsSize {params.clustrsize} " - "--numThreads {threads} " - "--unbinned " - "--verbose " - "&& " - "conda deactivate " - -rule metabat_kaiju: - input: - clustr = "work/metabat/{sample}/{sample}.{num}.fa", - kaiju = "report/contigs_{sample}-taxNames.tsv" - output: - "work/metabat/{sample}/{sample}.{num}_kaiju.tsv" - shell: - "grep " - "'>' " - "--file {input.clustr} " - " | " - "grep " - "--no-messages " - "--word-regexp " - "--file - " - "{input.kaiju} " - "> " - "{output} " - -def aggregate_metabat(wildcards): - checkpoint_output = checkpoints.metabat.get(**wildcards).output.clusters - return expand("work/metabat/{sample}/{sample}.{i}_kaiju.tsv", sample=wildcards.sample, i=glob_wildcards(os.path.join(checkpoint_output, "{sample}.{i}")).i) - -rule resume_metabat: - input: - aggregate_metabat - output: - done = temp("work/metabat/{sample}.done") - shell: - "touch {output.done} " - -rule checkm: - input: - "work/metabat/{sample}/{sample}.unbinned.fa" - output: - "work/checkm/{sample}/lineage.ms" - threads: - 4 - params: - input = lambda wildcards, input: os.path.dirname(str(input)), - output = lambda wildcards, output: os.path.dirname(str(output)) - shell: - "conda activate checkm-genome-1.0.18 " - "&& " - "checkm " - "lineage_wf " - "-t {threads} " - "-x fa " - "{params.input} " - "{params.output} " - "&& " - "conda deactivate " diff --git a/global.smk b/global.smk index 
5cf1cc56c46458daaf6042336c71be3c5445d13a..45ac098734ead81f467df01b869c24fd83e4584b 100644 --- a/global.smk +++ b/global.smk @@ -39,5 +39,6 @@ include: "../workflow_metagenomics/assembly.smk" include: "../workflow_metagenomics/annotation.smk" include: "../workflow_metagenomics/count.smk" include: "../workflow_metagenomics/catalogue.smk" +include: "../workflow_metagenomics/binning.smk" include: "../workflow_metagenomics/add-on.smk" include: "../workflow_metagenomics/virome.smk"