diff --git a/count.smk b/count.smk new file mode 100644 index 0000000000000000000000000000000000000000..4a68e76c4a8bce0f12618a66e685f5eb33893e27 --- /dev/null +++ b/count.smk @@ -0,0 +1,80 @@ +rule index : + input: + unpack(contigs_input) + output: + expand("work/index/{{sample}}.{ext}.bt2l", ext=["1", "2", "3", "4", "rev.1", "rev.2"]) + params: + index = "work/index/{sample}" + shell: + "bowtie2-build " + "--large-index " + "{input} " + "{params.index}" + +rule mapping: + input: + R1 = "DATA/trim/{reads}_R1.fastq.gz", + R2 = "DATA/trim/{reads}_R2.fastq.gz", + i = expand("work/index/{{contigs}}.{ext}.bt2l", ext=["1", "2", "3", "4", "rev.1", "rev.2"]) + output: + sam = temp("work/bowtie/align_{contigs}_{reads}.sam"), + bam = "work/bowtie/align_{contigs}_{reads}.bam", + bai = "work/bowtie/align_{contigs}_{reads}.bam.bai" + params: + index = "work/index/{contigs}" + threads: + 8 + shell: + "bowtie2 " + "--threads {threads} " + "-x {params.index} " + "-1 {input.R1} " + "-2 {input.R2} " + "-S {output.sam} " + " ; " + "samtools view " + "--threads {threads} " + "-b " + "-o {output.bam} " + "{output.sam} " + " ; " + "samtools sort " + "--threads {threads} " + "{output.bam} " + "-o {output.bam} " + " ; " + "samtools index " + "{output.bam}" + +rule count_contig: + input: + bam = "work/bowtie/align_{contigs}_{reads}.bam", + bai = "work/bowtie/align_{contigs}_{reads}.bam.bai" + output: + "report/count-contigs_{contigs}_{reads}.tsv" + shell: + "samtools idxstats " + "{input.bam} " + " > " + "{output}" + +rule count_gene: + input: + unpack(gff_input), + bam = "work/bowtie/align_{contigs}_{reads}.bam", + bai = "work/bowtie/align_{contigs}_{reads}.bam.bai" + output: + "report/count-genes_{contigs}_{reads}.tsv" + shell: + "htseq-count " + "--format bam " + "--stranded no " + "--minaqual 10 " + "--type CDS " + "--idattr ID " + "--mode union " + #"--samout {output} " + "{input.bam} " + "{input.gff} " + " > " + "{output} " \ No newline at end of file diff --git a/global.smk b/global.smk index 92c594a2e12003bd0865c4a977bd733a9c310bb5..02ceb11c9749f684e6b5834439cd54994f5bdda6 100644 --- a/global.smk +++ b/global.smk @@ -9,15 +9,18 @@ rule all: "report/multiqc_report.html", expand("report/reads_{sample}-krona.html", sample=config["SAMPLES"]), "report/quast_coassembly/report.html", - expand("report/contigs_{sample}-krona.html", sample=SAMPLES), + expand("report/count-contigs_coassembly_{sample}.tsv", sample=config["SAMPLES"]), "report/contigs_coassembly-taxNames.tsv", + expand("report/count-contigs_coassembly_{sample}.tsv", sample=config["SAMPLES"]), "report/genes_coassembly-taxNames.tsv", "report/diamond_nr_coassembly.tsv", "work/prokka/coassembly_prokka.tsv", "work/interproscan/coassembly.faa.tsv", + include: "../workflow_metagenomics/quality.smk" include: "../workflow_metagenomics/preprocess.smk" include: "../workflow_metagenomics/kaiju.smk" include: "../workflow_metagenomics/assembly.smk" include: "../workflow_metagenomics/annotation.smk" +include: "../workflow_metagenomics/count.smk"