Commit 19f5a921 authored by Midoux Cedric
Browse files

preprocess.smk

parent 80ceae2f
......@@ -11,11 +11,6 @@
"queue" : "highmem.q,maiage.q",
"cluster" : "-l h_vmem={MEM}G"
},
"interleave" :
{
"queue" : "highmem.q,maiage.q",
"cluster" : "-l h_vmem={MEM}G"
},
"khmer" :
{
"queue" : "highmem.q,maiage.q",
......
......@@ -9,3 +9,4 @@ rule all:
"report/multiqc_report.html",
include: "../workflow_metagenomics/quality.smk"
include: "../workflow_metagenomics/preprocess.smk"
# Run fastp on one paired-end sample.
# Takes the raw R1/R2 files from DATA/raw/, writes the processed pair to
# DATA/trim/ and the HTML + JSON reports to work/fastp/.
rule fastp:
    input:
        R1 = "DATA/raw/{reads}_R1.fastq.gz",
        R2 = "DATA/raw/{reads}_R2.fastq.gz"
    output:
        R1 = "DATA/trim/{reads}_R1.fastq.gz",
        R2 = "DATA/trim/{reads}_R2.fastq.gz",
        html = "work/fastp/{reads}_fastp.html",
        json = "work/fastp/{reads}_fastp.json"
    threads: 4
    shell:
        "fastp --in1 {input.R1} --in2 {input.R2} "  # paired inputs
        "--out1 {output.R1} --out2 {output.R2} "  # paired outputs
        "--verbose --length_required 50 "
        "--html {output.html} --json {output.json} "  # per-sample reports
        "--report_title \"fastp {wildcards.reads} report\" "
        "--thread {threads} "
# Combine the trimmed R1 and R2 files of a sample into a single gzipped
# R1R2 file using interleave-reads.py.
rule interleave:
    input:
        R1 = "DATA/trim/{reads}_R1.fastq.gz",
        R2 = "DATA/trim/{reads}_R2.fastq.gz"
    output:
        R1R2 = "DATA/trim/{reads}_R1R2.fastq.gz"
    shell:
        "interleave-reads.py --output {output.R1R2} --gzip "
        "{input.R1} {input.R2} "
# Split an interleaved sample into rRNA and mRNA read sets with sortmerna.
# Three shell steps, run in order:
#   1. unpigz --keep decompresses the interleaved input next to it (the
#      uncompressed copy is declared temp()),
#   2. sortmerna writes <params.R1R2_rRNA>.fastq (--aligned) and
#      <params.R1R2_mRNA>.fastq (--other),
#   3. pigz compresses both result files to the declared .fastq.gz outputs.
rule sortmerna:
    input:
        R1R2 = "DATA/trim/{reads}_R1R2.fastq.gz"
    output:
        R1R2 = temp("DATA/trim/{reads}_R1R2.fastq"),
        R1R2_log = "work/sortmerna/{reads}_rRNA.log",
        R1R2_rRNA = "work/sortmerna/{reads}_rRNA.fastq.gz",
        R1R2_mRNA = "work/sortmerna/{reads}_mRNA.fastq.gz"
    params:
        R1R2_rRNA = "work/sortmerna/{reads}_rRNA",
        R1R2_mRNA = "work/sortmerna/{reads}_mRNA"
    threads: 8
    shell:
        "unpigz -p {threads} --keep {input.R1R2} "
        " ; "
        "sortmerna "
        "--ref "  # colon-separated list of <fasta>,<index> pairs
        "/usr/local/genome/src/sortmerna-2.0/rRNA_databases/rfam-5s-database-id98.fasta,/projet/mig/work/orue/SORTMERNA_index/index/rfam-5s:"
        "/usr/local/genome/src/sortmerna-2.0/rRNA_databases/rfam-5.8s-database-id98.fasta,/projet/mig/work/orue/SORTMERNA_index/index/rfam-5.8s:"
        "/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-arc-16s-id95.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-arc-16s-id95:"
        "/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-arc-23s-id98.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-arc-23s-id98:"
        "/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-bac-16s-id90.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-bac-16s-id90:"
        "/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-bac-23s-id98.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-bac-23s-id98:"
        "/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-euk-18s-id95.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-euk-18s-id95:"
        "/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-euk-28s-id98.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-euk-28s-id98 "
        "--reads {output.R1R2} --aligned {params.R1R2_rRNA} --other {params.R1R2_mRNA} "
        "--fastx --log "
        "--paired_in "  # both paired-end reads go in --aligned
        "-a {threads} -v "
        " ; "
        "pigz -p {threads} {params.R1R2_rRNA}.fastq {params.R1R2_mRNA}.fastq "
\ No newline at end of file
......@@ -15,7 +15,7 @@ rule fastqc:
rule multiqc:
input:
expand("work/fastqc/{sample}_{R}_fastqc.zip", sample=config["SAMPLES"], R=["R1", "R2"]),
#expand("work/fastp/{sample}_fastp.json", sample=config["SAMPLES"])
expand("work/fastp/{sample}_fastp.json", sample=config["SAMPLES"])
output:
html = "report/multiqc_report.html",
params:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment