# preprocess.smk — read preprocessing rules (fastp trimming, read interleaving, SortMeRNA filtering)
# Trim paired-end reads with fastp.
#
# Reads DATA/raw/{reads}_R1/_R2.fastq.gz, writes the trimmed pair to
# DATA/trim/ and the per-sample HTML and JSON reports to work/fastp/.
# Reads shorter than 50 bases after trimming are discarded
# (--length_required 50).
rule fastp:
	input:
		R1 = "DATA/raw/{reads}_R1.fastq.gz",
		R2 = "DATA/raw/{reads}_R2.fastq.gz"
	output:
		R1 = "DATA/trim/{reads}_R1.fastq.gz",
		R2 = "DATA/trim/{reads}_R2.fastq.gz",
		html = "work/fastp/{reads}_fastp.html",
		json = "work/fastp/{reads}_fastp.json"
	# The directive previously had no value, which is not valid and left
	# {threads} below without a declared thread count.
	# NOTE(review): 4 is a placeholder default — confirm the intended value.
	threads: 4
	# NOTE(review): `conda activate` assumes conda's shell hook is available in
	# the shell Snakemake spawns — confirm on the target cluster.
	shell:
		"conda activate fastp-0.20.0 "
		"&& "
		"fastp "
		"--in1 {input.R1} "
		"--in2 {input.R2} "
		"--out1 {output.R1} "
		"--out2 {output.R2} "
		"--verbose "
		"--length_required 50 "
		"--html {output.html} "
		"--json {output.json} "
		"--report_title \"fastp {wildcards.reads} report\" "
		"--thread {threads} "
		"&& "
		"conda deactivate "
# Merge the trimmed R1/R2 files of a sample into a single gzipped,
# interleaved FASTQ using khmer's interleave-reads.py.
rule interleave:
	input:
		R1 = "DATA/trim/{reads}_R1.fastq.gz",
		R2 = "DATA/trim/{reads}_R2.fastq.gz"
	output:
		R1R2 = "DATA/trim/{reads}_R1R2.fastq.gz"
	# The adjacent string literals below concatenate into one command line:
	# activate the environment, interleave, then deactivate.
	shell:
		"conda activate khmer-3.0.0a3 && "
		"interleave-reads.py --output {output.R1R2} --gzip "
		"{input.R1} {input.R2} && "
		"conda deactivate "
# Split interleaved reads into rRNA and non-rRNA (mRNA) fractions with SortMeRNA.
#
# Steps, chained with `&&` so a failure stops the rule:
#   1. unpigz --keep decompresses the interleaved input next to the .gz
#      (declared as a temp() output so Snakemake removes it afterwards).
#   2. sortmerna writes aligned reads under the R1R2_rRNA prefix and the
#      remaining reads under the R1R2_mRNA prefix.
#   3. pigz compresses both .fastq files into the declared .fastq.gz outputs.
rule sortmerna:
	input:
		R1R2 = "DATA/trim/{reads}_R1R2.fastq.gz"
	output:
		R1R2 = temp("DATA/trim/{reads}_R1R2.fastq"),
		R1R2_log = "work/sortmerna/{reads}_rRNA.log",
		R1R2_rRNA = "work/sortmerna/{reads}_rRNA.fastq.gz",
		R1R2_mRNA = "work/sortmerna/{reads}_mRNA.fastq.gz"
	params:
		# Output prefixes handed to sortmerna; the shell command appends ".fastq".
		R1R2_rRNA = "work/sortmerna/{reads}_rRNA",
		R1R2_mRNA = "work/sortmerna/{reads}_mRNA"
	# The directive previously had no value, which is not valid and left
	# {threads} below without a declared thread count.
	# NOTE(review): 4 is a placeholder default — confirm the intended value.
	threads: 4
	# NOTE(review): `conda activate` assumes conda's shell hook is available in
	# the shell Snakemake spawns — confirm on the target cluster.
	# NOTE(review): the colon-separated "fasta,index" --ref form and -a/--log
	# look like SortMeRNA 2.x syntax while the environment is named
	# sortmerna-4.2.0 — verify against the installed version.
	shell:
		"unpigz "
		"-p {threads} "
		"--keep "
		"{input.R1R2} "
		"&& "
		"conda activate sortmerna-4.2.0 "
		"&& "
		"sortmerna "
		"--ref /usr/local/genome/src/sortmerna-2.0/rRNA_databases/rfam-5s-database-id98.fasta,/projet/mig/work/orue/SORTMERNA_index/index/rfam-5s:"
		"/usr/local/genome/src/sortmerna-2.0/rRNA_databases/rfam-5.8s-database-id98.fasta,/projet/mig/work/orue/SORTMERNA_index/index/rfam-5.8s:"
		"/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-arc-16s-id95.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-arc-16s-id95:"
		"/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-arc-23s-id98.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-arc-23s-id98:"
		"/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-bac-16s-id90.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-bac-16s-id90:"
		"/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-bac-23s-id98.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-bac-23s-id98:"
		"/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-euk-18s-id95.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-euk-18s-id95:"
		"/usr/local/genome/src/sortmerna-2.0/rRNA_databases/silva-euk-28s-id98.fasta,/projet/mig/work/orue/SORTMERNA_index/index/silva-euk-28s-id98 "
		"--reads {output.R1R2} "
		"--aligned {params.R1R2_rRNA} "
		"--other {params.R1R2_mRNA} "
		"--fastx "
		"--log "
		"--paired_in " #both paired-end reads go in --aligned
		"-a {threads} "
		"-v "
		"&& "
		"conda deactivate "
		"&& "
		"pigz "
		"-p {threads} "
		"{params.R1R2_rRNA}.fastq "
		"{params.R1R2_mRNA}.fastq "