From 094afed7c72448c0382b5c0236c4c26510086bf7 Mon Sep 17 00:00:00 2001
From: Cedric Midoux <cedric.midoux@inra.fr>
Date: Tue, 30 Jun 2020 09:09:12 +0200
Subject: [PATCH] blastn spacers crispr

---
 global.smk |  1 +
 virome.smk | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/global.smk b/global.smk
index 4aea719..5cf1cc5 100644
--- a/global.smk
+++ b/global.smk
@@ -29,6 +29,7 @@ rule all:
 		#virome
 		expand("report/viromeQC-{sample}.txt", sample=config["SAMPLES"]),
 		expand("work/virhostmatcher/{sample}/done", sample=config["SAMPLES"]),
+		expand("work/blast/spacer_{sample}.tsv", sample=config["SAMPLES"]),
 
 
 include: "../workflow_metagenomics/quality.smk"
diff --git a/virome.smk b/virome.smk
index 42f2a70..e78abfb 100644
--- a/virome.smk
+++ b/virome.smk
@@ -45,3 +45,31 @@ rule virhostmatcher:
 		# "conda deactivate "
 		"&& "
 		"touch {output.done} "
+
+rule crispr:
+	input:
+		unpack(contigs_input)
+	output:
+		"work/blast/{db}_{sample}.tsv"
+	params:
+		db = "DATA/blast/{db}", # wget https://crisprcas.i2bc.paris-saclay.fr/Home/DownloadFile?filename=20190618_spacer_34.zip -O DATA/spacer.zip && unzip DATA/spacer.zip -d DATA && rm DATA/spacer.zip && makeblastdb -in DATA/20190618_spacer_34.fasta -dbtype nucl -out DATA/blast/spacer && rm DATA/20190618_spacer_34.fasta
+		header = "qaccver qlen qseq saccver slen sseq pident mismatch gapopen evalue bitscore qcovs length qstart qend sstart send"
+	threads:
+		20
+	shell:
+		"conda activate blast-2.9.0 "
+		"&& "
+		"blastn "
+		"-task blastn-short "
+		"-db {params.db} "
+		"-query {input} "
+		"-out {output} "
+		"-outfmt '6 {params.header}' "
+		"-evalue 0.003 "
+		"-word_size 6 "
+		"-gapopen 10 "
+		"-gapextend 2 "
+		"-penalty -1 "
+		"-max_target_seqs 1000 "
+		"&& "
+		"conda deactivate "
-- 
GitLab