From 094afed7c72448c0382b5c0236c4c26510086bf7 Mon Sep 17 00:00:00 2001 From: Cedric Midoux <cedric.midoux@inra.fr> Date: Tue, 30 Jun 2020 09:09:12 +0200 Subject: [PATCH] blastn spacers crispr --- global.smk | 1 + virome.smk | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/global.smk b/global.smk index 4aea719..5cf1cc5 100644 --- a/global.smk +++ b/global.smk @@ -29,6 +29,7 @@ rule all: #virome expand("report/viromeQC-{sample}.txt", sample=config["SAMPLES"]), expand("work/virhostmatcher/{sample}/done", sample=config["SAMPLES"]), + expand("work/blast/spacer_{sample}.tsv", sample=config["SAMPLES"]), include: "../workflow_metagenomics/quality.smk" diff --git a/virome.smk b/virome.smk index 42f2a70..e78abfb 100644 --- a/virome.smk +++ b/virome.smk @@ -45,3 +45,31 @@ rule virhostmatcher: # "conda deactivate " "&& " "touch {output.done} " + +rule crispr: + input: + unpack(contigs_input) + output: + "work/blast/{db}_{sample}.tsv" + params: + db = "DATA/blast/{db}", # wget https://crisprcas.i2bc.paris-saclay.fr/Home/DownloadFile?filename=20190618_spacer_34.zip -O DATA/spacer.zip && unzip DATA/spacer.zip -d DATA && rm DATA/spacer.zip && makeblastdb -in DATA/20190618_spacer_34.fasta -dbtype nucl -out DATA/blast/spacer && rm DATA/20190618_spacer_34.fasta + header = "qaccver qlen qseq saccver slen sseq pident mismatch gapopen evalue bitscore qcovs length qstart qend sstart send" + threads: + 20 + shell: + "conda activate blast-2.9.0 " + "&& " + "blastn " + "-task blastn-short " + "-db {params.db} " + "-query {input} " + "-out {output} " + "-outfmt '6 {params.header}' " + "-evalue 0.003 " + "-word_size 6 " + "-gapopen 10 " + "-gapextend 2 " + "-penalty -1 " + "-max_target_seqs 1000 " + "&& " + "conda deactivate " -- GitLab