diff --git a/global.smk b/global.smk index 4aea719d690784906649a62984bfbfa2703efe89..5cf1cc56c46458daaf6042336c71be3c5445d13a 100644 --- a/global.smk +++ b/global.smk @@ -29,6 +29,7 @@ rule all: #virome expand("report/viromeQC-{sample}.txt", sample=config["SAMPLES"]), expand("work/virhostmatcher/{sample}/done", sample=config["SAMPLES"]), + expand("work/blast/spacer_{sample}.tsv", sample=config["SAMPLES"]), include: "../workflow_metagenomics/quality.smk" diff --git a/virome.smk b/virome.smk index 42f2a7085750df3166c957a20e9a5374e7d30aeb..e78abfb021d49466356068f9782aee4acb71bb83 100644 --- a/virome.smk +++ b/virome.smk @@ -45,3 +45,31 @@ rule virhostmatcher: # "conda deactivate " "&& " "touch {output.done} " + +rule crispr: + input: + unpack(contigs_input) + output: + "work/blast/{db}_{sample}.tsv" + params: + db = "DATA/blast/{db}", # wget https://crisprcas.i2bc.paris-saclay.fr/Home/DownloadFile?filename=20190618_spacer_34.zip -O DATA/spacer.zip && unzip DATA/spacer.zip -d DATA && rm DATA/spacer.zip && makeblastdb -in DATA/20190618_spacer_34.fasta -dbtype nucl -out DATA/blast/spacer && rm DATA/20190618_spacer_34.fasta + header = "qaccver qlen qseq saccver slen sseq pident mismatch gapopen evalue bitscore qcovs length qstart qend sstart send" + threads: + 20 + shell: + "conda activate blast-2.9.0 " + "&& " + "blastn " + "-task blastn-short " + "-db {params.db} " + "-query {input} " + "-out {output} " + "-outfmt '6 {params.header}' " + "-evalue 0.003 " + "-word_size 6 " + "-gapopen 10 " + "-gapextend 2 " + "-penalty -1 " + "-max_target_seqs 1000 " + "&& " + "conda deactivate "