diff --git a/global.smk b/global.smk
index c301908380334b2b3ca163934cb2f2d2ea3751e5..4aea719d690784906649a62984bfbfa2703efe89 100644
--- a/global.smk
+++ b/global.smk
@@ -28,6 +28,7 @@ rule all:
         #expand("work/addon/catalogue.{sample}.unmapped.taxNames.tsv", sample=config["SAMPLES"]),
         #virome
         expand("report/viromeQC-{sample}.txt", sample=config["SAMPLES"]),
+        expand("work/virhostmatcher/{sample}/done", sample=config["SAMPLES"]),
 
 
 include: "../workflow_metagenomics/quality.smk"
diff --git a/virome.smk b/virome.smk
index 87c9cc9320e8db495be047f51ab25989ce055194..42f2a7085750df3166c957a20e9a5374e7d30aeb 100644
--- a/virome.smk
+++ b/virome.smk
@@ -19,3 +19,39 @@ rule viromeQC:
         "--tempdir /projet/tmp/ "
         "&& "
         "conda deactivate "
+
+# Split each sample's contigs into one FASTA file per sequence, then run
+# vhm.py on that directory against the host genomes. The split directory is
+# wiped first so per-sequence files left by an earlier run are not picked up
+# through --virusFaDir. The "done" file is a sentinel: it is touched only
+# when every previous command succeeded.
+rule virhostmatcher:
+    input:
+        unpack(contigs_input)
+    output:
+        done = "work/virhostmatcher/{sample}/done"
+    params:
+        splittedGenomes = "work/virhostmatcher/{sample}/splittedGenomes",
+        # Directory containing the sentinel; passed to vhm.py as --out.
+        output = lambda wildcards, output: os.path.dirname(output.done)
+    shell:
+        "rm -rf {params.splittedGenomes} "
+        "&& "
+        "mkdir -p {params.splittedGenomes} "
+        "&& "
+        # One file per ">" header, named after the text up to the first "|" or
+        # space. The 3-argument match() is a GNU awk extension.
+        "awk '/^>/ {{if(x>0) {{close(outname); x=0}} match($0, \">([^| ]*)\", record);outname=sprintf(\"{params.splittedGenomes}/%s.fa\",record[1]); if (x>0) {{print >> outname}} else {{print > outname;}} x++; next;}} {{if(x>0) print >> outname;}}' {input} " # https://github.com/soedinglab/WIsH#tricks
+        "&& "
+        # "conda activate vhm "
+        # "&& "
+        "vhm.py "
+        "--virusFaDir {params.splittedGenomes} "
+        "--hostFaDir /projet/irstea/WIsH_2020/Host/ "
+        "--out {params.output} "
+        "--taxa /projet/irstea/WIsH_2020/Host.txt "
+        "--d2star 0 "
+        # "&& "
+        # "conda deactivate "
+        "&& "
+        "touch {output.done} "