From 8164ff5d6ebbe3048616c7b7ab3e7ff07fef8fbb Mon Sep 17 00:00:00 2001
From: Midoux Cedric <cedric.midoux@irstea.fr>
Date: Tue, 11 Jun 2019 16:24:19 +0200
Subject: [PATCH] ghostKOALA - Update README.md

---
 README.md | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 412877b..5d91233 100644
--- a/README.md
+++ b/README.md
@@ -11,4 +11,46 @@ ln -s ~/DIGESTOMIC/DATA/DIGESTOMIC_J80_A_26_11_15_S1_all_R1_001.fastq.gz DATA/ra
 ln -s ~/DIGESTOMIC/DATA/DIGESTOMIC_J80_A_26_11_15_S1_all_R2_001.fastq.gz DATA/raw/J80_A_R2.fastq.gz
 ln -s ~/DIGESTOMIC/DATA/DIGESTOMIC_J80_B_26_11_15_S2_all_R1_001.fastq.gz DATA/raw/J80_B_R1.fastq.gz
 ln -s ~/DIGESTOMIC/DATA/DIGESTOMIC_J80_B_26_11_15_S2_all_R2_001.fastq.gz DATA/raw/J80_B_R2.fastq.gz
-```
\ No newline at end of file
+```
+
+## ghostKOALA
+
+### Submit analysis
+
+```bash
+awk 'NR<=2000000' work/FGS/coassembly_FGS.faa > work/FGS/coassembly_FGS-1.faa
+awk 'NR>=2000001&&NR<=4000000' work/FGS/coassembly_FGS.faa > work/FGS/coassembly_FGS-2.faa
+awk 'NR>=4000001&&NR<=6000000' work/FGS/coassembly_FGS.faa > work/FGS/coassembly_FGS-3.faa
+awk 'NR>=6000001&&NR<=8000000' work/FGS/coassembly_FGS.faa > work/FGS/coassembly_FGS-4.faa
+awk 'NR>=8000001&&NR<=10000000' work/FGS/coassembly_FGS.faa > work/FGS/coassembly_FGS-5.faa
+awk 'NR>=10000001' work/FGS/coassembly_FGS.faa > work/FGS/coassembly_FGS-6.faa
+```
+
+* Download the split files
+* Upload to the [GhostKOALA webserver](https://www.kegg.jp/ghostkoala/) with the `genus_prokaryotes + family_eukaryotes + viruses` database option
+* Submit
+* Download the `user_ko.txt` annotation files from the webserver and concatenate them in order
+* Join with the other tables
+
+### KEGG orthology table
+
+[source](http://merenlab.org/2018/01/17/importing-ghostkoala-annotations/)
+
+```bash
+cd work/ghostKOALA
+kegfile="ko00001.keg"
+wget 'https://www.genome.jp/kegg-bin/download_htext?htext=ko00001&format=htext&filedir=' -O "$kegfile"
+
+# sed drops lines starting with #, ! or +, strips <tags> and puts a space after the level letter; the loop prints one tab-separated A/B/C/D row per D entry.
+while read -r prefix content
+do
+    case "$prefix" in A) col1="$content" ;;
+                      B) col2="$content" ;;
+                      C) col3="$content" ;;
+                      D) printf '%s\t%s\t%s\t%s\n' "$col1" "$col2" "$col3" "$content" ;;
+    esac
+done < <(sed '/^[#!+]/d;s/<[^>]*>//g;s/^./& /' < "$kegfile") > KO_Orthology_ko00001.txt
+
+rm -- "$kegfile"
+```
+
-- 
GitLab