diff --git a/README.md b/README.md
index 412877bab53ea3acc2addaffdda4b44ab769a501..5d912335f33fddfb063d6455ac6b505682ab1f19 100644
--- a/README.md
+++ b/README.md
@@ -11,4 +11,53 @@ ln -s ~/DIGESTOMIC/DATA/DIGESTOMIC_J80_A_26_11_15_S1_all_R1_001.fastq.gz DATA/ra
 ln -s ~/DIGESTOMIC/DATA/DIGESTOMIC_J80_A_26_11_15_S1_all_R2_001.fastq.gz DATA/raw/J80_A_R2.fastq.gz
 ln -s ~/DIGESTOMIC/DATA/DIGESTOMIC_J80_B_26_11_15_S2_all_R1_001.fastq.gz DATA/raw/J80_B_R1.fastq.gz
 ln -s ~/DIGESTOMIC/DATA/DIGESTOMIC_J80_B_26_11_15_S2_all_R2_001.fastq.gz DATA/raw/J80_B_R2.fastq.gz
-```
\ No newline at end of file
+```
+
+## ghostKOALA
+
+### Submit analysis
+
+```bash
+# Split the protein FASTA into pieces of 2,000,000 lines each in a single pass;
+# the sixth piece also receives every line past 10,000,000.
+# NOTE(review): records stay whole only if each sequence occupies exactly two
+# lines (header + sequence) -- confirm for this file.
+awk -v chunk=2000000 -v parts=6 '{
+    part = int((NR - 1) / chunk) + 1
+    if (part > parts) part = parts
+    print > ("work/FGS/coassembly_FGS-" part ".faa")
+}' work/FGS/coassembly_FGS.faa
+```
+
+* Download the split files
+* Upload each file to the [GhostKOALA webserver](https://www.kegg.jp/ghostkoala/) with the `genus_prokaryotes + family_eukaryotes + viruses` database option
+* Submit
+* Download the `user_ko.txt` annotation file of each job from the webserver and concatenate them in file order
+* Join with other tables
+
+### KEGG orthology table
+
+[source](http://merenlab.org/2018/01/17/importing-ghostkoala-annotations/)
+
+```bash
+cd work/ghostKOALA
+kegfile="ko00001.keg"
+wget 'https://www.genome.jp/kegg-bin/download_htext?htext=ko00001&format=htext&filedir=' -O "$kegfile"
+
+# Flatten the hierarchy into one tab-separated row per D entry. sed drops lines
+# starting with #, ! or +, strips <...> tags, and puts a space after the first
+# character so that read sees the level letter as its own field.
+while read -r prefix content
+do
+    case "$prefix" in
+        A) col1="$content" ;;
+        B) col2="$content" ;;
+        C) col3="$content" ;;
+        # printf '%s' writes the text verbatim; echo -e would expand backslashes
+        D) printf '%s\t%s\t%s\t%s\n' "$col1" "$col2" "$col3" "$content" ;;
+    esac
+done < <(sed '/^[#!+]/d;s/<[^>]*>//g;s/^./& /' "$kegfile") > KO_Orthology_ko00001.txt
+
+rm -- "$kegfile"
+```
+