diff --git a/annotation.smk b/annotation.smk
index c92e44cf89efaecbc66d6fb8f1817bfef881088b..016375d3348080e70aa77b1c5782d770349c0cdb 100644
--- a/annotation.smk
+++ b/annotation.smk
@@ -40,10 +40,13 @@ rule prodigal:
 		# "sed --in-place 's/*/N/' {output.faa} "
 
 def faa_input(wildcards):
-	if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
-		return {"faa": "work/FGS/{wildcards.sample}/{wildcards.sample}_FGS.faa".format(wildcards=wildcards)}
-	elif config["PROTEINS-PREDICTOR"] == "prodigal":
-		return {"faa": "work/prodigal/{wildcards.sample}/{wildcards.sample}_prodigal.faa".format(wildcards=wildcards)}
+	"""Return {"faa": path} for the sample's protein FASTA; falls through to None for an unrecognised predictor."""
+	if wildcards.sample == "catalogue":  # "catalogue" reads the file under work/cdhit, not a predictor output
+		return {"faa": "work/cdhit/catalogue.faa"}  # dict, not list, so every branch exposes the same "faa" key
+	if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
+		return {"faa": "work/FGS/{wildcards.sample}/{wildcards.sample}_FGS.faa".format(wildcards=wildcards)}
+	elif config["PROTEINS-PREDICTOR"] == "prodigal":
+		return {"faa": "work/prodigal/{wildcards.sample}/{wildcards.sample}_prodigal.faa".format(wildcards=wildcards)}
 
 def ffn_input(wildcards):
 	if config["PROTEINS-PREDICTOR"] == "FragGeneScan":
diff --git a/catalogue.smk b/catalogue.smk
index 7ff48a1f416ca8c7972fb4cda655c97c725fc663..55eae87e01cf8863d87e822a4f10552a1707f0cc 100644
--- a/catalogue.smk
+++ b/catalogue.smk
@@ -80,3 +80,29 @@ rule cd_hit_2D:
 		"-d 0 "
 		"-M 0 "
 		"-T {threads} "
+
+# Build work/cdhit/catalogue.faa: collect the sequence headers of the nucleotide catalogue
+# and pull the records with those headers out of the per-sample protein files with seqkit.
+rule faa_catalogue:
+	input:
+		ffn = "work/cdhit/catalogue.ffn",
+		# NOTE(review): faa_input reads prodigal proteins from "{sample}_prodigal.faa" -- confirm this path.
+		faa = expand("work/prodigal/{sample}/{sample}.faa", sample=config["SAMPLES"])
+	output:
+		list = temp("work/cdhit/catalogue.list"),
+		faa = "work/cdhit/catalogue.faa"
+	threads:
+		2
+	shell:
+		# Header lines of the catalogue, with the leading '>' removed, form the seqkit pattern file.
+		"grep '>' {input.ffn} > {output.list}  ; "
+		"sed --in-place 's/>//' {output.list}  ; "
+		"source activate seqkit-0.10.1  ; "
+		# --by-name compares the patterns with the full header line rather than the sequence ID only.
+		"seqkit grep "
+		"--by-name "
+		"--pattern-file {output.list} "
+		"--threads {threads} "
+		"--out-file {output.faa} "
+		"{input.faa}  ; "
+		"conda deactivate"