diff --git a/Snakefile b/Snakefile index 463699e..4fe7acc 100644 --- a/Snakefile +++ b/Snakefile @@ -136,12 +136,12 @@ def get_summaries(): hmm = ["TIGRFAMs","HAMAP","dbCAN"] taxonomy = ["phylum", "class", "order"] # code to generate the possible files - file_paths = expand("summaries/combined/{hmm}_{taxonomy}.txt", - hmm=hmm, taxonomy=taxonomy) + file_paths = expand("summaries/combined/{hmm}_{tax_classification}.txt", + hmm=hmm, tax_classification=taxonomy) file_paths.extend(expand("summaries/hmms_summary/{hmm}_summary.txt", hmm=hmm)) - file_paths.extend(expand("summaries/taxonomy/{taxonomy}.txt", - taxonomy=taxonomy)) + file_paths.extend(expand("summaries/taxonomy/{tax_classification}.txt", + tax_classification=taxonomy)) return file_paths @@ -394,7 +394,7 @@ rule aggregate_all_genes: with open(f) as fh: for name, seq, _ in readfx(fh): # per sequence, choose first only - if name.endswith("_2"): + if not name.endswith("_1"): continue print(">%d" % name_index, seq.replace("*", ""), sep="\n", file=out_faa) name_index += 1 @@ -481,7 +481,7 @@ rule split_fasta: input: fasta = "gene_catalog/clustered_genes.faa" output: - temp((dynamic("gene_catalog/tmp/clustered_genes_{chunk}.faa"))) + dynamic("gene_catalog/tmp/clustered_genes_{chunk}.faa") params: # consider a smaller chunk size chunk_size = config.get("chunk_size", 1000000) @@ -621,9 +621,23 @@ rule combine_sample_output: """ +rule remove_empty_annotations: + input: + annotations = "gene_catalog/annotations.txt" + output: + cleaned = "gene_catalog/annotations_cleaned.txt" + run: + with open(input.annotations) as file, open(output.cleaned,"w") as output: + header = next(file) + print(header.strip("\n"), file=output) + for line in file: + toks = line.strip("\r\n").split("\t") + if not all(s=='' for s in toks[2:12]): + print(line.strip("\n"), file=output) + rule build_hmms_table: input: - "gene_catalog/annotations.txt" + "gene_catalog/annotations_cleaned.txt" output: "summaries/hmms_summary/{hmm}_summary.txt" params: @@ -645,7 +659,7 @@ rule build_hmms_table: rule build_tax_table: input: - "gene_catalog/annotations.txt" + "gene_catalog/annotations_cleaned.txt" output: "summaries/taxonomy/{tax_classification}.txt" params: @@ -659,7 +673,7 @@ rule build_tax_table: shell: """ python scripts/summarize_classifications.py \ - --group-on kaiju_classification --min-evalue {params.min_evalue} \ + --group-on kaiju_taxonomy --min-evalue {params.min_evalue} \ --min-score {params.min_score} --min-len {params.min_len} \ --tax-level {wildcards.tax_classification} {output} {input} """ @@ -667,7 +681,7 @@ rule build_tax_table: rule build_hmm_and_tax_table: input: - "gene_catalog/annotations.txt" + "gene_catalog/annotations_cleaned.txt" output: "summaries/combined/{hmm}_{tax_classification}.txt" params: @@ -679,12 +693,12 @@ rule build_hmm_and_tax_table: shell: """ python scripts/summarize_classifications.py \ - --group-on {wildcards.hmm} kaiju_classification \ + --group-on {wildcards.hmm} kaiju_taxonomy \ --tax-level {wildcards.tax_classification} --min-evalue {params.min_evalue} \ - --min-score {params.min_score}--min-len {params.min_len} {output} {input} + --min-score {params.min_score} --min-len {params.min_len} {output} {input} """ -## TODO fix this rule + rule build_krona_ec_input: input: ec_file = "summaries/hmms_summary/TIGRFAMs_summary.txt", @@ -731,30 +745,34 @@ rule build_krona_plots: """ -# rule zip_attachments: -# input: -# function = "summaries/function/ec.txt", -# taxonomy = "summaries/taxonomy/order.txt", -# krona_tax = 
"krona_plots/tax.krona.html", -# krona_ec = "krona_plots/ec.krona.html" -# output: -# temp("perseq_downloads.zip") -# shell: -# """ -# zip {output} {input.function} {input.taxonomy} {input.combined} -# """ +rule zip_attachments: + input: + function = "summaries/hmms_summary/TIGRFAMs_summary.txt", + taxonomy = "summaries/taxonomy/order.txt", + combined = "summaries/combined/TIGRFAMs_order.txt", + krona_tax = "krona_plots/tax.krona.html", + krona_ec = "krona_plots/ec.krona.html" + output: + "perseq_downloads.tar.gz" + conda: + CONDAENV + shell: + """ + tar -czf {output} {input.function} {input.taxonomy} {input.combined} \ + {input.krona_tax} {input.krona_ec} + """ rule build_report: input: - annotations = "gene_catalog/annotations.txt", + annotations = "gene_catalog/annotations_cleaned.txt", ee_stats = expand("logs/{sample}_{idx}_eestats.txt", sample=config["samples"].keys(), idx=["R1", "R2"]), clean_length_logs = expand("logs/{sample}_03_clean_readlengths.txt", sample=config["samples"].keys()), unique_length_logs = expand("logs/{sample}_02_unique_readlengths.txt", sample=config["samples"].keys()), clean_logs = expand("logs/{sample}_decontamination.log", sample=config["samples"].keys()), merge_logs = expand("logs/{sample}_merge_sequences.log", sample=config["samples"].keys()), taxonomy = "summaries/taxonomy/order.txt", - zipped_files = "perseq_downloads.zip" + zipped_files = "perseq_downloads.tar.gz" output: "summary.html" conda: diff --git a/envs/environment.yml b/envs/environment.yml index c8977a0..1ea37c7 100644 --- a/envs/environment.yml +++ b/envs/environment.yml @@ -19,5 +19,5 @@ dependencies: - seqtk=1.3 - pip - pip: - - relatively + - relatively>=1.0.5 - plotly>=3.0 diff --git a/scripts/build_krona.py b/scripts/build_krona.py index 6222eed..0fc5642 100755 --- a/scripts/build_krona.py +++ b/scripts/build_krona.py @@ -17,11 +17,10 @@ def build_tax(tax_file, output): # read in tax file to order level from build_tax_table rule test_tax = pd.read_table(tax_file) col_names = list(test_tax)[1:] - taxonomies = ["Kingdom", "Phylum", "Class", "Order"] # split taxonomy column into hierarchy test_tax[ ["Kingdom", "Phylum", "Class", "Order"] - ] = test_tax.taxonomy_order.str.split("; ", expand=True) + ] = test_tax.taxonomy_order.str.split("; ",n=4, expand=True) # generate individual files per sample for item in col_names: df_name = f"test_tax_{item}" @@ -85,62 +84,96 @@ def build_ec_dict(ec_filename, dat_filename): return d +# def parse_ec_file(ec_file_from_summaries): +# """ +# Split sequences that aligned to multiple ec numbers into last common ec number +# shared amongst matches +# """ +# with gzopen(ec_file_from_summaries) as ec_file_sum: +# +# next(ec_file_sum) +# new_ec_dict = {"ec": []} +# # split ec numbers that mapped to multiple +# for item in ec_file_sum: +# toks = item.partition("\t")[0] +# ec = toks.split(";") +# # print(ec) +# first_item = ec[0][:5] +# if len(ec) == 1: +# new_ec_dict["ec"].append(ec[0]) +# else: +# n = 0 +# for item in ec[1:]: +# if item[:5] == first_item: +# new_ec_dict["ec"].append(item[:5] + ".-") +# # print('same',item,first_item) +# elif item[:3] == first_item[:3]: +# new_ec_dict["ec"].append(item[:3] + ".-.-") +# # print('same to high level',item,first_item) +# elif item[:1] == first_item[:1]: +# new_ec_dict["ec"].append(item[:1] + ".-.-.-") +# return new_ec_dict + def parse_ec_file(ec_file_from_summaries): """ - Split sequences that aligned to multiple ec numbers into last common ec number - shared amongst matches + For ec numbers that aligned to multiple ecs 
in TIGRFAMs, the first entry is
+    the only one that will be retained for Krona plots.
     """
-    with gzopen(ec_file_from_summaries) as ec_file_sum:
-
-        next(ec_file_sum)
-        new_ec_dict = {"ec": []}
-        # split ec numbers that mapped to multiple
-        for item in ec_file_sum:
-            toks = item.partition("\t")[0]
-            ec = toks.split(";")
-            # print(ec)
-            first_item = ec[0][:5]
-            if len(ec) == 1:
-                new_ec_dict["ec"].append(ec[0])
-            else:
-                n = 0
-                for item in ec[1:]:
-                    if item[:5] == first_item:
-                        new_ec_dict["ec"].append(item[:5] + ".-")
-                        # print('same',item,first_item)
-                    elif item[:3] == first_item[:3]:
-                        new_ec_dict["ec"].append(item[:3] + ".-.-")
-                        # print('same to high level',item,first_item)
-                    elif item[:1] == first_item[:1]:
-                        new_ec_dict["ec"].append(item[:1] + ".-.-.-")
-    return new_ec_dict
-
-
-def build_ec_output(new_ec_dict, ec_file_from_summaries, parsed_dict, output):
+    ec_df = pd.read_table(ec_file_from_summaries)
+    samples = ec_df.filter(regex="[0-9]+").columns.values.tolist()
+    ec_df = ec_df[["tigrfams_ec"] + samples]
+    ec_df = ec_df.groupby(["tigrfams_ec"]).sum(axis=1).reset_index()
+    ec_df[["tigrfams_ec"]] = ec_df.tigrfams_ec.str.split(",", n=1).str[0]
+    return ec_df
+
+
+# def build_ec_output(new_ec_dict, ec_file_from_summaries, parsed_dict, output):
+#     """
+#     Splits the ec number counts and hierarchy by sample
+#     """
+#     #ec_replace = pd.DataFrame.from_dict(new_ec_dict)
+#     ec_table = pd.read_table(ec_file_from_summaries)
+#     col_names = list(ec_table)[1:]
+#     #ec_table["ec"] = ec_replace["ec"]
+#     ec_hier_dict = pd.DataFrame.from_dict(parsed_dict, orient="index").reset_index()
+#     ec_hier_dict = ec_hier_dict.rename(columns={"index": "ec"})
+#     # print(ec_hier_dict.head(5))
+#     grouped_ec_tbl = ec_table.merge(ec_hier_dict, on="ec", how="inner")
+#     # print(grouped_ec_tbl.head(5))
+#     grouped_ec_tbl = grouped_ec_tbl.rename(
+#         columns={0: "level_1", 1: "level_2", 2: "level_3", 3: "level_4"}
+#     )
+#     # split the samples into separate files
+#     for item in col_names:
+#         df_name = f"test_tax_{item}"
+#         df_name = grouped_ec_tbl[
+#             [f"{item}", "level_1", "level_2", "level_3", "level_4"]
+#         ]
+#
+#         df_name.to_csv(
+#             output + "/" + f"{item}" + "_ec.txt", sep="\t", index=False, header=False
+#         )
+def build_ec_output(ec_df, parsed_dict, output):
     """
     Splits the ec number counts and hierarchy by sample
     """
-    ec_replace = pd.DataFrame.from_dict(new_ec_dict)
-    ec_table = pd.read_table(ec_file_from_summaries)
-    col_names = list(ec_table)[1:]
-    ec_table["ec"] = ec_replace["ec"]
+    ec_table = ec_df
+    samples = list(ec_table)[1:]
     ec_hier_dict = pd.DataFrame.from_dict(parsed_dict, orient="index").reset_index()
-    ec_hier_dict = ec_hier_dict.rename(columns={"index": "ec"})
-    # print(ec_hier_dict.head(5))
-    grouped_ec_tbl = ec_table.merge(ec_hier_dict, on="ec", how="inner")
-    # print(grouped_ec_tbl.head(5))
+    ec_hier_dict = ec_hier_dict.rename(columns={"index": "tigrfams_ec"})
+    grouped_ec_tbl = ec_table.merge(ec_hier_dict, on="tigrfams_ec", how="inner")
     grouped_ec_tbl = grouped_ec_tbl.rename(
         columns={0: "level_1", 1: "level_2", 2: "level_3", 3: "level_4"}
     )
     # split the samples into separate files
-    for item in col_names:
-        df_name = f"test_tax_{item}"
+    for sample in samples:
+        #df_name = f"test_tax_{sample}"
         df_name = grouped_ec_tbl[
-            [f"{item}", "level_1", "level_2", "level_3", "level_4"]
+            [f"{sample}", "level_1", "level_2", "level_3", "level_4"]
         ]
-
+        print(df_name.head())
         df_name.to_csv(
-            output + "/" + f"{item}" + "_ec.txt", sep="\t", index=False, header=False
+            output + "/" + f"{sample}" + "_ec.txt", sep="\t",
index=False, header=False ) @@ -148,9 +181,9 @@ def main(tax_file, output, ec_file, ec_file_from_summaries, dat_file): if tax_file is not None: build_tax(tax_file, output) else: - new_ec_dict = parse_ec_file(ec_file_from_summaries) + ec_df = parse_ec_file(ec_file_from_summaries) parsed_dict = build_ec_dict(ec_file, dat_file) - build_ec_output(new_ec_dict, ec_file_from_summaries, parsed_dict, output) + build_ec_output(ec_df, parsed_dict, output) if __name__ == "__main__": @@ -167,7 +200,7 @@ def main(tax_file, output, ec_file, ec_file_from_summaries, dat_file): p.add_argument( "--ec-file-from-summaries", type=str, - help="file that contains the ec numbers from the diamond alignment", + help="file that contains the ec numbers from TIGRFAMs summary", ) p.add_argument( "--dat-file", diff --git a/scripts/build_report.py b/scripts/build_report.py index ace0eaa..bb45b17 100644 --- a/scripts/build_report.py +++ b/scripts/build_report.py @@ -45,16 +45,17 @@ def get_sample_order(lst): return [i[0] for i in sorted(lst, key=lambda x: x[1], reverse=True)] -def build_taxonomy_plot(txt, value_cols, height=900): +def build_taxonomy_plot(txt, height=900): df = pd.read_table(txt) + samples = df.columns.values.tolist()[1:] levels = ["kingdom", "phylum", "class", "order"] df[levels] = df["taxonomy_order"].str.split(";", expand=True) hierarchy = ["phylum", "class", "order"] df[hierarchy] = df[hierarchy].fillna("NA") - df[value_cols] = df[value_cols].fillna(0) - df = df[hierarchy + value_cols] + df[samples] = df[samples].fillna(0) + df = df[hierarchy + samples] dfs = relatively.get_dfs_across_hierarchy( - df, hierarchy, value_cols, reorder="shannon", dependent="; " + df, hierarchy, samples, reorder="shannon", dependent="; " ) fig = relatively.get_abundance_figure_from_dfs( dfs, hierarchy, "Assigned Taxonomy Per Sample", height=height @@ -66,11 +67,47 @@ def get_sample_name(path, key): return [os.path.basename(item).partition(key)[0] for item in path] +# def parse_classifications_for_taxonomy(path): +# """ +# SN1035:381:h3cv7bcx2:2:2202:3165:73750 -1 -1 +# SN1035:381:h3cv7bcx2:1:1105:7280:88523 67.5 40 111 Archaea; Thaumarchaeota; Nitrososphaerales; Nitrososphaeria; Nitrososphaeraceae; Candidatus Nitrosocosmicus; Candidatus Nitrocosmicus oleophilus; +# SN1035:381:h3cv7bcx2:1:1204:20140:83036 78.4 37 ko:K00820 glmS, GFPT; glucosamine---fructose-6-phosphate aminotransferase (isomerizing) 2.6.1.16 229 Bacteria; Actinobacteria; NA; NA; NA; NA; Actinobacteria bacterium 13_1_40CM_66_12; +# """ +# logger.info("Parsing {}".format(path)) +# # hardcoded tax levels :( +# taxonomy_level_counter = { +# "order": Counter(), +# "class": Counter(), +# "phylum": Counter(), +# } +# taxonomy_counter = Counter() +# summary_counter = Counter() +# with open(path) as fh: +# # skip the header +# next(fh) +# for i, line in enumerate(fh, start=1): +# toks = line.strip("\r\n").split("\t") +# if toks[3]: +# summary_counter.update(["Assigned Function"]) +# if toks[7]: +# summary_counter.update(["Assigned Both"]) +# if toks[7]: +# taxonomy_counter.update([toks[7]]) +# taxonomy = [j.strip() for j in toks[7].split(";")] +# taxonomy_level_counter["order"].update([taxonomy[3]]) +# taxonomy_level_counter["class"].update([taxonomy[2]]) +# taxonomy_level_counter["phylum"].update([taxonomy[1]]) +# summary_counter.update(["Assigned Taxonomy"]) +# return dict( +# taxonomy_level_counter=taxonomy_level_counter, +# summary_counter=summary_counter +# ) + def parse_classifications_for_taxonomy(path): """ - SN1035:381:h3cv7bcx2:2:2202:3165:73750 -1 -1 - 
SN1035:381:h3cv7bcx2:1:1105:7280:88523 67.5 40 111 Archaea; Thaumarchaeota; Nitrososphaerales; Nitrososphaeria; Nitrososphaeraceae; Candidatus Nitrosocosmicus; Candidatus Nitrocosmicus oleophilus; - SN1035:381:h3cv7bcx2:1:1204:20140:83036 78.4 37 ko:K00820 glmS, GFPT; glucosamine---fructose-6-phosphate aminotransferase (isomerizing) 2.6.1.16 229 Bacteria; Actinobacteria; NA; NA; NA; NA; Actinobacteria bacterium 13_1_40CM_66_12; + 37056 274 Bacteria; Bacteroidetes; Sphingobacteriia; Sphingobacteriales; NA; NA; Sphingobacteriales bacterium UTBCD1; 3.4.21.92 clpP ATP-dependent Clp endopeptidase proteolytic subun + 75331 0 0 0 0 0 0 0 0 0 + """ logger.info("Parsing {}".format(path)) # hardcoded tax levels :( @@ -85,53 +122,94 @@ def parse_classifications_for_taxonomy(path): # skip the header next(fh) for i, line in enumerate(fh, start=1): - toks = line.strip("\r\n").split("\t") - if toks[3]: + toks = line.strip("\n").split("\t") + try: + if toks[2]: + taxonomy_counter.update([toks[2]]) + taxonomy = [j.strip() for j in toks[2].split(";")] + taxonomy_level_counter["order"].update([taxonomy[3]]) + taxonomy_level_counter["class"].update([taxonomy[2]]) + taxonomy_level_counter["phylum"].update([taxonomy[1]]) + summary_counter.update(["Assigned Taxonomy"]) + if not all(s=='' for s in toks[4:12]): + summary_counter.update(["Assigned Both"]) + except: + print(len(line)) + continue + if not all(s=='' for s in toks[4:12]): summary_counter.update(["Assigned Function"]) - if toks[7]: - summary_counter.update(["Assigned Both"]) - if toks[7]: - taxonomy_counter.update([toks[7]]) - taxonomy = [j.strip() for j in toks[7].split(";")] - taxonomy_level_counter["order"].update([taxonomy[3]]) - taxonomy_level_counter["class"].update([taxonomy[2]]) - taxonomy_level_counter["phylum"].update([taxonomy[1]]) - summary_counter.update(["Assigned Taxonomy"]) return dict( taxonomy_level_counter=taxonomy_level_counter, summary_counter=summary_counter ) - - -def compile_summary_df(classification_tables, tax_levels=["phylum", "class", "order"]): +def parse_files_for_annotation(path): """ - Reads in multiple sample alignments from diamond in a given directory and merges them into - a single pandas.DataFrame. It returns a pandas dataframe for each of th - phylum that is ready to plug into the processing function. 
Also returns total counts - which is necessary to calculate the percentage of total that is being represented + parses the annotations to retain which samples were assigned function vs + taxonomy vs both """ - dfs = {} - classifications_per_sample = {} - for classification_table in classification_tables: - sample = get_sample(classification_table, "_classifications.txt") - parsed_taxonomy = parse_classifications_for_taxonomy(classification_table) - # assigned #'s in summary table - classifications_per_sample[sample] = parsed_taxonomy["summary_counter"] - if len(dfs) == 0: - for tax_level in tax_levels: - dfs[tax_level] = get_df_at_tax_level( - parsed_taxonomy["taxonomy_level_counter"][tax_level], - sample, - tax_level, - ) - continue - - for tax_level in tax_levels: - df = get_df_at_tax_level( - parsed_taxonomy["taxonomy_level_counter"][tax_level], sample, tax_level - ) - dfs[tax_level] = dfs[tax_level].merge(df, on=tax_level, how="outer") - return pd.DataFrame.from_dict(classifications_per_sample, orient="index") + summary_counter={} + with open(path) as fh: + # skip the header + header = next(fh) + header = header.strip("\n").split("\t") + for sample in header[18:]: + summary_counter[sample]=Counter() + for line in fh: + toks = line.strip("\n").split("\t") + try: + if toks[2]: + for i,sample in enumerate(header[18:]): + if int(toks[18+i]) != 0: + summary_counter[sample].update(["Kaiju+NR"]) + #summary_counter.update() + # if not all(s=='' for s in toks[3:12]): + # for i,sample in enumerate(header[18:]): + # if str(toks[18+i]) != 0: + # summary_counter[sample].update(["Assigned Both"]) + #summary_counter.update(["Assigned Both"]) + except: + print(len(line)) + continue + if not all(s=='' for s in toks[4:12]): + for i,sample in enumerate(header[18:]): + if int(toks[18+i]) != 0: + summary_counter[sample].update(["TIGRFAMs,HAMAP,dbCAN\n hits per sample"]) + return summary_counter + +# def compile_summary_df(classification_tables, tax_levels=["phylum", "class", "order"]): +# """ +# Reads in multiple sample alignments from diamond in a given directory and merges them into +# a single pandas.DataFrame. It returns a pandas dataframe for each of +# phylum that is ready to plug into the processing function. 
Also returns total counts +# which is necessary to calculate the percentage of total that is being represented +# """ +# dfs = {} +# classifications_per_sample = {} +# cols = df.filter(regex="[A-Z]+").columns.values.tolist() +# samples = df.filter(regex="[0-9]+").columns.values.tolist() +# for sample in samples: +# df[cols] +# +# for classification_table in classification_tables: +# sample = get_sample(classification_table, "_classifications.txt") +# parsed_taxonomy = parse_classifications_for_taxonomy(classification_table) +# # assigned #'s in summary table +# classifications_per_sample[sample] = parsed_taxonomy["summary_counter"] +# if len(dfs) == 0: +# for tax_level in tax_levels: +# dfs[tax_level] = get_df_at_tax_level( +# parsed_taxonomy["taxonomy_level_counter"][tax_level], +# sample, +# tax_level, +# ) +# continue +# +# for tax_level in tax_levels: +# df = get_df_at_tax_level( +# parsed_taxonomy["taxonomy_level_counter"][tax_level], sample, tax_level +# ) +# dfs[tax_level] = dfs[tax_level].merge(df, on=tax_level, how="outer") +# return pd.DataFrame.from_dict(classifications_per_sample, orient="index") def get_df_at_tax_level(count_obj, sample, tax_level): @@ -173,6 +251,7 @@ def parse_log_files(merge_logs, unique_logs, clean_logs, classifications_per_sam "Unique", "Clean", ] + samp_df = pd.DataFrame.from_dict(classifications_per_sample,orient = "index") count_table = defaultdict(list) # initial read count, join count, join rate, insert size from merge step for merge_log in merge_logs: @@ -196,13 +275,18 @@ def parse_log_files(merge_logs, unique_logs, clean_logs, classifications_per_sam count_table[sample].append(int(line.strip("\r\n").split("\t")[-1])) log_df = pd.DataFrame.from_dict(count_table, orient="index") + print(log_df.head(10)) + print(log_df.shape) + print(samp_df.shape) log_df.columns = header + print(log_df.head()) # print("classifications", classifications_per_sample.head()) - log_df = log_df.merge(classifications_per_sample, left_index=True, right_index=True) + log_df = log_df.merge(samp_df, left_index=True, right_index=True) + print(log_df.head()) # print(log_df.head()) log_df.reset_index(inplace=True) header.insert(0, "Sample") - header.extend(["Assigned\nFunction", "Assigned\nTaxonomy", "Assigned\nBoth"]) + header.extend(["TIGRFAMs,HAMAP,dbCAN\n hits per sample", "Kaiju+NR"]) log_df.columns = header sample_summary_table = log_df[header].to_html( index=False, @@ -305,20 +389,25 @@ def main( r1_quality_files, html, conda_env, - function_table, + taxonomy_table, zipped_file ): clean_logs = glob(clean_logs) unique_logs = glob(unique_logs) merge_logs = glob(merge_logs) - summary_tables = glob(summary_tables) + summary_table = summary_tables + # parse classifcations for summary + assigned_dict = parse_files_for_annotation(summary_table) r1_quality_files = glob(r1_quality_files) - classifications_per_sample = compile_summary_df(summary_tables) - value_cols = get_sample_name(summary_tables, "_classifications.txt") - fig = build_taxonomy_plot(taxonomy_table, value_cols) + #classifications_per_sample = compile_summary_df(summary_table) + #value_cols = get_sample_name(summary_tables, "_classifications.txt") + fig = build_taxonomy_plot(taxonomy_table) plots = offline.plot(fig, **PLOTLY_PARAMS) + # html_tbl = parse_log_files( + # merge_logs, unique_logs, clean_logs, classifications_per_sample + # ) html_tbl = parse_log_files( - merge_logs, unique_logs, clean_logs, classifications_per_sample + merge_logs, unique_logs, clean_logs, assigned_dict ) quality_plot = 
build_quality_plot(r1_quality_files)
     conda_env = get_conda_env_str(conda_env)
@@ -333,7 +422,7 @@ def main(
 PerSeq_ - Per sequence functional and taxonomic assignments
 =============================================================
 
-.. _PerSeq: https://github.com/pnnl
+.. _PerSeq: https://github.com/PNNL-CompBio/perseq
 
 .. contents::
 
@@ -385,33 +474,54 @@ def main(
 specified during the decontamination process. Functional annotation and
 taxonomic classification were performed following the decontamination step.
 
+Translation and Clustering
+**************************
+
+Raw nucleotide sequences are translated into protein sequences using Prodigal[4]
+in anonymous mode. These sequences are then clustered at 90% sequence identity
+using mmseqs[5] to decrease the computational resources required to perform
+annotation. The resulting representative sequences are then annotated for
+functional and taxonomic information.
+
+
 Functional Annotation
 *********************
 
-The blastx algorithm of DIAMOND [4] was used to align nucleotide sequences to
-the KEGG protein reference database [5] consisting of non-redundant, family
-level fungal eukaryotes and genus level prokaryotes
-(``--strand=both --evalue 0.00001``). The highest scoring alignment per
-sequence was used for functional annotation.
+Three distinct HMM libraries were used to infer functional potential:
+HAMAP[6], dbCAN[7], and TIGRFAMs[8]. Each sequence was aligned to each database
+using hmmsearch from HMMER[9]. TIGRFAMs and HAMAP both provide an enzyme
+commission (EC) number, gene, and product, while dbCAN provides enzyme class,
+enzyme subclass, and EC.
 
 Taxonomic Annotation
 ********************
 
 Kmer-based taxonomic classification was performed on the merged reads using
-Kaiju [6] in greedy mode (``-a greedy -E 0.05``). NCBI's nr database [7]
+Kaiju [10] in greedy mode (``-a greedy -E 0.05``). NCBI's nr database [11]
 containing reference sequences for archaea, bacteria, viruses, fungi, and
 microbial eukaryotes was used as the reference index for Kaiju.
 
+Aggregation
+***********
+
+Once functional and taxonomic annotation has completed, the sequences from each
+sample are aligned to the annotated representative sequences using DIAMOND[12].
+
 
 References
 **********
 
 1. Rognes T, Flouri T, Nichols B, Quince C, Mahé F. VSEARCH: a versatile open source tool for metagenomics. PeerJ. PeerJ Inc; 2016;4:e2584.
 2. Bushnell B. BBTools [Internet]. Available from: https://sourceforge.net/projects/bbmap/
 3. Li H. Seqtk [Internet]. Available from: https://github.com/lh3/seqtk
-4. Buchfink B, Xie C, Huson DH. Fast and sensitive protein alignment using DIAMOND. Nat. Methods. Nature Publishing Group; 2015;12:59–60.
-5. Kanehisa M, Sato Y, Kawashima M, Furumichi M, Tanabe M. KEGG as a reference resource for gene and protein annotation. Nucleic Acids Res. 2016;44:D457–62.
-6. Menzel P, Ng KL, Krogh A. Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nat Commun. Nature Publishing Group; 2016;7:11257.
-7. NCBI Resource Coordinators. Database resources of the National Center for Biotechnology Information. Nucleic Acids Res. 2018;46:D8–D13.
+4. Hyatt D, Chen G-L, LoCascio PF, Land ML, Larimer FW, Hauser LJ. Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC Bioinformatics. 2010;11:119.
+5. Steinegger M, Söding J. MMseqs2 enables sensitive protein sequence searching for the analysis of massive data sets. Nat Biotechnol. 2017;35:1026–8.
+6. Pedruzzi I, Rivoire C, Auchincloss AH, et al. HAMAP in 2015: updates to the protein family classification and annotation system. Nucleic Acids Res. 2015;43:D1064–70.
+7. Yin Y, Mao X, Yang J, Chen X, Mao F, Xu Y. dbCAN: a web resource for automated carbohydrate-active enzyme annotation. Nucleic Acids Res. 2012;40:W445–51.
+8. Haft DH, Selengut JD, White O. The TIGRFAMs database of protein families. Nucleic Acids Res. 2003;31:371–3.
+9. Eddy SR. Accelerated profile HMM searches. PLoS Comput Biol. 2011;7:e1002195.
+10. Menzel P, Ng KL, Krogh A. Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nat Commun. Nature Publishing Group; 2016;7:11257.
+11. NCBI Resource Coordinators. Database resources of the National Center for Biotechnology Information. Nucleic Acids Res. 2018;46:D8–D13.
+12. Buchfink B, Xie C, Huson DH.
Fast and sensitive protein alignment using DIAMOND. Nat. Methods. Nature Publishing Group; 2015;12:59–60. Execution Environment @@ -424,10 +534,10 @@ def main( Output ------ -Classification Tables -********************* +Annotations Table +***************** -Per sample classifications in tables/ contain: +Table containing annotations across all samples: .. table:: :name: classificationtable @@ -435,8 +545,21 @@ def main( ========================= ========================================================================================================================================== Header ID Definition ========================= ========================================================================================================================================== - aa_alignment_length The length of the DIAMOND blastx hit - aa_percent_id The percent ID of the DIAMOND blastx hit; could be used to increase post-processing stringency + seq + kaiju_length + kaiju_taxonomy + tigrfams_ec + tigrfams_gene + tigrfams_product + tigrfams_score + tigrfams_evalue + hamap_ec + hamap_gene + hamap_product + hamap_score + hamap_evalue + _ec The length of the DIAMOND blastx hit + _gene The percent ID of the DIAMOND blastx hit; could be used to increase post-processing stringency ec Enzyme Commission number from KEGG; semicolon delimited where multiple ko KEGG entry ID product KEGG gene ID KEGG product @@ -468,41 +591,43 @@ def main( Function ```````` -Per function assignments in tables named **summaries/function/.txt** +Per function assignments in tables named **summaries/hmms_summary/.txt** contain: .. table:: :name: functiondeftable - ==================== =================================================================== - Header ID Definition - ==================== =================================================================== - either KO, EC, or product into which counts have been summed - samples names non-normalized, per sample sum for this particular functional group - level_1 KEGG hierarchy [level 1] if KO defined in first column - level_2 KEGG hierarchy [level 2] if KO defined in first column - level_3 KEGG hierarchy [level 3] if KO defined in first column - ==================== =================================================================== + ==================== =================================================================== + Header ID Definition + ==================== =================================================================== + _ec either HAMAP,dbCAN or TIGRFAMs EC + _gene either HAMAP,dbCAN or TIGRFAMs gene + _product either HAMAP,dbCAN or TIGRFAMs product + _enzyme_class dbcan only enzyme class descriptor + _enzyme_class_subfamily dbcan only enzyme class subfamily descriptor + samples names non-normalized, per sample sum for this particular functional group + ==================== =================================================================== Combined ```````` Per taxonomy+function assignments in tables named -**summaries/combined/_.txt** contain: +**summaries/combined/_.txt** contain: .. 
table:: :name: combineddeftable - ==================== ==================================================================== - Header ID Definition - ==================== ==================================================================== - either KO, EC, or product; counts are summed using + - taxonomy_ taxonomic level; counts are summed using + - sample names non-normalized, per sample sum for this particular functional group - level_1 KEGG hierarchy [level 1] if KO defined in first column - level_2 KEGG hierarchy [level 2] if KO defined in first column - level_3 KEGG hierarchy [level 3] if KO defined in first column - ==================== ==================================================================== + ==================== ==================================================================== + Header ID Definition + ==================== ==================================================================== + _ec either HAMAP,dbCAN or TIGRFAMs EC + _gene either HAMAP,dbCAN or TIGRFAMs gene + _product either HAMAP,dbCAN or TIGRFAMs product + _enzyme_class dbcan only enzyme class descriptor + _enzyme_class_subfamily dbcan only enzyme class subfamily descriptor + taxonomy_ taxonomic level; counts are summed using + + sample names non-normalized, per sample sum for this particular functional group + ==================== ==================================================================== Downloads --------- @@ -530,7 +655,7 @@ def main( p.add_argument("--r1-quality-files") p.add_argument("--html") p.add_argument("conda_env") - p.add_argument("function_table") + p.add_argument("taxonomy_table") p.add_argument("zipped_file") # p.add_argument("taxonomy_table") # p.add_argument("taxonomy_function_table") @@ -545,7 +670,7 @@ def main( args.r1_quality_files, args.html, args.conda_env, - args.function_table, + args.taxonomy_table, args.zipped_file, # args.taxonomy_table, # args.taxonomy_function_table, diff --git a/scripts/make_classification_table.py b/scripts/make_classification_table.py old mode 100644 new mode 100755 index 6214129..ac99a76 --- a/scripts/make_classification_table.py +++ b/scripts/make_classification_table.py @@ -48,11 +48,11 @@ def main(kaiju, hamap, dbcan, tigrfams, diamond, output): sequence_counts = parse_diamond_outputs(diamond) # ECs may be a comma delimited list - # sequence_id -> [ec, gene, product, hmm_id, score, evalue] + # sequence_id -> [ec, gene, product, hmm_id, evalue, score] hamap_hits = parse_hmm_hits(hamap) - # sequence_id -> [ec, enzyme class, enzyme class subfamily, hmm_id, score, evalue] + # sequence_id -> [ec, enzyme class, enzyme class subfamily, hmm_id, evalue, score] dbcan_hits = parse_hmm_hits(dbcan) - # sequence_id -> [ec, gene, product, hmm_id, score, evalue] + # sequence_id -> [ec, gene, product, hmm_id, evalue, score] tigrfams_hits = parse_hmm_hits(tigrfams) output_header = [ @@ -108,7 +108,7 @@ def main(kaiju, hamap, dbcan, tigrfams, diamond, output): tigrfams_ec, tigrfams_gene, tigrfams_product, - tigerfams_score, + tigrfams_score, tigrfams_evalue, hamap_ec, hamap_gene, diff --git a/scripts/summarize_classifications.py b/scripts/summarize_classifications.py index 9e2afa0..8853d67 100755 --- a/scripts/summarize_classifications.py +++ b/scripts/summarize_classifications.py @@ -57,71 +57,89 @@ # logging.debug(f"Final table length: {len(df)}") # return df -def single_output(df,tax_level,samples,min_evlaue,min_score,min_len): +def single_output(df,tax_level,samples,min_evalue,min_score,min_len,group_on,output): split_idx = TAX_LEVELS[tax_level] - 
group_one=["tigrfams","hamap","dbcan","kaiju"] - for item in group_one: - hmm_cols = df.filter(like=item).columns.values.tolist() + # group_one=["tigrfams","hamap","dbcan","kaiju"] + # for item in group_on + #test_df.head() + if "kaiju_taxonomy" in group_on: + hmm_cols = df.filter(like="kaiju").columns.values.tolist() + test_df = df[hmm_cols+samples].copy() + test_df = test_df[(df["kaiju_length"] > min_len)] + test_df["kaiju_taxonomy"] = df["kaiju_taxonomy"].apply(lambda x: "; ".join(str(x).split("; ", split_idx)[0:split_idx])) + test_df = test_df.drop(columns=["kaiju_length"]).groupby(["kaiju_taxonomy"]).sum(axis=1).reset_index() + test_df = test_df.rename( + columns={"kaiju_taxonomy": f"taxonomy_{tax_level}"} + ) + else: + hmm_cols = df.filter(like=group_on[0].lower()).columns.values.tolist() #TODO might not need test_df = df[hmm_cols+samples].copy() - if "kaiju" in item: - test_df = test_df[(df["kaiju_length"] > min_len)] - test_df["kaiju_taxonomy"] = df["kaiju_taxonomy"].apply(lambda x: "; ".join(x.split("; ", split_idx)[0:split_idx])) - test_df = test_df.drop(columns=["kaiju_length"]).groupby(["kaiju_taxonomy"]).sum(axis=1).reset_index() - test_df = test_df.rename( - columns={"kaiju_taxonomy": f"taxonomy_{tax_level}"} - ) - else: - metric_cols = test_df.filter(items=[item+"_score",item+"_evalue"]).columns.values.tolist() - test_df = test_df[(test_df[item+"_score"] > min_score) & (test_df[item+"_evalue"] < min_evalue)] - test_df = test_df.drop(columns=metric_cols).groupby(hmm_cols[0:3]).sum(axis=1).reset_index().head() - test_df.to_csv(item+output, sep="\t", index=False) + metric_cols = test_df.filter(items=[group_on[0]+"_score",group_on[0]+"_evalue"]).columns.values.tolist() + test_df = test_df[(test_df[group_on[0].lower()+"_score"] > min_score) & (test_df[group_on[0].lower()+"_evalue"] < min_evalue)] + test_df = test_df.drop(columns=metric_cols).groupby(hmm_cols[0:3]).sum(axis=1).reset_index() + test_df.to_csv(output, sep="\t", index=False) -def combined_output(df,tax_level,samples,min_evalue,min_score,min_len): +def combined_output(df,tax_level,samples,min_evalue,min_score,min_len,group_on,output): split_idx = TAX_LEVELS[tax_level] - group_one=["tigrfams","hamap","dbcan"] + # group_one=["tigrfams","hamap","dbcan"] kaiju_cols = df.filter(like="kaiju").columns.values.tolist() - for item in group_one: - hmm_cols = df.filter(like=item).columns.values.tolist() - #TODO might not need - test_df = df[kaiju_cols+hmm_cols+samples].copy() - test_df = test_df[(df["kaiju_length"] > min_len) & (test_df[item+"_score"] > min_score) & (test_df[item+"_evalue"] < min_evalue)] - test_df["kaiju_taxonomy"] = df["kaiju_taxonomy"].apply(lambda x: "; ".join(x.split("; ", split_idx)[0:split_idx])) - metric_cols = test_df.filter(items=[item+"_score",item+"_evalue"]).columns.values.tolist() - metric_cols.append("kaiju_length") - test_df = test_df.drop(columns=metric_cols).groupby(["kaiju_taxonomy"]+hmm_cols[0:3]).sum(axis=1).reset_index() - test_df = test_df.rename( - columns={"kaiju_taxonomy": f"taxonomy_{tax_level}"} - ) - test_df.to_csv("taxonomy_"+item, sep="\t", index=False) + # for item in group_one: + hmm_cols = df.filter(like=group_on[0].lower()).columns.values.tolist() + print(samples) + print(group_on[0]) + #TODO might not need + test_df = df[kaiju_cols+hmm_cols+samples].copy() + test_df = test_df[(df["kaiju_length"] > min_len) & (test_df[group_on[0].lower()+"_score"] > min_score) & (test_df[group_on[0].lower()+"_evalue"] < min_evalue)] + test_df["kaiju_taxonomy"] = df["kaiju_taxonomy"].apply(lambda 
x: "; ".join(str(x).split("; ", split_idx)[0:split_idx])) + metric_cols = test_df.filter(items=[group_on[0].lower()+"_score",group_on[0].lower()+"_evalue"]).columns.values.tolist() + print(metric_cols) + metric_cols.append("kaiju_length") + test_df = test_df.drop(columns=metric_cols).groupby(["kaiju_taxonomy"]+hmm_cols[0:3]).sum(axis=1).reset_index() + test_df = test_df.rename( + columns={"kaiju_taxonomy": f"taxonomy_{tax_level}"} + ) + test_df.to_csv(output, sep="\t", index=False) -def main(output, table, tax_level, min_evalue, min_score, min_len): - df = pd.read_table(table) +def main(output, table, tax_level, min_evalue, min_score, min_len,group_on): + df = pd.read_table(table,low_memory=False) + col_names = [ + "seq","kaiju_length","kaiju_taxonomy", "tigrfams_ec", "tigrfams_gene", "tigrfams_product", + "tigrfams_score", "tigrfams_evalue", "hamap_ec", "hamap_gene", + "hamap_product", "hamap_score", "hamap_evalue", "dbcan_ec", + "dbcan_enzyme_class", "dbcan_enzyme_class_subfamily","dbcan_score", "dbcan_evalue" + ] # remove the rows w/ no information - df = df.query("kaiju_length != 0") - samples = df.filter(regex="[0-9]+").columns.values.tolist() - split_idx = TAX_LEVELS[tax_level] - group_one=["tigrfams","hamap","dbcan","kaiju"] - for item in group_one: - print(item) - use_these = df.filter(like=item).columns.values.tolist() - print(use_these) - #TODO might not need - test_df = df[use_these+samples].copy() - if "kaiju" in item: - test_df = test_df[(df["kaiju_length"] > min_len)] - test_df["kaiju_taxonomy"] = df["kaiju_taxonomy"].apply(lambda x: "; ".join(x.split("; ", split_idx)[0:split_idx])) - test_df = test_df.drop(columns=["kaiju_length"]).groupby(["kaiju_taxonomy"]).sum(axis=1).reset_index() - test_df = test_df.rename( - columns={"kaiju_taxonomy": f"taxonomy_{tax_level}"} - ) - else: - metric_cols = test_df.filter(items=[item+"_score",item+"_evalue"]).columns.values.tolist() - test_df = test_df[(test_df[item+"_score"] > min_score) & (test_df[item+"_evalue"] < min_evalue)] - test_df = test_df.drop(columns=metric_cols).groupby(use_these[0:3]).sum(axis=1).reset_index().head() - test_df.to_csv(item+output, sep="\t", index=False) + # df = df.dropna( + # subset= col_names, + # thresh=2) + samples = [c for c in df.columns.values.tolist() if c not in col_names] + if len(group_on) > 1: + combined_output(df,tax_level,samples,min_evalue, min_score, min_len, group_on,output) + else: + single_output(df,tax_level,samples,min_evalue,min_score,min_len,group_on,output) + # split_idx = TAX_LEVELS[tax_level] + # group_one=["tigrfams","hamap","dbcan","kaiju"] + # for item in group_one: + # print(item) + # use_these = df.filter(like=item).columns.values.tolist() + # print(use_these) + # #TODO might not need + # test_df = df[use_these+samples].copy() + # if "kaiju" in item: + # test_df = test_df[(df["kaiju_length"] > min_len)] + # test_df["kaiju_taxonomy"] = df["kaiju_taxonomy"].apply(lambda x: "; ".join(x.split("; ", split_idx)[0:split_idx])) + # test_df = test_df.drop(columns=["kaiju_length"]).groupby(["kaiju_taxonomy"]).sum(axis=1).reset_index() + # test_df = test_df.rename( + # columns={"kaiju_taxonomy": f"taxonomy_{tax_level}"} + # ) + # else: + # metric_cols = test_df.filter(items=[item+"_score",item+"_evalue"]).columns.values.tolist() + # test_df = test_df[(test_df[item+"_score"] > min_score) & (test_df[item+"_evalue"] < min_evalue)] + # test_df = test_df.drop(columns=metric_cols).groupby(use_these[0:3]).sum(axis=1).reset_index().head() + # test_df.to_csv(item+output, sep="\t", index=False) # 
tax_split_level = TAX_LEVELS[tax_level]
 #     sample_df = None
 #     logging.debug(f"Preparing to parse {len(tables)} tables")
@@ -169,7 +187,7 @@ def main(output, table, tax_level, min_evalue, min_score, min_len):
     )
     parser.add_argument(
         "--min-evalue",
-        type=int,
+        type=float,
         default=0.001,
         help="lowest evalue to retain per sequence per sample",
     )
@@ -181,17 +199,17 @@
     )
     parser.add_argument(
         "--min-len",
-        type=int,
+        type=float,
         default=0.001,
         help="minimum kaiju alignment length to retain",
     )
-    # parser.add_argument(
-    #     "--group-on",
-    #     # choices=["kaiju_classification", "ko", "ec", "product"],
-    #     nargs="+",
-    #     default="kaiju_taxonomy",
-    #     help="cluster tables on taxonomy and/or function; headers must match in tables",
-    # )
+    parser.add_argument(
+        "--group-on",
+        # choices=["kaiju_classification", "ko", "ec", "product"],
+        nargs="+",
+        default=["kaiju_taxonomy"],
+        help="cluster tables on taxonomy and/or function; headers must match in tables",
+    )
     parser.add_argument(
         "--verbose", action="store_true", help="increase output verbosity"
    )
@@ -205,5 +223,5 @@
         args.min_evalue,
         args.min_score,
         args.min_len,
-        # args.group_on,
+        args.group_on,
     )
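
The build_tax_table and build_hmm_and_tax_table shell blocks in the Snakefile above are the
callers of the reworked --group-on interface in scripts/summarize_classifications.py. A minimal
sketch of the equivalent stand-alone invocations follows; the threshold values are illustrative
assumptions, not values taken from this patch or its config:

    # taxonomy-only summary at the order level (one --group-on value, handled by single_output)
    python scripts/summarize_classifications.py \
        --group-on kaiju_taxonomy \
        --min-evalue 0.001 --min-score 40 --min-len 25 \
        --tax-level order \
        summaries/taxonomy/order.txt gene_catalog/annotations_cleaned.txt

    # combined HMM + taxonomy summary (two --group-on values, handled by combined_output)
    python scripts/summarize_classifications.py \
        --group-on TIGRFAMs kaiju_taxonomy \
        --min-evalue 0.001 --min-score 40 --min-len 25 \
        --tax-level order \
        summaries/combined/TIGRFAMs_order.txt gene_catalog/annotations_cleaned.txt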