@click.command()
@click.argument("file_path", type=click.Path(exists=True, dir_okay=False))
@click.argument("output_folder", type=click.Path(dir_okay=True, file_okay=False))
def generate_asciidoc_templates(file_path, output_folder):
    """
    Command line entry point: render the AsciiDoc templates for one application profile CSV.

    :param file_path: path of an existing file (click rejects directories and missing paths)
    :param output_folder: folder path handed to the generator (click rejects plain files)
    """
    ap_csv = pathlib.Path(file_path)
    destination = pathlib.Path(output_folder)
    generate_asciidoc_templates_from_csv(ap_csv, destination)


if __name__ == '__main__':
    generate_asciidoc_templates()
class AsciiDocGenerator(BaseGenerator):
    """
    Generator that renders one AsciiDoc template file out of an AsciiDoc meta-template.
    """

    def __init__(self, cls: str, operation: str, output_folder_path: str, template: Template, prop: str = None,
                 object_property: str = None, new_version_graph: str = None, old_version_graph: str = None,
                 version_history_graph: str = None, languages: list = QUERY_FALLBACK_LANGUAGES,
                 class_name: str = "", prop_name: str = "", obj_prop_name: str = ""):
        """
        Forward the generic generator arguments to the base class and remember the
        human readable labels that are later passed to the meta-template.

        :param class_name: label of the class
        :param prop_name: label of the property
        :param obj_prop_name: label of the object property
        """
        super().__init__(cls, operation, output_folder_path, template, prop, object_property, new_version_graph,
                         old_version_graph, version_history_graph, languages)
        # set after the base initialiser has run, exactly as before
        self.file_extension = "adoc"
        self.class_name, self.prop_name, self.obj_prop_name = class_name, prop_name, obj_prop_name

    def build_template(self):
        """
        Build the AsciiDoc template from the meta-template held by this generator.

        :return: whatever template_builder.build_html_template returns for the meta-template
        """
        query_file = self.build_file_name(file_extension='rq')
        # keep only the part of the operation name in front of the first underscore
        operation_prefix, _, _ = self.operation.partition("_")
        template_arguments = {
            "jinja2_template": self.template,
            "query_file": query_file,
            "operation": operation_prefix,
            "cls": self.cls,
            "prop": self.prop,
            "obj_prop": self.object_property,
            "class_name": self.class_name,
            "prop_name": self.prop_name,
            "obj_prop_name": self.obj_prop_name,
        }
        return template_builder.build_html_template(**template_arguments)
def _resolve_output_folder(base_folder, class_curie, property_group=None):
    """
    Build, and create on disk, the folder that receives the templates of one class.

    :param base_folder: root AsciiDoc output folder (str or Path)
    :param class_curie: prefixed class name; the part after the colon, lower-cased, is the folder name
    :param property_group: optional property group label; when it is present a sub-folder named
        after it (spaces replaced by underscores) is appended
    :return: the folder path as a string
    """
    folder = Path(base_folder) / class_curie.split(":")[1].lower()
    # pd.notna replaces the former `is not np.NaN` identity test: the np.NaN alias no longer
    # exists in NumPy 2.0 and an identity check misses NaN objects other than the singleton.
    if pd.notna(property_group) and property_group:
        folder = folder / property_group.replace(" ", "_")
    folder.mkdir(parents=True, exist_ok=True)
    return str(folder)


def generate_class_level_asciidoc_templates(processed_csv_file: pd.DataFrame, asciidoc_output_folder_path):
    """
    Generate the instance-level AsciiDoc templates for each class in the configuration CSV.

    :param processed_csv_file: application profile dataframe; the "class" column is read
    :param asciidoc_output_folder_path: root folder; one sub-folder per class is created in it
    """
    for cls in processed_csv_file["class"].unique():
        # folder and label depend on the class only, so they are resolved once per class
        class_label = camel_case_to_words(cls.split(":")[1]).title()
        output_folder_path = _resolve_output_folder(asciidoc_output_folder_path, cls)
        for operation in INSTANCE_OPERATIONS:
            AsciiDocGenerator(cls=cls, operation=operation,
                              class_name=class_label,
                              output_folder_path=output_folder_path,
                              template=ASCII_DOC_TEMPLATES.get_template("instance.jinja2")).to_file()
    logging.info("Generated instance AsciiDoc templates ...")


def generate_property_level_asciidoc_templates(processed_csv_file: pd.DataFrame, asciidoc_output_folder_path):
    """
    Generate the AsciiDoc templates for the rows of the configuration CSV that have no object property.

    :param processed_csv_file: application profile dataframe; the "class", "property",
        "object property" and "property group" columns are read
    :param asciidoc_output_folder_path: root folder; templates go to <class>[/<property group>]
    """
    for _, row in processed_csv_file.iterrows():
        if row["object property"]:
            # rows with an object property are handled by the reified generator
            continue
        output_folder_path = _resolve_output_folder(asciidoc_output_folder_path, row["class"],
                                                    row["property group"])
        prop_label = camel_case_to_words(row["property"].split(":")[1]).lower()
        for operation in PROPERTIES_OPERATIONS:
            AsciiDocGenerator(cls=row["class"],
                              prop=row["property"],
                              prop_name=prop_label,
                              operation=operation,
                              output_folder_path=output_folder_path,
                              template=ASCII_DOC_TEMPLATES.get_template("property.jinja2")).to_file()

    logging.info("Generated property AsciiDoc templates ...")


def generate_reified_property_level_asciidoc_templates(processed_csv_file: pd.DataFrame,
                                                       asciidoc_output_folder_path):
    """
    Generate the AsciiDoc templates for the rows of the configuration CSV that have an object property.

    :param processed_csv_file: application profile dataframe; the "class", "property",
        "object property" and "property group" columns are read
    :param asciidoc_output_folder_path: root folder; templates go to <class>[/<property group>]
    """
    for _, row in processed_csv_file.iterrows():
        if not row["object property"]:
            # rows without an object property are handled by the property generator
            continue
        output_folder_path = _resolve_output_folder(asciidoc_output_folder_path, row["class"],
                                                    row["property group"])
        prop_label = camel_case_to_words(row["property"].split(":")[1]).lower()
        for operation in REIFIED_PROPERTIES_OPERATIONS:
            AsciiDocGenerator(cls=row["class"],
                              prop=row["property"],
                              object_property=row["object property"],
                              prop_name=prop_label,
                              operation=operation,
                              output_folder_path=output_folder_path,
                              template=ASCII_DOC_TEMPLATES.get_template("reified_property.jinja2")).to_file()

    logging.info("Generated reified property AsciiDoc templates ...")


def generate_asciidoc_template(processed_csv_file: pd.DataFrame, asciidoc_output_folder_path, template, file_name):
    """
    Build one AsciiDoc page and write it into the given folder.

    :param processed_csv_file: application profile dataframe used to build the data source
    :param asciidoc_output_folder_path: folder that receives the page (str or Path)
    :param template: template object; its ``stream`` method is called with the data source
    :param file_name: name of the file written inside the output folder
    :return: None
    """
    data_source = build_datasource_for_template(processed_csv_file=processed_csv_file, file_extension='adoc')
    target = Path(asciidoc_output_folder_path) / file_name
    template.stream(data_source=data_source).dump(str(target))


def copy_files_from_static_folder(file_list: list, destination_folder: str):
    """
    Copy the given files into a destination folder, keeping their file names.

    :param file_list: files to copy (Path objects or path strings)
    :param destination_folder: existing folder that receives the copies (str or Path)
    """
    destination = Path(destination_folder)
    for file in file_list:
        source = Path(file)
        copyfile(source, destination / source.name)


def generate_asciidoc_templates_from_csv(ap_file_path: pathlib.Path, output_base_dir: pathlib.Path):
    """
    Generate the full set of AsciiDoc templates described by a configuration CSV.

    The output is written to <output_base_dir>/<CSV file stem>/asciidoc, and the content of the
    AsciiDoc static folder is copied into that same directory afterwards.

    :param ap_file_path: path of the application profile CSV
    :param output_base_dir: base directory under which the output tree is created
    """
    processed_csv_file = read_ap_from_csv(ap_file_path)
    # NOTE(review): the unit tests expect a ValueError here for an invalid profile - confirm
    validate_application_profile(application_profile_df=processed_csv_file)

    asciidoc_output = Path(output_base_dir) / Path(ap_file_path).stem / "asciidoc"
    asciidoc_output.mkdir(parents=True, exist_ok=True)
    output_folder = str(asciidoc_output)

    generate_class_level_asciidoc_templates(processed_csv_file=processed_csv_file,
                                            asciidoc_output_folder_path=output_folder)
    generate_property_level_asciidoc_templates(processed_csv_file=processed_csv_file,
                                               asciidoc_output_folder_path=output_folder)
    generate_reified_property_level_asciidoc_templates(processed_csv_file=processed_csv_file,
                                                       asciidoc_output_folder_path=output_folder)

    for file_name, template in TEMPLATE_AND_ASCIIDOC_FILE_NAME_MAPPING.items():
        generate_asciidoc_template(processed_csv_file=processed_csv_file,
                                   asciidoc_output_folder_path=output_folder, template=template,
                                   file_name=file_name)

    # copy static files into the generated asciidoc output directory
    copytree(PATH_TO_ASCIIDOC_STATIC_FOLDER, output_folder, dirs_exist_ok=True)
import build_datasource_for_template, camel_case_to_words from dqgen.services.validate_application_profile import validate_application_profile @@ -100,7 +99,7 @@ def generate_html_template(processed_csv_file: pd.DataFrame, html_output_folder_ :return: """ - data_source = build_datasource_for_html_template(processed_csv_file=processed_csv_file) + data_source = build_datasource_for_template(processed_csv_file=processed_csv_file) build_template = template.stream(data_source=data_source) build_template.dump(html_output_folder_path + "/" + file_name) @@ -136,4 +135,5 @@ def generate_html_templates_from_csv(ap_file_path: pathlib.Path, output_base_dir generate_html_template(processed_csv_file=processed_csv_file, html_output_folder_path=str(html_output), template=template, file_name=file_name) - copy_tree(PATH_TO_STATIC_FOLDER, str(html_output)) + # copy static files into the generated html output directory + copytree(PATH_TO_STATIC_FOLDER, str(html_output), dirs_exist_ok=True) diff --git a/dqgen/services/html_templates_data_source_builder.py b/dqgen/services/templates_data_source_builder.py similarity index 84% rename from dqgen/services/html_templates_data_source_builder.py rename to dqgen/services/templates_data_source_builder.py index ee032bc..5979c87 100644 --- a/dqgen/services/html_templates_data_source_builder.py +++ b/dqgen/services/templates_data_source_builder.py @@ -16,7 +16,7 @@ def camel_case_to_words(name: str): return ' '.join(words) -def generate_file_data(cls, class_folder_name, operation, prop=None, obj_prop=None): +def generate_file_data(cls, class_folder_name, operation, prop=None, obj_prop=None, file_extension="html"): """ This method will return a query file path and a count query file name :param cls: @@ -24,12 +24,13 @@ def generate_file_data(cls, class_folder_name, operation, prop=None, obj_prop=No :param operation: :param prop: :param obj_prop: + :param file_extension: extension to use for the generated files (html/adoc) :return: """ file_path = 
make_file_path(output_folder_path=class_folder_name, file_name=make_file_name(operation=operation, cls=cls, - file_extension="html", + file_extension=file_extension, prop=prop, obj_prop=obj_prop)) count_file_name = make_file_name(operation="count_" + operation, cls=cls, @@ -39,7 +40,7 @@ def generate_file_data(cls, class_folder_name, operation, prop=None, obj_prop=No def iterate_operations(operation_list, file_paths, count_queries, cls, class_folder_name, prop=None, - obj_prop=None): + obj_prop=None, file_extension="html"): """ This method will iterate through a list of operations. It will put one query file path in the file paths list and one count query name in the count query list @@ -50,22 +51,25 @@ def iterate_operations(operation_list, file_paths, count_queries, cls, class_fol :param class_folder_name: :param prop: :param obj_prop: + :param file_extension: extension to use for generated file paths :return: """ for operation in operation_list: file_path, count_file_name = generate_file_data(cls=cls, class_folder_name=class_folder_name, - operation=operation, prop=prop, obj_prop=obj_prop) + operation=operation, prop=prop, obj_prop=obj_prop, + file_extension=file_extension) file_paths.append(file_path) count_queries.append(count_file_name) -def add_instance_changes(data_source, cls, class_name, class_folder_name): +def add_instance_changes(data_source, cls, class_name, class_folder_name, file_extension="html"): """ This method will build the necessary data at the class level and it will add it to a data source dictionary :param data_source: :param cls: :param class_name: :param class_folder_name: + :param file_extension: extension to use for generated file paths :return: """ if "instance_changes" not in data_source[cls].keys(): @@ -75,7 +79,8 @@ def add_instance_changes(data_source, cls, class_name, class_folder_name): instance_count_queries = [] iterate_operations(operation_list=INSTANCE_OPERATIONS, file_paths=instance_file_paths, - 
count_queries=instance_count_queries, cls=cls, class_folder_name=class_folder_name) + count_queries=instance_count_queries, cls=cls, class_folder_name=class_folder_name, + file_extension=file_extension) data_source[cls]["instance_changes"].update( {"files": instance_file_paths, "count_queries": instance_count_queries}) @@ -104,10 +109,11 @@ def add_prop_group_details(data_source, prop_group_value, cls, prop_name, prop_f {prop_name: {"files": count_prop_file_paths, "label": prop_name}}) -def build_datasource_for_html_template(processed_csv_file: pd.DataFrame) -> dict: +def build_datasource_for_template(processed_csv_file: pd.DataFrame, file_extension: str = "html") -> dict: """ This method will build a data source dictionary from a given application profile dataframe :param processed_csv_file: + :param file_extension: extension to use for generated file paths (default: html) :return: """ data_source = {} @@ -121,7 +127,7 @@ def build_datasource_for_html_template(processed_csv_file: pd.DataFrame) -> dict data_source[row["class"]] = {"label": camel_case_to_words(class_name).title(), "prop_groups": {}} add_instance_changes(data_source=data_source, cls=row["class"], class_name=class_name, - class_folder_name=class_folder_name) + class_folder_name=class_folder_name, file_extension=file_extension) prop_file_paths = [] count_queries = [] @@ -130,16 +136,20 @@ def build_datasource_for_html_template(processed_csv_file: pd.DataFrame) -> dict prop_name = row["property"] iterate_operations(operation_list=PROPERTIES_OPERATIONS, file_paths=prop_file_paths, count_queries=count_queries, cls=row["class"], - class_folder_name=f'{class_folder_name}/{prop_group_folder}', prop=row["property"]) + class_folder_name=f'{class_folder_name}/{prop_group_folder}', prop=row["property"], + file_extension=file_extension) else: prop_name = row["property"] + "/" + row["object property"] iterate_operations(operation_list=REIFIED_PROPERTIES_OPERATIONS, file_paths=prop_file_paths, 
def test_instance_asciidoc_generator(tmp_path):
    """An instance-level generator writes a file that contains the expected heading."""
    generator = AsciiDocGenerator(cls="skos:Concept", operation="added_instance", class_name="concept",
                                  output_folder_path=str(tmp_path),
                                  template=ASCII_DOC_TEMPLATES.get_template("instance.jinja2"))

    target_path = generator.build_file_path()
    generator.to_file()
    content = get_file_content(target_path)

    assert Path(target_path).is_file()
    assert isinstance(content, str)
    assert """== Added concepts""" in content


def test_generate_asciidoc_templates_from_csv(tmp_path):
    """A valid profile produces the asciidoc tree; the skos_core.csv profile is rejected with ValueError."""
    generate_asciidoc_templates_from_csv(ap_file_path=PATH_TO_APS / "src_ap_mod.csv", output_base_dir=tmp_path)

    asciidoc_folder = tmp_path / "src_ap_mod" / "asciidoc"
    assert pathlib.Path(tmp_path).is_dir()
    assert pathlib.Path(asciidoc_folder).is_dir()
    assert pathlib.Path(asciidoc_folder / "main.adoc").is_file()

    with pytest.raises(ValueError):
        generate_asciidoc_templates_from_csv(ap_file_path=PATH_TO_APS / "skos_core.csv", output_base_dir=tmp_path)
def test_build_datasource_for_hmtl_template():
    """With the default extension the data source builder emits .html file paths."""
    path_to_csv_file = pathlib.Path(__file__).parent.parent / "test_data" / "aps" / "src_ap_mod.csv"
    df = read_ap_from_csv(path_to_csv_file)
    data_source = build_datasource_for_template(processed_csv_file=df)
    assert isinstance(data_source, dict)
    # `assert "a", "b" in keys` only asserts the truthiness of "a" (the rest is the failure
    # message), so it could never fail; compare key sets instead.
    assert {'skos:Concept', 'skos:Collection'} <= set(data_source.keys())
    assert {"label", "prop_groups"} <= set(data_source["skos:Concept"].keys())

    # check at least one class-level instance file uses .html
    instance_files = data_source.get("skos:Concept", {}).get("instance_changes", {}).get("files", [])
    assert any(f.endswith('.html') for f in instance_files)

    # check a property-level file uses .html
    prop_groups = data_source.get("skos:Concept", {}).get("prop_groups", {})
    found = any(p.endswith('.html')
                for group in prop_groups.values()
                for p in group.get("query_template_file_paths", []))
    assert found, "No .html file paths found in property groups"


def test_build_datasource_for_adoc_template():
    """Ensure the data source builder emits .adoc file paths when requested."""
    path_to_csv_file = pathlib.Path(__file__).parent.parent / "test_data" / "aps" / "src_ap_mod.csv"
    df = read_ap_from_csv(path_to_csv_file)
    data_source = build_datasource_for_template(processed_csv_file=df, file_extension="adoc")

    # check at least one class-level instance file uses .adoc
    instance_files = data_source.get("skos:Concept", {}).get("instance_changes", {}).get("files", [])
    assert any(f.endswith('.adoc') for f in instance_files)

    # check a property-level file uses .adoc: look through every prop group entry
    prop_groups = data_source.get("skos:Concept", {}).get("prop_groups", {})
    found = any(p.endswith('.adoc')
                for group in prop_groups.values()
                for p in group.get("query_template_file_paths", []))
    assert found, "No .adoc file paths found in property groups"