Skip to content

Commit 8cd8a2a

Browse files
committed
fix(test): fix broken AsciiDoc imports, refactor data source builder
The query files included by the AsciiDoc templates were being referenced as HTML files. Add a `file_extension` parameter to the data source builder so it produces whichever file extension is needed, currently either `.html` or `.adoc`. Update and add tests accordingly so that such mistakes are caught in the future.
1 parent 98de660 commit 8cd8a2a

File tree

4 files changed

+69
-23
lines changed

4 files changed

+69
-23
lines changed

dqgen/services/asciidoc_templates_generator.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
# Author: Generated for AsciiDoc template support
66
import logging
77
import pathlib
8-
from distutils.dir_util import copy_tree
8+
from shutil import copytree, copyfile
99
from pathlib import Path
10-
from shutil import copyfile
1110

1211
import numpy as np
1312
import pandas as pd
@@ -16,7 +15,7 @@
1615
from dqgen.services import INSTANCE_OPERATIONS, PROPERTIES_OPERATIONS, REIFIED_PROPERTIES_OPERATIONS, ASCII_DOC_TEMPLATES, \
1716
PATH_TO_ASCIIDOC_STATIC_FOLDER, TEMPLATE_AND_ASCIIDOC_FILE_NAME_MAPPING
1817
from dqgen.services.asciidoc_generator import AsciiDocGenerator
19-
from dqgen.services.html_templates_data_source_builder import build_datasource_for_html_template, camel_case_to_words
18+
from dqgen.services.templates_data_source_builder import build_datasource_for_template, camel_case_to_words
2019
from dqgen.services.validate_application_profile import validate_application_profile
2120

2221

@@ -98,7 +97,7 @@ def generate_asciidoc_template(processed_csv_file: pd.DataFrame, asciidoc_output
9897
:return:
9998
"""
10099

101-
data_source = build_datasource_for_html_template(processed_csv_file=processed_csv_file)
100+
data_source = build_datasource_for_template(processed_csv_file=processed_csv_file, file_extension='adoc')
102101
build_template = template.stream(data_source=data_source)
103102
build_template.dump(asciidoc_output_folder_path + "/" + file_name)
104103

@@ -134,5 +133,6 @@ def generate_asciidoc_templates_from_csv(ap_file_path: pathlib.Path, output_base
134133
generate_asciidoc_template(processed_csv_file=processed_csv_file,
135134
asciidoc_output_folder_path=str(asciidoc_output), template=template, file_name=file_name)
136135

137-
copy_tree(PATH_TO_ASCIIDOC_STATIC_FOLDER, str(asciidoc_output))
136+
# copy static files into the generated asciidoc output directory
137+
copytree(PATH_TO_ASCIIDOC_STATIC_FOLDER, str(asciidoc_output), dirs_exist_ok=True)
138138

dqgen/services/html_templates_generator.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66
# Email: costezki.eugen@gmail.com
77
import logging
88
import pathlib
9-
from distutils.dir_util import copy_tree
9+
from shutil import copytree, copyfile
1010
from pathlib import Path
11-
from shutil import copyfile
1211

1312
import numpy as np
1413
import pandas as pd
@@ -18,7 +17,7 @@
1817
from dqgen.services import INSTANCE_OPERATIONS, PROPERTIES_OPERATIONS, REIFIED_PROPERTIES_OPERATIONS, HTML_TEMPLATES, \
1918
PATH_TO_STATIC_FOLDER, TEMPLATE_AND_HTML_FILE_NAME_MAPPING
2019
from dqgen.services.html_generator import HtmlGenerator
21-
from dqgen.services.html_templates_data_source_builder import build_datasource_for_html_template, camel_case_to_words
20+
from dqgen.services.templates_data_source_builder import build_datasource_for_template, camel_case_to_words
2221
from dqgen.services.validate_application_profile import validate_application_profile
2322

2423

@@ -100,7 +99,7 @@ def generate_html_template(processed_csv_file: pd.DataFrame, html_output_folder_
10099
:return:
101100
"""
102101

103-
data_source = build_datasource_for_html_template(processed_csv_file=processed_csv_file)
102+
data_source = build_datasource_for_template(processed_csv_file=processed_csv_file)
104103
build_template = template.stream(data_source=data_source)
105104
build_template.dump(html_output_folder_path + "/" + file_name)
106105

@@ -136,4 +135,5 @@ def generate_html_templates_from_csv(ap_file_path: pathlib.Path, output_base_dir
136135
generate_html_template(processed_csv_file=processed_csv_file,
137136
html_output_folder_path=str(html_output), template=template, file_name=file_name)
138137

139-
copy_tree(PATH_TO_STATIC_FOLDER, str(html_output))
138+
# copy static files into the generated html output directory
139+
copytree(PATH_TO_STATIC_FOLDER, str(html_output), dirs_exist_ok=True)

dqgen/services/html_templates_data_source_builder.py renamed to dqgen/services/templates_data_source_builder.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,21 @@ def camel_case_to_words(name: str):
1616
return ' '.join(words)
1717

1818

19-
def generate_file_data(cls, class_folder_name, operation, prop=None, obj_prop=None):
19+
def generate_file_data(cls, class_folder_name, operation, prop=None, obj_prop=None, file_extension="html"):
2020
"""
2121
This method will return a query file path and a count query file name
2222
:param cls:
2323
:param class_folder_name:
2424
:param operation:
2525
:param prop:
2626
:param obj_prop:
27+
:param file_extension: extension to use for the generated files (html/adoc)
2728
:return:
2829
"""
2930
file_path = make_file_path(output_folder_path=class_folder_name,
3031
file_name=make_file_name(operation=operation,
3132
cls=cls,
32-
file_extension="html",
33+
file_extension=file_extension,
3334
prop=prop, obj_prop=obj_prop))
3435
count_file_name = make_file_name(operation="count_" + operation,
3536
cls=cls,
@@ -39,7 +40,7 @@ def generate_file_data(cls, class_folder_name, operation, prop=None, obj_prop=No
3940

4041

4142
def iterate_operations(operation_list, file_paths, count_queries, cls, class_folder_name, prop=None,
42-
obj_prop=None):
43+
obj_prop=None, file_extension="html"):
4344
"""
4445
This method will iterate through a list of operations. It will put one query file path in the file paths list
4546
and one count query name in the count query list
@@ -50,22 +51,25 @@ def iterate_operations(operation_list, file_paths, count_queries, cls, class_fol
5051
:param class_folder_name:
5152
:param prop:
5253
:param obj_prop:
54+
:param file_extension: extension to use for generated file paths
5355
:return:
5456
"""
5557
for operation in operation_list:
5658
file_path, count_file_name = generate_file_data(cls=cls, class_folder_name=class_folder_name,
57-
operation=operation, prop=prop, obj_prop=obj_prop)
59+
operation=operation, prop=prop, obj_prop=obj_prop,
60+
file_extension=file_extension)
5861
file_paths.append(file_path)
5962
count_queries.append(count_file_name)
6063

6164

62-
def add_instance_changes(data_source, cls, class_name, class_folder_name):
65+
def add_instance_changes(data_source, cls, class_name, class_folder_name, file_extension="html"):
6366
"""
6467
This method will build the necessary data at the class level and it will add it to a data source dictionary
6568
:param data_source:
6669
:param cls:
6770
:param class_name:
6871
:param class_folder_name:
72+
:param file_extension: extension to use for generated file paths
6973
:return:
7074
"""
7175
if "instance_changes" not in data_source[cls].keys():
@@ -75,7 +79,8 @@ def add_instance_changes(data_source, cls, class_name, class_folder_name):
7579
instance_count_queries = []
7680

7781
iterate_operations(operation_list=INSTANCE_OPERATIONS, file_paths=instance_file_paths,
78-
count_queries=instance_count_queries, cls=cls, class_folder_name=class_folder_name)
82+
count_queries=instance_count_queries, cls=cls, class_folder_name=class_folder_name,
83+
file_extension=file_extension)
7984

8085
data_source[cls]["instance_changes"].update(
8186
{"files": instance_file_paths, "count_queries": instance_count_queries})
@@ -104,10 +109,11 @@ def add_prop_group_details(data_source, prop_group_value, cls, prop_name, prop_f
104109
{prop_name: {"files": count_prop_file_paths, "label": prop_name}})
105110

106111

107-
def build_datasource_for_html_template(processed_csv_file: pd.DataFrame) -> dict:
112+
def build_datasource_for_template(processed_csv_file: pd.DataFrame, file_extension: str = "html") -> dict:
108113
"""
109114
This method will build a data source dictionary from a given application profile dataframe
110115
:param processed_csv_file:
116+
:param file_extension: extension to use for generated file paths (default: html)
111117
:return:
112118
"""
113119
data_source = {}
@@ -121,7 +127,7 @@ def build_datasource_for_html_template(processed_csv_file: pd.DataFrame) -> dict
121127
data_source[row["class"]] = {"label": camel_case_to_words(class_name).title(), "prop_groups": {}}
122128

123129
add_instance_changes(data_source=data_source, cls=row["class"], class_name=class_name,
124-
class_folder_name=class_folder_name)
130+
class_folder_name=class_folder_name, file_extension=file_extension)
125131

126132
prop_file_paths = []
127133
count_queries = []
@@ -130,16 +136,20 @@ def build_datasource_for_html_template(processed_csv_file: pd.DataFrame) -> dict
130136
prop_name = row["property"]
131137
iterate_operations(operation_list=PROPERTIES_OPERATIONS, file_paths=prop_file_paths,
132138
count_queries=count_queries, cls=row["class"],
133-
class_folder_name=f'{class_folder_name}/{prop_group_folder}', prop=row["property"])
139+
class_folder_name=f'{class_folder_name}/{prop_group_folder}', prop=row["property"],
140+
file_extension=file_extension)
134141
else:
135142
prop_name = row["property"] + "/" + row["object property"]
136143
iterate_operations(operation_list=REIFIED_PROPERTIES_OPERATIONS, file_paths=prop_file_paths,
137144
count_queries=count_queries, cls=row["class"],
138145
class_folder_name=f'{class_folder_name}/{prop_group_folder}', prop=row["property"],
139-
obj_prop=row["object property"])
146+
obj_prop=row["object property"], file_extension=file_extension)
140147

141148
add_prop_group_details(data_source=data_source, prop_group_value=row["property group"], cls=row["class"],
142149
prop_name=prop_name, prop_file_paths=prop_file_paths,
143150
count_prop_file_paths=count_queries)
144151

145152
return data_source
153+
154+
# Backwards compatibility alias
155+
build_datasource_for_html_template = build_datasource_for_template

tests/unit/test_html_data_source_builder.py renamed to tests/unit/test_data_source_builder.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pathlib
22

33
from dqgen.adapters.ap_reader import read_ap_from_csv
4-
from dqgen.services.html_templates_data_source_builder import camel_case_to_words, build_datasource_for_html_template, \
4+
from dqgen.services.templates_data_source_builder import camel_case_to_words, build_datasource_for_template, \
55
generate_file_data, iterate_operations, add_instance_changes, add_prop_group_details
66

77

@@ -50,11 +50,47 @@ def test_add_prop_group_details():
5050
"preferred labels"].keys())
5151

5252

53-
def test_build_datasource_for_html_template():
53+
def test_build_datasource_for_html_template():
5454
path_to_csv_file = pathlib.Path(__file__).parent.parent / "test_data" / "aps" / "src_ap_mod.csv"
5555
df = read_ap_from_csv(path_to_csv_file)
56-
data_source = build_datasource_for_html_template(processed_csv_file=df)
56+
data_source = build_datasource_for_template(processed_csv_file=df)
5757
print(data_source)
5858
assert isinstance(data_source, dict)
5959
assert 'skos:Concept', 'skos:Collection' in data_source.keys()
6060
assert "label", "prop_groups" in data_source["skos:Concept"].keys()
61+
62+
# check at least one class-level instance file uses .html
63+
instance_files = data_source.get("skos:Concept", {}).get("instance_changes", {}).get("files", [])
64+
assert any(f.endswith('.html') for f in instance_files)
65+
66+
# check a property-level file uses .html
67+
prop_groups = data_source.get("skos:Concept", {}).get("prop_groups", {})
68+
found = False
69+
for group in prop_groups.values():
70+
paths = group.get("query_template_file_paths", [])
71+
if any(p.endswith('.html') for p in paths):
72+
found = True
73+
break
74+
assert found, "No .html file paths found in property groups"
75+
76+
77+
def test_build_datasource_for_adoc_template():
78+
# Ensure the data source builder emits .adoc file paths when requested
79+
path_to_csv_file = pathlib.Path(__file__).parent.parent / "test_data" / "aps" / "src_ap_mod.csv"
80+
df = read_ap_from_csv(path_to_csv_file)
81+
data_source = build_datasource_for_template(processed_csv_file=df, file_extension="adoc")
82+
83+
# check at least one class-level instance file uses .adoc
84+
instance_files = data_source.get("skos:Concept", {}).get("instance_changes", {}).get("files", [])
85+
assert any(f.endswith('.adoc') for f in instance_files)
86+
87+
# check a property-level file uses .adoc
88+
prop_groups = data_source.get("skos:Concept", {}).get("prop_groups", {})
89+
# find any prop group entry and check its query template file paths
90+
found = False
91+
for group in prop_groups.values():
92+
paths = group.get("query_template_file_paths", [])
93+
if any(p.endswith('.adoc') for p in paths):
94+
found = True
95+
break
96+
assert found, "No .adoc file paths found in property groups"

0 commit comments

Comments
 (0)