Skip to content

Commit 059c7e5

Browse files
committed
continuing to model knowledge graph
1 parent c389a17 commit 059c7e5

File tree

1 file changed

+34
-5
lines changed

1 file changed

+34
-5
lines changed

archive_query_log/export/knowledge_graph.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535

3636

37-
# class WebSearchResultBlock(UuidBaseDocument):
37+
# class ResultBlock():
3838
# last_modified: DefaultDate
3939
# archive: InnerArchive
4040
# provider: InnerProvider
@@ -43,8 +43,6 @@
4343
# content: Text
4444
# rank: Integer
4545
# url: HttpUrl | None = None
46-
# title: Text | None = None
47-
# text: Text | None = None
4846
# parser: InnerParser | None = None
4947
# should_fetch_captures: bool = True
5048
# last_fetched_captures: Date | None = None
@@ -55,6 +53,10 @@
5553
# warc_location_after_serp: WarcLocation | None = None
5654
# warc_downloader_after_serp: InnerDownloader | None = None
5755

56+
# class WebSearchResultBlock(UuidBaseDocument):
57+
# title: Text | None = None
58+
# text: Text | None = None
59+
5860
# class Index:
5961
# settings = {
6062
# "number_of_shards": 20,
@@ -63,6 +65,15 @@
6365

6466

6567

68+
# class SpecialContentsResultBlock(UuidBaseDocument):
69+
# text: Text | None = None
70+
71+
# class Index:
72+
# settings = {
73+
# "number_of_shards": 20,
74+
# "number_of_replicas": 2,
75+
# }
76+
6677
def iter_turtle_triples(provider: Provider) -> Iterator[tuple[str, str, str]]:
6778
entity = f"https://aql.webis.de/provider/{provider.id}"
6879

@@ -83,6 +94,7 @@ def iter_turtle_triples(archive: Archive) -> Iterator[tuple[str, str, str]]:
8394
yield(entity, "schema:name", archive.name)
8495
yield(entity, "aql:mementoAPIBaseURL", archive.memento_api_url)
8596
yield(entity, "aql:cdxAPIBaseURL", archive.cdx_api_url)
97+
# priority not modeled since grayed out
8698
#Missing aqlWikidataUrl -> to be exported with another method
8799

88100
def iter_turtle_triples(capture: Capture) -> Iterator[tuple[str, str, str]]:
@@ -114,7 +126,24 @@ def iter_turtle_triples(serp: SERP) -> Iterator[tuple[str, str, str]]:
114126

115127
for specialcontentsresultblock in serp.warc_special_contents_result_blocks:
116128
yield(entity, "schema:hasPart", f"https://aql.webis.de/specialcontentsresultblock/{specialcontentsresultblock.id}") # iterate over all specialcontentsresultblocks of a serp
117-
129+
# TODO: are ResultBlocks connected here directly, or the child classes WebSearchResultBlock/SpecialContentsResultBlock?
130+
131+
118132
# TODO: how should the ResultBlock superclass be modeled? We don't have a Python class for it yet.
119133

120-
# Model result block as superclass in python first, then in knowledgegraph
134+
# Model result block as superclass in python first, then in knowledgegraph
135+
# Update this once heinrich has pushed his code
136+
137+
138+
# TODO: model WebSearchResultBlock and SpecialContentsResultBlock, as well as ResultBlock, as soon as Heinrich has pushed his code
139+
140+
141+
142+
# def iter_turtle_triples(result: Result) -> Iterator[tuple[str, str, str]]:  # TODO: Result doesn't exist in the Python classes yet, since results have not been downloaded
143+
# entity = f"https://aql.webis.de/result/{result.id}"
144+
145+
# yield(entity, "schema:identifier", result.id)  # TODO: all identifiers have been modeled, even though we didn't model grayed-out values; clarify why some identifiers are grayed out
146+
# yield(entity, "schema:url", result.url)
147+
# yield(entity, "schema:title", result.title)
148+
# yield(entity, "schema:isBasedOn", f"https://aql.webis.de/capture/{result.capture.id}")
149+

0 commit comments

Comments
 (0)