Skip to content

Commit 05c1da3

Browse files
authored
Add Neptune Summary API support (#457)
* Add Neptune Summary API support * Update changelog * Cleaned up mode check, added OC language options --------- Co-authored-by: Michael Chin <chnmch@amazon.com>
1 parent 39b37f5 commit 05c1da3

File tree

5 files changed

+154
-26
lines changed

5 files changed

+154
-26
lines changed

ChangeLog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd
77
- Path: 04-Machine-Learning > Sample-Applications > 03-Real-Time-Fraud-Detection-Using-Inductive-Inference.ipynb
88
- New openCypher Language Tutorial notebooks
99
- Path: 06-Language-Tutorials > 02-openCypher
10+
- Added support for Neptune Summary API ([Link to PR](https://github.com/aws/graph-notebook/pull/457))
1011
- Added `--profile-misc-args` option to `%%gremlin` ([Link to PR](https://github.com/aws/graph-notebook/pull/443))
1112
- Added error messaging for incompatible host-specific `%%graph_notebook_config` parameters ([Link to PR](https://github.com/aws/graph-notebook/pull/456))
1213
- Ensure default assignments for all Gremlin nodes when using grouping ([Link to PR](https://github.com/aws/graph-notebook/pull/448))

src/graph_notebook/magics/graph_magic.py

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@
4242
from graph_notebook.neptune.client import ClientBuilder, Client, VALID_FORMATS, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \
4343
LOAD_JOB_MODES, MODE_AUTO, FINAL_LOAD_STATUSES, SPARQL_ACTION, FORMAT_CSV, FORMAT_OPENCYPHER, FORMAT_NTRIPLE, \
4444
FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE, STREAM_RDF, STREAM_PG, STREAM_ENDPOINTS, \
45-
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, STATISTICS_LANGUAGE_INPUTS, STATISTICS_MODES
45+
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, \
46+
STATISTICS_LANGUAGE_INPUTS, STATISTICS_MODES, SUMMARY_MODES
4647
from graph_notebook.network import SPARQLNetwork
4748
from graph_notebook.network.gremlin.GremlinNetwork import parse_pattern_list_str, GremlinNetwork
4849
from graph_notebook.visualization.rows_and_columns import sparql_get_rows_and_columns, opencypher_get_rows_and_columns
@@ -403,26 +404,61 @@ def statistics(self, line, local_ns: dict = None):
403404
help=f'The language endpoint to use. Valid inputs: {STATISTICS_LANGUAGE_INPUTS}. '
404405
f'Default: propertygraph.',
405406
choices=STATISTICS_LANGUAGE_INPUTS)
406-
parser.add_argument('-m', '--mode', type=str, default='status',
407+
parser.add_argument('-m', '--mode', type=str, default='',
407408
help=f'The action to perform on the statistics endpoint. Valid inputs: {STATISTICS_MODES}. '
408-
f'Default: status')
409+
f'Default: `basic` if `--summary` is specified, otherwise `status`.')
410+
parser.add_argument('--summary', action='store_true', default=False, help="Retrieves the graph summary.")
409411
parser.add_argument('--silent', action='store_true', default=False, help="Display no output.")
410412
parser.add_argument('--store-to', type=str, default='')
411413

412414
args = parser.parse_args(line.split())
413-
414-
if args.mode not in STATISTICS_MODES:
415-
print(f'Invalid mode. Please specify one of: {STATISTICS_MODES}, or leave blank to retrieve status.')
415+
mode = args.mode
416+
417+
if not mode:
418+
mode = 'basic' if args.summary else 'status'
419+
elif (args.summary and mode not in SUMMARY_MODES) or (not args.summary and mode not in STATISTICS_MODES):
420+
err_endpoint_type, err_mode_list, err_default_mode = ("summary", SUMMARY_MODES[1:], "basic summary view") \
421+
if args.summary else ("statistics", STATISTICS_MODES[1:], "status")
422+
print(f'Invalid {err_endpoint_type} mode. Please specify one of: {err_mode_list}, '
423+
f'or leave blank to retrieve {err_default_mode}.')
416424
return
417425

418-
statistics_res = self.client.statistics(args.language, args.mode)
426+
statistics_res = self.client.statistics(args.language, args.summary, mode)
419427
statistics_res.raise_for_status()
420-
res = statistics_res.json()
428+
statistics_res_json = statistics_res.json()
421429
if not args.silent:
422-
print(json.dumps(res, indent=2))
430+
print(json.dumps(statistics_res_json, indent=2))
423431

424432
if args.store_to != '' and local_ns is not None:
425-
local_ns[args.store_to] = res
433+
local_ns[args.store_to] = statistics_res_json
434+
435+
@line_magic
436+
def summary(self, line, local_ns: dict = None):
    """Line magic that fetches the graph summary and prints and/or stores it.

    The magic line accepts an optional positional language (one of
    STATISTICS_LANGUAGE_INPUTS, lower-cased, default "propertygraph") and the
    flags --detailed, --silent and --store-to.

    :param line: raw argument string typed after the magic name.
    :param local_ns: namespace dict that receives the decoded response under
        the --store-to name; nothing is stored when it is None or when
        --store-to is empty.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('language', nargs='?', type=str.lower, default="propertygraph",
                            help=f'The language endpoint to use. Valid inputs: {STATISTICS_LANGUAGE_INPUTS}. '
                                 f'Default: propertygraph.',
                            choices=STATISTICS_LANGUAGE_INPUTS)
    arg_parser.add_argument('--detailed', action='store_true', default=False,
                            help="Toggles the display of structures fields on or off in the output. If not supplied, "
                                 "we will default to the basic summary display mode.")
    arg_parser.add_argument('--silent', action='store_true', default=False, help="Display no output.")
    arg_parser.add_argument('--store-to', type=str, default='')
    parsed = arg_parser.parse_args(line.split())

    # The summary endpoint is reached through client.statistics with summary=True.
    summary_mode = "detailed" if parsed.detailed else "basic"
    response = self.client.statistics(parsed.language, True, summary_mode)
    response.raise_for_status()
    payload = response.json()

    if not parsed.silent:
        print(json.dumps(payload, indent=2))

    target_name = parsed.store_to
    if target_name != '' and local_ns is not None:
        local_ns[target_name] = payload
426462

427463
@line_magic
428464
def graph_notebook_host(self, line):

src/graph_notebook/neptune/client.py

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,9 @@
109109
GRAPHSONV2_VARIANTS = ['graphsonv2', 'graphsonv2d0', 'graphsonserializersv2d0']
110110
GRAPHBINARYV1_VARIANTS = ['graphbinaryv1', 'graphbinary', 'graphbinaryserializersv1']
111111

112-
STATISTICS_MODES = ["status", "disableAutoCompute", "enableAutoCompute", "refresh", "delete"]
113-
STATISTICS_LANGUAGE_INPUTS = ["propertygraph", "pg", "gremlin", "sparql", "rdf"]
112+
STATISTICS_MODES = ["", "status", "disableAutoCompute", "enableAutoCompute", "refresh", "delete"]
113+
SUMMARY_MODES = ["", "basic", "detailed"]
114+
STATISTICS_LANGUAGE_INPUTS = ["propertygraph", "pg", "gremlin", "oc", "opencypher", "sparql", "rdf"]
114115

115116

116117
def is_allowed_neptune_host(hostname: str, host_allowlist: list):
@@ -744,22 +745,30 @@ def _query_status(self, language: str, *, query_id: str = '', **kwargs) -> reque
744745
res = self._http_session.send(req, verify=self.ssl_verify)
745746
return res
746747

747-
def statistics(self, language: str, mode: str = '') -> requests.Response:
748+
def statistics(self, language: str, summary: bool = False, mode: str = '') -> requests.Response:
    """Send a request to the statistics endpoint or to its summary sub-resource.

    :param language: endpoint selector. "gremlin", "oc" and "opencypher" are
        mapped to "pg" and "sparql" is mapped to "rdf"; any other value is
        placed in the URL unchanged.
    :param summary: when True, issue a GET against ``<language>/statistics/summary``
        instead of the statistics endpoint itself.
    :param mode: for summary requests, the mode sent with the request ("basic"
        when empty). For statistics requests, '' or 'status' issues a GET,
        'delete' issues a DELETE, and any other value is POSTed as
        ``{"mode": mode}``.
    :return: the response object; the HTTP status is not checked here.
    """
    headers = {
        'Accept': 'application/json'
    }
    if language in ("gremlin", "oc", "opencypher"):
        language = "pg"
    elif language == "sparql":
        language = "rdf"

    url = f'{self._http_protocol}://{self.host}:{self.port}/{language}/statistics'

    if summary:
        # NOTE(review): the mode travels as a JSON body on a GET request; confirm against
        # the Neptune graph summary API whether it should be a `mode` query parameter instead.
        body = json.dumps({'mode': mode or 'basic'})
        req = self._prepare_request('GET', url + '/summary', data=body, headers=headers)
    elif mode in ('', 'status'):
        req = self._prepare_request('GET', url, headers=headers)
    elif mode == 'delete':
        req = self._prepare_request('DELETE', url, headers=headers)
    else:
        req = self._prepare_request('POST', url, data=json.dumps({'mode': mode}), headers=headers)

    # Pass the client's SSL verification setting, as _query_status does, so that
    # statistics and summary calls follow the same certificate policy.
    res = self._http_session.send(req, verify=self.ssl_verify)
    return res
765774

test/integration/iam/statistics/test_statistics_with_iam.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def test_statistics_disable_autocompute(self, lang):
3636
expected = {
3737
"status": "200 OK"
3838
}
39-
disable_res = self.client.statistics(lang, 'disableAutoCompute')
39+
disable_res = self.client.statistics(lang, False, 'disableAutoCompute')
4040
assert disable_res.status_code == 200
4141
disable_status = disable_res.json()
4242
self.assertEqual(disable_status, expected)
@@ -52,7 +52,7 @@ def test_statistics_enable_autocompute(self, lang):
5252
expected = {
5353
"status": "200 OK"
5454
}
55-
enable_res = self.client.statistics(lang, 'enableAutoCompute')
55+
enable_res = self.client.statistics(lang, False, 'enableAutoCompute')
5656
assert enable_res.status_code == 200
5757
enable_status = enable_res.json()
5858
self.assertEqual(enable_status, expected)
@@ -82,7 +82,7 @@ def test_statistics_delete(self, lang):
8282
"statisticsId": -1
8383
}
8484
}
85-
res = self.client.statistics(lang, 'delete')
85+
res = self.client.statistics(lang, False, 'delete')
8686
assert res.status_code == 200
8787
statistics_status = res.json()
8888
self.assertEqual(statistics_status, expected)
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
"""
2+
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
SPDX-License-Identifier: Apache-2.0
4+
"""
5+
import pytest
6+
from botocore.session import get_session
7+
8+
from test.integration import IntegrationTest
9+
from parameterized import parameterized
10+
11+
lang_list = ["pg", "sparql"]
12+
summary_detailed_fields_pg = ["nodeStructures", "edgeStructures"]
13+
summary_detailed_fields_rdf = ["subjectStructures"]
14+
15+
16+
class TestSummaryWithIAM(IntegrationTest):
    """Integration tests for the summary endpoint using an IAM-authenticated client."""

    def setUp(self) -> None:
        super().setUp()
        iam_builder = self.client_builder.with_iam(get_session())
        self.client = iam_builder.build()

    def _fetch_summary(self, *statistics_args):
        """Call client.statistics, require HTTP 200 and a '200 OK' status, return the decoded body."""
        response = self.client.statistics(*statistics_args)
        assert response.status_code == 200
        body = response.json()
        self.assertEqual(body['status'], '200 OK')
        return body

    def _fetch_graph_summary_fields(self, *statistics_args):
        """Return the field names found under payload.graphSummary of a successful summary call."""
        body = self._fetch_summary(*statistics_args)
        return list(body['payload']['graphSummary'].keys())

    @pytest.mark.neptune
    @pytest.mark.iam
    @parameterized.expand(lang_list)
    def test_summary_default(self, lang):
        # No explicit mode: only the top-level payload fields are checked.
        required_fields = ['version', 'lastStatisticsComputationTime', 'graphSummary']
        body = self._fetch_summary(lang, True)
        payload_fields = list(body['payload'].keys())
        for field in required_fields:
            self.assertIn(field, payload_fields)

    @pytest.mark.neptune
    @pytest.mark.iam
    def test_summary_basic_pg(self):
        fields = self._fetch_graph_summary_fields("pg", True, "basic")

        self.assertIn("numNodes", fields)
        # Basic mode must not include the detailed-only structure fields.
        for detailed_field in summary_detailed_fields_pg:
            self.assertNotIn(detailed_field, fields)

    @pytest.mark.neptune
    @pytest.mark.iam
    def test_summary_basic_rdf(self):
        fields = self._fetch_graph_summary_fields("rdf", True, "basic")

        self.assertIn("numDistinctSubjects", fields)
        # Basic mode must not include the detailed-only structure fields.
        for detailed_field in summary_detailed_fields_rdf:
            self.assertNotIn(detailed_field, fields)

    @pytest.mark.neptune
    @pytest.mark.iam
    def test_summary_detailed_pg(self):
        fields = self._fetch_graph_summary_fields("pg", True, "detailed")

        for detailed_field in summary_detailed_fields_pg:
            self.assertIn(detailed_field, fields)

    @pytest.mark.neptune
    @pytest.mark.iam
    def test_summary_detailed_rdf(self):
        fields = self._fetch_graph_summary_fields("rdf", True, "detailed")

        for detailed_field in summary_detailed_fields_rdf:
            self.assertIn(detailed_field, fields)

0 commit comments

Comments
 (0)