Skip to content

Commit 7ef53ee

Browse files
authored
Add support for Gremlin proxy host and Neptune HTTP query visualization (#530)
* Add Gremlin proxy host fix and Neptune HTTP query support
* Update changelog
1 parent 3e0989a commit 7ef53ee

File tree

4 files changed

+105
-43
lines changed

4 files changed

+105
-43
lines changed

ChangeLog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd
55
## Upcoming
66
- Added `--explain-type` option to `%%gremlin` ([Link to PR](https://github.com/aws/graph-notebook/pull/503))
77
- Added general documentation for `%%graph_notebook_config` options ([Link to PR](https://github.com/aws/graph-notebook/pull/504))
8+
- Added support for Gremlin proxy hosts and visualization of Neptune HTTP results ([Link to PR](https://github.com/aws/graph-notebook/pull/530))
89
- Modified Dockerfile to support Python 3.10 ([Link to PR](https://github.com/aws/graph-notebook/pull/519))
910
- Updated Docker documentation with platform-specific run commands ([Link to PR](https://github.com/aws/graph-notebook/pull/502))
1011
- Fixed deprecation warnings in GitHub workflows ([Link to PR](https://github.com/aws/graph-notebook/pull/506))

src/graph_notebook/magics/graph_magic.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from SPARQLWrapper import SPARQLWrapper
3030
from botocore.session import get_session
3131
from gremlin_python.driver.protocol import GremlinServerError
32+
from gremlin_python.structure.graph import Path
3233
from IPython.core.display import HTML, display_html, display
3334
from IPython.core.magic import (Magics, magics_class, cell_magic, line_magic, line_cell_magic, needs_local_scope)
3435
from ipywidgets.widgets.widget_description import DescriptionStyle
@@ -809,15 +810,15 @@ def gremlin(self, line, cell, local_ns: dict = None):
809810
parser.add_argument('--explain-type', type=str.lower, default='',
810811
help='Explain mode to use when using the explain query mode.')
811812
parser.add_argument('-p', '--path-pattern', default='', help='path pattern')
812-
parser.add_argument('-g', '--group-by', type=str, default='T.label',
813+
parser.add_argument('-g', '--group-by', type=str, default='',
813814
help='Property used to group nodes (e.g. code, T.region) default is T.label')
814815
parser.add_argument('-gd', '--group-by-depth', action='store_true', default=False,
815816
help="Group nodes based on path hierarchy")
816817
parser.add_argument('-gr', '--group-by-raw', action='store_true', default=False,
817818
help="Group nodes by the raw result")
818-
parser.add_argument('-d', '--display-property', type=str, default='T.label',
819+
parser.add_argument('-d', '--display-property', type=str, default='',
819820
help='Property to display the value of on each node, default is T.label')
820-
parser.add_argument('-de', '--edge-display-property', type=str, default='T.label',
821+
parser.add_argument('-de', '--edge-display-property', type=str, default='',
821822
help='Property to display the value of on each edge, default is T.label')
822823
parser.add_argument('-t', '--tooltip-property', type=str, default='',
823824
help='Property to display the value of on each node tooltip. If not specified, tooltip '
@@ -937,8 +938,16 @@ def gremlin(self, line, cell, local_ns: dict = None):
937938
else:
938939
first_tab_html = pre_container_template.render(content='No profile found')
939940
else:
941+
using_http = False
940942
query_start = time.time() * 1000 # time.time() returns time in seconds w/high precision; x1000 to get in ms
941-
query_res = self.client.gremlin_query(cell, transport_args=transport_args)
943+
if self.graph_notebook_config.proxy_host != '' and self.client.is_neptune_domain():
944+
using_http = True
945+
query_res_http = self.client.gremlin_http_query(cell, headers={'Accept': 'application/vnd.gremlin-v1.0+json;types=false'})
946+
query_res_http.raise_for_status()
947+
query_res_http_json = query_res_http.json()
948+
query_res = query_res_http_json['result']['data']
949+
else:
950+
query_res = self.client.gremlin_query(cell, transport_args=transport_args)
942951
query_time = time.time() * 1000 - query_start
943952
if not args.silent:
944953
gremlin_metadata = build_gremlin_metadata_from_query(query_type='query', results=query_res,
@@ -952,18 +961,30 @@ def gremlin(self, line, cell, local_ns: dict = None):
952961
logger.debug(f'edge_display_property: {args.edge_display_property}')
953962
logger.debug(f'label_max_length: {args.label_max_length}')
954963
logger.debug(f'ignore_groups: {args.ignore_groups}')
955-
gn = GremlinNetwork(group_by_property=args.group_by, display_property=args.display_property,
964+
gn = GremlinNetwork(group_by_property=args.group_by,
965+
display_property=args.display_property,
956966
group_by_raw=args.group_by_raw,
957967
group_by_depth=args.group_by_depth,
958968
edge_display_property=args.edge_display_property,
959969
tooltip_property=args.tooltip_property,
960970
edge_tooltip_property=args.edge_tooltip_property,
961971
label_max_length=args.label_max_length,
962972
edge_label_max_length=args.edge_label_max_length,
963-
ignore_groups=args.ignore_groups)
973+
ignore_groups=args.ignore_groups,
974+
using_http=using_http)
975+
976+
if using_http and 'path()' in cell and query_res:
977+
first_path = query_res[0]
978+
if isinstance(first_path, dict) and first_path.keys() == {'labels', 'objects'}:
979+
query_res_to_path_type = []
980+
for path in query_res:
981+
new_path_list = path['objects']
982+
new_path = Path(labels=[], objects=new_path_list)
983+
query_res_to_path_type.append(new_path)
984+
query_res = query_res_to_path_type
964985

965986
if args.path_pattern == '':
966-
gn.add_results(query_res)
987+
gn.add_results(query_res, is_http=using_http)
967988
else:
968989
pattern = parse_pattern_list_str(args.path_pattern)
969990
gn.add_results_with_pattern(query_res, pattern)

src/graph_notebook/neptune/client.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717
from botocore.awsrequest import AWSRequest
1818
from gremlin_python.driver import client, serializer
1919
from gremlin_python.driver.protocol import GremlinServerError
20+
from gremlin_python.driver.aiohttp.transport import AiohttpTransport
2021
from neo4j import GraphDatabase, DEFAULT_DATABASE
2122
from neo4j.exceptions import AuthError
2223
from base64 import b64encode
2324
import nest_asyncio
2425

2526
from graph_notebook.neptune.bolt_auth_token import NeptuneBoltAuthToken
2627

27-
2828
# This patch is no longer needed when graph_notebook is using a Gremlin Python
2929
# client >= 3.5.0 as the HashableDict is now part of that client driver.
3030
# import graph_notebook.neptune.gremlin.graphsonV3d0_MapType_objectify_patch # noqa F401
@@ -45,7 +45,7 @@
4545
# TODO: add doc links to each command
4646

4747
FORMAT_CSV = 'csv'
48-
FORMAT_OPENCYPHER='opencypher'
48+
FORMAT_OPENCYPHER = 'opencypher'
4949
FORMAT_NTRIPLE = 'ntriples'
5050
FORMAT_NQUADS = 'nquads'
5151
FORMAT_RDFXML = 'rdfxml'
@@ -191,11 +191,19 @@ def is_neptune_domain(self):
191191
return is_allowed_neptune_host(hostname=self.target_host, host_allowlist=self.neptune_hosts)
192192

193193
def get_uri_with_port(self, use_websocket=False, use_proxy=False):
194-
protocol = self._http_protocol
195194
if use_websocket is True:
196195
protocol = self._ws_protocol
196+
else:
197+
protocol = self._http_protocol
197198

198-
uri = f'{protocol}://{self.host}:{self.port}'
199+
if use_proxy is True:
200+
uri_host = self.proxy_host
201+
uri_port = self.proxy_port
202+
else:
203+
uri_host = self.target_host
204+
uri_port = self.target_port
205+
206+
uri = f'{protocol}://{uri_host}:{uri_port}'
199207
return uri
200208

201209
def sparql_query(self, query: str, headers=None, explain: str = '', path: str = '') -> requests.Response:
@@ -267,11 +275,20 @@ def sparql_cancel(self, query_id: str, silent: bool = False):
267275
def get_gremlin_connection(self, transport_kwargs) -> client.Client:
268276
nest_asyncio.apply()
269277

270-
ws_url = f'{self.get_uri_with_port(use_websocket=True)}/gremlin'
271-
request = self._prepare_request('GET', ws_url)
278+
ws_url = f'{self.get_uri_with_port(use_websocket=True, use_proxy=False)}/gremlin'
279+
if self.proxy_host != '':
280+
proxy_http_url = f'{self.get_uri_with_port(use_websocket=False, use_proxy=True)}/gremlin'
281+
transport_factory_args = lambda: AiohttpTransport(call_from_event_loop=True, proxy=proxy_http_url,
282+
**transport_kwargs)
283+
request = self._prepare_request('GET', proxy_http_url)
284+
else:
285+
transport_factory_args = lambda: AiohttpTransport(**transport_kwargs)
286+
request = self._prepare_request('GET', ws_url)
287+
272288
traversal_source = 'g' if self.is_neptune_domain() else self.gremlin_traversal_source
273-
return client.Client(ws_url, traversal_source, username=self.gremlin_username,
274-
password=self.gremlin_password, message_serializer=self.gremlin_serializer,
289+
return client.Client(ws_url, traversal_source, transport_factory=transport_factory_args,
290+
username=self.gremlin_username, password=self.gremlin_password,
291+
message_serializer=self.gremlin_serializer,
275292
headers=dict(request.headers), **transport_kwargs)
276293

277294
def gremlin_query(self, query, transport_args=None, bindings=None):
@@ -298,7 +315,8 @@ def gremlin_http_query(self, query, headers=None) -> requests.Response:
298315
if headers is None:
299316
headers = {}
300317

301-
uri = f'{self.get_uri_with_port()}/gremlin'
318+
use_proxy = True if self.proxy_host != '' else False
319+
uri = f'{self.get_uri_with_port(use_websocket=False, use_proxy=use_proxy)}/gremlin'
302320
data = {'gremlin': query}
303321
req = self._prepare_request('POST', uri, data=json.dumps(data), headers=headers)
304322
res = self._http_session.send(req, verify=self.ssl_verify)
@@ -431,7 +449,7 @@ def stream(self, url, **kwargs) -> requests.Response:
431449
params = {}
432450
for k, v in kwargs.items():
433451
params[k] = v
434-
req = self._prepare_request('GET', url, params=params,data='')
452+
req = self._prepare_request('GET', url, params=params, data='')
435453
res = self._http_session.send(req, verify=self.ssl_verify)
436454
return res.json()
437455

@@ -850,7 +868,7 @@ def with_sparql_path(self, path: str):
850868
def with_gremlin_traversal_source(self, traversal_source: str):
851869
self.args['gremlin_traversal_source'] = traversal_source
852870
return ClientBuilder(self.args)
853-
871+
854872
def with_gremlin_login(self, username: str, password: str):
855873
self.args['gremlin_username'] = username
856874
self.args['gremlin_password'] = password
@@ -859,7 +877,7 @@ def with_gremlin_login(self, username: str, password: str):
859877
def with_gremlin_serializer(self, message_serializer: str):
860878
self.args['gremlin_serializer'] = message_serializer
861879
return ClientBuilder(self.args)
862-
880+
863881
def with_neo4j_login(self, username: str, password: str, auth: bool, database: str):
864882
self.args['neo4j_username'] = username
865883
self.args['neo4j_password'] = password

0 commit comments

Comments
 (0)