From 9172921524f738f0a2ffc7ce4610e249af8d86dd Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Wed, 12 Apr 2023 20:21:35 +0200 Subject: [PATCH 1/5] Fix bug where base_url was only partially returned when not specified in config file. --- optimade/server/routers/utils.py | 24 +++++++++++++++++----- tests/server/routers/test_utils.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/optimade/server/routers/utils.py b/optimade/server/routers/utils.py index e79983547..53cdee693 100644 --- a/optimade/server/routers/utils.py +++ b/optimade/server/routers/utils.py @@ -232,16 +232,30 @@ def get_base_url( Take the base URL from the config file, if it exists, otherwise use the request. """ + if CONFIG.base_url: + return CONFIG.base_url.rstrip("/") + + from optimade.server.schemas import ENTRY_INFO_SCHEMAS + parsed_url_request = ( urllib.parse.urlparse(parsed_url_request) if isinstance(parsed_url_request, str) else parsed_url_request ) - return ( - CONFIG.base_url.rstrip("/") - if CONFIG.base_url - else f"{parsed_url_request.scheme}://{parsed_url_request.netloc}" - ) + base_url = f"{parsed_url_request.scheme}://{parsed_url_request.netloc}" + if parsed_url_request.path: + split_path = re.split(r"/v[0-9]+(\.[0-9]+){0,2}", parsed_url_request.path, 1) + if len(split_path) == 1: + available_endpoints = ["info", "links"] + list(ENTRY_INFO_SCHEMAS) + for endpoint in available_endpoints: + split_path = parsed_url_request.path.split("/" + endpoint) + if len(split_path) > 1: + break + else: + split_path[0] = split_path[0].rstrip("/") + base_url = base_url + split_path[0] + + return base_url def get_entries( diff --git a/tests/server/routers/test_utils.py b/tests/server/routers/test_utils.py index 37b1f8ee5..118096b77 100644 --- a/tests/server/routers/test_utils.py +++ b/tests/server/routers/test_utils.py @@ -111,3 +111,36 @@ def test_get_providers_warning(caplog, top_dir): from optimade.server.data import providers assert providers == providers_cache + + +def test_get_base_url(): + """ + This tests whether the base_url is correctly extracted from the request. + """ + from optimade.server.routers.utils import get_base_url + from optimade.server.config import CONFIG + + base_url_org = CONFIG.base_url + CONFIG.base_url = None + request_urls = ( + "http://www.example.com", + "http://www.example.com/", + "http://www.example.com/optimade/v1/links", + "http://www.structures.com/links", + "https://www.links.org/optimade/structures/123456", + ) + base_urls = ( + "http://www.example.com", + "http://www.example.com", + "http://www.example.com/optimade", + "http://www.structures.com", + "https://www.links.org/optimade", + ) + results = [] + for request_url in request_urls: + results.append(get_base_url(request_url)) + + CONFIG.base_url = base_url_org + + for i in range(len(base_urls)): + assert results[i] == base_urls[i] From 9c306bb2c22be1548f00d9c32a0b71b595eba423 Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Thu, 13 Apr 2023 13:05:53 +0200 Subject: [PATCH 2/5] Place import of ENTRY_INFO_SCHEMAS deeper in the code so it is only executed when needed. --- optimade/server/routers/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimade/server/routers/utils.py b/optimade/server/routers/utils.py index 53cdee693..d330956fd 100644 --- a/optimade/server/routers/utils.py +++ b/optimade/server/routers/utils.py @@ -235,8 +235,6 @@ def get_base_url( if CONFIG.base_url: return CONFIG.base_url.rstrip("/") - from optimade.server.schemas import ENTRY_INFO_SCHEMAS - parsed_url_request = ( urllib.parse.urlparse(parsed_url_request) if isinstance(parsed_url_request, str) @@ -246,6 +244,8 @@ def get_base_url( if parsed_url_request.path: split_path = re.split(r"/v[0-9]+(\.[0-9]+){0,2}", parsed_url_request.path, 1) if len(split_path) == 1: + from optimade.server.schemas import ENTRY_INFO_SCHEMAS + available_endpoints = ["info", "links"] + list(ENTRY_INFO_SCHEMAS) for endpoint in available_endpoints: split_path = parsed_url_request.path.split("/" + endpoint) From d72a734759b00fdade540dc639e3ac9a0bedd61d Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Thu, 13 Apr 2023 13:22:54 +0200 Subject: [PATCH 3/5] Made code a bit shorter. --- optimade/server/routers/utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/optimade/server/routers/utils.py b/optimade/server/routers/utils.py index d330956fd..f39c6300e 100644 --- a/optimade/server/routers/utils.py +++ b/optimade/server/routers/utils.py @@ -246,14 +246,11 @@ def get_base_url( if len(split_path) == 1: from optimade.server.schemas import ENTRY_INFO_SCHEMAS - available_endpoints = ["info", "links"] + list(ENTRY_INFO_SCHEMAS) - for endpoint in available_endpoints: + for endpoint in ["info", "links"] + list(ENTRY_INFO_SCHEMAS): split_path = parsed_url_request.path.split("/" + endpoint) if len(split_path) > 1: break - else: - split_path[0] = split_path[0].rstrip("/") - base_url = base_url + split_path[0] + base_url = base_url + split_path[0].rstrip("/") return base_url From 790ce45f4d3c5ef163fc2acbb4733a9e02601e7f Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Tue, 18 Apr 2023 18:40:53 +0200 Subject: [PATCH 4/5] Use rootpath config parameter for generating the baseurl instead of path from parsed URL. --- optimade/server/routers/utils.py | 12 ++---------- tests/server/routers/test_utils.py | 10 ++++++---- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/optimade/server/routers/utils.py b/optimade/server/routers/utils.py index f39c6300e..704f61830 100644 --- a/optimade/server/routers/utils.py +++ b/optimade/server/routers/utils.py @@ -241,16 +241,8 @@ def get_base_url( else parsed_url_request ) base_url = f"{parsed_url_request.scheme}://{parsed_url_request.netloc}" - if parsed_url_request.path: - split_path = re.split(r"/v[0-9]+(\.[0-9]+){0,2}", parsed_url_request.path, 1) - if len(split_path) == 1: - from optimade.server.schemas import ENTRY_INFO_SCHEMAS - - for endpoint in ["info", "links"] + list(ENTRY_INFO_SCHEMAS): - split_path = parsed_url_request.path.split("/" + endpoint) - if len(split_path) > 1: - break - base_url = base_url + split_path[0].rstrip("/") + if CONFIG.root_path: + base_url = base_url + CONFIG.root_path.rstrip("/") return base_url diff --git a/tests/server/routers/test_utils.py b/tests/server/routers/test_utils.py index 118096b77..e73332710 100644 --- a/tests/server/routers/test_utils.py +++ b/tests/server/routers/test_utils.py @@ -121,7 +121,9 @@ def test_get_base_url(): from optimade.server.config import CONFIG base_url_org = CONFIG.base_url + root_path_org = CONFIG.root_path CONFIG.base_url = None + CONFIG.root_path = "/optimade" request_urls = ( "http://www.example.com", "http://www.example.com/", @@ -130,10 +132,10 @@ def test_get_base_url(): "https://www.links.org/optimade/structures/123456", ) base_urls = ( - "http://www.example.com", - "http://www.example.com", "http://www.example.com/optimade", - "http://www.structures.com", + "http://www.example.com/optimade", + "http://www.example.com/optimade", + "http://www.structures.com/optimade", "https://www.links.org/optimade", ) results = [] @@ -141,6 +143,6 @@ def test_get_base_url(): results.append(get_base_url(request_url)) CONFIG.base_url = base_url_org - + CONFIG.root_path = root_path_org for i in range(len(base_urls)): assert results[i] == base_urls[i] From d0c76cd1aac8074952389678ff6b1101d6399f0e Mon Sep 17 00:00:00 2001 From: Johan Bergsma <29785380+JPBergsma@users.noreply.github.com> Date: Tue, 18 Apr 2023 19:15:09 +0200 Subject: [PATCH 5/5] Change the order of the dependancies to satisfy ruff. --- tests/server/routers/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/server/routers/test_utils.py b/tests/server/routers/test_utils.py index e73332710..fc88509d7 100644 --- a/tests/server/routers/test_utils.py +++ b/tests/server/routers/test_utils.py @@ -117,8 +117,8 @@ def test_get_base_url(): """ This tests whether the base_url is correctly extracted from the request. """ - from optimade.server.routers.utils import get_base_url from optimade.server.config import CONFIG + from optimade.server.routers.utils import get_base_url base_url_org = CONFIG.base_url root_path_org = CONFIG.root_path