Skip to content

Commit ef0ed26

Browse files
authored
feat: improve DRS URI resolution (#200)
1 parent f5568c4 commit ef0ed26

File tree

3 files changed

+49
-13
lines changed

3 files changed

+49
-13
lines changed

cwl_wes/config/app_config.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,7 @@ drs:
105105
port: Null # use this port for resolving DRS URIs; set to `Null` to use default (443)
106106
base_path: Null # use this base path for resolving DRS URIs; set to `Null` to use default (`ga4gh/drs/v1`)
107107
use_http: False # use `http` for resolving DRS URIs; set to `False` to use default (`https`)
108+
file_types: # extensions of files to scan for DRS URI resolution
109+
- cwl
110+
- yaml
111+
- yml

cwl_wes/ga4gh/wes/endpoints/run_workflow.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,12 @@ def __create_run_environment(
249249
break
250250

251251
# translate DRS URIs to access URLs
252+
file_types: List[str] = get_conf_type(
253+
current_app.config,
254+
'drs',
255+
'file_types',
256+
types=(list),
257+
)
252258
supported_access_methods: List[str] = get_conf_type(
253259
current_app.config,
254260
'service_info',
@@ -260,21 +266,22 @@ def __create_run_environment(
260266
'drs',
261267
'port',
262268
)
269+
base_path: Optional[str] = get_conf(
270+
current_app.config,
271+
'drs',
272+
'base_path',
273+
)
263274
use_http: bool = get_conf(
264275
current_app.config,
265276
'drs',
266277
'use_http',
267278
)
268279
translate_drs_uris(
269-
path=document['internal']['param_file_path'],
270-
supported_access_methods=supported_access_methods,
271-
port=port,
272-
use_http=use_http,
273-
)
274-
translate_drs_uris(
275-
path=document['internal']['cwl_path'],
280+
path=document['internal']['workflow_files'],
281+
file_types=file_types,
276282
supported_access_methods=supported_access_methods,
277283
port=port,
284+
base_path=base_path,
278285
use_http=use_http,
279286
)
280287

@@ -329,7 +336,7 @@ def __process_workflow_attachments(data: Dict) -> Dict:
329336
)
330337

331338
# Create directory for storing workflow files
332-
workflow_dir = os.path.abspath(
339+
data['internal']['workflow_files'] = workflow_dir = os.path.abspath(
333340
os.path.join(
334341
data['internal']['out_dir'], 'workflow_files'
335342
)

cwl_wes/ga4gh/wes/endpoints/utils/drs.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
def translate_drs_uris(
2626
path: str,
27+
file_types: List[str],
2728
supported_access_methods: List[str],
2829
port: Optional[int] = None,
2930
base_path: Optional[str] = None,
@@ -37,6 +38,7 @@ def translate_drs_uris(
3738
3839
Arguments:
3940
path: File or directory containing files.
41+
file_types: Extensions of files to scan.
4042
supported_access_methods: List of access methods/file transfer
4143
protocols supported by this service, provided in the order of
4244
preference.
@@ -54,10 +56,15 @@ def translate_drs_uris(
5456
_RE_OBJECT_ID = rf"(?P<drs_uri>drs:\/\/{_RE_DOMAIN}\/\S+)"
5557

5658
# get absolute paths of file or directory (including subdirectories)
57-
files = abs_paths(dir=path) if os.path.isdir(path) else [path]
59+
logger.debug(f"Collecting file(s) for provided path '{path}'...")
60+
files = abs_paths(
61+
dir=path,
62+
file_ext=file_types,
63+
) if os.path.isdir(path) else [path]
5864

5965
# replace any DRS URIs in any file in place
6066
for _file in files:
67+
logger.debug(f"Scanning file '{_file}' for DRS URIs...")
6168
with FileInput(_file, inplace=True) as _f:
6269
for line in _f:
6370
sys.stdout.write(
@@ -76,18 +83,24 @@ def translate_drs_uris(
7683
)
7784

7885

79-
def abs_paths(
    dir: str,
    file_ext: List[str],
) -> Iterator[str]:
    """Yields absolute paths of files with the indicated file extensions in
    specified directory and subdirectories.

    Extensions are matched as whole, case-sensitive suffixes following a dot,
    i.e., `cwl` matches `workflow.cwl` but not `workflowcwl`.

    Arguments:
        dir: Directory to search files in.
        file_ext: List of file extensions for files to return; each entry may
            be given with or without a leading dot. Empty strings are
            ignored. An empty list yields no files.

    Returns:
        Generator yielding absolute file paths.
    """
    # Normalize once, before walking: a bare `endswith('cwl')` would also
    # match names such as `notcwl` that merely end in the same characters.
    suffixes = tuple(
        ext if ext.startswith('.') else f'.{ext}'
        for ext in file_ext
        if ext
    )
    for dirpath, _, files in os.walk(dir):
        for _file in files:
            if _file.endswith(suffixes):
                yield os.path.abspath(os.path.join(dirpath, _file))
91104

92105

93106
def get_replacement_string(
@@ -166,6 +179,7 @@ def get_access_url_from_drs(
166179
use_http=use_http,
167180
)
168181
except InvalidURI:
182+
logger.error(f"The provided DRS URI '{drs_uri}' is invalid.")
169183
raise BadRequest
170184

171185
# get DRS object
@@ -174,11 +188,15 @@ def get_access_url_from_drs(
174188
object_id=drs_uri
175189
)
176190
except (ConnectionError, InvalidResponseError):
191+
logger.error(f"Could not connect to DRS host for DRS URI '{drs_uri}'.")
177192
raise InternalServerError
178193
if isinstance(object, Error):
179194
if object.status_code == 404:
195+
logger.error(f"Could not access DRS host for DRS URI '{drs_uri}'.")
180196
raise BadRequest
197+
# TODO: handle 401 & 403
181198
else:
199+
logger.error(f"DRS returned error: {object}'.")
182200
raise InternalServerError
183201

184202
# get access methods and access method types/protocols
@@ -189,12 +207,19 @@ def get_access_url_from_drs(
189207
# TODO: add support for access URL headers
190208
for supported_method in supported_access_methods:
191209
try:
192-
return str(
210+
access_url = str(
193211
available_methods
194212
[available_types.index(supported_method)].access_url.url
195213
)
214+
logger.info(
215+
f"Resolved DRS URI '{drs_uri}' to access link '{access_url}'."
216+
)
217+
return access_url
196218
except ValueError:
197219
continue
198220

199221
# no method was found
222+
logger.error(
223+
f"Could not find a supported access URL for DRS URI '{drs_uri}'."
224+
)
200225
raise BadRequest

0 commit comments

Comments
 (0)