Skip to content

Commit 62d1f73

Browse files
authored
Merge pull request #202 from package-url/add_tests_for_purl-spec
Add tests for latest purl-spec
2 parents d079d0d + 100b947 commit 62d1f73

File tree

6 files changed

+216
-12
lines changed

6 files changed

+216
-12
lines changed

.github/workflows/ci.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ jobs:
1010
steps:
1111
- name: Checkout
1212
uses: actions/checkout@v4
13+
with:
14+
submodules: recursive
1315

1416
- name: Setup Python environment
1517
uses: actions/setup-python@v5
@@ -23,7 +25,7 @@ jobs:
2325
- name: Validate
2426
run: |
2527
isort --check-only src/ tests/
26-
black --check --line-length 100 .
28+
black --check --line-length 100 src/ tests/
2729
mypy
2830
2931
build-and-test:
@@ -44,6 +46,8 @@ jobs:
4446
steps:
4547
- name: Checkout
4648
uses: actions/checkout@v4
49+
with:
50+
submodules: recursive
4751

4852
- name: Setup Python environment
4953
uses: actions/setup-python@v5
@@ -56,7 +60,7 @@ jobs:
5660
pip install -e .[build]
5761
5862
- name: Test
59-
run: py.test -vvs
63+
run: py.test -vvs --ignore=spec/
6064

6165
- name: Build
6266
run: python setup.py build sdist bdist_wheel

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "spec"]
2+
path = spec
3+
url = https://github.com/package-url/purl-spec.git

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
PYTHON_EXE?=python3
2727
ACTIVATE?=. bin/activate;
2828
VIRTUALENV_PYZ=thirdparty/virtualenv.pyz
29-
BLACK_ARGS=--exclude=".cache|lib|bin|var" --line-length 100
29+
# Format the same paths that CI verifies with `black --check` (src/ and tests/).
BLACK_ARGS=src/ tests/ --exclude="\.cache|lib|bin|var" --line-length 100
3030

3131
virtualenv:
3232
@echo "-> Bootstrap the virtualenv with PYTHON_EXE=${PYTHON_EXE}"
@@ -52,7 +52,7 @@ isort:
5252

5353
black:
5454
@echo "-> Apply black code formatter"
55-
@${ACTIVATE} black ${BLACK_ARGS} .
55+
@${ACTIVATE} black ${BLACK_ARGS}
5656

5757
mypy:
5858
@echo "-> Type check the Python code."

spec

Submodule spec added at a627e02

src/packageurl/__init__.py

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from collections.abc import Mapping
3030
from typing import TYPE_CHECKING
3131
from typing import Any
32+
from typing import Optional
3233
from typing import Union
3334
from typing import overload
3435
from urllib.parse import quote as _percent_quote
@@ -116,37 +117,66 @@ def normalize_namespace(
116117

117118
namespace_str = namespace if isinstance(namespace, str) else namespace.decode("utf-8")
118119
namespace_str = namespace_str.strip().strip("/")
119-
if ptype in ("bitbucket", "github", "pypi", "gitlab"):
120+
if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"):
120121
namespace_str = namespace_str.lower()
121122
segments = [seg for seg in namespace_str.split("/") if seg.strip()]
122123
segments_quoted = map(get_quoter(encode), segments)
123124
return "/".join(segments_quoted) or None
124125

125126

127+
def normalize_mlflow_name(
    name_str: str,
    qualifiers: Union[str, bytes, dict[str, str], None],
) -> Optional[str]:
    """
    Return the MLflow package name ``name_str`` normalized for its registry.

    The casing rule depends on the registry identified from ``qualifiers``:

    - Azure ML ("azureml"): names are case-sensitive and are kept as-is.
    - Databricks ("databricks"): names are case-insensitive and are lowercased.

    ``qualifiers`` is either a mapping, in which case only its
    ``repository_url`` value is inspected, or a raw qualifiers string (``str``
    or UTF-8 encoded ``bytes``), in which case the whole string is searched.
    ``name_str`` is returned unchanged when no known registry is recognized.
    """
    if isinstance(qualifiers, dict):
        searched = qualifiers.get("repository_url") or ""
    elif isinstance(qualifiers, bytes):
        # The signature accepts bytes, so treat them like the equivalent string.
        searched = qualifiers.decode("utf-8")
    elif isinstance(qualifiers, str):
        searched = qualifiers
    else:
        searched = ""

    searched = searched.lower()
    # Azure ML is checked first so that it wins if both markers are present.
    if "azureml" in searched:
        return name_str
    if "databricks" in searched:
        return name_str.lower()
    return name_str
145+
146+
126147
def normalize_name(
    name: AnyStr | None,
    qualifiers: Union[Union[str, bytes], dict[str, str], None],
    ptype: str | None,
    encode: bool | None = True,
) -> Optional[str]:
    """
    Return the normalized ``name`` string for a package of type ``ptype``.

    ``name`` may be ``str`` or UTF-8 ``bytes``. It is passed through the quoter
    selected by ``get_quoter(encode)``, then stripped of surrounding whitespace
    and slashes. Type-specific rules are applied afterwards:

    - ``mlflow``: casing depends on ``qualifiers`` (see ``normalize_mlflow_name``).
    - ``bitbucket``, ``github``, ``pypi``, ``gitlab``, ``composer``: lowercased.
    - ``pypi``: underscores are additionally replaced with dashes.

    Return ``None`` when ``name`` is empty or normalizes to an empty string.
    """
    if not name:
        return None

    name_str = name if isinstance(name, str) else name.decode("utf-8")
    quoter = get_quoter(encode)
    name_str = quoter(name_str)
    name_str = name_str.strip().strip("/")
    # Compare with ``==``: ``ptype in ("mlflow")`` is a substring test against a
    # plain string and would also match types such as "ml" or "flow".
    if ptype == "mlflow":
        return normalize_mlflow_name(name_str, qualifiers) or None
    if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"):
        name_str = name_str.lower()
    if ptype == "pypi":
        name_str = name_str.replace("_", "-")
    return name_str or None
141167

142168

143-
def normalize_version(version: AnyStr | None, encode: bool | None = True) -> str | None:
169+
def normalize_version(
    version: AnyStr | None, ptype: Optional[Union[str, bytes]], encode: bool | None = True
) -> str | None:
    """
    Return the normalized ``version`` string for a package of type ``ptype``.

    ``version`` may be ``str`` or UTF-8 ``bytes``. It is stripped of surrounding
    whitespace and passed through the quoter selected by ``get_quoter(encode)``.
    For the ``huggingface`` type the result is lowercased.

    ``ptype`` may be a raw, not yet normalized type (``str`` or UTF-8 ``bytes``,
    any casing, surrounding whitespace); it is normalized here before comparing.

    Return ``None`` when ``version`` is empty or normalizes to an empty string.
    """
    if not version:
        return None

    version_str = version if isinstance(version, str) else version.decode("utf-8")
    quoter = get_quoter(encode)
    version_str = quoter(version_str.strip())

    # Callers may hand over the type exactly as the user supplied it, so decode
    # and case-fold it here. Compare with ``==``: ``in ("huggingface")`` is a
    # substring test against a plain string, not a tuple membership test.
    ptype_str = ptype.decode("utf-8") if isinstance(ptype, bytes) else ptype
    if ptype_str and ptype_str.strip().lower() == "huggingface":
        version_str = version_str.lower()
    return version_str or None
151181

152182

@@ -304,8 +334,8 @@ def normalize(
304334
"""
305335
type_norm = normalize_type(type, encode)
306336
namespace_norm = normalize_namespace(namespace, type_norm, encode)
307-
name_norm = normalize_name(name, type_norm, encode)
308-
version_norm = normalize_version(version, encode)
337+
name_norm = normalize_name(name, qualifiers, type_norm, encode)
338+
version_norm = normalize_version(version, type, encode)
309339
qualifiers_norm = normalize_qualifiers(qualifiers, encode)
310340
subpath_norm = normalize_subpath(subpath, encode)
311341
return type_norm, namespace_norm, name_norm, version_norm, qualifiers_norm, subpath_norm
@@ -464,8 +494,19 @@ def from_string(cls, purl: str) -> Self:
464494
if not type_ or not sep:
465495
raise ValueError(f"purl is missing the required type component: {purl!r}.")
466496

497+
valid_chars = string.ascii_letters + string.digits + ".-_"
498+
if not all(c in valid_chars for c in type_):
499+
raise ValueError(
500+
f"purl type must be composed only of ASCII letters and numbers, period, dash and underscore: {type_!r}."
501+
)
502+
503+
if type_[0] in string.digits:
504+
raise ValueError(f"purl type cannot start with a number: {type_!r}.")
505+
467506
type_ = type_.lower()
468507

508+
original_remainder = remainder
509+
469510
scheme, authority, path, qualifiers_str, subpath = _urlsplit(
470511
url=remainder, scheme="", allow_fragments=True
471512
)
@@ -480,7 +521,9 @@ def from_string(cls, purl: str) -> Self:
480521
path = authority + ":" + path
481522

482523
if scheme:
483-
path = scheme + ":" + path
524+
# This is a way to preserve the casing of the original scheme
525+
original_scheme = original_remainder.split(":", 1)[0]
526+
path = original_scheme + ":" + path
484527

485528
path = path.lstrip("/")
486529

tests/test_purl_spec.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
# Copyright (c) the purl authors
2+
# SPDX-License-Identifier: MIT
3+
#
4+
# Permission is hereby granted, free of charge, to any person obtaining a copy
5+
# of this software and associated documentation files (the "Software"), to deal
6+
# in the Software without restriction, including without limitation the rights
7+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8+
# copies of the Software, and to permit persons to whom the Software is
9+
# furnished to do so, subject to the following conditions:
10+
#
11+
# The above copyright notice and this permission notice shall be included in all
12+
# copies or substantial portions of the Software.
13+
#
14+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20+
# SOFTWARE.
21+
22+
# Visit https://github.com/package-url/packageurl-python for support and
23+
# download.
24+
25+
import json
26+
import os
27+
28+
import pytest
29+
30+
from packageurl import PackageURL
31+
32+
# Locate the specification test suite inside the "spec" directory that sits
# next to this tests directory.
current_dir = os.path.dirname(__file__)
root_dir = os.path.abspath(os.path.join(current_dir, ".."))
spec_file_path = os.path.join(
    root_dir,
    "spec",
    "tests",
    "spec",
    "specification-test.json",
)

# The suite is read once, when this module is imported.
with open(spec_file_path, "r", encoding="utf-8") as f:
    test_cases = json.load(f)

tests = test_cases["tests"]

# Split the cases by kind; cases of any other test_type land in neither list.
parse_tests = [entry for entry in tests if entry["test_type"] == "parse"]
build_tests = [entry for entry in tests if entry["test_type"] == "build"]
43+
44+
45+
def load_spec_files(spec_dir):
    """
    Load the test cases of every ``*-test.json`` file found in ``spec_dir``.

    Return a dictionary mapping each file name to the value of the top-level
    ``"tests"`` key of that file. Files are visited in sorted name order so
    that the resulting mapping, and any test parametrization built from it,
    is the same on every run and on every worker process.

    A file that is not valid JSON is reported on stdout and left out of the
    result.
    """
    spec_data = {}
    # os.listdir() returns names in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(spec_dir)):
        if not filename.endswith("-test.json"):
            continue
        filepath = os.path.join(spec_dir, filename)
        with open(filepath, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
                spec_data[filename] = data["tests"]
            except json.JSONDecodeError as e:
                print(f"Error parsing {filename}: {e}")
    return spec_data
61+
62+
63+
# Directory holding one "*-test.json" suite per package type.
SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types")
spec_dict = load_spec_files(SPEC_DIR)

# One (filename, description, case) tuple per test case, across all loaded
# files, ready to be handed to pytest.mark.parametrize.
flattened_cases = [
    (filename, case["description"], case)
    for filename, cases in spec_dict.items()
    for case in cases
]
70+
71+
72+
@pytest.mark.parametrize(
    "description, input_str, expected_output, expected_failure",
    [
        (tc["description"], tc["input"], tc["expected_output"], tc["expected_failure"])
        for tc in parse_tests
    ],
)
def test_parse(description, input_str, expected_output, expected_failure):
    """
    Check one "parse" case of the specification suite.

    When the case is flagged as an expected failure, parsing ``input_str``
    must raise. Otherwise the parsed purl, rendered back to a string, must
    equal ``expected_output``.
    """
    if not expected_failure:
        parsed = PackageURL.from_string(input_str)
        assert parsed.to_string() == expected_output
        return

    with pytest.raises(Exception):
        PackageURL.from_string(input_str)
86+
87+
88+
@pytest.mark.parametrize(
    "description, input_dict, expected_output, expected_failure",
    [
        (tc["description"], tc["input"], tc["expected_output"], tc["expected_failure"])
        for tc in build_tests
    ],
)
def test_build(description, input_dict, expected_output, expected_failure):
    """
    Check one "build" case of the specification suite.

    A PackageURL is built from the six components found in ``input_dict``; a
    component missing from ``input_dict`` is passed as ``None``. When the case
    is flagged as an expected failure, building or rendering must raise.
    Otherwise the rendered purl must equal ``expected_output``.
    """
    components = ("type", "namespace", "name", "version", "qualifiers", "subpath")
    kwargs = {component: input_dict.get(component) for component in components}

    if not expected_failure:
        built = PackageURL(**kwargs)
        assert built.to_string() == expected_output
        return

    with pytest.raises(Exception):
        PackageURL(**kwargs).to_string()
111+
112+
113+
@pytest.mark.parametrize("filename,description,test_case", flattened_cases)
def test_package_type_case(filename, description, test_case):
    """
    Check one case taken from the per-type spec files.

    The case is executed by ``run_test_case``. A case whose
    ``expected_failure`` flag is set (the flag defaults to False when absent)
    must raise; any other case must run without error.
    """
    kind = test_case["test_type"]

    if not test_case.get("expected_failure", False):
        run_test_case(test_case, kind, description)
        return

    with pytest.raises(Exception):
        run_test_case(test_case, kind, description)
123+
124+
125+
def run_test_case(case, test_type, desc):
    """
    Execute a single per-type spec ``case`` according to its ``test_type``.

    - ``"parse"``: parse ``case["input"]`` and compare every component with
      the ``case["expected_output"]`` mapping. Empty or missing expected
      qualifiers only require the parsed qualifiers to be falsy.
    - ``"roundtrip"``: parse ``case["input"]`` and compare its string form
      with ``case["expected_output"]``.
    - ``"build"``: build a purl from the components in ``case["input"]`` and
      compare its string form with ``case["expected_output"]``.

    Any other ``test_type`` is reported as skipped, using ``desc`` in the skip
    reason, instead of silently passing.
    """
    if test_type == "parse":
        purl = PackageURL.from_string(case["input"])
        expected = case["expected_output"]
        assert purl.type == expected["type"]
        assert purl.namespace == expected["namespace"]
        assert purl.name == expected["name"]
        assert purl.version == expected["version"]
        if expected["qualifiers"]:
            assert purl.qualifiers == expected["qualifiers"]
        else:
            assert not purl.qualifiers
        assert purl.subpath == expected["subpath"]

    elif test_type == "roundtrip":
        purl = PackageURL.from_string(case["input"])
        assert purl.to_string() == case["expected_output"]

    elif test_type == "build":
        input_data = case["input"]
        # Read every component with .get() so that an expected failure has to
        # come from PackageURL itself, never from a KeyError in this harness.
        purl = PackageURL(
            type=input_data.get("type"),
            namespace=input_data.get("namespace"),
            name=input_data.get("name"),
            version=input_data.get("version"),
            qualifiers=input_data.get("qualifiers"),
            subpath=input_data.get("subpath"),
        )
        assert purl.to_string() == case["expected_output"]

    else:
        # Make unhandled kinds visible in the test report.
        pytest.skip(f"Unsupported test_type {test_type!r}: {desc}")

0 commit comments

Comments
 (0)