Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive

- name: Setup Python environment
uses: actions/setup-python@v5
Expand All @@ -23,7 +25,7 @@ jobs:
- name: Validate
run: |
isort --check-only src/ tests/
black --check --line-length 100 .
black --check --line-length 100 src/ tests/
mypy

build-and-test:
Expand All @@ -44,6 +46,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive

- name: Setup Python environment
uses: actions/setup-python@v5
Expand All @@ -56,7 +60,7 @@ jobs:
pip install -e .[build]

- name: Test
run: py.test -vvs
run: py.test -vvs --ignore=spec/

- name: Build
run: python setup.py build sdist bdist_wheel
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "spec"]
path = spec
url = https://github.com/package-url/purl-spec.git
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
PYTHON_EXE?=python3
ACTIVATE?=. bin/activate;
VIRTUALENV_PYZ=thirdparty/virtualenv.pyz
BLACK_ARGS=--exclude=".cache|lib|bin|var" --line-length 100
BLACK_ARGS=src/ --exclude="\.cache|lib|bin|var" --line-length 100

virtualenv:
@echo "-> Bootstrap the virtualenv with PYTHON_EXE=${PYTHON_EXE}"
Expand All @@ -52,7 +52,7 @@ isort:

black:
@echo "-> Apply black code formatter"
@${ACTIVATE} black ${BLACK_ARGS} .
@${ACTIVATE} black ${BLACK_ARGS}

mypy:
@echo "-> Type check the Python code."
Expand Down
1 change: 1 addition & 0 deletions spec
Submodule spec added at a627e0
59 changes: 51 additions & 8 deletions src/packageurl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from collections.abc import Mapping
from typing import TYPE_CHECKING
from typing import Any
from typing import Optional
from typing import Union
from typing import overload
from urllib.parse import quote as _percent_quote
Expand Down Expand Up @@ -116,37 +117,66 @@ def normalize_namespace(

namespace_str = namespace if isinstance(namespace, str) else namespace.decode("utf-8")
namespace_str = namespace_str.strip().strip("/")
if ptype in ("bitbucket", "github", "pypi", "gitlab"):
if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"):
namespace_str = namespace_str.lower()
segments = [seg for seg in namespace_str.split("/") if seg.strip()]
segments_quoted = map(get_quoter(encode), segments)
return "/".join(segments_quoted) or None


def normalize_mlflow_name(
name_str: str,
qualifiers: Union[str, bytes, dict[str, str], None],
) -> Optional[str]:
"""MLflow purl names are case-sensitive for Azure ML, it is case sensitive and must be kept as-is in the package URL
For Databricks, it is case insensitive and must be lowercased in the package URL"""
if isinstance(qualifiers, dict):
repo_url = qualifiers.get("repository_url")
if repo_url and "azureml" in repo_url.lower():
return name_str
if repo_url and "databricks" in repo_url.lower():
return name_str.lower()
if isinstance(qualifiers, str):
if "azureml" in qualifiers.lower():
return name_str
if "databricks" in qualifiers.lower():
return name_str.lower()
return name_str


def normalize_name(
name: AnyStr | None, ptype: str | None, encode: bool | None = True
) -> str | None:
name: AnyStr | None,
qualifiers: Union[Union[str, bytes], dict[str, str], None],
ptype: str | None,
encode: bool | None = True,
) -> Optional[str]:
if not name:
return None

name_str = name if isinstance(name, str) else name.decode("utf-8")
quoter = get_quoter(encode)
name_str = quoter(name_str)
name_str = name_str.strip().strip("/")
if ptype in ("bitbucket", "github", "pypi", "gitlab"):
if ptype and ptype in ("mlflow"):
return normalize_mlflow_name(name_str, qualifiers)
if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"):
name_str = name_str.lower()
if ptype == "pypi":
name_str = name_str.replace("_", "-")
return name_str or None


def normalize_version(version: AnyStr | None, encode: bool | None = True) -> str | None:
def normalize_version(
version: AnyStr | None, ptype: Optional[Union[str, bytes]], encode: bool | None = True
) -> str | None:
if not version:
return None

version_str = version if isinstance(version, str) else version.decode("utf-8")
quoter = get_quoter(encode)
version_str = quoter(version_str.strip())
if ptype and isinstance(ptype, str) and ptype in ("huggingface"):
return version_str.lower()
return version_str or None


Expand Down Expand Up @@ -304,8 +334,8 @@ def normalize(
"""
type_norm = normalize_type(type, encode)
namespace_norm = normalize_namespace(namespace, type_norm, encode)
name_norm = normalize_name(name, type_norm, encode)
version_norm = normalize_version(version, encode)
name_norm = normalize_name(name, qualifiers, type_norm, encode)
version_norm = normalize_version(version, type, encode)
qualifiers_norm = normalize_qualifiers(qualifiers, encode)
subpath_norm = normalize_subpath(subpath, encode)
return type_norm, namespace_norm, name_norm, version_norm, qualifiers_norm, subpath_norm
Expand Down Expand Up @@ -464,8 +494,19 @@ def from_string(cls, purl: str) -> Self:
if not type_ or not sep:
raise ValueError(f"purl is missing the required type component: {purl!r}.")

valid_chars = string.ascii_letters + string.digits + ".-_"
if not all(c in valid_chars for c in type_):
raise ValueError(
f"purl type must be composed only of ASCII letters and numbers, period, dash and underscore: {type_!r}."
)

if type_[0] in string.digits:
raise ValueError(f"purl type cannot start with a number: {type_!r}.")

type_ = type_.lower()

original_remainder = remainder

scheme, authority, path, qualifiers_str, subpath = _urlsplit(
url=remainder, scheme="", allow_fragments=True
)
Expand All @@ -480,7 +521,9 @@ def from_string(cls, purl: str) -> Self:
path = authority + ":" + path

if scheme:
path = scheme + ":" + path
# This is a way to preserve the casing of the original scheme
original_scheme = original_remainder.split(":", 1)[0]
path = original_scheme + ":" + path

path = path.lstrip("/")

Expand Down
153 changes: 153 additions & 0 deletions tests/test_purl_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# Copyright (c) the purl authors
# SPDX-License-Identifier: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Visit https://github.com/package-url/packageurl-python for support and
# download.

import json
import os

import pytest

from packageurl import PackageURL

current_dir = os.path.dirname(__file__)
root_dir = os.path.abspath(os.path.join(current_dir, ".."))
spec_file_path = os.path.join(root_dir, "spec", "tests", "spec", "specification-test.json")

with open(spec_file_path, "r", encoding="utf-8") as f:
test_cases = json.load(f)

tests = test_cases["tests"]

parse_tests = [t for t in tests if t["test_type"] == "parse"]
build_tests = [t for t in tests if t["test_type"] == "build"]


def load_spec_files(spec_dir):
"""
Load all JSON files from the given directory into a dictionary.
Key = filename, Value = parsed JSON content
"""
spec_data = {}
for filename in os.listdir(spec_dir):
if filename.endswith("-test.json"):
filepath = os.path.join(spec_dir, filename)
with open(filepath, "r", encoding="utf-8") as f:
try:
data = json.load(f)
spec_data[filename] = data["tests"]
except json.JSONDecodeError as e:
print(f"Error parsing {filename}: {e}")
return spec_data


SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types")
spec_dict = load_spec_files(SPEC_DIR)

flattened_cases = []
for filename, cases in spec_dict.items():
for case in cases:
flattened_cases.append((filename, case["description"], case))


@pytest.mark.parametrize(
"description, input_str, expected_output, expected_failure",
[
(t["description"], t["input"], t["expected_output"], t["expected_failure"])
for t in parse_tests
],
)
def test_parse(description, input_str, expected_output, expected_failure):
if expected_failure:
with pytest.raises(Exception):
PackageURL.from_string(input_str)
else:
result = PackageURL.from_string(input_str)
assert result.to_string() == expected_output


@pytest.mark.parametrize(
"description, input_dict, expected_output, expected_failure",
[
(t["description"], t["input"], t["expected_output"], t["expected_failure"])
for t in build_tests
],
)
def test_build(description, input_dict, expected_output, expected_failure):
kwargs = {
"type": input_dict.get("type"),
"namespace": input_dict.get("namespace"),
"name": input_dict.get("name"),
"version": input_dict.get("version"),
"qualifiers": input_dict.get("qualifiers"),
"subpath": input_dict.get("subpath"),
}

if expected_failure:
with pytest.raises(Exception):
PackageURL(**kwargs).to_string()
else:
purl = PackageURL(**kwargs)
assert purl.to_string() == expected_output


@pytest.mark.parametrize("filename,description,test_case", flattened_cases)
def test_package_type_case(filename, description, test_case):
test_type = test_case["test_type"]
expected_failure = test_case.get("expected_failure", False)

if expected_failure:
with pytest.raises(Exception):
run_test_case(test_case, test_type, description)
else:
run_test_case(test_case, test_type, description)


def run_test_case(case, test_type, desc):
if test_type == "parse":
purl = PackageURL.from_string(case["input"])
expected = case["expected_output"]
assert purl.type == expected["type"]
assert purl.namespace == expected["namespace"]
assert purl.name == expected["name"]
assert purl.version == expected["version"]
if expected["qualifiers"]:
assert purl.qualifiers == expected["qualifiers"]
else:
assert not purl.qualifiers
assert purl.subpath == expected["subpath"]

elif test_type == "roundtrip":
purl = PackageURL.from_string(case["input"])
assert purl.to_string() == case["expected_output"]

elif test_type == "build":
input_data = case["input"]
purl = PackageURL(
type=input_data["type"],
namespace=input_data["namespace"],
name=input_data["name"],
version=input_data["version"],
qualifiers=input_data.get("qualifiers"),
subpath=input_data.get("subpath"),
)
assert purl.to_string() == case["expected_output"]
Loading