From 4c237ce0cb12da22ee2a2eeda659bf3cc45510a7 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 7 Aug 2025 12:14:04 +0530 Subject: [PATCH 01/19] Add purl-spec as submodule Signed-off-by: Tushar Goel --- .gitmodules | 3 +++ spec | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 spec diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..2b13e85 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "spec"] + path = spec + url = https://github.com/package-url/purl-spec.git diff --git a/spec b/spec new file mode 160000 index 0000000..c53ba0e --- /dev/null +++ b/spec @@ -0,0 +1 @@ +Subproject commit c53ba0e2e249939f41ea6de1fa1984e8d831ef68 From f1fecb32260a996d130aabd297884d5fb936413e Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 7 Aug 2025 12:16:02 +0530 Subject: [PATCH 02/19] Add submodule support in CI Signed-off-by: Tushar Goel --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8748e04..42da895 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + with: + submodules: recursive - name: Setup Python environment uses: actions/setup-python@v5 @@ -44,6 +46,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + with: + submodules: recursive - name: Setup Python environment uses: actions/setup-python@v5 From 10b4df94e2be0c1528b99d21bcd9a5966a44e51f Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 7 Aug 2025 12:22:32 +0530 Subject: [PATCH 03/19] Ignore spec in black Signed-off-by: Tushar Goel --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2b1593b..b5a7812 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ PYTHON_EXE?=python3 ACTIVATE?=. bin/activate; VIRTUALENV_PYZ=thirdparty/virtualenv.pyz -BLACK_ARGS=--exclude=".cache|lib|bin|var" --line-length 100 +BLACK_ARGS=--exclude=".cache|lib|bin|var|^spec/" --line-length 100 virtualenv: @echo "-> Bootstrap the virtualenv with PYTHON_EXE=${PYTHON_EXE}" From 5f0924c3e0f19bc73568786d181ab27ff4e7986e Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 7 Aug 2025 12:33:52 +0530 Subject: [PATCH 04/19] Update CI Signed-off-by: Tushar Goel --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 42da895..4cbf097 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: - name: Validate run: | isort --check-only src/ tests/ - black --check --line-length 100 . + black --check --line-length 100 src/ tests/ mypy build-and-test: From 265754c82842da956ebf26bd359fb182603af709 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 7 Aug 2025 12:37:42 +0530 Subject: [PATCH 05/19] Update CI Signed-off-by: Tushar Goel --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4cbf097..eeed9bc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,7 @@ jobs: pip install -e .[build] - name: Test - run: py.test -vvs + run: py.test -vvs /tests - name: Build run: python setup.py build sdist bdist_wheel From b8a6d8470c09c5e9f425895155087a5f71642608 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 7 Aug 2025 12:40:20 +0530 Subject: [PATCH 06/19] Update CI Signed-off-by: Tushar Goel --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eeed9bc..affcef5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,7 @@ jobs: pip install -e .[build] - name: Test - run: py.test -vvs /tests + run: py.test -vvs --ignore=spec/ - name: Build run: python setup.py build sdist bdist_wheel From 83c1a6ae37c88fed67a5c41be15fe6ad707df30a Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 7 Aug 2025 13:24:39 +0530 Subject: [PATCH 07/19] Add tests for spec parsing Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 4 +++ tests/test_purl_spec.py | 58 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 tests/test_purl_spec.py diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 2b7e052..c8ce486 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -463,6 +463,10 @@ def from_string(cls, purl: str) -> Self: type_, sep, remainder = remainder.partition("/") if not type_ or not sep: raise ValueError(f"purl is missing the required type component: {purl!r}.") + + # check if type starts with a number + if type_[0] in string.digits: + raise ValueError(f"purl type cannot start with a number: {type_!r}.") type_ = type_.lower() diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py new file mode 100644 index 0000000..be5a6bf --- /dev/null +++ b/tests/test_purl_spec.py @@ -0,0 +1,58 @@ +import json +import os + +import pytest + +from packageurl import PackageURL + +current_dir = os.path.dirname(__file__) +root_dir = os.path.abspath(os.path.join(current_dir, "..")) +spec_file_path = os.path.join(root_dir, "spec", "tests", "spec", "specification-test.json") + +valid_purl_types_file = os.path.join(root_dir, "spec", "purl-types-index.json") + + +with open(spec_file_path, "r", encoding="utf-8") as f: + test_cases = json.load(f) + +with open(valid_purl_types_file, "r", encoding="utf-8") as f: + valid_purl_types = json.load(f) + +tests = test_cases["tests"] + +parse_tests = [t for t in tests if t["test_type"] == "parse"] +build_tests = [t for t in tests if t["test_type"] == "build"] + +@pytest.mark.parametrize("description, input_str, expected_output, expected_failure", [ + (t["description"], t["input"], t["expected_output"], t["expected_failure"]) + for t in parse_tests +]) +def test_parse(description, input_str, expected_output, expected_failure): + if expected_failure: + with pytest.raises(Exception): + PackageURL.from_string(input_str) + else: + result = PackageURL.from_string(input_str) + assert result.to_string() == expected_output + + +@pytest.mark.parametrize("description, input_dict, expected_output, expected_failure", [ + (t["description"], t["input"], t["expected_output"], t["expected_failure"]) + for t in build_tests +]) +def test_build(description, input_dict, expected_output, expected_failure): + kwargs = { + "type": input_dict.get("type"), + "namespace": input_dict.get("namespace"), + "name": input_dict.get("name"), + "version": input_dict.get("version"), + "qualifiers": input_dict.get("qualifiers"), + "subpath": input_dict.get("subpath"), + } + + if expected_failure: + with pytest.raises(Exception): + PackageURL(**kwargs).to_string() + else: + purl = PackageURL(**kwargs) + assert purl.to_string() == expected_output From b3ca8b356f69bda72d3a3bc6b3d6c31ab4ccb082 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 7 Aug 2025 18:36:15 +0530 Subject: [PATCH 08/19] Fix tests Signed-off-by: Tushar Goel --- Makefile | 4 ++-- src/packageurl/__init__.py | 11 +++++++++-- tests/test_purl_spec.py | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index b5a7812..1ee5201 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ PYTHON_EXE?=python3 ACTIVATE?=. bin/activate; VIRTUALENV_PYZ=thirdparty/virtualenv.pyz -BLACK_ARGS=--exclude=".cache|lib|bin|var|^spec/" --line-length 100 +BLACK_ARGS=src/ --exclude="\.cache|lib|bin|var" --line-length 100 virtualenv: @echo "-> Bootstrap the virtualenv with PYTHON_EXE=${PYTHON_EXE}" @@ -52,7 +52,7 @@ isort: black: @echo "-> Apply black code formatter" - @${ACTIVATE} black ${BLACK_ARGS} . + @${ACTIVATE} black ${BLACK_ARGS} mypy: @echo "-> Type check the Python code." diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index c8ce486..05eeef6 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -463,8 +463,15 @@ def from_string(cls, purl: str) -> Self: type_, sep, remainder = remainder.partition("/") if not type_ or not sep: raise ValueError(f"purl is missing the required type component: {purl!r}.") - - # check if type starts with a number + + if not all(c in string.ascii_letters + string.digits + "-._" for c in type_): + raise ValueError( + f"purl type must be composed only of ASCII letters and numbers, period, dash and underscore: {type_!r}." + ) + + if ":" in type_: + raise ValueError(f"purl type cannot contain a colon: {type_!r}.") + if type_[0] in string.digits: raise ValueError(f"purl type cannot start with a number: {type_!r}.") diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index be5a6bf..c90e920 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -31,6 +31,7 @@ def test_parse(description, input_str, expected_output, expected_failure): if expected_failure: with pytest.raises(Exception): PackageURL.from_string(input_str) + # assert None ==PackageURL.from_string(input_str) else: result = PackageURL.from_string(input_str) assert result.to_string() == expected_output From 61e4bd5d45510d8ac496f28aec3749f0ead03c9e Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 7 Aug 2025 19:06:19 +0530 Subject: [PATCH 09/19] Add tests for purl types Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 2 +- tests/test_purl_spec.py | 91 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 05eeef6..0d899fe 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -178,7 +178,7 @@ def normalize_qualifiers( Raise ValueError on errors. """ if not qualifiers: - return None if encode else {} + return None if isinstance(qualifiers, basestring): qualifiers_str = qualifiers if isinstance(qualifiers, str) else qualifiers.decode("utf-8") diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index c90e920..373727f 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -1,3 +1,27 @@ +# Copyright (c) the purl authors +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# Visit https://github.com/package-url/packageurl-python for support and +# download. + import json import os @@ -57,3 +81,70 @@ def test_build(description, input_dict, expected_output, expected_failure): else: purl = PackageURL(**kwargs) assert purl.to_string() == expected_output + + +def load_spec_files(spec_dir): + """ + Load all JSON files from the given directory into a dictionary. + Key = filename, Value = parsed JSON content + """ + spec_data = {} + for filename in os.listdir(spec_dir): + if filename.endswith("-test.json"): + filepath = os.path.join(spec_dir, filename) + with open(filepath, 'r', encoding='utf-8') as f: + try: + data = json.load(f) + spec_data[filename] = data["tests"] + except json.JSONDecodeError as e: + print(f"Error parsing {filename}: {e}") + return spec_data + + +SPEC_DIR = os.path.join(os.path.dirname(__file__), '..', 'spec', 'tests', 'types') +spec_dict = load_spec_files(SPEC_DIR) + +flattened_cases = [] +for filename, cases in spec_dict.items(): + for case in cases: + flattened_cases.append((filename, case["description"], case)) + + +@pytest.mark.parametrize("filename,description,test_case", flattened_cases) +def test_package_type_case(filename, description, test_case): + test_type = test_case["test_type"] + expected_failure = test_case.get("expected_failure", False) + + if expected_failure: + with pytest.raises(Exception): + run_test_case(test_case, test_type, description) + else: + run_test_case(test_case, test_type, description) + + +def run_test_case(case, test_type, desc): + if test_type == "parse": + purl = PackageURL.from_string(case["input"]) + expected = case["expected_output"] + assert purl.type == expected["type"] + assert purl.namespace == expected["namespace"] + assert purl.name == expected["name"] + assert purl.version == expected["version"] + assert purl.qualifiers == expected["qualifiers"] + assert purl.subpath == expected["subpath"] + + elif test_type == "roundtrip": + purl = PackageURL.from_string(case["input"]) + assert purl.to_string() == case["expected_output"] + + elif test_type == "build": + input_data = case["input"] + purl = PackageURL( + type=input_data["type"], + namespace=input_data["namespace"], + name=input_data["name"], + version=input_data["version"], + qualifiers=input_data.get("qualifiers"), + subpath=input_data.get("subpath"), + ) + assert purl.to_string() == case["expected_output"] From 47dd757b615f70a150114845e58cfbfd637c2594 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 7 Aug 2025 19:07:10 +0530 Subject: [PATCH 10/19] Fix linting issues Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 24 ++++++++++++++++-------- tests/test_purl_spec.py | 27 +++++++++++++++++---------- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 0d899fe..9b3e96f 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -79,11 +79,13 @@ def unquote(s: AnyStr) -> str: @overload -def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: ... +def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: + ... @overload -def get_quoter(encode: None) -> Callable[[str], str]: ... +def get_quoter(encode: None) -> Callable[[str], str]: + ... def get_quoter(encode: bool | None = True) -> Callable[[AnyStr], str] | Callable[[str], str]: @@ -153,19 +155,22 @@ def normalize_version(version: AnyStr | None, encode: bool | None = True) -> str @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[True] = ... -) -> str | None: ... +) -> str | None: + ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[False] | None -) -> dict[str, str]: ... +) -> dict[str, str]: + ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: bool | None = ... -) -> str | dict[str, str] | None: ... +) -> str | dict[str, str] | None: + ... def normalize_qualifiers( @@ -256,7 +261,8 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[True] = ..., -) -> tuple[str, str | None, str, str | None, str | None, str | None]: ... +) -> tuple[str, str | None, str, str | None, str | None, str | None]: + ... @overload @@ -268,7 +274,8 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[False] | None, -) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: ... +) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: + ... @overload @@ -280,7 +287,8 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: bool | None = ..., -) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: ... +) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: + ... def normalize( diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index 373727f..2810ac7 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -47,10 +47,14 @@ parse_tests = [t for t in tests if t["test_type"] == "parse"] build_tests = [t for t in tests if t["test_type"] == "build"] -@pytest.mark.parametrize("description, input_str, expected_output, expected_failure", [ - (t["description"], t["input"], t["expected_output"], t["expected_failure"]) - for t in parse_tests -]) + +@pytest.mark.parametrize( + "description, input_str, expected_output, expected_failure", + [ + (t["description"], t["input"], t["expected_output"], t["expected_failure"]) + for t in parse_tests + ], +) def test_parse(description, input_str, expected_output, expected_failure): if expected_failure: with pytest.raises(Exception): @@ -61,10 +65,13 @@ def test_parse(description, input_str, expected_output, expected_failure): assert result.to_string() == expected_output -@pytest.mark.parametrize("description, input_dict, expected_output, expected_failure", [ - (t["description"], t["input"], t["expected_output"], t["expected_failure"]) - for t in build_tests -]) +@pytest.mark.parametrize( + "description, input_dict, expected_output, expected_failure", + [ + (t["description"], t["input"], t["expected_output"], t["expected_failure"]) + for t in build_tests + ], +) def test_build(description, input_dict, expected_output, expected_failure): kwargs = { "type": input_dict.get("type"), @@ -92,7 +99,7 @@ def load_spec_files(spec_dir): for filename in os.listdir(spec_dir): if filename.endswith("-test.json"): filepath = os.path.join(spec_dir, filename) - with open(filepath, 'r', encoding='utf-8') as f: + with open(filepath, "r", encoding="utf-8") as f: try: data = json.load(f) spec_data[filename] = data["tests"] @@ -101,7 +108,7 @@ def load_spec_files(spec_dir): return spec_data -SPEC_DIR = os.path.join(os.path.dirname(__file__), '..', 'spec', 'tests', 'types') +SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types") spec_dict = load_spec_files(SPEC_DIR) flattened_cases = [] From d563792b2409d3c2e8bd81f553ebfde82dabb79d Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 7 Aug 2025 20:19:59 +0530 Subject: [PATCH 11/19] Update tests Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 26 +++++++++----------------- tests/test_purl_spec.py | 9 +++++++-- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 9b3e96f..691ee31 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -79,13 +79,11 @@ def unquote(s: AnyStr) -> str: @overload -def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: - ... +def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: ... @overload -def get_quoter(encode: None) -> Callable[[str], str]: - ... +def get_quoter(encode: None) -> Callable[[str], str]: ... def get_quoter(encode: bool | None = True) -> Callable[[AnyStr], str] | Callable[[str], str]: @@ -135,7 +133,7 @@ def normalize_name( quoter = get_quoter(encode) name_str = quoter(name_str) name_str = name_str.strip().strip("/") - if ptype in ("bitbucket", "github", "pypi", "gitlab"): + if ptype in ("bitbucket", "github", "pypi", "gitlab", "huggingface"): name_str = name_str.lower() if ptype == "pypi": name_str = name_str.replace("_", "-") @@ -155,22 +153,19 @@ def normalize_version(version: AnyStr | None, encode: bool | None = True) -> str @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[True] = ... -) -> str | None: - ... +) -> str | None: ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[False] | None -) -> dict[str, str]: - ... +) -> dict[str, str]: ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: bool | None = ... -) -> str | dict[str, str] | None: - ... +) -> str | dict[str, str] | None: ... def normalize_qualifiers( @@ -261,8 +256,7 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[True] = ..., -) -> tuple[str, str | None, str, str | None, str | None, str | None]: - ... +) -> tuple[str, str | None, str, str | None, str | None, str | None]: ... @overload @@ -274,8 +268,7 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[False] | None, -) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: - ... +) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: ... @overload @@ -287,8 +280,7 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: bool | None = ..., -) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: - ... +) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: ... def normalize( diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index 2810ac7..1bad4a9 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -116,6 +116,11 @@ def load_spec_files(spec_dir): for case in cases: flattened_cases.append((filename, case["description"], case)) +def test_cran(): + p = PackageURL(** {'name': 'URI::PackageURL', 'type': + 'cran', 'namespace': None, 'qualifiers': None, 'subpath': None}) + p.to_string() + @pytest.mark.parametrize("filename,description,test_case", flattened_cases) def test_package_type_case(filename, description, test_case): @@ -142,7 +147,7 @@ def run_test_case(case, test_type, desc): elif test_type == "roundtrip": purl = PackageURL.from_string(case["input"]) - assert purl.to_string() == case["expected_output"] + assert purl == PackageURL.from_string(case["expected_output"]) elif test_type == "build": input_data = case["input"] @@ -154,4 +159,4 @@ def run_test_case(case, test_type, desc): qualifiers=input_data.get("qualifiers"), subpath=input_data.get("subpath"), ) - assert purl.to_string() == case["expected_output"] + assert purl == PackageURL.from_string(case["expected_output"]) From eafd33a0fe3ef7a1123fc7f7946aa34c9c74aeee Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 8 Aug 2025 13:21:50 +0530 Subject: [PATCH 12/19] Change tests Signed-off-by: Tushar Goel --- src/packageurl/contrib/purl2url.py | 3 ++- tests/test_purl_spec.py | 8 ++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/packageurl/contrib/purl2url.py b/src/packageurl/contrib/purl2url.py index 5806251..08cc73c 100644 --- a/src/packageurl/contrib/purl2url.py +++ b/src/packageurl/contrib/purl2url.py @@ -78,7 +78,8 @@ def get_download_url(purl): # Fallback on the `download_url` qualifier when available. purl_data = PackageURL.from_string(purl) - return purl_data.qualifiers.get("download_url", None) + if purl_data.qualifiers: + return purl_data.qualifiers.get("download_url", None) def get_inferred_urls(purl): diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index 1bad4a9..a35d0c4 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -116,10 +116,6 @@ def load_spec_files(spec_dir): for case in cases: flattened_cases.append((filename, case["description"], case)) -def test_cran(): - p = PackageURL(** {'name': 'URI::PackageURL', 'type': - 'cran', 'namespace': None, 'qualifiers': None, 'subpath': None}) - p.to_string() @pytest.mark.parametrize("filename,description,test_case", flattened_cases) @@ -147,7 +143,7 @@ def run_test_case(case, test_type, desc): elif test_type == "roundtrip": purl = PackageURL.from_string(case["input"]) - assert purl == PackageURL.from_string(case["expected_output"]) + assert purl.to_string() == case["expected_output"] elif test_type == "build": input_data = case["input"] @@ -159,4 +155,4 @@ def run_test_case(case, test_type, desc): qualifiers=input_data.get("qualifiers"), subpath=input_data.get("subpath"), ) - assert purl == PackageURL.from_string(case["expected_output"]) + assert purl.to_string() == case["expected_output"] From 3b9c6dff178c4f81b8d8ea38e1c948c96b95349d Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 12 Aug 2025 14:44:05 +0530 Subject: [PATCH 13/19] Revert changes Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 2 +- src/packageurl/contrib/purl2url.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 691ee31..4002d20 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -178,7 +178,7 @@ def normalize_qualifiers( Raise ValueError on errors. """ if not qualifiers: - return None + return None if encode else {} if isinstance(qualifiers, basestring): qualifiers_str = qualifiers if isinstance(qualifiers, str) else qualifiers.decode("utf-8") diff --git a/src/packageurl/contrib/purl2url.py b/src/packageurl/contrib/purl2url.py index 08cc73c..5806251 100644 --- a/src/packageurl/contrib/purl2url.py +++ b/src/packageurl/contrib/purl2url.py @@ -78,8 +78,7 @@ def get_download_url(purl): # Fallback on the `download_url` qualifier when available. purl_data = PackageURL.from_string(purl) - if purl_data.qualifiers: - return purl_data.qualifiers.get("download_url", None) + return purl_data.qualifiers.get("download_url", None) def get_inferred_urls(purl): From 48dee0d0b8f8c4995cd45bf44432ff27c500bb6b Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 13 Aug 2025 02:02:02 +0530 Subject: [PATCH 14/19] Add support for mlflow Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 33 +++++++++++++++++++++++++++------ tests/test_purl_spec.py | 5 ++++- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 4002d20..e50427f 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -116,7 +116,7 @@ def normalize_namespace( namespace_str = namespace if isinstance(namespace, str) else namespace.decode("utf-8") namespace_str = namespace_str.strip().strip("/") - if ptype in ("bitbucket", "github", "pypi", "gitlab"): + if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"): namespace_str = namespace_str.lower() segments = [seg for seg in namespace_str.split("/") if seg.strip()] segments_quoted = map(get_quoter(encode), segments) @@ -124,7 +124,10 @@ def normalize_namespace( def normalize_name( - name: AnyStr | None, ptype: str | None, encode: bool | None = True + name: AnyStr | None, + qualifiers: str | dict | None, + ptype: str | None, + encode: bool | None = True, ) -> str | None: if not name: return None @@ -133,20 +136,38 @@ def normalize_name( quoter = get_quoter(encode) name_str = quoter(name_str) name_str = name_str.strip().strip("/") - if ptype in ("bitbucket", "github", "pypi", "gitlab", "huggingface"): + if ptype and ptype in ("mlflow"): + # MLflow purl names are case-sensitive for Azure ML, it is case sensitive and must be kept as-is in the package URL + # For Databricks, it is case insensitive and must be lowercased in the package URL + if isinstance(qualifiers, dict): + repo_url = qualifiers.get("repository_url") + if repo_url and "azureml" in repo_url.lower(): + return name_str + if repo_url and "databricks" in repo_url.lower(): + return name_str.lower() + if isinstance(qualifiers, str): + if "azureml" in qualifiers.lower(): + return name_str + if "databricks" in qualifiers.lower(): + return name_str.lower() + if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"): name_str = name_str.lower() if ptype == "pypi": name_str = name_str.replace("_", "-") return name_str or None -def normalize_version(version: AnyStr | None, encode: bool | None = True) -> str | None: +def normalize_version( + version: AnyStr | None, ptype: str | None, encode: bool | None = True +) -> str | None: if not version: return None version_str = version if isinstance(version, str) else version.decode("utf-8") quoter = get_quoter(encode) version_str = quoter(version_str.strip()) + if ptype and ptype in ("huggingface"): + return version_str.lower() return version_str or None @@ -304,8 +325,8 @@ def normalize( """ type_norm = normalize_type(type, encode) namespace_norm = normalize_namespace(namespace, type_norm, encode) - name_norm = normalize_name(name, type_norm, encode) - version_norm = normalize_version(version, encode) + name_norm = normalize_name(name, qualifiers, type_norm, encode) + version_norm = normalize_version(version, type, encode) qualifiers_norm = normalize_qualifiers(qualifiers, encode) subpath_norm = normalize_subpath(subpath, encode) return type_norm, namespace_norm, name_norm, version_norm, qualifiers_norm, subpath_norm diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index a35d0c4..27448c8 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -138,7 +138,10 @@ def run_test_case(case, test_type, desc): assert purl.namespace == expected["namespace"] assert purl.name == expected["name"] assert purl.version == expected["version"] - assert purl.qualifiers == expected["qualifiers"] + if expected["qualifiers"]: + assert purl.qualifiers == expected["qualifiers"] + else: + assert not purl.qualifiers assert purl.subpath == expected["subpath"] elif test_type == "roundtrip": From dc77d35dfe99f85d05c162f1c4bbe05414ded8df Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 14 Aug 2025 16:16:11 +0530 Subject: [PATCH 15/19] Fix colon parsing in purl parsing Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index e50427f..4d90f42 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -29,6 +29,7 @@ from collections.abc import Mapping from typing import TYPE_CHECKING from typing import Any +from typing import Optional from typing import Union from typing import overload from urllib.parse import quote as _percent_quote @@ -125,7 +126,7 @@ def normalize_namespace( def normalize_name( name: AnyStr | None, - qualifiers: str | dict | None, + qualifiers: Union[Union[str, bytes], dict[str, str], None], ptype: str | None, encode: bool | None = True, ) -> str | None: @@ -158,7 +159,7 @@ def normalize_name( def normalize_version( - version: AnyStr | None, ptype: str | None, encode: bool | None = True + version: AnyStr | None, ptype: Optional[Union[str, bytes]], encode: bool | None = True ) -> str | None: if not version: return None @@ -166,7 +167,7 @@ def normalize_version( version_str = version if isinstance(version, str) else version.decode("utf-8") quoter = get_quoter(encode) version_str = quoter(version_str.strip()) - if ptype and ptype in ("huggingface"): + if ptype and isinstance(ptype, str) and ptype in ("huggingface"): return version_str.lower() return version_str or None @@ -498,6 +499,8 @@ def from_string(cls, purl: str) -> Self: type_ = type_.lower() + original_remainder = remainder + scheme, authority, path, qualifiers_str, subpath = _urlsplit( url=remainder, scheme="", allow_fragments=True ) @@ -512,7 +515,9 @@ def from_string(cls, purl: str) -> Self: path = authority + ":" + path if scheme: - path = scheme + ":" + path + # This is a way to preserve the casing of the original scheme + original_scheme = original_remainder.split(":", 1)[0] + path = original_scheme + ":" + path path = path.lstrip("/") From 2efa591e36a61d6efd07bbd0a2387f822aca9026 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 14 Aug 2025 16:18:09 +0530 Subject: [PATCH 16/19] Update spec commit Signed-off-by: Tushar Goel --- spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec b/spec index c53ba0e..a627e02 160000 --- a/spec +++ b/spec @@ -1 +1 @@ -Subproject commit c53ba0e2e249939f41ea6de1fa1984e8d831ef68 +Subproject commit a627e02e97b3a43de3938c3d8f67da7a51395578 From c127000ede8f7a4989888abb662320c01a615e83 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 14 Aug 2025 16:19:16 +0530 Subject: [PATCH 17/19] Fix linting Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 24 ++++++++++++++++-------- tests/test_purl_spec.py | 1 - 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 4d90f42..335e5f2 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -80,11 +80,13 @@ def unquote(s: AnyStr) -> str: @overload -def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: ... +def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: + ... @overload -def get_quoter(encode: None) -> Callable[[str], str]: ... +def get_quoter(encode: None) -> Callable[[str], str]: + ... def get_quoter(encode: bool | None = True) -> Callable[[AnyStr], str] | Callable[[str], str]: @@ -175,19 +177,22 @@ def normalize_version( @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[True] = ... -) -> str | None: ... +) -> str | None: + ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[False] | None -) -> dict[str, str]: ... +) -> dict[str, str]: + ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: bool | None = ... -) -> str | dict[str, str] | None: ... +) -> str | dict[str, str] | None: + ... def normalize_qualifiers( @@ -278,7 +283,8 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[True] = ..., -) -> tuple[str, str | None, str, str | None, str | None, str | None]: ... +) -> tuple[str, str | None, str, str | None, str | None, str | None]: + ... @overload @@ -290,7 +296,8 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[False] | None, -) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: ... +) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: + ... @overload @@ -302,7 +309,8 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: bool | None = ..., -) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: ... +) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: + ... def normalize( diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index 27448c8..951bbad 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -117,7 +117,6 @@ def load_spec_files(spec_dir): flattened_cases.append((filename, case["description"], case)) - @pytest.mark.parametrize("filename,description,test_case", flattened_cases) def test_package_type_case(filename, description, test_case): test_type = test_case["test_type"] From 3a58ba5841e0e8bafec6d81a0840d0b726f050e5 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 14 Aug 2025 16:28:45 +0530 Subject: [PATCH 18/19] Fix linting issues Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 335e5f2..4d90f42 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -80,13 +80,11 @@ def unquote(s: AnyStr) -> str: @overload -def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: - ... +def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: ... @overload -def get_quoter(encode: None) -> Callable[[str], str]: - ... +def get_quoter(encode: None) -> Callable[[str], str]: ... def get_quoter(encode: bool | None = True) -> Callable[[AnyStr], str] | Callable[[str], str]: @@ -177,22 +175,19 @@ def normalize_version( @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[True] = ... -) -> str | None: - ... +) -> str | None: ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[False] | None -) -> dict[str, str]: - ... +) -> dict[str, str]: ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: bool | None = ... -) -> str | dict[str, str] | None: - ... +) -> str | dict[str, str] | None: ... def normalize_qualifiers( @@ -283,8 +278,7 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[True] = ..., -) -> tuple[str, str | None, str, str | None, str | None, str | None]: - ... +) -> tuple[str, str | None, str, str | None, str | None, str | None]: ... @overload @@ -296,8 +290,7 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[False] | None, -) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: - ... +) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: ... @overload @@ -309,8 +302,7 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: bool | None = ..., -) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: - ... +) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: ... def normalize( From 100b947944cf62b2e7da4e615912b556db1563fb Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 19 Aug 2025 09:53:23 +0530 Subject: [PATCH 19/19] Address review comments Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 42 +++++++++++++++----------- tests/test_purl_spec.py | 61 +++++++++++++++++--------------------- 2 files changed, 51 insertions(+), 52 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 4d90f42..a2c445e 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -124,12 +124,32 @@ def normalize_namespace( return "/".join(segments_quoted) or None +def normalize_mlflow_name( + name_str: str, + qualifiers: Union[str, bytes, dict[str, str], None], +) -> Optional[str]: + """MLflow purl names are case-sensitive for Azure ML, it is case sensitive and must be kept as-is in the package URL + For Databricks, it is case insensitive and must be lowercased in the package URL""" + if isinstance(qualifiers, dict): + repo_url = qualifiers.get("repository_url") + if repo_url and "azureml" in repo_url.lower(): + return name_str + if repo_url and "databricks" in repo_url.lower(): + return name_str.lower() + if isinstance(qualifiers, str): + if "azureml" in qualifiers.lower(): + return name_str + if "databricks" in qualifiers.lower(): + return name_str.lower() + return name_str + + def normalize_name( name: AnyStr | None, qualifiers: Union[Union[str, bytes], dict[str, str], None], ptype: str | None, encode: bool | None = True, -) -> str | None: +) -> Optional[str]: if not name: return None @@ -138,19 +158,7 @@ def normalize_name( name_str = quoter(name_str) name_str = name_str.strip().strip("/") if ptype and ptype in ("mlflow"): - # MLflow purl names are case-sensitive for Azure ML, it is case sensitive and must be kept as-is in the package URL - # For Databricks, it is case insensitive and must be lowercased in the package URL - if isinstance(qualifiers, dict): - repo_url = qualifiers.get("repository_url") - if repo_url and "azureml" in repo_url.lower(): - return name_str - if repo_url and "databricks" in repo_url.lower(): - return name_str.lower() - if isinstance(qualifiers, str): - if "azureml" in qualifiers.lower(): - return name_str - if "databricks" in qualifiers.lower(): - return name_str.lower() + return normalize_mlflow_name(name_str, qualifiers) if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"): name_str = name_str.lower() if ptype == "pypi": @@ -486,14 +494,12 @@ def from_string(cls, purl: str) -> Self: if not type_ or not sep: raise ValueError(f"purl is missing the required type component: {purl!r}.") - if not all(c in string.ascii_letters + string.digits + "-._" for c in type_): + valid_chars = string.ascii_letters + string.digits + ".-_" + if not all(c in valid_chars for c in type_): raise ValueError( f"purl type must be composed only of ASCII letters and numbers, period, dash and underscore: {type_!r}." ) - if ":" in type_: - raise ValueError(f"purl type cannot contain a colon: {type_!r}.") - if type_[0] in string.digits: raise ValueError(f"purl type cannot start with a number: {type_!r}.") diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index 951bbad..1d02b1f 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -33,21 +33,42 @@ root_dir = os.path.abspath(os.path.join(current_dir, "..")) spec_file_path = os.path.join(root_dir, "spec", "tests", "spec", "specification-test.json") -valid_purl_types_file = os.path.join(root_dir, "spec", "purl-types-index.json") - - with open(spec_file_path, "r", encoding="utf-8") as f: test_cases = json.load(f) -with open(valid_purl_types_file, "r", encoding="utf-8") as f: - valid_purl_types = json.load(f) - tests = test_cases["tests"] parse_tests = [t for t in tests if t["test_type"] == "parse"] build_tests = [t for t in tests if t["test_type"] == "build"] +def load_spec_files(spec_dir): + """ + Load all JSON files from the given directory into a dictionary. + Key = filename, Value = parsed JSON content + """ + spec_data = {} + for filename in os.listdir(spec_dir): + if filename.endswith("-test.json"): + filepath = os.path.join(spec_dir, filename) + with open(filepath, "r", encoding="utf-8") as f: + try: + data = json.load(f) + spec_data[filename] = data["tests"] + except json.JSONDecodeError as e: + print(f"Error parsing {filename}: {e}") + return spec_data + + +SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types") +spec_dict = load_spec_files(SPEC_DIR) + +flattened_cases = [] +for filename, cases in spec_dict.items(): + for case in cases: + flattened_cases.append((filename, case["description"], case)) + + @pytest.mark.parametrize( "description, input_str, expected_output, expected_failure", [ @@ -59,7 +80,6 @@ def test_parse(description, input_str, expected_output, expected_failure): if expected_failure: with pytest.raises(Exception): PackageURL.from_string(input_str) - # assert None ==PackageURL.from_string(input_str) else: result = PackageURL.from_string(input_str) assert result.to_string() == expected_output @@ -90,33 +110,6 @@ def test_build(description, input_dict, expected_output, expected_failure): assert purl.to_string() == expected_output -def load_spec_files(spec_dir): - """ - Load all JSON files from the given directory into a dictionary. - Key = filename, Value = parsed JSON content - """ - spec_data = {} - for filename in os.listdir(spec_dir): - if filename.endswith("-test.json"): - filepath = os.path.join(spec_dir, filename) - with open(filepath, "r", encoding="utf-8") as f: - try: - data = json.load(f) - spec_data[filename] = data["tests"] - except json.JSONDecodeError as e: - print(f"Error parsing {filename}: {e}") - return spec_data - - -SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types") -spec_dict = load_spec_files(SPEC_DIR) - -flattened_cases = [] -for filename, cases in spec_dict.items(): - for case in cases: - flattened_cases.append((filename, case["description"], case)) - - @pytest.mark.parametrize("filename,description,test_case", flattened_cases) def test_package_type_case(filename, description, test_case): test_type = test_case["test_type"]