From bfab4895a7378d37b1d215ab67770822ca098f11 Mon Sep 17 00:00:00 2001 From: Joshua Kugler Date: Wed, 5 Feb 2025 13:28:18 -0900 Subject: [PATCH 1/3] feat: fix parsing of names and namespaces with colons This attempts to comply with the specification. Closes #152 (I think) See discussion at https://github.com/package-url/packageurl-python/issues/152 Signed-off-by: Joshua Kugler --- src/packageurl/__init__.py | 41 +++++++++++++++++++++++------------ tests/test_packageurl.py | 44 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 14 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 8199e39..19ca118 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -77,11 +77,13 @@ def unquote(s: AnyStr) -> str: @overload -def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: ... +def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: + ... @overload -def get_quoter(encode: None) -> Callable[[str], str]: ... +def get_quoter(encode: None) -> Callable[[str], str]: + ... def get_quoter(encode: bool | None = True) -> Callable[[AnyStr], str] | Callable[[str], str]: @@ -151,19 +153,22 @@ def normalize_version(version: AnyStr | None, encode: bool | None = True) -> str @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[True] = ... -) -> str | None: ... +) -> str | None: + ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[False] | None -) -> dict[str, str]: ... +) -> dict[str, str]: + ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: bool | None = ... -) -> str | dict[str, str] | None: ... +) -> str | dict[str, str] | None: + ... 
def normalize_qualifiers( @@ -251,7 +256,8 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[True] = ..., -) -> tuple[str, str | None, str, str | None, str | None, str | None]: ... +) -> tuple[str, str | None, str, str | None, str | None, str | None]: + ... @overload @@ -263,7 +269,8 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[False] | None, -) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: ... +) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: + ... @overload @@ -275,7 +282,8 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: bool | None = ..., -) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: ... +) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: + ... def normalize( @@ -459,12 +467,17 @@ def from_string(cls, purl: str) -> Self: url=remainder, scheme="", allow_fragments=True ) - if scheme or authority: - msg = ( - f'Invalid purl {purl!r} cannot contain a "user:pass@host:port" ' - f"URL Authority component: {authority!r}." - ) - raise ValueError(msg) + # The spec seems to allow colons in the name and namespace. + # urllib.parse.urlsplit splits on ":" and considers the pieces parts of the scheme + # and authority. + # Other libraries do not care about this. + # See https://github.com/package-url/packageurl-python/issues/152#issuecomment-2637692538 + # We do + ":" + to put the colon back that urlsplit removed.
+ if authority: + path = authority + ":" + path + + if scheme: + path = scheme + ":" + path path = path.lstrip("/") diff --git a/tests/test_packageurl.py b/tests/test_packageurl.py index cb419e2..54885ae 100644 --- a/tests/test_packageurl.py +++ b/tests/test_packageurl.py @@ -330,3 +330,47 @@ def test_to_dict_custom_empty_value(self): def test_purl_is_hashable(): s = {PackageURL(name="hashable", type="pypi")} assert len(s) == 1 + + +def test_colons_in_name_are_handled_correctly() -> None: + p = PackageURL.from_string( + "pkg:nuget/libiconv:%20character%20set%20conversion%20library@1.9?package-id=e11a609df352e292" + ) + + assert p.type == "nuget" + assert p.namespace is None + assert p.name == "libiconv: character set conversion library" + assert p.version == "1.9" + assert p.qualifiers == {"package-id": "e11a609df352e292"} + assert p.subpath == None + + assert PackageURL.from_string(p.to_string()).to_string() == p.to_string() + + +def test_colons_in_namespace_are_handled_correctly() -> None: + p = PackageURL.from_string( + "pkg:nuget/an:odd:space/libiconv:%20character%20set%20conversion%20library@1.9?package-id=e11a609df352e292" + ) + + assert p.type == "nuget" + assert p.namespace == "an:odd:space" + assert p.name == "libiconv: character set conversion library" + assert p.version == "1.9" + assert p.qualifiers == {"package-id": "e11a609df352e292"} + assert p.subpath == None + + assert PackageURL.from_string(p.to_string()).to_string() == p.to_string() + + +def test_encoding_stuff_with_colons_correctly() -> None: + p = PackageURL( + type="nuget", + namespace="an:odd:space", + name="libiconv: character set conversion library", + version="1.9", + qualifiers={"package-id": "e11a609df352e292"}, + ) + assert ( + p.to_string() + == "pkg:nuget/an:odd:space/libiconv:%20character%20set%20conversion%20library@1.9?package-id=e11a609df352e292" + ) From 714acd7d04bfee12d98d540ff1f20fa8ecf7681d Mon Sep 17 00:00:00 2001 From: Joshua Kugler Date: Wed, 12 Feb 2025 17:24:41 -0900 
Subject: [PATCH 2/3] Ran `make valid` Change a bunch of things I never touched, but ok... :) Signed-off-by: Joshua Kugler --- src/packageurl/__init__.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 19ca118..9677b73 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -77,13 +77,11 @@ def unquote(s: AnyStr) -> str: @overload -def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: - ... +def get_quoter(encode: bool = True) -> Callable[[AnyStr], str]: ... @overload -def get_quoter(encode: None) -> Callable[[str], str]: - ... +def get_quoter(encode: None) -> Callable[[str], str]: ... def get_quoter(encode: bool | None = True) -> Callable[[AnyStr], str] | Callable[[str], str]: @@ -153,22 +151,19 @@ def normalize_version(version: AnyStr | None, encode: bool | None = True) -> str @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[True] = ... -) -> str | None: - ... +) -> str | None: ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: Literal[False] | None -) -> dict[str, str]: - ... +) -> dict[str, str]: ... @overload def normalize_qualifiers( qualifiers: AnyStr | dict[str, str] | None, encode: bool | None = ... -) -> str | dict[str, str] | None: - ... +) -> str | dict[str, str] | None: ... def normalize_qualifiers( @@ -256,8 +251,7 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[True] = ..., -) -> tuple[str, str | None, str, str | None, str | None, str | None]: - ... +) -> tuple[str, str | None, str, str | None, str | None, str | None]: ... @overload @@ -269,8 +263,7 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: Literal[False] | None, -) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: - ... 
+) -> tuple[str, str | None, str, str | None, dict[str, str], str | None]: ... @overload @@ -282,8 +275,7 @@ def normalize( qualifiers: AnyStr | dict[str, str] | None, subpath: AnyStr | None, encode: bool | None = ..., -) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: - ... +) -> tuple[str, str | None, str, str | None, str | dict[str, str] | None, str | None]: ... def normalize( From 1c3baf71a1b6c7509c648405cdfb4743067e3a85 Mon Sep 17 00:00:00 2001 From: Joshua Kugler Date: Tue, 18 Feb 2025 11:19:11 -0900 Subject: [PATCH 3/3] Fixed a couple tests to use is instead of equals Signed-off-by: Joshua Kugler --- tests/test_packageurl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_packageurl.py b/tests/test_packageurl.py index 54885ae..5d14220 100644 --- a/tests/test_packageurl.py +++ b/tests/test_packageurl.py @@ -342,7 +342,7 @@ def test_colons_in_name_are_handled_correctly() -> None: assert p.name == "libiconv: character set conversion library" assert p.version == "1.9" assert p.qualifiers == {"package-id": "e11a609df352e292"} - assert p.subpath == None + assert p.subpath is None assert PackageURL.from_string(p.to_string()).to_string() == p.to_string() @@ -357,7 +357,7 @@ def test_colons_in_namespace_are_handled_correctly() -> None: assert p.name == "libiconv: character set conversion library" assert p.version == "1.9" assert p.qualifiers == {"package-id": "e11a609df352e292"} - assert p.subpath == None + assert p.subpath is None assert PackageURL.from_string(p.to_string()).to_string() == p.to_string()