Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions src/packageurl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,12 +459,17 @@ def from_string(cls, purl: str) -> Self:
url=remainder, scheme="", allow_fragments=True
)

if scheme or authority:
msg = (
f'Invalid purl {purl!r} cannot contain a "user:pass@host:port" '
f"URL Authority component: {authority!r}."
)
raise ValueError(msg)
# The spec seems to allow colons in the name and namespace.
# urllib.urlsplit splits on ":" and considers the pieces to be parts of
# the scheme and authority.
# Other libraries do not care about this.
# See https://github.com/package-url/packageurl-python/issues/152#issuecomment-2637692538
# We do + ":" + to put the colon back that urlsplit removed.
if authority:
path = authority + ":" + path

if scheme:
path = scheme + ":" + path

path = path.lstrip("/")

Expand Down
44 changes: 44 additions & 0 deletions tests/test_packageurl.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,47 @@ def test_to_dict_custom_empty_value(self):
def test_purl_is_hashable():
    """A PackageURL can be stored in a set, and one instance yields one member."""
    purl = PackageURL(type="pypi", name="hashable")
    unique_purls = {purl}
    assert len(unique_purls) == 1


def test_colons_in_name_are_handled_correctly() -> None:
p = PackageURL.from_string(
"pkg:nuget/libiconv:%20character%20set%20conversion%20library@1.9?package-id=e11a609df352e292"
)

assert p.type == "nuget"
assert p.namespace is None
assert p.name == "libiconv: character set conversion library"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a real name, seen in the wild? I do not think this would be a valid NuGet name.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is the name that started me down this nightmare of a journey. :) It's a name picked up by syft in a binary component.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is the name that started me down this nightmare of a journey. :) It's a name picked up by syft in a binary component.

Do you have a URL to this? Because afaik this is not a legal NuGet name. It could be a bug in syft, which is known to create incorrect PURLs.

assert p.version == "1.9"
assert p.qualifiers == {"package-id": "e11a609df352e292"}
assert p.subpath is None

assert PackageURL.from_string(p.to_string()).to_string() == p.to_string()


def test_colons_in_namespace_are_handled_correctly() -> None:
p = PackageURL.from_string(
"pkg:nuget/an:odd:space/libiconv:%20character%20set%20conversion%20library@1.9?package-id=e11a609df352e292"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you use actual real-life data rather than made-up examples? In all cases the colon should be encoded there IMHO.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't seen a : in a namespace, but just in a name. I used a real-life name in this test that I had found in an SBOM that was given to us.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, so you saw a colon in a namespace in a PURL? I would really like to see that exact PURL and which tool created it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did not have a namespace with a colon, but I did see a name with a colon.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sjn you have :: colons in CPAN, do you?

)

assert p.type == "nuget"
assert p.namespace == "an:odd:space"
assert p.name == "libiconv: character set conversion library"
assert p.version == "1.9"
assert p.qualifiers == {"package-id": "e11a609df352e292"}
assert p.subpath is None

assert PackageURL.from_string(p.to_string()).to_string() == p.to_string()


def test_encoding_stuff_with_colons_correctly() -> None:
    """Serializing a purl keeps colons in namespace and name literal.

    Spaces in the name are expected to be percent-encoded as ``%20``.
    """
    expected = "pkg:nuget/an:odd:space/libiconv:%20character%20set%20conversion%20library@1.9?package-id=e11a609df352e292"

    purl = PackageURL(
        type="nuget",
        namespace="an:odd:space",
        name="libiconv: character set conversion library",
        version="1.9",
        qualifiers={"package-id": "e11a609df352e292"},
    )
    serialized = purl.to_string()

    assert serialized == expected
Loading