diff --git a/etc/scripts/generate_validators.py b/etc/scripts/generate_validators.py new file mode 100644 index 0000000..d3dddcb --- /dev/null +++ b/etc/scripts/generate_validators.py @@ -0,0 +1,267 @@ +# Generate a simple script based on provided list for package types + +""" +{ + "$schema": "https://packageurl.org/schemas/purl-type-definition.schema-1.0.json", + "$id": "https://packageurl.org/types/pypi-definition.json", + "type": "pypi", + "type_name": "PyPI", + "description": "Python packages", + "repository": { + "use_repository": true, + "default_repository_url": "https://pypi.org", + "note": "Previously https://pypi.python.org" + }, + "namespace_definition": { + "requirement": "prohibited", + "note": "there is no namespace" + }, + "name_definition": { + "native_name": "name", + "case_sensitive": false, + "normalization_rules": [ + "Replace underscore _ with dash -", + "Replace dot . with underscore _ when used in distribution (sdist, wheel) names" + ], + "note": "PyPI treats - and _ as the same character and is not case sensitive. Therefore a PyPI package name must be lowercased and underscore _ replaced with a dash -. Note that PyPI itself is preserving the case of package names. When used in distribution and wheel names, the dot . is replaced with an underscore _" + }, + "version_definition": { + "case_sensitive": false, + "native_name": "version" + }, + "qualifiers_definition": [ + { + "key": "file_name", + "requirement": "optional", + "description": "The file_name qualifier selects a particular distribution file (case-sensitive). 
For naming convention, see the Python Packaging User Guide on source distributions https://packaging.python.org/en/latest/specifications/source-distribution-format/#source-distribution-file-name and on binary distributions https://packaging.python.org/en/latest/specifications/binary-distribution-format/#file-name-convention and the rules for platform compatibility tags https://packaging.python.org/en/latest/specifications/platform-compatibility-tags/" + } + ], + "examples": [ + "pkg:pypi/django@1.11.1", + "pkg:pypi/django@1.11.1?filename=Django-1.11.1.tar.gz", + "pkg:pypi/django@1.11.1?filename=Django-1.11.1-py2.py3-none-any.whl", + "pkg:pypi/django-allauth@12.23" + ] +} +""" +from packageurl import PackageURL +from pathlib import Path +import json + +HEADER = '''# Copyright (c) the purl authors +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +# Visit https://github.com/package-url/packageurl-python for support and +# download. + +""" +Validate each type according to the PURL spec type definitions +""" + +class TypeValidator: + @classmethod + def validate(cls, purl, strict=False): + if not strict: + purl = cls.normalize(purl) + + if cls.namespace_requirement == "prohibited" and purl.namespace: + yield f"Namespace is prohibited for purl type: {cls.type!r}" + + elif cls.namespace_requirement == "required" and not purl.namespace: + yield f"Namespace is required for purl type: {cls.type!r}" + + if ( + not cls.namespace_case_sensitive + and purl.namespace + and purl.namespace.lower() != purl.namespace + ): + yield f"Namespace is not lowercased for purl type: {cls.type!r}" + + if not cls.name_case_sensitive and purl.name and purl.name.lower() != purl.name: + yield f"Name is not lowercased for purl type: {cls.type!r}" + + if not cls.version_case_sensitive and purl.version and purl.version.lower() != purl.version: + yield f"Version is not lowercased for purl type: {cls.type!r}" + + yield from cls.validate_type(purl, strict=strict) + + @classmethod + def normalize(cls, purl): + from packageurl import PackageURL + from packageurl import normalize + + type_norm, namespace_norm, name_norm, version_norm, qualifiers_norm, subpath_norm = ( + normalize( + purl.type, + purl.namespace, + purl.name, + purl.version, + purl.qualifiers, + purl.subpath, + encode=False, + ) + ) + + return PackageURL( + type=type_norm, + namespace=namespace_norm, + name=name_norm, + version=version_norm, + qualifiers=qualifiers_norm, + subpath=subpath_norm, + ) + + @classmethod + def validate_type(cls, purl, strict=False): + if strict: + yield from cls.validate_qualifiers(purl=purl) + + @classmethod + def validate_qualifiers(cls, purl): + if not purl.qualifiers: + return + + purl_qualifiers_keys = set(purl.qualifiers.keys()) + allowed_qualifiers_set = cls.allowed_qualifiers + + disallowed = purl_qualifiers_keys - allowed_qualifiers_set + + if 
# Template for one generated validator class. String fields use the ``!r``
# conversion so that quotes, backslashes and newlines found in the JSON type
# definitions always render as valid Python literals.
TEMPLATE = """
class {class_name}({validator_class}):
    type = {type!r}
    type_name = {type_name!r}
    description = {description!r}
    use_repository = {use_repository}
    default_repository_url = {default_repository_url!r}
    namespace_requirement = {namespace_requirement!r}
    allowed_qualifiers = {allowed_qualifiers}
    namespace_case_sensitive = {namespace_case_sensitive}
    name_case_sensitive = {name_case_sensitive}
    version_case_sensitive = {version_case_sensitive}
    purl_pattern = {purl_pattern!r}
"""


def _render_set_literal(values):
    """
    Return Python source for a set holding the strings in ``values``.

    Items are sorted so the generated module is identical from run to run, and
    an empty collection renders as ``set()`` because ``{}`` would be a dict.
    """
    if not values:
        return "set()"
    return "{" + ", ".join(repr(value) for value in sorted(values)) + "}"


def _render_type_validator(type_def, validator_class="TypeValidator"):
    """
    Return ``(purl_type, class_name, source)`` for one parsed type definition.

    ``type_def`` is the mapping loaded from a ``*-definition.json`` file. The
    ``type``, ``type_name``, ``description``, ``namespace_definition`` and
    ``name_definition`` keys are required and raise ``KeyError`` when missing;
    the remaining keys are optional.
    """
    purl_type = type_def["type"]
    class_name = f"{purl_type.capitalize()}{validator_class}"

    namespace_definition = type_def["namespace_definition"]
    name_definition = type_def["name_definition"]
    version_definition = type_def.get("version_definition") or {}
    repository = type_def.get("repository") or {}
    use_repository = bool(repository.get("use_repository"))

    allowed_qualifiers = {
        definition["key"]
        for definition in type_def.get("qualifiers_definition") or []
        if definition.get("key")
    }
    if use_repository:
        # Types backed by a repository always accept the repository_url qualifier.
        allowed_qualifiers.add("repository_url")

    # Versions are case sensitive unless the definition says otherwise. An
    # explicit ``false`` must be honoured, so test for None instead of using
    # ``or True`` (which can never produce False).
    version_case_sensitive = version_definition.get("case_sensitive")
    if version_case_sensitive is None:
        version_case_sensitive = True

    source = TEMPLATE.format(
        class_name=class_name,
        validator_class=validator_class,
        type=purl_type,
        type_name=type_def["type_name"],
        description=type_def["description"],
        use_repository=use_repository,
        default_repository_url=repository.get("default_repository_url") or "",
        namespace_requirement=namespace_definition["requirement"],
        allowed_qualifiers=_render_set_literal(allowed_qualifiers),
        namespace_case_sensitive=bool(namespace_definition.get("case_sensitive")),
        name_case_sensitive=bool(name_definition.get("case_sensitive")),
        version_case_sensitive=bool(version_case_sensitive),
        purl_pattern=f"pkg:{purl_type}/.*",
    )
    return purl_type, class_name, source


def generate_validators(types_dir=None, output_path=None, header=None):
    """
    Generate validators for all package types defined in the packageurl specification.

    Every ``*.json`` file found in ``types_dir`` is rendered as one validator
    class; the classes and a ``VALIDATORS_BY_TYPE`` mapping are then written,
    after ``header``, to ``output_path``.

    All arguments are optional and default to the previous hard-coded values:
    ``types_dir`` is ``<repo>/spec/types``, ``output_path`` is
    ``<repo>/src/packageurl/validate.py`` and ``header`` is the module-level
    ``HEADER`` string. ``<repo>`` is three directories above this script.
    """
    repo_root = Path(__file__).parent.parent.parent
    types_dir = Path(types_dir) if types_dir is not None else repo_root / "spec" / "types"
    if output_path is not None:
        output_path = Path(output_path)
    else:
        output_path = repo_root / "src" / "packageurl" / "validate.py"

    script_parts = [HEADER if header is None else header]
    validators_by_type = {}

    # Sorted so that classes are emitted in a stable, alphabetical order.
    for definition_path in sorted(types_dir.glob("*.json")):
        type_def = json.loads(definition_path.read_text(encoding="utf-8"))
        purl_type, class_name, source = _render_type_validator(type_def)
        validators_by_type[purl_type] = class_name
        script_parts.append(source)

    script_parts.append(generate_validators_by_type(validators_by_type=validators_by_type))
    # NOTE: the output of attach_router() is currently not part of the generated module.

    output_path.write_text("\n".join(script_parts), encoding="utf-8")


def generate_validators_by_type(validators_by_type):
    """
    Return a python snippet that maps a type to its TypeValidator class.

    ``validators_by_type`` maps a purl type string to the *name* of its
    validator class; the snippet references those names unquoted.
    """
    entries = "\n".join(
        f"    {purl_type!r} : {class_name}," for purl_type, class_name in validators_by_type.items()
    )
    return f"VALIDATORS_BY_TYPE = {{\n{entries}\n}}"


def attach_router(classes):
    """
    Return a python snippet that registers each validator class with a router.

    The snippet defines ``PACKAGE_REGISTRY`` from the given class names and
    appends one route per class to a ``Router`` instance; ``Router`` must be
    importable wherever the snippet ends up.
    """
    entries = "\n".join(f"    {class_name}," for class_name in classes)
    registry = f"PACKAGE_REGISTRY = [ \n{entries}\n ]"
    router_code = '''
validate_router = Router()

for pkg_class in PACKAGE_REGISTRY:
    validate_router.append(pattern=pkg_class.purl_pattern, endpoint=pkg_class.validate)
    '''
    return f"{registry}{router_code}"
__name__ == "__main__": + generate_validators() \ No newline at end of file diff --git a/spec b/spec index a627e02..ce67457 160000 --- a/spec +++ b/spec @@ -1 +1 @@ -Subproject commit a627e02e97b3a43de3938c3d8f67da7a51395578 +Subproject commit ce6745797a85a3121f2f1aef718d52f26d3f6a84 diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index a2c445e..9d6e3e6 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -24,6 +24,7 @@ from __future__ import annotations +import re import string from collections import namedtuple from collections.abc import Mapping @@ -36,6 +37,8 @@ from urllib.parse import unquote as _percent_unquote from urllib.parse import urlsplit as _urlsplit +from packageurl.contrib.route import NoRouteAvailable + if TYPE_CHECKING: from collections.abc import Callable from collections.abc import Iterable @@ -117,8 +120,21 @@ def normalize_namespace( namespace_str = namespace if isinstance(namespace, str) else namespace.decode("utf-8") namespace_str = namespace_str.strip().strip("/") - if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"): + if ptype in ( + "bitbucket", + "github", + "pypi", + "gitlab", + "composer", + "luarocks", + "qpkg", + "alpm", + "apk", + "hex", + ): namespace_str = namespace_str.lower() + if ptype and ptype in ("cpan"): + namespace_str = namespace_str.upper() segments = [seg for seg in namespace_str.split("/") if seg.strip()] segments_quoted = map(get_quoter(encode), segments) return "/".join(segments_quoted) or None @@ -159,9 +175,24 @@ def normalize_name( name_str = name_str.strip().strip("/") if ptype and ptype in ("mlflow"): return normalize_mlflow_name(name_str, qualifiers) - if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"): + if ptype in ( + "bitbucket", + "github", + "pypi", + "gitlab", + "composer", + "luarocks", + "oci", + "npm", + "alpm", + "apk", + "bitnami", + "hex", + ): name_str = name_str.lower() if ptype == "pypi": + name_str = name_str.replace("_", 
"-").lower() + if ptype == "hackage": name_str = name_str.replace("_", "-") return name_str or None @@ -175,7 +206,7 @@ def normalize_version( version_str = version if isinstance(version, str) else version.decode("utf-8") quoter = get_quoter(encode) version_str = quoter(version_str.strip()) - if ptype and isinstance(ptype, str) and ptype in ("huggingface"): + if ptype and isinstance(ptype, str) and ptype in ("huggingface", "oci"): return version_str.lower() return version_str or None @@ -366,6 +397,7 @@ def __new__( version: AnyStr | None = None, qualifiers: AnyStr | dict[str, str] | None = None, subpath: AnyStr | None = None, + normalize_purl: bool = True, ) -> Self: required = dict(type=type, name=name) for key, value in required.items(): @@ -391,23 +423,43 @@ def __new__( f"Invalid purl: qualifiers argument must be a dict or a string: {qualifiers!r}." ) - ( - type_norm, - namespace_norm, - name_norm, - version_norm, - qualifiers_norm, - subpath_norm, - ) = normalize(type, namespace, name, version, qualifiers, subpath, encode=None) + type_final: str + namespace_final: Optional[str] + name_final: str + version_final: Optional[str] + qualifiers_final: dict[str, str] + subpath_final: Optional[str] + + if normalize_purl: + ( + type_final, + namespace_final, + name_final, + version_final, + qualifiers_final, + subpath_final, + ) = normalize(type, namespace, name, version, qualifiers, subpath, encode=None) + else: + from packageurl.utils import ensure_str + + type_final = ensure_str(type) or "" + namespace_final = ensure_str(namespace) + name_final = ensure_str(name) or "" + version_final = ensure_str(version) + if isinstance(qualifiers, dict): + qualifiers_final = qualifiers + else: + qualifiers_final = {} + subpath_final = ensure_str(subpath) return super().__new__( cls, - type=type_norm, - namespace=namespace_norm, - name=name_norm, - version=version_norm, - qualifiers=qualifiers_norm, - subpath=subpath_norm, + type=type_final, + namespace=namespace_final, + 
def validate(self, strict: bool = False) -> list[str]:
    """
    Validate this PackageURL object and return a list of validation error messages.

    The validator class registered for ``self.type`` in
    ``packageurl.validate.VALIDATORS_BY_TYPE`` produces the messages. A type
    without a registered validator yields a single "can not be validated"
    message. An empty list means that no problem was found.

    ``strict`` is forwarded unchanged to the validator class.
    """
    # Imported here rather than at module level.
    # NOTE(review): presumably to avoid a circular import, since
    # packageurl.validate imports from this package - confirm.
    from packageurl.validate import VALIDATORS_BY_TYPE

    unsupported = [f"Given type: {self.type} can not be validated"]

    # The previous ``if self:`` guard is gone: when it was false the method
    # fell off the end and returned None instead of the annotated list.
    # NOTE(review): a PackageURL looks to be a non-empty namedtuple and so
    # always truthy - confirm no subclass overrides __bool__/__len__.
    validator_class = VALIDATORS_BY_TYPE.get(self.type)
    if not validator_class:
        return unsupported

    try:
        return list(validator_class.validate(self, strict))  # type: ignore[no-untyped-call]
    except NoRouteAvailable:
        return unsupported
def ensure_str(value: Optional[Union[str, bytes]]) -> Optional[str]:
    """
    Return ``value`` as text.

    ``bytes`` are decoded as UTF-8; any other value, including ``None``, is
    returned unchanged.
    """
    if not isinstance(value, bytes):
        return value
    return value.decode("utf-8")
class TypeValidator:
    """
    Base class of the per-type purl validators.

    Subclasses supply the rules as class attributes: ``type``,
    ``namespace_requirement``, ``allowed_qualifiers``,
    ``namespace_case_sensitive``, ``name_case_sensitive`` and
    ``version_case_sensitive``. Every check is a generator of message strings.
    """

    @classmethod
    def validate(cls, purl, strict=False):
        """
        Yield one message per rule that ``purl`` breaks.

        Unless ``strict`` is true, ``purl`` is normalized first, so only
        problems that survive normalization are reported.
        """
        if not strict:
            purl = cls.normalize(purl)

        namespace = purl.namespace

        # Namespace presence against the type requirement.
        if namespace:
            if cls.namespace_requirement == "prohibited":
                yield f"Namespace is prohibited for purl type: {cls.type!r}"
        elif cls.namespace_requirement == "required":
            yield f"Namespace is required for purl type: {cls.type!r}"

        # Namespace case: cpan wants uppercase, the other case-insensitive
        # types want lowercase.
        if purl.type == "cpan":
            if namespace and namespace.upper() != namespace:
                yield f"Namespace must be uppercase for purl type: {cls.type!r}"
        elif not cls.namespace_case_sensitive and namespace and namespace != namespace.lower():
            yield f"Namespace is not lowercased for purl type: {cls.type!r}"

        # Name and version follow the same "lowercase unless case sensitive" rule.
        lowercase_checks = (
            ("Name", cls.name_case_sensitive, purl.name),
            ("Version", cls.version_case_sensitive, purl.version),
        )
        for label, case_sensitive, value in lowercase_checks:
            if not case_sensitive and value and value != value.lower():
                yield f"{label} is not lowercased for purl type: {cls.type!r}"

        yield from cls.validate_type(purl, strict=strict)

    @classmethod
    def normalize(cls, purl):
        """
        Return a new PackageURL built from the normalized fields of ``purl``.
        """
        from packageurl import PackageURL
        from packageurl import normalize

        field_names = ("type", "namespace", "name", "version", "qualifiers", "subpath")
        raw_values = [getattr(purl, field_name) for field_name in field_names]
        normalized_values = normalize(*raw_values, encode=False)
        return PackageURL(**dict(zip(field_names, normalized_values)))

    @classmethod
    def validate_type(cls, purl, strict=False):
        """
        Yield type-specific messages; the base rule checks qualifiers in strict mode only.
        """
        if not strict:
            return
        yield from cls.validate_qualifiers(purl=purl)

    @classmethod
    def validate_qualifiers(cls, purl):
        """
        Yield a single message listing the qualifier keys that are not in ``allowed_qualifiers``.
        """
        if not purl.qualifiers:
            return

        found_keys = set(purl.qualifiers.keys())
        allowed = cls.allowed_qualifiers
        unexpected = found_keys - allowed
        if not unexpected:
            return

        yield (
            f"Invalid qualifiers found: {', '.join(sorted(unexpected))}. "
            f"Allowed qualifiers are: {', '.join(sorted(allowed))}"
        )
purl_pattern = "pkg:bitnami/.*" + + +class CargoTypeValidator(TypeValidator): + type = "cargo" + type_name = "Cargo" + description = """Cargo packages for Rust""" + use_repository = True + default_repository_url = "https://crates.io/" + namespace_requirement = "prohibited" + allowed_qualifiers = {"repository_url"} + namespace_case_sensitive = False + name_case_sensitive = True + version_case_sensitive = True + purl_pattern = "pkg:cargo/.*" + + +class CocoapodsTypeValidator(TypeValidator): + type = "cocoapods" + type_name = "CocoaPods" + description = """CocoaPods pods""" + use_repository = True + default_repository_url = "https://cdn.cocoapods.org/" + namespace_requirement = "prohibited" + allowed_qualifiers = {"repository_url"} + namespace_case_sensitive = False + name_case_sensitive = True + version_case_sensitive = True + purl_pattern = "pkg:cocoapods/.*" + + +class ComposerTypeValidator(TypeValidator): + type = "composer" + type_name = "Composer" + description = """Composer PHP packages""" + use_repository = True + default_repository_url = "https://packagist.org" + namespace_requirement = "required" + allowed_qualifiers = {"repository_url"} + namespace_case_sensitive = False + name_case_sensitive = False + version_case_sensitive = True + purl_pattern = "pkg:composer/.*" + + +class ConanTypeValidator(TypeValidator): + type = "conan" + type_name = "Conan C/C++ packages" + description = """Conan C/C++ packages. 
class CpanTypeValidator(TypeValidator):
    """Validation rules for ``pkg:cpan`` purls."""

    type = "cpan"
    type_name = "CPAN"
    description = """CPAN Perl packages"""
    use_repository = True
    default_repository_url = "https://www.cpan.org/"
    namespace_requirement = "optional"
    allowed_qualifiers = {"repository_url", "ext", "vcs_url", "download_url"}
    namespace_case_sensitive = False
    name_case_sensitive = True
    version_case_sensitive = True
    purl_pattern = "pkg:cpan/.*"

    @classmethod
    def validate_type(cls, purl, strict=False):
        """
        Yield cpan-specific name messages, then the base class messages.

        A name used together with a namespace must not contain ``::``, and a
        name used without a namespace must not contain ``-``.
        """
        # This check runs when a namespace IS given, so the message now says
        # "present"; it used to say "absent", contradicting its own condition.
        if purl.namespace and "::" in purl.name:
            yield f"Name must not contain '::' when Namespace is present for purl type: {cls.type!r}"
        if not purl.namespace and "-" in purl.name:
            yield f"Name must not contain '-' when Namespace is absent for purl type: {cls.type!r}"
        yield from super().validate_type(purl, strict)
namespace_requirement = "prohibited" + allowed_qualifiers = {"repository_url"} + namespace_case_sensitive = False + name_case_sensitive = True + version_case_sensitive = True + purl_pattern = "pkg:cran/.*" + + +class DebTypeValidator(TypeValidator): + type = "deb" + type_name = "Debian package" + description = """Debian packages, Debian derivatives, and Ubuntu packages""" + use_repository = True + default_repository_url = "" + namespace_requirement = "required" + allowed_qualifiers = {"repository_url", "arch"} + namespace_case_sensitive = False + name_case_sensitive = False + version_case_sensitive = True + purl_pattern = "pkg:deb/.*" + + +class DockerTypeValidator(TypeValidator): + type = "docker" + type_name = "Docker image" + description = """for Docker images""" + use_repository = True + default_repository_url = "https://hub.docker.com" + namespace_requirement = "optional" + allowed_qualifiers = {"repository_url"} + namespace_case_sensitive = False + name_case_sensitive = False + version_case_sensitive = True + purl_pattern = "pkg:docker/.*" + + +class GemTypeValidator(TypeValidator): + type = "gem" + type_name = "RubyGems" + description = """RubyGems""" + use_repository = True + default_repository_url = "https://rubygems.org" + namespace_requirement = "prohibited" + allowed_qualifiers = {"repository_url", "platform"} + namespace_case_sensitive = False + name_case_sensitive = False + version_case_sensitive = True + purl_pattern = "pkg:gem/.*" + + +class GenericTypeValidator(TypeValidator): + type = "generic" + type_name = "Generic Package" + description = """The generic type is for plain, generic packages that do not fit anywhere else such as for "upstream-from-distro" packages. 
class HackageTypeValidator(TypeValidator):
    """Validation rules for ``pkg:hackage`` purls."""

    type = "hackage"
    type_name = "Haskell package"
    description = """Haskell packages"""
    use_repository = True
    default_repository_url = "https://hackage.haskell.org"
    namespace_requirement = "prohibited"
    allowed_qualifiers = {"repository_url"}
    namespace_case_sensitive = False
    name_case_sensitive = True
    version_case_sensitive = True
    purl_pattern = "pkg:hackage/.*"

    @classmethod
    def validate_type(cls, purl, strict=False):
        """
        Yield a message when the name contains an underscore, then the base class messages.
        """
        has_underscore = purl.name.find("_") >= 0
        if has_underscore:
            yield f"Name contains underscores but should be kebab-case for purl type: {cls.type!r}"
        yield from super().validate_type(purl, strict)
default_repository_url = "https://repo.hex.pm" + namespace_requirement = "optional" + allowed_qualifiers = {"repository_url"} + namespace_case_sensitive = False + name_case_sensitive = False + version_case_sensitive = True + purl_pattern = "pkg:hex/.*" + + +class HuggingfaceTypeValidator(TypeValidator): + type = "huggingface" + type_name = "HuggingFace models" + description = """Hugging Face ML models""" + use_repository = True + default_repository_url = "" + namespace_requirement = "required" + allowed_qualifiers = {"repository_url"} + namespace_case_sensitive = True + name_case_sensitive = True + version_case_sensitive = True + purl_pattern = "pkg:huggingface/.*" + + +class LuarocksTypeValidator(TypeValidator): + type = "luarocks" + type_name = "LuaRocks" + description = """Lua packages installed with LuaRocks""" + use_repository = True + default_repository_url = "" + namespace_requirement = "optional" + allowed_qualifiers = {"repository_url"} + namespace_case_sensitive = False + name_case_sensitive = False + version_case_sensitive = True + purl_pattern = "pkg:luarocks/.*" + + +class MavenTypeValidator(TypeValidator): + type = "maven" + type_name = "Maven" + description = """PURL type for Maven JARs and related artifacts.""" + use_repository = True + default_repository_url = "https://repo.maven.apache.org/maven2/" + namespace_requirement = "required" + allowed_qualifiers = {"repository_url", "type", "classifier"} + namespace_case_sensitive = True + name_case_sensitive = True + version_case_sensitive = True + purl_pattern = "pkg:maven/.*" + + +class MlflowTypeValidator(TypeValidator): + type = "mlflow" + type_name = "" + description = """MLflow ML models (Azure ML, Databricks, etc.)""" + use_repository = True + default_repository_url = "" + namespace_requirement = "prohibited" + allowed_qualifiers = {"repository_url", "run_id", "model_uuid"} + namespace_case_sensitive = False + name_case_sensitive = False + version_case_sensitive = True + purl_pattern = 
class PubTypeValidator(TypeValidator):
    """Validation rules for ``pkg:pub`` purls."""

    type = "pub"
    type_name = "Pub"
    description = """Dart and Flutter pub packages"""
    use_repository = True
    default_repository_url = "https://pub.dartlang.org"
    namespace_requirement = "prohibited"
    allowed_qualifiers = {"repository_url"}
    namespace_case_sensitive = False
    name_case_sensitive = False
    version_case_sensitive = True
    purl_pattern = "pkg:pub/.*"

    # The only characters accepted in a pub package name: ASCII [a-z0-9_].
    _allowed_name_characters = frozenset("abcdefghijklmnopqrstuvwxyz0123456789_")

    @classmethod
    def validate_type(cls, purl, strict=False):
        """
        Yield pub-specific name messages, then the base class messages.

        A name made of anything other than ASCII lowercase letters, digits and
        underscores is reported; a name containing a space additionally gets a
        dedicated hint to use underscores.
        """
        # An explicit ASCII set is used because str.islower() and str.isdigit()
        # also accept non-ASCII characters, which the message rules out.
        if any(character not in cls._allowed_name_characters for character in purl.name):
            yield f"Name contains invalid characters but should only contain lowercase letters, digits, or underscores for purl type: {cls.type!r}"
        if " " in purl.name:
            yield f"Name contains spaces but should use underscores instead for purl type: {cls.type!r}"
        yield from super().validate_type(purl, strict)
{"tag_creator_name", "tag_creator_regid", "tag_version", "tag_id", "patch"} + namespace_case_sensitive = True + name_case_sensitive = True + version_case_sensitive = True + purl_pattern = "pkg:swid/.*" + + +class SwiftTypeValidator(TypeValidator): + type = "swift" + type_name = "Swift packages" + description = """Swift packages""" + use_repository = True + default_repository_url = "" + namespace_requirement = "required" + allowed_qualifiers = {"repository_url"} + namespace_case_sensitive = True + name_case_sensitive = True + version_case_sensitive = True + purl_pattern = "pkg:swift/.*" + + +VALIDATORS_BY_TYPE = { + "alpm": AlpmTypeValidator, + "apk": ApkTypeValidator, + "bitbucket": BitbucketTypeValidator, + "bitnami": BitnamiTypeValidator, + "cargo": CargoTypeValidator, + "cocoapods": CocoapodsTypeValidator, + "composer": ComposerTypeValidator, + "conan": ConanTypeValidator, + "conda": CondaTypeValidator, + "cpan": CpanTypeValidator, + "cran": CranTypeValidator, + "deb": DebTypeValidator, + "docker": DockerTypeValidator, + "gem": GemTypeValidator, + "generic": GenericTypeValidator, + "github": GithubTypeValidator, + "golang": GolangTypeValidator, + "hackage": HackageTypeValidator, + "hex": HexTypeValidator, + "huggingface": HuggingfaceTypeValidator, + "luarocks": LuarocksTypeValidator, + "maven": MavenTypeValidator, + "mlflow": MlflowTypeValidator, + "npm": NpmTypeValidator, + "nuget": NugetTypeValidator, + "oci": OciTypeValidator, + "pub": PubTypeValidator, + "pypi": PypiTypeValidator, + "qpkg": QpkgTypeValidator, + "rpm": RpmTypeValidator, + "swid": SwidTypeValidator, + "swift": SwiftTypeValidator, +} diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index 1d02b1f..de036f4 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -151,3 +151,26 @@ def run_test_case(case, test_type, desc): subpath=input_data.get("subpath"), ) assert purl.to_string() == case["expected_output"] + + elif test_type == "validation": + input_data = 
case["input"] + test_group = case.get("test_group") + if test_group not in ("base", "advanced"): + raise Exception(test_group) + strict = True + if test_group == "advanced": + strict = False + purl = PackageURL( + type=input_data["type"], + namespace=input_data["namespace"], + name=input_data["name"], + version=input_data["version"], + qualifiers=input_data.get("qualifiers"), + subpath=input_data.get("subpath"), + normalize_purl=not strict, + ) + messages = purl.validate(strict=strict) + if case.get("expected_messages"): + assert messages == case["expected_messages"] + else: + assert not messages