Address review comments

TG1999 · TG1999 · commit 100b947944cf · 2025-08-19T09:53:23.000+05:30
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py
@@ -124,12 +124,32 @@ def normalize_namespace(
     return "/".join(segments_quoted) or None
 
 
+def normalize_mlflow_name(
+    name_str: str,
+    qualifiers: Union[str, bytes, dict[str, str], None],
+) -> Optional[str]:
+    """Return name_str as-is when the qualifiers mention "azureml" (Azure ML names are case-sensitive) or no known host;
+    return it lowercased when they mention "databricks" (Databricks names are case-insensitive)."""
+    if isinstance(qualifiers, dict):  # parsed qualifiers: only the repository_url value is inspected
+        repo_url = qualifiers.get("repository_url")
+        if repo_url and "azureml" in repo_url.lower():
+            return name_str
+        if repo_url and "databricks" in repo_url.lower():
+            return name_str.lower()
+    if isinstance(qualifiers, str):  # unparsed qualifiers: substring search over the whole string
+        if "azureml" in qualifiers.lower():
+            return name_str
+        if "databricks" in qualifiers.lower():
+            return name_str.lower()
+    return name_str  # bytes/None qualifiers or no recognized host: keep the name unchanged
+
+
 def normalize_name(
     name: AnyStr | None,
     qualifiers: Union[Union[str, bytes], dict[str, str], None],
     ptype: str | None,
     encode: bool | None = True,
-) -> str | None:
+) -> Optional[str]:
     if not name:
         return None
 
@@ -138,19 +158,7 @@ def normalize_name(
     name_str = quoter(name_str)
     name_str = name_str.strip().strip("/")
     if ptype and ptype in ("mlflow"):
-        # MLflow purl names are case-sensitive for Azure ML, it is case sensitive and must be kept as-is in the package URL
-        # For Databricks, it is case insensitive and must be lowercased in the package URL
-        if isinstance(qualifiers, dict):
-            repo_url = qualifiers.get("repository_url")
-            if repo_url and "azureml" in repo_url.lower():
-                return name_str
-            if repo_url and "databricks" in repo_url.lower():
-                return name_str.lower()
-        if isinstance(qualifiers, str):
-            if "azureml" in qualifiers.lower():
-                return name_str
-            if "databricks" in qualifiers.lower():
-                return name_str.lower()
+        return normalize_mlflow_name(name_str, qualifiers)
     if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"):
         name_str = name_str.lower()
     if ptype == "pypi":
@@ -486,14 +494,12 @@ def from_string(cls, purl: str) -> Self:
         if not type_ or not sep:
             raise ValueError(f"purl is missing the required type component: {purl!r}.")
 
-        if not all(c in string.ascii_letters + string.digits + "-._" for c in type_):
+        valid_chars = string.ascii_letters + string.digits + ".-_"
+        if not all(c in valid_chars for c in type_):
             raise ValueError(
                 f"purl type must be composed only of ASCII letters and numbers, period, dash and underscore: {type_!r}."
             )
 
-        if ":" in type_:
-            raise ValueError(f"purl type cannot contain a colon: {type_!r}.")
-
         if type_[0] in string.digits:
             raise ValueError(f"purl type cannot start with a number: {type_!r}.")
 
diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py
@@ -33,21 +33,42 @@
 root_dir = os.path.abspath(os.path.join(current_dir, ".."))
 spec_file_path = os.path.join(root_dir, "spec", "tests", "spec", "specification-test.json")
 
-valid_purl_types_file = os.path.join(root_dir, "spec", "purl-types-index.json")
-
-
 with open(spec_file_path, "r", encoding="utf-8") as f:
     test_cases = json.load(f)
 
-with open(valid_purl_types_file, "r", encoding="utf-8") as f:
-    valid_purl_types = json.load(f)
-
 tests = test_cases["tests"]
 
 parse_tests = [t for t in tests if t["test_type"] == "parse"]
 build_tests = [t for t in tests if t["test_type"] == "build"]
 
 
+def load_spec_files(spec_dir):
+    """
+    Load the "tests" entry of every "*-test.json" file found directly in spec_dir.
+    Return a dict: key = filename, value = that file's data["tests"] content.
+    """
+    spec_data = {}
+    for filename in os.listdir(spec_dir):
+        if filename.endswith("-test.json"):
+            filepath = os.path.join(spec_dir, filename)
+            with open(filepath, "r", encoding="utf-8") as f:
+                try:
+                    data = json.load(f)
+                    spec_data[filename] = data["tests"]  # a missing "tests" key raises KeyError (not caught here)
+                except json.JSONDecodeError as e:  # invalid JSON is reported and the file is skipped
+                    print(f"Error parsing {filename}: {e}")
+    return spec_data
+
+
+SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types")
+spec_dict = load_spec_files(SPEC_DIR)  # {filename: tests loaded from that file}
+
+flattened_cases = []  # (filename, description, case) tuples consumed by pytest.mark.parametrize
+for filename, cases in spec_dict.items():
+    for case in cases:
+        flattened_cases.append((filename, case["description"], case))
+
+
 @pytest.mark.parametrize(
     "description, input_str, expected_output, expected_failure",
     [
@@ -59,7 +80,6 @@ def test_parse(description, input_str, expected_output, expected_failure):
     if expected_failure:
         with pytest.raises(Exception):
             PackageURL.from_string(input_str)
-        # assert None ==PackageURL.from_string(input_str)
     else:
         result = PackageURL.from_string(input_str)
         assert result.to_string() == expected_output
@@ -90,33 +110,6 @@ def test_build(description, input_dict, expected_output, expected_failure):
         assert purl.to_string() == expected_output
 
 
-def load_spec_files(spec_dir):
-    """
-    Load all JSON files from the given directory into a dictionary.
-    Key = filename, Value = parsed JSON content
-    """
-    spec_data = {}
-    for filename in os.listdir(spec_dir):
-        if filename.endswith("-test.json"):
-            filepath = os.path.join(spec_dir, filename)
-            with open(filepath, "r", encoding="utf-8") as f:
-                try:
-                    data = json.load(f)
-                    spec_data[filename] = data["tests"]
-                except json.JSONDecodeError as e:
-                    print(f"Error parsing {filename}: {e}")
-    return spec_data
-
-
-SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types")
-spec_dict = load_spec_files(SPEC_DIR)
-
-flattened_cases = []
-for filename, cases in spec_dict.items():
-    for case in cases:
-        flattened_cases.append((filename, case["description"], case))
-
-
 @pytest.mark.parametrize("filename,description,test_case", flattened_cases)
 def test_package_type_case(filename, description, test_case):
     test_type = test_case["test_type"]