Skip to content

Commit 2af10cf

Browse files
committed
Add initial support for affected_by_commits and fixed_by_commits
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent ab99939 commit 2af10cf

File tree

69 files changed

+6508
-3603
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+6508
-3603
lines changed

vulnerabilities/importer.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from vulnerabilities.severity_systems import ScoringSystem
3838
from vulnerabilities.utils import classproperty
3939
from vulnerabilities.utils import get_reference_id
40+
from vulnerabilities.utils import is_commit
4041
from vulnerabilities.utils import is_cve
4142
from vulnerabilities.utils import nearest_patched_package
4243
from vulnerabilities.utils import purl_to_dict
@@ -194,6 +195,57 @@ def from_url(cls, url):
194195
return cls(url=url)
195196

196197

198+
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class Commit:
    """
    A version-control commit referenced by an advisory, either as the commit
    that introduced a vulnerability or as the commit that fixed it.

    ``commit_hash`` and ``vcs_url`` are required and must be non-empty.
    ``commit_rank`` is the primary sort criterion. The remaining fields are
    optional metadata.

    Equality is the dataclass-generated field-by-field comparison. Ordering
    uses ``_cmp_key()``, which covers the same fields, so ``<``, ``>`` and
    ``==`` never contradict each other. Instances are mutable and therefore
    unhashable: compare or key them by ``(vcs_url, commit_hash)`` when a set
    or dict is needed.
    """

    commit_hash: str
    vcs_url: str

    commit_rank: Optional[int] = 0
    commit_author: Optional[str] = None
    commit_message: Optional[str] = None
    # TODO: add an optional ``commit_date`` field and include it in
    # ``to_dict()`` / ``from_dict()`` once its serialization is settled.

    def __post_init__(self):
        """
        Validate the required fields and normalize ``commit_hash`` and
        ``commit_rank``.

        Raise ValueError when ``commit_hash`` or ``vcs_url`` is empty or None.
        """
        if not self.commit_hash:
            raise ValueError("Commit must have a non-empty commit_hash.")
        if not self.vcs_url:
            raise ValueError("Commit must have a non-empty vcs_url.")
        if not isinstance(self.commit_hash, str):
            self.commit_hash = str(self.commit_hash)
        # A missing rank means "unranked": store the default so that ordering
        # never has to compare None against an int.
        if self.commit_rank is None:
            self.commit_rank = 0

    @staticmethod
    def _optional_key(value):
        """Return a sort key for an optional string that orders None first."""
        return (value is not None, value or "")

    def _cmp_key(self):
        """
        Return the tuple used for ordering: rank first, then the identifying
        hash and URL, then the optional metadata so that two commits compare
        as neither smaller nor greater only when they are equal.
        """
        return (
            self.commit_rank,
            self.commit_hash,
            self.vcs_url,
            self._optional_key(self.commit_author),
            self._optional_key(self.commit_message),
        )

    def __lt__(self, other):
        if not isinstance(other, Commit):
            return NotImplemented
        # Comparing the full key (not only the rank) keeps the operators that
        # functools.total_ordering derives consistent with __eq__.
        return self._cmp_key() < other._cmp_key()

    def to_dict(self) -> dict:
        """Return a normalized dictionary representation of the commit."""
        return {
            "commit_hash": self.commit_hash,
            "vcs_url": self.vcs_url,
            "commit_rank": self.commit_rank,
            "commit_author": self.commit_author,
            "commit_message": self.commit_message,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "Commit":
        """
        Create a Commit instance from a dictionary such as the one returned by
        ``to_dict()``.

        Raise ValueError when ``commit_hash`` or ``vcs_url`` is missing, empty
        or None.
        """
        return cls(
            # Pass the raw value through: wrapping None in str() would yield
            # the truthy string "None" and slip past the non-empty check.
            commit_hash=data.get("commit_hash"),
            vcs_url=data.get("vcs_url"),
            commit_rank=data.get("commit_rank", 0),
            commit_author=data.get("commit_author"),
            commit_message=data.get("commit_message"),
        )
247+
248+
197249
class UnMergeablePackageError(Exception):
198250
"""
199251
Raised when a package cannot be merged with another one.
@@ -218,6 +270,8 @@ class AffectedPackage:
218270
package: PackageURL
219271
affected_version_range: Optional[VersionRange] = None
220272
fixed_version: Optional[Version] = None
273+
fixed_by_commits: List[Commit] = dataclasses.field(default_factory=list)
274+
affected_by_commits: List[Commit] = dataclasses.field(default_factory=list)
221275

222276
def __post_init__(self):
223277
if self.package.version:
@@ -248,6 +302,8 @@ def _cmp_key(self):
248302
str(self.package),
249303
str(self.affected_version_range or ""),
250304
str(self.fixed_version or ""),
305+
str(self.affected_by_commits or []),
306+
str(self.fixed_by_commits or []),
251307
)
252308

253309
@classmethod
@@ -294,6 +350,12 @@ def to_dict(self):
294350
"package": purl_to_dict(self.package),
295351
"affected_version_range": affected_version_range,
296352
"fixed_version": str(self.fixed_version) if self.fixed_version else None,
353+
"affected_by_commits": [
354+
affected_by_commit.to_dict() for affected_by_commit in self.affected_by_commits
355+
],
356+
"fixed_by_commits": [
357+
fixed_by_commit.to_dict() for fixed_by_commit in self.fixed_by_commits
358+
],
297359
}
298360

299361
@classmethod
@@ -304,6 +366,8 @@ def from_dict(cls, affected_pkg: dict):
304366
package = PackageURL(**affected_pkg["package"])
305367
affected_version_range = None
306368
affected_range = affected_pkg["affected_version_range"]
369+
affected_by_commits = affected_pkg.get("affected_by_commits") or []
370+
fixed_by_commits = affected_pkg.get("fixed_by_commits") or []
307371

308372
# TODO: "None" is a likely bug
309373
if affected_range and affected_range != "None":
@@ -335,6 +399,12 @@ def from_dict(cls, affected_pkg: dict):
335399
package=package,
336400
affected_version_range=affected_version_range,
337401
fixed_version=fixed_version,
402+
affected_by_commits=[
403+
Commit.from_dict(affected_by_commit) for affected_by_commit in affected_by_commits
404+
],
405+
fixed_by_commits=[
406+
Commit.from_dict(fixed_by_commit) for fixed_by_commit in fixed_by_commits
407+
],
338408
)
339409

340410

@@ -350,6 +420,8 @@ class AffectedPackageV2:
350420
package: PackageURL
351421
affected_version_range: Optional[VersionRange] = None
352422
fixed_version_range: Optional[VersionRange] = None
423+
fixed_by_commits: List[Commit] = dataclasses.field(default_factory=list)
424+
affected_by_commits: List[Commit] = dataclasses.field(default_factory=list)
353425

354426
def __post_init__(self):
355427
if self.package.version:
@@ -372,6 +444,8 @@ def _cmp_key(self):
372444
str(self.package),
373445
str(self.affected_version_range or ""),
374446
str(self.fixed_version_range or ""),
447+
str(self.affected_by_commits or []),
448+
str(self.fixed_by_commits or []),
375449
)
376450

377451
def to_dict(self):
@@ -385,6 +459,12 @@ def to_dict(self):
385459
"package": purl_to_dict(self.package),
386460
"affected_version_range": affected_version_range,
387461
"fixed_version_range": fixed_version_range,
462+
"affected_by_commits": [
463+
affected_by_commit.to_dict() for affected_by_commit in self.affected_by_commits
464+
],
465+
"fixed_by_commits": [
466+
fixed_by_commit.to_dict() for fixed_by_commit in self.fixed_by_commits
467+
],
388468
}
389469

390470
@classmethod
@@ -396,6 +476,8 @@ def from_dict(cls, affected_pkg: dict):
396476
fixed_version_range = None
397477
affected_range = affected_pkg["affected_version_range"]
398478
fixed_range = affected_pkg["fixed_version_range"]
479+
affected_by_commits = affected_pkg.get("affected_by_commits") or []
480+
fixed_by_commits = affected_pkg.get("fixed_by_commits") or []
399481

400482
try:
401483
affected_version_range = VersionRange.from_string(affected_range)
@@ -413,10 +495,27 @@ def from_dict(cls, affected_pkg: dict):
413495
)
414496
return
415497

498+
invalid_fix_commits = [c for c in fixed_by_commits if not is_commit(c.commit_hash)]
499+
invalid_affected_commits = [c for c in affected_by_commits if not is_commit(c.commit_hash)]
500+
501+
if invalid_fix_commits or invalid_affected_commits:
502+
logger.error(
503+
f"Invalid commit hash(es) found. "
504+
f"Invalid fixed_by_commits: {invalid_fix_commits}, "
505+
f"Invalid affected_by_commits: {invalid_affected_commits}"
506+
)
507+
return
508+
416509
return cls(
417510
package=package,
418511
affected_version_range=affected_version_range,
419512
fixed_version_range=fixed_version_range,
513+
affected_by_commits=[
514+
Commit.from_dict(affected_by_commit) for affected_by_commit in affected_by_commits
515+
],
516+
fixed_by_commits=[
517+
Commit.from_dict(fixed_by_commit) for fixed_by_commit in fixed_by_commits
518+
],
420519
)
421520

422521

vulnerabilities/importers/curl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData:
9797
... ]
9898
... }
9999
>>> parse_advisory_data(raw_data)
100-
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
100+
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'), fixed_by_commits=[], affected_by_commits=[])], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
101101
"""
102102

103103
affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else []

vulnerabilities/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2982,6 +2982,8 @@ def to_dict(self):
29822982
"package": purl_to_dict(self.base_purl),
29832983
"affected_version_range": self.affecting_vers,
29842984
"fixed_version_range": self.fixed_vers,
2985+
"affected_by_commits": [commit.to_dict() for commit in self.affecting_commits.all()],
2986+
"fixed_by_commits": [commit.to_dict() for commit in self.fixed_by_commits.all()],
29852987
}
29862988

29872989
def to_affected_package_data(self):

vulnerabilities/pipes/advisory.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from vulnerabilities.models import AdvisoryWeakness
3030
from vulnerabilities.models import AffectedByPackageRelatedVulnerability
3131
from vulnerabilities.models import Alias
32+
from vulnerabilities.models import CodeCommit
3233
from vulnerabilities.models import FixingPackageRelatedVulnerability
3334
from vulnerabilities.models import Package
3435
from vulnerabilities.models import VulnerabilityReference
@@ -96,6 +97,37 @@ def get_or_create_advisory_weaknesses(weaknesses: List[str]) -> List[AdvisoryWea
9697
return list(AdvisoryWeakness.objects.filter(cwe_id__in=weaknesses))
9798

9899

100+
def get_or_create_advisory_code_commits(code_commits: List) -> List["CodeCommit"]:
    """
    Return the CodeCommit rows matching ``code_commits``, creating any that
    are missing.

    ``code_commits`` is a list of commit objects exposing ``commit_hash`` and
    ``vcs_url`` attributes and, optionally, ``commit_author``,
    ``commit_message`` and ``commit_date``. Commits are identified by their
    ``(vcs_url, commit_hash)`` pair: duplicates in the input are collapsed and
    the result follows the order of first appearance. Return an empty list
    for an empty or None input.
    """
    # Key every requested commit by its identifying pair. Plain tuples are
    # hashable even when the commit objects themselves are not.
    requested = {}
    for commit in code_commits or []:
        requested.setdefault((commit.vcs_url, commit.commit_hash), commit)

    if not requested:
        return []

    def fetch_existing():
        # The two __in filters over-select (any listed URL combined with any
        # listed hash), so narrow the candidates down to the exact pairs.
        candidates = CodeCommit.objects.filter(
            vcs_url__in=[vcs_url for vcs_url, _ in requested],
            commit_hash__in=[commit_hash for _, commit_hash in requested],
        )
        return {
            (candidate.vcs_url, candidate.commit_hash): candidate
            for candidate in candidates
            if (candidate.vcs_url, candidate.commit_hash) in requested
        }

    existing_by_key = fetch_existing()

    to_create = [
        CodeCommit(
            commit_hash=commit.commit_hash,
            vcs_url=commit.vcs_url,
            commit_author=getattr(commit, "commit_author", None),
            commit_message=getattr(commit, "commit_message", None),
            commit_date=getattr(commit, "commit_date", None),
        )
        for key, commit in requested.items()
        if key not in existing_by_key
    ]

    if to_create:
        CodeCommit.objects.bulk_create(to_create, ignore_conflicts=True)
        # With ignore_conflicts=True the instances passed to bulk_create do
        # not get their primary key set, so read the stored rows back rather
        # than returning the unsaved objects.
        existing_by_key = fetch_existing()

    return [existing_by_key[key] for key in requested if key in existing_by_key]
129+
130+
99131
def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None):
100132
from vulnerabilities.utils import compute_content_id
101133

@@ -216,6 +248,13 @@ def insert_advisory_v2(
216248
impact.affecting_packages.add(*affected_packages_v2)
217249
impact.fixed_by_packages.add(*fixed_packages_v2)
218250

251+
affected_commit_v2 = get_or_create_advisory_code_commits(
252+
affected_pkg.affected_by_commits
253+
)
254+
fixed_commit_v2 = get_or_create_advisory_code_commits(affected_pkg.fixed_by_commits)
255+
impact.affecting_packages.add(*affected_commit_v2)
256+
impact.fixed_by_packages.add(*fixed_commit_v2)
257+
219258
return advisory_obj
220259

221260

vulnerabilities/tests/pipes/test_advisory.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@
1919
from vulnerabilities import models
2020
from vulnerabilities.importer import AdvisoryData
2121
from vulnerabilities.importer import AffectedPackage
22+
from vulnerabilities.importer import Commit
2223
from vulnerabilities.importer import Reference
2324
from vulnerabilities.models import AdvisoryAlias
2425
from vulnerabilities.models import AdvisoryReference
2526
from vulnerabilities.models import AdvisorySeverity
2627
from vulnerabilities.models import AdvisoryWeakness
2728
from vulnerabilities.pipes.advisory import get_or_create_advisory_aliases
29+
from vulnerabilities.pipes.advisory import get_or_create_advisory_code_commits
2830
from vulnerabilities.pipes.advisory import get_or_create_advisory_references
2931
from vulnerabilities.pipes.advisory import get_or_create_advisory_severities
3032
from vulnerabilities.pipes.advisory import get_or_create_advisory_weaknesses
@@ -41,6 +43,8 @@ def setUp(self):
4143
AffectedPackage(
4244
package=PackageURL(type="pypi", name="dummy"),
4345
affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"),
46+
affected_by_commits=[Commit(commit_hash="ab45wa4", vcs_url="http://test")],
47+
fixed_by_commits=[Commit(commit_hash="78wte7", vcs_url="http://test")],
4448
)
4549
],
4650
references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")],
@@ -160,6 +164,20 @@ def advisory_references():
160164
]
161165

162166

167+
@pytest.fixture
def advisory_commit():
    """Return two sample commits that share one repository URL but differ in hash."""
    repository_url = "https://github.com/aboutcode-org/vulnerablecode"
    commit_hashes = (
        "ef1659c01708b2111d6f06e2aa32f0f9d8768e10",
        "eccbb45ac2d9c0eb7e22ea82d1fc49f9f4cda818",
    )
    return [
        Commit(commit_hash=commit_hash, vcs_url=repository_url)
        for commit_hash in commit_hashes
    ]
179+
180+
163181
@pytest.fixture
164182
def advisory_severities():
165183
class Severity:
@@ -225,3 +243,13 @@ def test_get_or_create_advisory_weaknesses(advisory_weaknesses):
225243
for w in weaknesses:
226244
assert isinstance(w, AdvisoryWeakness)
227245
assert w.cwe_id in advisory_weaknesses
246+
247+
248+
@pytest.mark.django_db
def test_get_or_create_advisory_commit(advisory_commit):
    """Every commit passed in comes back as a stored CodeCommit row."""
    commits = get_or_create_advisory_code_commits(advisory_commit)
    assert len(commits) == len(advisory_commit)

    # Check hash and URL together: testing them independently would also
    # accept a row that pairs one commit's hash with another commit's URL.
    expected_pairs = {(c.vcs_url, c.commit_hash) for c in advisory_commit}
    for commit in commits:
        # The helper returns database model instances, not the importer-side
        # Commit dataclass used to build the input.
        assert isinstance(commit, models.CodeCommit)
        assert (commit.vcs_url, commit.commit_hash) in expected_pairs

vulnerabilities/tests/pipes/test_vulnerablecode_importer_pipeline_v2.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
from packageurl import PackageURL
1717
from univers.version_range import VersionRange
1818

19-
from vulnerabilities.importer import AdvisoryData
19+
from vulnerabilities.importer import AdvisoryData, Commit
2020
from vulnerabilities.importer import AffectedPackageV2
21-
from vulnerabilities.models import AdvisoryV2
21+
from vulnerabilities.models import AdvisoryV2, CodeCommit
2222
from vulnerabilities.models import ImpactedPackage
2323
from vulnerabilities.models import PackageV2
2424
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
@@ -51,11 +51,15 @@ def dummy_advisory():
5151
package=PackageURL.from_string("pkg:npm/foobar"),
5252
affected_version_range=VersionRange.from_string("vers:npm/<=1.2.3"),
5353
fixed_version_range=VersionRange.from_string("vers:npm/1.2.4"),
54+
affected_by_commits=[Commit(commit_hash="ab45wa4", vcs_url="http://test")],
55+
fixed_by_commits=[Commit(commit_hash="78wte7", vcs_url="http://test")],
5456
),
5557
AffectedPackageV2(
5658
package=PackageURL.from_string("pkg:npm/foobar"),
5759
affected_version_range=VersionRange.from_string("vers:npm/<=3.2.3"),
5860
fixed_version_range=VersionRange.from_string("vers:npm/3.2.4"),
61+
affected_by_commits=[Commit(commit_hash="ab45wa4", vcs_url="http://test")],
62+
fixed_by_commits=[Commit(commit_hash="78wte7", vcs_url="http://test")],
5963
),
6064
],
6165
advisory_id="ADV-123",
@@ -92,4 +96,5 @@ def test_advisory_import_atomicity(dummy_importer):
9296
dummy_importer.collect_and_store_advisories()
9397
assert AdvisoryV2.objects.count() == 1
9498
assert ImpactedPackage.objects.count() == 2
99+
assert CodeCommit.objects.count() == 2
95100
assert PackageV2.objects.count() == 4

vulnerabilities/tests/test_data/apache_httpd/CVE-1999-1199-apache-httpd-expected.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
"subpath": ""
1515
},
1616
"affected_version_range": "vers:apache/1.3.0|1.3.1|!=1.3.2",
17-
"fixed_version": null
17+
"fixed_version": null,
18+
"affected_by_commits": [],
19+
"fixed_by_commits": []
1820
}
1921
],
2022
"references": [

vulnerabilities/tests/test_data/apache_httpd/CVE-2017-9798-apache-httpd-expected.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
"subpath": ""
1515
},
1616
"affected_version_range": "vers:apache/2.2.0|2.2.2|2.2.3|2.2.4|2.2.5|2.2.6|2.2.8|2.2.9|2.2.10|2.2.11|2.2.12|2.2.13|2.2.14|2.2.15|2.2.16|2.2.17|2.2.18|2.2.19|2.2.20|2.2.21|2.2.22|2.2.23|2.2.24|2.2.25|2.2.26|2.2.27|2.2.29|2.2.31|2.2.32|2.2.34|2.4.1|2.4.2|2.4.3|2.4.4|2.4.6|2.4.7|2.4.9|2.4.10|2.4.12|2.4.16|2.4.17|2.4.18|2.4.20|2.4.23|2.4.25|2.4.26|2.4.27|!=2.4.28",
17-
"fixed_version": null
17+
"fixed_version": null,
18+
"affected_by_commits": [],
19+
"fixed_by_commits": []
1820
}
1921
],
2022
"references": [

vulnerabilities/tests/test_data/apache_httpd/CVE-2021-44224-apache-httpd-expected.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
"subpath": ""
1515
},
1616
"affected_version_range": "vers:apache/>=2.4.7|<=2.4.51|!=2.4.52",
17-
"fixed_version": null
17+
"fixed_version": null,
18+
"affected_by_commits": [],
19+
"fixed_by_commits": []
1820
}
1921
],
2022
"references": [

0 commit comments

Comments
 (0)