Skip to content

Commit a8ec9f1

Browse files
committed
Update affected_by_commits and fixed_by_commits to be separate fields in Advisory
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent ab99939 commit a8ec9f1

File tree

8 files changed

+213
-2
lines changed

8 files changed

+213
-2
lines changed

vulnerabilities/importer.py

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@
3737
from vulnerabilities.severity_systems import ScoringSystem
3838
from vulnerabilities.utils import classproperty
3939
from vulnerabilities.utils import get_reference_id
40+
from vulnerabilities.utils import is_commit
4041
from vulnerabilities.utils import is_cve
41-
from vulnerabilities.utils import nearest_patched_package
4242
from vulnerabilities.utils import purl_to_dict
4343
from vulnerabilities.utils import update_purl_version
4444

@@ -194,6 +194,60 @@ def from_url(cls, url):
194194
return cls(url=url)
195195

196196

197+
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class CodeCommitData:
    """
    A single VCS commit referenced by an advisory.

    ``commit_hash`` and ``vcs_url`` are required. The author, message and
    date are optional and default to None.
    """

    commit_hash: str
    vcs_url: str

    commit_author: Optional[str] = None
    commit_message: Optional[str] = None
    commit_date: Optional[datetime.datetime] = None

    def __post_init__(self):
        """
        Validate the required fields.

        Raise ValueError when ``commit_hash`` is empty or rejected by
        ``is_commit``, or when ``vcs_url`` is empty.
        """
        if not self.commit_hash:
            raise ValueError("Commit must have a non-empty commit_hash.")

        # Coerce before validating so that is_commit() always receives a string.
        if not isinstance(self.commit_hash, str):
            self.commit_hash = str(self.commit_hash)

        if not is_commit(self.commit_hash):
            raise ValueError("Commit must have a valid commit_hash.")

        if not self.vcs_url:
            raise ValueError("Commit must have a non-empty vcs_url.")

    def __lt__(self, other):
        """Order commits by their comparison key; defer for foreign types."""
        if not isinstance(other, CodeCommitData):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    def _serialized_commit_date(self):
        """Return ``commit_date`` as an ISO 8601 string, or None when unset."""
        if not self.commit_date:
            return None
        if isinstance(self.commit_date, datetime.datetime):
            return self.commit_date.isoformat()
        return str(self.commit_date)

    # TODO: Add cache
    def _cmp_key(self):
        """
        Return a tuple usable as a sort key.

        Optional fields are wrapped as ``(is_set, text)`` pairs so that a None
        value never gets compared against a string; unset values sort first.
        The key covers every field used by the generated ``__eq__``.
        """

        def none_first(value):
            return (value is not None, "" if value is None else str(value))

        return (
            self.commit_hash,
            self.vcs_url,
            none_first(self.commit_author),
            none_first(self.commit_message),
            none_first(self._serialized_commit_date()),
        )

    def to_dict(self) -> dict:
        """
        Return a normalized dictionary representation of the commit.

        ``commit_date`` is emitted as an ISO 8601 string (or None) so that the
        result is JSON-serializable and can be fed back to ``from_dict``.
        """
        return {
            "commit_hash": self.commit_hash,
            "vcs_url": self.vcs_url,
            "commit_author": self.commit_author,
            "commit_message": self.commit_message,
            "commit_date": self._serialized_commit_date(),
        }

    @classmethod
    def from_dict(cls, data: dict):
        """
        Create a CodeCommitData instance from a dictionary.

        ``commit_date`` may be an ISO 8601 string, a datetime, or missing/empty.
        Raise ValueError when the resulting commit fails validation.
        """
        commit_date = data.get("commit_date") or None
        if isinstance(commit_date, str):
            commit_date = datetime.datetime.fromisoformat(commit_date)

        return cls(
            commit_hash=str(data.get("commit_hash", "")),
            vcs_url=data.get("vcs_url", ""),
            commit_author=data.get("commit_author"),
            commit_message=data.get("commit_message"),
            commit_date=commit_date,
        )
249+
250+
197251
class UnMergeablePackageError(Exception):
198252
"""
199253
Raised when a package cannot be merged with another one.
@@ -444,6 +498,8 @@ class AdvisoryData:
444498
date_published: Optional[datetime.datetime] = None
445499
weaknesses: List[int] = dataclasses.field(default_factory=list)
446500
severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list)
501+
fixed_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list)
502+
affected_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list)
447503
url: Optional[str] = None
448504
original_advisory_text: Optional[str] = None
449505

@@ -476,6 +532,12 @@ def to_dict(self):
476532
"severities": [sev.to_dict() for sev in self.severities],
477533
"date_published": self.date_published.isoformat() if self.date_published else None,
478534
"weaknesses": self.weaknesses,
535+
"affected_by_commits": [
536+
affected_by_commit.to_dict() for affected_by_commit in self.affected_by_commits
537+
],
538+
"fixed_by_commits": [
539+
fixed_by_commit.to_dict() for fixed_by_commit in self.fixed_by_commits
540+
],
479541
"url": self.url if self.url else "",
480542
}
481543
return {
@@ -537,6 +599,19 @@ class AdvisoryDataV2:
537599
weaknesses: List[int] = dataclasses.field(default_factory=list)
538600
url: Optional[str] = None
539601

602+
# TODO
603+
# Update from_dict and to_dict methods
604+
# Update compute_checksum method
605+
# Update BaseV2 Importer Pipeline
606+
# Change related tests
607+
# Add tests for these newly introduced fields
608+
# have a strong test for insert_advisory_v2 method
609+
# CodeCommitData importer
610+
# remove commit_rank from CodeCommitData importer
611+
612+
fixed_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list)
613+
affected_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list)
614+
540615
def __post_init__(self):
541616
if self.date_published and not self.date_published.tzinfo:
542617
logger.warning(f"AdvisoryData with no tzinfo: {self!r}")
@@ -559,6 +634,12 @@ def to_dict(self):
559634
"references": [ref.to_dict() for ref in self.references],
560635
"date_published": self.date_published.isoformat() if self.date_published else None,
561636
"weaknesses": self.weaknesses,
637+
"affected_by_commits": [
638+
affected_by_commit.to_dict() for affected_by_commit in self.affected_by_commits
639+
],
640+
"fixed_by_commits": [
641+
fixed_by_commit.to_dict() for fixed_by_commit in self.fixed_by_commits
642+
],
562643
"url": self.url if self.url else "",
563644
}
564645

@@ -578,6 +659,14 @@ def from_dict(cls, advisory_data):
578659
if date_published
579660
else None,
580661
"weaknesses": advisory_data["weaknesses"],
662+
"affected_by_commits": [
663+
CodeCommitData.from_dict(affected_by_commit)
664+
for affected_by_commit in advisory_data["affected_by_commits"]
665+
],
666+
"fixed_by_commits": [
667+
CodeCommitData.from_dict(fixed_by_commit)
668+
for fixed_by_commit in advisory_data["fixed_by_commits"]
669+
],
581670
"url": advisory_data.get("url") or None,
582671
}
583672
return cls(**transformed)

vulnerabilities/importers/curl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData:
9797
... ]
9898
... }
9999
>>> parse_advisory_data(raw_data)
100-
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
100+
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], fixed_by_commits=[], affected_by_commits=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
101101
"""
102102

103103
affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else []

vulnerabilities/pipes/advisory.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from typing import Union
1818

1919
from django.db import transaction
20+
from django.db.models import Q
2021
from django.db.models.query import QuerySet
2122

2223
from vulnerabilities.importer import AdvisoryData
@@ -29,6 +30,7 @@
2930
from vulnerabilities.models import AdvisoryWeakness
3031
from vulnerabilities.models import AffectedByPackageRelatedVulnerability
3132
from vulnerabilities.models import Alias
33+
from vulnerabilities.models import CodeCommit
3234
from vulnerabilities.models import FixingPackageRelatedVulnerability
3335
from vulnerabilities.models import Package
3436
from vulnerabilities.models import VulnerabilityReference
@@ -96,6 +98,42 @@ def get_or_create_advisory_weaknesses(weaknesses: List[str]) -> List[AdvisoryWea
9698
return list(AdvisoryWeakness.objects.filter(cwe_id__in=weaknesses))
9799

98100

101+
def get_or_create_advisory_code_commits(code_commits_data: List) -> List["CodeCommit"]:
    """
    Return the CodeCommit objects matching ``code_commits_data``, creating
    any that are missing.

    Each item must expose ``commit_hash`` and ``vcs_url`` attributes;
    ``commit_author``, ``commit_message`` and ``commit_date`` are read when
    present. A commit is identified by its (commit_hash, vcs_url) pair.
    Return an empty list when no commit data is given.
    """
    if not code_commits_data:
        return []

    # Keep the first occurrence of each pair so that repeated input entries do
    # not produce redundant OR clauses or duplicate unsaved instances.
    commits_by_pair = {}
    for commit_data in code_commits_data:
        commits_by_pair.setdefault((commit_data.commit_hash, commit_data.vcs_url), commit_data)

    query = Q()
    for commit_hash, vcs_url in commits_by_pair:
        query |= Q(commit_hash=commit_hash, vcs_url=vcs_url)

    existing_pairs = set(CodeCommit.objects.filter(query).values_list("commit_hash", "vcs_url"))

    to_create = [
        CodeCommit(
            commit_hash=commit_data.commit_hash,
            vcs_url=commit_data.vcs_url,
            commit_author=getattr(commit_data, "commit_author", None),
            commit_message=getattr(commit_data, "commit_message", None),
            commit_date=getattr(commit_data, "commit_date", None),
        )
        for pair, commit_data in commits_by_pair.items()
        if pair not in existing_pairs
    ]

    if to_create:
        CodeCommit.objects.bulk_create(to_create, ignore_conflicts=True)

    # Query again instead of reusing ``to_create``: with ignore_conflicts the
    # bulk-created instances do not get their primary keys set.
    return list(CodeCommit.objects.filter(query))
135+
136+
99137
def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None):
100138
from vulnerabilities.utils import compute_content_id
101139

@@ -150,6 +188,9 @@ def insert_advisory_v2(
150188
severities = get_or_create_advisory_severities(severities=advisory.severities)
151189
weaknesses = get_or_create_advisory_weaknesses(weaknesses=advisory.weaknesses)
152190
content_id = compute_content_id(advisory_data=advisory)
191+
affected_by_commits = get_or_create_advisory_code_commits(advisory.affected_by_commits)
192+
fixed_by_commits = get_or_create_advisory_code_commits(advisory.fixed_by_commits)
193+
153194
try:
154195
default_data = {
155196
"datasource_id": pipeline_id,
@@ -216,6 +257,9 @@ def insert_advisory_v2(
216257
impact.affecting_packages.add(*affected_packages_v2)
217258
impact.fixed_by_packages.add(*fixed_packages_v2)
218259

260+
impact.affecting_commits.add(*affected_by_commits)
261+
impact.fixed_by_commits.add(*fixed_by_commits)
262+
219263
return advisory_obj
220264

221265

vulnerabilities/tests/pipes/test_advisory.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,15 @@
1919
from vulnerabilities import models
2020
from vulnerabilities.importer import AdvisoryData
2121
from vulnerabilities.importer import AffectedPackage
22+
from vulnerabilities.importer import CodeCommitData
2223
from vulnerabilities.importer import Reference
2324
from vulnerabilities.models import AdvisoryAlias
2425
from vulnerabilities.models import AdvisoryReference
2526
from vulnerabilities.models import AdvisorySeverity
2627
from vulnerabilities.models import AdvisoryWeakness
28+
from vulnerabilities.models import CodeCommit
2729
from vulnerabilities.pipes.advisory import get_or_create_advisory_aliases
30+
from vulnerabilities.pipes.advisory import get_or_create_advisory_code_commits
2831
from vulnerabilities.pipes.advisory import get_or_create_advisory_references
2932
from vulnerabilities.pipes.advisory import get_or_create_advisory_severities
3033
from vulnerabilities.pipes.advisory import get_or_create_advisory_weaknesses
@@ -44,6 +47,18 @@ def setUp(self):
4447
)
4548
],
4649
references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")],
50+
affected_by_commits=[
51+
CodeCommitData(
52+
commit_hash="9ff29db8ec3adefefce0d37c3c9b5b2c22e59fac",
53+
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
54+
)
55+
],
56+
fixed_by_commits=[
57+
CodeCommitData(
58+
commit_hash="9ff29db8ec3adefefce0d37c3c9b5b2c22e59fac",
59+
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
60+
)
61+
],
4762
date_published=timezone.now(),
4863
url="https://test.com",
4964
)
@@ -160,6 +175,23 @@ def advisory_references():
160175
]
161176

162177

178+
@pytest.fixture
def advisory_commit():
    """
    Return two CodeCommitData samples for the same repository: one with every
    optional field set and one with only the required fields.
    """
    return [
        CodeCommitData(
            commit_hash="ef1659c01708b2111d6f06e2aa32f0f9d8768e10",
            vcs_url="https://github.com/aboutcode-org/vulnerablecode",
            commit_author="tester1",
            commit_message="message1",
            # Timezone-aware, like the other dates built in this test module.
            commit_date=timezone.now(),
        ),
        CodeCommitData(
            commit_hash="eccbb45ac2d9c0eb7e22ea82d1fc49f9f4cda818",
            vcs_url="https://github.com/aboutcode-org/vulnerablecode",
        ),
    ]
193+
194+
163195
@pytest.fixture
164196
def advisory_severities():
165197
class Severity:
@@ -225,3 +257,13 @@ def test_get_or_create_advisory_weaknesses(advisory_weaknesses):
225257
for w in weaknesses:
226258
assert isinstance(w, AdvisoryWeakness)
227259
assert w.cwe_id in advisory_weaknesses
260+
261+
262+
@pytest.mark.django_db
def test_get_or_create_advisory_commit(advisory_commit):
    """Every input commit yields a CodeCommit carrying a known hash and URL."""
    expected_hashes = [item.commit_hash for item in advisory_commit]
    expected_urls = [item.vcs_url for item in advisory_commit]

    stored = get_or_create_advisory_code_commits(advisory_commit)

    assert len(stored) == len(advisory_commit)
    for code_commit in stored:
        assert isinstance(code_commit, CodeCommit)
        assert code_commit.commit_hash in expected_hashes
        assert code_commit.vcs_url in expected_urls

vulnerabilities/tests/pipes/test_vulnerablecode_importer_pipeline_v2.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818

1919
from vulnerabilities.importer import AdvisoryData
2020
from vulnerabilities.importer import AffectedPackageV2
21+
from vulnerabilities.importer import CodeCommitData
2122
from vulnerabilities.models import AdvisoryV2
23+
from vulnerabilities.models import CodeCommit
2224
from vulnerabilities.models import ImpactedPackage
2325
from vulnerabilities.models import PackageV2
2426
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
@@ -59,6 +61,18 @@ def dummy_advisory():
5961
),
6062
],
6163
advisory_id="ADV-123",
64+
fixed_by_commits=[
65+
CodeCommitData(
66+
commit_hash="9ff29db8ec3adefefce0d37c3c9b5b2c22e59fac",
67+
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
68+
)
69+
],
70+
affected_by_commits=[
71+
CodeCommitData(
72+
commit_hash="ab99939678dc36b3bee0f366493df1aeef521df4",
73+
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
74+
)
75+
],
6276
date_published=datetime.now() - timedelta(days=10),
6377
url="https://example.com/advisory/1",
6478
)
@@ -92,4 +106,5 @@ def test_advisory_import_atomicity(dummy_importer):
92106
dummy_importer.collect_and_store_advisories()
93107
assert AdvisoryV2.objects.count() == 1
94108
assert ImpactedPackage.objects.count() == 2
109+
assert CodeCommit.objects.count() == 2
95110
assert PackageV2.objects.count() == 4

vulnerabilities/tests/test_data/archlinux/archlinux_advisoryv2-expected.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
"severities": [],
3030
"date_published": null,
3131
"weaknesses": [],
32+
"affected_by_commits": [],
33+
"fixed_by_commits": [],
3234
"url": "https://security.archlinux.org/AVG-2781.json"
3335
},
3436
{
@@ -63,6 +65,8 @@
6365
"severities": [],
6466
"date_published": null,
6567
"weaknesses": [],
68+
"affected_by_commits": [],
69+
"fixed_by_commits": [],
6670
"url": "https://security.archlinux.org/AVG-2780.json"
6771
},
6872
{
@@ -101,6 +105,8 @@
101105
"severities": [],
102106
"date_published": null,
103107
"weaknesses": [],
108+
"affected_by_commits": [],
109+
"fixed_by_commits": [],
104110
"url": "https://security.archlinux.org/AVG-4.json"
105111
}
106112
]

0 commit comments

Comments
 (0)