Skip to content

Commit 77cd6e0

Browse files
authored
Update fingerprint_stemmed_codebase_resources (#1961)
* Only run code stemming on source files that we can handle
* Update test

Signed-off-by: Jono Yang <jyang@nexb.com>
1 parent aadeb7f commit 77cd6e0

File tree

2 files changed

+8
-2
lines changed

2 files changed

+8
-2
lines changed

scanpipe/pipes/matchcode.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
from matchcode_toolkit.fingerprinting import get_file_fingerprint_hashes
3131
from matchcode_toolkit.fingerprinting import get_line_by_pos
3232
from matchcode_toolkit.fingerprinting import get_stemmed_file_fingerprint_hashes
33+
from matchcode_toolkit.stemming import TS_LANGUAGE_CONF
3334
from scancode import Scanner
3435

3536
from scanpipe.pipes import codebase
@@ -285,7 +286,9 @@ def fingerprint_stemmed_codebase_resources(
285286
"""
286287
# Checking for None to make the distinction with an empty resource_qs queryset
287288
if resource_qs is None:
288-
resource_qs = project.codebaseresources.filter(is_text=True)
289+
resource_qs = project.codebaseresources.filter(
290+
programming_language__in=TS_LANGUAGE_CONF.keys()
291+
)
289292

290293
if to_codebase_only:
291294
resource_qs = resource_qs.to_codebase()

scanpipe/tests/pipes/test_matchcode.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -399,7 +399,10 @@ def test_scanpipe_pipes_matchcode_fingerprint_stemmed_codebase_resources(self):
399399
self.project1.codebase_path,
400400
)
401401
codebase_resource3 = CodebaseResource.objects.create(
402-
project=self.project1, path="inherits.js", is_text=True
402+
project=self.project1,
403+
path="inherits.js",
404+
is_text=True,
405+
programming_language="JavaScript",
403406
)
404407

405408
matchcode.fingerprint_stemmed_codebase_resources(self.project1)

0 commit comments

Comments
 (0)