-
Notifications
You must be signed in to change notification settings - Fork 1
Add CodeQL for Python MCP and taskflow #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
d6ea161
bad9ab4
fe60ea1
72a154c
4af4a65
ab3ec17
a864ffa
b13e365
8c2f42f
341fadd
5be4848
6705d60
51b91b9
3d1fd19
d274b99
a3261aa
2b50b82
009c3a2
29eb221
eb5a0ff
9385063
3ad757d
22ba2d2
621deb8
d72359b
b30cbab
7b37b41
44941a5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -134,7 +134,7 @@ celerybeat.pid | |
| *.sage.py | ||
|
|
||
| # Environments | ||
| .env | ||
| *.env | ||
| .venv | ||
| env/ | ||
| venv/ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| Queries in support of the CodeQL MCP Server are maintained as query packs. | ||
|
|
||
| If you add your own queries, please follow established conventions for normal CodeQL query pack development. | ||
|
|
||
| To run the CodeQL for Python server: | ||
| - create a codespace, preferably on a machine type with more cores than the default | ||
| - install CodeQL extension for VS Code | ||
| - press `Ctrl/Cmd + Shift + P` and type "CodeQL: Install Pack Dependencies". Choose "sylwia-budzynska/mcp-python" and press "OK". | ||
| - find the path to the codeql binary, which comes preinstalled with the VS Code CodeQL extension, with the command: | ||
| ```bash | ||
| find ~ -type f -name codeql -executable 2>/dev/null | ||
| ``` | ||
| It will most likely look similar to this: | ||
| ``` | ||
| /home/codespace/.vscode-remote/data/User/globalStorage/github.vscode-codeql/distribution1/codeql/codeql | ||
| ``` | ||
| - create a folder named 'data' in the root of this project | ||
| - create or update your `.env` file in the root of this project with values for: | ||
| ``` | ||
| COPILOT_TOKEN= # a fine-grained GitHub personal access token with permission for "copilot chat" | ||
| CODEQL_DBS_BASE_PATH="/workspaces/seclab-taskflows/data/codeql_databases" # path to folder with your CodeQL databases | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| # Example values for a local setup, run with `python -m seclab_taskflow_agent -t seclab_taskflows.taskflows.audit.remote_sources_local` | ||
| MEMCACHE_STATE_DIR="/workspaces/seclab-taskflows/data" # path to folder for storing the memcache database | ||
| DATA_DIR="/workspaces/seclab-taskflows/data" # path to folder for storing the codeql_sqlite databases and all other data | ||
| GITHUB_PERSONAL_ACCESS_TOKEN= # can be the same token as COPILOT_TOKEN. Or another one, with access e.g. to private repositories | ||
| CODEQL_CLI= # output of command `find ~ -type f -name codeql -executable 2>/dev/null` | ||
| # Example docker env run with ./run_seclab_agent.sh [...] | ||
| # CODEQL_CLI="codeql" | ||
| # CODEQL_DBS_BASE_PATH="/app/data/codeql_databases" | ||
| # MEMCACHE_STATE_DIR="/app/data" | ||
| # DATA_DIR="/app/data" | ||
| ``` | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| # SPDX-FileCopyrightText: 2025 GitHub | ||
| # SPDX-License-Identifier: MIT | ||
|
|
||
| from sqlalchemy import Text | ||
| from sqlalchemy.orm import DeclarativeBase, mapped_column, Mapped | ||
| from typing import Optional | ||
|
|
||
class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this module inherit from."""
|
|
||
|
|
||
class Source(Base):
    """ORM model mapped to the ``source`` table.

    Each row carries a repository name, a source location, a line number,
    a type string and optional free-form notes.
    """

    __tablename__ = 'source'

    id: Mapped[int] = mapped_column(primary_key=True)
    repo: Mapped[str]
    source_location: Mapped[str]
    line: Mapped[int]
    type: Mapped[str]
    # The only nullable column; stored as TEXT.
    notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    def __repr__(self):
        """Return a debug representation that includes every column."""
        # `line` is included so that rows differing only by line number
        # can be told apart in logs.
        return (f"<Source(id={self.id}, repo={self.repo}, "
                f"location={self.source_location}, line={self.line}, "
                f"type={self.type}, notes={self.notes})>")
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,216 @@ | ||||||
| # SPDX-FileCopyrightText: 2025 GitHub | ||||||
| # SPDX-License-Identifier: MIT | ||||||
|
|
||||||
|
|
||||||
import logging
import os

# logging.basicConfig(filename=...) opens the log file immediately, so the
# target directory has to exist before the call or the import fails with
# FileNotFoundError.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='logs/mcp_codeql_python.log',
    filemode='a'
)
| from seclab_taskflow_agent.mcp_servers.codeql.client import run_query, file_from_uri, list_src_files, _debug_log, search_in_src_archive | ||||||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
|
|
||||||
| from pydantic import Field | ||||||
| #from mcp.server.fastmcp import FastMCP, Context | ||||||
| from fastmcp import FastMCP # use FastMCP 2.0 | ||||||
sylwia-budzynska marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| from pathlib import Path | ||||||
| import os | ||||||
| import csv | ||||||
| import json | ||||||
| from sqlalchemy import create_engine | ||||||
| from sqlalchemy.orm import Session | ||||||
|
|
||||||
| from .codeql_sqlite_models import Base, Source | ||||||
sylwia-budzynska marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| from .utils import process_repo | ||||||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
|
|
||||||
| MEMORY = Path(os.getenv('CODEQL_SQLITE_DIR', default='/app/my_data')) | ||||||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| mcp = FastMCP("CodeQL-Python") | ||||||
|
|
||||||
| CODEQL_DBS_BASE_PATH = Path(os.getenv('CODEQL_DBS_BASE_PATH', default='/workspaces/seclab-taskflow-agent/my_data')) | ||||||
|
|
||||||
| # tool name -> templated query lookup for supported languages | ||||||
| TEMPLATED_QUERY_PATHS = { | ||||||
| # to add a language, port the templated query pack and add its definition here | ||||||
| 'python': { | ||||||
| 'remote_sources': 'queries/mcp-python/remote_sources.ql' | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
def source_to_dict(result):
    """Convert a source row object into a plain dictionary.

    The object's ``id`` is exposed under the key ``"source_id"``; every
    other key has the same name as the attribute it is read from.
    """
    payload = {"source_id": result.id}
    for attribute in ("repo", "source_location", "line", "type", "notes"):
        payload[attribute] = getattr(result, attribute)
    return payload
|
|
||||||
def _resolve_query_path(language: str, query: str) -> Path:
    """Return the templated query path registered for ``query`` in ``language``.

    Raises:
        RuntimeError: if ``language`` has no entry in ``TEMPLATED_QUERY_PATHS``,
            or that entry has no non-empty path for ``query``.
    """
    # TODO: a reviewer on the pull request suggested refactoring this lookup.
    if language not in TEMPLATED_QUERY_PATHS:
        raise RuntimeError(f"Error: Language `{language}` not supported!")
    registered = TEMPLATED_QUERY_PATHS[language].get(query)
    if registered:
        return Path(registered)
    raise RuntimeError(f"Error: query `{query}` not supported for `{language}`!")
|
|
||||||
|
|
||||||
def _resolve_db_path(relative_db_path: str | Path):
    """Resolve a database path relative to ``CODEQL_DBS_BASE_PATH``.

    Args:
        relative_db_path: path of the database directory, interpreted
            relative to the base path. Surrounding whitespace and leading
            slashes are stripped before joining.

    Returns:
        The resolved absolute path as a string.

    Raises:
        RuntimeError: if the resolved path lies outside the base path, or
            if it is not an existing directory.
    """
    # Joining onto an absolute right-hand side ("/A" / "////B") would discard
    # the base and yield "/B", so leading slashes are removed first.
    # Not Windows compatible and probably needs additional hardening.
    relative = Path(str(relative_db_path).strip().lstrip('/'))
    base_path = CODEQL_DBS_BASE_PATH.resolve()
    absolute_path = (CODEQL_DBS_BASE_PATH / relative).resolve()
    # Compare path components rather than string prefixes: a plain
    # startswith() check would accept a sibling such as "<base>_other"
    # reached through "../".
    if not absolute_path.is_relative_to(base_path):
        raise RuntimeError(f"Error: Database path {absolute_path} is outside the base path {CODEQL_DBS_BASE_PATH}")
    if not absolute_path.is_dir():
        _debug_log(f"Database path not found: {absolute_path}")
        raise RuntimeError(f"Error: Database not found at {absolute_path}!")
    return str(absolute_path)
|
|
||||||
| # This sqlite database is specifically made for CodeQL for Python MCP. | ||||||
| class CodeqlSqliteBackend: | ||||||
| def __init__(self, memcache_state_dir: str): | ||||||
| self.memcache_state_dir = memcache_state_dir | ||||||
| self.location_pattern = r'^([a-zA-Z]+)(:\d+){4}$' | ||||||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| if not Path(self.memcache_state_dir).exists(): | ||||||
| db_dir = 'sqlite://' | ||||||
| else: | ||||||
| db_dir = f'sqlite:///{self.memcache_state_dir}/codeql_sqlite.db' | ||||||
| self.engine = create_engine(db_dir, echo=False) | ||||||
| Base.metadata.create_all(self.engine, tables=[Source.__table__]) | ||||||
|
|
||||||
|
|
||||||
| def store_new_source(self, repo, source_location, line, type, notes, update = False): | ||||||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| with Session(self.engine) as session: | ||||||
| existing = session.query(Source).filter_by(repo = repo, source_location = source_location, line = line).first() | ||||||
| if existing: | ||||||
| existing.notes = (existing.notes or "") + notes | ||||||
| session.commit() | ||||||
| return f"Updated notes for source at {source_location}, line {line} in {repo}." | ||||||
| else: | ||||||
| if update: | ||||||
| return f"No source exists at repo {repo}, location {source_location}, line {line} to update." | ||||||
| new_source = Source(repo = repo, source_location = source_location, line = line, type = type, notes = notes) | ||||||
|
||||||
| new_source = Source(repo = repo, source_location = source_location, line = line, type = type, notes = notes) | |
| new_source = Source(repo = repo, source_location = source_location, line = line, type = type, notes = notes) |
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| --- | ||
| lockVersion: 1.0.0 | ||
| dependencies: | ||
| codeql/concepts: | ||
| version: 0.0.8 | ||
| codeql/controlflow: | ||
| version: 2.0.18 | ||
| codeql/dataflow: | ||
| version: 2.0.18 | ||
| codeql/mad: | ||
| version: 1.0.34 | ||
| codeql/python-all: | ||
| version: 4.1.0 | ||
| codeql/regex: | ||
| version: 1.0.34 | ||
| codeql/ssa: | ||
| version: 2.0.10 | ||
| codeql/threat-models: | ||
| version: 1.0.34 | ||
| codeql/tutorial: | ||
| version: 1.0.34 | ||
| codeql/typetracking: | ||
| version: 2.0.18 | ||
| codeql/util: | ||
| version: 2.0.21 | ||
| codeql/xml: | ||
| version: 1.0.34 | ||
| codeql/yaml: | ||
| version: 1.0.34 | ||
| compiled: false |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| --- | ||
| library: false | ||
| warnOnImplicitThis: false | ||
| name: sylwia-budzynska/mcp-python | ||
| version: 0.0.1 | ||
| dependencies: | ||
| codeql/python-all: ^4.1.0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| /** | ||
| * This is an automatically generated file | ||
| * @name Hello world | ||
| * @kind problem | ||
| * @problem.severity warning | ||
| * @id python/example/hello-world | ||
| */ | ||
|
|
||
| import python | ||
|
|
||
| from File f | ||
| select f, "Hello, world!" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| /** | ||
| * @id mcp-python/remote-sources | ||
| * @name Python Remote Sources | ||
| * @description Identifies nodes that act as remote sources in Python code, along with their locations. | ||
| * @tags source, location | ||
| */ | ||
| import python | ||
| import semmle.python.dataflow.new.RemoteFlowSources | ||
|
|
||
| // string normalizeLocation(Location l) { | ||
| // result = l.getFile().getRelativePath() + ":" + l.getStartLine().toString() + ":" + l.getStartColumn().toString() | ||
| // + ":" + l.getEndLine().toString() + ":" + l.getEndColumn().toString() | ||
| // } | ||
sylwia-budzynska marked this conversation as resolved.
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| from RemoteFlowSource source | ||
| select | ||
| "Remote source {0} is defined at {1} line {2}", | ||
| "source,location,line", | ||
| source.getSourceType(), | ||
| source.getLocation().getFile().getRelativePath(), | ||
| source.getLocation().getStartLine().toString() | ||
Uh oh!
There was an error while loading. Please reload this page.