-
Notifications
You must be signed in to change notification settings - Fork 1
Add CodeQL for Python MCP and taskflow #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
d6ea161
bad9ab4
fe60ea1
72a154c
4af4a65
ab3ec17
a864ffa
b13e365
8c2f42f
341fadd
5be4848
6705d60
51b91b9
3d1fd19
d274b99
a3261aa
2b50b82
009c3a2
29eb221
eb5a0ff
9385063
3ad757d
22ba2d2
621deb8
d72359b
b30cbab
7b37b41
44941a5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -134,7 +134,7 @@ celerybeat.pid | |
| *.sage.py | ||
|
|
||
| # Environments | ||
| .env | ||
| *.env | ||
| .venv | ||
| env/ | ||
| venv/ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| Queries in support of the CodeQL MCP Server are maintained as query packs. | ||
|
|
||
| If you add your own queries, please follow established conventions for normal CodeQL query pack development. | ||
|
|
||
| To run the CodeQL for Python server: | ||
| - create a codespace, preferably with more cores | ||
| - install CodeQL extension for VS Code | ||
| - press `Ctrl/Cmd + Shift + P` and type "CodeQL: Install Pack Dependencies". Choose "sylwia-budzynska/mcp-python" and press "OK". | ||
| - find the path to the codeql binary, which comes preinstalled with the VS Code CodeQL extension, with the command: | ||
| ```bash | ||
| find ~ -type f -name codeql -executable 2>/dev/null | ||
| ``` | ||
| It will most likely look similar to this: | ||
| ``` | ||
| /home/codespace/.vscode-remote/data/User/globalStorage/github.vscode-codeql/distribution1/codeql/codeql | ||
| ``` | ||
| - create a folder named 'data' | ||
| - create or update your `.env` file in the root of this project with values for: | ||
| ``` | ||
| COPILOT_TOKEN= # a fine-grained GitHub personal access token with permission for "copilot chat" | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| CODEQL_DBS_BASE_PATH="/workspaces/seclab-taskflows/data/codeql_databases" #path to folder with your CodeQL databases | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| # Example values for a local setup, run with `python -m seclab_taskflow_agent -t seclab_taskflows.taskflows.audit.remote_sources_local` | ||
| MEMCACHE_STATE_DIR="/workspaces/seclab-taskflows/data" # path to folder for storing the memcache database | ||
| DATA_DIR="/workspaces/seclab-taskflows/data" # path to folder for storing the codeql_sqlite databases and all other data | ||
| GITHUB_PERSONAL_ACCESS_TOKEN= # can be the same token as COPILOT_TOKEN. Or another one, with access e.g. to private repositories | ||
| CODEQL_CLI= # output of command `find ~ -type f -name codeql -executable 2>/dev/null` | ||
| # Example docker env run with ./run_seclab_agent.sh [...] | ||
| # CODEQL_CLI="codeql" | ||
| # CODEQL_DBS_BASE_PATH="/app/data/codeql_databases" | ||
| # MEMCACHE_STATE_DIR="/app/data" | ||
| # DATA_DIR="/app/data" | ||
| ``` | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| # SPDX-FileCopyrightText: 2025 GitHub | ||
| # SPDX-License-Identifier: MIT | ||
|
|
||
| from sqlalchemy import String, Text, Integer, ForeignKey, Column | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| from sqlalchemy.orm import DeclarativeBase, mapped_column, Mapped, relationship | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| from typing import Optional | ||
|
|
||
class Base(DeclarativeBase):
    """Declarative base class that the ORM models in this module inherit from."""
|
|
||
|
|
||
class Source(Base):
    """ORM model mapped to the ``source`` table.

    Columns: an integer primary key, the repository name, the source
    location, the source type and optional free-form notes.
    """

    __tablename__ = 'source'

    id: Mapped[int] = mapped_column(primary_key=True)
    repo: Mapped[str]
    source_location: Mapped[str]
    type: Mapped[str]
    # Stored as TEXT and allowed to be NULL.
    notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    def __repr__(self):
        """Return a debugging representation listing every column value."""
        fields = (
            f"id={self.id}",
            f"repo={self.repo}",
            f"location={self.source_location}",
            f"type={self.type}",
            f"notes={self.notes}",
        )
        return "<Source(" + ", ".join(fields) + ")>"
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,260 @@ | ||
| # SPDX-FileCopyrightText: 2025 GitHub | ||
| # SPDX-License-Identifier: MIT | ||
|
|
||
|
|
||
import logging
import os

# logging.basicConfig opens the log file as soon as it is called, so the
# target directory has to exist beforehand; otherwise importing this module
# fails with FileNotFoundError.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='logs/mcp_codeql_python.log',
    filemode='a'
)
| from seclab_taskflow_agent.mcp_servers.codeql.client import run_query, file_from_uri, list_src_files, _debug_log, search_in_src_archive | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| from pydantic import Field | ||
| #from mcp.server.fastmcp import FastMCP, Context | ||
| from fastmcp import FastMCP, Context # use FastMCP 2.0 | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| from pathlib import Path | ||
| import os | ||
| import csv | ||
| import json | ||
| from sqlalchemy import create_engine | ||
| from sqlalchemy.orm import Session | ||
| from pathlib import Path | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| import zipfile | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| import httpx | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| import aiofiles | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| from .codeql_sqlite_models import Base, Source | ||
sylwia-budzynska marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
# Directory that holds the CodeQL sqlite database. CODEQL_SQLITE_DIR takes
# precedence; DATA_DIR (the variable the setup instructions document for the
# codeql_sqlite data) is used as a fallback before the built-in default.
MEMORY = Path(os.getenv('CODEQL_SQLITE_DIR', default=os.getenv('DATA_DIR', default='/app/my_data')))
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| mcp = FastMCP("CodeQL-Python") | ||
|
|
||
| CODEQL_DBS_BASE_PATH = Path(os.getenv('CODEQL_DBS_BASE_PATH', default='/workspaces/seclab-taskflow-agent/my_data')) | ||
|
|
||
| # tool name -> templated query lookup for supported languages | ||
| TEMPLATED_QUERY_PATHS = { | ||
| # to add a language, port the templated query pack and add its definition here | ||
| 'python': { | ||
| 'remote_sources': 'queries/mcp-python/remote_sources.ql' | ||
| } | ||
| } | ||
|
|
||
|
|
||
def source_to_dict(result):
    """Convert a source row object into a plain, JSON-friendly dict.

    The row's ``id`` attribute is exposed under the key ``source_id``; the
    remaining attributes keep their own names.
    """
    # (output key, attribute read from the row), in output order.
    key_to_attribute = (
        ("source_id", "id"),
        ("repo", "repo"),
        ("source_location", "source_location"),
        ("type", "type"),
        ("notes", "notes"),
    )
    return {key: getattr(result, attribute) for key, attribute in key_to_attribute}
|
|
||
def _resolve_query_path(language: str, query: str) -> Path:
    """Look up the templated query file registered for a language and tool name.

    Returns the path stored in TEMPLATED_QUERY_PATHS as a ``Path``.
    Raises RuntimeError when the language has no entry, or when the language
    has no (non-empty) path registered for ``query``.
    """
    if language in TEMPLATED_QUERY_PATHS:
        registered_path = TEMPLATED_QUERY_PATHS[language].get(query)
        if registered_path:
            return Path(registered_path)
        raise RuntimeError(f"Error: query `{query}` not supported for `{language}`!")
    raise RuntimeError(f"Error: Language `{language}` not supported!")
|
|
||
|
|
||
def _resolve_db_path(relative_db_path: str | Path):
    """Resolve a caller-supplied database path against CODEQL_DBS_BASE_PATH.

    Returns the joined path as a string.
    Raises RuntimeError when the path contains a ``..`` segment or when the
    joined path is not an existing directory.
    """
    # Joining "/A" with "/B" (or "////B") yields "/B", so surrounding
    # whitespace and leading slashes are stripped to keep the result under
    # the base directory. Not Windows compatible.
    relative_db_path = Path(str(relative_db_path).strip().lstrip('/'))
    # Parent-directory segments would let the joined path climb out of the
    # base directory, so they are rejected outright.
    if '..' in relative_db_path.parts:
        _debug_log(f"Database path escapes base directory: {relative_db_path}")
        raise RuntimeError(f"Error: Database path {relative_db_path} must not contain '..'!")
    absolute_path = CODEQL_DBS_BASE_PATH / relative_db_path
    if not absolute_path.is_dir():
        _debug_log(f"Database path not found: {absolute_path}")
        raise RuntimeError(f"Error: Database not found at {absolute_path}!")
    return str(absolute_path)
sylwia-budzynska marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| # This sqlite database is specifically made for CodeQL for Python MCP. | ||
class CodeqlSqliteBackend:
    """SQLite-backed store for the ``Source`` rows recorded by this server."""

    def __init__(self, memcache_state_dir: str | Path):
        """Create the engine and make sure the ``source`` table exists.

        When ``memcache_state_dir`` does not exist, an in-memory SQLite
        database is used instead of a ``codeql_sqlite.db`` file in that
        directory.
        """
        self.memcache_state_dir = memcache_state_dir
        self.location_pattern = r'^([a-zA-Z]+)(:\d+){4}$'
        if not Path(self.memcache_state_dir).exists():
            # A bare "sqlite://" URL selects an in-memory database.
            db_dir = 'sqlite://'
        else:
            db_dir = f'sqlite:///{self.memcache_state_dir}/codeql_sqlite.db'
        self.engine = create_engine(db_dir, echo=False)
        Base.metadata.create_all(self.engine, tables=[Source.__table__])

    def store_new_source(self, repo, source_location, type, notes, update = False):
        """Insert a source, or append notes to the one already stored.

        A row is identified by ``repo`` plus ``source_location``. If it
        exists, ``notes`` is appended to its notes. If it does not exist, a
        new row is inserted unless ``update`` is true, in which case nothing
        is written. Returns a human-readable status message in every case.
        """
        with Session(self.engine) as session:
            existing = session.query(Source).filter_by(repo = repo, source_location = source_location).first()
            if existing:
                # The notes column is nullable and callers may pass None;
                # treat either as an empty string so the append cannot raise.
                existing.notes = (existing.notes or '') + (notes or '')
                session.commit()
                return f"Updated notes for source at {source_location} in {repo}."
            if update:
                return f"No source exists at repo {repo}, location {source_location}"
            new_source = Source(repo = repo, source_location = source_location, type = type, notes = notes)
            session.add(new_source)
            session.commit()
            return f"Added new source for {source_location} in {repo}."

    def get_sources(self, repo):
        """Return every source stored for ``repo`` as a list of plain dicts."""
        with Session(self.engine) as session:
            results = session.query(Source).filter_by(repo=repo).all()
            sources = [source_to_dict(source) for source in results]
        return sources
|
|
||
|
|
||
| # our query result format is: "human readable template {val0} {val1},'key0,key1',val0,val1" | ||
| def _csv_parse(raw): | ||
| results = [] | ||
| reader = csv.reader(raw.strip().splitlines()) | ||
| try: | ||
| for i, row in enumerate(reader): | ||
| if i == 0: | ||
| continue | ||
| # col1 has what we care about, but offer flexibility | ||
| keys = row[1].split(',') | ||
| this_obj = {'description': row[0].format(*row[2:])} | ||
| for j, k in enumerate(keys): | ||
| this_obj[k.strip()] = row[j + 2] | ||
| results.append(this_obj) | ||
| except csv.Error as e: | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return ["Error: CSV parsing error: " + str(e)] | ||
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return results | ||
|
|
||
|
|
||
def _run_query(query_name: str, database_path: str, language: str, template_values: dict):
    """Run a templated CodeQL query and return its parsed results.

    On success the return value is whatever ``_csv_parse`` produces for the
    query's CSV output. Every failure is reported as a plain string: an
    unresolvable database path, an unsupported query/language pair, or any
    exception raised while running the query or parsing its output.
    """
    try:
        database_path = _resolve_db_path(database_path)
    except RuntimeError:
        return f"The database path for {database_path} could not be resolved"

    try:
        query_path = _resolve_query_path(language, query_name)
    except RuntimeError:
        return f"The query {query_name} is not supported for language: {language}"

    try:
        # Registered query paths are relative to the directory of this module.
        query_file = Path(__file__).parent.resolve() / query_path
        raw_output = run_query(query_file,
                               database_path,
                               fmt='csv',
                               template_values=template_values,
                               log_stderr=True)
        return _csv_parse(raw_output)
    except Exception as e:
        return f"The query {query_name} encountered an error: {e}"
|
|
||
def _get_file_contents(db: str | Path, uri: str):
    """Return what ``file_from_uri`` yields for ``uri`` in the database at ``db``."""
    return file_from_uri(uri, Path(db))
|
|
||
| backend = CodeqlSqliteBackend(MEMORY) | ||
|
|
||
@mcp.tool()
def remote_sources(owner: str, repo: str,
                   database_path: str = Field(description="The CodeQL database path."),
                   language: str = Field(description="The language used for the CodeQL database.")):
    """List all remote sources and their locations in a CodeQL database, then store the results in a database."""

    repo = f"{owner}/{repo}"
    results = _run_query('remote_sources', database_path, language, {})

    # _run_query reports its own failures as a plain string. Indexing that
    # string would yield only its first character, so handle it before the
    # list check below.
    if isinstance(results, str):
        return f"Error: {results}"
    # _csv_parse reports failures as a list whose first item is an error
    # string; valid data is a list of dicts.
    if results and isinstance(results[0], str):
        return f"Error: {results[0]}"

    # Store each result as a source
    stored_count = 0
    for result in results:
        backend.store_new_source(
            repo=repo,
            source_location=result.get('location', ''),
            type=result.get('source', ''),
            notes='',
            update=False
        )
        stored_count += 1

    return f"Stored {stored_count} remote sources in {repo}."
|
|
||
@mcp.tool()
def fetch_sources(owner: str, repo: str):
    """
    Fetch all sources from the repo
    """
    # Sources are stored under the combined "owner/repo" name.
    full_repo_name = f"{owner}/{repo}"
    stored_sources = backend.get_sources(full_repo_name)
    return json.dumps(stored_sources)
|
|
||
@mcp.tool()
def add_source_notes(owner: str, repo: str,
                     database_path: str = Field(description="The CodeQL database path."),
                     source_location: str = Field(description="The path to the file and column info that contains the source"),
                     notes: str = Field(description="The notes to append to this source", default="")):
    """
    Add new notes to an existing source. The notes will be appended to any existing notes.
    """
    repo = f"{owner}/{repo}"
    # The database path is only validated here; the resolved value is not used.
    try:
        _resolve_db_path(database_path)
    except RuntimeError:
        return f"The database path for {database_path} could not be resolved"
    # update=True: append to an existing source only, never create a new one.
    return backend.store_new_source(repo, source_location, "", notes, update=True)
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
@mcp.tool()
def clear_codeql_repo(owner: str, repo: str):
    """
    Clear all data for a given repo from the database
    """
    full_repo_name = f"{owner}/{repo}"
    with Session(backend.engine) as session:
        # The bulk delete returns the number of rows it removed.
        removed_count = session.query(Source).filter_by(repo=full_repo_name).delete()
        session.commit()
    return f"Cleared {removed_count} sources from repo {full_repo_name}."
|
|
||
@mcp.tool()
def get_file_contents(
        file_uri: str = Field(description="The file URI to get contents for. The URI scheme is defined as `file://path` and `file://path:region`. Examples of file URI: `file:///path/to/file:1:2:3:4`, `file:///path/to/file`. File URIs optionally contain a region definition that looks like `start_line:start_column:end_line:end_column` which will limit the contents returned to the specified region, for example `file:///path/to/file:1:2:3:4` indicates a file region of `1:2:3:4` which would return the content of the file starting at line 1, column 2 and ending at line 3 column 4. Line and column indices are 1-based, meaning line and column values start at 1. If the region is omitted the full contents of the file will be returned, for example `file:///path/to/file` returns the full contents of `/path/to/file`."),
        database_path: str = Field(description="The path to the CodeQL database.")):
    """Get the contents of a file URI from a CodeQL database path."""

    # Report an unresolvable database as an error string, the same way the
    # other tools in this module do, instead of letting RuntimeError escape.
    try:
        database_path = _resolve_db_path(database_path)
    except RuntimeError:
        return f"The database path for {database_path} could not be resolved"

    try:
        # fix up any incorrectly formatted relative path uri
        if not file_uri.startswith('file:///'):
            if file_uri.startswith('file://'):
                file_uri = file_uri[len('file://'):]
            file_uri = 'file:///' + file_uri.lstrip('/')
        results = _get_file_contents(database_path, file_uri)
    except Exception as e:
        results = f"Error: could not retrieve {file_uri}: {e}"
    return results
|
|
||
@mcp.tool()
def list_source_files(database_path: str = Field(description="The path to the CodeQL database."),
                      regex_filter: str = Field(description="Optional Regex filter.", default = r'[\s\S]+')):
    """List the available source files in a CodeQL database using their file:// URI"""
    # `re` is not imported at module level in this file, so it is imported
    # here; without it the filter below raises NameError on every call.
    import re

    # Compile once instead of re-resolving the pattern for every file.
    pattern = re.compile(regex_filter)
    database_path = _resolve_db_path(database_path)
    results = list_src_files(database_path, as_uri=True)
    return json.dumps([{'uri': item} for item in results if pattern.search(item)], indent=2)
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
sylwia-budzynska marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
@mcp.tool()
def search_in_source_code(database_path: str = Field(description="The path to the CodeQL database."),
                          search_term: str = Field(description="The term to search in the source code")):
    """
    Search for a string in the source code. Returns the line number and file.
    """
    resolved_database_path = _resolve_db_path(database_path)
    matches = search_in_src_archive(resolved_database_path, search_term)
    if isinstance(matches, dict):
        # One entry per matching file; the database is reported exactly as
        # the caller supplied it, not in its resolved form.
        out = [{"database" : database_path, "path" : path, "lines" : lines}
               for path, lines in matches.items()]
    else:
        out = []
    return json.dumps(out, indent = 2)
|
|
||
| if __name__ == "__main__": | ||
| mcp.run(show_banner=False, transport="http", host="127.0.0.1", port=9998) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| --- | ||
| lockVersion: 1.0.0 | ||
| dependencies: | ||
| codeql/concepts: | ||
| version: 0.0.8 | ||
| codeql/controlflow: | ||
| version: 2.0.18 | ||
| codeql/dataflow: | ||
| version: 2.0.18 | ||
| codeql/mad: | ||
| version: 1.0.34 | ||
| codeql/python-all: | ||
| version: 4.1.0 | ||
| codeql/regex: | ||
| version: 1.0.34 | ||
| codeql/ssa: | ||
| version: 2.0.10 | ||
| codeql/threat-models: | ||
| version: 1.0.34 | ||
| codeql/tutorial: | ||
| version: 1.0.34 | ||
| codeql/typetracking: | ||
| version: 2.0.18 | ||
| codeql/util: | ||
| version: 2.0.21 | ||
| codeql/xml: | ||
| version: 1.0.34 | ||
| codeql/yaml: | ||
| version: 1.0.34 | ||
| compiled: false |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| --- | ||
| library: false | ||
| warnOnImplicitThis: false | ||
| name: sylwia-budzynska/mcp-python | ||
| version: 0.0.1 | ||
| dependencies: | ||
| codeql/python-all: ^4.1.0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| /** | ||
| * This is an automatically generated file | ||
| * @name Hello world | ||
| * @kind problem | ||
| * @problem.severity warning | ||
| * @id python/example/hello-world | ||
| */ | ||
|
|
||
| import python | ||
|
|
||
| from File f | ||
| select f, "Hello, world!" |
Uh oh!
There was an error while loading. Please reload this page.